diff options
424 files changed, 7034 insertions, 2743 deletions
diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml index 3bc5041c0..9163db56d 100644 --- a/.buildkite/pipeline.yaml +++ b/.buildkite/pipeline.yaml @@ -90,6 +90,9 @@ steps: label: ":person_in_lotus_position: KVM tests" command: make kvm-tests - <<: *common + label: ":weight_lifter: Fsstress test" + command: make fsstress-test + - <<: *common label: ":docker: Containerd 1.3.9 tests" command: make containerd-test-1.3.9 - <<: *common @@ -144,6 +144,7 @@ dev: $(RUNTIME_BIN) ## Installs a set of local runtimes. Requires sudo. @$(call configure_noreload,$(RUNTIME)-p,--net-raw --profile) @$(call configure_noreload,$(RUNTIME)-vfs2-d,--net-raw --debug --strace --log-packets --vfs2) @$(call configure_noreload,$(RUNTIME)-vfs2-fuse-d,--net-raw --debug --strace --log-packets --vfs2 --fuse) + @$(call configure_noreload,$(RUNTIME)-vfs2-cgroup-d,--net-raw --debug --strace --log-packets --vfs2 --cgroupfs) @$(call reload_docker) .PHONY: dev @@ -340,7 +341,8 @@ BENCHMARKS_FILTER := . BENCHMARKS_OPTIONS := -test.benchtime=30s BENCHMARKS_ARGS := -test.v -test.bench=$(BENCHMARKS_FILTER) $(BENCHMARKS_OPTIONS) BENCHMARKS_PROFILE := -pprof-dir=/tmp/profile -pprof-cpu -pprof-heap -pprof-block -pprof-mutex -BENCH_RUNTIME_ARGS ?= --vfs2 +BENCH_VFS := --vfs2 +BENCH_RUNTIME_ARGS ?= init-benchmark-table: ## Initializes a BigQuery table with the benchmark schema. @$(call run,//tools/parsers:parser,init --project=$(BENCHMARKS_PROJECT) --dataset=$(BENCHMARKS_DATASET) --table=$(BENCHMARKS_TABLE)) @@ -361,13 +363,14 @@ run_benchmark = \ benchmark-platforms: load-benchmarks $(RUNTIME_BIN) ## Runs benchmarks for runc and all given platforms in BENCHMARK_PLATFORMS. @$(foreach PLATFORM,$(BENCHMARKS_PLATFORMS), \ - $(call run_benchmark,$(PLATFORM),--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS)) && \ - ) true + $(call run_benchmark,$(PLATFORM),--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS) --vfs2) && \ + $(call run_benchmark,$(PLATFORM)_vfs1,--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS)) && \ + ) true @$(call run_benchmark,runc) .PHONY: benchmark-platforms run-benchmark: load-benchmarks $(RUNTIME_BIN) ## Runs single benchmark and optionally sends data to BigQuery. - @$(call run_benchmark,$(RUNTIME),$(BENCH_RUNTIME_ARGS)) + @$(call run_benchmark,$(RUNTIME)$(BENCH_VFS),$(BENCH_RUNTIME_ARGS) $(BENCH_VFS)) .PHONY: run-benchmark ## diff --git a/g3doc/architecture_guide/platforms.md b/g3doc/architecture_guide/platforms.md index d112c9a28..e19c77236 100644 --- a/g3doc/architecture_guide/platforms.md +++ b/g3doc/architecture_guide/platforms.md @@ -18,8 +18,8 @@ type Context interface { } type AddressSpace interface { - MapFile(addr usermem.Addr, f File, fr FileRange, at usermem.AccessType, ...) error - Unmap(addr usermem.Addr, length uint64) + MapFile(addr hostarch.Addr, f File, fr FileRange, at hostarch.AccessType, ...) error + Unmap(addr hostarch.Addr, length uint64) } ``` @@ -55,8 +55,6 @@ global: # Same story for underscores. - "should not use ALL_CAPS in Go names" - "should not use underscores in Go names" - # TODO(b/179817829): Upgrade to flock to v0.8.0. - - "flock.NewFlock is deprecated: Use New instead" exclude: # Generated: exempt all. - pkg/shim/runtimeoptions/runtimeoptions_cri.go diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go index 0d921ed6f..cad24fcc7 100644 --- a/pkg/abi/linux/fs.go +++ b/pkg/abi/linux/fs.go @@ -19,8 +19,10 @@ package linux // See linux/magic.h. const ( ANON_INODE_FS_MAGIC = 0x09041934 + CGROUP_SUPER_MAGIC = 0x27e0eb DEVPTS_SUPER_MAGIC = 0x00001cd1 EXT_SUPER_MAGIC = 0xef53 + FUSE_SUPER_MAGIC = 0x65735546 OVERLAYFS_SUPER_MAGIC = 0x794c7630 PIPEFS_MAGIC = 0x50495045 PROC_SUPER_MAGIC = 0x9fa0 @@ -29,7 +31,6 @@ const ( SYSFS_MAGIC = 0x62656572 TMPFS_MAGIC = 0x01021994 V9FS_MAGIC = 0x01021997 - FUSE_SUPER_MAGIC = 0x65735546 ) // Filesystem path limits, from uapi/linux/limits.h. diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go index 50e22fe7e..e722971f1 100644 --- a/pkg/abi/linux/ptrace_amd64.go +++ b/pkg/abi/linux/ptrace_amd64.go @@ -61,3 +61,8 @@ func (p *PtraceRegs) InstructionPointer() uint64 { func (p *PtraceRegs) StackPointer() uint64 { return p.Rsp } + +// SetStackPointer sets the stack pointer to the specified value. +func (p *PtraceRegs) SetStackPointer(sp uint64) { + p.Rsp = sp +} diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go index da36811d2..3d0906565 100644 --- a/pkg/abi/linux/ptrace_arm64.go +++ b/pkg/abi/linux/ptrace_arm64.go @@ -38,3 +38,8 @@ func (p *PtraceRegs) InstructionPointer() uint64 { func (p *PtraceRegs) StackPointer() uint64 { return p.Sp } + +// SetStackPointer sets the stack pointer to the specified value. +func (p *PtraceRegs) SetStackPointer(sp uint64) { + p.Sp = sp +} diff --git a/pkg/coverage/BUILD b/pkg/coverage/BUILD index a198e8028..ace5895f8 100644 --- a/pkg/coverage/BUILD +++ b/pkg/coverage/BUILD @@ -7,8 +7,8 @@ go_library( srcs = ["coverage.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/hostarch", "//pkg/sync", - "//pkg/usermem", "@io_bazel_rules_go//go/tools/coverdata", ], ) diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go index 6f3d72e83..a6778a005 100644 --- a/pkg/coverage/coverage.go +++ b/pkg/coverage/coverage.go @@ -28,8 +28,8 @@ import ( "sort" "testing" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" "github.com/bazelbuild/rules_go/go/tools/coverdata" ) @@ -141,7 +141,7 @@ func ConsumeCoverageData(w io.Writer) int { // Non-zero coverage data found; consume it and report as a PC. counters[index] = 0 pc := globalData.syntheticPCs[fileNum][index] - usermem.ByteOrder.PutUint64(pcBuffer[:], pc) + hostarch.ByteOrder.PutUint64(pcBuffer[:], pc) n, err := w.Write(pcBuffer[:]) if err != nil { if err == io.EOF { diff --git a/pkg/hostarch/BUILD b/pkg/hostarch/BUILD new file mode 100644 index 000000000..1e8def4d9 --- /dev/null +++ b/pkg/hostarch/BUILD @@ -0,0 +1,42 @@ +load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "addr_range", + out = "addr_range.go", + package = "hostarch", + prefix = "Addr", + template = "//pkg/segment:generic_range", + types = { + "T": "Addr", + }, +) + +go_test( + name = "hostarch_test", + size = "small", + srcs = [ + "addr_range_seq_test.go", + ], + library = ":hostarch", +) + +go_library( + name = "hostarch", + srcs = [ + "access_type.go", + "addr.go", + "addr_range.go", + "addr_range_seq_unsafe.go", + "hostarch.go", + "hostarch_arm64.go", + "hostarch_x86.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/gohacks", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/usermem/access_type.go b/pkg/hostarch/access_type.go index 2cfca29af..e30476840 100644 --- a/pkg/usermem/access_type.go +++ b/pkg/hostarch/access_type.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package usermem +package hostarch import "golang.org/x/sys/unix" diff --git a/pkg/usermem/addr.go b/pkg/hostarch/addr.go index c4100481e..0cf0f3c81 100644 --- a/pkg/usermem/addr.go +++ b/pkg/hostarch/addr.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package usermem +package hostarch import ( "fmt" @@ -57,7 +57,7 @@ func (v Addr) RoundUp() (addr Addr, ok bool) { func (v Addr) MustRoundUp() Addr { addr, ok := v.RoundUp() if !ok { - panic(fmt.Sprintf("usermem.Addr(%d).RoundUp() wraps", v)) + panic(fmt.Sprintf("hostarch.Addr(%d).RoundUp() wraps", v)) } return addr } diff --git a/pkg/usermem/addr_range_seq_test.go b/pkg/hostarch/addr_range_seq_test.go index 82f735026..5726dfd19 100644 --- a/pkg/usermem/addr_range_seq_test.go +++ b/pkg/hostarch/addr_range_seq_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package usermem +package hostarch import ( "testing" diff --git a/pkg/usermem/addr_range_seq_unsafe.go b/pkg/hostarch/addr_range_seq_unsafe.go index c9a1415a0..ecc17d595 100644 --- a/pkg/usermem/addr_range_seq_unsafe.go +++ b/pkg/hostarch/addr_range_seq_unsafe.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package usermem +package hostarch import ( "bytes" diff --git a/pkg/hostarch/hostarch.go b/pkg/hostarch/hostarch.go new file mode 100644 index 000000000..fdd29c567 --- /dev/null +++ b/pkg/hostarch/hostarch.go @@ -0,0 +1,7 @@ +// Copyright 2021 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package hostarch contains host arch address operations for user memory. +package hostarch diff --git a/pkg/usermem/usermem_arm64.go b/pkg/hostarch/hostarch_arm64.go index 7e7529585..a31a8aeeb 100644 --- a/pkg/usermem/usermem_arm64.go +++ b/pkg/hostarch/hostarch_arm64.go @@ -14,7 +14,7 @@ // +build arm64 -package usermem +package hostarch import ( "encoding/binary" diff --git a/pkg/usermem/usermem_x86.go b/pkg/hostarch/hostarch_x86.go index d96f829fb..af6ef2b7f 100644 --- a/pkg/usermem/usermem_x86.go +++ b/pkg/hostarch/hostarch_x86.go @@ -14,7 +14,7 @@ // +build amd64 386 -package usermem +package hostarch import "encoding/binary" diff --git a/pkg/marshal/BUILD b/pkg/marshal/BUILD index aac0161fa..7cd89e639 100644 --- a/pkg/marshal/BUILD +++ b/pkg/marshal/BUILD @@ -11,5 +11,5 @@ go_library( visibility = [ "//:sandbox", ], - deps = ["//pkg/usermem"], + deps = ["//pkg/hostarch"], ) diff --git a/pkg/marshal/marshal.go b/pkg/marshal/marshal.go index d8cb44b40..eb036feae 100644 --- a/pkg/marshal/marshal.go +++ b/pkg/marshal/marshal.go @@ -23,7 +23,7 @@ package marshal import ( "io" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // CopyContext defines the memory operations required to marshal to and from @@ -36,11 +36,11 @@ type CopyContext interface { // CopyOutBytes writes the contents of b to the task's memory. See // kernel.CopyOutBytes. - CopyOutBytes(addr usermem.Addr, b []byte) (int, error) + CopyOutBytes(addr hostarch.Addr, b []byte) (int, error) // CopyInBytes reads the contents of the task's memory to b. See // kernel.CopyInBytes. - CopyInBytes(addr usermem.Addr, b []byte) (int, error) + CopyInBytes(addr hostarch.Addr, b []byte) (int, error) } // Marshallable represents operations on a type that can be marshalled to and @@ -108,7 +108,7 @@ type Marshallable interface { // If the copy-in from the task memory is only partially successful, CopyIn // should still attempt to deserialize as much data as possible. See comment // for UnmarshalBytes. - CopyIn(cc CopyContext, addr usermem.Addr) (int, error) + CopyIn(cc CopyContext, addr hostarch.Addr) (int, error) // CopyOut serializes a Marshallable type to a task's memory. This may only // be called from a task goroutine. This is more efficient than calling @@ -119,7 +119,7 @@ type Marshallable interface { // The copy-out to the task memory may be partially successful, in which // case CopyOut returns how much data was serialized. See comment for // MarshalBytes for implications. - CopyOut(cc CopyContext, addr usermem.Addr) (int, error) + CopyOut(cc CopyContext, addr hostarch.Addr) (int, error) // CopyOutN is like CopyOut, but explicitly requests a partial // copy-out. Note that this may yield unexpected results for non-packed @@ -127,7 +127,7 @@ type Marshallable interface { // comment on MarshalBytes. // // The limit must be less than or equal to SizeBytes(). - CopyOutN(cc CopyContext, addr usermem.Addr, limit int) (int, error) + CopyOutN(cc CopyContext, addr hostarch.Addr, limit int) (int, error) } // go-marshal generates additional functions for a type based on additional @@ -157,10 +157,10 @@ type Marshallable interface { // func UnmarshalUnsafeFooSlice(dst []Foo, src []byte) (int, error) { ... } // // // CopyFooSliceIn copies in a slice of Foo objects from the task's memory. -// func CopyFooSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []Foo) (int, error) { ... } +// func CopyFooSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Foo) (int, error) { ... } // // // CopyFooSliceIn copies out a slice of Foo objects to the task's memory. -// func CopyFooSliceOut(cc marshal.CopyContext, addr usermem.Addr, src []Foo) (int, error) { ... } +// func CopyFooSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []Foo) (int, error) { ... } // // The name of the functions are of the format "Copy%sIn" and "Copy%sOut", where // %s is the first argument to the slice clause. This directive is not supported @@ -175,10 +175,10 @@ type Marshallable interface { // This is only valid on newtypes on primitives, and causes the generated // functions to accept slices of the inner type instead: // -// func CopyInt32SliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []int32) (int, error) { ... } +// func CopyInt32SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []int32) (int, error) { ... } // // Without "inner", they would instead be: // -// func CopyInt32SliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []Int32) (int, error) { ... } +// func CopyInt32SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Int32) (int, error) { ... } // // This may help avoid a cast depending on how the generated functions are used. diff --git a/pkg/marshal/marshal_impl_util.go b/pkg/marshal/marshal_impl_util.go index ea75e09f2..9e6a6fa29 100644 --- a/pkg/marshal/marshal_impl_util.go +++ b/pkg/marshal/marshal_impl_util.go @@ -17,7 +17,7 @@ package marshal import ( "io" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // StubMarshallable implements the Marshallable interface. @@ -63,16 +63,16 @@ func (StubMarshallable) UnmarshalUnsafe(src []byte) { } // CopyIn implements Marshallable.CopyIn. -func (StubMarshallable) CopyIn(cc CopyContext, addr usermem.Addr) (int, error) { +func (StubMarshallable) CopyIn(cc CopyContext, addr hostarch.Addr) (int, error) { panic("Please implement your own CopyIn function") } // CopyOut implements Marshallable.CopyOut. -func (StubMarshallable) CopyOut(cc CopyContext, addr usermem.Addr) (int, error) { +func (StubMarshallable) CopyOut(cc CopyContext, addr hostarch.Addr) (int, error) { panic("Please implement your own CopyOut function") } // CopyOutN implements Marshallable.CopyOutN. -func (StubMarshallable) CopyOutN(cc CopyContext, addr usermem.Addr, limit int) (int, error) { +func (StubMarshallable) CopyOutN(cc CopyContext, addr hostarch.Addr, limit int) (int, error) { panic("Please implement your own CopyOutN function") } diff --git a/pkg/marshal/primitive/BUILD b/pkg/marshal/primitive/BUILD index d77a11c79..190b57c29 100644 --- a/pkg/marshal/primitive/BUILD +++ b/pkg/marshal/primitive/BUILD @@ -13,6 +13,7 @@ go_library( ], deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/marshal", "//pkg/usermem", ], diff --git a/pkg/marshal/primitive/primitive.go b/pkg/marshal/primitive/primitive.go index 4b342de6b..32c8ed138 100644 --- a/pkg/marshal/primitive/primitive.go +++ b/pkg/marshal/primitive/primitive.go @@ -20,6 +20,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/usermem" ) @@ -102,17 +103,17 @@ func (b *ByteSlice) UnmarshalUnsafe(src []byte) { } // CopyIn implements marshal.Marshallable.CopyIn. -func (b *ByteSlice) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) { +func (b *ByteSlice) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { return cc.CopyInBytes(addr, *b) } // CopyOut implements marshal.Marshallable.CopyOut. -func (b *ByteSlice) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) { +func (b *ByteSlice) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { return cc.CopyOutBytes(addr, *b) } // CopyOutN implements marshal.Marshallable.CopyOutN. -func (b *ByteSlice) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { +func (b *ByteSlice) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { return cc.CopyOutBytes(addr, (*b)[:limit]) } @@ -131,7 +132,7 @@ var _ marshal.Marshallable = (*ByteSlice)(nil) // CopyInt8In is a convenient wrapper for copying in an int8 from the task's // memory. -func CopyInt8In(cc marshal.CopyContext, addr usermem.Addr, dst *int8) (int, error) { +func CopyInt8In(cc marshal.CopyContext, addr hostarch.Addr, dst *int8) (int, error) { var buf Int8 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -143,14 +144,14 @@ func CopyInt8In(cc marshal.CopyContext, addr usermem.Addr, dst *int8) (int, erro // CopyInt8Out is a convenient wrapper for copying out an int8 to the task's // memory. -func CopyInt8Out(cc marshal.CopyContext, addr usermem.Addr, src int8) (int, error) { +func CopyInt8Out(cc marshal.CopyContext, addr hostarch.Addr, src int8) (int, error) { srcP := Int8(src) return srcP.CopyOut(cc, addr) } // CopyUint8In is a convenient wrapper for copying in a uint8 from the task's // memory. -func CopyUint8In(cc marshal.CopyContext, addr usermem.Addr, dst *uint8) (int, error) { +func CopyUint8In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint8) (int, error) { var buf Uint8 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -162,7 +163,7 @@ func CopyUint8In(cc marshal.CopyContext, addr usermem.Addr, dst *uint8) (int, er // CopyUint8Out is a convenient wrapper for copying out a uint8 to the task's // memory. -func CopyUint8Out(cc marshal.CopyContext, addr usermem.Addr, src uint8) (int, error) { +func CopyUint8Out(cc marshal.CopyContext, addr hostarch.Addr, src uint8) (int, error) { srcP := Uint8(src) return srcP.CopyOut(cc, addr) } @@ -171,7 +172,7 @@ func CopyUint8Out(cc marshal.CopyContext, addr usermem.Addr, src uint8) (int, er // CopyInt16In is a convenient wrapper for copying in an int16 from the task's // memory. -func CopyInt16In(cc marshal.CopyContext, addr usermem.Addr, dst *int16) (int, error) { +func CopyInt16In(cc marshal.CopyContext, addr hostarch.Addr, dst *int16) (int, error) { var buf Int16 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -183,14 +184,14 @@ func CopyInt16In(cc marshal.CopyContext, addr usermem.Addr, dst *int16) (int, er // CopyInt16Out is a convenient wrapper for copying out an int16 to the task's // memory. -func CopyInt16Out(cc marshal.CopyContext, addr usermem.Addr, src int16) (int, error) { +func CopyInt16Out(cc marshal.CopyContext, addr hostarch.Addr, src int16) (int, error) { srcP := Int16(src) return srcP.CopyOut(cc, addr) } // CopyUint16In is a convenient wrapper for copying in a uint16 from the task's // memory. -func CopyUint16In(cc marshal.CopyContext, addr usermem.Addr, dst *uint16) (int, error) { +func CopyUint16In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint16) (int, error) { var buf Uint16 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -202,7 +203,7 @@ func CopyUint16In(cc marshal.CopyContext, addr usermem.Addr, dst *uint16) (int, // CopyUint16Out is a convenient wrapper for copying out a uint16 to the task's // memory. -func CopyUint16Out(cc marshal.CopyContext, addr usermem.Addr, src uint16) (int, error) { +func CopyUint16Out(cc marshal.CopyContext, addr hostarch.Addr, src uint16) (int, error) { srcP := Uint16(src) return srcP.CopyOut(cc, addr) } @@ -211,7 +212,7 @@ func CopyUint16Out(cc marshal.CopyContext, addr usermem.Addr, src uint16) (int, // CopyInt32In is a convenient wrapper for copying in an int32 from the task's // memory. -func CopyInt32In(cc marshal.CopyContext, addr usermem.Addr, dst *int32) (int, error) { +func CopyInt32In(cc marshal.CopyContext, addr hostarch.Addr, dst *int32) (int, error) { var buf Int32 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -223,14 +224,14 @@ func CopyInt32In(cc marshal.CopyContext, addr usermem.Addr, dst *int32) (int, er // CopyInt32Out is a convenient wrapper for copying out an int32 to the task's // memory. -func CopyInt32Out(cc marshal.CopyContext, addr usermem.Addr, src int32) (int, error) { +func CopyInt32Out(cc marshal.CopyContext, addr hostarch.Addr, src int32) (int, error) { srcP := Int32(src) return srcP.CopyOut(cc, addr) } // CopyUint32In is a convenient wrapper for copying in a uint32 from the task's // memory. -func CopyUint32In(cc marshal.CopyContext, addr usermem.Addr, dst *uint32) (int, error) { +func CopyUint32In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint32) (int, error) { var buf Uint32 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -242,7 +243,7 @@ func CopyUint32In(cc marshal.CopyContext, addr usermem.Addr, dst *uint32) (int, // CopyUint32Out is a convenient wrapper for copying out a uint32 to the task's // memory. -func CopyUint32Out(cc marshal.CopyContext, addr usermem.Addr, src uint32) (int, error) { +func CopyUint32Out(cc marshal.CopyContext, addr hostarch.Addr, src uint32) (int, error) { srcP := Uint32(src) return srcP.CopyOut(cc, addr) } @@ -251,7 +252,7 @@ func CopyUint32Out(cc marshal.CopyContext, addr usermem.Addr, src uint32) (int, // CopyInt64In is a convenient wrapper for copying in an int64 from the task's // memory. -func CopyInt64In(cc marshal.CopyContext, addr usermem.Addr, dst *int64) (int, error) { +func CopyInt64In(cc marshal.CopyContext, addr hostarch.Addr, dst *int64) (int, error) { var buf Int64 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -263,14 +264,14 @@ func CopyInt64In(cc marshal.CopyContext, addr usermem.Addr, dst *int64) (int, er // CopyInt64Out is a convenient wrapper for copying out an int64 to the task's // memory. -func CopyInt64Out(cc marshal.CopyContext, addr usermem.Addr, src int64) (int, error) { +func CopyInt64Out(cc marshal.CopyContext, addr hostarch.Addr, src int64) (int, error) { srcP := Int64(src) return srcP.CopyOut(cc, addr) } // CopyUint64In is a convenient wrapper for copying in a uint64 from the task's // memory. -func CopyUint64In(cc marshal.CopyContext, addr usermem.Addr, dst *uint64) (int, error) { +func CopyUint64In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint64) (int, error) { var buf Uint64 n, err := buf.CopyIn(cc, addr) if err != nil { @@ -282,14 +283,14 @@ func CopyUint64In(cc marshal.CopyContext, addr usermem.Addr, dst *uint64) (int, // CopyUint64Out is a convenient wrapper for copying out a uint64 to the task's // memory. -func CopyUint64Out(cc marshal.CopyContext, addr usermem.Addr, src uint64) (int, error) { +func CopyUint64Out(cc marshal.CopyContext, addr hostarch.Addr, src uint64) (int, error) { srcP := Uint64(src) return srcP.CopyOut(cc, addr) } // CopyByteSliceIn is a convenient wrapper for copying in a []byte from the // task's memory. -func CopyByteSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst *[]byte) (int, error) { +func CopyByteSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst *[]byte) (int, error) { var buf ByteSlice n, err := buf.CopyIn(cc, addr) if err != nil { @@ -301,14 +302,14 @@ func CopyByteSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst *[]byte) (in // CopyByteSliceOut is a convenient wrapper for copying out a []byte to the // task's memory. -func CopyByteSliceOut(cc marshal.CopyContext, addr usermem.Addr, src []byte) (int, error) { +func CopyByteSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []byte) (int, error) { srcP := ByteSlice(src) return srcP.CopyOut(cc, addr) } // CopyStringIn is a convenient wrapper for copying in a string from the // task's memory. -func CopyStringIn(cc marshal.CopyContext, addr usermem.Addr, dst *string) (int, error) { +func CopyStringIn(cc marshal.CopyContext, addr hostarch.Addr, dst *string) (int, error) { var buf ByteSlice n, err := buf.CopyIn(cc, addr) if err != nil { @@ -320,12 +321,12 @@ func CopyStringIn(cc marshal.CopyContext, addr usermem.Addr, dst *string) (int, // CopyStringOut is a convenient wrapper for copying out a string to the task's // memory. -func CopyStringOut(cc marshal.CopyContext, addr usermem.Addr, src string) (int, error) { +func CopyStringOut(cc marshal.CopyContext, addr hostarch.Addr, src string) (int, error) { srcP := ByteSlice(src) return srcP.CopyOut(cc, addr) } -// IOCopyContext wraps an object implementing usermem.IO to implement +// IOCopyContext wraps an object implementing hostarch.IO to implement // marshal.CopyContext. type IOCopyContext struct { Ctx context.Context @@ -339,11 +340,11 @@ func (i *IOCopyContext) CopyScratchBuffer(size int) []byte { } // CopyOutBytes implements marshal.CopyContext.CopyOutBytes. -func (i *IOCopyContext) CopyOutBytes(addr usermem.Addr, b []byte) (int, error) { +func (i *IOCopyContext) CopyOutBytes(addr hostarch.Addr, b []byte) (int, error) { return i.IO.CopyOut(i.Ctx, addr, b, i.Opts) } // CopyInBytes implements marshal.CopyContext.CopyInBytes. -func (i *IOCopyContext) CopyInBytes(addr usermem.Addr, b []byte) (int, error) { +func (i *IOCopyContext) CopyInBytes(addr hostarch.Addr, b []byte) (int, error) { return i.IO.CopyIn(i.Ctx, addr, b, i.Opts) } diff --git a/pkg/merkletree/BUILD b/pkg/merkletree/BUILD index 501a9ef21..dcd6c3bf5 100644 --- a/pkg/merkletree/BUILD +++ b/pkg/merkletree/BUILD @@ -8,7 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/usermem", + "//pkg/hostarch", ], ) @@ -18,6 +18,6 @@ go_test( library = ":merkletree", deps = [ "//pkg/abi/linux", - "//pkg/usermem", + "//pkg/hostarch", ], ) diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go index d7209ace3..6450f664c 100644 --- a/pkg/merkletree/merkletree.go +++ b/pkg/merkletree/merkletree.go @@ -24,7 +24,8 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) const ( @@ -65,7 +66,7 @@ type Layout struct { // of a tree. dataSize specifies the size of input data in bytes. func InitLayout(dataSize int64, hashAlgorithms int, dataAndTreeInSameFile bool) (Layout, error) { layout := Layout{ - blockSize: usermem.PageSize, + blockSize: hostarch.PageSize, } // TODO(b/156980949): Allow config SHA384. @@ -237,6 +238,7 @@ func Generate(params *GenerateParams) ([]byte, error) { Mode: params.Mode, UID: params.UID, GID: params.GID, + Children: params.Children, SymlinkTarget: params.SymlinkTarget, } diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go index ed332b3f1..5d6f8df1b 100644 --- a/pkg/merkletree/merkletree_test.go +++ b/pkg/merkletree/merkletree_test.go @@ -24,7 +24,8 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) func TestLayout(t *testing.T) { @@ -58,7 +59,7 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedDigestSize: 32, - expectedLevelOffset: []int64{usermem.PageSize}, + expectedLevelOffset: []int64{hostarch.PageSize}, }, { name: "SmallSizeSHA512SameFile", @@ -66,7 +67,7 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedDigestSize: 64, - expectedLevelOffset: []int64{usermem.PageSize}, + expectedLevelOffset: []int64{hostarch.PageSize}, }, { name: "MiddleSizeSHA256SeparateFile", @@ -74,7 +75,7 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, expectedDigestSize: 32, - expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, + expectedLevelOffset: []int64{0, 2 * hostarch.PageSize, 3 * hostarch.PageSize}, }, { name: "MiddleSizeSHA512SeparateFile", @@ -82,7 +83,7 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, expectedDigestSize: 64, - expectedLevelOffset: []int64{0, 4 * usermem.PageSize, 5 * usermem.PageSize}, + expectedLevelOffset: []int64{0, 4 * hostarch.PageSize, 5 * hostarch.PageSize}, }, { name: "MiddleSizeSHA256SameFile", @@ -90,7 +91,7 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedDigestSize: 32, - expectedLevelOffset: []int64{245 * usermem.PageSize, 247 * usermem.PageSize, 248 * usermem.PageSize}, + expectedLevelOffset: []int64{245 * hostarch.PageSize, 247 * hostarch.PageSize, 248 * hostarch.PageSize}, }, { name: "MiddleSizeSHA512SameFile", @@ -98,39 +99,39 @@ func TestLayout(t *testing.T) { hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedDigestSize: 64, - expectedLevelOffset: []int64{245 * usermem.PageSize, 249 * usermem.PageSize, 250 * usermem.PageSize}, + expectedLevelOffset: []int64{245 * hostarch.PageSize, 249 * hostarch.PageSize, 250 * hostarch.PageSize}, }, { name: "LargeSizeSHA256SeparateFile", - dataSize: 4096 * int64(usermem.PageSize), + dataSize: 4096 * int64(hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, expectedDigestSize: 32, - expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, + expectedLevelOffset: []int64{0, 32 * hostarch.PageSize, 33 * hostarch.PageSize}, }, { name: "LargeSizeSHA512SeparateFile", - dataSize: 4096 * int64(usermem.PageSize), + dataSize: 4096 * int64(hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, expectedDigestSize: 64, - expectedLevelOffset: []int64{0, 64 * usermem.PageSize, 65 * usermem.PageSize}, + expectedLevelOffset: []int64{0, 64 * hostarch.PageSize, 65 * hostarch.PageSize}, }, { name: "LargeSizeSHA256SameFile", - dataSize: 4096 * int64(usermem.PageSize), + dataSize: 4096 * int64(hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedDigestSize: 32, - expectedLevelOffset: []int64{4096 * usermem.PageSize, 4128 * usermem.PageSize, 4129 * usermem.PageSize}, + expectedLevelOffset: []int64{4096 * hostarch.PageSize, 4128 * hostarch.PageSize, 4129 * hostarch.PageSize}, }, { name: "LargeSizeSHA512SameFile", - dataSize: 4096 * int64(usermem.PageSize), + dataSize: 4096 * int64(hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedDigestSize: 64, - expectedLevelOffset: []int64{4096 * usermem.PageSize, 4160 * usermem.PageSize, 4161 * usermem.PageSize}, + expectedLevelOffset: []int64{4096 * hostarch.PageSize, 4160 * hostarch.PageSize, 4161 * hostarch.PageSize}, }, } @@ -140,8 +141,8 @@ func TestLayout(t *testing.T) { if err != nil { t.Fatalf("Failed to InitLayout: %v", err) } - if l.blockSize != int64(usermem.PageSize) { - t.Errorf("Got blockSize %d, want %d", l.blockSize, usermem.PageSize) + if l.blockSize != int64(hostarch.PageSize) { + t.Errorf("Got blockSize %d, want %d", l.blockSize, hostarch.PageSize) } if l.digestSize != tc.expectedDigestSize { t.Errorf("Got digestSize %d, want %d", l.digestSize, sha256DigestSize) @@ -202,56 +203,56 @@ func TestGenerate(t *testing.T) { }{ { name: "OnePageZeroesSHA256SeparateFile", - data: bytes.Repeat([]byte{0}, usermem.PageSize), + data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27}, }, { name: "OnePageZeroesSHA256SameFile", - data: bytes.Repeat([]byte{0}, usermem.PageSize), + data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27}, }, { name: "OnePageZeroesSHA512SeparateFile", - data: bytes.Repeat([]byte{0}, usermem.PageSize), + data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14}, }, { name: "OnePageZeroesSHA512SameFile", - data: bytes.Repeat([]byte{0}, usermem.PageSize), + data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14}, }, { name: "MultiplePageZeroesSHA256SeparateFile", - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, expectedHash: []byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185}, }, { name: "MultiplePageZeroesSHA256SameFile", - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedHash: []byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185}, }, { name: "MultiplePageZeroesSHA512SeparateFile", - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81}, }, { name: "MultiplePageZeroesSHA512SameFile", - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81}, @@ -286,28 +287,28 @@ func TestGenerate(t *testing.T) { }, { name: "OnePageASHA256SeparateFile", - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1}, }, { name: "OnePageASHA256SameFile", - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1}, }, { name: "OnePageASHA512SeparateFile", - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145}, }, { name: "OnePageASHA512SameFile", - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145}, @@ -415,14 +416,14 @@ func TestVerifyInvalidRange(t *testing.T) { // Verify range starts outside data range. { name: "StartOutsideRange", - verifyStart: usermem.PageSize, + verifyStart: hostarch.PageSize, verifySize: 1, }, // Verify range ends outside data range. { name: "EndOutsideRange", verifyStart: 0, - verifySize: 2 * usermem.PageSize, + verifySize: 2 * hostarch.PageSize, }, // Verify range with negative size. { @@ -434,7 +435,7 @@ func TestVerifyInvalidRange(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf) if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") } @@ -467,7 +468,7 @@ func TestVerifyUnmodifiedMetadata(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, tc.isSymlink, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, tc.isSymlink, 0 /* verifyStart */, 0 /* verifySize */, &buf) if tc.isSymlink { params.SymlinkTarget = defaultSymlinkPath } @@ -495,7 +496,7 @@ func TestVerifyModifiedName(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.Name += "abc" if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -521,7 +522,7 @@ func TestVerifyModifiedSize(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.Size-- if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -547,7 +548,7 @@ func TestVerifyModifiedMode(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.Mode++ if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -573,7 +574,7 @@ func TestVerifyModifiedUID(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.UID++ if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -599,7 +600,7 @@ func TestVerifyModifiedGID(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.GID++ if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -625,7 +626,7 @@ func TestVerifyModifiedChildren(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.Children["abc"] = struct{}{} if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") @@ -636,7 +637,7 @@ func TestVerifyModifiedChildren(t *testing.T) { func TestVerifyModifiedSymlink(t *testing.T) { var buf bytes.Buffer - _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, true /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) + _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, true /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) params.SymlinkTarget = "merkle_modified_test_link" if _, err := Verify(¶ms); err == nil { t.Errorf("Verification succeeded when expected to fail") @@ -652,30 +653,30 @@ func TestModifyOutsideVerifyRange(t *testing.T) { }{ { name: "BeforeRangeSeparateFile", - modifyByte: 4*usermem.PageSize - 1, + modifyByte: 4*hostarch.PageSize - 1, dataAndTreeInSameFile: false, }, { name: "BeforeRangeSameFile", - modifyByte: 4*usermem.PageSize - 1, + modifyByte: 4*hostarch.PageSize - 1, dataAndTreeInSameFile: true, }, { name: "AfterRangeSeparateFile", - modifyByte: 5 * usermem.PageSize, + modifyByte: 5 * hostarch.PageSize, dataAndTreeInSameFile: false, }, { name: "AfterRangeSameFile", - modifyByte: 5 * usermem.PageSize, + modifyByte: 5 * hostarch.PageSize, dataAndTreeInSameFile: true, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - dataSize := int64(8 * usermem.PageSize) - verifyStart := int64(4 * usermem.PageSize) - verifySize := int64(usermem.PageSize) + dataSize := int64(8 * hostarch.PageSize) + verifyStart := int64(4 * hostarch.PageSize) + verifySize := int64(hostarch.PageSize) var buf bytes.Buffer // Modified byte is outside verify range. Verify should succeed. data, params := prepareVerify(t, dataSize, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, verifyStart, verifySize, &buf) @@ -712,16 +713,16 @@ func TestModifyInsideVerifyRange(t *testing.T) { // to fail. { name: "BlockAlignedRangeSeparateFile", - verifyStart: 4 * usermem.PageSize, - verifySize: usermem.PageSize, - modifyByte: 4 * usermem.PageSize, + verifyStart: 4 * hostarch.PageSize, + verifySize: hostarch.PageSize, + modifyByte: 4 * hostarch.PageSize, dataAndTreeInSameFile: false, }, { name: "BlockAlignedRangeSameFile", - verifyStart: 4 * usermem.PageSize, - verifySize: usermem.PageSize, - modifyByte: 4 * usermem.PageSize, + verifyStart: 4 * hostarch.PageSize, + verifySize: hostarch.PageSize, + modifyByte: 4 * hostarch.PageSize, dataAndTreeInSameFile: true, }, // The tests below use a non-block-aligned verify range. @@ -729,48 +730,48 @@ func TestModifyInsideVerifyRange(t *testing.T) { // verify to fail. { name: "ModifyStartSeparateFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 4*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 4*hostarch.PageSize + 123, dataAndTreeInSameFile: false, }, { name: "ModifyStartSameFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 4*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 4*hostarch.PageSize + 123, dataAndTreeInSameFile: true, }, // Modifying a byte at the end of verify range should cause // verify to fail. { name: "ModifyEndSeparateFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 6*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 6*hostarch.PageSize + 123, dataAndTreeInSameFile: false, }, { name: "ModifyEndSameFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 6*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 6*hostarch.PageSize + 123, dataAndTreeInSameFile: true, }, // Modifying a byte in the middle verified block should cause // verify to fail. { name: "ModifyMiddleSeparateFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 5*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 5*hostarch.PageSize + 123, dataAndTreeInSameFile: false, }, { name: "ModifyMiddleSameFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 5*usermem.PageSize + 123, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 5*hostarch.PageSize + 123, dataAndTreeInSameFile: true, }, // Modifying a byte in the first block in the verified range @@ -778,16 +779,16 @@ func TestModifyInsideVerifyRange(t *testing.T) { // out of verify range. { name: "ModifyFirstBlockSeparateFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 4*usermem.PageSize + 122, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 4*hostarch.PageSize + 122, dataAndTreeInSameFile: false, }, { name: "ModifyFirstBlockSameFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 4*usermem.PageSize + 122, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 4*hostarch.PageSize + 122, dataAndTreeInSameFile: true, }, // Modifying a byte in the last block in the verified range @@ -795,22 +796,22 @@ func TestModifyInsideVerifyRange(t *testing.T) { // out of verify range. { name: "ModifyLastBlockSeparateFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 6*usermem.PageSize + 124, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 6*hostarch.PageSize + 124, dataAndTreeInSameFile: false, }, { name: "ModifyLastBlockSameFile", - verifyStart: 4*usermem.PageSize + 123, - verifySize: 2 * usermem.PageSize, - modifyByte: 6*usermem.PageSize + 124, + verifyStart: 4*hostarch.PageSize + 123, + verifySize: 2 * hostarch.PageSize, + modifyByte: 6*hostarch.PageSize + 124, dataAndTreeInSameFile: true, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - dataSize := int64(8 * usermem.PageSize) + dataSize := int64(8 * hostarch.PageSize) var buf bytes.Buffer data, params := prepareVerify(t, dataSize, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf) // Flip a bit in data and checks Verify results. @@ -854,7 +855,7 @@ func TestVerifyRandom(t *testing.T) { rand.Seed(time.Now().UnixNano()) // Use a random dataSize. Minimum size 2 so that we can pick a random // portion from it. - dataSize := rand.Int63n(200*usermem.PageSize) + 2 + dataSize := rand.Int63n(200*hostarch.PageSize) + 2 // Pick a random portion of data. start := rand.Int63n(dataSize - 1) diff --git a/pkg/ring0/BUILD b/pkg/ring0/BUILD index 885958456..fda6ba601 100644 --- a/pkg/ring0/BUILD +++ b/pkg/ring0/BUILD @@ -77,10 +77,10 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", + "//pkg/hostarch", "//pkg/ring0/pagetables", "//pkg/safecopy", "//pkg/sentry/arch", "//pkg/sentry/arch/fpu", - "//pkg/usermem", ], ) diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go index ceddf719d..76776c65c 100644 --- a/pkg/ring0/defs_amd64.go +++ b/pkg/ring0/defs_amd64.go @@ -17,7 +17,7 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) var ( @@ -25,7 +25,7 @@ var ( UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1) // MaximumUserAddress is the largest possible user address. - MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1) + MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(hostarch.PageSize-1) // KernelStartAddress is the starting kernel address. KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1) diff --git a/pkg/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go index c372b02bb..0125690d2 100644 --- a/pkg/ring0/defs_arm64.go +++ b/pkg/ring0/defs_arm64.go @@ -17,7 +17,7 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) var ( @@ -25,7 +25,7 @@ var ( UserspaceSize = uintptr(1) << (VirtualAddressBits()) // MaximumUserAddress is the largest possible user address. - MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1) + MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(hostarch.PageSize-1) // KernelStartAddress is the starting kernel address. KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1) diff --git a/pkg/ring0/gen_offsets/BUILD b/pkg/ring0/gen_offsets/BUILD index f421e1687..9ea8f9a4f 100644 --- a/pkg/ring0/gen_offsets/BUILD +++ b/pkg/ring0/gen_offsets/BUILD @@ -33,9 +33,9 @@ go_binary( ], deps = [ "//pkg/cpuid", + "//pkg/hostarch", "//pkg/ring0/pagetables", "//pkg/sentry/arch", "//pkg/sentry/arch/fpu", - "//pkg/usermem", ], ) diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index 33c259757..92d2330cb 100644 --- a/pkg/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go @@ -20,7 +20,7 @@ import ( "encoding/binary" "reflect" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // init initializes architecture-specific state. @@ -34,7 +34,7 @@ func (k *Kernel) init(maxCPUs int) { entries = make([]kernelEntry, maxCPUs+padding-1) totalSize := entrySize * uintptr(maxCPUs+padding-1) addr := reflect.ValueOf(&entries[0]).Pointer() - if addr&(usermem.PageSize-1) == 0 && totalSize >= usermem.PageSize { + if addr&(hostarch.PageSize-1) == 0 && totalSize >= hostarch.PageSize { // The runtime forces power-of-2 alignment for allocations, and we are therefore // safe once the first address is aligned and the chunk is at least a full page. break @@ -44,10 +44,10 @@ func (k *Kernel) init(maxCPUs int) { k.cpuEntries = entries k.globalIDT = &idt64{} - if reflect.TypeOf(idt64{}).Size() != usermem.PageSize { + if reflect.TypeOf(idt64{}).Size() != hostarch.PageSize { panic("Size of globalIDT should be PageSize") } - if reflect.ValueOf(k.globalIDT).Pointer()&(usermem.PageSize-1) != 0 { + if reflect.ValueOf(k.globalIDT).Pointer()&(hostarch.PageSize-1) != 0 { panic("Allocated globalIDT should be page aligned") } @@ -71,13 +71,13 @@ func (k *Kernel) EntryRegions() map[uintptr]uintptr { addr := reflect.ValueOf(&k.cpuEntries[0]).Pointer() size := reflect.TypeOf(kernelEntry{}).Size() * uintptr(len(k.cpuEntries)) - end, _ := usermem.Addr(addr + size).RoundUp() - regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end) + end, _ := hostarch.Addr(addr + size).RoundUp() + regions[uintptr(hostarch.Addr(addr).RoundDown())] = uintptr(end) addr = reflect.ValueOf(k.globalIDT).Pointer() size = reflect.TypeOf(idt64{}).Size() - end, _ = usermem.Addr(addr + size).RoundUp() - regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end) + end, _ = hostarch.Addr(addr + size).RoundUp() + regions[uintptr(hostarch.Addr(addr).RoundDown())] = uintptr(end) return regions } diff --git a/pkg/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go index f1c323bac..5eabd4296 100644 --- a/pkg/ring0/lib_arm64.go +++ b/pkg/ring0/lib_arm64.go @@ -65,9 +65,10 @@ func LoadFloatingPoint(*byte) // SaveFloatingPoint saves floating point state. func SaveFloatingPoint(*byte) +// FPSIMDDisableTrap disables fpsimd. func FPSIMDDisableTrap() -// DisableVFP disables fpsimd. +// FPSIMDEnableTrap enables fpsimd. func FPSIMDEnableTrap() // Init sets function pointers based on architectural features. diff --git a/pkg/ring0/pagetables/BUILD b/pkg/ring0/pagetables/BUILD index 65a978cbb..f8f160cc6 100644 --- a/pkg/ring0/pagetables/BUILD +++ b/pkg/ring0/pagetables/BUILD @@ -68,8 +68,8 @@ go_library( "//pkg/sentry/platform/kvm:__subpackages__", ], deps = [ + "//pkg/hostarch", "//pkg/sync", - "//pkg/usermem", ], ) @@ -84,5 +84,8 @@ go_test( ":walker_check_arm64", ], library = ":pagetables", - deps = ["//pkg/usermem"], + deps = [ + "//pkg/hostarch", + "//pkg/usermem", + ], ) diff --git a/pkg/ring0/pagetables/allocator_unsafe.go b/pkg/ring0/pagetables/allocator_unsafe.go index d08bfdeb3..191d0942b 100644 --- a/pkg/ring0/pagetables/allocator_unsafe.go +++ b/pkg/ring0/pagetables/allocator_unsafe.go @@ -17,23 +17,23 @@ package pagetables import ( "unsafe" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // newAlignedPTEs returns a set of aligned PTEs. func newAlignedPTEs() *PTEs { ptes := new(PTEs) - offset := physicalFor(ptes) & (usermem.PageSize - 1) + offset := physicalFor(ptes) & (hostarch.PageSize - 1) if offset == 0 { // Already aligned. return ptes } // Need to force an aligned allocation. - unaligned := make([]byte, (2*usermem.PageSize)-1) - offset = uintptr(unsafe.Pointer(&unaligned[0])) & (usermem.PageSize - 1) + unaligned := make([]byte, (2*hostarch.PageSize)-1) + offset = uintptr(unsafe.Pointer(&unaligned[0])) & (hostarch.PageSize - 1) if offset != 0 { - offset = usermem.PageSize - offset + offset = hostarch.PageSize - offset } return (*PTEs)(unsafe.Pointer(&unaligned[offset])) } diff --git a/pkg/ring0/pagetables/pagetables.go b/pkg/ring0/pagetables/pagetables.go index 8c0a6aa82..3f17fba49 100644 --- a/pkg/ring0/pagetables/pagetables.go +++ b/pkg/ring0/pagetables/pagetables.go @@ -21,7 +21,7 @@ package pagetables import ( - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // PageTables is a set of page tables. @@ -142,7 +142,7 @@ func (*mapVisitor) requiresSplit() bool { return true } // // +checkescape:hard,stack //go:nosplit -func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool { +func (p *PageTables) Map(addr hostarch.Addr, length uintptr, opts MapOpts, physical uintptr) bool { if p.readOnlyShared { panic("Should not modify read-only shared pagetables.") } @@ -198,7 +198,7 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { // // +checkescape:hard,stack //go:nosplit -func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool { +func (p *PageTables) Unmap(addr hostarch.Addr, length uintptr) bool { if p.readOnlyShared { panic("Should not modify read-only shared pagetables.") } @@ -249,7 +249,7 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { // // +checkescape:hard,stack //go:nosplit -func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool { +func (p *PageTables) IsEmpty(addr hostarch.Addr, length uintptr) bool { w := emptyWalker{ pageTables: p, } @@ -298,9 +298,9 @@ func (*lookupVisitor) requiresSplit() bool { return false } // // +checkescape:hard,stack //go:nosplit -func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) { - mask := uintptr(usermem.PageSize - 1) - addr &^= usermem.Addr(mask) +func (p *PageTables) Lookup(addr hostarch.Addr, findFirst bool) (virtual hostarch.Addr, physical, size uintptr, opts MapOpts) { + mask := uintptr(hostarch.PageSize - 1) + addr &^= hostarch.Addr(mask) w := lookupWalker{ pageTables: p, visitor: lookupVisitor{ @@ -308,12 +308,12 @@ func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem. findFirst: findFirst, }, } - end := ^usermem.Addr(0) &^ usermem.Addr(mask) + end := ^hostarch.Addr(0) &^ hostarch.Addr(mask) if !findFirst { end = addr + 1 } w.iterateRange(uintptr(addr), uintptr(end)) - return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts + return hostarch.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts } // MarkReadOnlyShared marks the pagetables read-only and can be shared. diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go index 163a3aea3..86eb00a4f 100644 --- a/pkg/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/ring0/pagetables/pagetables_aarch64.go @@ -19,7 +19,7 @@ package pagetables import ( "sync/atomic" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // archPageTables is architecture-specific data. @@ -85,7 +85,7 @@ const ( // MapOpts are x86 options. type MapOpts struct { // AccessType defines permissions. - AccessType usermem.AccessType + AccessType hostarch.AccessType // Global indicates the page is globally accessible. Global bool @@ -120,7 +120,7 @@ func (p *PTE) Opts() MapOpts { v := atomic.LoadUintptr((*uintptr)(p)) return MapOpts{ - AccessType: usermem.AccessType{ + AccessType: hostarch.AccessType{ Read: true, Write: v&readOnly == 0, Execute: v&xn == 0, diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go index 54e8e554f..a13c616ae 100644 --- a/pkg/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/ring0/pagetables/pagetables_amd64_test.go @@ -19,19 +19,19 @@ package pagetables import ( "testing" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) func Test2MAnd4K(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map a small page and a huge page. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: hostarch.Read}, pmdSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}}, + {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: hostarch.Read}}, }) } @@ -39,12 +39,12 @@ func Test1GAnd4K(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map a small page and a super page. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: hostarch.Read}, pudSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}}, + {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: hostarch.Read}}, }) } @@ -52,12 +52,12 @@ func TestSplit1GPage(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map a super page and knock out the middle. - pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42) - pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize)) + pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: hostarch.Read}, pudSize*42) + pt.Unmap(hostarch.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize)) checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}}, + {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: hostarch.Read}}, + {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: hostarch.Read}}, }) } @@ -65,11 +65,11 @@ func TestSplit2MPage(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map a huge page and knock out the middle. - pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42) - pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize)) + pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: hostarch.Read}, pmdSize*42) + pt.Unmap(hostarch.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize)) checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}}, + {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: hostarch.Read}}, + {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: hostarch.Read}}, }) } diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go index 2f73d424f..69320c2fb 100644 --- a/pkg/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/ring0/pagetables/pagetables_arm64_test.go @@ -58,7 +58,7 @@ func TestSplit1GPage(t *testing.T) { // Map a super page and knock out the middle. pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*42) - pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize)) + pt.Unmap(hostarch.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize)) checkMappings(t, pt, []mapping{ {0x0000ff0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read, User: true}}, @@ -71,7 +71,7 @@ func TestSplit2MPage(t *testing.T) { // Map a huge page and knock out the middle. pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*42) - pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize)) + pt.Unmap(hostarch.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize)) checkMappings(t, pt, []mapping{ {0x0000ff0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read, User: true}}, diff --git a/pkg/ring0/pagetables/pagetables_test.go b/pkg/ring0/pagetables/pagetables_test.go index 772f4fc5e..df93dcb6a 100644 --- a/pkg/ring0/pagetables/pagetables_test.go +++ b/pkg/ring0/pagetables/pagetables_test.go @@ -15,9 +15,8 @@ package pagetables import ( + "gvisor.dev/gvisor/pkg/hostarch" "testing" - - "gvisor.dev/gvisor/pkg/usermem" ) type mapping struct { @@ -90,7 +89,7 @@ func TestUnmap(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map and unmap one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) pt.Unmap(0x400000, pteSize) checkMappings(t, pt, nil) @@ -100,10 +99,10 @@ func TestReadOnly(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.Read}}, }) } @@ -111,10 +110,10 @@ func TestReadWrite(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map one entry. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}}, }) } @@ -122,12 +121,12 @@ func TestSerialEntries(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map two sequential entries. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x401000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) + pt.Map(0x401000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}}, + {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: hostarch.ReadWrite}}, }) } @@ -135,11 +134,11 @@ func TestSpanningEntries(t *testing.T) { pt := New(NewRuntimeAllocator()) // Span a pgd with two pages. - pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) + pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, - {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}}, + {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.Read}}, + {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: hostarch.Read}}, }) } @@ -147,11 +146,11 @@ func TestSparseEntries(t *testing.T) { pt := New(NewRuntimeAllocator()) // Map two entries in different pgds. - pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) - pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, - {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}}, + {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: hostarch.Read}}, }) } diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go index 32edd2f0a..e43698173 100644 --- a/pkg/ring0/pagetables/pagetables_x86.go +++ b/pkg/ring0/pagetables/pagetables_x86.go @@ -19,7 +19,7 @@ package pagetables import ( "sync/atomic" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // archPageTables is architecture-specific data. @@ -63,7 +63,7 @@ const ( // MapOpts are x86 options. type MapOpts struct { // AccessType defines permissions. - AccessType usermem.AccessType + AccessType hostarch.AccessType // Global indicates the page is globally accessible. Global bool @@ -97,7 +97,7 @@ func (p *PTE) Valid() bool { func (p *PTE) Opts() MapOpts { v := atomic.LoadUintptr((*uintptr)(p)) return MapOpts{ - AccessType: usermem.AccessType{ + AccessType: hostarch.AccessType{ Read: v&present != 0, Write: v&writable != 0, Execute: v&executeDisable == 0, diff --git a/pkg/safecopy/safecopy_test.go b/pkg/safecopy/safecopy_test.go index d2ce8ff86..611f36253 100644 --- a/pkg/safecopy/safecopy_test.go +++ b/pkg/safecopy/safecopy_test.go @@ -27,7 +27,7 @@ import ( "golang.org/x/sys/unix" ) -// Size of a page in bytes. Cloned from usermem.PageSize to avoid a circular +// Size of a page in bytes. Cloned from hostarch.PageSize to avoid a circular // dependency. const pageSize = 4096 diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 201dd072f..169fc0ac3 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -54,6 +54,6 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/bpf", - "//pkg/usermem", + "//pkg/hostarch", ], ) diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index db06d1f1b..68feddf31 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -29,7 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // newVictim makes a victim binary. @@ -57,7 +57,7 @@ func dataAsInput(d *linux.SeccompData) bpf.Input { d.MarshalUnsafe(buf) return bpf.InputBytes{ Data: buf, - Order: usermem.ByteOrder, + Order: hostarch.ByteOrder, } } diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index f660f1614..c9c52530d 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/cpuid", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go index 921151137..290863ee6 100644 --- a/pkg/sentry/arch/arch.go +++ b/pkg/sentry/arch/arch.go @@ -22,11 +22,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/usermem" ) // Arch describes an architecture. @@ -188,11 +188,11 @@ type Context interface { // returned layout must be no lower than min, and MaxAddr for the returned // layout must be no higher than max. Repeated calls to NewMmapLayout may // return different layouts. - NewMmapLayout(min, max usermem.Addr, limits *limits.LimitSet) (MmapLayout, error) + NewMmapLayout(min, max hostarch.Addr, limits *limits.LimitSet) (MmapLayout, error) // PIELoadAddress returns a preferred load address for a // position-independent executable within l. - PIELoadAddress(l MmapLayout) usermem.Addr + PIELoadAddress(l MmapLayout) hostarch.Addr // FeatureSet returns the FeatureSet in use in this context. FeatureSet() *cpuid.FeatureSet @@ -257,18 +257,18 @@ const ( // +stateify savable type MmapLayout struct { // MinAddr is the lowest mappable address. - MinAddr usermem.Addr + MinAddr hostarch.Addr // MaxAddr is the highest mappable address. - MaxAddr usermem.Addr + MaxAddr hostarch.Addr // BottomUpBase is the lowest address that may be returned for a // MmapBottomUp mmap. - BottomUpBase usermem.Addr + BottomUpBase hostarch.Addr // TopDownBase is the highest address that may be returned for a // MmapTopDown mmap. - TopDownBase usermem.Addr + TopDownBase hostarch.Addr // DefaultDirection is the direction for most non-fixed mmaps in this // layout. @@ -316,9 +316,9 @@ type SyscallArgument struct { // SyscallArguments represents the set of arguments passed to a syscall. type SyscallArguments [6]SyscallArgument -// Pointer returns the usermem.Addr representation of a pointer argument. -func (a SyscallArgument) Pointer() usermem.Addr { - return usermem.Addr(a.Value) +// Pointer returns the hostarch.Addr representation of a pointer argument. +func (a SyscallArgument) Pointer() hostarch.Addr { + return hostarch.Addr(a.Value) } // Int returns the int32 representation of a 32-bit signed integer argument. diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index 2571be60f..d6b4d2357 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -23,11 +23,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/usermem" ) // Host specifies the host architecture. @@ -37,7 +37,7 @@ const Host = AMD64 const ( // maxAddr64 is the maximum userspace address. It is TASK_SIZE in Linux // for a 64-bit process. - maxAddr64 usermem.Addr = (1 << 47) - usermem.PageSize + maxAddr64 hostarch.Addr = (1 << 47) - hostarch.PageSize // maxStackRand64 is the maximum randomization to apply to the stack. // It is defined by arch/x86/mm/mmap.c:stack_maxrandom_size in Linux. @@ -45,7 +45,7 @@ const ( // maxMmapRand64 is the maximum randomization to apply to the mmap // layout. It is defined by arch/x86/mm/mmap.c:arch_mmap_rnd in Linux. - maxMmapRand64 = (1 << 28) * usermem.PageSize + maxMmapRand64 = (1 << 28) * hostarch.PageSize // minGap64 is the minimum gap to leave at the top of the address space // for the stack. It is defined by arch/x86/mm/mmap.c:MIN_GAP in Linux. @@ -56,7 +56,7 @@ const ( // // The Platform {Min,Max}UserAddress() may preclude loading at this // address. See other preferredFoo comments below. - preferredPIELoadAddr usermem.Addr = maxAddr64 / 3 * 2 + preferredPIELoadAddr hostarch.Addr = maxAddr64 / 3 * 2 ) // These constants are selected as heuristics to help make the Platform's @@ -92,13 +92,13 @@ const ( // This is all "preferred" because the layout min/max address may not // allow us to select such a TopDownBase, in which case we have to fall // back to a layout that TSAN may not be happy with. - preferredTopDownAllocMin usermem.Addr = 0x7e8000000000 - preferredAllocationGap = 128 << 30 // 128 GB - preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap + preferredTopDownAllocMin hostarch.Addr = 0x7e8000000000 + preferredAllocationGap = 128 << 30 // 128 GB + preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap // minMmapRand64 is the smallest we are willing to make the // randomization to stay above preferredTopDownBaseMin. - minMmapRand64 = (1 << 26) * usermem.PageSize + minMmapRand64 = (1 << 26) * hostarch.PageSize ) // context64 represents an AMD64 context. @@ -207,12 +207,12 @@ func (c *context64) FeatureSet() *cpuid.FeatureSet { } // mmapRand returns a random adjustment for randomizing an mmap layout. -func mmapRand(max uint64) usermem.Addr { - return usermem.Addr(rand.Int63n(int64(max))).RoundDown() +func mmapRand(max uint64) hostarch.Addr { + return hostarch.Addr(rand.Int63n(int64(max))).RoundDown() } // NewMmapLayout implements Context.NewMmapLayout consistently with Linux. -func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (MmapLayout, error) { +func (c *context64) NewMmapLayout(min, max hostarch.Addr, r *limits.LimitSet) (MmapLayout, error) { min, ok := min.RoundUp() if !ok { return MmapLayout{}, unix.EINVAL @@ -230,7 +230,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm // MAX_GAP in Linux. maxGap := (max / 6) * 5 - gap := usermem.Addr(stackSize.Cur) + gap := hostarch.Addr(stackSize.Cur) if gap < minGap64 { gap = minGap64 } @@ -243,7 +243,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm } topDownMin := max - gap - maxMmapRand64 - maxRand := usermem.Addr(maxMmapRand64) + maxRand := hostarch.Addr(maxMmapRand64) if topDownMin < preferredTopDownBaseMin { // Try to keep TopDownBase above preferredTopDownBaseMin by // shrinking maxRand. @@ -278,7 +278,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm } // PIELoadAddress implements Context.PIELoadAddress. -func (c *context64) PIELoadAddress(l MmapLayout) usermem.Addr { +func (c *context64) PIELoadAddress(l MmapLayout) hostarch.Addr { base := preferredPIELoadAddr max, ok := base.AddLength(maxMmapRand64) if !ok { @@ -311,7 +311,7 @@ func (c *context64) PtracePeekUser(addr uintptr) (marshal.Marshallable, error) { regs := c.ptraceGetRegs() buf := make([]byte, regs.SizeBytes()) regs.MarshalUnsafe(buf) - return c.Native(uintptr(usermem.ByteOrder.Uint64(buf[addr:]))), nil + return c.Native(uintptr(hostarch.ByteOrder.Uint64(buf[addr:]))), nil } // Note: x86 debug registers are missing. return c.Native(0), nil @@ -326,7 +326,7 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error { regs := c.ptraceGetRegs() buf := make([]byte, regs.SizeBytes()) regs.MarshalUnsafe(buf) - usermem.ByteOrder.PutUint64(buf[addr:], uint64(data)) + hostarch.ByteOrder.PutUint64(buf[addr:], uint64(data)) _, err := c.PtraceSetRegs(bytes.NewBuffer(buf)) return err } diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index 14ad9483b..348f238fd 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -22,11 +22,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/usermem" ) // Host specifies the host architecture. @@ -36,7 +36,7 @@ const Host = ARM64 const ( // maxAddr64 is the maximum userspace address. It is TASK_SIZE in Linux // for a 64-bit process. - maxAddr64 usermem.Addr = (1 << 48) + maxAddr64 hostarch.Addr = (1 << 48) // maxStackRand64 is the maximum randomization to apply to the stack. // It is defined by arch/arm64/mm/mmap.c:(STACK_RND_MASK << PAGE_SHIFT) in Linux. @@ -44,7 +44,7 @@ const ( // maxMmapRand64 is the maximum randomization to apply to the mmap // layout. It is defined by arch/arm64/mm/mmap.c:arch_mmap_rnd in Linux. - maxMmapRand64 = (1 << 33) * usermem.PageSize + maxMmapRand64 = (1 << 33) * hostarch.PageSize // minGap64 is the minimum gap to leave at the top of the address space // for the stack. It is defined by arch/arm64/mm/mmap.c:MIN_GAP in Linux. @@ -55,7 +55,7 @@ const ( // // The Platform {Min,Max}UserAddress() may preclude loading at this // address. See other preferredFoo comments below. - preferredPIELoadAddr usermem.Addr = maxAddr64 / 6 * 5 + preferredPIELoadAddr hostarch.Addr = maxAddr64 / 6 * 5 ) var ( @@ -66,13 +66,13 @@ var ( // These constants are selected as heuristics to help make the Platform's // potentially limited address space conform as closely to Linux as possible. const ( - preferredTopDownAllocMin usermem.Addr = 0x7e8000000000 - preferredAllocationGap = 128 << 30 // 128 GB - preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap + preferredTopDownAllocMin hostarch.Addr = 0x7e8000000000 + preferredAllocationGap = 128 << 30 // 128 GB + preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap // minMmapRand64 is the smallest we are willing to make the // randomization to stay above preferredTopDownBaseMin. - minMmapRand64 = (1 << 18) * usermem.PageSize + minMmapRand64 = (1 << 18) * hostarch.PageSize ) // context64 represents an ARM64 context. @@ -187,12 +187,12 @@ func (c *context64) FeatureSet() *cpuid.FeatureSet { } // mmapRand returns a random adjustment for randomizing an mmap layout. -func mmapRand(max uint64) usermem.Addr { - return usermem.Addr(rand.Int63n(int64(max))).RoundDown() +func mmapRand(max uint64) hostarch.Addr { + return hostarch.Addr(rand.Int63n(int64(max))).RoundDown() } // NewMmapLayout implements Context.NewMmapLayout consistently with Linux. -func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (MmapLayout, error) { +func (c *context64) NewMmapLayout(min, max hostarch.Addr, r *limits.LimitSet) (MmapLayout, error) { min, ok := min.RoundUp() if !ok { return MmapLayout{}, unix.EINVAL @@ -210,7 +210,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm // MAX_GAP in Linux. maxGap := (max / 6) * 5 - gap := usermem.Addr(stackSize.Cur) + gap := hostarch.Addr(stackSize.Cur) if gap < minGap64 { gap = minGap64 } @@ -223,7 +223,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm } topDownMin := max - gap - maxMmapRand64 - maxRand := usermem.Addr(maxMmapRand64) + maxRand := hostarch.Addr(maxMmapRand64) if topDownMin < preferredTopDownBaseMin { // Try to keep TopDownBase above preferredTopDownBaseMin by // shrinking maxRand. @@ -258,7 +258,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm } // PIELoadAddress implements Context.PIELoadAddress. -func (c *context64) PIELoadAddress(l MmapLayout) usermem.Addr { +func (c *context64) PIELoadAddress(l MmapLayout) hostarch.Addr { base := preferredPIELoadAddr max, ok := base.AddLength(maxMmapRand64) if !ok { diff --git a/pkg/sentry/arch/auxv.go b/pkg/sentry/arch/auxv.go index 2b4c8f3fc..19ca18121 100644 --- a/pkg/sentry/arch/auxv.go +++ b/pkg/sentry/arch/auxv.go @@ -15,7 +15,7 @@ package arch import ( - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // An AuxEntry represents an entry in an ELF auxiliary vector. @@ -23,7 +23,7 @@ import ( // +stateify savable type AuxEntry struct { Key uint64 - Value usermem.Addr + Value hostarch.Addr } // An Auxv represents an ELF auxiliary vector. diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD index 0a5395267..4e4f20639 100644 --- a/pkg/sentry/arch/fpu/BUILD +++ b/pkg/sentry/arch/fpu/BUILD @@ -13,9 +13,9 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/cpuid", + "//pkg/hostarch", "//pkg/sync", "//pkg/syserror", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go index 3a62f51be..1e9625bee 100644 --- a/pkg/sentry/arch/fpu/fpu_amd64.go +++ b/pkg/sentry/arch/fpu/fpu_amd64.go @@ -21,9 +21,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // initX86FPState (defined in asm files) sets up initial state. @@ -146,11 +146,11 @@ const ( // any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section // 10.5.1.2 "SSE State") func sanitizeMXCSR(f State) { - mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:]) + mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:]) initMXCSRMask.Do(func() { temp := State(alignedBytes(uint(ptraceFPRegsSize), 16)) initX86FPState(&temp[0], false /* useXsave */) - mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:]) + mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:]) if mxcsrMask == 0 { // "If the value of the MXCSR_MASK field is 00000000H, then the // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM @@ -160,7 +160,7 @@ func sanitizeMXCSR(f State) { } }) mxcsr &= mxcsrMask - usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr) + hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr) } // PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by @@ -177,7 +177,7 @@ func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE // mask. GDB relies on this: see // gdb/x86-linux-nat.c:x86_linux_read_description(). - usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask()) + hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask()) if len(f) > maxlen { f = f[:maxlen] } @@ -208,9 +208,9 @@ func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid // Force reserved bits in MXCSR to 0. This is consistent with Linux. sanitizeMXCSR(State(f)) // Users can't enable *more* XCR0 bits than what we, and the CPU, support. - xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:]) + xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:]) xstateBV &= featureSet.ValidXCR0Mask() - usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) + hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) // Force XCOMP_BV and reserved bytes in the XSAVE header to 0. reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes] for i := range reserved { @@ -266,7 +266,7 @@ func (s *State) AfterLoad() { // What was in use? savedBV := fxsaveBV if len(old) >= xstateBVOffset+8 { - savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:]) + savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:]) } // Supported features must be a superset of saved features. diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go index d2f62631d..46634661f 100644 --- a/pkg/sentry/arch/fpu/fpu_arm64.go +++ b/pkg/sentry/arch/fpu/fpu_arm64.go @@ -58,6 +58,8 @@ func (s *State) Fork() State { } // BytePointer returns a pointer to the first byte of the state. +// +//go:nosplit func (s *State) BytePointer() *byte { return &(*s)[0] } diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go index 35d2e07c3..67d7edf68 100644 --- a/pkg/sentry/arch/signal.go +++ b/pkg/sentry/arch/signal.go @@ -16,7 +16,7 @@ package arch import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // SignalAct represents the action that should be taken when a signal is @@ -154,107 +154,107 @@ func (s *SignalInfo) FixSignalCodeForUser() { // PID returns the si_pid field. func (s *SignalInfo) PID() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[0:4])) + return int32(hostarch.ByteOrder.Uint32(s.Fields[0:4])) } // SetPID mutates the si_pid field. func (s *SignalInfo) SetPID(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) } // UID returns the si_uid field. func (s *SignalInfo) UID() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) + return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8])) } // SetUID mutates the si_uid field. func (s *SignalInfo) SetUID(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) } // Sigval returns the sigval field, which is aliased to both si_int and si_ptr. func (s *SignalInfo) Sigval() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[8:16]) + return hostarch.ByteOrder.Uint64(s.Fields[8:16]) } // SetSigval mutates the sigval field. func (s *SignalInfo) SetSigval(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[8:16], val) + hostarch.ByteOrder.PutUint64(s.Fields[8:16], val) } // TimerID returns the si_timerid field. func (s *SignalInfo) TimerID() linux.TimerID { - return linux.TimerID(usermem.ByteOrder.Uint32(s.Fields[0:4])) + return linux.TimerID(hostarch.ByteOrder.Uint32(s.Fields[0:4])) } // SetTimerID sets the si_timerid field. func (s *SignalInfo) SetTimerID(val linux.TimerID) { - usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val)) } // Overrun returns the si_overrun field. func (s *SignalInfo) Overrun() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[4:8])) + return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8])) } // SetOverrun sets the si_overrun field. func (s *SignalInfo) SetOverrun(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val)) } // Addr returns the si_addr field. func (s *SignalInfo) Addr() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[0:8]) + return hostarch.ByteOrder.Uint64(s.Fields[0:8]) } // SetAddr sets the si_addr field. func (s *SignalInfo) SetAddr(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[0:8], val) + hostarch.ByteOrder.PutUint64(s.Fields[0:8], val) } // Status returns the si_status field. func (s *SignalInfo) Status() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) + return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12])) } // SetStatus mutates the si_status field. func (s *SignalInfo) SetStatus(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) } // CallAddr returns the si_call_addr field. func (s *SignalInfo) CallAddr() uint64 { - return usermem.ByteOrder.Uint64(s.Fields[0:8]) + return hostarch.ByteOrder.Uint64(s.Fields[0:8]) } // SetCallAddr mutates the si_call_addr field. func (s *SignalInfo) SetCallAddr(val uint64) { - usermem.ByteOrder.PutUint64(s.Fields[0:8], val) + hostarch.ByteOrder.PutUint64(s.Fields[0:8], val) } // Syscall returns the si_syscall field. func (s *SignalInfo) Syscall() int32 { - return int32(usermem.ByteOrder.Uint32(s.Fields[8:12])) + return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12])) } // SetSyscall mutates the si_syscall field. func (s *SignalInfo) SetSyscall(val int32) { - usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) + hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val)) } // Arch returns the si_arch field. func (s *SignalInfo) Arch() uint32 { - return usermem.ByteOrder.Uint32(s.Fields[12:16]) + return hostarch.ByteOrder.Uint32(s.Fields[12:16]) } // SetArch mutates the si_arch field. func (s *SignalInfo) SetArch(val uint32) { - usermem.ByteOrder.PutUint32(s.Fields[12:16], val) + hostarch.ByteOrder.PutUint32(s.Fields[12:16], val) } // Band returns the si_band field. func (s *SignalInfo) Band() int64 { - return int64(usermem.ByteOrder.Uint64(s.Fields[0:8])) + return int64(hostarch.ByteOrder.Uint64(s.Fields[0:8])) } // SetBand mutates the si_band field. @@ -262,15 +262,15 @@ func (s *SignalInfo) SetBand(val int64) { // Note: this assumes the platform uses `long` as `__ARCH_SI_BAND_T`. // On some platforms, which gVisor doesn't support, `__ARCH_SI_BAND_T` is // `int`. See siginfo.h. - usermem.ByteOrder.PutUint64(s.Fields[0:8], uint64(val)) + hostarch.ByteOrder.PutUint64(s.Fields[0:8], uint64(val)) } // FD returns the si_fd field. func (s *SignalInfo) FD() uint32 { - return usermem.ByteOrder.Uint32(s.Fields[8:12]) + return hostarch.ByteOrder.Uint32(s.Fields[8:12]) } // SetFD mutates the si_fd field. func (s *SignalInfo) SetFD(val uint32) { - usermem.ByteOrder.PutUint32(s.Fields[8:12], val) + hostarch.ByteOrder.PutUint32(s.Fields[8:12], val) } diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index ee3743483..082ed92b1 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -21,10 +21,10 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" - "gvisor.dev/gvisor/pkg/usermem" ) // SignalContext64 is equivalent to struct sigcontext, the type passed as the @@ -133,7 +133,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt // space on the user stack naturally caps the amount of memory the // sentry will allocate for this purpose. fpSize, _ := c.fpuFrameSize() - sp = (sp - usermem.Addr(fpSize)) & ^usermem.Addr(63) + sp = (sp - hostarch.Addr(fpSize)) & ^hostarch.Addr(63) // Construct the UContext64 now since we need its size. uc := &UContext64{ @@ -180,8 +180,8 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt ucSize := uc.SizeBytes() // st.Arch.Width() is for the restorer address. sizeof(siginfo) == 128. frameSize := int(st.Arch.Width()) + ucSize + 128 - frameBottom := (sp-usermem.Addr(frameSize)) & ^usermem.Addr(15) - 8 - sp = frameBottom + usermem.Addr(frameSize) + frameBottom := (sp-hostarch.Addr(frameSize)) & ^hostarch.Addr(15) - 8 + sp = frameBottom + hostarch.Addr(frameSize) st.Bottom = sp // Prior to proceeding, figure out if the frame will exhaust the range diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 53281dcba..da71fb873 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -19,9 +19,9 @@ package arch import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" - "gvisor.dev/gvisor/pkg/usermem" ) // SignalContext64 is equivalent to struct sigcontext, the type passed as the @@ -107,8 +107,8 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt // sizeof(siginfo) == 128. // R30 stores the restorer address. frameSize := ucSize + 128 - frameBottom := (sp - usermem.Addr(frameSize)) & ^usermem.Addr(15) - sp = frameBottom + usermem.Addr(frameSize) + frameBottom := (sp - hostarch.Addr(frameSize)) & ^hostarch.Addr(15) + sp = frameBottom + hostarch.Addr(frameSize) st.Bottom = sp // Prior to proceeding, figure out if the frame will exhaust the range diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go index a1eae98f9..c732c7503 100644 --- a/pkg/sentry/arch/signal_stack.go +++ b/pkg/sentry/arch/signal_stack.go @@ -17,8 +17,8 @@ package arch import ( + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/usermem" ) const ( @@ -36,8 +36,8 @@ func (s SignalStack) IsEnabled() bool { } // Top returns the stack's top address. -func (s SignalStack) Top() usermem.Addr { - return usermem.Addr(s.Addr + s.Size) +func (s SignalStack) Top() hostarch.Addr { + return hostarch.Addr(s.Addr + s.Size) } // SetOnStack marks this signal stack as in use. @@ -49,8 +49,8 @@ func (s *SignalStack) SetOnStack() { } // Contains checks if the stack pointer is within this stack. -func (s *SignalStack) Contains(sp usermem.Addr) bool { - return usermem.Addr(s.Addr) < sp && sp <= usermem.Addr(s.Addr+s.Size) +func (s *SignalStack) Contains(sp hostarch.Addr) bool { + return hostarch.Addr(s.Addr) < sp && sp <= hostarch.Addr(s.Addr+s.Size) } // NativeSignalStack is a type that is equivalent to stack_t in the guest diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go index 5f06c751d..65a794c7c 100644 --- a/pkg/sentry/arch/stack.go +++ b/pkg/sentry/arch/stack.go @@ -16,18 +16,20 @@ package arch import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" + "gvisor.dev/gvisor/pkg/usermem" ) -// Stack is a simple wrapper around a usermem.IO and an address. Stack +// Stack is a simple wrapper around a hostarch.IO and an address. Stack // implements marshal.CopyContext, and marshallable values can be pushed or // popped from the stack through the marshal.Marshallable interface. // // Stack is not thread-safe. type Stack struct { // Our arch info. - // We use this for automatic Native conversion of usermem.Addrs during + // We use this for automatic Native conversion of hostarch.Addrs during // Push() and Pop(). Arch Context @@ -35,7 +37,7 @@ type Stack struct { IO usermem.IO // Our current stack bottom. - Bottom usermem.Addr + Bottom hostarch.Addr // Scratch buffer used for marshalling to avoid having to repeatedly // allocate scratch memory. @@ -59,20 +61,20 @@ func (s *Stack) CopyScratchBuffer(size int) []byte { // StackBottomMagic is the special address callers must past to all stack // marshalling operations to cause the src/dst address to be computed based on // the current end of the stack. -const StackBottomMagic = ^usermem.Addr(0) // usermem.Addr(-1) +const StackBottomMagic = ^hostarch.Addr(0) // hostarch.Addr(-1) // CopyOutBytes implements marshal.CopyContext.CopyOutBytes. CopyOutBytes // computes an appropriate address based on the current end of the // stack. Callers use the sentinel address StackBottomMagic to marshal methods // to indicate this. -func (s *Stack) CopyOutBytes(sentinel usermem.Addr, b []byte) (int, error) { +func (s *Stack) CopyOutBytes(sentinel hostarch.Addr, b []byte) (int, error) { if sentinel != StackBottomMagic { panic("Attempted to copy out to stack with absolute address") } c := len(b) - n, err := s.IO.CopyOut(context.Background(), s.Bottom-usermem.Addr(c), b, usermem.IOOpts{}) + n, err := s.IO.CopyOut(context.Background(), s.Bottom-hostarch.Addr(c), b, usermem.IOOpts{}) if err == nil && n == c { - s.Bottom -= usermem.Addr(n) + s.Bottom -= hostarch.Addr(n) } return n, err } @@ -81,21 +83,21 @@ func (s *Stack) CopyOutBytes(sentinel usermem.Addr, b []byte) (int, error) { // an appropriate address based on the current end of the stack. Callers must // use the sentinel address StackBottomMagic to marshal methods to indicate // this. -func (s *Stack) CopyInBytes(sentinel usermem.Addr, b []byte) (int, error) { +func (s *Stack) CopyInBytes(sentinel hostarch.Addr, b []byte) (int, error) { if sentinel != StackBottomMagic { panic("Attempted to copy in from stack with absolute address") } n, err := s.IO.CopyIn(context.Background(), s.Bottom, b, usermem.IOOpts{}) if err == nil { - s.Bottom += usermem.Addr(n) + s.Bottom += hostarch.Addr(n) } return n, err } // Align aligns the stack to the given offset. func (s *Stack) Align(offset int) { - if s.Bottom%usermem.Addr(offset) != 0 { - s.Bottom -= (s.Bottom % usermem.Addr(offset)) + if s.Bottom%hostarch.Addr(offset) != 0 { + s.Bottom -= (s.Bottom % hostarch.Addr(offset)) } } @@ -119,16 +121,16 @@ func (s *Stack) PushNullTerminatedByteSlice(bs []byte) (int, error) { // stack. type StackLayout struct { // ArgvStart is the beginning of the argument vector. - ArgvStart usermem.Addr + ArgvStart hostarch.Addr // ArgvEnd is the end of the argument vector. - ArgvEnd usermem.Addr + ArgvEnd hostarch.Addr // EnvvStart is the beginning of the environment vector. - EnvvStart usermem.Addr + EnvvStart hostarch.Addr // EnvvEnd is the end of the environment vector. - EnvvEnd usermem.Addr + EnvvEnd hostarch.Addr } // Load pushes the given args, env and aux vector to the stack using the @@ -148,7 +150,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error) // to be in this order. See: https://www.uclibc.org/docs/psABI-x86_64.pdf // page 29. l.EnvvEnd = s.Bottom - envAddrs := make([]usermem.Addr, len(env)) + envAddrs := make([]hostarch.Addr, len(env)) for i := len(env) - 1; i >= 0; i-- { if _, err := s.PushNullTerminatedByteSlice([]byte(env[i])); err != nil { return StackLayout{}, err @@ -159,7 +161,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error) // Push our strings. l.ArgvEnd = s.Bottom - argAddrs := make([]usermem.Addr, len(args)) + argAddrs := make([]hostarch.Addr, len(args)) for i := len(args) - 1; i >= 0; i-- { if _, err := s.PushNullTerminatedByteSlice([]byte(args[i])); err != nil { return StackLayout{}, err @@ -178,7 +180,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error) argvSize := s.Arch.Width() * uint(len(args)+1) envvSize := s.Arch.Width() * uint(len(env)+1) auxvSize := s.Arch.Width() * 2 * uint(len(aux)+1) - total := usermem.Addr(argvSize) + usermem.Addr(envvSize) + usermem.Addr(auxvSize) + usermem.Addr(s.Arch.Width()) + total := hostarch.Addr(argvSize) + hostarch.Addr(envvSize) + hostarch.Addr(auxvSize) + hostarch.Addr(s.Arch.Width()) expectedBottom := s.Bottom - total if expectedBottom%32 != 0 { s.Bottom -= expectedBottom % 32 @@ -188,11 +190,11 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error) // NOTE: We need an extra zero here per spec. // The Push function will automatically terminate // strings and arrays with a single null value. - auxv := make([]usermem.Addr, 0, len(aux)) + auxv := make([]hostarch.Addr, 0, len(aux)) for _, a := range aux { - auxv = append(auxv, usermem.Addr(a.Key), a.Value) + auxv = append(auxv, hostarch.Addr(a.Key), a.Value) } - auxv = append(auxv, usermem.Addr(0)) + auxv = append(auxv, hostarch.Addr(0)) _, err := s.pushAddrSliceAndTerminator(auxv) if err != nil { return StackLayout{}, err diff --git a/pkg/sentry/arch/stack_unsafe.go b/pkg/sentry/arch/stack_unsafe.go index 0e478e434..f4712d58f 100644 --- a/pkg/sentry/arch/stack_unsafe.go +++ b/pkg/sentry/arch/stack_unsafe.go @@ -17,19 +17,19 @@ package arch import ( "unsafe" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" - "gvisor.dev/gvisor/pkg/usermem" ) // pushAddrSliceAndTerminator copies a slices of addresses to the stack, and // also pushes an extra null address element at the end of the slice. // // Internally, we unsafely transmute the slice type from the arch-dependent -// []usermem.Addr type, to a slice of fixed-sized ints so that we can pass it to +// []hostarch.Addr type, to a slice of fixed-sized ints so that we can pass it to // go-marshal. // // On error, the contents of the stack and the bottom cursor are undefined. -func (s *Stack) pushAddrSliceAndTerminator(src []usermem.Addr) (int, error) { +func (s *Stack) pushAddrSliceAndTerminator(src []hostarch.Addr) (int, error) { // Note: Stack grows upwards, so push the terminator first. switch s.Arch.Width() { case 8: diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go index 1929e41cd..49c53452a 100644 --- a/pkg/sentry/devices/memdev/zero.go +++ b/pkg/sentry/devices/memdev/zero.go @@ -93,6 +93,7 @@ func (fd *zeroFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro // "/dev/zero (deleted)". opts.Offset = 0 opts.MappingIdentity = &fd.vfsfd + opts.SentryOwnedContent = true opts.MappingIdentity.IncRef() return nil } diff --git a/pkg/sentry/devices/tundev/BUILD b/pkg/sentry/devices/tundev/BUILD index 71c59287c..8b38d574d 100644 --- a/pkg/sentry/devices/tundev/BUILD +++ b/pkg/sentry/devices/tundev/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/inet", diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go index c43158aa4..a12eeb8e7 100644 --- a/pkg/sentry/devices/tundev/tundev.go +++ b/pkg/sentry/devices/tundev/tundev.go @@ -18,6 +18,7 @@ package tundev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -89,7 +90,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg } // Validate flags. - flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:])) + flags, err := netstack.LinuxToTUNFlags(hostarch.ByteOrder.Uint16(req.Data[:])) if err != nil { return 0, err } @@ -98,7 +99,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg case linux.TUNGETIFF: var req linux.IFReq copy(req.IFName[:], fd.device.Name()) - usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(fd.device.Flags())) + hostarch.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(fd.device.Flags())) _, err := req.CopyOut(t, data) return 0, err diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 420fbae34..0dc100f9b 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -48,6 +48,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/p9", "//pkg/refs", diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD index aedcecfa1..1ce56d79f 100644 --- a/pkg/sentry/fs/anon/BUILD +++ b/pkg/sentry/fs/anon/BUILD @@ -12,9 +12,9 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/anon/anon.go b/pkg/sentry/fs/anon/anon.go index 5c421f5fb..8bda22a8e 100644 --- a/pkg/sentry/fs/anon/anon.go +++ b/pkg/sentry/fs/anon/anon.go @@ -19,9 +19,9 @@ package anon import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/usermem" ) // NewInode constructs an anonymous Inode that is not associated @@ -37,6 +37,6 @@ func NewInode(ctx context.Context) *fs.Inode { Type: fs.Anonymous, DeviceID: PseudoDevice.DeviceID(), InodeID: PseudoDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, }) } diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index 58deb25fc..5aa668873 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" @@ -339,7 +340,7 @@ func cleanupUpper(ctx context.Context, parent *Inode, name string, copyUpErr err // size is the same used by io.Copy. var copyUpBuffers = sync.Pool{ New: func() interface{} { - b := make([]byte, 8*usermem.PageSize) + b := make([]byte, 8*hostarch.PageSize) return &b }, } diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 9379a4d7b..23a3a9a2d 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/rand", "//pkg/safemem", "//pkg/sentry/arch", diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index acbd401a0..e84ba7a5d 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -19,6 +19,7 @@ import ( "math" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" @@ -49,7 +50,7 @@ func newCharacterDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.M return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.CharacterDevice, DeviceFileMajor: major, DeviceFileMinor: minor, @@ -60,7 +61,7 @@ func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.CharacterDevice, DeviceFileMajor: memDevMajor, DeviceFileMinor: minor, @@ -72,7 +73,7 @@ func newDirectory(ctx context.Context, contents map[string]*fs.Inode, msrc *fs.M return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Directory, }) } @@ -82,7 +83,7 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Symlink, }) } @@ -137,7 +138,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Directory, }) } diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 11a2984d8..77e8d222a 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -17,6 +17,7 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -110,7 +111,7 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user } // Validate flags. - flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:])) + flags, err := netstack.LinuxToTUNFlags(hostarch.ByteOrder.Uint16(req.Data[:])) if err != nil { return 0, err } @@ -119,7 +120,7 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user case linux.TUNGETIFF: var req linux.IFReq copy(req.IFName[:], n.device.Name()) - usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(n.device.Flags())) + hostarch.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(n.device.Flags())) _, err := req.CopyOut(t, data) return 0, err diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index c83baf464..2120f2bad 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -40,6 +40,7 @@ go_test( "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", + "//pkg/hostarch", "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/syserror", diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index faeb3908c..ab0e9dac7 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -27,6 +27,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) func singlePipeFD() (int, error) { @@ -52,7 +54,7 @@ func mockPipeDirent(t *testing.T) *fs.Dirent { } inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, }) return fs.NewDirent(ctx, inode, "") } diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index d388f0e92..6469cc3a9 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -76,6 +76,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/safemem", "//pkg/sentry/arch", @@ -105,6 +106,7 @@ go_test( library = ":fsutil", deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/safemem", "//pkg/sentry/contexttest", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go index 2c9446c1d..38383e730 100644 --- a/pkg/sentry/fs/fsutil/dirty_set.go +++ b/pkg/sentry/fs/fsutil/dirty_set.go @@ -18,9 +18,9 @@ import ( "math" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/usermem" ) // DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to @@ -215,7 +215,7 @@ func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRan if max < wbr.Start { break } - ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), usermem.Read) + ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), hostarch.Read) if err != nil { return err } diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go index e3579c23c..48448c97c 100644 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ b/pkg/sentry/fs/fsutil/dirty_set_test.go @@ -18,18 +18,18 @@ import ( "reflect" "testing" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/usermem" ) func TestDirtySet(t *testing.T) { var set DirtySet - set.MarkDirty(memmap.MappableRange{0, 2 * usermem.PageSize}) - set.KeepDirty(memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize}) - set.MarkClean(memmap.MappableRange{0, 2 * usermem.PageSize}) + set.MarkDirty(memmap.MappableRange{0, 2 * hostarch.PageSize}) + set.KeepDirty(memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize}) + set.MarkClean(memmap.MappableRange{0, 2 * hostarch.PageSize}) want := &DirtySegmentDataSlices{ - Start: []uint64{usermem.PageSize}, - End: []uint64{2 * usermem.PageSize}, + Start: []uint64{hostarch.PageSize}, + End: []uint64{2 * hostarch.PageSize}, Values: []DirtyInfo{{Keep: true}}, } if got := set.ExportSortedSlices(); !reflect.DeepEqual(got, want) { diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index 1dc409d38..fdaceb1db 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -20,11 +20,11 @@ import ( "math" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/usermem" ) // FileRangeSet maps offsets into a memmap.Mappable to offsets into a @@ -130,7 +130,7 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map // MemoryFile.AllocateAndFill truncates down to a page // boundary, but FileRangeSet.Fill is supposed to // zero-fill to the end of the page in this case. - donepgaddr, ok := usermem.Addr(done).RoundUp() + donepgaddr, ok := hostarch.Addr(done).RoundUp() if donepg := uint64(donepgaddr); ok && donepg != done { dsts.DropFirst64(donepg - done) done = donepg @@ -184,7 +184,7 @@ func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) { // bytes after the new EOF on the same page are zeroed, and pages after the new // EOF are freed. func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) { - pgendaddr, ok := usermem.Addr(end).RoundUp() + pgendaddr, ok := hostarch.Addr(end).RoundUp() if ok { pgend := uint64(pgendaddr) @@ -208,7 +208,7 @@ func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) { if seg.Ok() { fr := seg.FileRange() fr.Start += end - seg.Start() - ims, err := mf.MapInternal(fr, usermem.Write) + ims, err := mf.MapInternal(fr, hostarch.Write) if err != nil { // There's no good recourse from here. This means // that we can't keep cached memory consistent with diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index 54f7b7cdc..23528bf25 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -18,11 +18,11 @@ import ( "fmt" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // HostFileMapper caches mappings of an arbitrary host file descriptor. It is @@ -50,13 +50,13 @@ type HostFileMapper struct { } const ( - chunkShift = usermem.HugePageShift + chunkShift = hostarch.HugePageShift chunkSize = 1 << chunkShift chunkMask = chunkSize - 1 ) func pagesInChunk(mr memmap.MappableRange, chunkStart uint64) int32 { - return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / usermem.PageSize) + return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / hostarch.PageSize) } type mapping struct { diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index c15d8a946..e1e38b498 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -18,6 +18,7 @@ import ( "math" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -59,7 +60,7 @@ func NewHostMappable(backingFile CachedFileObject) *HostMappable { } // AddMapping implements memmap.Mappable.AddMapping. -func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { // Hot path. Avoid defers. h.mu.Lock() mapped := h.mappings.AddMapping(ms, ar, offset, writable) @@ -71,7 +72,7 @@ func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, a } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { // Hot path. Avoid defers. h.mu.Lock() unmapped := h.mappings.RemoveMapping(ms, ar, offset, writable) @@ -82,18 +83,18 @@ func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace } // CopyMapping implements memmap.Mappable.CopyMapping. -func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return h.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { return []memmap.Translation{ { Source: optional, File: h, Offset: optional.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, nil } @@ -124,7 +125,7 @@ func (h *HostMappable) NotifyChangeFD() error { } // MapInternal implements memmap.File.MapInternal. -func (h *HostMappable) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (h *HostMappable) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write) } diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 0ed7aafa5..7856b354b 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -19,6 +19,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -622,7 +623,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { switch { case seg.Ok(): // Get internal mappings from the cache. - ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read) if err != nil { unlock() return done, err @@ -647,7 +648,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { // Read into the cache, then re-enter the loop to read from the // cache. reqMR := memmap.MappableRange{ - Start: uint64(usermem.Addr(gapMR.Start).RoundDown()), + Start: uint64(hostarch.Addr(gapMR.Start).RoundDown()), End: fs.OffsetPageEnd(int64(gapMR.End)), } optMR := gap.Range() @@ -729,7 +730,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error case seg.Ok() && seg.Start() < mr.End: // Get internal mappings from the cache. segMR := seg.Range().Intersect(mr) - ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write) + ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write) if err != nil { rw.maybeGrowFile() rw.c.dataMu.Unlock() @@ -786,7 +787,7 @@ func (c *CachingInodeOperations) useHostPageCache() bool { } // AddMapping implements memmap.Mappable.AddMapping. -func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { // Hot path. Avoid defers. c.mapsMu.Lock() mapped := c.mappings.AddMapping(ms, ar, offset, writable) @@ -808,7 +809,7 @@ func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.Mappi } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { // Hot path. Avoid defers. c.mapsMu.Lock() unmapped := c.mappings.RemoveMapping(ms, ar, offset, writable) @@ -836,12 +837,12 @@ func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Ma } // CopyMapping implements memmap.Mappable.CopyMapping. -func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return c.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { // Hot path. Avoid defer. if c.useHostPageCache() { mr := optional @@ -853,7 +854,7 @@ func (c *CachingInodeOperations) Translate(ctx context.Context, required, option Source: mr, File: c, Offset: mr.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, nil } @@ -885,7 +886,7 @@ func (c *CachingInodeOperations) Translate(ctx context.Context, required, option segMR := seg.Range().Intersect(optional) // TODO(jamieliu): Make Translations writable even if writability is // not required if already kept-dirty by another writable translation. - perms := usermem.AccessType{ + perms := hostarch.AccessType{ Read: true, Execute: true, } @@ -1050,7 +1051,7 @@ func (c *CachingInodeOperations) DecRef(fr memmap.FileRange) { // MapInternal implements memmap.File.MapInternal. This is used when we // directly map an underlying host fd and CachingInodeOperations is used as the // memmap.File during translation. -func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write) } diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go index 1547584c5..e107c3096 100644 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ b/pkg/sentry/fs/fsutil/inode_cached_test.go @@ -20,6 +20,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -249,7 +250,7 @@ func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length in type noopMappingSpace struct{} // Invalidate implements memmap.MappingSpace.Invalidate. -func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) { +func (noopMappingSpace) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) { } func anonInode(ctx context.Context) *fs.Inode { @@ -259,14 +260,14 @@ func anonInode(ctx context.Context) *fs.Inode { }, 0), }, fs.NewPseudoMountSource(ctx), fs.StableAttr{ Type: fs.Anonymous, - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, }) } func pagesOf(bs ...byte) []byte { - buf := make([]byte, 0, len(bs)*usermem.PageSize) + buf := make([]byte, 0, len(bs)*hostarch.PageSize) for _, b := range bs { - buf = append(buf, bytes.Repeat([]byte{b}, usermem.PageSize)...) + buf = append(buf, bytes.Repeat([]byte{b}, hostarch.PageSize)...) } return buf } @@ -292,28 +293,28 @@ func TestRead(t *testing.T) { // expects to only cache mapped pages), then call Translate to force it to // be cached. var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 2 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { + ar := hostarch.AddrRange{hostarch.PageSize, 2 * hostarch.PageSize} + if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil { t.Fatalf("AddMapping got %v, want nil", err) } - mr := memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { + mr := memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize} + if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil { t.Fatalf("Translate got %v, want nil", err) } - if cached := iops.cache.Span(); cached != usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, usermem.PageSize) + if cached := iops.cache.Span(); cached != hostarch.PageSize { + t.Errorf("SpanRange got %d, want %d", cached, hostarch.PageSize) } // Try to read 4 pages. The first and third pages should be read directly // from the "file", the second page should be read from the cache, and only // 3 pages (the size of the file) should be readable. - rbuf := make([]byte, 4*usermem.PageSize) + rbuf := make([]byte, 4*hostarch.PageSize) dst := usermem.BytesIOSequence(rbuf) n, err := iops.Read(ctx, file, dst, 0) - if n != 3*usermem.PageSize || (err != nil && err != io.EOF) { - t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*usermem.PageSize) + if n != 3*hostarch.PageSize || (err != nil && err != io.EOF) { + t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*hostarch.PageSize) } - rbuf = rbuf[:3*usermem.PageSize] + rbuf = rbuf[:3*hostarch.PageSize] // Did we get the bytes we expect? if !bytes.Equal(rbuf, buf) { @@ -323,7 +324,7 @@ func TestRead(t *testing.T) { // Delete the memory mapping before iops.Release(). The cached page will // either be evicted by ctx's pgalloc.MemoryFile, or dropped by // iops.Release(). - iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) + iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true) } func TestWrite(t *testing.T) { @@ -348,25 +349,25 @@ func TestWrite(t *testing.T) { // CachingInodeOperations expects to only cache mapped pages), then call // Translate to force them to be cached. var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 3 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { + ar := hostarch.AddrRange{hostarch.PageSize, 3 * hostarch.PageSize} + if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil { t.Fatalf("AddMapping got %v, want nil", err) } - defer iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) - mr := memmap.MappableRange{usermem.PageSize, 3 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { + defer iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true) + mr := memmap.MappableRange{hostarch.PageSize, 3 * hostarch.PageSize} + if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil { t.Fatalf("Translate got %v, want nil", err) } - if cached := iops.cache.Span(); cached != 2*usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, 2*usermem.PageSize) + if cached := iops.cache.Span(); cached != 2*hostarch.PageSize { + t.Errorf("SpanRange got %d, want %d", cached, 2*hostarch.PageSize) } // Write to the first 2 pages. wbuf := pagesOf('e', 'f') src := usermem.BytesIOSequence(wbuf) n, err := iops.Write(ctx, src, 0) - if n != 2*usermem.PageSize || err != nil { - t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*usermem.PageSize) + if n != 2*hostarch.PageSize || err != nil { + t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*hostarch.PageSize) } // The first page should have been written directly, since it was not cached. @@ -382,7 +383,7 @@ func TestWrite(t *testing.T) { } // Now the second page should have been written as well. - copy(want[usermem.PageSize:], pagesOf('f')) + copy(want[hostarch.PageSize:], pagesOf('f')) if !bytes.Equal(buf, want) { t.Errorf("File contents are %v, want %v", buf, want) } diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index b210e0e7e..c4a069832 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fd", + "//pkg/hostarch", "//pkg/log", "//pkg/p9", "//pkg/refs", diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index cffc756cc..d6bff3f40 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -17,11 +17,11 @@ package gofer import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/usermem" ) // getattr returns the 9p attributes of the p9.File. On success, Mode, Size, and RDev @@ -98,7 +98,7 @@ func bsize(pattr p9.Attr) int64 { // Some files, particularly those that are not on a local file system, // may have no clue of their block size. Better not to report something // misleading or buggy and have a safe default. - return usermem.PageSize + return hostarch.PageSize } // ntype returns an fs.InodeType from 9p attributes. diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index fb81d903d..1b83643db 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" @@ -216,7 +217,7 @@ func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.S n += uint32(e.sizeOf()) } var buf [4]byte - usermem.ByteOrder.PutUint32(buf[:], n) + hostarch.ByteOrder.PutUint32(buf[:], n) _, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{}) return 0, err diff --git a/pkg/sentry/fs/inotify_event.go b/pkg/sentry/fs/inotify_event.go index 686e1b1cd..399aff1ed 100644 --- a/pkg/sentry/fs/inotify_event.go +++ b/pkg/sentry/fs/inotify_event.go @@ -19,6 +19,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/usermem" ) @@ -100,10 +101,10 @@ func (e *Event) sizeOf() int { // construct the output. We use a buffer allocated ahead of time for // performance. buf must be at least inotifyEventBaseSize bytes. func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) { - usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd)) - usermem.ByteOrder.PutUint32(buf[4:], e.mask) - usermem.ByteOrder.PutUint32(buf[8:], e.cookie) - usermem.ByteOrder.PutUint32(buf[12:], e.len) + hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd)) + hostarch.ByteOrder.PutUint32(buf[4:], e.mask) + hostarch.ByteOrder.PutUint32(buf[8:], e.cookie) + hostarch.ByteOrder.PutUint32(buf[12:], e.len) writeLen := 0 diff --git a/pkg/sentry/fs/offset.go b/pkg/sentry/fs/offset.go index 53b5df175..3a8c97d8f 100644 --- a/pkg/sentry/fs/offset.go +++ b/pkg/sentry/fs/offset.go @@ -17,14 +17,14 @@ package fs import ( "math" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // OffsetPageEnd returns the file offset rounded up to the nearest // page boundary. OffsetPageEnd panics if rounding up causes overflow, // which shouldn't be possible given that offset is an int64. func OffsetPageEnd(offset int64) uint64 { - end, ok := usermem.Addr(offset).RoundUp() + end, ok := hostarch.Addr(offset).RoundUp() if !ok { panic("impossible overflow") } diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 01a1235b8..f96f5a3e5 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -19,11 +19,11 @@ import ( "strings" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // The virtual filesystem implements an overlay configuration. For a high-level @@ -274,7 +274,7 @@ func (o *overlayEntry) markDirectoryDirty() { } // AddMapping implements memmap.Mappable.AddMapping. -func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { o.mapsMu.Lock() defer o.mapsMu.Unlock() if err := o.inodeLocked().Mappable().AddMapping(ctx, ms, ar, offset, writable); err != nil { @@ -285,7 +285,7 @@ func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, a } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { o.mapsMu.Lock() defer o.mapsMu.Unlock() o.inodeLocked().Mappable().RemoveMapping(ctx, ms, ar, offset, writable) @@ -293,7 +293,7 @@ func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace } // CopyMapping implements memmap.Mappable.CopyMapping. -func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { o.mapsMu.Lock() defer o.mapsMu.Unlock() if err := o.inodeLocked().Mappable().CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { @@ -304,7 +304,7 @@ func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, } // Translate implements memmap.Mappable.Translate. -func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { o.dataMu.RLock() defer o.dataMu.RUnlock() return o.inodeLocked().Mappable().Translate(ctx, required, optional, at) diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index b8b2281a8..7af7e0b45 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -30,6 +30,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go index e6171dd1d..24426b225 100644 --- a/pkg/sentry/fs/proc/exec_args.go +++ b/pkg/sentry/fs/proc/exec_args.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -113,7 +114,7 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen defer m.DecUsers(ctx) // Figure out the bounds of the exec arg we are trying to read. - var execArgStart, execArgEnd usermem.Addr + var execArgStart, execArgEnd hostarch.Addr switch f.arg { case cmdlineExecArg: execArgStart, execArgEnd = m.ArgvStart(), m.ArgvEnd() @@ -172,8 +173,8 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208 // we'll return one page total between argv and envp because of the // above page restrictions. - if lengthEnvv > usermem.PageSize-len(buf) { - lengthEnvv = usermem.PageSize - len(buf) + if lengthEnvv > hostarch.PageSize-len(buf) { + lengthEnvv = hostarch.PageSize - len(buf) } // Make a new buffer to fit the whole thing tmp := make([]byte, length+lengthEnvv) diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index d2859a4c2..78132f7a5 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -17,13 +17,13 @@ package proc import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -125,7 +125,7 @@ func newProcInode(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: typ, } if t != nil { diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index 91617267d..7d975d333 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -19,10 +19,10 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -53,7 +53,7 @@ func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) anon := snapshot.Anonymous + snapshot.Tmpfs file := snapshot.PageCache + snapshot.Mapped // We don't actually have active/inactive LRUs, so just make up numbers. - activeFile := (file / 2) &^ (usermem.PageSize - 1) + activeFile := (file / 2) &^ (hostarch.PageSize - 1) inactiveFile := file - activeFile var buf bytes.Buffer diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 203cfa061..91c35eea9 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" @@ -35,7 +36,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -367,10 +367,10 @@ func (n *netRoute) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([] ) if len(rt.GatewayAddr) == header.IPv4AddressSize { flags |= linux.RTF_GATEWAY - gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) + gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr) } if len(rt.DstAddr) == header.IPv4AddressSize { - prefix = usermem.ByteOrder.Uint32(rt.DstAddr) + prefix = hostarch.ByteOrder.Uint32(rt.DstAddr) } l := fmt.Sprintf( "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", @@ -520,7 +520,7 @@ func networkToHost16(n uint16) uint16 { // binary.BigEndian.Uint16() require a read of binary.BigEndian and an // interface method call, defeating inlining. buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)} - return usermem.ByteOrder.Uint16(buf[:]) + return hostarch.ByteOrder.Uint16(buf[:]) } func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { @@ -542,14 +542,14 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { // __be32 which is a typedef for an unsigned int, and is printed with // %X. This means that for a little-endian machine, Linux prints the // least-significant byte of the address first. To emulate this, we first - // invert the byte order for the address using usermem.ByteOrder.Uint32, + // invert the byte order for the address using hostarch.ByteOrder.Uint32, // which makes it have the equivalent encoding to a __be32 on a little // endian machine. Note that this operation is a no-op on a big endian // machine. Then similar to Linux, we format it with %X, which will print // the most-significant byte of the __be32 address first, which is now // actually the least-significant byte of the original address in // linux.SockAddrInet.Addr on little endian machines, due to the conversion. - addr := usermem.ByteOrder.Uint32(a.Addr[:]) + addr := hostarch.ByteOrder.Uint32(a.Addr[:]) fmt.Fprintf(w, "%08X:%04X ", addr, port) case linux.AF_INET6: @@ -559,10 +559,10 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { } port := networkToHost16(a.Port) - addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) - addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) - addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) - addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) + addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4]) + addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8]) + addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12]) + addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16]) fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) } } diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 21338d912..713b81e08 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/proc/device", diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index 6121f0e95..b01688b1d 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" @@ -131,7 +132,7 @@ func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, iops, msrc, sattr) diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index bbe282c03..1d09afdd7 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" @@ -76,7 +77,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, tm, msrc, sattr) @@ -136,7 +137,7 @@ func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequen f.tcpMemInode.mu.Lock() defer f.tcpMemInode.mu.Unlock() - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s) if err != nil { return 0, err @@ -192,7 +193,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, ts, msrc, sattr) @@ -264,7 +265,7 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque // Only consider size of one memory page for input for performance reasons. // We are only reading if it's zero or not anyway. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -294,7 +295,7 @@ func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, ts, msrc, sattr) @@ -354,7 +355,7 @@ func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS if src.NumBytes() == 0 { return 0, nil } - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -413,7 +414,7 @@ func newIPForwardingInode(ctx context.Context, msrc *fs.MountSource, s inet.Stac sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, ipf, msrc, sattr) @@ -486,7 +487,7 @@ func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IO // Only consider size of one memory page for input for performance reasons. // We are only reading if it's zero or not anyway. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -524,7 +525,7 @@ func newPortRangeInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, ipf, msrc, sattr) @@ -589,7 +590,7 @@ func (pf *portRangeFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSe // Only consider size of one memory page for input for performance // reasons. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) ports := make([]int32, 2) n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts) diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index f43d6c221..ae5ed25f9 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" @@ -469,7 +470,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen defer mm.DecUsers(ctx) // Buffer the read data because of MM locks buf := make([]byte, dst.NumBytes()) - n, readErr := mm.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) + n, readErr := mm.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) if n > 0 { if _, err := dst.CopyOut(ctx, buf[:n]); err != nil { return 0, syserror.EFAULT @@ -632,7 +633,7 @@ func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) rss = mm.ResidentSetSize() } }) - fmt.Fprintf(&buf, "%d %d ", vss, rss/usermem.PageSize) + fmt.Fprintf(&buf, "%d %d ", vss, rss/hostarch.PageSize) // rsslim. fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur) @@ -684,7 +685,7 @@ func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([ }) var buf bytes.Buffer - fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize) + fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize) return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statmData)(nil)}}, 0 } @@ -939,8 +940,8 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc buf := make([]byte, size) for i, e := range auxv { - usermem.ByteOrder.PutUint64(buf[16*i:], e.Key) - usermem.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value)) + hostarch.ByteOrder.PutUint64(buf[16*i:], e.Key) + hostarch.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value)) } n, err := dst.CopyOut(ctx, buf[offset:]) @@ -1020,7 +1021,7 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS } // Limit input size so as not to impact performance if input size is large. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index 2bc9485d8..30d5ad4cf 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -132,7 +133,7 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u // the system page size, and the write must be performed at the start of // the file ..." - user_namespaces(7) srclen := src.NumBytes() - if srclen >= usermem.PageSize || offset != 0 { + if srclen >= hostarch.PageSize || offset != 0 { return 0, syserror.EINVAL } b := make([]byte, srclen) diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index a51d00d86..4a3d9636b 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -14,13 +14,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/socket/unix/transport", "//pkg/sync", "//pkg/syserror", - "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/fs/ramfs/tree.go b/pkg/sentry/fs/ramfs/tree.go index dfc9d3453..0ace636c9 100644 --- a/pkg/sentry/fs/ramfs/tree.go +++ b/pkg/sentry/fs/ramfs/tree.go @@ -20,9 +20,9 @@ import ( "strings" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" - "gvisor.dev/gvisor/pkg/usermem" ) // MakeDirectoryTree constructs a ramfs tree of all directories containing @@ -71,7 +71,7 @@ func emptyDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { return fs.NewInode(ctx, dir, msrc, fs.StableAttr{ DeviceID: anon.PseudoDevice.DeviceID(), InodeID: anon.PseudoDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Directory, }) } diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD index f2e8b9932..fdbc5f912 100644 --- a/pkg/sentry/fs/sys/BUILD +++ b/pkg/sentry/fs/sys/BUILD @@ -14,11 +14,11 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/kernel", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/sys/sys.go b/pkg/sentry/fs/sys/sys.go index 0891645e4..101779a7a 100644 --- a/pkg/sentry/fs/sys/sys.go +++ b/pkg/sentry/fs/sys/sys.go @@ -17,16 +17,16 @@ package sys import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/usermem" ) func newFile(ctx context.Context, node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { sattr := fs.StableAttr{ DeviceID: sysfsDevice.DeviceID(), InodeID: sysfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialFile, } return fs.NewInode(ctx, node, msrc, sattr) @@ -37,7 +37,7 @@ func newDir(ctx context.Context, msrc *fs.MountSource, contents map[string]*fs.I return fs.NewInode(ctx, d, msrc, fs.StableAttr{ DeviceID: sysfsDevice.DeviceID(), InodeID: sysfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.SpecialDirectory, }) } diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index d16cdb4df..c7977a217 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -8,6 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index 46511a6ac..c8ebe256c 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -20,6 +20,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -124,7 +125,7 @@ func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.I } if val := atomic.SwapUint64(&t.val, 0); val != 0 { var buf [sizeofUint64]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) if _, err := dst.CopyOut(ctx, buf[:]); err != nil { // Linux does not undo consuming the number of expirations even if // writing to userspace fails. diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index b521a86a2..90398376a 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -15,6 +15,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/safemem", "//pkg/sentry/device", "//pkg/sentry/fs", @@ -42,6 +43,7 @@ go_test( library = ":tmpfs", deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/kernel/contexttest", "//pkg/sentry/usage", diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go index d4d613ea9..1718f9372 100644 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ b/pkg/sentry/fs/tmpfs/file_test.go @@ -19,6 +19,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -31,7 +32,7 @@ func newFileInode(ctx context.Context) *fs.Inode { return fs.NewInode(ctx, iops, m, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.RegularFile, }) } diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index ad4aea282..f4de8c968 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -125,7 +126,7 @@ func NewMemfdInode(ctx context.Context, allowSeals bool) *fs.Inode { Type: fs.RegularFile, DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, }) } @@ -392,7 +393,7 @@ func (rw *fileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { switch { case seg.Ok(): // Get internal mappings. - ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read) if err != nil { return done, err } @@ -463,7 +464,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) // // See Linux, mm/filemap.c:generic_perform_write() and // mm/shmem.c:shmem_write_begin(). - if pgstart := int64(usermem.Addr(rw.f.attr.Size).RoundDown()); end > pgstart { + if pgstart := int64(hostarch.Addr(rw.f.attr.Size).RoundDown()); end > pgstart { end = pgstart } if end <= rw.offset { @@ -483,8 +484,8 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) mf := rw.f.kernel.MemoryFile() // Page-aligned mr for when we need to allocate memory. RoundUp can't // overflow since end is an int64. - pgstartaddr := usermem.Addr(rw.offset).RoundDown() - pgendaddr, _ := usermem.Addr(end).RoundUp() + pgstartaddr := hostarch.Addr(rw.offset).RoundDown() + pgendaddr, _ := hostarch.Addr(end).RoundUp() pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)} var done uint64 @@ -494,7 +495,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) switch { case seg.Ok(): // Get internal mappings. - ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write) + ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write) if err != nil { return done, err } @@ -527,7 +528,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) } // AddMapping implements memmap.Mappable.AddMapping. -func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { f.mapsMu.Lock() defer f.mapsMu.Unlock() @@ -544,7 +545,7 @@ func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingS pagesBefore := f.writableMappingPages // ar is guaranteed to be page aligned per memmap.Mappable. - f.writableMappingPages += uint64(ar.Length() / usermem.PageSize) + f.writableMappingPages += uint64(ar.Length() / hostarch.PageSize) if f.writableMappingPages < pagesBefore { panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages)) @@ -555,7 +556,7 @@ func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingS } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { f.mapsMu.Lock() defer f.mapsMu.Unlock() @@ -565,7 +566,7 @@ func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Mappi pagesBefore := f.writableMappingPages // ar is guaranteed to be page aligned per memmap.Mappable. - f.writableMappingPages -= uint64(ar.Length() / usermem.PageSize) + f.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize) if f.writableMappingPages > pagesBefore { panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages)) @@ -574,12 +575,12 @@ func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Mappi } // CopyMapping implements memmap.Mappable.CopyMapping. -func (f *fileInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (f *fileInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return f.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (f *fileInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (f *fileInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { f.dataMu.Lock() defer f.dataMu.Unlock() @@ -612,7 +613,7 @@ func (f *fileInodeOperations) Translate(ctx context.Context, required, optional Source: segMR, File: mf, Offset: seg.FileRangeOf(segMR).Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }) translatedEnd = segMR.End } diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index cf4ed5de0..577052888 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) var fsInfo = fs.Info{ @@ -41,8 +41,8 @@ var fsInfo = fs.Info{ // chosen to ensure that BlockSize * Blocks does not overflow int64 (which // applications may also handle incorrectly). // TODO(b/29637826): allow configuring a tmpfs size and enforce it. - TotalBlocks: math.MaxInt64 / usermem.PageSize, - FreeBlocks: math.MaxInt64 / usermem.PageSize, + TotalBlocks: math.MaxInt64 / hostarch.PageSize, + FreeBlocks: math.MaxInt64 / hostarch.PageSize, } // rename implements fs.InodeOperations.Rename for tmpfs nodes. @@ -99,7 +99,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn return fs.NewInode(ctx, d, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Directory, }) } @@ -232,7 +232,7 @@ func (d *Dir) newCreateOps() *ramfs.CreateOps { return fs.NewInode(ctx, iops, dir.MountSource, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.RegularFile, }), nil }, @@ -281,7 +281,7 @@ func NewSymlink(ctx context.Context, target string, owner fs.FileOwner, msrc *fs return fs.NewInode(ctx, s, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Symlink, }) } @@ -311,7 +311,7 @@ func NewSocket(ctx context.Context, socket transport.BoundEndpoint, owner fs.Fil return fs.NewInode(ctx, s, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Socket, }) } @@ -348,7 +348,7 @@ func NewFifo(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, return fs.NewInode(ctx, fifoIops, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Pipe, }) } diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index e6d0eb359..86ada820e 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -17,6 +17,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index c2da80bc2..13c9dbe7d 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -122,7 +123,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode { // TODO(b/75267214): Since ptsDevice must be shared between // different mounts, we must not assign fixed numbers. InodeID: ptsDevice.NextIno(), - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, Type: fs.Directory, }) } diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD new file mode 100644 index 000000000..48913068a --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/BUILD @@ -0,0 +1,47 @@ +load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +licenses(["notice"]) + +go_template_instance( + name = "dir_refs", + out = "dir_refs.go", + package = "cgroupfs", + prefix = "dir", + template = "//pkg/refsvfs2:refs_template", + types = { + "T": "dir", + }, +) + +go_library( + name = "cgroupfs", + srcs = [ + "base.go", + "cgroupfs.go", + "cpu.go", + "cpuacct.go", + "cpuset.go", + "dir_refs.go", + "memory.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/coverage", + "//pkg/log", + "//pkg/refs", + "//pkg/refsvfs2", + "//pkg/sentry/arch", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", + "//pkg/sentry/usage", + "//pkg/sentry/vfs", + "//pkg/sync", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go new file mode 100644 index 000000000..39c1013e1 --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/base.go @@ -0,0 +1,233 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroupfs + +import ( + "bytes" + "fmt" + "sort" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" +) + +// controllerCommon implements kernel.CgroupController. +// +// Must call init before use. +// +// +stateify savable +type controllerCommon struct { + ty kernel.CgroupControllerType + fs *filesystem +} + +func (c *controllerCommon) init(ty kernel.CgroupControllerType, fs *filesystem) { + c.ty = ty + c.fs = fs +} + +// Type implements kernel.CgroupController.Type. +func (c *controllerCommon) Type() kernel.CgroupControllerType { + return kernel.CgroupControllerType(c.ty) +} + +// HierarchyID implements kernel.CgroupController.HierarchyID. +func (c *controllerCommon) HierarchyID() uint32 { + return c.fs.hierarchyID +} + +// NumCgroups implements kernel.CgroupController.NumCgroups. +func (c *controllerCommon) NumCgroups() uint64 { + return atomic.LoadUint64(&c.fs.numCgroups) +} + +// Enabled implements kernel.CgroupController.Enabled. +// +// Controllers are currently always enabled. +func (c *controllerCommon) Enabled() bool { + return true +} + +// Filesystem implements kernel.CgroupController.Filesystem. +func (c *controllerCommon) Filesystem() *vfs.Filesystem { + return c.fs.VFSFilesystem() +} + +// RootCgroup implements kernel.CgroupController.RootCgroup. +func (c *controllerCommon) RootCgroup() kernel.Cgroup { + return c.fs.rootCgroup() +} + +// controller is an interface for common functionality related to all cgroups. +// It is an extension of the public cgroup interface, containing cgroup +// functionality private to cgroupfs. +type controller interface { + kernel.CgroupController + + // AddControlFiles should extend the contents map with inodes representing + // control files defined by this controller. + AddControlFiles(ctx context.Context, creds *auth.Credentials, c *cgroupInode, contents map[string]kernfs.Inode) +} + +// cgroupInode implements kernel.CgroupImpl and kernfs.Inode. +// +// +stateify savable +type cgroupInode struct { + dir + fs *filesystem + + // ts is the list of tasks in this cgroup. The kernel is responsible for + // removing tasks from this list before they're destroyed, so any tasks on + // this list are always valid. + // + // ts, and cgroup membership in general is protected by fs.tasksMu. + ts map[*kernel.Task]struct{} +} + +var _ kernel.CgroupImpl = (*cgroupInode)(nil) + +func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode { + c := &cgroupInode{ + fs: fs, + ts: make(map[*kernel.Task]struct{}), + } + + contents := make(map[string]kernfs.Inode) + contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c}) + contents["tasks"] = fs.newControllerFile(ctx, creds, &tasksData{c}) + + for _, ctl := range fs.controllers { + ctl.AddControlFiles(ctx, creds, c, contents) + } + + c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555)) + c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + c.dir.InitRefs() + c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents)) + + atomic.AddUint64(&fs.numCgroups, 1) + + return c +} + +func (c *cgroupInode) HierarchyID() uint32 { + return c.fs.hierarchyID +} + +// Controllers implements kernel.CgroupImpl.Controllers. +func (c *cgroupInode) Controllers() []kernel.CgroupController { + return c.fs.kcontrollers +} + +// Enter implements kernel.CgroupImpl.Enter. +func (c *cgroupInode) Enter(t *kernel.Task) { + c.fs.tasksMu.Lock() + c.ts[t] = struct{}{} + c.fs.tasksMu.Unlock() +} + +// Leave implements kernel.CgroupImpl.Leave. +func (c *cgroupInode) Leave(t *kernel.Task) { + c.fs.tasksMu.Lock() + delete(c.ts, t) + c.fs.tasksMu.Unlock() +} + +func sortTIDs(tids []kernel.ThreadID) { + sort.Slice(tids, func(i, j int) bool { return tids[i] < tids[j] }) +} + +// +stateify savable +type cgroupProcsData struct { + *cgroupInode +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *cgroupProcsData) Generate(ctx context.Context, buf *bytes.Buffer) error { + t := kernel.TaskFromContext(ctx) + currPidns := t.ThreadGroup().PIDNamespace() + + pgids := make(map[kernel.ThreadID]struct{}) + + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + + for task := range d.ts { + // Map dedups pgid, since iterating over all tasks produces multiple + // entries for the group leaders. + if pgid := currPidns.IDOfThreadGroup(task.ThreadGroup()); pgid != 0 { + pgids[pgid] = struct{}{} + } + } + + pgidList := make([]kernel.ThreadID, 0, len(pgids)) + for pgid, _ := range pgids { + pgidList = append(pgidList, pgid) + } + sortTIDs(pgidList) + + for _, pgid := range pgidList { + fmt.Fprintf(buf, "%d\n", pgid) + } + + return nil +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (d *cgroupProcsData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + // TODO(b/183137098): Payload is the pid for a process to add to this cgroup. + return src.NumBytes(), nil +} + +// +stateify savable +type tasksData struct { + *cgroupInode +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *tasksData) Generate(ctx context.Context, buf *bytes.Buffer) error { + t := kernel.TaskFromContext(ctx) + currPidns := t.ThreadGroup().PIDNamespace() + + var pids []kernel.ThreadID + + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + + for task := range d.ts { + if pid := currPidns.IDOfTask(task); pid != 0 { + pids = append(pids, pid) + } + } + sortTIDs(pids) + + for _, pid := range pids { + fmt.Fprintf(buf, "%d\n", pid) + } + + return nil +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (d *tasksData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + // TODO(b/183137098): Payload is the pid for a process to add to this cgroup. + return src.NumBytes(), nil +} diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go new file mode 100644 index 000000000..ca8caee5f --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -0,0 +1,412 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cgroupfs implements cgroupfs. +// +// A cgroup is a collection of tasks on the system, organized into a tree-like +// structure similar to a filesystem directory tree. In fact, each cgroup is +// represented by a directory on cgroupfs, and is manipulated through control +// files in the directory. +// +// All cgroups on a system are organized into hierarchies. Hierarchies are a +// distinct tree of cgroups, with a common set of controllers. One or more +// cgroupfs mounts may point to each hierarchy. These mounts provide a common +// view into the same tree of cgroups. +// +// A controller (also known as a "resource controller", or a cgroup "subsystem") +// determines the behaviour of each cgroup. +// +// In addition to cgroupfs, the kernel has a cgroup registry that tracks +// system-wide state related to cgroups such as active hierarchies and the +// controllers associated with them. +// +// Since cgroupfs doesn't allow hardlinks, there is a unique mapping between +// cgroupfs dentries and inodes. +// +// # Synchronization +// +// Cgroup hierarchy creation and destruction is protected by the +// kernel.CgroupRegistry.mu. Once created, a hierarchy's set of controllers, the +// filesystem associated with it, and the root cgroup for the hierarchy are +// immutable. +// +// Membership of tasks within cgroups is protected by +// cgroupfs.filesystem.tasksMu. Tasks also maintain a set of all cgroups they're +// in, and this list is protected by Task.mu. +// +// Lock order: +// +// kernel.CgroupRegistry.mu +// cgroupfs.filesystem.mu +// Task.mu +// cgroupfs.filesystem.tasksMu. +package cgroupfs + +import ( + "fmt" + "sort" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" +) + +const ( + // Name is the default filesystem name. + Name = "cgroup" + readonlyFileMode = linux.FileMode(0444) + writableFileMode = linux.FileMode(0644) + defaultMaxCachedDentries = uint64(1000) +) + +const ( + controllerCPU = kernel.CgroupControllerType("cpu") + controllerCPUAcct = kernel.CgroupControllerType("cpuacct") + controllerCPUSet = kernel.CgroupControllerType("cpuset") + controllerMemory = kernel.CgroupControllerType("memory") +) + +var allControllers = []kernel.CgroupControllerType{controllerCPU, controllerCPUAcct, controllerCPUSet, controllerMemory} + +// SupportedMountOptions is the set of supported mount options for cgroupfs. +var SupportedMountOptions = []string{"all", "cpu", "cpuacct", "cpuset", "memory"} + +// FilesystemType implements vfs.FilesystemType. +// +// +stateify savable +type FilesystemType struct{} + +// InternalData contains internal data passed in to the cgroupfs mount via +// vfs.GetFilesystemOptions.InternalData. +// +// +stateify savable +type InternalData struct { + DefaultControlValues map[string]int64 +} + +// filesystem implements vfs.FilesystemImpl. +// +// +stateify savable +type filesystem struct { + kernfs.Filesystem + devMinor uint32 + + // hierarchyID is the id the cgroup registry assigns to this hierarchy. Has + // the value kernel.InvalidCgroupHierarchyID until the FS is fully + // initialized. + // + // hierarchyID is immutable after initialization. + hierarchyID uint32 + + // controllers and kcontrollers are both the list of controllers attached to + // this cgroupfs. Both lists are the same set of controllers, but typecast + // to different interfaces for convenience. Both must stay in sync, and are + // immutable. + controllers []controller + kcontrollers []kernel.CgroupController + + numCgroups uint64 // Protected by atomic ops. + + root *kernfs.Dentry + + // tasksMu serializes task membership changes across all cgroups within a + // filesystem. + tasksMu sync.RWMutex `state:"nosave"` +} + +// Name implements vfs.FilesystemType.Name. +func (FilesystemType) Name() string { + return Name +} + +// Release implements vfs.FilesystemType.Release. +func (FilesystemType) Release(ctx context.Context) {} + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + devMinor, err := vfsObj.GetAnonBlockDevMinor() + if err != nil { + return nil, nil, err + } + + mopts := vfs.GenericParseMountOptions(opts.Data) + maxCachedDentries := defaultMaxCachedDentries + if str, ok := mopts["dentry_cache_limit"]; ok { + delete(mopts, "dentry_cache_limit") + maxCachedDentries, err = strconv.ParseUint(str, 10, 64) + if err != nil { + ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) + return nil, nil, syserror.EINVAL + } + } + + var wantControllers []kernel.CgroupControllerType + if _, ok := mopts["cpu"]; ok { + delete(mopts, "cpu") + wantControllers = append(wantControllers, controllerCPU) + } + if _, ok := mopts["cpuacct"]; ok { + delete(mopts, "cpuacct") + wantControllers = append(wantControllers, controllerCPUAcct) + } + if _, ok := mopts["cpuset"]; ok { + delete(mopts, "cpuset") + wantControllers = append(wantControllers, controllerCPUSet) + } + if _, ok := mopts["memory"]; ok { + delete(mopts, "memory") + wantControllers = append(wantControllers, controllerMemory) + } + if _, ok := mopts["all"]; ok { + if len(wantControllers) > 0 { + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: other controllers specified with all: %v", wantControllers) + return nil, nil, syserror.EINVAL + } + + delete(mopts, "all") + wantControllers = allControllers + } + + if len(wantControllers) == 0 { + // Specifying no controllers implies all controllers. + wantControllers = allControllers + } + + if len(mopts) != 0 { + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: unknown options: %v", mopts) + return nil, nil, syserror.EINVAL + } + + k := kernel.KernelFromContext(ctx) + r := k.CgroupRegistry() + + // "It is not possible to mount the same controller against multiple + // cgroup hierarchies. For example, it is not possible to mount both + // the cpu and cpuacct controllers against one hierarchy, and to mount + // the cpu controller alone against another hierarchy." - man cgroups(7) + // + // Is there a hierarchy available with all the controllers we want? If so, + // this mount is a view into the same hierarchy. + // + // Note: we're guaranteed to have at least one requested controller, since + // no explicit controller name implies all controllers. + if vfsfs := r.FindHierarchy(wantControllers); vfsfs != nil { + fs := vfsfs.Impl().(*filesystem) + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID) + fs.root.IncRef() + return vfsfs, fs.root.VFSDentry(), nil + } + + // No existing hierarchy with the exactly controllers found. Make a new + // one. Note that it's possible this mount creation is unsatisfiable, if one + // or more of the requested controllers are already on existing + // hierarchies. We'll find out about such collisions when we try to register + // the new hierarchy later. + fs := &filesystem{ + devMinor: devMinor, + } + fs.MaxCachedDentries = maxCachedDentries + fs.VFSFilesystem().Init(vfsObj, &fsType, fs) + + var defaults map[string]int64 + if opts.InternalData != nil { + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults) + defaults = opts.InternalData.(*InternalData).DefaultControlValues + } + + for _, ty := range wantControllers { + var c controller + switch ty { + case controllerMemory: + c = newMemoryController(fs, defaults) + case controllerCPU: + c = newCPUController(fs, defaults) + case controllerCPUAcct: + c = newCPUAcctController(fs) + case controllerCPUSet: + c = newCPUSetController(fs) + default: + panic(fmt.Sprintf("Unreachable: unknown cgroup controller %q", ty)) + } + fs.controllers = append(fs.controllers, c) + } + + if len(defaults) != 0 { + // Internal data is always provided at sentry startup and unused values + // indicate a problem with the sandbox config. Fail fast. + panic(fmt.Sprintf("cgroupfs.FilesystemType.GetFilesystem: unknown internal mount data: %v", defaults)) + } + + // Controllers usually appear in alphabetical order when displayed. Sort it + // here now, so it never needs to be sorted elsewhere. + sort.Slice(fs.controllers, func(i, j int) bool { return fs.controllers[i].Type() < fs.controllers[j].Type() }) + fs.kcontrollers = make([]kernel.CgroupController, 0, len(fs.controllers)) + for _, c := range fs.controllers { + fs.kcontrollers = append(fs.kcontrollers, c) + } + + root := fs.newCgroupInode(ctx, creds) + var rootD kernfs.Dentry + rootD.InitRoot(&fs.Filesystem, root) + fs.root = &rootD + + // Register controllers. The registry may be modified concurrently, so if we + // get an error, we raced with someone else who registered the same + // controllers first. + hid, err := r.Register(fs.kcontrollers) + if err != nil { + ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err) + rootD.DecRef(ctx) + fs.VFSFilesystem().DecRef(ctx) + return nil, nil, syserror.EBUSY + } + fs.hierarchyID = hid + + // Move all existing tasks to the root of the new hierarchy. + k.PopulateNewCgroupHierarchy(fs.rootCgroup()) + + return fs.VFSFilesystem(), rootD.VFSDentry(), nil +} + +func (fs *filesystem) rootCgroup() kernel.Cgroup { + return kernel.Cgroup{ + Dentry: fs.root, + CgroupImpl: fs.root.Inode().(kernel.CgroupImpl), + } +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release(ctx context.Context) { + k := kernel.KernelFromContext(ctx) + r := k.CgroupRegistry() + + if fs.hierarchyID != kernel.InvalidCgroupHierarchyID { + k.ReleaseCgroupHierarchy(fs.hierarchyID) + r.Unregister(fs.hierarchyID) + } + + fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) + fs.Filesystem.Release(ctx) +} + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + var cnames []string + for _, c := range fs.controllers { + cnames = append(cnames, string(c.Type())) + } + return strings.Join(cnames, ",") +} + +// +stateify savable +type implStatFS struct{} + +// StatFS implements kernfs.Inode.StatFS. +func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.CGROUP_SUPER_MAGIC), nil +} + +// dir implements kernfs.Inode for a generic cgroup resource controller +// directory. Specific controllers extend this to add their own functionality. +// +// +stateify savable +type dir struct { + dirRefs + kernfs.InodeAlwaysValid + kernfs.InodeAttrs + kernfs.InodeNotSymlink + kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir. + kernfs.OrderedChildren + implStatFS + + locks vfs.FileLocks +} + +// Keep implements kernfs.Inode.Keep. +func (*dir) Keep() bool { + return true +} + +// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. +func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { + return syserror.EPERM +} + +// Open implements kernfs.Inode.Open. +func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{ + SeekEnd: kernfs.SeekEndStaticEntries, + }) + if err != nil { + return nil, err + } + return fd.VFSFileDescription(), nil +} + +// DecRef implements kernfs.Inode.DecRef. +func (d *dir) DecRef(ctx context.Context) { + d.dirRefs.DecRef(func() { d.Destroy(ctx) }) +} + +// StatFS implements kernfs.Inode.StatFS. +func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.CGROUP_SUPER_MAGIC), nil +} + +// controllerFile represents a generic control file that appears within a cgroup +// directory. +// +// +stateify savable +type controllerFile struct { + kernfs.DynamicBytesFile +} + +func (fs *filesystem) newControllerFile(ctx context.Context, creds *auth.Credentials, data vfs.DynamicBytesSource) kernfs.Inode { + f := &controllerFile{} + f.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), data, readonlyFileMode) + return f +} + +func (fs *filesystem) newControllerWritableFile(ctx context.Context, creds *auth.Credentials, data vfs.WritableDynamicBytesSource) kernfs.Inode { + f := &controllerFile{} + f.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), data, writableFileMode) + return f +} + +// staticControllerFile represents a generic control file that appears within a +// cgroup directory which always returns the same data when read. +// staticControllerFiles are not writable. +// +// +stateify savable +type staticControllerFile struct { + kernfs.DynamicBytesFile + vfs.StaticData +} + +// Note: We let the caller provide the mode so that static files may be used to +// fake both readable and writable control files. However, static files are +// effectively readonly, as attempting to write to them will return EIO +// regardless of the mode. +func (fs *filesystem) newStaticControllerFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, data string) kernfs.Inode { + f := &staticControllerFile{StaticData: vfs.StaticData{Data: data}} + f.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), f, mode) + return f +} diff --git a/pkg/sentry/fsimpl/cgroupfs/cpu.go b/pkg/sentry/fsimpl/cgroupfs/cpu.go new file mode 100644 index 000000000..24d86a277 --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/cpu.go @@ -0,0 +1,70 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroupfs + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// +stateify savable +type cpuController struct { + controllerCommon + + // CFS bandwidth control parameters, values in microseconds. + cfsPeriod int64 + cfsQuota int64 + + // CPU shares, values should be (num core * 1024). + shares int64 +} + +var _ controller = (*cpuController)(nil) + +func newCPUController(fs *filesystem, defaults map[string]int64) *cpuController { + // Default values for controller parameters from Linux. + c := &cpuController{ + cfsPeriod: 100000, + cfsQuota: -1, + shares: 1024, + } + + if val, ok := defaults["cpu.cfs_period_us"]; ok { + c.cfsPeriod = val + delete(defaults, "cpu.cfs_period_us") + } + if val, ok := defaults["cpu.cfs_quota_us"]; ok { + c.cfsQuota = val + delete(defaults, "cpu.cfs_quota_us") + } + if val, ok := defaults["cpu.shares"]; ok { + c.shares = val + delete(defaults, "cpu.shares") + } + + c.controllerCommon.init(controllerCPU, fs) + return c +} + +// AddControlFiles implements controller.AddControlFiles. +func (c *cpuController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) { + contents["cpu.cfs_period_us"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.cfsPeriod)) + contents["cpu.cfs_quota_us"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.cfsQuota)) + contents["cpu.shares"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.shares)) +} diff --git a/pkg/sentry/fsimpl/cgroupfs/cpuacct.go b/pkg/sentry/fsimpl/cgroupfs/cpuacct.go new file mode 100644 index 000000000..d4104a00e --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/cpuacct.go @@ -0,0 +1,114 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroupfs + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usage" +) + +// +stateify savable +type cpuacctController struct { + controllerCommon +} + +var _ controller = (*cpuacctController)(nil) + +func newCPUAcctController(fs *filesystem) *cpuacctController { + c := &cpuacctController{} + c.controllerCommon.init(controllerCPUAcct, fs) + return c +} + +// AddControlFiles implements controller.AddControlFiles. +func (c *cpuacctController) AddControlFiles(ctx context.Context, creds *auth.Credentials, cg *cgroupInode, contents map[string]kernfs.Inode) { + cpuacctCG := &cpuacctCgroup{cg} + contents["cpuacct.stat"] = c.fs.newControllerFile(ctx, creds, &cpuacctStatData{cpuacctCG}) + contents["cpuacct.usage"] = c.fs.newControllerFile(ctx, creds, &cpuacctUsageData{cpuacctCG}) + contents["cpuacct.usage_user"] = c.fs.newControllerFile(ctx, creds, &cpuacctUsageUserData{cpuacctCG}) + contents["cpuacct.usage_sys"] = c.fs.newControllerFile(ctx, creds, &cpuacctUsageSysData{cpuacctCG}) +} + +// +stateify savable +type cpuacctCgroup struct { + *cgroupInode +} + +func (c *cpuacctCgroup) collectCPUStats() usage.CPUStats { + var cs usage.CPUStats + c.fs.tasksMu.RLock() + // Note: This isn't very accurate, since the tasks are potentially + // still running as we accumulate their stats. + for t := range c.ts { + cs.Accumulate(t.CPUStats()) + } + c.fs.tasksMu.RUnlock() + return cs +} + +// +stateify savable +type cpuacctStatData struct { + *cpuacctCgroup +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *cpuacctStatData) Generate(ctx context.Context, buf *bytes.Buffer) error { + cs := d.collectCPUStats() + fmt.Fprintf(buf, "user %d\n", linux.ClockTFromDuration(cs.UserTime)) + fmt.Fprintf(buf, "system %d\n", linux.ClockTFromDuration(cs.SysTime)) + return nil +} + +// +stateify savable +type cpuacctUsageData struct { + *cpuacctCgroup +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *cpuacctUsageData) Generate(ctx context.Context, buf *bytes.Buffer) error { + cs := d.collectCPUStats() + fmt.Fprintf(buf, "%d\n", cs.UserTime.Nanoseconds()+cs.SysTime.Nanoseconds()) + return nil +} + +// +stateify savable +type cpuacctUsageUserData struct { + *cpuacctCgroup +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *cpuacctUsageUserData) Generate(ctx context.Context, buf *bytes.Buffer) error { + cs := d.collectCPUStats() + fmt.Fprintf(buf, "%d\n", cs.UserTime.Nanoseconds()) + return nil +} + +// +stateify savable +type cpuacctUsageSysData struct { + *cpuacctCgroup +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *cpuacctUsageSysData) Generate(ctx context.Context, buf *bytes.Buffer) error { + cs := d.collectCPUStats() + fmt.Fprintf(buf, "%d\n", cs.SysTime.Nanoseconds()) + return nil +} diff --git a/pkg/sentry/fsimpl/cgroupfs/cpuset.go b/pkg/sentry/fsimpl/cgroupfs/cpuset.go new file mode 100644 index 000000000..ac547f8e2 --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/cpuset.go @@ -0,0 +1,39 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroupfs + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// +stateify savable +type cpusetController struct { + controllerCommon +} + +var _ controller = (*cpusetController)(nil) + +func newCPUSetController(fs *filesystem) *cpusetController { + c := &cpusetController{} + c.controllerCommon.init(controllerCPUSet, fs) + return c +} + +// AddControlFiles implements controller.AddControlFiles. +func (c *cpusetController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) { + // This controller is currently intentionally empty. +} diff --git a/pkg/sentry/fsimpl/cgroupfs/memory.go b/pkg/sentry/fsimpl/cgroupfs/memory.go new file mode 100644 index 000000000..485c98376 --- /dev/null +++ b/pkg/sentry/fsimpl/cgroupfs/memory.go @@ -0,0 +1,74 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroupfs + +import ( + "bytes" + "fmt" + "math" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usage" +) + +// +stateify savable +type memoryController struct { + controllerCommon + + limitBytes int64 +} + +var _ controller = (*memoryController)(nil) + +func newMemoryController(fs *filesystem, defaults map[string]int64) *memoryController { + c := &memoryController{ + // Linux sets this to (PAGE_COUNTER_MAX * PAGE_SIZE) by default, which + // is ~ 2**63 on a 64-bit system. So essentially, inifinity. The exact + // value isn't very important. + limitBytes: math.MaxInt64, + } + if val, ok := defaults["memory.limit_in_bytes"]; ok { + c.limitBytes = val + delete(defaults, "memory.limit_in_bytes") + } + c.controllerCommon.init(controllerMemory, fs) + return c +} + +// AddControlFiles implements controller.AddControlFiles. +func (c *memoryController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) { + contents["memory.usage_in_bytes"] = c.fs.newControllerFile(ctx, creds, &memoryUsageInBytesData{}) + contents["memory.limit_in_bytes"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.limitBytes)) +} + +// +stateify savable +type memoryUsageInBytesData struct{} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *memoryUsageInBytesData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // TODO(b/183151557): This is a giant hack, we're using system-wide + // accounting since we know there is only one cgroup. + k := kernel.KernelFromContext(ctx) + mf := k.MemoryFile() + mf.UpdateUsage() + _, totalBytes := usage.MemoryAccounting.Copy() + + fmt.Fprintf(buf, "%d\n", totalBytes) + return nil +} diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD index bcb01bb08..c09fdc7f9 100644 --- a/pkg/sentry/fsimpl/eventfd/BUILD +++ b/pkg/sentry/fsimpl/eventfd/BUILD @@ -10,6 +10,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fdnotifier", + "//pkg/hostarch", "//pkg/log", "//pkg/sentry/vfs", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go index 30bd05357..4f79cfcb7 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" @@ -188,7 +189,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc efd.queue.Notify(waiter.WritableEvents) var buf [8]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) _, err := dst.CopyOut(ctx, buf[:]) return err } @@ -196,7 +197,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc // Preconditions: Must be called with efd.mu locked. func (efd *EventFileDescription) hostWriteLocked(val uint64) error { var buf [8]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) _, err := unix.Write(efd.hostfd, buf[:]) if err == unix.EWOULDBLOCK { return syserror.ErrWouldBlock @@ -209,7 +210,7 @@ func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequen if _, err := src.CopyIn(ctx, buf[:]); err != nil { return err } - val := usermem.ByteOrder.Uint64(buf[:]) + val := hostarch.ByteOrder.Uint64(buf[:]) return efd.Signal(val) } diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 155c0f56d..3a4777fbe 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -46,6 +46,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/refs", @@ -75,6 +76,7 @@ go_test( library = ":fuse", deps = [ "//pkg/abi/linux", + "//pkg/hostarch", "//pkg/marshal", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel", diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go index 23ce91849..66ea889f9 100644 --- a/pkg/sentry/fsimpl/fuse/read_write.go +++ b/pkg/sentry/fsimpl/fuse/read_write.go @@ -20,11 +20,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // ReadInPages sends FUSE_READ requests for the size after round it up to @@ -43,10 +43,10 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui } // Round up to a multiple of page size. - readSize, _ := usermem.PageRoundUp(uint64(size)) + readSize, _ := hostarch.PageRoundUp(uint64(size)) // One request cannnot exceed either maxRead or maxPages. - maxPages := fs.conn.maxRead >> usermem.PageShift + maxPages := fs.conn.maxRead >> hostarch.PageShift if maxPages > uint32(fs.conn.maxPages) { maxPages = uint32(fs.conn.maxPages) } @@ -54,9 +54,9 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui var outs [][]byte var sizeRead uint32 - // readSize is a multiple of usermem.PageSize. + // readSize is a multiple of hostarch.PageSize. // Always request bytes as a multiple of pages. - pagesRead, pagesToRead := uint32(0), uint32(readSize>>usermem.PageShift) + pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift) // Reuse the same struct for unmarshalling to avoid unnecessary memory allocation. in := linux.FUSEReadIn{ @@ -76,8 +76,8 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui pagesCanRead = maxPages } - in.Offset = off + (uint64(pagesRead) << usermem.PageShift) - in.Size = pagesCanRead << usermem.PageShift + in.Offset = off + (uint64(pagesRead) << hostarch.PageShift) + in.Size = pagesCanRead << hostarch.PageShift // TODO(gvisor.dev/issue/3247): support async read. @@ -159,7 +159,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, } // One request cannnot exceed either maxWrite or maxPages. - maxWrite := uint32(fs.conn.maxPages) << usermem.PageShift + maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift if maxWrite > fs.conn.maxWrite { maxWrite = fs.conn.maxWrite } @@ -188,8 +188,8 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, // Limit the write size to one page. // Note that the bigWrites flag is obsolete, // latest libfuse always sets it on. - if !fs.conn.bigWrites && toWrite > usermem.PageSize { - toWrite = usermem.PageSize + if !fs.conn.bigWrites && toWrite > hostarch.PageSize { + toWrite = hostarch.PageSize } // Limit the write size to maxWrite. diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go index 10fb9d7d2..8a72489fa 100644 --- a/pkg/sentry/fsimpl/fuse/request_response.go +++ b/pkg/sentry/fsimpl/fuse/request_response.go @@ -19,10 +19,10 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/usermem" ) // fuseInitRes is a variable-length wrapper of linux.FUSEInitOut. The FUSE @@ -45,29 +45,29 @@ func (r *fuseInitRes) UnmarshalBytes(src []byte) { out := &r.initOut // Introduced before FUSE kernel version 7.13. - out.Major = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.Major = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] - out.Minor = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.Minor = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] - out.MaxReadahead = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.MaxReadahead = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] - out.Flags = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.Flags = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] - out.MaxBackground = uint16(usermem.ByteOrder.Uint16(src[:2])) + out.MaxBackground = uint16(hostarch.ByteOrder.Uint16(src[:2])) src = src[2:] - out.CongestionThreshold = uint16(usermem.ByteOrder.Uint16(src[:2])) + out.CongestionThreshold = uint16(hostarch.ByteOrder.Uint16(src[:2])) src = src[2:] - out.MaxWrite = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.MaxWrite = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] // Introduced in FUSE kernel version 7.23. if len(src) >= 4 { - out.TimeGran = uint32(usermem.ByteOrder.Uint32(src[:4])) + out.TimeGran = uint32(hostarch.ByteOrder.Uint32(src[:4])) src = src[4:] } // Introduced in FUSE kernel version 7.28. if len(src) >= 2 { - out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2])) + out.MaxPages = uint16(hostarch.ByteOrder.Uint16(src[:2])) src = src[2:] } _ = src // Remove unused warning. diff --git a/pkg/sentry/fsimpl/fuse/utils_test.go b/pkg/sentry/fsimpl/fuse/utils_test.go index 2c0cc0f4e..b0bab0066 100644 --- a/pkg/sentry/fsimpl/fuse/utils_test.go +++ b/pkg/sentry/fsimpl/fuse/utils_test.go @@ -24,7 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) func setup(t *testing.T) *testutil.System { @@ -82,12 +83,12 @@ func (t *testPayload) SizeBytes() int { // MarshalBytes implements marshal.Marshallable.MarshalBytes. func (t *testPayload) MarshalBytes(dst []byte) { - usermem.ByteOrder.PutUint32(dst[:4], t.data) + hostarch.ByteOrder.PutUint32(dst[:4], t.data) } // UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. func (t *testPayload) UnmarshalBytes(src []byte) { - *t = testPayload{data: usermem.ByteOrder.Uint32(src[:4])} + *t = testPayload{data: hostarch.ByteOrder.Uint32(src[:4])} } // Packed implements marshal.Marshallable.Packed. @@ -106,17 +107,17 @@ func (t *testPayload) UnmarshalUnsafe(src []byte) { } // CopyOutN implements marshal.Marshallable.CopyOutN. -func (t *testPayload) CopyOutN(task marshal.CopyContext, addr usermem.Addr, limit int) (int, error) { +func (t *testPayload) CopyOutN(task marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { panic("not implemented") } // CopyOut implements marshal.Marshallable.CopyOut. -func (t *testPayload) CopyOut(task marshal.CopyContext, addr usermem.Addr) (int, error) { +func (t *testPayload) CopyOut(task marshal.CopyContext, addr hostarch.Addr) (int, error) { panic("not implemented") } // CopyIn implements marshal.Marshallable.CopyIn. -func (t *testPayload) CopyIn(task marshal.CopyContext, addr usermem.Addr) (int, error) { +func (t *testPayload) CopyIn(task marshal.CopyContext, addr hostarch.Addr) (int, error) { panic("not implemented") } diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 807b6ed1f..6d5258a9b 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -51,6 +51,7 @@ go_library( "//pkg/fd", "//pkg/fdnotifier", "//pkg/fspath", + "//pkg/hostarch", "//pkg/log", "//pkg/p9", "//pkg/refs", diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 9da01cba3..177e42649 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) func (d *dentry) isDir() bool { @@ -98,7 +98,7 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) { mode: uint32(opts.mode), uid: uint32(opts.kuid), gid: uint32(opts.kgid), - blockSize: usermem.PageSize, // arbitrary + blockSize: hostarch.PageSize, // arbitrary atime: now, mtime: now, ctime: now, diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 692da02c1..526136324 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -44,6 +44,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" refs_vfs1 "gvisor.dev/gvisor/pkg/refs" @@ -60,7 +61,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" - "gvisor.dev/gvisor/pkg/usermem" ) // Name is the default filesystem name. @@ -872,7 +872,7 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma mode: uint32(attr.Mode), uid: uint32(fs.opts.dfltuid), gid: uint32(fs.opts.dfltgid), - blockSize: usermem.PageSize, + blockSize: hostarch.PageSize, readFD: -1, writeFD: -1, mmapFD: -1, @@ -1104,24 +1104,27 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs defer d.metadataMu.Unlock() // As with Linux, if the UID, GID, or file size is changing, we have to - // clear permission bits. Note that when set, clearSGID causes - // permissions to be updated, but does not modify stat.Mask, as - // modification would cause an extra inotify flag to be set. - clearSGID := stat.Mask&linux.STATX_UID != 0 && stat.UID != atomic.LoadUint32(&d.uid) || - stat.Mask&linux.STATX_GID != 0 && stat.GID != atomic.LoadUint32(&d.gid) || + // clear permission bits. Note that when set, clearSGID may cause + // permissions to be updated. + clearSGID := (stat.Mask&linux.STATX_UID != 0 && stat.UID != atomic.LoadUint32(&d.uid)) || + (stat.Mask&linux.STATX_GID != 0 && stat.GID != atomic.LoadUint32(&d.gid)) || stat.Mask&linux.STATX_SIZE != 0 if clearSGID { if stat.Mask&linux.STATX_MODE != 0 { stat.Mode = uint16(vfs.ClearSUIDAndSGID(uint32(stat.Mode))) } else { - stat.Mode = uint16(vfs.ClearSUIDAndSGID(atomic.LoadUint32(&d.mode))) + oldMode := atomic.LoadUint32(&d.mode) + if updatedMode := vfs.ClearSUIDAndSGID(oldMode); updatedMode != oldMode { + stat.Mode = uint16(updatedMode) + stat.Mask |= linux.STATX_MODE + } } } if !d.isSynthetic() { if stat.Mask != 0 { if err := d.file.setAttr(ctx, p9.SetAttrMask{ - Permissions: stat.Mask&linux.STATX_MODE != 0 || clearSGID, + Permissions: stat.Mask&linux.STATX_MODE != 0, UID: stat.Mask&linux.STATX_UID != 0, GID: stat.Mask&linux.STATX_GID != 0, Size: stat.Mask&linux.STATX_SIZE != 0, @@ -1156,7 +1159,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs return nil } } - if stat.Mask&linux.STATX_MODE != 0 || clearSGID { + if stat.Mask&linux.STATX_MODE != 0 { atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode)) } if stat.Mask&linux.STATX_UID != 0 { @@ -1217,8 +1220,8 @@ func (d *dentry) updateSizeLocked(newSize uint64) { // so we can't race with Write or another truncate.) d.dataMu.Unlock() if d.size < oldSize { - oldpgend, _ := usermem.PageRoundUp(oldSize) - newpgend, _ := usermem.PageRoundUp(d.size) + oldpgend, _ := hostarch.PageRoundUp(oldSize) + newpgend, _ := hostarch.PageRoundUp(d.size) if oldpgend != newpgend { d.mapsMu.Lock() d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 4f1ad0c88..713f0a480 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/safemem" @@ -291,8 +292,8 @@ func (fd *regularFileFD) writeCache(ctx context.Context, d *dentry, offset int64 } // Remove touched pages from the cache. - pgstart := usermem.PageRoundDown(uint64(offset)) - pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes())) + pgstart := hostarch.PageRoundDown(uint64(offset)) + pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes())) if !ok { return syserror.EINVAL } @@ -408,7 +409,7 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) switch { case seg.Ok(): // Get internal mappings from the cache. - ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read) if err != nil { dataMuUnlock() rw.d.handleMu.RUnlock() @@ -434,9 +435,9 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) if fillCache { // Read into the cache, then re-enter the loop to read from the // cache. - gapEnd, _ := usermem.PageRoundUp(gapMR.End) + gapEnd, _ := hostarch.PageRoundUp(gapMR.End) reqMR := memmap.MappableRange{ - Start: usermem.PageRoundDown(gapMR.Start), + Start: hostarch.PageRoundDown(gapMR.Start), End: gapEnd, } optMR := gap.Range() @@ -527,7 +528,7 @@ func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, erro case seg.Ok(): // Get internal mappings from the cache. segMR := seg.Range().Intersect(mr) - ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write) + ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write) if err != nil { retErr = err goto exitLoop @@ -700,6 +701,7 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt } // After this point, d may be used as a memmap.Mappable. d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init) + opts.SentryOwnedContent = d.fs.opts.forcePageCache return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) } @@ -714,7 +716,7 @@ func (d *dentry) mayCachePages() bool { } // AddMapping implements memmap.Mappable.AddMapping. -func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { d.mapsMu.Lock() mapped := d.mappings.AddMapping(ms, ar, offset, writable) // Do this unconditionally since whether we have a host FD can change @@ -735,7 +737,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar user } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { d.mapsMu.Lock() unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable) for _, r := range unmapped { @@ -759,12 +761,12 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar u } // CopyMapping implements memmap.Mappable.CopyMapping. -func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return d.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { d.handleMu.RLock() if d.mmapFD >= 0 && !d.fs.opts.forcePageCache { d.handleMu.RUnlock() @@ -777,7 +779,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab Source: mr, File: &d.pf, Offset: mr.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, nil } @@ -786,7 +788,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab // Constrain translations to d.size (rounded up) to prevent translation to // pages that may be concurrently truncated. - pgend, _ := usermem.PageRoundUp(d.size) + pgend, _ := hostarch.PageRoundUp(d.size) var beyondEOF bool if required.End > pgend { if required.Start >= pgend { @@ -811,7 +813,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab segMR := seg.Range().Intersect(optional) // TODO(jamieliu): Make Translations writable even if writability is // not required if already kept-dirty by another writable translation. - perms := usermem.AccessType{ + perms := hostarch.AccessType{ Read: true, Execute: true, } @@ -954,7 +956,7 @@ func (d *dentryPlatformFile) DecRef(fr memmap.FileRange) { } // MapInternal implements memmap.File.MapInternal. -func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { d.handleMu.RLock() defer d.handleMu.RUnlock() return d.hostFileMapper.MapInternal(fr, int(d.mmapFD), at.Write) diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go index c90071e4e..83e841a51 100644 --- a/pkg/sentry/fsimpl/gofer/save_restore.go +++ b/pkg/sentry/fsimpl/gofer/save_restore.go @@ -22,12 +22,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) type saveRestoreContextID int @@ -85,7 +85,7 @@ func (fs *filesystem) PrepareSave(ctx context.Context) error { func (fd *specialFileFD) savePipeData(ctx context.Context) error { fd.bufMu.Lock() defer fd.bufMu.Unlock() - var buf [usermem.PageSize]byte + var buf [hostarch.PageSize]byte for { n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:])), ^uint64(0)) if n != 0 { diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 4ae9d6d5e..b94dfeb7f 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -47,6 +47,7 @@ go_library( "//pkg/context", "//pkg/fdnotifier", "//pkg/fspath", + "//pkg/hostarch", "//pkg/iovec", "//pkg/log", "//pkg/marshal/primitive", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index b9cce4181..3b90375b6 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/hostfd" @@ -431,8 +432,8 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre } oldSize := uint64(hostStat.Size) if s.Size < oldSize { - oldpgend, _ := usermem.PageRoundUp(oldSize) - newpgend, _ := usermem.PageRoundUp(s.Size) + oldpgend, _ := hostarch.PageRoundUp(oldSize) + newpgend, _ := hostarch.PageRoundUp(s.Size) if oldpgend != newpgend { i.CachedMappable.InvalidateRange(memmap.MappableRange{newpgend, oldpgend}) } diff --git a/pkg/sentry/fsimpl/host/save_restore.go b/pkg/sentry/fsimpl/host/save_restore.go index 5688bddc8..31301c715 100644 --- a/pkg/sentry/fsimpl/host/save_restore.go +++ b/pkg/sentry/fsimpl/host/save_restore.go @@ -21,9 +21,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/hostfd" - "gvisor.dev/gvisor/pkg/usermem" ) // beforeSave is invoked by stateify. @@ -38,7 +38,7 @@ func (i *inode) beforeSave() { // EBADF from the read. i.bufMu.Lock() defer i.bufMu.Unlock() - var buf [usermem.PageSize]byte + var buf [hostarch.PageSize]byte for { n, err := hostfd.Preadv2(int32(i.hostFD), safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:])), -1 /* offset */, 0 /* flags */) if n != 0 { diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 6dbc7e34d..b7d13cced 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -105,6 +105,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 65054b0ea..84b1c3745 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -25,8 +25,10 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// DynamicBytesFile implements kernfs.Inode and represents a read-only -// file whose contents are backed by a vfs.DynamicBytesSource. +// DynamicBytesFile implements kernfs.Inode and represents a read-only file +// whose contents are backed by a vfs.DynamicBytesSource. If data additionally +// implements vfs.WritableDynamicBytesSource, the file also supports dispatching +// writes to the implementer, but note that this will not update the source data. // // Must be instantiated with NewDynamicBytesFile or initialized with Init // before first use. @@ -40,7 +42,9 @@ type DynamicBytesFile struct { InodeNotSymlink locks vfs.FileLocks - data vfs.DynamicBytesSource + // data can additionally implement vfs.WritableDynamicBytesSource to support + // writes. + data vfs.DynamicBytesSource } var _ Inode = (*DynamicBytesFile)(nil) diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 6b890a39c..3d0866ecf 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -20,12 +20,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // InodeNoopRefCount partially implements the Inode interface, specifically the @@ -206,7 +206,7 @@ func (a *InodeAttrs) Init(ctx context.Context, creds *auth.Credentials, devMajor atomic.StoreUint32(&a.uid, uint32(creds.EffectiveKUID)) atomic.StoreUint32(&a.gid, uint32(creds.EffectiveKGID)) atomic.StoreUint32(&a.nlink, nlink) - atomic.StoreUint32(&a.blockSize, usermem.PageSize) + atomic.StoreUint32(&a.blockSize, hostarch.PageSize) now := ktime.NowFromContext(ctx).Nanoseconds() atomic.StoreInt64(&a.atime, now) atomic.StoreInt64(&a.mtime, now) diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 565d723f0..16486eeae 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -61,6 +61,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -508,6 +509,15 @@ func (d *Dentry) Inode() Inode { return d.inode } +// FSLocalPath returns an absolute path to d, relative to the root of its +// filesystem. +func (d *Dentry) FSLocalPath() string { + var b fspath.Builder + _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) + b.PrependByte('/') + return b.String() +} + // The Inode interface maps filesystem-level operations that operate on paths to // equivalent operations on specific filesystem nodes. // diff --git a/pkg/sentry/fsimpl/kernfs/mmap_util.go b/pkg/sentry/fsimpl/kernfs/mmap_util.go index bd6a134b4..d1539d904 100644 --- a/pkg/sentry/fsimpl/kernfs/mmap_util.go +++ b/pkg/sentry/fsimpl/kernfs/mmap_util.go @@ -16,11 +16,11 @@ package kernfs import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // inodePlatformFile implements memmap.File. It exists solely because inode @@ -66,7 +66,7 @@ func (i *inodePlatformFile) DecRef(fr memmap.FileRange) { } // MapInternal implements memmap.File.MapInternal. -func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { return i.fileMapper.MapInternal(fr, i.hostFD, at.Write) } @@ -100,7 +100,7 @@ func (i *CachedMappable) Init(hostFD int) { } // AddMapping implements memmap.Mappable.AddMapping. -func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { i.mapsMu.Lock() mapped := i.mappings.AddMapping(ms, ar, offset, writable) for _, r := range mapped { @@ -111,7 +111,7 @@ func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { i.mapsMu.Lock() unmapped := i.mappings.RemoveMapping(ms, ar, offset, writable) for _, r := range unmapped { @@ -121,19 +121,19 @@ func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpa } // CopyMapping implements memmap.Mappable.CopyMapping. -func (i *CachedMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (i *CachedMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return i.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (i *CachedMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (i *CachedMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { mr := optional return []memmap.Translation{ { Source: mr, File: &i.pf, Offset: mr.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, nil } diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index bf13bbbf4..5504476c8 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index 27b00cf6f..45aa5a494 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -21,11 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) func (d *dentry) isCopiedUp() bool { @@ -138,8 +138,8 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { // We may have memory mappings of the file on the lower layer. // Switch to mapping the file on the upper layer instead. mmapOpts = &memmap.MMapOpts{ - Perms: usermem.ReadWrite, - MaxPerms: usermem.ReadWrite, + Perms: hostarch.ReadWrite, + MaxPerms: hostarch.ReadWrite, } if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil { cleanupUndoCopyUp() diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go index d791c06db..43bfd69a3 100644 --- a/pkg/sentry/fsimpl/overlay/regular_file.go +++ b/pkg/sentry/fsimpl/overlay/regular_file.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -445,7 +446,7 @@ func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOp } // AddMapping implements memmap.Mappable.AddMapping. -func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { d.mapsMu.Lock() defer d.mapsMu.Unlock() if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { @@ -458,7 +459,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar user } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { d.mapsMu.Lock() defer d.mapsMu.Unlock() d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) @@ -468,7 +469,7 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar u } // CopyMapping implements memmap.Mappable.CopyMapping. -func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { d.mapsMu.Lock() defer d.mapsMu.Unlock() if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { @@ -481,7 +482,7 @@ func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, } // Translate implements memmap.Mappable.Translate. -func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { d.dataMu.RLock() defer d.dataMu.RUnlock() return d.wrappedMappable.Translate(ctx, required, optional, at) diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD index 5950a2d59..278ee3c92 100644 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -10,12 +10,12 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", "//pkg/syserror", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 3f05e444e..08aedc2ad 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -22,13 +22,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // +stateify savable @@ -131,7 +131,7 @@ func (i *inode) Stat(_ context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOpti ts := linux.NsecToStatxTimestamp(i.ctime.Nanoseconds()) return linux.Statx{ Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS, - Blksize: usermem.PageSize, + Blksize: hostarch.PageSize, Nlink: 1, UID: uint32(i.uid), GID: uint32(i.gid), diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index d47a4fff9..2b628bd55 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -81,6 +81,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 254a8b062..ce8f55b1f 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -86,13 +86,13 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF procfs.MaxCachedDentries = maxCachedDentries procfs.VFSFilesystem().Init(vfsObj, &ft, procfs) - var cgroups map[string]string + var fakeCgroupControllers map[string]string if opts.InternalData != nil { data := opts.InternalData.(*InternalData) - cgroups = data.Cgroups + fakeCgroupControllers = data.Cgroups } - inode := procfs.newTasksInode(ctx, k, pidns, cgroups) + inode := procfs.newTasksInode(ctx, k, pidns, fakeCgroupControllers) var dentry kernfs.Dentry dentry.InitRoot(&procfs.Filesystem, inode) return procfs.VFSFilesystem(), dentry.VFSDentry(), nil diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index fea138f93..d05cc1508 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -47,7 +47,7 @@ type taskInode struct { var _ kernfs.Inode = (*taskInode)(nil) -func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) (kernfs.Inode, error) { +func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) { if task.ExitState() == kernel.TaskExitDead { return nil, syserror.ESRCH } @@ -82,10 +82,12 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { - contents["task"] = fs.newSubtasks(ctx, task, pidns, cgroupControllers) + contents["task"] = fs.newSubtasks(ctx, task, pidns, fakeCgroupControllers) } - if len(cgroupControllers) > 0 { - contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newCgroupData(cgroupControllers)) + if len(fakeCgroupControllers) > 0 { + contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newFakeCgroupData(fakeCgroupControllers)) + } else { + contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskCgroupData{task: task}) } taskInode := &taskInode{task: task} @@ -226,11 +228,14 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData { return &ioData{ioUsage: t} } -// newCgroupData creates inode that shows cgroup information. -// From man 7 cgroups: "For each cgroup hierarchy of which the process is a -// member, there is one entry containing three colon-separated fields: -// hierarchy-ID:controller-list:cgroup-path" -func newCgroupData(controllers map[string]string) dynamicInode { +// newFakeCgroupData creates an inode that shows fake cgroup +// information passed in as mount options. From man 7 cgroups: "For +// each cgroup hierarchy of which the process is a member, there is +// one entry containing three colon-separated fields: +// hierarchy-ID:controller-list:cgroup-path" +// +// TODO(b/182488796): Remove once all users adopt cgroupfs. +func newFakeCgroupData(controllers map[string]string) dynamicInode { var buf bytes.Buffer // The hierarchy ids must be positive integers (for cgroup v1), but the diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index fdae163d1..b294dfd6a 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -122,8 +123,8 @@ func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error { buf.Grow((len(auxv) + 1) * 16) for _, e := range auxv { var tmp [16]byte - usermem.ByteOrder.PutUint64(tmp[:8], e.Key) - usermem.ByteOrder.PutUint64(tmp[8:], uint64(e.Value)) + hostarch.ByteOrder.PutUint64(tmp[:8], e.Key) + hostarch.ByteOrder.PutUint64(tmp[8:], uint64(e.Value)) buf.Write(tmp[:]) } var atNull [16]byte @@ -168,15 +169,15 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { defer m.DecUsers(ctx) // Figure out the bounds of the exec arg we are trying to read. - var ar usermem.AddrRange + var ar hostarch.AddrRange switch d.arg { case cmdlineDataArg: - ar = usermem.AddrRange{ + ar = hostarch.AddrRange{ Start: m.ArgvStart(), End: m.ArgvEnd(), } case environDataArg: - ar = usermem.AddrRange{ + ar = hostarch.AddrRange{ Start: m.EnvvStart(), End: m.EnvvEnd(), } @@ -192,7 +193,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading // cmdline and environment"). writer := &bufferWriter{buf: buf} - if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil { + if n, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil { // Nothing to copy or something went wrong. return err } @@ -209,7 +210,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { } // There is no NULL terminator in the string, return into envp. - arEnvv := usermem.AddrRange{ + arEnvv := hostarch.AddrRange{ Start: m.EnvvStart(), End: m.EnvvEnd(), } @@ -218,11 +219,11 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208 // we'll return one page total between argv and envp because of the // above page restrictions. - if buf.Len() >= usermem.PageSize { + if buf.Len() >= hostarch.PageSize { // Returned at least one page already, nothing else to add. return nil } - remaining := usermem.PageSize - buf.Len() + remaining := hostarch.PageSize - buf.Len() if int(arEnvv.Length()) > remaining { end, ok := arEnvv.Start.AddLength(uint64(remaining)) if !ok { @@ -230,7 +231,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { } arEnvv.End = end } - if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil { + if _, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil { return err } @@ -323,7 +324,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in // the system page size, and the write must be performed at the start of // the file ..." - user_namespaces(7) srclen := src.NumBytes() - if srclen >= usermem.PageSize || offset != 0 { + if srclen >= hostarch.PageSize || offset != 0 { return 0, syserror.EINVAL } b := make([]byte, srclen) @@ -481,7 +482,7 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64 defer m.DecUsers(ctx) // Buffer the read data because of MM locks buf := make([]byte, dst.NumBytes()) - n, readErr := m.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) + n, readErr := m.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) if n > 0 { if _, err := dst.CopyOut(ctx, buf[:n]); err != nil { return 0, syserror.EFAULT @@ -613,7 +614,7 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error { rss = mm.ResidentSetSize() } }) - fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize) + fmt.Fprintf(buf, "%d %d ", vss, rss/hostarch.PageSize) // rsslim. fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur) @@ -655,7 +656,7 @@ func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error { } }) - fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize) + fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize) return nil } @@ -774,7 +775,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset } // Limit input size so as not to impact performance if input size is large. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -1099,3 +1100,32 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err func (fd *namespaceFD) Release(ctx context.Context) { fd.inode.DecRef(ctx) } + +// taskCgroupData generates data for /proc/[pid]/cgroup. +// +// +stateify savable +type taskCgroupData struct { + dynamicBytesFileSetAttr + task *kernel.Task +} + +var _ dynamicInode = (*taskCgroupData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // When a task is existing on Linux, a task's cgroup set is cleared and + // reset to the initial cgroup set, which is essentially the set of root + // cgroups. Because of this, the /proc/<pid>/cgroup file is always readable + // on Linux throughout a task's lifetime. + // + // The sentry removes tasks from cgroups during the exit process, but + // doesn't move them into an initial cgroup set, so partway through task + // exit this file show a task is in no cgroups, which is incorrect. Instead, + // once a task has left its cgroups, we return an error. + if d.task.ExitState() >= kernel.TaskExitInitiated { + return syserror.ESRCH + } + + d.task.GenerateProcTaskCgroup(buf) + return nil +} diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index d4f6a5a9b..177cb828f 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -34,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/usermem" ) func (fs *filesystem) newTaskNetDir(ctx context.Context, task *kernel.Task) kernfs.Inode { @@ -295,7 +295,7 @@ func networkToHost16(n uint16) uint16 { // binary.BigEndian.Uint16() require a read of binary.BigEndian and an // interface method call, defeating inlining. buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)} - return usermem.ByteOrder.Uint16(buf[:]) + return hostarch.ByteOrder.Uint16(buf[:]) } func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { @@ -317,14 +317,14 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { // __be32 which is a typedef for an unsigned int, and is printed with // %X. This means that for a little-endian machine, Linux prints the // least-significant byte of the address first. To emulate this, we first - // invert the byte order for the address using usermem.ByteOrder.Uint32, + // invert the byte order for the address using hostarch.ByteOrder.Uint32, // which makes it have the equivalent encoding to a __be32 on a little // endian machine. Note that this operation is a no-op on a big endian // machine. Then similar to Linux, we format it with %X, which will print // the most-significant byte of the __be32 address first, which is now // actually the least-significant byte of the original address in // linux.SockAddrInet.Addr on little endian machines, due to the conversion. - addr := usermem.ByteOrder.Uint32(a.Addr[:]) + addr := hostarch.ByteOrder.Uint32(a.Addr[:]) fmt.Fprintf(w, "%08X:%04X ", addr, port) case linux.AF_INET6: @@ -334,10 +334,10 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { } port := networkToHost16(a.Port) - addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) - addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) - addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) - addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) + addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4]) + addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8]) + addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12]) + addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16]) fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) } } @@ -739,10 +739,10 @@ func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { ) if len(rt.GatewayAddr) == header.IPv4AddressSize { flags |= linux.RTF_GATEWAY - gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) + gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr) } if len(rt.DstAddr) == header.IPv4AddressSize { - prefix = usermem.ByteOrder.Uint32(rt.DstAddr) + prefix = hostarch.ByteOrder.Uint32(rt.DstAddr) } l := fmt.Sprintf( "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index fdc580610..7c7543f14 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -54,15 +54,15 @@ type tasksInode struct { // '/proc/self' and '/proc/thread-self' have custom directory offsets in // Linux. So handle them outside of OrderedChildren. - // cgroupControllers is a map of controller name to directory in the + // fakeCgroupControllers is a map of controller name to directory in the // cgroup hierarchy. These controllers are immutable and will be listed // in /proc/pid/cgroup if not nil. - cgroupControllers map[string]string + fakeCgroupControllers map[string]string } var _ kernfs.Inode = (*tasksInode)(nil) -func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *tasksInode { +func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode { root := auth.NewRootCredentials(pidns.UserNamespace()) contents := map[string]kernfs.Inode{ "cpuinfo": fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))), @@ -76,11 +76,16 @@ func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns "uptime": fs.newInode(ctx, root, 0444, &uptimeData{}), "version": fs.newInode(ctx, root, 0444, &versionData{}), } + // If fakeCgroupControllers are provided, don't create a cgroupfs backed + // /proc/cgroup as it will not match the fake controllers. + if len(fakeCgroupControllers) == 0 { + contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{}) + } inode := &tasksInode{ - pidns: pidns, - fs: fs, - cgroupControllers: cgroupControllers, + pidns: pidns, + fs: fs, + fakeCgroupControllers: fakeCgroupControllers, } inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) inode.InitRefs() @@ -118,7 +123,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err return nil, syserror.ENOENT } - return i.fs.newTaskInode(ctx, task, i.pidns, true, i.cgroupControllers) + return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers) } // IterDirents implements kernfs.inodeDirectory.IterDirents. diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 01b7a6678..e1a8b4409 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // +stateify savable @@ -270,7 +270,7 @@ func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { anon := snapshot.Anonymous + snapshot.Tmpfs file := snapshot.PageCache + snapshot.Mapped // We don't actually have active/inactive LRUs, so just make up numbers. - activeFile := (file / 2) &^ (usermem.PageSize - 1) + activeFile := (file / 2) &^ (hostarch.PageSize - 1) inactiveFile := file - activeFile fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) @@ -384,3 +384,19 @@ func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error k.VFS().GenerateProcFilesystems(buf) return nil } + +// cgroupsData backs /proc/cgroups. +// +// +stateify savable +type cgroupsData struct { + dynamicBytesFileSetAttr +} + +var _ dynamicInode = (*cgroupsData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*cgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error { + r := kernel.KernelFromContext(ctx).CgroupRegistry() + r.GenerateProcCgroups(buf) + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index fb274b78e..9b14dd6b9 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -214,7 +215,7 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset } // Limit the amount of memory allocated. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -262,7 +263,7 @@ func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, off } // Limit the amount of memory allocated. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -318,7 +319,7 @@ func (d *tcpMemData) Write(ctx context.Context, src usermem.IOSequence, offset i defer d.mu.Unlock() // Limit the amount of memory allocated. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) size, err := d.readSizeLocked() if err != nil { return 0, err @@ -406,7 +407,7 @@ func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offs } // Limit input size so as not to impact performance if input size is large. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) @@ -463,7 +464,7 @@ func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset i // Limit input size so as not to impact performance if input size is // large. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) ports := make([]int32, 2) n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts) diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go index aebfe8944..e039ec45e 100644 --- a/pkg/sentry/fsimpl/proc/yama.go +++ b/pkg/sentry/fsimpl/proc/yama.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -62,7 +63,7 @@ func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, off } // Limit the amount of memory allocated. - src = src.TakeFirst(usermem.PageSize - 1) + src = src.TakeFirst(hostarch.PageSize - 1) var v int32 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index 400a97996..b3f9d1010 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/context", "//pkg/cpuid", "//pkg/fspath", + "//pkg/hostarch", "//pkg/memutil", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/tmpfs", diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go index 1a8525b06..59e6f9c92 100644 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ b/pkg/sentry/fsimpl/testutil/testutil.go @@ -30,6 +30,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // System represents the context for a single test. @@ -105,7 +107,7 @@ func (s *System) Destroy() { // ReadToEnd reads the contents of fd until EOF to a string. func (s *System) ReadToEnd(fd *vfs.FileDescription) (string, error) { - buf := make([]byte, usermem.PageSize) + buf := make([]byte, hostarch.PageSize) bufIOSeq := usermem.BytesIOSequence(buf) opts := vfs.ReadOptions{} diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD index fbb02a271..7ce7dc429 100644 --- a/pkg/sentry/fsimpl/timerfd/BUILD +++ b/pkg/sentry/fsimpl/timerfd/BUILD @@ -8,6 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index 64d33c3a8..cbb8b67c5 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" @@ -72,7 +73,7 @@ func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequenc } if val := atomic.SwapUint64(&tfd.val, 0); val != 0 { var buf [sizeofUint64]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) if _, err := dst.CopyOut(ctx, buf[:]); err != nil { // Linux does not undo consuming the number of // expirations even if writing to userspace fails. diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 09957c2b7..e21fddd7f 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -59,6 +59,7 @@ go_library( "//pkg/amutex", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index a6d161882..c45bddff6 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -224,7 +225,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) { } // AddMapping implements memmap.Mappable.AddMapping. -func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { +func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { rf.mapsMu.Lock() defer rf.mapsMu.Unlock() rf.dataMu.RLock() @@ -240,7 +241,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a pagesBefore := rf.writableMappingPages // ar is guaranteed to be page aligned per memmap.Mappable. - rf.writableMappingPages += uint64(ar.Length() / usermem.PageSize) + rf.writableMappingPages += uint64(ar.Length() / hostarch.PageSize) if rf.writableMappingPages < pagesBefore { panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages)) @@ -251,7 +252,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { +func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { rf.mapsMu.Lock() defer rf.mapsMu.Unlock() @@ -261,7 +262,7 @@ func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace pagesBefore := rf.writableMappingPages // ar is guaranteed to be page aligned per memmap.Mappable. - rf.writableMappingPages -= uint64(ar.Length() / usermem.PageSize) + rf.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize) if rf.writableMappingPages > pagesBefore { panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages)) @@ -270,12 +271,12 @@ func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace } // CopyMapping implements memmap.Mappable.CopyMapping. -func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { +func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { return rf.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. -func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { rf.dataMu.Lock() defer rf.dataMu.Unlock() @@ -307,7 +308,7 @@ func (rf *regularFile) Translate(ctx context.Context, required, optional memmap. Source: segMR, File: rf.memFile, Offset: seg.FileRangeOf(segMR).Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }) translatedEnd = segMR.End } @@ -487,6 +488,7 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) ( // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { file := fd.inode().impl.(*regularFile) + opts.SentryOwnedContent = true return vfs.GenericConfigureMMap(&fd.vfsfd, file, opts) } @@ -539,7 +541,7 @@ func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, er switch { case seg.Ok(): // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read) if err != nil { return done, err } @@ -608,7 +610,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, // // See Linux, mm/filemap.c:generic_perform_write() and // mm/shmem.c:shmem_write_begin(). - if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart { + if pgstart := uint64(hostarch.Addr(rw.file.size).RoundDown()); end > pgstart { end = pgstart } if end <= rw.off { @@ -619,8 +621,8 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, // Page-aligned mr for when we need to allocate memory. RoundUp can't // overflow since end is an int64. - pgstartaddr := usermem.Addr(rw.off).RoundDown() - pgendaddr, _ := usermem.Addr(end).RoundUp() + pgstartaddr := hostarch.Addr(rw.off).RoundDown() + pgendaddr, _ := hostarch.Addr(end).RoundUp() pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)} var ( @@ -633,7 +635,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, switch { case seg.Ok(): // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write) + ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write) if err != nil { retErr = err goto exitLoop diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 8df81f589..9ae25ce9e 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -36,6 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -43,7 +44,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Name is the default filesystem name. @@ -252,8 +252,8 @@ func (d *dentry) releaseChildrenLocked(ctx context.Context) { // immutable var globalStatfs = linux.Statfs{ Type: linux.TMPFS_MAGIC, - BlockSize: usermem.PageSize, - FragmentSize: usermem.PageSize, + BlockSize: hostarch.PageSize, + FragmentSize: hostarch.PageSize, NameLength: linux.NAME_MAX, // tmpfs currently does not support configurable size limits. In Linux, @@ -263,9 +263,9 @@ var globalStatfs = linux.Statfs{ // chosen to ensure that BlockSize * Blocks does not overflow int64 (which // applications may also handle incorrectly). // TODO(b/29637826): allow configuring a tmpfs size and enforce it. - Blocks: math.MaxInt64 / usermem.PageSize, - BlocksFree: math.MaxInt64 / usermem.PageSize, - BlocksAvailable: math.MaxInt64 / usermem.PageSize, + Blocks: math.MaxInt64 / hostarch.PageSize, + BlocksFree: math.MaxInt64 / hostarch.PageSize, + BlocksAvailable: math.MaxInt64 / hostarch.PageSize, } // dentry implements vfs.DentryImpl. @@ -485,7 +485,7 @@ func (i *inode) statTo(stat *linux.Statx) { linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME - stat.Blksize = usermem.PageSize + stat.Blksize = hostarch.PageSize stat.Nlink = atomic.LoadUint32(&i.nlink) stat.UID = atomic.LoadUint32(&i.uid) stat.GID = atomic.LoadUint32(&i.gid) diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD index e265be0ee..d473a922d 100644 --- a/pkg/sentry/fsimpl/verity/BUILD +++ b/pkg/sentry/fsimpl/verity/BUILD @@ -14,13 +14,16 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/merkletree", "//pkg/refsvfs2", + "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/sync", diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index 0d9b0ee2c..6d6e0e77a 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -34,6 +34,8 @@ package verity import ( + "bytes" + "encoding/hex" "encoding/json" "fmt" "math" @@ -44,13 +46,16 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/merkletree" "gvisor.dev/gvisor/pkg/refsvfs2" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -101,6 +106,13 @@ var ( verityMu sync.RWMutex ) +// Mount option names for verityfs. +const ( + moptLowerPath = "lower_path" + moptRootHash = "root_hash" + moptRootName = "root_name" +) + // HashAlgorithm is a type specifying the algorithm used to hash the file // content. type HashAlgorithm int @@ -167,6 +179,9 @@ type filesystem struct { // system. alg HashAlgorithm + // opts is the string mount options passed to opts.Data. + opts string + // renameMu synchronizes renaming with non-renaming operations in order // to ensure consistent lock ordering between dentry.dirMu in different // dentries. @@ -189,9 +204,6 @@ type filesystem struct { // // +stateify savable type InternalFilesystemOptions struct { - // RootMerkleFileName is the name of the verity root Merkle tree file. - RootMerkleFileName string - // LowerName is the name of the filesystem wrapped by verity fs. LowerName string @@ -199,9 +211,6 @@ type InternalFilesystemOptions struct { // system. Alg HashAlgorithm - // RootHash is the root hash of the overall verity file system. - RootHash []byte - // AllowRuntimeEnable specifies whether the verity file system allows // enabling verification for files (i.e. building Merkle trees) during // runtime. @@ -235,28 +244,99 @@ func alertIntegrityViolation(msg string) error { // GetFilesystem implements vfs.FilesystemType.GetFilesystem. func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + mopts := vfs.GenericParseMountOptions(opts.Data) + var rootHash []byte + if encodedRootHash, ok := mopts[moptRootHash]; ok { + delete(mopts, moptRootHash) + hash, err := hex.DecodeString(encodedRootHash) + if err != nil { + ctx.Warningf("verity.FilesystemType.GetFilesystem: Failed to decode root hash: %v", err) + return nil, nil, syserror.EINVAL + } + rootHash = hash + } + var lowerPathname string + if path, ok := mopts[moptLowerPath]; ok { + delete(mopts, moptLowerPath) + lowerPathname = path + } + rootName := "root" + if root, ok := mopts[moptRootName]; ok { + delete(mopts, moptRootName) + rootName = root + } + + // Check for unparsed options. + if len(mopts) != 0 { + ctx.Warningf("verity.FilesystemType.GetFilesystem: unknown options: %v", mopts) + return nil, nil, syserror.EINVAL + } + + // Handle internal options. iopts, ok := opts.InternalData.(InternalFilesystemOptions) - if !ok { + if len(lowerPathname) == 0 && !ok { ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs") return nil, nil, syserror.EINVAL } + if len(lowerPathname) != 0 { + if ok { + ctx.Warningf("verity.FilesystemType.GetFilesystem: unexpected verity configs with specified lower path") + return nil, nil, syserror.EINVAL + } + iopts = InternalFilesystemOptions{ + AllowRuntimeEnable: len(rootHash) == 0, + Action: ErrorOnViolation, + } + } action = iopts.Action - // Mount the lower file system. The lower file system is wrapped inside - // verity, and should not be exposed or connected. - mopts := &vfs.MountOptions{ - GetFilesystemOptions: iopts.LowerGetFSOptions, - InternalMount: true, - } - mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts) - if err != nil { - return nil, nil, err + var lowerMount *vfs.Mount + var mountedLowerVD vfs.VirtualDentry + // Use an existing mount if lowerPath is provided. + if len(lowerPathname) != 0 { + vfsroot := vfs.RootFromContext(ctx) + if vfsroot.Ok() { + defer vfsroot.DecRef(ctx) + } + lowerPath := fspath.Parse(lowerPathname) + if !lowerPath.Absolute { + ctx.Infof("verity.FilesystemType.GetFilesystem: lower_path %q must be absolute", lowerPathname) + return nil, nil, syserror.EINVAL + } + var err error + mountedLowerVD, err = vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{ + Root: vfsroot, + Start: vfsroot, + Path: lowerPath, + FollowFinalSymlink: true, + }, &vfs.GetDentryOptions{ + CheckSearchable: true, + }) + if err != nil { + ctx.Infof("verity.FilesystemType.GetFilesystem: failed to resolve lower_path %q: %v", lowerPathname, err) + return nil, nil, err + } + lowerMount = mountedLowerVD.Mount() + defer mountedLowerVD.DecRef(ctx) + } else { + // Mount the lower file system. The lower file system is wrapped inside + // verity, and should not be exposed or connected. + mountOpts := &vfs.MountOptions{ + GetFilesystemOptions: iopts.LowerGetFSOptions, + InternalMount: true, + } + mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mountOpts) + if err != nil { + return nil, nil, err + } + lowerMount = mnt } fs := &filesystem{ creds: creds.Fork(), alg: iopts.Alg, - lowerMount: mnt, + lowerMount: lowerMount, + opts: opts.Data, allowRuntimeEnable: iopts.AllowRuntimeEnable, } fs.vfsfs.Init(vfsObj, &fstype, fs) @@ -264,11 +344,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Construct the root dentry. d := fs.newDentry() d.refs = 1 - lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root()) + lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root()) lowerVD.IncRef() d.lowerVD = lowerVD - rootMerkleName := merkleRootPrefix + iopts.RootMerkleFileName + rootMerkleName := merkleRootPrefix + rootName lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ Root: lowerVD, @@ -348,7 +428,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt d.mode = uint32(stat.Mode) d.uid = stat.UID d.gid = stat.GID - d.hash = make([]byte, len(iopts.RootHash)) + d.hash = make([]byte, len(rootHash)) d.childrenNames = make(map[string]struct{}) if !d.isDir() { @@ -423,7 +503,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } d.hashMu.Lock() - copy(d.hash, iopts.RootHash) + copy(d.hash, rootHash) d.hashMu.Unlock() d.vfsd.Init(d) @@ -439,7 +519,7 @@ func (fs *filesystem) Release(ctx context.Context) { // MountOptions implements vfs.FilesystemImpl.MountOptions. func (fs *filesystem) MountOptions() string { - return "" + return fs.opts } // dentry implements vfs.DentryImpl. @@ -720,6 +800,10 @@ type fileDescription struct { // underlying file system. lowerFD *vfs.FileDescription + // lowerMappable is the memmap.Mappable corresponding to this file in the + // underlying file system. + lowerMappable memmap.Mappable + // merkleReader is the read-only FileDescription corresponding to the // Merkle tree file in the underlying file system. merkleReader *vfs.FileDescription @@ -1033,7 +1117,7 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { } // measureVerity returns the hash of fd, saved in verityDigest. -func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest usermem.Addr) (uintptr, error) { +func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) { t := kernel.TaskFromContext(ctx) if t == nil { return 0, syserror.EINVAL @@ -1072,11 +1156,11 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest userm } // Now copy the root hash bytes to the memory after metadata. - _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash) + _, err := t.CopyOutBytes(hostarch.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash) return 0, err } -func (fd *fileDescription) verityFlags(ctx context.Context, flags usermem.Addr) (uintptr, error) { +func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr) (uintptr, error) { f := int32(0) fd.d.hashMu.RLock() @@ -1199,6 +1283,24 @@ func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, op return 0, syserror.EROFS } +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + if err := fd.lowerFD.ConfigureMMap(ctx, opts); err != nil { + return err + } + fd.lowerMappable = opts.Mappable + if opts.MappingIdentity != nil { + opts.MappingIdentity.DecRef(ctx) + opts.MappingIdentity = nil + } + + // Check if mmap is allowed on the lower filesystem. + if !opts.SentryOwnedContent { + return syserror.ENODEV + } + return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts) +} + // LockBSD implements vfs.FileDescriptionImpl.LockBSD. func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { return fd.lowerFD.LockBSD(ctx, ownerPID, t, block) @@ -1224,6 +1326,115 @@ func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t return fd.lowerFD.TestPOSIX(ctx, uid, t, r) } +// Translate implements memmap.Mappable.Translate. +func (fd *fileDescription) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { + ts, err := fd.lowerMappable.Translate(ctx, required, optional, at) + if err != nil { + return ts, err + } + + // dataSize is the size of the whole file. + dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{ + Name: merkleSizeXattr, + Size: sizeOfStringInt32, + }) + + // The Merkle tree file for the child should have been created and + // contains the expected xattrs. If the xattr does not exist, it + // indicates unexpected modifications to the file system. + if err == syserror.ENODATA { + return ts, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) + } + if err != nil { + return ts, err + } + + // The dataSize xattr should be an integer. If it's not, it indicates + // unexpected modifications to the file system. + size, err := strconv.Atoi(dataSize) + if err != nil { + return ts, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err)) + } + + merkleReader := FileReadWriteSeeker{ + FD: fd.merkleReader, + Ctx: ctx, + } + + for _, t := range ts { + // Content integrity relies on sentry owning the backing data. MapInternal is guaranteed + // to fetch sentry owned memory because we disallow verity mmaps otherwise. + ims, err := t.File.MapInternal(memmap.FileRange{t.Offset, t.Offset + t.Source.Length()}, hostarch.Read) + if err != nil { + return nil, err + } + dataReader := mmapReadSeeker{ims, t.Source.Start} + var buf bytes.Buffer + _, err = merkletree.Verify(&merkletree.VerifyParams{ + Out: &buf, + File: &dataReader, + Tree: &merkleReader, + Size: int64(size), + Name: fd.d.name, + Mode: fd.d.mode, + UID: fd.d.uid, + GID: fd.d.gid, + HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), + ReadOffset: int64(t.Source.Start), + ReadSize: int64(t.Source.Length()), + Expected: fd.d.hash, + DataAndTreeInSameFile: false, + }) + if err != nil { + return ts, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err)) + } + } + return ts, err +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (fd *fileDescription) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { + return fd.lowerMappable.AddMapping(ctx, ms, ar, offset, writable) +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (fd *fileDescription) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { + fd.lowerMappable.RemoveMapping(ctx, ms, ar, offset, writable) +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (fd *fileDescription) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { + return fd.lowerMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable) +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (fd *fileDescription) InvalidateUnsavable(context.Context) error { + return nil +} + +// mmapReadSeeker is a helper struct used by fileDescription.Translate to pass +// a safemem.BlockSeq pointing to the mapped region as io.ReaderAt. +type mmapReadSeeker struct { + safemem.BlockSeq + Offset uint64 +} + +// ReadAt implements io.ReaderAt.ReadAt. off is the offset into the mapped file. +func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) { + bs := r.BlockSeq + // Adjust the offset into the mapped file to get the offset into the internally + // mapped region. + readOffset := off - int64(r.Offset) + if readOffset < 0 { + return 0, syserror.EINVAL + } + bs.DropFirst64(uint64(readOffset)) + view := bs.TakeFirst64(uint64(len(p))) + dst := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p)) + n, err := safemem.CopySeq(dst, view) + return int(n), err +} + // FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as // io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc. type FileReadWriteSeeker struct { diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go index 57bd65202..5c78a0019 100644 --- a/pkg/sentry/fsimpl/verity/verity_test.go +++ b/pkg/sentry/fsimpl/verity/verity_test.go @@ -89,10 +89,11 @@ func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem, AllowUserMount: true, }) + data := "root_name=" + rootMerkleFilename mntns, err := vfsObj.NewMountNamespace(ctx, auth.CredentialsFromContext(ctx), "", "verity", &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: data, InternalData: InternalFilesystemOptions{ - RootMerkleFileName: rootMerkleFilename, LowerName: "tmpfs", Alg: hashAlg, AllowRuntimeEnable: true, diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD index 300b7ccce..66fa1ad40 100644 --- a/pkg/sentry/hostmm/BUILD +++ b/pkg/sentry/hostmm/BUILD @@ -13,8 +13,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/fd", + "//pkg/hostarch", "//pkg/log", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go index c47b96b54..285ea9050 100644 --- a/pkg/sentry/hostmm/hostmm.go +++ b/pkg/sentry/hostmm/hostmm.go @@ -23,8 +23,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/usermem" ) // NotifyCurrentMemcgPressureCallback requests that f is called whenever the @@ -88,7 +88,7 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error) if n != sizeofUint64 { panic(fmt.Sprintf("short read from memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64)) } - val := usermem.ByteOrder.Uint64(buf[:]) + val := hostarch.ByteOrder.Uint64(buf[:]) if val >= stopVal { // Assume this was due to the notifier's "destructor" (the // function returned by NotifyCurrentMemcgPressureCallback @@ -103,7 +103,7 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error) return func() { rw := fd.NewReadWriter(eventFD.FD()) var buf [sizeofUint64]byte - usermem.ByteOrder.PutUint64(buf[:], stopVal) + hostarch.ByteOrder.PutUint64(buf[:], stopVal) for { n, err := rw.Write(buf[:]) if err != nil { diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index c53e3e720..a1ec6daab 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -141,6 +141,7 @@ go_library( srcs = [ "abstract_socket_namespace.go", "aio.go", + "cgroup.go", "context.go", "fd_table.go", "fd_table_refs.go", @@ -178,6 +179,7 @@ go_library( "task.go", "task_acct.go", "task_block.go", + "task_cgroup.go", "task_clone.go", "task_context.go", "task_exec.go", @@ -226,6 +228,7 @@ go_library( "//pkg/eventchannel", "//pkg/fspath", "//pkg/goid", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", @@ -240,6 +243,7 @@ go_library( "//pkg/sentry/fs/lock", "//pkg/sentry/fs/timerfd", "//pkg/sentry/fsbridge", + "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/fsimpl/timerfd", @@ -294,6 +298,7 @@ go_test( deps = [ "//pkg/abi", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", "//pkg/sentry/fs", @@ -305,6 +310,5 @@ go_test( "//pkg/sentry/usage", "//pkg/sync", "//pkg/syserror", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go new file mode 100644 index 000000000..1f1c63f37 --- /dev/null +++ b/pkg/sentry/kernel/cgroup.go @@ -0,0 +1,281 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "bytes" + "fmt" + "sort" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" +) + +// InvalidCgroupHierarchyID indicates an uninitialized hierarchy ID. +const InvalidCgroupHierarchyID uint32 = 0 + +// CgroupControllerType is the name of a cgroup controller. +type CgroupControllerType string + +// CgroupController is the common interface to cgroup controllers available to +// the entire sentry. The controllers themselves are defined by cgroupfs. +// +// Callers of this interface are often unable access synchronization needed to +// ensure returned values remain valid. Some of values returned from this +// interface are thus snapshots in time, and may become stale. This is ok for +// many callers like procfs. +type CgroupController interface { + // Returns the type of this cgroup controller (ex "memory", "cpu"). Returned + // value is valid for the lifetime of the controller. + Type() CgroupControllerType + + // Hierarchy returns the ID of the hierarchy this cgroup controller is + // attached to. Returned value is valid for the lifetime of the controller. + HierarchyID() uint32 + + // Filesystem returns the filesystem this controller is attached to. + // Returned value is valid for the lifetime of the controller. + Filesystem() *vfs.Filesystem + + // RootCgroup returns the root cgroup for this controller. Returned value is + // valid for the lifetime of the controller. + RootCgroup() Cgroup + + // NumCgroups returns the number of cgroups managed by this controller. + // Returned value is a snapshot in time. + NumCgroups() uint64 + + // Enabled returns whether this controller is enabled. Returned value is a + // snapshot in time. + Enabled() bool +} + +// Cgroup represents a named pointer to a cgroup in cgroupfs. When a task enters +// a cgroup, it holds a reference on the underlying dentry pointing to the +// cgroup. +// +// +stateify savable +type Cgroup struct { + *kernfs.Dentry + CgroupImpl +} + +func (c *Cgroup) decRef() { + c.Dentry.DecRef(context.Background()) +} + +// Path returns the absolute path of c, relative to its hierarchy root. +func (c *Cgroup) Path() string { + return c.FSLocalPath() +} + +// HierarchyID returns the id of the hierarchy that contains this cgroup. +func (c *Cgroup) HierarchyID() uint32 { + // Note: a cgroup is guaranteed to have at least one controller. + return c.Controllers()[0].HierarchyID() +} + +// CgroupImpl is the common interface to cgroups. +type CgroupImpl interface { + Controllers() []CgroupController + Enter(t *Task) + Leave(t *Task) +} + +// hierarchy represents a cgroupfs filesystem instance, with a unique set of +// controllers attached to it. Multiple cgroupfs mounts may reference the same +// hierarchy. +// +// +stateify savable +type hierarchy struct { + id uint32 + // These are a subset of the controllers in CgroupRegistry.controllers, + // grouped here by hierarchy for conveninent lookup. + controllers map[CgroupControllerType]CgroupController + // fs is not owned by hierarchy. The FS is responsible for unregistering the + // hierarchy on destruction, which removes this association. + fs *vfs.Filesystem +} + +func (h *hierarchy) match(ctypes []CgroupControllerType) bool { + if len(ctypes) != len(h.controllers) { + return false + } + for _, ty := range ctypes { + if _, ok := h.controllers[ty]; !ok { + return false + } + } + return true +} + +// CgroupRegistry tracks the active set of cgroup controllers on the system. +// +// +stateify savable +type CgroupRegistry struct { + // lastHierarchyID is the id of the last allocated cgroup hierarchy. Valid + // ids are from 1 to math.MaxUint32. Must be accessed through atomic ops. + // + lastHierarchyID uint32 + + mu sync.Mutex `state:"nosave"` + + // controllers is the set of currently known cgroup controllers on the + // system. Protected by mu. + // + // +checklocks:mu + controllers map[CgroupControllerType]CgroupController + + // hierarchies is the active set of cgroup hierarchies. Protected by mu. + // + // +checklocks:mu + hierarchies map[uint32]hierarchy +} + +func newCgroupRegistry() *CgroupRegistry { + return &CgroupRegistry{ + controllers: make(map[CgroupControllerType]CgroupController), + hierarchies: make(map[uint32]hierarchy), + } +} + +// nextHierarchyID returns a newly allocated, unique hierarchy ID. +func (r *CgroupRegistry) nextHierarchyID() (uint32, error) { + if hid := atomic.AddUint32(&r.lastHierarchyID, 1); hid != 0 { + return hid, nil + } + return InvalidCgroupHierarchyID, fmt.Errorf("cgroup hierarchy ID overflow") +} + +// FindHierarchy returns a cgroup filesystem containing exactly the set of +// controllers named in names. If no such FS is found, FindHierarchy return +// nil. FindHierarchy takes a reference on the returned FS, which is transferred +// to the caller. +func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Filesystem { + r.mu.Lock() + defer r.mu.Unlock() + + for _, h := range r.hierarchies { + if h.match(ctypes) { + h.fs.IncRef() + return h.fs + } + } + + return nil +} + +// Register registers the provided set of controllers with the registry as a new +// hierarchy. If any controller is already registered, the function returns an +// error without modifying the registry. The hierarchy can be later referenced +// by the returned id. +func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) { + r.mu.Lock() + defer r.mu.Unlock() + + if len(cs) == 0 { + return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers") + } + + for _, c := range cs { + if _, ok := r.controllers[c.Type()]; ok { + return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy") + } + } + + hid, err := r.nextHierarchyID() + if err != nil { + return hid, err + } + + h := hierarchy{ + id: hid, + controllers: make(map[CgroupControllerType]CgroupController), + fs: cs[0].Filesystem(), + } + for _, c := range cs { + n := c.Type() + r.controllers[n] = c + h.controllers[n] = c + } + r.hierarchies[hid] = h + return hid, nil +} + +// Unregister removes a previously registered hierarchy from the registry. If +// the controller was not previously registered, Unregister is a no-op. +func (r *CgroupRegistry) Unregister(hid uint32) { + r.mu.Lock() + defer r.mu.Unlock() + + if h, ok := r.hierarchies[hid]; ok { + for name, _ := range h.controllers { + delete(r.controllers, name) + } + delete(r.hierarchies, hid) + } +} + +// computeInitialGroups takes a reference on each of the returned cgroups. The +// caller takes ownership of this returned reference. +func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[Cgroup]struct{} { + r.mu.Lock() + defer r.mu.Unlock() + + ctlSet := make(map[CgroupControllerType]CgroupController) + cgset := make(map[Cgroup]struct{}) + + // Remember controllers from the inherited cgroups set... + for cg, _ := range inherit { + cg.IncRef() // Ref transferred to caller. + for _, ctl := range cg.Controllers() { + ctlSet[ctl.Type()] = ctl + cgset[cg] = struct{}{} + } + } + + // ... and add the root cgroups of all the missing controllers. + for name, ctl := range r.controllers { + if _, ok := ctlSet[name]; !ok { + cg := ctl.RootCgroup() + cg.IncRef() // Ref transferred to caller. + cgset[cg] = struct{}{} + } + } + return cgset +} + +// GenerateProcCgroups writes the contents of /proc/cgroups to buf. +func (r *CgroupRegistry) GenerateProcCgroups(buf *bytes.Buffer) { + r.mu.Lock() + entries := make([]string, 0, len(r.controllers)) + for _, c := range r.controllers { + en := 0 + if c.Enabled() { + en = 1 + } + entries = append(entries, fmt.Sprintf("%s\t%d\t%d\t%d\n", c.Type(), c.HierarchyID(), c.NumCgroups(), en)) + } + r.mu.Unlock() + + sort.Strings(entries) + fmt.Fprint(buf, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n") + for _, e := range entries { + fmt.Fprint(buf, e) + } +} diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index 7ecbd29ab..564c3d42e 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -10,6 +10,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fdnotifier", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 2aca02fd5..4466fbc9d 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -186,7 +187,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro e.wq.Notify(waiter.WritableEvents) var buf [8]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) _, err := dst.CopyOut(ctx, buf[:]) return err } @@ -194,7 +195,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro // Must be called with e.mu locked. func (e *EventOperations) hostWrite(val uint64) error { var buf [8]byte - usermem.ByteOrder.PutUint64(buf[:], val) + hostarch.ByteOrder.PutUint64(buf[:], val) _, err := unix.Write(e.hostfd, buf[:]) if err == unix.EWOULDBLOCK { return syserror.ErrWouldBlock @@ -207,7 +208,7 @@ func (e *EventOperations) write(ctx context.Context, src usermem.IOSequence) err if _, err := src.CopyIn(ctx, buf[:]); err != nil { return err } - val := usermem.ByteOrder.Uint64(buf[:]) + val := hostarch.ByteOrder.Uint64(buf[:]) return e.Signal(val) } diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 041e3d4ca..a75686cf3 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -37,6 +37,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/sentry/memmap", "//pkg/sync", @@ -52,8 +53,8 @@ go_test( library = ":futex", deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/sync", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index e4dcc4d40..0427cf3f4 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -20,10 +20,10 @@ package futex import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // KeyKind indicates the type of a Key. @@ -83,8 +83,8 @@ func (k *Key) clone() Key { } // Preconditions: k.Kind == KindPrivate or KindSharedPrivate. -func (k *Key) addr() usermem.Addr { - return usermem.Addr(k.Offset) +func (k *Key) addr() hostarch.Addr { + return hostarch.Addr(k.Offset) } // matches returns true if a wakeup on k2 should wake a waiter waiting on k. @@ -97,14 +97,14 @@ func (k *Key) matches(k2 *Key) bool { type Target interface { context.Context - // SwapUint32 gives access to usermem.IO.SwapUint32. - SwapUint32(addr usermem.Addr, new uint32) (uint32, error) + // SwapUint32 gives access to hostarch.IO.SwapUint32. + SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) - // CompareAndSwap gives access to usermem.IO.CompareAndSwapUint32. - CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) + // CompareAndSwap gives access to hostarch.IO.CompareAndSwapUint32. + CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) - // LoadUint32 gives access to usermem.IO.LoadUint32. - LoadUint32(addr usermem.Addr) (uint32, error) + // LoadUint32 gives access to hostarch.IO.LoadUint32. + LoadUint32(addr hostarch.Addr) (uint32, error) // GetSharedKey returns a Key with kind KindSharedPrivate or // KindSharedMappable corresponding to the memory mapped at address addr. @@ -112,11 +112,11 @@ type Target interface { // If GetSharedKey returns a Key with a non-nil MappingIdentity, a // reference is held on the MappingIdentity, which must be dropped by the // caller when the Key is no longer in use. - GetSharedKey(addr usermem.Addr) (Key, error) + GetSharedKey(addr hostarch.Addr) (Key, error) } // check performs a basic equality check on the given address. -func check(t Target, addr usermem.Addr, val uint32) error { +func check(t Target, addr hostarch.Addr, val uint32) error { cur, err := t.LoadUint32(addr) if err != nil { return err @@ -128,7 +128,7 @@ func check(t Target, addr usermem.Addr, val uint32) error { } // atomicOp performs a complex operation on the given address. -func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) { +func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) { opType := (opIn >> 28) & 0xf cmp := (opIn >> 24) & 0xf opArg := (opIn >> 12) & 0xfff @@ -328,7 +328,7 @@ const ( ) // getKey returns a Key representing address addr in c. -func getKey(t Target, addr usermem.Addr, private bool) (Key, error) { +func getKey(t Target, addr hostarch.Addr, private bool) (Key, error) { // Ensure the address is aligned. // It must be a DWORD boundary. if addr&0x3 != 0 { @@ -341,7 +341,7 @@ func getKey(t Target, addr usermem.Addr, private bool) (Key, error) { } // bucketIndexForAddr returns the index into Manager.buckets for addr. -func bucketIndexForAddr(addr usermem.Addr) uintptr { +func bucketIndexForAddr(addr hostarch.Addr) uintptr { // - The bottom 2 bits of addr must be 0, per getKey. // // - On amd64, the top 16 bits of addr (bits 48-63) must be equal to bit 47 @@ -448,7 +448,7 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { // Wake wakes up to n waiters matching the bitmask on the given addr. // The number of waiters woken is returned. -func (m *Manager) Wake(t Target, addr usermem.Addr, private bool, bitmask uint32, n int) (int, error) { +func (m *Manager) Wake(t Target, addr hostarch.Addr, private bool, bitmask uint32, n int) (int, error) { // This function is very hot; avoid defer. k, err := getKey(t, addr, private) if err != nil { @@ -463,7 +463,7 @@ func (m *Manager) Wake(t Target, addr usermem.Addr, private bool, bitmask uint32 return r, nil } -func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, checkval bool, val uint32, nwake int, nreq int) (int, error) { +func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, checkval bool, val uint32, nwake int, nreq int) (int, error) { k1, err := getKey(t, addr, private) if err != nil { return 0, err @@ -498,14 +498,14 @@ func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, ch // Requeue wakes up to nwake waiters on the given addr, and unconditionally // requeues up to nreq waiters on naddr. -func (m *Manager) Requeue(t Target, addr, naddr usermem.Addr, private bool, nwake int, nreq int) (int, error) { +func (m *Manager) Requeue(t Target, addr, naddr hostarch.Addr, private bool, nwake int, nreq int) (int, error) { return m.doRequeue(t, addr, naddr, private, false, 0, nwake, nreq) } // RequeueCmp atomically checks that the addr contains val (via the Target), // wakes up to nwake waiters on addr and then unconditionally requeues nreq // waiters on naddr. -func (m *Manager) RequeueCmp(t Target, addr, naddr usermem.Addr, private bool, val uint32, nwake int, nreq int) (int, error) { +func (m *Manager) RequeueCmp(t Target, addr, naddr hostarch.Addr, private bool, val uint32, nwake int, nreq int) (int, error) { return m.doRequeue(t, addr, naddr, private, true, val, nwake, nreq) } @@ -513,7 +513,7 @@ func (m *Manager) RequeueCmp(t Target, addr, naddr usermem.Addr, private bool, v // waiters unconditionally from addr1, and, based on the original value at addr2 // and a comparison encoded in op, wakes up to nwake2 waiters from addr2. // It returns the total number of waiters woken. -func (m *Manager) WakeOp(t Target, addr1, addr2 usermem.Addr, private bool, nwake1 int, nwake2 int, op uint32) (int, error) { +func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwake1 int, nwake2 int, op uint32) (int, error) { k1, err := getKey(t, addr1, private) if err != nil { return 0, err @@ -553,7 +553,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 usermem.Addr, private bool, nwak // enqueues w to be woken by a send to w.C. If WaitPrepare returns nil, the // Waiter must be subsequently removed by calling WaitComplete, whether or not // a wakeup is received on w.C. -func (m *Manager) WaitPrepare(w *Waiter, t Target, addr usermem.Addr, private bool, val uint32, bitmask uint32) error { +func (m *Manager) WaitPrepare(w *Waiter, t Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) error { k, err := getKey(t, addr, private) if err != nil { return err @@ -631,7 +631,7 @@ func (m *Manager) WaitComplete(w *Waiter, t Target) { // FUTEX_OWNER_DIED is only set by the Linux when robust lists are in use (see // exit_robust_list()). Given we don't support robust lists, although handled // below, it's never set. -func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, private, try bool) (bool, error) { +func (m *Manager) LockPI(w *Waiter, t Target, addr hostarch.Addr, tid uint32, private, try bool) (bool, error) { k, err := getKey(t, addr, private) if err != nil { return false, err @@ -663,7 +663,7 @@ func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, pri return success, nil } -func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint32, b *bucket, try bool) (bool, error) { +func (m *Manager) lockPILocked(w *Waiter, t Target, addr hostarch.Addr, tid uint32, b *bucket, try bool) (bool, error) { for { cur, err := t.LoadUint32(addr) if err != nil { @@ -724,7 +724,7 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint3 // The address provided must contain the caller's TID. If there are waiters, // TID of the next waiter (FIFO) is set to the given address, and the waiter // woken up. If there are no waiters, 0 is set to the address. -func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error { +func (m *Manager) UnlockPI(t Target, addr hostarch.Addr, tid uint32, private bool) error { k, err := getKey(t, addr, private) if err != nil { return err @@ -738,7 +738,7 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool return err } -func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket, key *Key) error { +func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bucket, key *Key) error { cur, err := t.LoadUint32(addr) if err != nil { return err diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index ba7f95d8a..deba44e5c 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -23,8 +23,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // testData implements the Target interface, and allows us to @@ -43,23 +43,23 @@ func newTestData(size uint) testData { } } -func (t testData) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { +func (t testData) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) { val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), new) return val, nil } -func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { +func (t testData) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), old, new) { return old, nil } return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil } -func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) { +func (t testData) LoadUint32(addr hostarch.Addr) (uint32, error) { return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil } -func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) { +func (t testData) GetSharedKey(addr hostarch.Addr) (Key, error) { return Key{ Kind: KindSharedMappable, Offset: uint64(addr), @@ -73,7 +73,7 @@ func futexKind(private bool) string { return "shared" } -func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr usermem.Addr, private bool, val uint32, bitmask uint32) *Waiter { +func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) *Waiter { w := NewWaiter() if err := m.WaitPrepare(w, ta, addr, private, val, bitmask); err != nil { t.Fatalf("WaitPrepare failed: %v", err) @@ -463,12 +463,12 @@ const ( // Beyond being used as a Locker, this is a simple mechanism for // changing the underlying values for simpler tests. type testMutex struct { - a usermem.Addr + a hostarch.Addr d testData m *Manager } -func newTestMutex(addr usermem.Addr, d testData, m *Manager) *testMutex { +func newTestMutex(addr hostarch.Addr, d testData, m *Manager) *testMutex { return &testMutex{a: addr, d: d, m: m} } diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go index 4fcdfc541..4b943106b 100644 --- a/pkg/sentry/kernel/kcov.go +++ b/pkg/sentry/kernel/kcov.go @@ -22,13 +22,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov @@ -130,7 +130,7 @@ func (kcov *Kcov) InitTrace(size uint64) error { // To simplify all the logic around mapping, we require that the length of the // shared region is a multiple of the system page size. - if (8*size)&(usermem.PageSize-1) != 0 { + if (8*size)&(hostarch.PageSize-1) != 0 { return syserror.EINVAL } @@ -286,7 +286,7 @@ func (rw *kcovReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { } // Get internal mappings. - bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Read) + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, hostarch.Read) if err != nil { return 0, err } @@ -314,7 +314,7 @@ func (rw *kcovReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) } // Get internal mapping. - bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Write) + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, hostarch.Write) if err != nil { return 0, err } diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 43065b45a..9a4fd64cb 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -294,6 +294,11 @@ type Kernel struct { // YAMAPtraceScope is the current level of YAMA ptrace restrictions. YAMAPtraceScope int32 + + // cgroupRegistry contains the set of active cgroup controllers on the + // system. It is controller by cgroupfs. Nil if cgroupfs is unavailable on + // the system. + cgroupRegistry *CgroupRegistry } // InitKernelArgs holds arguments to Init. @@ -438,6 +443,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.socketMount = socketMount k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) + + k.cgroupRegistry = newCgroupRegistry() } return nil } @@ -1815,6 +1822,11 @@ func (k *Kernel) SocketMount() *vfs.Mount { return k.socketMount } +// CgroupRegistry returns the cgroup registry. +func (k *Kernel) CgroupRegistry() *CgroupRegistry { + return k.cgroupRegistry +} + // Release releases resources owned by k. // // Precondition: This should only be called after the kernel is fully @@ -1831,3 +1843,43 @@ func (k *Kernel) Release() { k.timekeeper.Destroy() k.vdso.Release(ctx) } + +// PopulateNewCgroupHierarchy moves all tasks into a newly created cgroup +// hierarchy. +// +// Precondition: root must be a new cgroup with no tasks. This implies the +// controllers for root are also new and currently manage no task, which in turn +// implies the new cgroup can be populated without migrating tasks between +// cgroups. +func (k *Kernel) PopulateNewCgroupHierarchy(root Cgroup) { + k.tasks.mu.RLock() + k.tasks.forEachTaskLocked(func(t *Task) { + if t.ExitState() != TaskExitNone { + return + } + t.mu.Lock() + t.enterCgroupLocked(root) + t.mu.Unlock() + }) + k.tasks.mu.RUnlock() +} + +// ReleaseCgroupHierarchy moves all tasks out of all cgroups belonging to the +// hierarchy with the provided id. This is intended for use during hierarchy +// teardown, as otherwise the tasks would be orphaned w.r.t to some controllers. +func (k *Kernel) ReleaseCgroupHierarchy(hid uint32) { + k.tasks.mu.RLock() + k.tasks.forEachTaskLocked(func(t *Task) { + if t.ExitState() != TaskExitNone { + return + } + t.mu.Lock() + for cg, _ := range t.cgroups { + if cg.HierarchyID() == hid { + t.leaveCgroupLocked(cg) + } + } + t.mu.Unlock() + }) + k.tasks.mu.RUnlock() +} diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index beba6d97d..34c617b08 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/safemem", "//pkg/sentry/arch", diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index d004f2357..06769931a 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -22,18 +22,18 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) const ( // MinimumPipeSize is a hard limit of the minimum size of a pipe. // It corresponds to fs/pipe.c:pipe_min_size. - MinimumPipeSize = usermem.PageSize + MinimumPipeSize = hostarch.PageSize // MaximumPipeSize is a hard limit on the maximum size of a pipe. // It corresponds to fs/pipe.c:pipe_max_size. @@ -41,7 +41,7 @@ const ( // DefaultPipeSize is the system-wide default size of a pipe in bytes. // It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS. - DefaultPipeSize = 16 * usermem.PageSize + DefaultPipeSize = 16 * hostarch.PageSize // atomicIOBytes is the maximum number of bytes that the pipe will // guarantee atomic reads or writes atomically. diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index e524afad5..95b948edb 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -17,6 +17,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -274,7 +275,7 @@ func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescripti } src := usermem.IOSequence{ IO: fd, - Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), } var ( @@ -302,7 +303,7 @@ func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescripti func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) { dst := usermem.IOSequence{ IO: fd, - Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), } var ( @@ -328,7 +329,7 @@ func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescript // fd.pipe.Notify(waiter.WritableEvents) after the read is completed. // // Preconditions: fd.pipe.mu must be locked. -func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { +func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) { return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs) }) @@ -340,7 +341,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, // is completed. // // Preconditions: fd.pipe.mu must be locked. -func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { +func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) { return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src))) }) @@ -350,7 +351,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, // ZeroOut implements usermem.IO.ZeroOut. // // Preconditions: fd.pipe.mu must be locked. -func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { +func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) { return safemem.ZeroSeq(dsts) }) @@ -362,7 +363,7 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6 // fd.pipe.Notify(waiter.WritableEvents) after the read is completed. // // Preconditions: fd.pipe.mu must be locked. -func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { +func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) { return dst.WriteFromBlocks(srcs) }) @@ -373,25 +374,25 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst // is completed. // // Preconditions: fd.pipe.mu must be locked. -func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { +func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) { return src.ReadToBlocks(dsts) }) } // SwapUint32 implements usermem.IO.SwapUint32. -func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { +func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { // How did a pipe get passed as the virtual address space to futex(2)? panic("VFSPipeFD.SwapUint32 called unexpectedly") } // CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. -func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { +func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly") } // LoadUint32 implements usermem.IO.LoadUint32. -func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) { +func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { panic("VFSPipeFD.LoadUint32 called unexpectedly") } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index f5a60e749..57c7659e7 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/mm" @@ -1011,7 +1012,7 @@ func (t *Task) ptraceSetOptionsLocked(opts uintptr) error { } // Ptrace implements the ptrace system call. -func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error { +func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { // PTRACE_TRACEME ignores all other arguments. if req == linux.PTRACE_TRACEME { return t.ptraceTraceme() @@ -1190,7 +1191,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error { panic(fmt.Sprintf("%#x + %#x overflows. Invalid reg size > %#x", ar.Start, n, ar.Length())) } ar.End = end - return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar)) + return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar)) case linux.PTRACE_SETREGSET: ars, err := t.CopyInIovecs(data, 1) @@ -1214,8 +1215,8 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error { return err } t.p.FullStateChanged() - ar.End -= usermem.Addr(n) - return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar)) + ar.End -= hostarch.Addr(n) + return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar)) case linux.PTRACE_GETSIGINFO: t.tg.pidns.owner.mu.RLock() @@ -1267,7 +1268,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error { case linux.PTRACE_GETEVENTMSG: t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() - _, err := primitive.CopyUint64Out(t, usermem.Addr(data), target.ptraceEventMsg) + _, err := primitive.CopyUint64Out(t, hostarch.Addr(data), target.ptraceEventMsg) return err // PEEKSIGINFO is unimplemented but seems to have no users anywhere. diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 7aea3dcd8..5ae05b5c3 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -18,12 +18,13 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) // ptraceArch implements arch-specific ptrace commands. -func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) error { +func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error { switch req { case linux.PTRACE_PEEKUSR: // aka PTRACE_PEEKUSER n, err := target.Arch().PtracePeekUser(uintptr(addr)) diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index d971b96b3..46dd84cbc 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -17,11 +17,11 @@ package kernel import ( + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // ptraceArch implements arch-specific ptrace commands. -func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) error { +func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error { return syserror.EIO } diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go index 2a9023fdf..4bc5bca44 100644 --- a/pkg/sentry/kernel/rseq.go +++ b/pkg/sentry/kernel/rseq.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -43,8 +44,8 @@ type OldRSeqCriticalRegion struct { // application handler while its instruction pointer is in CriticalSection, // set the instruction pointer to Restart and application register r10 (on // amd64) to the former instruction pointer. - CriticalSection usermem.AddrRange - Restart usermem.Addr + CriticalSection hostarch.AddrRange + Restart hostarch.Addr } // RSeqAvailable returns true if t supports (old and new) restartable sequences. @@ -55,7 +56,7 @@ func (t *Task) RSeqAvailable() bool { // SetRSeq registers addr as this thread's rseq structure. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) SetRSeq(addr usermem.Addr, length, signature uint32) error { +func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr != 0 { if t.rseqAddr != addr { return syserror.EINVAL @@ -100,7 +101,7 @@ func (t *Task) SetRSeq(addr usermem.Addr, length, signature uint32) error { // ClearRSeq unregisters addr as this thread's rseq structure. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) ClearRSeq(addr usermem.Addr, length, signature uint32) error { +func (t *Task) ClearRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr == 0 { return syserror.EINVAL } @@ -166,7 +167,7 @@ func (t *Task) SetOldRSeqCriticalRegion(r OldRSeqCriticalRegion) error { // CPU number. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) OldRSeqCPUAddr() usermem.Addr { +func (t *Task) OldRSeqCPUAddr() hostarch.Addr { return t.oldRSeqCPUAddr } @@ -177,7 +178,7 @@ func (t *Task) OldRSeqCPUAddr() usermem.Addr { // * t.RSeqAvailable() == true. // * The caller must be running on the task goroutine. // * t's AddressSpace must be active. -func (t *Task) SetOldRSeqCPUAddr(addr usermem.Addr) error { +func (t *Task) SetOldRSeqCPUAddr(addr hostarch.Addr) error { t.oldRSeqCPUAddr = addr // Check that addr is writable. @@ -221,7 +222,7 @@ func (t *Task) oldRSeqCopyOutCPU() error { } buf := t.CopyScratchBuffer(4) - usermem.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) + hostarch.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) _, err := t.CopyOutBytes(t.oldRSeqCPUAddr, buf) return err } @@ -236,8 +237,8 @@ func (t *Task) rseqCopyOutCPU() error { buf := t.CopyScratchBuffer(8) // CPUIDStart and CPUID are the first two fields in linux.RSeq. - usermem.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) // CPUIDStart - usermem.ByteOrder.PutUint32(buf[4:], uint32(t.rseqCPU)) // CPUID + hostarch.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) // CPUIDStart + hostarch.ByteOrder.PutUint32(buf[4:], uint32(t.rseqCPU)) // CPUID // N.B. This write is not atomic, but since this occurs on the task // goroutine then as long as userspace uses a single-instruction read // it can't see an invalid value. @@ -251,8 +252,8 @@ func (t *Task) rseqCopyOutCPU() error { func (t *Task) rseqClearCPU() error { buf := t.CopyScratchBuffer(8) // CPUIDStart and CPUID are the first two fields in linux.RSeq. - usermem.ByteOrder.PutUint32(buf, 0) // CPUIDStart - usermem.ByteOrder.PutUint32(buf[4:], linux.RSEQ_CPU_ID_UNINITIALIZED) // CPUID + hostarch.ByteOrder.PutUint32(buf, 0) // CPUIDStart + hostarch.ByteOrder.PutUint32(buf[4:], linux.RSEQ_CPU_ID_UNINITIALIZED) // CPUID // N.B. This write is not atomic, but since this occurs on the task // goroutine then as long as userspace uses a single-instruction read // it can't see an invalid value. @@ -305,7 +306,7 @@ func (t *Task) rseqAddrInterrupt() { return } - critAddr := usermem.Addr(usermem.ByteOrder.Uint64(buf)) + critAddr := hostarch.Addr(hostarch.ByteOrder.Uint64(buf)) if critAddr == 0 { return } @@ -325,7 +326,7 @@ func (t *Task) rseqAddrInterrupt() { return } - start := usermem.Addr(cs.Start) + start := hostarch.Addr(cs.Start) critRange, ok := start.ToRange(cs.PostCommitOffset) if !ok { t.Debugf("Invalid start and offset in %+v", cs) @@ -334,7 +335,7 @@ func (t *Task) rseqAddrInterrupt() { return } - abort := usermem.Addr(cs.Abort) + abort := hostarch.Addr(cs.Abort) if critRange.Contains(abort) { t.Debugf("Abort in critical section in %+v", cs) t.forceSignal(linux.SIGSEGV, false /* unconditional */) @@ -353,7 +354,7 @@ func (t *Task) rseqAddrInterrupt() { return } - sig := usermem.ByteOrder.Uint32(buf) + sig := hostarch.ByteOrder.Uint32(buf) if sig != t.rseqSignature { t.Debugf("Mismatched rseq signature %d != %d", sig, t.rseqSignature) t.forceSignal(linux.SIGSEGV, false /* unconditional */) @@ -376,7 +377,7 @@ func (t *Task) rseqAddrInterrupt() { } // Finally we can actually decide whether or not to restart. - if !critRange.Contains(usermem.Addr(t.Arch().IP())) { + if !critRange.Contains(hostarch.Addr(t.Arch().IP())) { return } @@ -386,7 +387,7 @@ func (t *Task) rseqAddrInterrupt() { // Preconditions: The caller must be running on the task goroutine. func (t *Task) oldRSeqInterrupt() { r := t.tg.oldRSeqCritical.Load().(*OldRSeqCriticalRegion) - if ip := t.Arch().IP(); r.CriticalSection.Contains(usermem.Addr(ip)) { + if ip := t.Arch().IP(); r.CriticalSection.Contains(hostarch.Addr(ip)) { t.Debugf("Interrupted rseq critical section at %#x; restarting at %#x", ip, r.Restart) t.Arch().SetIP(uintptr(r.Restart)) t.Arch().SetOldRSeqInterruptedIP(ip) diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index 8163a6132..a95e174a2 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -18,9 +18,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) const maxSyscallFilterInstructions = 1 << 15 @@ -35,11 +35,11 @@ func dataAsBPFInput(t *Task, d *linux.SeccompData) bpf.Input { return bpf.InputBytes{ Data: buf, // Go-marshal always uses the native byte order. - Order: usermem.ByteOrder, + Order: hostarch.ByteOrder, } } -func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalInfo { +func seccompSiginfo(t *Task, errno, sysno int32, ip hostarch.Addr) *arch.SignalInfo { si := &arch.SignalInfo{ Signo: int32(linux.SIGSYS), Errno: errno, @@ -56,7 +56,7 @@ func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalIn // in because vsyscalls do not use the values in t.Arch().) // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip usermem.Addr) linux.BPFAction { +func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip hostarch.Addr) linux.BPFAction { result := linux.BPFAction(t.evaluateSyscallFilters(sysno, args, ip)) action := result & linux.SECCOMP_RET_ACTION switch action { @@ -102,7 +102,7 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u return action } -func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 { +func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip hostarch.Addr) uint32 { data := linux.SeccompData{ Nr: sysno, Arch: t.image.st.AuditNumber, diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 073e14507..1c3c0794f 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -28,6 +28,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 92d60ba78..a73f1bdca 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -38,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -47,7 +48,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Key represents a shm segment key. Analogous to a file name. @@ -197,13 +197,13 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui } var sizeAligned uint64 - if val, ok := usermem.Addr(size).RoundUp(); ok { + if val, ok := hostarch.Addr(size).RoundUp(); ok { sizeAligned = uint64(val) } else { return nil, syserror.EINVAL } - if numPages := sizeAligned / usermem.PageSize; r.totalPages+numPages > linux.SHMALL { + if numPages := sizeAligned / hostarch.PageSize; r.totalPages+numPages > linux.SHMALL { // "... allocating a segment of the requested size would cause the // system to exceed the system-wide limit on shared memory (SHMALL)." // - man shmget(2) @@ -232,7 +232,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider)) } - effectiveSize := uint64(usermem.Addr(size).MustRoundUp()) + effectiveSize := uint64(hostarch.Addr(size).MustRoundUp()) fr, err := mfp.MemoryFile().Allocate(effectiveSize, usage.Anonymous) if err != nil { return nil, err @@ -267,7 +267,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi r.shms[id] = shm r.keysToShms[key] = shm - r.totalPages += effectiveSize / usermem.PageSize + r.totalPages += effectiveSize / hostarch.PageSize return shm, nil } @@ -318,7 +318,7 @@ func (r *Registry) remove(s *Shm) { } delete(r.shms, s.ID) - r.totalPages -= s.effectiveSize / usermem.PageSize + r.totalPages -= s.effectiveSize / hostarch.PageSize } // Release drops the self-reference of each active shm segment in the registry. @@ -386,7 +386,7 @@ type Shm struct { // effectiveSize of the segment, rounding up to the next page // boundary. Immutable. // - // Invariant: effectiveSize must be a multiple of usermem.PageSize. + // Invariant: effectiveSize must be a multiple of hostarch.PageSize. effectiveSize uint64 // fr is the offset into mfp.MemoryFile() that backs this contents of this @@ -467,7 +467,7 @@ func (s *Shm) Msync(context.Context, memmap.MappableRange) error { } // AddMapping implements memmap.Mappable.AddMapping. -func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) error { +func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ hostarch.AddrRange, _ uint64, _ bool) error { s.mu.Lock() defer s.mu.Unlock() s.attachTime = ktime.NowFromContext(ctx) @@ -482,7 +482,7 @@ func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.A } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) { +func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ hostarch.AddrRange, _ uint64, _ bool) { s.mu.Lock() defer s.mu.Unlock() // RemoveMapping may be called during task exit, when ctx @@ -503,12 +503,12 @@ func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ userme } // CopyMapping implements memmap.Mappable.CopyMapping. -func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error { +func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, hostarch.AddrRange, uint64, bool) error { return nil } // Translate implements memmap.Mappable.Translate. -func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > s.fr.Length() { err = &memmap.BusError{syserror.EFAULT} @@ -519,7 +519,7 @@ func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableR Source: source, File: s.mfp.MemoryFile(), Offset: s.fr.Start + source.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, err } @@ -543,7 +543,7 @@ type AttachOpts struct { // // Postconditions: The returned MMapOpts are valid only as long as a reference // continues to be held on s. -func (s *Shm) ConfigureAttach(ctx context.Context, addr usermem.Addr, opts AttachOpts) (memmap.MMapOpts, error) { +func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts AttachOpts) (memmap.MMapOpts, error) { s.mu.Lock() defer s.mu.Unlock() if s.pendingDestruction && s.ReadRefs() == 0 { @@ -565,12 +565,12 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr usermem.Addr, opts Attac Offset: 0, Addr: addr, Fixed: opts.Remap, - Perms: usermem.AccessType{ + Perms: hostarch.AccessType{ Read: true, Write: !opts.Readonly, Execute: opts.Execute, }, - MaxPerms: usermem.AnyAccess, + MaxPerms: hostarch.AnyAccess, Mappable: s, MappingIdentity: s, }, nil diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 332bdb8e8..953d4310e 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -20,9 +20,9 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // maxSyscallNum is the highest supported syscall number. @@ -243,7 +243,7 @@ type SyscallTable struct { // Emulate is a collection of instruction addresses to emulate. The // keys are addresses, and the values are system call numbers. - Emulate map[usermem.Addr]uintptr + Emulate map[hostarch.Addr]uintptr // The function to call in case of a missing system call. Missing MissingFn @@ -316,7 +316,7 @@ func (s *SyscallTable) Init() { } if s.Emulate == nil { // Ensure non-nil emulate table. - s.Emulate = make(map[usermem.Addr]uintptr) + s.Emulate = make(map[hostarch.Addr]uintptr) } max := s.MaxSysno() // Checked during RegisterSyscallTable. @@ -359,7 +359,7 @@ func (s *SyscallTable) LookupNo(name string) (uintptr, error) { } // LookupEmulate looks up an emulation syscall number. -func (s *SyscallTable) LookupEmulate(addr usermem.Addr) (uintptr, bool) { +func (s *SyscallTable) LookupEmulate(addr hostarch.Addr) (uintptr, bool) { sysno, ok := s.Emulate[addr] return sysno, ok } diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 36141dd09..be1371855 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -33,7 +34,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -470,7 +470,7 @@ type Task struct { // ThreadID to 0, and wake any futex waiters. // // cleartid is exclusive to the task goroutine. - cleartid usermem.Addr + cleartid hostarch.Addr // This is mostly a fake cpumask just for sched_set/getaffinity as we // don't really control the affinity. @@ -540,12 +540,12 @@ type Task struct { // oldRSeqCPUAddr is a pointer to the userspace old rseq CPU variable. // // oldRSeqCPUAddr is exclusive to the task goroutine. - oldRSeqCPUAddr usermem.Addr + oldRSeqCPUAddr hostarch.Addr // rseqAddr is a pointer to the userspace linux.RSeq structure. // // rseqAddr is exclusive to the task goroutine. - rseqAddr usermem.Addr + rseqAddr hostarch.Addr // rseqSignature is the signature that the rseq abort IP must be signed // with. @@ -575,7 +575,7 @@ type Task struct { // robustList is a pointer to the head of the tasks's robust futex // list. - robustList usermem.Addr + robustList hostarch.Addr // startTime is the real time at which the task started. It is set when // a Task is created or invokes execve(2). @@ -587,6 +587,12 @@ type Task struct { // // kcov is exclusive to the task goroutine. kcov *Kcov + + // cgroups is the set of cgroups this task belongs to. This may be empty if + // no cgroup controllers are enabled. Protected by mu. + // + // +checklocks:mu + cgroups map[Cgroup]struct{} } func (t *Task) savePtraceTracer() *Task { @@ -652,7 +658,7 @@ func (t *Task) Kernel() *Kernel { // SetClearTID sets t's cleartid. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) SetClearTID(addr usermem.Addr) { +func (t *Task) SetClearTID(addr hostarch.Addr) { t.cleartid = addr } diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go new file mode 100644 index 000000000..25d2504fa --- /dev/null +++ b/pkg/sentry/kernel/task_cgroup.go @@ -0,0 +1,138 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "bytes" + "fmt" + "sort" + "strings" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/syserror" +) + +// EnterInitialCgroups moves t into an initial set of cgroups. +// +// Precondition: t isn't in any cgroups yet, t.cgs is empty. +// +// +checklocksignore parent.mu is conditionally acquired. +func (t *Task) EnterInitialCgroups(parent *Task) { + var inherit map[Cgroup]struct{} + if parent != nil { + parent.mu.Lock() + defer parent.mu.Unlock() + inherit = parent.cgroups + } + joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit) + + t.mu.Lock() + defer t.mu.Unlock() + // Transfer ownership of joinSet refs to the task's cgset. + t.cgroups = joinSet + for c, _ := range t.cgroups { + // Since t isn't in any cgroup yet, we can skip the check against + // existing cgroups. + c.Enter(t) + } +} + +// EnterCgroup moves t into c. +func (t *Task) EnterCgroup(c Cgroup) error { + newControllers := make(map[CgroupControllerType]struct{}) + for _, ctl := range c.Controllers() { + newControllers[ctl.Type()] = struct{}{} + } + + t.mu.Lock() + defer t.mu.Unlock() + + for oldCG, _ := range t.cgroups { + for _, oldCtl := range oldCG.Controllers() { + if _, ok := newControllers[oldCtl.Type()]; ok { + // Already in a cgroup with the same controller as one of the + // new ones. Requires migration between cgroups. + // + // TODO(b/183137098): Implement cgroup migration. + log.Warningf("Cgroup migration is not implemented") + return syserror.EBUSY + } + } + } + + // No migration required. + t.enterCgroupLocked(c) + + return nil +} + +// +checklocks:t.mu +func (t *Task) enterCgroupLocked(c Cgroup) { + c.IncRef() + t.cgroups[c] = struct{}{} + c.Enter(t) +} + +// LeaveCgroups removes t out from all its cgroups. +func (t *Task) LeaveCgroups() { + t.mu.Lock() + defer t.mu.Unlock() + for c, _ := range t.cgroups { + t.leaveCgroupLocked(c) + } +} + +// +checklocks:t.mu +func (t *Task) leaveCgroupLocked(c Cgroup) { + c.Leave(t) + delete(t.cgroups, c) + c.decRef() +} + +// taskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to +// format a cgroup for display. +type taskCgroupEntry struct { + hierarchyID uint32 + controllers string + path string +} + +// GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf. +func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) { + t.mu.Lock() + defer t.mu.Unlock() + + cgEntries := make([]taskCgroupEntry, 0, len(t.cgroups)) + for c, _ := range t.cgroups { + ctls := c.Controllers() + ctlNames := make([]string, 0, len(ctls)) + for _, ctl := range ctls { + ctlNames = append(ctlNames, string(ctl.Type())) + } + + cgEntries = append(cgEntries, taskCgroupEntry{ + // Note: We're guaranteed to have at least one controller, and all + // controllers are guaranteed to be on the same hierarchy. + hierarchyID: ctls[0].HierarchyID(), + controllers: strings.Join(ctlNames, ","), + path: c.Path(), + }) + } + + sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].hierarchyID > cgEntries[j].hierarchyID }) + for _, cgE := range cgEntries { + fmt.Fprintf(buf, "%d:%s:%s\n", cgE.hierarchyID, cgE.controllers, cgE.path) + } +} diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index f305e69c0..405771f3f 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/cleanup" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -85,12 +86,12 @@ type CloneOptions struct { // Stack is the initial stack pointer of the new task. If Stack is 0, the // new task will start with the same stack pointer as its parent. - Stack usermem.Addr + Stack hostarch.Addr // If SetTLS is true, set the new task's TLS (thread-local storage) // descriptor to TLS. If SetTLS is false, TLS is ignored. SetTLS bool - TLS usermem.Addr + TLS hostarch.Addr // If ChildClearTID is true, when the child exits, 0 is written to the // address ChildTID in the child's memory, and if the write is successful a @@ -101,7 +102,7 @@ type CloneOptions struct { // Linux, failed writes are silently ignored.) ChildClearTID bool ChildSetTID bool - ChildTID usermem.Addr + ChildTID hostarch.Addr // If ParentSetTID is true, the child's thread ID (in the parent's PID // namespace) is written to address ParentTID in the parent's memory. (As @@ -112,7 +113,7 @@ type CloneOptions struct { // and child's memory, but this is a documentation error fixed by // 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID"). ParentSetTID bool - ParentTID usermem.Addr + ParentTID hostarch.Addr // If Vfork is true, place the parent in vforkStop until the cloned task // releases its TaskImage. @@ -268,7 +269,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } tg := t.tg - rseqAddr := usermem.Addr(0) + rseqAddr := hostarch.Addr(0) rseqSignature := uint32(0) if opts.NewThreadGroup { if tg.mounts != nil { diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index ad59e4f60..b1af1a7ef 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -275,6 +275,10 @@ func (*runExitMain) execute(t *Task) taskRunState { t.fsContext.DecRef(t) t.fdTable.DecRef(t) + // Detach task from all cgroups. This must happen before potentially the + // last ref to the cgroupfs mount is dropped below. + t.LeaveCgroups() + t.mu.Lock() if t.mountNamespaceVFS2 != nil { t.mountNamespaceVFS2.DecRef(t) diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go index 195c7da9b..4dc41b82b 100644 --- a/pkg/sentry/kernel/task_futex.go +++ b/pkg/sentry/kernel/task_futex.go @@ -16,6 +16,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/usermem" @@ -30,33 +31,33 @@ func (t *Task) Futex() *futex.Manager { } // SwapUint32 implements futex.Target.SwapUint32. -func (t *Task) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { +func (t *Task) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) { return t.MemoryManager().SwapUint32(t, addr, new, usermem.IOOpts{ AddressSpaceActive: true, }) } // CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32. -func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { +func (t *Task) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{ AddressSpaceActive: true, }) } // LoadUint32 implements futex.Target.LoadUint32. -func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) { +func (t *Task) LoadUint32(addr hostarch.Addr) (uint32, error) { return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{ AddressSpaceActive: true, }) } // GetSharedKey implements futex.Target.GetSharedKey. -func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) { +func (t *Task) GetSharedKey(addr hostarch.Addr) (futex.Key, error) { return t.MemoryManager().GetSharedFutexKey(t, addr) } // GetRobustList sets the robust futex list for the task. -func (t *Task) GetRobustList() usermem.Addr { +func (t *Task) GetRobustList() hostarch.Addr { t.mu.Lock() addr := t.robustList t.mu.Unlock() @@ -64,7 +65,7 @@ func (t *Task) GetRobustList() usermem.Addr { } // SetRobustList sets the robust futex list for the task. -func (t *Task) SetRobustList(addr usermem.Addr) { +func (t *Task) SetRobustList(addr hostarch.Addr) { t.mu.Lock() t.robustList = addr t.mu.Unlock() @@ -84,28 +85,28 @@ func (t *Task) exitRobustList() { } var rl linux.RobustListHead - if _, err := rl.CopyIn(t, usermem.Addr(addr)); err != nil { + if _, err := rl.CopyIn(t, hostarch.Addr(addr)); err != nil { return } next := primitive.Uint64(rl.List) done := 0 - var pendingLockAddr usermem.Addr + var pendingLockAddr hostarch.Addr if rl.ListOpPending != 0 { - pendingLockAddr = usermem.Addr(rl.ListOpPending + rl.FutexOffset) + pendingLockAddr = hostarch.Addr(rl.ListOpPending + rl.FutexOffset) } // Wake up normal elements. - for usermem.Addr(next) != addr { + for hostarch.Addr(next) != addr { // We traverse to the next element of the list before we // actually wake anything. This prevents the race where waking // this futex causes a modification of the list. - thisLockAddr := usermem.Addr(uint64(next) + rl.FutexOffset) + thisLockAddr := hostarch.Addr(uint64(next) + rl.FutexOffset) // Try to decode the next element in the list before waking the // current futex. But don't check the error until after we've // woken the current futex. Linux does it in this order too - _, nextErr := next.CopyIn(t, usermem.Addr(next)) + _, nextErr := next.CopyIn(t, hostarch.Addr(next)) // Wakeup the current futex if it's not pending. if thisLockAddr != pendingLockAddr { @@ -133,7 +134,7 @@ func (t *Task) exitRobustList() { } // wakeRobustListOne wakes a single futex from the robust list. -func (t *Task) wakeRobustListOne(addr usermem.Addr) { +func (t *Task) wakeRobustListOne(addr hostarch.Addr) { // Bit 0 in address signals PI futex. pi := addr&1 == 1 addr = addr &^ 1 diff --git a/pkg/sentry/kernel/task_image.go b/pkg/sentry/kernel/task_image.go index ce5fbd299..bd5543d4e 100644 --- a/pkg/sentry/kernel/task_image.go +++ b/pkg/sentry/kernel/task_image.go @@ -19,12 +19,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/loader" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/usermem" ) var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC) @@ -129,7 +129,7 @@ func (t *Task) Stack() *arch.Stack { return &arch.Stack{ Arch: t.Arch(), IO: t.MemoryManager(), - Bottom: usermem.Addr(t.Arch().Stack()), + Bottom: hostarch.Addr(t.Arch().Stack()), } } diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index c70e5e6ce..72b9a0384 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -20,6 +20,7 @@ import ( "sort" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/usermem" ) @@ -108,9 +109,9 @@ func (t *Task) debugDumpStack() { return } t.Debugf("Stack:") - start := usermem.Addr(t.Arch().Stack()) + start := hostarch.Addr(t.Arch().Stack()) // Round addr down to a 16-byte boundary. - start &= ^usermem.Addr(15) + start &= ^hostarch.Addr(15) // Print 16 bytes per line, one byte at a time. for offset := uint64(0); offset < maxStackDebugBytes; offset += 16 { addr, ok := start.AddLength(offset) @@ -127,7 +128,7 @@ func (t *Task) debugDumpStack() { t.Debugf("%x: % x", addr, data[:n]) } if err != nil { - t.Debugf("Error reading stack at address %x: %v", addr+usermem.Addr(n), err) + t.Debugf("Error reading stack at address %x: %v", addr+hostarch.Addr(n), err) break } } @@ -147,9 +148,9 @@ func (t *Task) debugDumpCode() { } t.Debugf("Code:") // Print code on both sides of the instruction register. - start := usermem.Addr(t.Arch().IP()) - maxCodeDebugBytes/2 + start := hostarch.Addr(t.Arch().IP()) - maxCodeDebugBytes/2 // Round addr down to a 16-byte boundary. - start &= ^usermem.Addr(15) + start &= ^hostarch.Addr(15) // Print 16 bytes per line, one byte at a time. for offset := uint64(0); offset < maxCodeDebugBytes; offset += 16 { addr, ok := start.AddLength(offset) @@ -166,7 +167,7 @@ func (t *Task) debugDumpCode() { t.Debugf("%x: % x", addr, data[:n]) } if err != nil { - t.Debugf("Error reading stack at address %x: %v", addr+usermem.Addr(n), err) + t.Debugf("Error reading stack at address %x: %v", addr+hostarch.Addr(n), err) break } } diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 3ccecf4b6..068f25af1 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -23,13 +23,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/goid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/hostcpu" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // A taskRunState is a reified state in the task state machine. See README.md @@ -148,7 +148,7 @@ func (*runApp) handleCPUIDInstruction(t *Task) error { region := trace.StartRegion(t.traceContext, cpuidRegion) expected := arch.CPUIDInstruction[:] found := make([]byte, len(expected)) - _, err := t.CopyInBytes(usermem.Addr(t.Arch().IP()), found) + _, err := t.CopyInBytes(hostarch.Addr(t.Arch().IP()), found) if err == nil && bytes.Equal(expected, found) { // Skip the cpuid instruction. t.Arch().CPUIDEmulate(t) @@ -307,8 +307,8 @@ func (app *runApp) execute(t *Task) taskRunState { // normally. if at.Any() { region := trace.StartRegion(t.traceContext, faultRegion) - addr := usermem.Addr(info.Addr()) - err := t.MemoryManager().HandleUserFault(t, addr, at, usermem.Addr(t.Arch().Stack())) + addr := hostarch.Addr(info.Addr()) + err := t.MemoryManager().HandleUserFault(t, addr, at, hostarch.Addr(t.Arch().Stack())) region.End() if err == nil { // The fault was handled appropriately. diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 75af3af79..c2b9fc08f 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -23,11 +23,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -243,7 +243,7 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct) // Are executing on the main stack, // or the provided alternate stack? - sp := usermem.Addr(t.Arch().Stack()) + sp := hostarch.Addr(t.Arch().Stack()) // N.B. This is a *copy* of the alternate stack that the user's signal // handler expects to see in its ucontext (even if it's not in use). @@ -251,7 +251,7 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct) if act.IsOnStack() && alt.IsEnabled() { alt.SetOnStack() if !alt.Contains(sp) { - sp = usermem.Addr(alt.Top()) + sp = hostarch.Addr(alt.Top()) } } @@ -652,7 +652,7 @@ func (t *Task) SignalStack() arch.SignalStack { // onSignalStack returns true if the task is executing on the given signal stack. func (t *Task) onSignalStack(alt arch.SignalStack) bool { - sp := usermem.Addr(t.Arch().Stack()) + sp := hostarch.Addr(t.Arch().Stack()) return alt.Contains(sp) } @@ -720,7 +720,7 @@ func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (a // CopyOutSignalAct converts the given SignalAct into an architecture-specific // type and then copies it out to task memory. -func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error { +func (t *Task) CopyOutSignalAct(addr hostarch.Addr, s *arch.SignalAct) error { n := t.Arch().NewSignalAct() n.SerializeFrom(s) _, err := n.CopyOut(t, addr) @@ -729,7 +729,7 @@ func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error { // CopyInSignalAct copies an architecture-specific sigaction type from task // memory and then converts it into a SignalAct. -func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) { +func (t *Task) CopyInSignalAct(addr hostarch.Addr) (arch.SignalAct, error) { n := t.Arch().NewSignalAct() var s arch.SignalAct if _, err := n.CopyIn(t, addr); err != nil { @@ -741,7 +741,7 @@ func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) { // CopyOutSignalStack converts the given SignalStack into an // architecture-specific type and then copies it out to task memory. -func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error { +func (t *Task) CopyOutSignalStack(addr hostarch.Addr, s *arch.SignalStack) error { n := t.Arch().NewSignalStack() n.SerializeFrom(s) _, err := n.CopyOut(t, addr) @@ -750,7 +750,7 @@ func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error // CopyInSignalStack copies an architecture-specific stack_t from task memory // and then converts it into a SignalStack. -func (t *Task) CopyInSignalStack(addr usermem.Addr) (arch.SignalStack, error) { +func (t *Task) CopyInSignalStack(addr hostarch.Addr) (arch.SignalStack, error) { n := t.Arch().NewSignalStack() var s arch.SignalStack if _, err := n.CopyIn(t, addr); err != nil { diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 36e1384f1..32031cd70 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // TaskConfig defines the configuration of a new Task (see below). @@ -86,7 +86,7 @@ type TaskConfig struct { MountNamespaceVFS2 *vfs.MountNamespace // RSeqAddr is a pointer to the the userspace linux.RSeq structure. - RSeqAddr usermem.Addr + RSeqAddr hostarch.Addr // RSeqSignature is the signature that the rseq abort IP must be signed // with. @@ -151,6 +151,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { rseqSignature: cfg.RSeqSignature, futexWaiter: futex.NewWaiter(), containerID: cfg.ContainerID, + cgroups: make(map[Cgroup]struct{}), } t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu @@ -189,6 +190,10 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { t.parent.children[t] = struct{}{} } + if VFS2Enabled { + t.EnterInitialCgroups(t.parent) + } + if tg.leader == nil { // New thread group. tg.leader = t diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 2e84bd88a..2c658d001 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -22,12 +22,12 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) var vsyscallCount = metric.MustCreateNewUint64Metric("/kernel/vsyscall_count", false /* sync */, "Number of times vsyscalls were invoked by the application") @@ -153,7 +153,7 @@ func (t *Task) doSyscall() taskRunState { // Check seccomp filters. The nil check is for performance (as seccomp use // is rare), not needed for correctness. if t.syscallFilters.Load() != nil { - switch r := t.checkSeccompSyscall(int32(sysno), args, usermem.Addr(t.Arch().IP())); r { + switch r := t.checkSeccompSyscall(int32(sysno), args, hostarch.Addr(t.Arch().IP())); r { case linux.SECCOMP_RET_ERRNO, linux.SECCOMP_RET_TRAP: t.Debugf("Syscall %d: denied by seccomp", sysno) return (*runSyscallExit)(nil) @@ -283,12 +283,12 @@ func (*runSyscallExit) execute(t *Task) taskRunState { // doVsyscall is the entry point for a vsyscall invocation of syscall sysno, as // indicated by an execution fault at address addr. doVsyscall returns the // task's next run state. -func (t *Task) doVsyscall(addr usermem.Addr, sysno uintptr) taskRunState { +func (t *Task) doVsyscall(addr hostarch.Addr, sysno uintptr) taskRunState { vsyscallCount.Increment() // Grab the caller up front, to make sure there's a sensible stack. caller := t.Arch().Native(uintptr(0)) - if _, err := caller.CopyIn(t, usermem.Addr(t.Arch().Stack())); err != nil { + if _, err := caller.CopyIn(t, hostarch.Addr(t.Arch().Stack())); err != nil { t.Debugf("vsyscall %d: error reading return address from stack: %v", sysno, err) t.forceSignal(linux.SIGSEGV, false /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) @@ -322,7 +322,7 @@ func (t *Task) doVsyscall(addr usermem.Addr, sysno uintptr) taskRunState { } type runVsyscallAfterPtraceEventSeccomp struct { - addr usermem.Addr + addr hostarch.Addr sysno uintptr caller marshal.Marshallable } @@ -337,7 +337,7 @@ func (r *runVsyscallAfterPtraceEventSeccomp) execute(t *Task) taskRunState { // currently emulated call. ... The tracer MUST NOT modify rip or rsp." - // Documentation/prctl/seccomp_filter.txt. On Linux, changing orig_ax or ip // causes do_exit(SIGSYS), and changing sp is ignored. - if (sysno != ^uintptr(0) && sysno != r.sysno) || usermem.Addr(t.Arch().IP()) != r.addr { + if (sysno != ^uintptr(0) && sysno != r.sysno) || hostarch.Addr(t.Arch().IP()) != r.addr { t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) return (*runExit)(nil) } diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index 94dabbcd8..fc6d9438a 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -27,7 +28,7 @@ import ( // MAX_RW_COUNT is the maximum size in bytes of a single read or write. // Reads and writes that exceed this size may be silently truncated. // (Linux: include/linux/fs.h:MAX_RW_COUNT) -var MAX_RW_COUNT = int(usermem.Addr(math.MaxInt32).RoundDown()) +var MAX_RW_COUNT = int(hostarch.Addr(math.MaxInt32).RoundDown()) // Activate ensures that the task has an active address space. func (t *Task) Activate() { @@ -49,7 +50,7 @@ func (t *Task) Deactivate() { // data without reflection and pass in a byte slice. // // This Task's AddressSpace must be active. -func (t *Task) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) { +func (t *Task) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) { return t.MemoryManager().CopyIn(t, addr, dst, usermem.IOOpts{ AddressSpaceActive: true, }) @@ -59,7 +60,7 @@ func (t *Task) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) { // data without reflection and pass in a byte slice. // // This Task's AddressSpace must be active. -func (t *Task) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) { +func (t *Task) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) { return t.MemoryManager().CopyOut(t, addr, src, usermem.IOOpts{ AddressSpaceActive: true, }) @@ -70,7 +71,7 @@ func (t *Task) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) { // user memory that is unmapped or not readable by the user. // // This Task's AddressSpace must be active. -func (t *Task) CopyInString(addr usermem.Addr, maxlen int) (string, error) { +func (t *Task) CopyInString(addr hostarch.Addr, maxlen int) (string, error) { return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxlen, usermem.IOOpts{ AddressSpaceActive: true, }) @@ -90,7 +91,7 @@ func (t *Task) CopyInString(addr usermem.Addr, maxlen int) (string, error) { // { "abc" } => 4 (3 for length, 1 for elements) // // This Task's AddressSpace must be active. -func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([]string, error) { +func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) ([]string, error) { var v []string for { argAddr := t.Arch().Native(0) @@ -109,12 +110,12 @@ func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([ if maxTotalSize < thisMax { thisMax = maxTotalSize } - arg, err := t.CopyInString(usermem.Addr(t.Arch().Value(argAddr)), thisMax) + arg, err := t.CopyInString(hostarch.Addr(t.Arch().Value(argAddr)), thisMax) if err != nil { return v, err } v = append(v, arg) - addr += usermem.Addr(t.Arch().Width()) + addr += hostarch.Addr(t.Arch().Width()) maxTotalSize -= len(arg) + 1 } return v, nil @@ -126,7 +127,7 @@ func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([ // Preconditions: Same as usermem.IO.CopyOut, plus: // * The caller must be running on the task goroutine. // * t's AddressSpace must be active. -func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error { +func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) error { switch t.Arch().Width() { case 8: const itemLen = 16 @@ -137,8 +138,8 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error b := t.CopyScratchBuffer(itemLen) for ; !src.IsEmpty(); src = src.Tail() { ar := src.Head() - usermem.ByteOrder.PutUint64(b[0:8], uint64(ar.Start)) - usermem.ByteOrder.PutUint64(b[8:16], uint64(ar.Length())) + hostarch.ByteOrder.PutUint64(b[0:8], uint64(ar.Start)) + hostarch.ByteOrder.PutUint64(b[8:16], uint64(ar.Length())) if _, err := t.CopyOutBytes(addr, b); err != nil { return err } @@ -153,8 +154,8 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error } // CopyInIovecs copies an array of numIovecs struct iovecs from the memory -// mapped at addr, converts them to usermem.AddrRanges, and returns them as a -// usermem.AddrRangeSeq. +// mapped at addr, converts them to hostarch.AddrRanges, and returns them as a +// hostarch.AddrRangeSeq. // // CopyInIovecs shares the following properties with Linux's // lib/iov_iter.c:import_iovec() => fs/read_write.c:rw_copy_check_uvector(): @@ -175,42 +176,42 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error // Preconditions: Same as usermem.IO.CopyIn, plus: // * The caller must be running on the task goroutine. // * t's AddressSpace must be active. -func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRangeSeq, error) { +func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRangeSeq, error) { if numIovecs == 0 { - return usermem.AddrRangeSeq{}, nil + return hostarch.AddrRangeSeq{}, nil } - var dst []usermem.AddrRange + var dst []hostarch.AddrRange if numIovecs > 1 { - dst = make([]usermem.AddrRange, 0, numIovecs) + dst = make([]hostarch.AddrRange, 0, numIovecs) } switch t.Arch().Width() { case 8: const itemLen = 16 if _, ok := addr.AddLength(uint64(numIovecs) * itemLen); !ok { - return usermem.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, syserror.EFAULT } b := t.CopyScratchBuffer(itemLen) for i := 0; i < numIovecs; i++ { if _, err := t.CopyInBytes(addr, b); err != nil { - return usermem.AddrRangeSeq{}, err + return hostarch.AddrRangeSeq{}, err } - base := usermem.Addr(usermem.ByteOrder.Uint64(b[0:8])) - length := usermem.ByteOrder.Uint64(b[8:16]) + base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8])) + length := hostarch.ByteOrder.Uint64(b[8:16]) if length > math.MaxInt64 { - return usermem.AddrRangeSeq{}, syserror.EINVAL + return hostarch.AddrRangeSeq{}, syserror.EINVAL } ar, ok := t.MemoryManager().CheckIORange(base, int64(length)) if !ok { - return usermem.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, syserror.EFAULT } if numIovecs == 1 { // Special case to avoid allocating dst. - return usermem.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil + return hostarch.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil } dst = append(dst, ar) @@ -218,7 +219,7 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange } default: - return usermem.AddrRangeSeq{}, syserror.ENOSYS + return hostarch.AddrRangeSeq{}, syserror.ENOSYS } // Truncate to MAX_RW_COUNT. @@ -226,13 +227,13 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange for i := range dst { dstlen := uint64(dst[i].Length()) if rem := uint64(MAX_RW_COUNT) - total; rem < dstlen { - dst[i].End -= usermem.Addr(dstlen - rem) + dst[i].End -= hostarch.Addr(dstlen - rem) dstlen = rem } total += dstlen } - return usermem.AddrRangeSeqFromSlice(dst), nil + return hostarch.AddrRangeSeqFromSlice(dst), nil } // SingleIOSequence returns a usermem.IOSequence representing [addr, @@ -245,7 +246,7 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange // write syscalls in Linux do not use import_single_range(). However they check // access_ok() in fs/read_write.c:vfs_read/vfs_write, and overflowing address // ranges are truncated to MAX_RW_COUNT by fs/read_write.c:rw_verify_area().) -func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) { +func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) { if length > MAX_RW_COUNT { length = MAX_RW_COUNT } @@ -255,7 +256,7 @@ func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOp } return usermem.IOSequence{ IO: t.MemoryManager(), - Addrs: usermem.AddrRangeSeqOf(ar), + Addrs: hostarch.AddrRangeSeqOf(ar), Opts: opts, }, nil } @@ -267,7 +268,7 @@ func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOp // IovecsIOSequence is analogous to Linux's lib/iov_iter.c:import_iovec(). // // Preconditions: Same as Task.CopyInIovecs. -func (t *Task) IovecsIOSequence(addr usermem.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) { +func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) { if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV { return usermem.IOSequence{}, syserror.EINVAL } @@ -317,7 +318,7 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) { } // CopyInBytes implements marshal.CopyContext.CopyInBytes. -func (cc *taskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) { +func (cc *taskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) { tmm, err := cc.getMemoryManager() if err != nil { return 0, err @@ -327,7 +328,7 @@ func (cc *taskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, erro } // CopyOutBytes implements marshal.CopyContext.CopyOutBytes. -func (cc *taskCopyContext) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) { +func (cc *taskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) { tmm, err := cc.getMemoryManager() if err != nil { return 0, err @@ -360,11 +361,11 @@ func (cc *ownTaskCopyContext) CopyScratchBuffer(size int) []byte { } // CopyInBytes implements marshal.CopyContext.CopyInBytes. -func (cc *ownTaskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) { +func (cc *ownTaskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) { return cc.t.MemoryManager().CopyIn(cc.t, addr, dst, cc.opts) } // CopyOutBytes implements marshal.CopyContext.CopyOutBytes. -func (cc *ownTaskCopyContext) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) { +func (cc *ownTaskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) { return cc.t.MemoryManager().CopyOut(cc.t, addr, src, cc.opts) } diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index 09d070ec8..77ad62445 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -114,6 +114,15 @@ func (ts *TaskSet) forEachThreadGroupLocked(f func(tg *ThreadGroup)) { } } +// forEachTaskLocked applies f to each Task in ts. +// +// Preconditions: ts.mu must be locked (for reading or writing). +func (ts *TaskSet) forEachTaskLocked(f func(t *Task)) { + for t := range ts.Root.tids { + f(t) + } +} + // A PIDNamespace represents a PID namespace, a bimap between thread IDs and // tasks. See the pid_namespaces(7) man page for further details. // diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go index cf2f7ca72..dfc3c0719 100644 --- a/pkg/sentry/kernel/timekeeper_test.go +++ b/pkg/sentry/kernel/timekeeper_test.go @@ -17,12 +17,12 @@ package kernel import ( "testing" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/pgalloc" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // mockClocks is a sentrytime.Clocks that simply returns the times in the @@ -54,7 +54,7 @@ func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { func stateTestClocklessTimekeeper(tb testing.TB) *Timekeeper { ctx := contexttest.Context(tb) mfp := pgalloc.MemoryFileProviderFromContext(ctx) - fr, err := mfp.MemoryFile().Allocate(usermem.PageSize, usage.Anonymous) + fr, err := mfp.MemoryFile().Allocate(hostarch.PageSize, usage.Anonymous) if err != nil { tb.Fatalf("failed to allocate memory: %v", err) } diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go index 9e5c2d26f..cc0917504 100644 --- a/pkg/sentry/kernel/vdso.go +++ b/pkg/sentry/kernel/vdso.go @@ -17,10 +17,10 @@ package kernel import ( "fmt" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/usermem" ) // vdsoParams are the parameters exposed to the VDSO. @@ -96,7 +96,7 @@ func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSO // access returns a mapping of the param page. func (v *VDSOParamPage) access() (safemem.Block, error) { - bs, err := v.mfp.MemoryFile().MapInternal(v.fr, usermem.ReadWrite) + bs, err := v.mfp.MemoryFile().MapInternal(v.fr, hostarch.ReadWrite) if err != nil { return safemem.Block{}, err } diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index ab074b400..ecb6603a1 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -18,6 +18,7 @@ go_library( "//pkg/binary", "//pkg/context", "//pkg/cpuid", + "//pkg/hostarch", "//pkg/log", "//pkg/rand", "//pkg/safemem", diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index cd9fa4031..e92d9fdc3 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" @@ -41,7 +42,7 @@ const ( // maxTotalPhdrSize is the maximum combined size of all program // headers. Linux limits this to one page. - maxTotalPhdrSize = usermem.PageSize + maxTotalPhdrSize = hostarch.PageSize ) var ( @@ -52,8 +53,8 @@ var ( prog64Size = int(binary.Size(elf.Prog64{})) ) -func progFlagsAsPerms(f elf.ProgFlag) usermem.AccessType { - var p usermem.AccessType +func progFlagsAsPerms(f elf.ProgFlag) hostarch.AccessType { + var p hostarch.AccessType if f&elf.PF_R == elf.PF_R { p.Read = true } @@ -75,7 +76,7 @@ type elfInfo struct { arch arch.Arch // entry is the program entry point. - entry usermem.Addr + entry hostarch.Addr // phdrs are the program headers. phdrs []elf.ProgHeader @@ -230,7 +231,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { return elfInfo{ os: os, arch: a, - entry: usermem.Addr(hdr.Entry), + entry: hostarch.Addr(hdr.Entry), phdrs: phdrs, phdrOff: hdr.Phoff, phdrSize: prog64Size, @@ -240,9 +241,9 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { // mapSegment maps a phdr into the Task. offset is the offset to apply to // phdr.Vaddr. -func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset usermem.Addr) error { +func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset hostarch.Addr) error { // We must make a page-aligned mapping. - adjust := usermem.Addr(phdr.Vaddr).PageOffset() + adjust := hostarch.Addr(phdr.Vaddr).PageOffset() addr, ok := offset.AddLength(phdr.Vaddr) if !ok { @@ -250,14 +251,14 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset) return syserror.ENOEXEC } - addr -= usermem.Addr(adjust) + addr -= hostarch.Addr(adjust) fileSize := phdr.Filesz + adjust if fileSize < phdr.Filesz { ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust) return syserror.ENOEXEC } - ms, ok := usermem.Addr(fileSize).RoundUp() + ms, ok := hostarch.Addr(fileSize).RoundUp() if !ok { ctx.Infof("fileSize %#x too large", fileSize) return syserror.ENOEXEC @@ -281,7 +282,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr Unmap: true, Private: true, Perms: prot, - MaxPerms: usermem.AnyAccess, + MaxPerms: hostarch.AnyAccess, } defer func() { if mopts.MappingIdentity != nil { @@ -312,7 +313,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr panic(fmt.Sprintf("zeroSize too big? %#x", uint64(zeroSize))) } if _, err := m.ZeroOut(ctx, zeroAddr, zeroSize, usermem.IOOpts{IgnorePermissions: true}); err != nil { - ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+usermem.Addr(zeroSize), err) + ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+hostarch.Addr(zeroSize), err) return err } } @@ -330,7 +331,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr if !ok { panic(fmt.Sprintf("anonymous memory doesn't fit in pre-sized range? %#x + %#x", addr, mapSize)) } - anonSize, ok := usermem.Addr(memSize - mapSize).RoundUp() + anonSize, ok := hostarch.Addr(memSize - mapSize).RoundUp() if !ok { ctx.Infof("extra anon pages too large: %#x", memSize-mapSize) return syserror.ENOEXEC @@ -339,7 +340,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr // N.B. Linux uses vm_brk_flags to map these pages, which only // honors the X bit, always mapping at least RW. ignoring These // pages are not included in the final brk region. - prot := usermem.ReadWrite + prot := hostarch.ReadWrite if phdr.Flags&elf.PF_X == elf.PF_X { prot.Execute = true } @@ -352,7 +353,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr Fixed: true, Private: true, Perms: prot, - MaxPerms: usermem.AnyAccess, + MaxPerms: hostarch.AnyAccess, }); err != nil { ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err) return err @@ -371,19 +372,19 @@ type loadedELF struct { arch arch.Arch // entry is the entry point of the ELF. - entry usermem.Addr + entry hostarch.Addr // start is the end of the ELF. - start usermem.Addr + start hostarch.Addr // end is the end of the ELF. - end usermem.Addr + end hostarch.Addr // interpter is the path to the ELF interpreter. interpreter string // phdrAddr is the address of the ELF program headers. - phdrAddr usermem.Addr + phdrAddr hostarch.Addr // phdrSize is the size of a single program header in the ELF. phdrSize int @@ -407,14 +408,14 @@ type loadedELF struct { // It does not load the ELF interpreter, or return any auxv entries. // // Preconditions: f is an ELF file. -func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) { +func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset hostarch.Addr) (loadedELF, error) { first := true - var start, end usermem.Addr + var start, end hostarch.Addr var interpreter string for _, phdr := range info.phdrs { switch phdr.Type { case elf.PT_LOAD: - vaddr := usermem.Addr(phdr.Vaddr) + vaddr := hostarch.Addr(phdr.Vaddr) if first { first = false start = vaddr @@ -492,7 +493,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in // Note that the vaddr of the first PT_LOAD segment is ignored when // choosing the load address (even if it is non-zero). The vaddr does // become an offset from that load address. - var offset usermem.Addr + var offset hostarch.Addr if info.sharedObject { totalSize := end - start totalSize, ok := totalSize.RoundUp() @@ -688,8 +689,8 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error // ELF-specific auxv entries. bin.auxv = arch.Auxv{ arch.AuxEntry{linux.AT_PHDR, bin.phdrAddr}, - arch.AuxEntry{linux.AT_PHENT, usermem.Addr(bin.phdrSize)}, - arch.AuxEntry{linux.AT_PHNUM, usermem.Addr(bin.phdrNum)}, + arch.AuxEntry{linux.AT_PHENT, hostarch.Addr(bin.phdrSize)}, + arch.AuxEntry{linux.AT_PHNUM, hostarch.Addr(bin.phdrNum)}, arch.AuxEntry{linux.AT_ENTRY, bin.entry}, } if bin.interpreter != "" { diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index c69b62db9..47e3775a3 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" @@ -266,17 +267,17 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V // Add generic auxv entries. auxv := append(loaded.auxv, arch.Auxv{ - arch.AuxEntry{linux.AT_UID, usermem.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())}, - arch.AuxEntry{linux.AT_EUID, usermem.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())}, - arch.AuxEntry{linux.AT_GID, usermem.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())}, - arch.AuxEntry{linux.AT_EGID, usermem.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_UID, hostarch.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_EUID, hostarch.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_GID, hostarch.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_EGID, hostarch.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())}, // The conditions that require AT_SECURE = 1 never arise. See // kernel.Task.updateCredsForExecLocked. arch.AuxEntry{linux.AT_SECURE, 0}, arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC}, arch.AuxEntry{linux.AT_EXECFN, execfn}, arch.AuxEntry{linux.AT_RANDOM, random}, - arch.AuxEntry{linux.AT_PAGESZ, usermem.PageSize}, + arch.AuxEntry{linux.AT_PAGESZ, hostarch.PageSize}, arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr}, }...) auxv = append(auxv, extraAuxv...) diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index a32d37d62..fd54261fd 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -90,7 +91,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro var first *elf.ProgHeader var prev *elf.ProgHeader - var prevEnd usermem.Addr + var prevEnd hostarch.Addr for i, phdr := range info.phdrs { if phdr.Type != elf.PT_LOAD { continue @@ -119,7 +120,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro return elfInfo{}, syserror.ENOEXEC } - start := usermem.Addr(memoryOffset) + start := hostarch.Addr(memoryOffset) end, ok := start.AddLength(phdr.Memsz) if !ok { log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end) @@ -210,7 +211,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { } // Then copy it into a VDSO mapping. - size, ok := usermem.Addr(len(vdsodata.Binary)).RoundUp() + size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp() if !ok { return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary)) } @@ -221,7 +222,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err) } - ims, err := mf.MapInternal(vdso, usermem.ReadWrite) + ims, err := mf.MapInternal(vdso, hostarch.ReadWrite) if err != nil { mf.DecRef(vdso) return nil, fmt.Errorf("unable to map VDSO memory: %v", err) @@ -234,7 +235,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { } // Finally, allocate a param page for this VDSO. - paramPage, err := mf.Allocate(usermem.PageSize, usage.System) + paramPage, err := mf.Allocate(hostarch.PageSize, usage.System) if err != nil { mf.DecRef(vdso) return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err) @@ -266,7 +267,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { // compatibility with such binaries, we load the VDSO much like Linux. // // loadVDSO takes a reference on the VDSO and parameter page FrameRegions. -func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (usermem.Addr, error) { +func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) { if v.os != bin.os { ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os) return 0, syserror.ENOEXEC @@ -297,8 +298,8 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) Fixed: true, Unmap: true, Private: true, - Perms: usermem.Read, - MaxPerms: usermem.Read, + Perms: hostarch.Read, + MaxPerms: hostarch.Read, }) if err != nil { ctx.Infof("Unable to map VDSO param page: %v", err) @@ -318,8 +319,8 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) Fixed: true, Unmap: true, Private: true, - Perms: usermem.Read, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.Read, + MaxPerms: hostarch.AnyAccess, }) if err != nil { ctx.Infof("Unable to map VDSO: %v", err) @@ -349,7 +350,7 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) return 0, syserror.ENOEXEC } segPage := segAddr.RoundDown() - segSize := usermem.Addr(phdr.Memsz) + segSize := hostarch.Addr(phdr.Memsz) segSize, ok = segSize.AddLength(segAddr.PageOffset()) if !ok { ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset()) @@ -371,7 +372,7 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) } perms := progFlagsAsPerms(phdr.Flags) - if perms != usermem.Read { + if perms != hostarch.Read { if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil { ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err) return 0, syserror.ENOEXEC diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 2c95669cd..c30e88725 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -51,6 +51,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/safemem", "//pkg/syserror", @@ -63,5 +64,5 @@ go_test( size = "small", srcs = ["mapping_set_test.go"], library = ":memmap", - deps = ["//pkg/usermem"], + deps = ["//pkg/hostarch"], ) diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go index 457ed87f8..32863bb5e 100644 --- a/pkg/sentry/memmap/mapping_set.go +++ b/pkg/sentry/memmap/mapping_set.go @@ -18,7 +18,7 @@ import ( "fmt" "math" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // MappingSet maps offsets into a Mappable to mappings of those offsets. It is @@ -39,7 +39,7 @@ type MappingsOfRange map[MappingOfRange]struct{} // +stateify savable type MappingOfRange struct { MappingSpace MappingSpace - AddrRange usermem.AddrRange + AddrRange hostarch.AddrRange Writable bool } @@ -89,9 +89,9 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp // region with k1. k2 := MappingOfRange{ MappingSpace: k1.MappingSpace, - AddrRange: usermem.AddrRange{ + AddrRange: hostarch.AddrRange{ Start: k1.AddrRange.End, - End: k1.AddrRange.End + usermem.Addr(r2.Length()), + End: k1.AddrRange.End + hostarch.Addr(r2.Length()), }, Writable: k1.Writable, } @@ -102,7 +102,7 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp // OK. Add it to the merged map. merged[MappingOfRange{ MappingSpace: k1.MappingSpace, - AddrRange: usermem.AddrRange{ + AddrRange: hostarch.AddrRange{ Start: k1.AddrRange.Start, End: k2.AddrRange.End, }, @@ -124,11 +124,11 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin // split is a value in MappableRange, we need the offset into the // corresponding MappingsOfRange. - offset := usermem.Addr(split - r.Start) + offset := hostarch.Addr(split - r.Start) for k := range val { k1 := MappingOfRange{ MappingSpace: k.MappingSpace, - AddrRange: usermem.AddrRange{ + AddrRange: hostarch.AddrRange{ Start: k.AddrRange.Start, End: k.AddrRange.Start + offset, }, @@ -138,7 +138,7 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin k2 := MappingOfRange{ MappingSpace: k.MappingSpace, - AddrRange: usermem.AddrRange{ + AddrRange: hostarch.AddrRange{ Start: k.AddrRange.Start + offset, End: k.AddrRange.End, }, @@ -157,18 +157,18 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin // indicating that ms maps addresses [0x4000, 0x6000) to MappableRange [0x0, // 0x2000). Then for subsetRange = [0x1000, 0x2000), subsetMapping returns a // MappingOfRange for which AddrRange = [0x5000, 0x6000). -func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr usermem.Addr, writable bool) MappingOfRange { +func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr hostarch.Addr, writable bool) MappingOfRange { if !wholeRange.IsSupersetOf(subsetRange) { panic(fmt.Sprintf("%v is not a superset of %v", wholeRange, subsetRange)) } offset := subsetRange.Start - wholeRange.Start - start := addr + usermem.Addr(offset) + start := addr + hostarch.Addr(offset) return MappingOfRange{ MappingSpace: ms, - AddrRange: usermem.AddrRange{ + AddrRange: hostarch.AddrRange{ Start: start, - End: start + usermem.Addr(subsetRange.Length()), + End: start + hostarch.Addr(subsetRange.Length()), }, Writable: writable, } @@ -178,7 +178,7 @@ func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr // previously had no mappings. // // Preconditions: Same as Mappable.AddMapping. -func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange { +func (s *MappingSet) AddMapping(ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) []MappableRange { mr := MappableRange{offset, offset + uint64(ar.Length())} var mapped []MappableRange seg, gap := s.Find(mr.Start) @@ -205,7 +205,7 @@ func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset ui // MappableRanges that now have no mappings. // // Preconditions: Same as Mappable.RemoveMapping. -func (s *MappingSet) RemoveMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange { +func (s *MappingSet) RemoveMapping(ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) []MappableRange { mr := MappableRange{offset, offset + uint64(ar.Length())} var unmapped []MappableRange diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go index d39efe38f..5cb81fde7 100644 --- a/pkg/sentry/memmap/mapping_set_test.go +++ b/pkg/sentry/memmap/mapping_set_test.go @@ -15,24 +15,23 @@ package memmap import ( + "gvisor.dev/gvisor/pkg/hostarch" "reflect" "testing" - - "gvisor.dev/gvisor/pkg/usermem" ) type testMappingSpace struct { // Ideally we'd store the full ranges that were invalidated, rather // than individual calls to Invalidate, as they are an implementation // detail, but this is the simplest way for now. - inv []usermem.AddrRange + inv []hostarch.AddrRange } func (n *testMappingSpace) reset() { - n.inv = []usermem.AddrRange{} + n.inv = []hostarch.AddrRange{} } -func (n *testMappingSpace) Invalidate(ar usermem.AddrRange, opts InvalidateOpts) { +func (n *testMappingSpace) Invalidate(ar hostarch.AddrRange, opts InvalidateOpts) { n.inv = append(n.inv, ar) } @@ -40,16 +39,16 @@ func TestAddRemoveMapping(t *testing.T) { set := MappingSet{} ms := &testMappingSpace{} - mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) + mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { t.Errorf("AddMapping: got %+v, wanted %+v", got, want) } - // Mappings (usermem.AddrRanges => memmap.MappableRange): + // Mappings (hostarch.AddrRanges => memmap.MappableRange): // [0x10000, 0x12000) => [0x1000, 0x3000) t.Log(&set) - mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) + mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) if len(mapped) != 0 { t.Errorf("AddMapping: got %+v, wanted []", mapped) } @@ -59,7 +58,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000) t.Log(&set) - mapped = set.AddMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true) + mapped = set.AddMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true) if got, want := mapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { t.Errorf("AddMapping: got %+v, wanted %+v", got, want) } @@ -70,7 +69,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x30000, 0x31000) => [0x4000, 0x5000) t.Log(&set) - mapped = set.AddMapping(ms, usermem.AddrRange{0x12000, 0x15000}, 0x3000, true) + mapped = set.AddMapping(ms, hostarch.AddrRange{0x12000, 0x15000}, 0x3000, true) if got, want := mapped, []MappableRange{{0x3000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { t.Errorf("AddMapping: got %+v, wanted %+v", got, want) } @@ -83,7 +82,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x14000, 0x15000) => [0x5000, 0x6000) t.Log(&set) - unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0x1000, true) + unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0x1000, true) if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } @@ -95,7 +94,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x14000, 0x15000) => [0x5000, 0x6000) t.Log(&set) - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) if len(unmapped) != 0 { t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) } @@ -106,7 +105,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x14000, 0x15000) => [0x5000, 0x6000) t.Log(&set) - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x15000}, 0x2000, true) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x15000}, 0x2000, true) if got, want := unmapped, []MappableRange{{0x2000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } @@ -115,7 +114,7 @@ func TestAddRemoveMapping(t *testing.T) { // [0x30000, 0x31000) => [0x4000, 0x5000) t.Log(&set) - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true) if got, want := unmapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } @@ -125,12 +124,12 @@ func TestInvalidateWholeMapping(t *testing.T) { set := MappingSet{} ms := &testMappingSpace{} - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true) + set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true) // Mappings: // [0x10000, 0x11000) => [0, 0x1000) t.Log(&set) set.Invalidate(MappableRange{0, 0x1000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) { + if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) { t.Errorf("Invalidate: got %+v, wanted %+v", got, want) } } @@ -139,12 +138,12 @@ func TestInvalidatePartialMapping(t *testing.T) { set := MappingSet{} ms := &testMappingSpace{} - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x13000}, 0, true) + set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x13000}, 0, true) // Mappings: // [0x10000, 0x13000) => [0, 0x3000) t.Log(&set) set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { + if got, want := ms.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { t.Errorf("Invalidate: got %+v, wanted %+v", got, want) } } @@ -153,14 +152,14 @@ func TestInvalidateMultipleMappings(t *testing.T) { set := MappingSet{} ms := &testMappingSpace{} - set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true) - set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) + set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true) + set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) // Mappings: // [0x10000, 0x11000) => [0, 0x1000) // [0x12000, 0x13000) => [0x2000, 0x3000) t.Log(&set) set.Invalidate(MappableRange{0, 0x3000}, InvalidateOpts{}) - if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { + if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { t.Errorf("Invalidate: got %+v, wanted %+v", got, want) } } @@ -170,17 +169,17 @@ func TestInvalidateOverlappingMappings(t *testing.T) { ms1 := &testMappingSpace{} ms2 := &testMappingSpace{} - set.AddMapping(ms1, usermem.AddrRange{0x10000, 0x12000}, 0, true) - set.AddMapping(ms2, usermem.AddrRange{0x20000, 0x22000}, 0x1000, true) + set.AddMapping(ms1, hostarch.AddrRange{0x10000, 0x12000}, 0, true) + set.AddMapping(ms2, hostarch.AddrRange{0x20000, 0x22000}, 0x1000, true) // Mappings: // ms1:[0x10000, 0x12000) => [0, 0x2000) // ms2:[0x11000, 0x13000) => [0x1000, 0x3000) t.Log(&set) set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{}) - if got, want := ms1.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { + if got, want := ms1.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) { t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) } - if got, want := ms2.inv, []usermem.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { + if got, want := ms2.inv, []hostarch.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) { t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want) } } @@ -189,7 +188,7 @@ func TestMixedWritableMappings(t *testing.T) { set := MappingSet{} ms := &testMappingSpace{} - mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) + mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) { t.Errorf("AddMapping: got %+v, wanted %+v", got, want) } @@ -198,7 +197,7 @@ func TestMixedWritableMappings(t *testing.T) { // [0x10000, 0x12000) writable => [0x1000, 0x3000) t.Log(&set) - mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x22000}, 0x2000, false) + mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x22000}, 0x2000, false) if got, want := mapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { t.Errorf("AddMapping: got %+v, wanted %+v", got, want) } @@ -211,14 +210,14 @@ func TestMixedWritableMappings(t *testing.T) { // Unmap should fail because we specified the readonly map address range, but // asked to unmap a writable segment. - unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true) + unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true) if len(unmapped) != 0 { t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) } // Readonly mapping removed, but writable mapping still exists in the range, // so no mappable range fully unmapped. - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, false) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, false) if len(unmapped) != 0 { t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) } @@ -228,7 +227,7 @@ func TestMixedWritableMappings(t *testing.T) { // [0x21000, 0x22000) readonly => [0x3000, 0x4000) t.Log(&set) - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x12000}, 0x2000, true) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x12000}, 0x2000, true) if got, want := unmapped, []MappableRange{{0x2000, 0x3000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } @@ -239,12 +238,12 @@ func TestMixedWritableMappings(t *testing.T) { t.Log(&set) // Unmap should fail since writable bit doesn't match. - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, false) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, false) if len(unmapped) != 0 { t.Errorf("RemoveMapping: got %+v, wanted []", unmapped) } - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true) if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } @@ -253,7 +252,7 @@ func TestMixedWritableMappings(t *testing.T) { // [0x21000, 0x22000) readonly => [0x3000, 0x4000) t.Log(&set) - unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x21000, 0x22000}, 0x3000, false) + unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x21000, 0x22000}, 0x3000, false) if got, want := unmapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) { t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want) } diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index 49e21026e..610686ea0 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -19,8 +19,8 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/usermem" ) // Mappable represents a memory-mappable object, a mutable mapping from uint64 @@ -29,8 +29,8 @@ import ( // See mm/mm.go for Mappable's place in the lock order. // // All Mappable methods have the following preconditions: -// * usermem.AddrRanges and MappableRanges must be non-empty (Length() != 0). -// * usermem.Addrs and Mappable offsets must be page-aligned. +// * hostarch.AddrRanges and MappableRanges must be non-empty (Length() != 0). +// * hostarch.Addrs and Mappable offsets must be page-aligned. type Mappable interface { // AddMapping notifies the Mappable of a mapping from addresses ar in ms to // offsets [offset, offset+ar.Length()) in this Mappable. @@ -42,7 +42,7 @@ type Mappable interface { // lifetime of the mapping. // // Preconditions: offset+ar.Length() does not overflow. - AddMapping(ctx context.Context, ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error + AddMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error // RemoveMapping notifies the Mappable of the removal of a mapping from // addresses ar in ms to offsets [offset, offset+ar.Length()) in this @@ -52,7 +52,7 @@ type Mappable interface { // * offset+ar.Length() does not overflow. // * The removed mapping must exist. writable must match the // corresponding call to AddMapping. - RemoveMapping(ctx context.Context, ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) + RemoveMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) // CopyMapping notifies the Mappable of an attempt to copy a mapping in ms // from srcAR to dstAR. For most Mappables, this is equivalent to @@ -66,7 +66,7 @@ type Mappable interface { // * offset+srcAR.Length() and offset+dstAR.Length() do not overflow. // * The mapping at srcAR must exist. writable must match the // corresponding call to AddMapping. - CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error + CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error // Translate returns the Mappable's current mappings for at least the range // of offsets specified by required, and at most the range of offsets @@ -90,7 +90,7 @@ type Mappable interface { // synchronize with invalidation. // // Postconditions: See CheckTranslateResult. - Translate(ctx context.Context, required, optional MappableRange, at usermem.AccessType) ([]Translation, error) + Translate(ctx context.Context, required, optional MappableRange, at hostarch.AccessType) ([]Translation, error) // InvalidateUnsavable requests that the Mappable invalidate Translations // that cannot be preserved across save/restore. @@ -113,7 +113,7 @@ type Translation struct { // Perms is the set of permissions for which platform.AddressSpace.MapFile // and platform.AddressSpace.MapInternal on this Translation is permitted. - Perms usermem.AccessType + Perms hostarch.AccessType } // FileRange returns the FileRange represented by t. @@ -125,18 +125,18 @@ func (t Translation) FileRange() FileRange { // postconditions for Mappable.Translate(required, optional, at). // // Preconditions: Same as Mappable.Translate. -func CheckTranslateResult(required, optional MappableRange, at usermem.AccessType, ts []Translation, terr error) error { +func CheckTranslateResult(required, optional MappableRange, at hostarch.AccessType, ts []Translation, terr error) error { // Verify that the inputs to Mappable.Translate were valid. if !required.WellFormed() || required.Length() == 0 { panic(fmt.Sprintf("invalid required range: %v", required)) } - if !usermem.Addr(required.Start).IsPageAligned() || !usermem.Addr(required.End).IsPageAligned() { + if !hostarch.Addr(required.Start).IsPageAligned() || !hostarch.Addr(required.End).IsPageAligned() { panic(fmt.Sprintf("unaligned required range: %v", required)) } if !optional.IsSupersetOf(required) { panic(fmt.Sprintf("optional range %v is not a superset of required range %v", optional, required)) } - if !usermem.Addr(optional.Start).IsPageAligned() || !usermem.Addr(optional.End).IsPageAligned() { + if !hostarch.Addr(optional.Start).IsPageAligned() || !hostarch.Addr(optional.End).IsPageAligned() { panic(fmt.Sprintf("unaligned optional range: %v", optional)) } @@ -148,13 +148,13 @@ func CheckTranslateResult(required, optional MappableRange, at usermem.AccessTyp if !t.Source.WellFormed() || t.Source.Length() == 0 { return fmt.Errorf("Translation %+v has invalid Source", t) } - if !usermem.Addr(t.Source.Start).IsPageAligned() || !usermem.Addr(t.Source.End).IsPageAligned() { + if !hostarch.Addr(t.Source.Start).IsPageAligned() || !hostarch.Addr(t.Source.End).IsPageAligned() { return fmt.Errorf("Translation %+v has unaligned Source", t) } if t.File == nil { return fmt.Errorf("Translation %+v has nil File", t) } - if !usermem.Addr(t.Offset).IsPageAligned() { + if !hostarch.Addr(t.Offset).IsPageAligned() { return fmt.Errorf("Translation %+v has unaligned Offset", t) } // Translations must be contiguous and in increasing order of @@ -210,7 +210,7 @@ func (mr MappableRange) String() string { return fmt.Sprintf("[%#x, %#x)", mr.Start, mr.End) } -// MappingSpace represents a mutable mapping from usermem.Addrs to (Mappable, +// MappingSpace represents a mutable mapping from hostarch.Addrs to (Mappable, // uint64 offset) pairs. type MappingSpace interface { // Invalidate is called to notify the MappingSpace that values returned by @@ -223,7 +223,7 @@ type MappingSpace interface { // Preconditions: // * ar.Length() != 0. // * ar must be page-aligned. - Invalidate(ar usermem.AddrRange, opts InvalidateOpts) + Invalidate(ar hostarch.AddrRange, opts InvalidateOpts) } // InvalidateOpts holds options to MappingSpace.Invalidate. @@ -321,7 +321,7 @@ type MMapOpts struct { Offset uint64 // Addr is the suggested address for the mapping. - Addr usermem.Addr + Addr hostarch.Addr // Fixed specifies whether this is a fixed mapping (it must be located at // Addr). @@ -338,7 +338,7 @@ type MMapOpts struct { Map32Bit bool // Perms is the set of permissions to the applied to this mapping. - Perms usermem.AccessType + Perms hostarch.AccessType // MaxPerms limits the set of permissions that may ever apply to this // mapping. If Mappable is not nil, all memmap.Translations returned by @@ -346,7 +346,7 @@ type MMapOpts struct { // // Preconditions: MaxAccessType should be an effective AccessType, as // access cannot be limited beyond effective AccessTypes. - MaxPerms usermem.AccessType + MaxPerms hostarch.AccessType // Private is true if writes to the mapping should be propagated to a copy // that is exclusive to the MemoryManager. @@ -375,6 +375,11 @@ type MMapOpts struct { // // If Force is true, Unmap and Fixed must be true. Force bool + + // SentryOwnedContent indicates the sentry exclusively controls the + // underlying memory backing the mapping thus the memory content is + // guaranteed not to be modified outside the sentry's purview. + SentryOwnedContent bool } // File represents a host file that may be mapped into an platform.AddressSpace. @@ -410,7 +415,7 @@ type File interface { // // Postconditions: The returned mapping is valid as long as at least one // reference is held on the mapped pages. - MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error) + MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) // FD returns the file descriptor represented by the File. // diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index 6dbeccfe2..b417c2da7 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -28,14 +28,14 @@ go_template_instance( "trackGaps": "1", }, imports = { - "usermem": "gvisor.dev/gvisor/pkg/usermem", + "hostarch": "gvisor.dev/gvisor/pkg/hostarch", }, package = "mm", prefix = "vma", template = "//pkg/segment:generic_set", types = { - "Key": "usermem.Addr", - "Range": "usermem.AddrRange", + "Key": "hostarch.Addr", + "Range": "hostarch.AddrRange", "Value": "vma", "Functions": "vmaSetFunctions", }, @@ -48,14 +48,14 @@ go_template_instance( "minDegree": "8", }, imports = { - "usermem": "gvisor.dev/gvisor/pkg/usermem", + "hostarch": "gvisor.dev/gvisor/pkg/hostarch", }, package = "mm", prefix = "pma", template = "//pkg/segment:generic_set", types = { - "Key": "usermem.Addr", - "Range": "usermem.AddrRange", + "Key": "hostarch.Addr", + "Range": "hostarch.AddrRange", "Value": "pma", "Functions": "pmaSetFunctions", }, @@ -125,6 +125,7 @@ go_library( "//pkg/abi/linux", "//pkg/atomicbitops", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", @@ -155,6 +156,7 @@ go_test( library = ":mm", deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", "//pkg/sentry/limits", diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go index a93e76c75..534e0e957 100644 --- a/pkg/sentry/mm/address_space.go +++ b/pkg/sentry/mm/address_space.go @@ -19,8 +19,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/usermem" ) // AddressSpace returns the platform.AddressSpace bound to mm. @@ -172,17 +172,17 @@ func (mm *MemoryManager) Deactivate() { // * ar.Length() != 0. // * ar must be page-aligned. // * pseg == mm.pmas.LowerBoundSegment(ar.Start). -func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, precommit bool) error { +func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar hostarch.AddrRange, precommit bool) error { // By default, map entire pmas at a time, under the assumption that there // is no cost to mapping more of a pma than necessary. - mapAR := usermem.AddrRange{0, ^usermem.Addr(usermem.PageSize - 1)} + mapAR := hostarch.AddrRange{0, ^hostarch.Addr(hostarch.PageSize - 1)} if precommit { // When explicitly precommitting, only map ar, since overmapping may // incur unexpected resource usage. mapAR = ar } else if mapUnit := mm.p.MapUnit(); mapUnit != 0 { // Limit the range we map to ar, aligned to mapUnit. - mapMask := usermem.Addr(mapUnit - 1) + mapMask := hostarch.Addr(mapUnit - 1) mapAR.Start = ar.Start &^ mapMask // If rounding ar.End up overflows, just keep the existing mapAR.End. if end := (ar.End + mapMask) &^ mapMask; end >= ar.End { @@ -218,7 +218,7 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre // unmapASLocked removes all AddressSpace mappings for addresses in ar. // // Preconditions: mm.activeMu must be locked. -func (mm *MemoryManager) unmapASLocked(ar usermem.AddrRange) { +func (mm *MemoryManager) unmapASLocked(ar hostarch.AddrRange) { if mm.as == nil { // No AddressSpace? Force all mappings to be unmapped on the next // Activate. diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 5ab2ef79f..346866d3c 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -17,6 +17,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -83,7 +84,7 @@ func (mm *MemoryManager) destroyAIOContextLocked(ctx context.Context, id uint64) // the same address. Then it would be unmapping memory that it doesn't own. // This is, however, the way Linux implements AIO. Keeps the same [weird] // semantics in case anyone relies on it. - mm.MUnmap(ctx, usermem.Addr(id), aioRingBufferSize) + mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize) delete(mm.aioManager.contexts, id) aioCtx.destroy() @@ -259,7 +260,7 @@ type aioMappable struct { fr memmap.FileRange } -var aioRingBufferSize = uint64(usermem.Addr(linux.AIORingSize).MustRoundUp()) +var aioRingBufferSize = uint64(hostarch.Addr(linux.AIORingSize).MustRoundUp()) func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) { fr, err := mfp.MemoryFile().Allocate(aioRingBufferSize, usage.Anonymous) @@ -300,7 +301,7 @@ func (m *aioMappable) Msync(ctx context.Context, mr memmap.MappableRange) error } // AddMapping implements memmap.Mappable.AddMapping. -func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar usermem.AddrRange, offset uint64, _ bool) error { +func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, _ bool) error { // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(ar.Length()) != aioRingBufferSize { @@ -310,11 +311,11 @@ func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar us } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (m *aioMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) { +func (m *aioMappable) RemoveMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) { } // CopyMapping implements memmap.Mappable.CopyMapping. -func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, _ bool) error { +func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, _ bool) error { // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(dstAR.Length()) != aioRingBufferSize { @@ -346,7 +347,7 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s } // Translate implements memmap.Mappable.Translate. -func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { err = &memmap.BusError{syserror.EFAULT} @@ -357,7 +358,7 @@ func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.M Source: source, File: m.mfp.MemoryFile(), Offset: m.fr.Start + source.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, err } @@ -389,8 +390,8 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint // Linux uses "do_mmap_pgoff(..., PROT_READ | PROT_WRITE, ...)" in // fs/aio.c:aio_setup_ring(). Since we don't implement AIO_RING_MAGIC, // user mode should not write to this page. - Perms: usermem.Read, - MaxPerms: usermem.Read, + Perms: hostarch.Read, + MaxPerms: hostarch.Read, }) if err != nil { return 0, err @@ -435,6 +436,6 @@ func (mm *MemoryManager) LookupAIOContext(ctx context.Context, id uint64) (*AIOC // bytes from id). func (mm *MemoryManager) isValidAddr(ctx context.Context, id uint64) bool { var buf [4]byte - _, err := mm.CopyIn(ctx, usermem.Addr(id), buf[:], usermem.IOOpts{}) + _, err := mm.CopyIn(ctx, hostarch.Addr(id), buf[:], usermem.IOOpts{}) return err == nil } diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index a8ac48080..16f318ab3 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -16,6 +16,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/syserror" @@ -60,11 +61,11 @@ const ( rwMapMinBytes = 512 ) -// CheckIORange is similar to usermem.Addr.ToRange, but applies bounds checks +// CheckIORange is similar to hostarch.Addr.ToRange, but applies bounds checks // consistent with Linux's arch/x86/include/asm/uaccess.h:access_ok(). // // Preconditions: length >= 0. -func (mm *MemoryManager) CheckIORange(addr usermem.Addr, length int64) (usermem.AddrRange, bool) { +func (mm *MemoryManager) CheckIORange(addr hostarch.Addr, length int64) (hostarch.AddrRange, bool) { // Note that access_ok() constrains end even if length == 0. ar, ok := addr.ToRange(uint64(length)) return ar, (ok && ar.End <= mm.layout.MaxAddr) @@ -72,7 +73,7 @@ func (mm *MemoryManager) CheckIORange(addr usermem.Addr, length int64) (usermem. // checkIOVec applies bound checks consistent with Linux's // arch/x86/include/asm/uaccess.h:access_ok() to ars. -func (mm *MemoryManager) checkIOVec(ars usermem.AddrRangeSeq) bool { +func (mm *MemoryManager) checkIOVec(ars hostarch.AddrRangeSeq) bool { for !ars.IsEmpty() { ar := ars.Head() if _, ok := mm.CheckIORange(ar.Start, int64(ar.Length())); !ok { @@ -100,7 +101,7 @@ func translateIOError(ctx context.Context, err error) error { } // CopyOut implements usermem.IO.CopyOut. -func (mm *MemoryManager) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { +func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(src))) if !ok { return 0, syserror.EFAULT @@ -116,24 +117,24 @@ func (mm *MemoryManager) CopyOut(ctx context.Context, addr usermem.Addr, src []b } // Go through internal mappings. - n64, err := mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src))) return n, translateIOError(ctx, err) }) return int(n64), err } -func (mm *MemoryManager) asCopyOut(ctx context.Context, addr usermem.Addr, src []byte) (int, error) { +func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte) (int, error) { var done int for { - n, err := mm.as.CopyOut(addr+usermem.Addr(done), src[done:]) + n, err := mm.as.CopyOut(addr+hostarch.Addr(done), src[done:]) done += n if err == nil { return done, nil } if f, ok := err.(platform.SegmentationFault); ok { ar, _ := addr.ToRange(uint64(len(src))) - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil { return done, err } continue @@ -143,7 +144,7 @@ func (mm *MemoryManager) asCopyOut(ctx context.Context, addr usermem.Addr, src [ } // CopyIn implements usermem.IO.CopyIn. -func (mm *MemoryManager) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { +func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(dst))) if !ok { return 0, syserror.EFAULT @@ -159,24 +160,24 @@ func (mm *MemoryManager) CopyIn(ctx context.Context, addr usermem.Addr, dst []by } // Go through internal mappings. - n64, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + n64, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims) return n, translateIOError(ctx, err) }) return int(n64), err } -func (mm *MemoryManager) asCopyIn(ctx context.Context, addr usermem.Addr, dst []byte) (int, error) { +func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte) (int, error) { var done int for { - n, err := mm.as.CopyIn(addr+usermem.Addr(done), dst[done:]) + n, err := mm.as.CopyIn(addr+hostarch.Addr(done), dst[done:]) done += n if err == nil { return done, nil } if f, ok := err.(platform.SegmentationFault); ok { ar, _ := addr.ToRange(uint64(len(dst))) - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil { return done, err } continue @@ -186,7 +187,7 @@ func (mm *MemoryManager) asCopyIn(ctx context.Context, addr usermem.Addr, dst [] } // ZeroOut implements usermem.IO.ZeroOut. -func (mm *MemoryManager) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { +func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { ar, ok := mm.CheckIORange(addr, toZero) if !ok { return 0, syserror.EFAULT @@ -202,23 +203,23 @@ func (mm *MemoryManager) ZeroOut(ctx context.Context, addr usermem.Addr, toZero } // Go through internal mappings. - return mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) { + return mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) { n, err := safemem.ZeroSeq(dsts) return n, translateIOError(ctx, err) }) } -func (mm *MemoryManager) asZeroOut(ctx context.Context, addr usermem.Addr, toZero int64) (int64, error) { +func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64) (int64, error) { var done int64 for { - n, err := mm.as.ZeroOut(addr+usermem.Addr(done), uintptr(toZero-done)) + n, err := mm.as.ZeroOut(addr+hostarch.Addr(done), uintptr(toZero-done)) done += int64(n) if err == nil { return done, nil } if f, ok := err.(platform.SegmentationFault); ok { ar, _ := addr.ToRange(uint64(toZero)) - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil { return done, err } continue @@ -228,7 +229,7 @@ func (mm *MemoryManager) asZeroOut(ctx context.Context, addr usermem.Addr, toZer } // CopyOutFrom implements usermem.IO.CopyOutFrom. -func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { +func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { return 0, syserror.EFAULT } @@ -269,11 +270,11 @@ func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeS } // Go through internal mappings. - return mm.withVecInternalMappings(ctx, ars, usermem.Write, opts.IgnorePermissions, src.ReadToBlocks) + return mm.withVecInternalMappings(ctx, ars, hostarch.Write, opts.IgnorePermissions, src.ReadToBlocks) } // CopyInTo implements usermem.IO.CopyInTo. -func (mm *MemoryManager) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { +func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { return 0, syserror.EFAULT } @@ -306,11 +307,11 @@ func (mm *MemoryManager) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, } // Go through internal mappings. - return mm.withVecInternalMappings(ctx, ars, usermem.Read, opts.IgnorePermissions, dst.WriteFromBlocks) + return mm.withVecInternalMappings(ctx, ars, hostarch.Read, opts.IgnorePermissions, dst.WriteFromBlocks) } // SwapUint32 implements usermem.IO.SwapUint32. -func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { +func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { return 0, syserror.EFAULT @@ -324,7 +325,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new return old, nil } if f, ok := err.(platform.SegmentationFault); ok { - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil { return 0, err } continue @@ -335,7 +336,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new // Go through internal mappings. var old uint32 - _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. return 0, syserror.EFAULT @@ -353,7 +354,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new } // CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. -func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { +func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { return 0, syserror.EFAULT @@ -367,7 +368,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem. return prev, nil } if f, ok := err.(platform.SegmentationFault); ok { - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil { return 0, err } continue @@ -378,7 +379,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem. // Go through internal mappings. var prev uint32 - _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. return 0, syserror.EFAULT @@ -396,7 +397,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem. } // LoadUint32 implements usermem.IO.LoadUint32. -func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) { +func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { return 0, syserror.EFAULT @@ -410,7 +411,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts return val, nil } if f, ok := err.(platform.SegmentationFault); ok { - if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil { + if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil { return 0, err } continue @@ -421,7 +422,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts // Go through internal mappings. var val uint32 - _, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + _, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. return 0, syserror.EFAULT @@ -445,11 +446,11 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts // * mm.as != nil. // * ioar.Length() != 0. // * ioar.Contains(addr). -func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, ioar usermem.AddrRange, at usermem.AccessType) error { +func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr hostarch.Addr, ioar hostarch.AddrRange, at hostarch.AccessType) error { // Try to map all remaining pages in the I/O operation. This RoundUp can't // overflow because otherwise it would have been caught by CheckIORange. end, _ := ioar.End.RoundUp() - ar := usermem.AddrRange{addr.RoundDown(), end} + ar := hostarch.AddrRange{addr.RoundDown(), end} // Don't bother trying existingPMAsLocked; in most cases, if we did have // existing pmas, we wouldn't have faulted. @@ -498,7 +499,7 @@ func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, // more useful for usermem.IO methods. // // Preconditions: 0 < ar.Length() <= math.MaxInt64. -func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { +func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { // If pmas are already available, we can do IO without touching mm.vmas or // mm.mappingMu. mm.activeMu.RLock() @@ -567,7 +568,7 @@ func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.Ad // internal mappings for the subset of ars for which this property holds. // // Preconditions: !ars.IsEmpty(). -func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { +func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { // withInternalMappings is faster than withVecInternalMappings because of // iterator plumbing (this isn't generally practical in the vector case due // to iterator invalidation between AddrRanges). Use it if possible. @@ -630,12 +631,12 @@ func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars userme // truncatedAddrRangeSeq returns a copy of ars, but with the end truncated to // at most address end on AddrRange arsit.Head(). It is used in vector I/O paths to -// truncate usermem.AddrRangeSeq when errors occur. +// truncate hostarch.AddrRangeSeq when errors occur. // // Preconditions: // * !arsit.IsEmpty(). // * end <= arsit.Head().End. -func truncatedAddrRangeSeq(ars, arsit usermem.AddrRangeSeq, end usermem.Addr) usermem.AddrRangeSeq { +func truncatedAddrRangeSeq(ars, arsit hostarch.AddrRangeSeq, end hostarch.Addr) hostarch.AddrRangeSeq { ar := arsit.Head() if end <= ar.Start { return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes()) diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index 120707429..a79ef9223 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -19,12 +19,12 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/usermem" ) // NewMemoryManager returns a new MemoryManager with no mappings and 1 user. @@ -139,7 +139,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { } srcvseg := mm.vmas.FirstSegment() dstpgap := mm2.pmas.FirstGap() - var unmapAR usermem.AddrRange + var unmapAR hostarch.AddrRange for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() { pma := srcpseg.ValuePtr() if !pma.private { diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index 0cfd60f6c..28c5fead9 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -16,9 +16,9 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" - "gvisor.dev/gvisor/pkg/usermem" ) // Dumpability describes if and how core dumps should be created. @@ -54,14 +54,14 @@ func (mm *MemoryManager) SetDumpability(d Dumpability) { // ArgvStart returns the start of the application argument vector. // // There is no guarantee that this value is sensible w.r.t. ArgvEnd. -func (mm *MemoryManager) ArgvStart() usermem.Addr { +func (mm *MemoryManager) ArgvStart() hostarch.Addr { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() return mm.argv.Start } // SetArgvStart sets the start of the application argument vector. -func (mm *MemoryManager) SetArgvStart(a usermem.Addr) { +func (mm *MemoryManager) SetArgvStart(a hostarch.Addr) { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() mm.argv.Start = a @@ -70,14 +70,14 @@ func (mm *MemoryManager) SetArgvStart(a usermem.Addr) { // ArgvEnd returns the end of the application argument vector. // // There is no guarantee that this value is sensible w.r.t. ArgvStart. -func (mm *MemoryManager) ArgvEnd() usermem.Addr { +func (mm *MemoryManager) ArgvEnd() hostarch.Addr { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() return mm.argv.End } // SetArgvEnd sets the end of the application argument vector. -func (mm *MemoryManager) SetArgvEnd(a usermem.Addr) { +func (mm *MemoryManager) SetArgvEnd(a hostarch.Addr) { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() mm.argv.End = a @@ -86,14 +86,14 @@ func (mm *MemoryManager) SetArgvEnd(a usermem.Addr) { // EnvvStart returns the start of the application environment vector. // // There is no guarantee that this value is sensible w.r.t. EnvvEnd. -func (mm *MemoryManager) EnvvStart() usermem.Addr { +func (mm *MemoryManager) EnvvStart() hostarch.Addr { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() return mm.envv.Start } // SetEnvvStart sets the start of the application environment vector. -func (mm *MemoryManager) SetEnvvStart(a usermem.Addr) { +func (mm *MemoryManager) SetEnvvStart(a hostarch.Addr) { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() mm.envv.Start = a @@ -102,14 +102,14 @@ func (mm *MemoryManager) SetEnvvStart(a usermem.Addr) { // EnvvEnd returns the end of the application environment vector. // // There is no guarantee that this value is sensible w.r.t. EnvvStart. -func (mm *MemoryManager) EnvvEnd() usermem.Addr { +func (mm *MemoryManager) EnvvEnd() hostarch.Addr { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() return mm.envv.End } // SetEnvvEnd sets the end of the application environment vector. -func (mm *MemoryManager) SetEnvvEnd(a usermem.Addr) { +func (mm *MemoryManager) SetEnvvEnd(a hostarch.Addr) { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() mm.envv.End = a diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 92cc87d84..57969b26c 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -36,6 +36,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" @@ -43,7 +44,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // MemoryManager implements a virtual address space. @@ -97,7 +97,7 @@ type MemoryManager struct { // binary into the mm. // // brk is protected by mappingMu. - brk usermem.AddrRange + brk hostarch.AddrRange // usageAS is vmas.Span(), cached to accelerate RLIMIT_AS checks. // @@ -198,14 +198,14 @@ type MemoryManager struct { // requirements apply to argv; we do not require that argv.WellFormed(). // // argv is protected by metadataMu. - argv usermem.AddrRange + argv hostarch.AddrRange // envv is the application envv. This is set up by the loader and may be // modified by prctl(PR_SET_MM_ENV_START/PR_SET_MM_ENV_END). No // requirements apply to envv; we do not require that envv.WellFormed(). // // envv is protected by metadataMu. - envv usermem.AddrRange + envv hostarch.AddrRange // auxv is the ELF's auxiliary vector. // @@ -268,20 +268,20 @@ type vma struct { // realPerms are the memory permissions on this vma, as defined by the // application. - realPerms usermem.AccessType `state:".(int)"` + realPerms hostarch.AccessType `state:".(int)"` // effectivePerms are the memory permissions on this vma which are // actually used to control access. // // Invariant: effectivePerms == realPerms.Effective(). - effectivePerms usermem.AccessType `state:"manual"` + effectivePerms hostarch.AccessType `state:"manual"` // maxPerms limits the set of permissions that may ever apply to this // memory, as well as accesses for which usermem.IOOpts.IgnorePermissions // is true (e.g. ptrace(PTRACE_POKEDATA)). // // Invariant: maxPerms == maxPerms.Effective(). - maxPerms usermem.AccessType `state:"manual"` + maxPerms hostarch.AccessType `state:"manual"` // private is true if this is a MAP_PRIVATE mapping, such that writes to // the mapping are propagated to a copy. @@ -421,8 +421,8 @@ type pma struct { off uint64 // translatePerms is the permissions returned by memmap.Mappable.Translate. - // If private is true, translatePerms is usermem.AnyAccess. - translatePerms usermem.AccessType + // If private is true, translatePerms is hostarch.AnyAccess. + translatePerms hostarch.AccessType // effectivePerms is the permissions allowed for non-ignorePermissions // accesses. maxPerms is the permissions allowed for ignorePermissions @@ -432,8 +432,8 @@ type pma struct { // // These are stored in the pma so that the IO implementation can avoid // iterating mm.vmas when pmas already exist. - effectivePerms usermem.AccessType - maxPerms usermem.AccessType + effectivePerms hostarch.AccessType + maxPerms hostarch.AccessType // needCOW is true if writes to the mapping must be propagated to a copy. needCOW bool @@ -465,7 +465,7 @@ type privateRefs struct { } type invalidateArgs struct { - ar usermem.AddrRange + ar hostarch.AddrRange opts memmap.InvalidateOpts } diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index bc53bd41e..1304b0a2f 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -18,6 +18,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/limits" @@ -51,7 +52,7 @@ func TestUsageASUpdates(t *testing.T) { defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 2 * usermem.PageSize, + Length: 2 * hostarch.PageSize, Private: true, }) if err != nil { @@ -62,7 +63,7 @@ func TestUsageASUpdates(t *testing.T) { t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) } - mm.MUnmap(ctx, addr, usermem.PageSize) + mm.MUnmap(ctx, addr, hostarch.PageSize) realUsage = mm.realUsageAS() if mm.usageAS != realUsage { t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage) @@ -86,10 +87,10 @@ func TestDataASUpdates(t *testing.T) { defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: 3 * usermem.PageSize, + Length: 3 * hostarch.PageSize, Private: true, - Perms: usermem.Write, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.Write, + MaxPerms: hostarch.AnyAccess, }) if err != nil { t.Fatalf("MMap got err %v want nil", err) @@ -102,19 +103,19 @@ func TestDataASUpdates(t *testing.T) { t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) } - mm.MUnmap(ctx, addr, usermem.PageSize) + mm.MUnmap(ctx, addr, hostarch.PageSize) realDataAS = mm.realDataAS() if mm.dataAS != realDataAS { t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) } - mm.MProtect(addr+usermem.PageSize, usermem.PageSize, usermem.Read, false) + mm.MProtect(addr+hostarch.PageSize, hostarch.PageSize, hostarch.Read, false) realDataAS = mm.realDataAS() if mm.dataAS != realDataAS { t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS) } - mm.MRemap(ctx, addr+2*usermem.PageSize, usermem.PageSize, 2*usermem.PageSize, MRemapOpts{ + mm.MRemap(ctx, addr+2*hostarch.PageSize, hostarch.PageSize, 2*hostarch.PageSize, MRemapOpts{ Move: MRemapMayMove, }) realDataAS = mm.realDataAS() @@ -133,7 +134,7 @@ func TestBrkDataLimitUpdates(t *testing.T) { // Try to extend the brk by one page and expect doing so to fail. oldBrk, _ := mm.Brk(ctx, 0) - if newBrk, _ := mm.Brk(ctx, oldBrk+usermem.PageSize); newBrk != oldBrk { + if newBrk, _ := mm.Brk(ctx, oldBrk+hostarch.PageSize); newBrk != oldBrk { t.Errorf("brk() increased data segment above RLIMIT_DATA (old brk = %#x, new brk = %#x", oldBrk, newBrk) } } @@ -145,10 +146,10 @@ func TestIOAfterUnmap(t *testing.T) { defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: usermem.PageSize, + Length: hostarch.PageSize, Private: true, - Perms: usermem.Read, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.Read, + MaxPerms: hostarch.AnyAccess, }) if err != nil { t.Fatalf("MMap got err %v want nil", err) @@ -164,7 +165,7 @@ func TestIOAfterUnmap(t *testing.T) { t.Errorf("CopyIn got %d want 1", n) } - err = mm.MUnmap(ctx, addr, usermem.PageSize) + err = mm.MUnmap(ctx, addr, hostarch.PageSize) if err != nil { t.Fatalf("MUnmap got err %v want nil", err) } @@ -185,10 +186,10 @@ func TestIOAfterMProtect(t *testing.T) { defer mm.DecUsers(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ - Length: usermem.PageSize, + Length: hostarch.PageSize, Private: true, - Perms: usermem.ReadWrite, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.ReadWrite, + MaxPerms: hostarch.AnyAccess, }) if err != nil { t.Fatalf("MMap got err %v want nil", err) @@ -204,7 +205,7 @@ func TestIOAfterMProtect(t *testing.T) { t.Errorf("CopyOut got %d want 1", n) } - err = mm.MProtect(addr, usermem.PageSize, usermem.Read, false) + err = mm.MProtect(addr, hostarch.PageSize, hostarch.Read, false) if err != nil { t.Errorf("MProtect got err %v want nil", err) } diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index 7e5f7de64..5583f62b2 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -18,12 +18,12 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // existingPMAsLocked checks that pmas exist for all addresses in ar, and @@ -34,7 +34,7 @@ import ( // Preconditions: // * mm.activeMu must be locked. // * ar.Length() != 0. -func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator { +func (mm *MemoryManager) existingPMAsLocked(ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -70,7 +70,7 @@ func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.Acc // and support access of type (at, ignorePermissions). // // Preconditions: mm.activeMu must be locked. -func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) bool { +func (mm *MemoryManager) existingVecPMAsLocked(ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) bool { for ; !ars.IsEmpty(); ars = ars.Tail() { if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() { return false @@ -98,7 +98,7 @@ func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at user // * vseg.Range().Contains(ar.Start). // * vmas must exist for all addresses in ar, and support accesses of type at // (i.e. permission checks must have been performed against vmas). -func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { +func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -118,7 +118,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar end = ar.End.RoundDown() alignerr = syserror.EFAULT } - ar = usermem.AddrRange{ar.Start.RoundDown(), end} + ar = hostarch.AddrRange{ar.Start.RoundDown(), end} pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at) if pend.Start() <= ar.Start { @@ -145,7 +145,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar // * mm.activeMu must be locked for writing. // * vmas must exist for all addresses in ars, and support accesses of type at // (i.e. permission checks must have been performed against vmas). -func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType) (usermem.AddrRangeSeq, error) { +func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType) (hostarch.AddrRangeSeq, error) { for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { ar := arsit.Head() if ar.Length() == 0 { @@ -164,7 +164,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR end = ar.End.RoundDown() alignerr = syserror.EFAULT } - ar = usermem.AddrRange{ar.Start.RoundDown(), end} + ar = hostarch.AddrRange{ar.Start.RoundDown(), end} _, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at) if perr != nil { @@ -191,7 +191,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR // // getPMAsInternalLocked is an implementation helper for getPMAsLocked and // getVecPMAsLocked; other clients should call one of those instead. -func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { +func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -245,7 +245,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{ file: mf, off: fr.Start, - translatePerms: usermem.AnyAccess, + translatePerms: hostarch.AnyAccess, effectivePerms: vma.effectivePerms, maxPerms: vma.maxPerms, // Since we just allocated this memory and have the @@ -335,7 +335,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter // Neither of these cases has enough spatial locality to // benefit from copying nearby pages, so if the vma is // executable, only copy the pages required. - var copyAR usermem.AddrRange + var copyAR hostarch.AddrRange if vseg.ValuePtr().effectivePerms.Execute { copyAR = pseg.Range().Intersect(ar) } else { @@ -366,7 +366,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter // Replace the pma with a copy in the part of the address // range where copying was successful. This doesn't change // RSS. - copyAR.End = copyAR.Start + usermem.Addr(fr.Length()) + copyAR.End = copyAR.Start + hostarch.Addr(fr.Length()) if copyAR != pseg.Range() { pseg = mm.pmas.Isolate(pseg, copyAR) pstart = pmaIterator{} // iterators invalidated @@ -380,7 +380,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter mf.IncRef(fr) oldpma.file = mf oldpma.off = fr.Start - oldpma.translatePerms = usermem.AnyAccess + oldpma.translatePerms = hostarch.AnyAccess oldpma.effectivePerms = vma.effectivePerms oldpma.maxPerms = vma.maxPerms oldpma.needCOW = false @@ -499,14 +499,14 @@ const ( // privateAllocUnit may reduce page faults by allowing fewer, larger pmas // to be mapped, but may result in larger amounts of wasted memory in the // presence of fragmentation. privateAllocUnit must be a power-of-2 - // multiple of usermem.PageSize. - privateAllocUnit = usermem.HugePageSize + // multiple of hostarch.PageSize. + privateAllocUnit = hostarch.HugePageSize privateAllocMask = privateAllocUnit - 1 ) -func privateAligned(ar usermem.AddrRange) usermem.AddrRange { - aligned := usermem.AddrRange{ar.Start &^ privateAllocMask, ar.End} +func privateAligned(ar hostarch.AddrRange) hostarch.AddrRange { + aligned := hostarch.AddrRange{ar.Start &^ privateAllocMask, ar.End} if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End { aligned.End = end } @@ -548,7 +548,7 @@ func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterat rseg := mm.privateRefs.refs.FindSegment(fr.Start) if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() { pma.needCOW = false - // pma.private => pma.translatePerms == usermem.AnyAccess + // pma.private => pma.translatePerms == hostarch.AnyAccess vma := vseg.ValuePtr() pma.effectivePerms = vma.effectivePerms pma.maxPerms = vma.maxPerms @@ -558,7 +558,7 @@ func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterat } // Invalidate implements memmap.MappingSpace.Invalidate. -func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) { +func (mm *MemoryManager) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -581,7 +581,7 @@ func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.Invalidate // * mm.activeMu must be locked for writing. // * ar.Length() != 0. // * ar must be page-aligned. -func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivate, invalidateShared bool) { +func (mm *MemoryManager) invalidateLocked(ar hostarch.AddrRange, invalidatePrivate, invalidateShared bool) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -627,7 +627,7 @@ func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivat // Preconditions: // * ar.Length() != 0. // * ar must be page-aligned. -func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) ([]PinnedRange, error) { +func (mm *MemoryManager) Pin(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) ([]PinnedRange, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -683,7 +683,7 @@ func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at userm // PinnedRanges are returned by MemoryManager.Pin. type PinnedRange struct { // Source is the corresponding range of addresses. - Source usermem.AddrRange + Source hostarch.AddrRange // File is the mapped file. File memmap.File @@ -713,7 +713,7 @@ func Unpin(prs []PinnedRange) { // * !oldAR.Overlaps(newAR). // * mm.pmas.IsEmptyRange(newAR). // * oldAR and newAR must be page-aligned. -func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) { +func (mm *MemoryManager) movePMAsLocked(oldAR, newAR hostarch.AddrRange) { if checkInvariants { if !oldAR.WellFormed() || oldAR.Length() == 0 || !oldAR.IsPageAligned() { panic(fmt.Sprintf("invalid oldAR: %v", oldAR)) @@ -731,7 +731,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) { } type movedPMA struct { - oldAR usermem.AddrRange + oldAR hostarch.AddrRange pma pma } var movedPMAs []movedPMA @@ -751,7 +751,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) { pgap := mm.pmas.FindGap(newAR.Start) for i := range movedPMAs { mpma := &movedPMAs[i] - pmaNewAR := usermem.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off} + pmaNewAR := hostarch.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off} pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap() } @@ -776,7 +776,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) { // // Postconditions: getPMAInternalMappingsLocked does not invalidate iterators // into mm.pmas. -func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) (pmaGapIterator, error) { +func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) (pmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -808,7 +808,7 @@ func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar userm // // Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators // into mm.pmas. -func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSeq) (usermem.AddrRangeSeq, error) { +func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars hostarch.AddrRangeSeq) (hostarch.AddrRangeSeq, error) { for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { ar := arsit.Head() if ar.Length() == 0 { @@ -829,7 +829,7 @@ func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSe // in ar. // * ar.Length() != 0. // * pseg.Range().Contains(ar.Start). -func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) safemem.BlockSeq { +func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) safemem.BlockSeq { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -866,7 +866,7 @@ func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.Add // * mm.activeMu must be locked. // * Internal mappings must have been previously established for all addresses // in ars. -func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) safemem.BlockSeq { +func (mm *MemoryManager) vecInternalMappingsLocked(ars hostarch.AddrRangeSeq) safemem.BlockSeq { var ims []safemem.Block for ; !ars.IsEmpty(); ars = ars.Tail() { ar := ars.Head() @@ -931,7 +931,7 @@ func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) { // MemoryManager to reflect the insertion of a pma at ar. // // Preconditions: mm.activeMu must be locked for writing. -func (mm *MemoryManager) addRSSLocked(ar usermem.AddrRange) { +func (mm *MemoryManager) addRSSLocked(ar hostarch.AddrRange) { mm.curRSS += uint64(ar.Length()) if mm.curRSS > mm.maxRSS { mm.maxRSS = mm.curRSS @@ -942,19 +942,19 @@ func (mm *MemoryManager) addRSSLocked(ar usermem.AddrRange) { // reflect the removal of a pma at ar. // // Preconditions: mm.activeMu must be locked for writing. -func (mm *MemoryManager) removeRSSLocked(ar usermem.AddrRange) { +func (mm *MemoryManager) removeRSSLocked(ar hostarch.AddrRange) { mm.curRSS -= uint64(ar.Length()) } // pmaSetFunctions implements segment.Functions for pmaSet. type pmaSetFunctions struct{} -func (pmaSetFunctions) MinKey() usermem.Addr { +func (pmaSetFunctions) MinKey() hostarch.Addr { return 0 } -func (pmaSetFunctions) MaxKey() usermem.Addr { - return ^usermem.Addr(0) +func (pmaSetFunctions) MaxKey() hostarch.Addr { + return ^hostarch.Addr(0) } func (pmaSetFunctions) ClearValue(pma *pma) { @@ -962,7 +962,7 @@ func (pmaSetFunctions) ClearValue(pma *pma) { pma.internalMappings = safemem.BlockSeq{} } -func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRange, pma2 pma) (pma, bool) { +func (pmaSetFunctions) Merge(ar1 hostarch.AddrRange, pma1 pma, ar2 hostarch.AddrRange, pma2 pma) (pma, bool) { if pma1.file != pma2.file || pma1.off+uint64(ar1.Length()) != pma2.off || pma1.translatePerms != pma2.translatePerms || @@ -980,7 +980,7 @@ func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRa return pma1, true } -func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (pma, pma) { +func (pmaSetFunctions) Split(ar hostarch.AddrRange, p pma, split hostarch.Addr) (pma, pma) { newlen1 := uint64(split - ar.Start) p2 := p p2.off += newlen1 @@ -997,7 +997,7 @@ func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (p // Preconditions: // * mm.activeMu must be locked. // * addr <= pgap.Start(). -func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr usermem.Addr, pgap pmaGapIterator) pmaIterator { +func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr hostarch.Addr, pgap pmaGapIterator) pmaIterator { if checkInvariants { if !pgap.Ok() { panic("terminal pma iterator") @@ -1045,7 +1045,7 @@ func (pseg pmaIterator) fileRange() memmap.FileRange { // Preconditions: // * pseg.Range().IsSupersetOf(ar). // * ar.Length != 0. -func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) memmap.FileRange { +func (pseg pmaIterator) fileRangeOf(ar hostarch.AddrRange) memmap.FileRange { if checkInvariants { if !pseg.Ok() { panic("terminal pma iterator") diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go index 73bfbea49..f1440e884 100644 --- a/pkg/sentry/mm/procfs.go +++ b/pkg/sentry/mm/procfs.go @@ -19,9 +19,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/usermem" ) const ( @@ -29,7 +29,7 @@ const ( // include/linux/kdev_t.h:MINORBITS devMinorBits = 20 - vsyscallEnd = usermem.Addr(0xffffffffff601000) + vsyscallEnd = hostarch.Addr(0xffffffffff601000) vsyscallMapsEntry = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" vsyscallSmapsEntry = vsyscallMapsEntry + "Size: 4 kB\n" + @@ -62,7 +62,7 @@ func (mm *MemoryManager) NeedsUpdate(generation int64) bool { func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) { mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() - var start usermem.Addr + var start hostarch.Addr for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { mm.appendVMAMapsEntryLocked(ctx, vseg, buf) @@ -88,9 +88,9 @@ func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() var data []seqfile.SeqData - var start usermem.Addr + var start hostarch.Addr if handle != nil { - start = *handle.(*usermem.Addr) + start = *handle.(*hostarch.Addr) } for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { vmaAddr := vseg.End() @@ -177,7 +177,7 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) { mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() - var start usermem.Addr + var start hostarch.Addr for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf) @@ -196,9 +196,9 @@ func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfil mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() var data []seqfile.SeqData - var start usermem.Addr + var start hostarch.Addr if handle != nil { - start = *handle.(*usermem.Addr) + start = *handle.(*hostarch.Addr) } for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() { vmaAddr := vseg.End() @@ -279,8 +279,8 @@ func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIt // Swap is not implemented. fmt.Fprintf(b, "Swap: %8d kB\n", 0) fmt.Fprintf(b, "SwapPss: %8d kB\n", 0) - fmt.Fprintf(b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024) - fmt.Fprintf(b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024) + fmt.Fprintf(b, "KernelPageSize: %8d kB\n", hostarch.PageSize/1024) + fmt.Fprintf(b, "MMUPageSize: %8d kB\n", hostarch.PageSize/1024) locked := rss if vma.mlockMode == memmap.MLockNone { locked = 0 diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go index 6432731d4..3130be80c 100644 --- a/pkg/sentry/mm/shm.go +++ b/pkg/sentry/mm/shm.go @@ -16,13 +16,13 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // DetachShm unmaps a sysv shared memory segment. -func (mm *MemoryManager) DetachShm(ctx context.Context, addr usermem.Addr) error { +func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) error { if addr != addr.RoundDown() { // "... shmaddr is not aligned on a page boundary." - man shmdt(2) return syserror.EINVAL @@ -52,7 +52,7 @@ func (mm *MemoryManager) DetachShm(ctx context.Context, addr usermem.Addr) error } // Remove all vmas that could have been created by the same attach. - end := addr + usermem.Addr(detached.EffectiveSize()) + end := addr + hostarch.Addr(detached.EffectiveSize()) for vseg.Ok() && vseg.End() <= end { vma := vseg.ValuePtr() if vma.mappable == detached && uint64(vseg.Start()-addr) == vma.off { diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index 48d8b6a2b..e748b7ff8 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -16,11 +16,11 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with @@ -77,21 +77,21 @@ func (m *SpecialMappable) Msync(ctx context.Context, mr memmap.MappableRange) er } // AddMapping implements memmap.Mappable.AddMapping. -func (*SpecialMappable) AddMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) error { +func (*SpecialMappable) AddMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) error { return nil } // RemoveMapping implements memmap.Mappable.RemoveMapping. -func (*SpecialMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) { +func (*SpecialMappable) RemoveMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) { } // CopyMapping implements memmap.Mappable.CopyMapping. -func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error { +func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, hostarch.AddrRange, uint64, bool) error { return nil } // Translate implements memmap.Mappable.Translate. -func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { +func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { err = &memmap.BusError{syserror.EFAULT} @@ -102,7 +102,7 @@ func (m *SpecialMappable) Translate(ctx context.Context, required, optional memm Source: source, File: m.mfp.MemoryFile(), Offset: m.fr.Start + source.Start, - Perms: usermem.AnyAccess, + Perms: hostarch.AnyAccess, }, }, err } @@ -146,7 +146,7 @@ func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*Spec if length == 0 { return nil, syserror.EINVAL } - alignedLen, ok := usermem.Addr(length).RoundUp() + alignedLen, ok := hostarch.Addr(length).RoundUp() if !ok { return nil, syserror.EINVAL } diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 69e37330b..7ad6b7c21 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -21,20 +21,20 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // HandleUserFault handles an application page fault. sp is the faulting // application thread's stack pointer. // // Preconditions: mm.as != nil. -func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, at usermem.AccessType, sp usermem.Addr) error { - ar, ok := addr.RoundDown().ToRange(usermem.PageSize) +func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr, at hostarch.AccessType, sp hostarch.Addr) error { + ar, ok := addr.RoundDown().ToRange(hostarch.PageSize) if !ok { return syserror.EFAULT } @@ -72,11 +72,11 @@ func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, } // MMap establishes a memory mapping. -func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error) { +func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) { if opts.Length == 0 { return 0, syserror.EINVAL } - length, ok := usermem.Addr(opts.Length).RoundUp() + length, ok := hostarch.Addr(opts.Length).RoundUp() if !ok { return 0, syserror.ENOMEM } @@ -84,7 +84,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme if opts.Mappable != nil { // Offset must be aligned. - if usermem.Addr(opts.Offset).RoundDown() != usermem.Addr(opts.Offset) { + if hostarch.Addr(opts.Offset).RoundDown() != hostarch.Addr(opts.Offset) { return 0, syserror.EINVAL } // Offset + length must not overflow. @@ -157,7 +157,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme // Preconditions: // * mm.mappingMu must be locked. // * vseg.Range().IsSupersetOf(ar). -func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) { +func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { if !vseg.ValuePtr().effectivePerms.Any() { // Linux doesn't populate inaccessible pages. See // mm/gup.c:populate_vma_page_range. @@ -175,7 +175,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u } // Ensure that we have usable pmas. - pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess) + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess) if err != nil { // mm/util.c:vm_mmap_pgoff() ignores the error, if any, from // mm/gup.c:mm_populate(). If it matters, we'll get it again when @@ -203,7 +203,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u // * vseg.Range().IsSupersetOf(ar). // // Postconditions: mm.mappingMu will be unlocked. -func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) { +func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { // See populateVMA above for commentary. if !vseg.ValuePtr().effectivePerms.Any() { mm.mappingMu.Unlock() @@ -221,7 +221,7 @@ func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaItera // mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it // isn't needed at all for mapASLocked. mm.mappingMu.DowngradeLock() - pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess) + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess) mm.mappingMu.RUnlock() if err != nil { mm.activeMu.Unlock() @@ -234,7 +234,7 @@ func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaItera } // MapStack allocates the initial process stack. -func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error) { +func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, error) { // maxStackSize is the maximum supported process stack size in bytes. // // This limit exists because stack growing isn't implemented, so the entire @@ -242,7 +242,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error const maxStackSize = 128 << 20 stackSize := limits.FromContext(ctx).Get(limits.Stack) - r, ok := usermem.Addr(stackSize.Cur).RoundUp() + r, ok := hostarch.Addr(stackSize.Cur).RoundUp() sz := uint64(r) if !ok { // RLIM_INFINITY rounds up to 0. @@ -251,16 +251,16 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize) sz = maxStackSize } else if sz == 0 { - return usermem.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, syserror.ENOMEM } - szaddr := usermem.Addr(sz) + szaddr := hostarch.Addr(sz) ctx.Debugf("Allocating stack with size of %v bytes", sz) // Determine the stack's desired location. Unlike Linux, address // randomization can't be disabled. - stackEnd := mm.layout.MaxAddr - usermem.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown() + stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown() if stackEnd < szaddr { - return usermem.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, syserror.ENOMEM } stackStart := stackEnd - szaddr mm.mappingMu.Lock() @@ -268,8 +268,8 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error _, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{ Length: sz, Addr: stackStart, - Perms: usermem.ReadWrite, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.ReadWrite, + MaxPerms: hostarch.AnyAccess, Private: true, GrowsDown: true, MLockMode: mm.defMLockMode, @@ -279,14 +279,14 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error } // MUnmap implements the semantics of Linux's munmap(2). -func (mm *MemoryManager) MUnmap(ctx context.Context, addr usermem.Addr, length uint64) error { +func (mm *MemoryManager) MUnmap(ctx context.Context, addr hostarch.Addr, length uint64) error { if addr != addr.RoundDown() { return syserror.EINVAL } if length == 0 { return syserror.EINVAL } - la, ok := usermem.Addr(length).RoundUp() + la, ok := hostarch.Addr(length).RoundUp() if !ok { return syserror.EINVAL } @@ -308,7 +308,7 @@ type MRemapOpts struct { // NewAddr is the new address for the remapping. NewAddr is ignored unless // Move is MMRemapMustMove. - NewAddr usermem.Addr + NewAddr hostarch.Addr } // MRemapMoveMode controls MRemap's moving behavior. @@ -328,7 +328,7 @@ const ( ) // MRemap implements the semantics of Linux's mremap(2). -func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (usermem.Addr, error) { +func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (hostarch.Addr, error) { // "Note that old_address has to be page aligned." - mremap(2) if oldAddr.RoundDown() != oldAddr { return 0, syserror.EINVAL @@ -336,9 +336,9 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi // Linux treats an old_size that rounds up to 0 as 0, which is otherwise a // valid size. However, new_size can't be 0 after rounding. - oldSizeAddr, _ := usermem.Addr(oldSize).RoundUp() + oldSizeAddr, _ := hostarch.Addr(oldSize).RoundUp() oldSize = uint64(oldSizeAddr) - newSizeAddr, ok := usermem.Addr(newSize).RoundUp() + newSizeAddr, ok := hostarch.Addr(newSize).RoundUp() if !ok || newSizeAddr == 0 { return 0, syserror.EINVAL } @@ -392,8 +392,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi if newSize < oldSize { // If oldAddr+oldSize didn't overflow, oldAddr+newSize can't // either. - newEnd := oldAddr + usermem.Addr(newSize) - mm.unmapLocked(ctx, usermem.AddrRange{newEnd, oldEnd}) + newEnd := oldAddr + hostarch.Addr(newSize) + mm.unmapLocked(ctx, hostarch.AddrRange{newEnd, oldEnd}) } return oldAddr, nil } @@ -438,7 +438,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi } // Find a location for the new mapping. - var newAR usermem.AddrRange + var newAR hostarch.AddrRange switch opts.Move { case MRemapMayMove: newAddr, err := mm.findAvailableLocked(newSize, findAvailableOpts{}) @@ -457,7 +457,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi if !ok { return 0, syserror.EINVAL } - if (usermem.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { + if (hostarch.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { return 0, syserror.EINVAL } @@ -479,8 +479,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi // correct: compare Linux's mm/mremap.c:mremap_to() => do_munmap(), // vma_to_resize(). if newSize < oldSize { - oldNewEnd := oldAddr + usermem.Addr(newSize) - mm.unmapLocked(ctx, usermem.AddrRange{oldNewEnd, oldEnd}) + oldNewEnd := oldAddr + hostarch.Addr(newSize) + mm.unmapLocked(ctx, hostarch.AddrRange{oldNewEnd, oldEnd}) oldEnd = oldNewEnd } @@ -488,7 +488,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi vseg = mm.vmas.FindSegment(oldAddr) } - oldAR := usermem.AddrRange{oldAddr, oldEnd} + oldAR := hostarch.AddrRange{oldAddr, oldEnd} // Check that oldEnd maps to the same vma as oldAddr. if vseg.End() < oldEnd { @@ -588,14 +588,14 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi } // MProtect implements the semantics of Linux's mprotect(2). -func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms usermem.AccessType, growsDown bool) error { +func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms hostarch.AccessType, growsDown bool) error { if addr.RoundDown() != addr { return syserror.EINVAL } if length == 0 { return nil } - rlength, ok := usermem.Addr(length).RoundUp() + rlength, ok := hostarch.Addr(length).RoundUp() if !ok { return syserror.ENOMEM } @@ -692,19 +692,19 @@ func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms us } // BrkSetup sets mm's brk address to addr and its brk size to 0. -func (mm *MemoryManager) BrkSetup(ctx context.Context, addr usermem.Addr) { +func (mm *MemoryManager) BrkSetup(ctx context.Context, addr hostarch.Addr) { mm.mappingMu.Lock() defer mm.mappingMu.Unlock() // Unmap the existing brk. if mm.brk.Length() != 0 { mm.unmapLocked(ctx, mm.brk) } - mm.brk = usermem.AddrRange{addr, addr} + mm.brk = hostarch.AddrRange{addr, addr} } // Brk implements the semantics of Linux's brk(2), except that it returns an // error on failure. -func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Addr, error) { +func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch.Addr, error) { mm.mappingMu.Lock() // Can't defer mm.mappingMu.Unlock(); see below. @@ -741,8 +741,8 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad Fixed: true, // Compare Linux's // arch/x86/include/asm/page_types.h:VM_DATA_DEFAULT_FLAGS. - Perms: usermem.ReadWrite, - MaxPerms: usermem.AnyAccess, + Perms: hostarch.ReadWrite, + MaxPerms: hostarch.AnyAccess, Private: true, // Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes // mm->def_flags. @@ -762,7 +762,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad } case newbrkpg < oldbrkpg: - mm.unmapLocked(ctx, usermem.AddrRange{newbrkpg, oldbrkpg}) + mm.unmapLocked(ctx, hostarch.AddrRange{newbrkpg, oldbrkpg}) fallthrough default: @@ -775,9 +775,9 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad // MLock implements the semantics of Linux's mlock()/mlock2()/munlock(), // depending on mode. -func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length uint64, mode memmap.MLockMode) error { +func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length uint64, mode memmap.MLockMode) error { // Linux allows this to overflow. - la, _ := usermem.Addr(length + addr.PageOffset()).RoundUp() + la, _ := hostarch.Addr(length + addr.PageOffset()).RoundUp() ar, ok := addr.RoundDown().ToRange(uint64(la)) if !ok { return syserror.EINVAL @@ -850,7 +850,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length ui mm.mappingMu.RUnlock() return syserror.ENOMEM } - _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), usermem.NoAccess) + _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess) if err != nil { mm.activeMu.Unlock() mm.mappingMu.RUnlock() @@ -945,7 +945,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error mm.mappingMu.DowngradeLock() for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { if vseg.ValuePtr().effectivePerms.Any() { - mm.getPMAsLocked(ctx, vseg, vseg.Range(), usermem.NoAccess) + mm.getPMAsLocked(ctx, vseg, vseg.Range(), hostarch.NoAccess) } } @@ -965,7 +965,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error } // NumaPolicy implements the semantics of Linux's get_mempolicy(MPOL_F_ADDR). -func (mm *MemoryManager) NumaPolicy(addr usermem.Addr) (linux.NumaPolicy, uint64, error) { +func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint64, error) { mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() vseg := mm.vmas.FindSegment(addr) @@ -977,12 +977,12 @@ func (mm *MemoryManager) NumaPolicy(addr usermem.Addr) (linux.NumaPolicy, uint64 } // SetNumaPolicy implements the semantics of Linux's mbind(). -func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error { +func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error { if !addr.IsPageAligned() { return syserror.EINVAL } // Linux allows this to overflow. - la, _ := usermem.Addr(length).RoundUp() + la, _ := hostarch.Addr(length).RoundUp() ar, ok := addr.ToRange(uint64(la)) if !ok { return syserror.EINVAL @@ -1018,7 +1018,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy } // SetDontFork implements the semantics of madvise MADV_DONTFORK. -func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork bool) error { +func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork bool) error { ar, ok := addr.ToRange(length) if !ok { return syserror.EINVAL @@ -1044,7 +1044,7 @@ func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork } // Decommit implements the semantics of Linux's madvise(MADV_DONTNEED). -func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error { +func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { ar, ok := addr.ToRange(length) if !ok { return syserror.EINVAL @@ -1112,14 +1112,14 @@ type MSyncOpts struct { } // MSync implements the semantics of Linux's msync(). -func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length uint64, opts MSyncOpts) error { +func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length uint64, opts MSyncOpts) error { if addr != addr.RoundDown() { return syserror.EINVAL } if length == 0 { return nil } - la, ok := usermem.Addr(length).RoundUp() + la, ok := hostarch.Addr(length).RoundUp() if !ok { return syserror.ENOMEM } @@ -1188,7 +1188,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length ui } // GetSharedFutexKey is used by kernel.Task.GetSharedKey. -func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Addr) (futex.Key, error) { +func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr hostarch.Addr) (futex.Key, error) { ar, ok := addr.ToRange(4) // sizeof(int32). if !ok { return futex.Key{}, syserror.EFAULT @@ -1196,7 +1196,7 @@ func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Add mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() - vseg, _, err := mm.getVMAsLocked(ctx, ar, usermem.Read, false) + vseg, _, err := mm.getVMAsLocked(ctx, ar, hostarch.Read, false) if err != nil { return futex.Key{}, err } @@ -1230,7 +1230,7 @@ func (mm *MemoryManager) VirtualMemorySize() uint64 { // VirtualMemorySizeRange returns the combined length in bytes of all mappings // in ar in mm. -func (mm *MemoryManager) VirtualMemorySizeRange(ar usermem.AddrRange) uint64 { +func (mm *MemoryManager) VirtualMemorySizeRange(ar hostarch.AddrRange) uint64 { mm.mappingMu.RLock() defer mm.mappingMu.RUnlock() return uint64(mm.vmas.SpanRange(ar)) diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index b8df72813..0d019e41d 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -19,18 +19,18 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Preconditions: // * mm.mappingMu must be locked for writing. // * opts must be valid as defined by the checks in MMap. -func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, usermem.AddrRange, error) { +func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, hostarch.AddrRange, error) { if opts.MaxPerms != opts.MaxPerms.Effective() { panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms)) } @@ -47,7 +47,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp if opts.Force && opts.Unmap && opts.Fixed { addr = opts.Addr } else { - return vmaIterator{}, usermem.AddrRange{}, err + return vmaIterator{}, hostarch.AddrRange{}, err } } ar, _ := addr.ToRange(opts.Length) @@ -58,7 +58,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp newUsageAS -= uint64(mm.vmas.SpanRange(ar)) } if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { - return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM + return vmaIterator{}, hostarch.AddrRange{}, syserror.ENOMEM } if opts.MLockMode != memmap.MLockNone { @@ -66,14 +66,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { - return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM + return vmaIterator{}, hostarch.AddrRange{}, syserror.EPERM } newLockedAS := mm.lockedAS + opts.Length if opts.Unmap { newLockedAS -= mm.mlockedBytesRangeLocked(ar) } if newLockedAS > mlockLimit { - return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN + return vmaIterator{}, hostarch.AddrRange{}, syserror.EAGAIN } } } @@ -93,7 +93,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp // The expression for writable is vma.canWriteMappableLocked(), but we // don't yet have a vma. if err := opts.Mappable.AddMapping(ctx, mm, ar, opts.Offset, !opts.Private && opts.MaxPerms.Write); err != nil { - return vmaIterator{}, usermem.AddrRange{}, err + return vmaIterator{}, hostarch.AddrRange{}, err } } @@ -137,7 +137,7 @@ type findAvailableOpts struct { // // - Unmap allows existing guard pages in the returned range. - Addr usermem.Addr + Addr hostarch.Addr Fixed bool Unmap bool Map32Bit bool @@ -153,13 +153,13 @@ const ( // findAvailableLocked finds an allocatable range. // // Preconditions: mm.mappingMu must be locked. -func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOpts) (usermem.Addr, error) { +func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOpts) (hostarch.Addr, error) { if opts.Fixed { opts.Map32Bit = false } allowedAR := mm.applicationAddrRange() if opts.Map32Bit { - allowedAR = allowedAR.Intersect(usermem.AddrRange{map32Start, map32End}) + allowedAR = allowedAR.Intersect(hostarch.AddrRange{map32Start, map32End}) } // Does the provided suggestion work? @@ -181,33 +181,33 @@ func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOp } // Prefer hugepage alignment if a hugepage or more is requested. - alignment := uint64(usermem.PageSize) - if length >= usermem.HugePageSize { - alignment = usermem.HugePageSize + alignment := uint64(hostarch.PageSize) + if length >= hostarch.HugePageSize { + alignment = hostarch.HugePageSize } if opts.Map32Bit { return mm.findLowestAvailableLocked(length, alignment, allowedAR) } if mm.layout.DefaultDirection == arch.MmapBottomUp { - return mm.findLowestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.BottomUpBase, mm.layout.MaxAddr}) + return mm.findLowestAvailableLocked(length, alignment, hostarch.AddrRange{mm.layout.BottomUpBase, mm.layout.MaxAddr}) } - return mm.findHighestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.MinAddr, mm.layout.TopDownBase}) + return mm.findHighestAvailableLocked(length, alignment, hostarch.AddrRange{mm.layout.MinAddr, mm.layout.TopDownBase}) } -func (mm *MemoryManager) applicationAddrRange() usermem.AddrRange { - return usermem.AddrRange{mm.layout.MinAddr, mm.layout.MaxAddr} +func (mm *MemoryManager) applicationAddrRange() hostarch.AddrRange { + return hostarch.AddrRange{mm.layout.MinAddr, mm.layout.MaxAddr} } // Preconditions: mm.mappingMu must be locked. -func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) { - for gap := mm.vmas.LowerBoundGap(bounds.Start); gap.Ok() && gap.Start() < bounds.End; gap = gap.NextLargeEnoughGap(usermem.Addr(length)) { +func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bounds hostarch.AddrRange) (hostarch.Addr, error) { + for gap := mm.vmas.LowerBoundGap(bounds.Start); gap.Ok() && gap.Start() < bounds.End; gap = gap.NextLargeEnoughGap(hostarch.Addr(length)) { if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length { // Can we shift up to match the alignment? if offset := uint64(gr.Start) % alignment; offset != 0 { if uint64(gr.Length()) >= length+alignment-offset { // Yes, we're aligned. - return gr.Start + usermem.Addr(alignment-offset), nil + return gr.Start + hostarch.Addr(alignment-offset), nil } } @@ -219,15 +219,15 @@ func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bou } // Preconditions: mm.mappingMu must be locked. -func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) { - for gap := mm.vmas.UpperBoundGap(bounds.End); gap.Ok() && gap.End() > bounds.Start; gap = gap.PrevLargeEnoughGap(usermem.Addr(length)) { +func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bounds hostarch.AddrRange) (hostarch.Addr, error) { + for gap := mm.vmas.UpperBoundGap(bounds.End); gap.Ok() && gap.End() > bounds.Start; gap = gap.PrevLargeEnoughGap(hostarch.Addr(length)) { if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length { // Can we shift down to match the alignment? - start := gr.End - usermem.Addr(length) + start := gr.End - hostarch.Addr(length) if offset := uint64(start) % alignment; offset != 0 { - if gr.Start <= start-usermem.Addr(offset) { + if gr.Start <= start-hostarch.Addr(offset) { // Yes, we're aligned. - return start - usermem.Addr(offset), nil + return start - hostarch.Addr(offset), nil } } @@ -239,7 +239,7 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo } // Preconditions: mm.mappingMu must be locked. -func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 { +func (mm *MemoryManager) mlockedBytesRangeLocked(ar hostarch.AddrRange) uint64 { var total uint64 for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { if vseg.ValuePtr().mlockMode != memmap.MLockNone { @@ -264,7 +264,7 @@ func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 { // Preconditions: // * mm.mappingMu must be locked for reading; it may be temporarily unlocked. // * ar.Length() != 0. -func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) { +func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -320,7 +320,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange // temporarily unlocked. // // Postconditions: ars is not mutated. -func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool) (usermem.AddrRangeSeq, error) { +func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool) (hostarch.AddrRangeSeq, error) { for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { ar := arsit.Head() if ar.Length() == 0 { @@ -339,7 +339,7 @@ func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars usermem.AddrR // // guardBytes is equivalent to Linux's stack_guard_gap after upstream // 1be7107fbe18 "mm: larger stack guard gap, between vmas". -const guardBytes = 256 * usermem.PageSize +const guardBytes = 256 * hostarch.PageSize // unmapLocked unmaps all addresses in ar and returns the resulting gap in // mm.vmas. @@ -348,7 +348,7 @@ const guardBytes = 256 * usermem.PageSize // * mm.mappingMu must be locked for writing. // * ar.Length() != 0. // * ar must be page-aligned. -func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator { +func (mm *MemoryManager) unmapLocked(ctx context.Context, ar hostarch.AddrRange) vmaGapIterator { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -369,7 +369,7 @@ func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange) // * mm.mappingMu must be locked for writing. // * ar.Length() != 0. // * ar must be page-aligned. -func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator { +func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar hostarch.AddrRange) vmaGapIterator { if checkInvariants { if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -426,12 +426,12 @@ func (vma *vma) isPrivateDataLocked() bool { // vmaSetFunctions implements segment.Functions for vmaSet. type vmaSetFunctions struct{} -func (vmaSetFunctions) MinKey() usermem.Addr { +func (vmaSetFunctions) MinKey() hostarch.Addr { return 0 } -func (vmaSetFunctions) MaxKey() usermem.Addr { - return ^usermem.Addr(0) +func (vmaSetFunctions) MaxKey() hostarch.Addr { + return ^hostarch.Addr(0) } func (vmaSetFunctions) ClearValue(vma *vma) { @@ -440,7 +440,7 @@ func (vmaSetFunctions) ClearValue(vma *vma) { vma.hint = "" } -func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRange, vma2 vma) (vma, bool) { +func (vmaSetFunctions) Merge(ar1 hostarch.AddrRange, vma1 vma, ar2 hostarch.AddrRange, vma2 vma) (vma, bool) { if vma1.mappable != vma2.mappable || (vma1.mappable != nil && vma1.off+uint64(ar1.Length()) != vma2.off) || vma1.realPerms != vma2.realPerms || @@ -462,7 +462,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa return vma1, true } -func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (vma, vma) { +func (vmaSetFunctions) Split(ar hostarch.AddrRange, v vma, split hostarch.Addr) (vma, vma) { v2 := v if v2.mappable != nil { v2.off += uint64(split - ar.Start) @@ -476,7 +476,7 @@ func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (v // Preconditions: // * vseg.ValuePtr().mappable != nil. // * vseg.Range().Contains(addr). -func (vseg vmaIterator) mappableOffsetAt(addr usermem.Addr) uint64 { +func (vseg vmaIterator) mappableOffsetAt(addr hostarch.Addr) uint64 { if checkInvariants { if !vseg.Ok() { panic("terminal vma iterator") @@ -503,7 +503,7 @@ func (vseg vmaIterator) mappableRange() memmap.MappableRange { // * vseg.ValuePtr().mappable != nil. // * vseg.Range().IsSupersetOf(ar). // * ar.Length() != 0. -func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRange { +func (vseg vmaIterator) mappableRangeOf(ar hostarch.AddrRange) memmap.MappableRange { if checkInvariants { if !vseg.Ok() { panic("terminal vma iterator") @@ -528,7 +528,7 @@ func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRan // * vseg.ValuePtr().mappable != nil. // * vseg.mappableRange().IsSupersetOf(mr). // * mr.Length() != 0. -func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange { +func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) hostarch.AddrRange { if checkInvariants { if !vseg.Ok() { panic("terminal vma iterator") @@ -546,7 +546,7 @@ func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange { vma := vseg.ValuePtr() vstart := vseg.Start() - return usermem.AddrRange{vstart + usermem.Addr(mr.Start-vma.off), vstart + usermem.Addr(mr.End-vma.off)} + return hostarch.AddrRange{vstart + hostarch.Addr(mr.Start-vma.off), vstart + hostarch.Addr(mr.End-vma.off)} } // seekNextLowerBound returns mm.vmas.LowerBoundSegment(addr), but does so by @@ -555,7 +555,7 @@ func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange { // Preconditions: // * mm.mappingMu must be locked. // * addr >= vseg.Start(). -func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator { +func (vseg vmaIterator) seekNextLowerBound(addr hostarch.Addr) vmaIterator { if checkInvariants { if !vseg.Ok() { panic("terminal vma iterator") @@ -572,7 +572,7 @@ func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator { // availableRange returns the subset of vgap.Range() in which new vmas may be // created without MMapOpts.Unmap == true. -func (vgap vmaGapIterator) availableRange() usermem.AddrRange { +func (vgap vmaGapIterator) availableRange() hostarch.AddrRange { ar := vgap.Range() next := vgap.NextSegment() if !next.Ok() || !next.ValuePtr().growsDown { @@ -580,7 +580,7 @@ func (vgap vmaGapIterator) availableRange() usermem.AddrRange { } // Exclude guard pages. if ar.Length() < guardBytes { - return usermem.AddrRange{ar.Start, ar.Start} + return hostarch.AddrRange{ar.Start, ar.Start} } ar.End -= guardBytes return ar diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index e5bf13c40..57d73d770 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -85,6 +85,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/memutil", "//pkg/safemem", @@ -106,5 +107,5 @@ go_test( size = "small", srcs = ["pgalloc_test.go"], library = ":pgalloc", - deps = ["//pkg/usermem"], + deps = ["//pkg/hostarch"], ) diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index a4af3e21b..b81292c46 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -31,6 +31,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/hostmm" @@ -38,7 +39,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // MemoryFile is a memmap.File whose pages may be allocated to arbitrary @@ -283,7 +283,7 @@ const ( chunkMask = chunkSize - 1 // maxPage is the highest 64-bit page. - maxPage = math.MaxUint64 &^ (usermem.PageSize - 1) + maxPage = math.MaxUint64 &^ (hostarch.PageSize - 1) ) // NewMemoryFile creates a MemoryFile backed by the given file. If @@ -344,7 +344,7 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) { m, _, errno := unix.Syscall6( unix.SYS_MMAP, 0, - usermem.PageSize, + hostarch.PageSize, unix.PROT_EXEC, unix.MAP_SHARED, file.Fd(), @@ -357,7 +357,7 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) { if _, _, errno := unix.Syscall( unix.SYS_MUNMAP, m, - usermem.PageSize, + hostarch.PageSize, 0); errno != 0 { panic(fmt.Sprintf("failed to unmap PROT_EXEC MemoryFile mapping: %v", errno)) } @@ -386,7 +386,7 @@ func (f *MemoryFile) Destroy() { // // Preconditions: length must be page-aligned and non-zero. func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.FileRange, error) { - if length == 0 || length%usermem.PageSize != 0 { + if length == 0 || length%hostarch.PageSize != 0 { panic(fmt.Sprintf("invalid allocation length: %#x", length)) } @@ -395,9 +395,9 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.File // Align hugepage-and-larger allocations on hugepage boundaries to try // to take advantage of hugetmpfs. - alignment := uint64(usermem.PageSize) - if length >= usermem.HugePageSize { - alignment = usermem.HugePageSize + alignment := uint64(hostarch.PageSize) + if length >= hostarch.HugePageSize { + alignment = hostarch.HugePageSize } // Find a range in the underlying file. @@ -524,13 +524,13 @@ func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r saf if err != nil { return memmap.FileRange{}, err } - dsts, err := f.MapInternal(fr, usermem.Write) + dsts, err := f.MapInternal(fr, hostarch.Write) if err != nil { f.DecRef(fr) return memmap.FileRange{}, err } n, err := safemem.ReadFullToBlocks(r, dsts) - un := uint64(usermem.Addr(n).RoundDown()) + un := uint64(hostarch.Addr(n).RoundDown()) if un < length { // Free unused memory and update fr to contain only the memory that is // still allocated. @@ -552,7 +552,7 @@ const ( // // Preconditions: fr.Length() > 0. func (f *MemoryFile) Decommit(fr memmap.FileRange) error { - if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { + if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -614,7 +614,7 @@ func (f *MemoryFile) markDecommitted(fr memmap.FileRange) { // IncRef implements memmap.File.IncRef. func (f *MemoryFile) IncRef(fr memmap.FileRange) { - if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { + if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -633,7 +633,7 @@ func (f *MemoryFile) IncRef(fr memmap.FileRange) { // DecRef implements memmap.File.DecRef. func (f *MemoryFile) DecRef(fr memmap.FileRange) { - if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { + if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -669,7 +669,7 @@ func (f *MemoryFile) DecRef(fr memmap.FileRange) { } // MapInternal implements memmap.File.MapInternal. -func (f *MemoryFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (f *MemoryFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { if !fr.WellFormed() || fr.Length() == 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -935,7 +935,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func( // Ensure that we have sufficient buffer for the call // (one byte per page). The length of each slice must // be page-aligned. - bufLen := len(s) / usermem.PageSize + bufLen := len(s) / hostarch.PageSize if len(buf) < bufLen { buf = make([]byte, bufLen) } @@ -967,8 +967,8 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func( } } committedFR := memmap.FileRange{ - Start: r.Start + uint64(i*usermem.PageSize), - End: r.Start + uint64(j*usermem.PageSize), + Start: r.Start + uint64(i*hostarch.PageSize), + End: r.Start + uint64(j*hostarch.PageSize), } // Advance seg to committedFR.Start. for seg.Ok() && seg.End() < committedFR.Start { diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go index 405db141f..8d2b7eb5e 100644 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ b/pkg/sentry/pgalloc/pgalloc_test.go @@ -17,12 +17,12 @@ package pgalloc import ( "testing" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) const ( - page = usermem.PageSize - hugepage = usermem.HugePageSize + page = hostarch.PageSize + hugepage = hostarch.HugePageSize topPage = (1 << 63) - page ) diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go index e05c8d074..345cdde55 100644 --- a/pkg/sentry/pgalloc/save_restore.go +++ b/pkg/sentry/pgalloc/save_restore.go @@ -23,11 +23,11 @@ import ( "sync/atomic" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/state" "gvisor.dev/gvisor/pkg/state/wire" - "gvisor.dev/gvisor/pkg/usermem" ) // SaveTo writes f's state to the given stream. @@ -49,11 +49,11 @@ func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error { // Ensure that all pages that contain data have knownCommitted set, since // we only store knownCommitted pages below. - zeroPage := make([]byte, usermem.PageSize) + zeroPage := make([]byte, hostarch.PageSize) err := f.updateUsageLocked(0, func(bs []byte, committed []byte) error { - for pgoff := 0; pgoff < len(bs); pgoff += usermem.PageSize { - i := pgoff / usermem.PageSize - pg := bs[pgoff : pgoff+usermem.PageSize] + for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize { + i := pgoff / hostarch.PageSize + pg := bs[pgoff : pgoff+hostarch.PageSize] if !bytes.Equal(pg, zeroPage) { committed[i] = 1 continue diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index db7d55ef2..7125657b3 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -13,6 +13,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/hostmm", diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 03a76eb9b..f04898dc1 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -43,6 +43,7 @@ go_library( "//pkg/atomicbitops", "//pkg/context", "//pkg/cpuid", + "//pkg/hostarch", "//pkg/log", "//pkg/procid", "//pkg/ring0", @@ -56,7 +57,6 @@ go_library( "//pkg/sentry/platform/interrupt", "//pkg/sentry/time", "//pkg/sync", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) @@ -76,6 +76,7 @@ go_test( "requires-kvm", ], deps = [ + "//pkg/hostarch", "//pkg/ring0", "//pkg/ring0/pagetables", "//pkg/sentry/arch", @@ -83,7 +84,6 @@ go_test( "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", "//pkg/sentry/time", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 25c21e843..5524e8727 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -18,11 +18,11 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // dirtySet tracks vCPUs for invalidation. @@ -118,7 +118,7 @@ type hostMapEntry struct { // +checkescape:hard,stack // //go:nosplit -func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem.AccessType) (inv bool) { +func (as *addressSpace) mapLocked(addr hostarch.Addr, m hostMapEntry, at hostarch.AccessType) (inv bool) { for m.length > 0 { physical, length, ok := translateToPhysical(m.addr) if !ok { @@ -144,14 +144,14 @@ func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem. }, physical) || inv m.addr += length m.length -= length - addr += usermem.Addr(length) + addr += hostarch.Addr(length) } return inv } // MapFile implements platform.AddressSpace.MapFile. -func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error { +func (as *addressSpace) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error { as.mu.Lock() defer as.mu.Unlock() @@ -165,7 +165,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File // We don't execute from application file-mapped memory, and guest page // tables don't care if we have execute permission (but they do need pages // to be readable). - bs, err := f.MapInternal(fr, usermem.AccessType{ + bs, err := f.MapInternal(fr, hostarch.AccessType{ Read: at.Read || at.Execute || precommit, Write: at.Write, }) @@ -187,7 +187,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File // lookup in our host page tables for this translation. if precommit { s := b.ToSlice() - for i := 0; i < len(s); i += usermem.PageSize { + for i := 0; i < len(s); i += hostarch.PageSize { _ = s[i] // Touch to commit. } } @@ -201,7 +201,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File length: uintptr(b.Len()), }, at) inv = inv || prev - addr += usermem.Addr(b.Len()) + addr += hostarch.Addr(b.Len()) } if inv { as.invalidate() @@ -215,12 +215,12 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File // +checkescape:hard,stack // //go:nosplit -func (as *addressSpace) unmapLocked(addr usermem.Addr, length uint64) bool { +func (as *addressSpace) unmapLocked(addr hostarch.Addr, length uint64) bool { return as.pageTables.Unmap(addr, uintptr(length)) } // Unmap unmaps the given range by calling pagetables.PageTables.Unmap. -func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) { +func (as *addressSpace) Unmap(addr hostarch.Addr, length uint64) { as.mu.Lock() defer as.mu.Unlock() diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go index 37c53fa02..28a613a54 100644 --- a/pkg/sentry/platform/kvm/bluepill_fault.go +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) const ( @@ -47,7 +47,7 @@ func yield() { // //go:nosplit func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, ok bool) { - alignedPhysical := physical &^ uintptr(usermem.PageSize-1) + alignedPhysical := physical &^ uintptr(hostarch.PageSize-1) for _, pr := range phyRegions { end := pr.physical + pr.length if physical < pr.physical || physical >= end { diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index 706fa53dc..f4d4473a8 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -18,11 +18,11 @@ import ( "sync/atomic" pkgcontext "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" - "gvisor.dev/gvisor/pkg/usermem" ) // context is an implementation of the platform context. @@ -40,7 +40,7 @@ type context struct { } // Switch runs the provided context in the given address space. -func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, usermem.AccessType, error) { +func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, hostarch.AccessType, error) { as := mm.AddressSpace() localAS := as.(*addressSpace) @@ -50,7 +50,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a // Enable interrupts (i.e. calls to vCPU.Notify). if !c.interrupt.Enable(cpu) { c.machine.Put(cpu) // Already preempted. - return nil, usermem.NoAccess, platform.ErrContextInterrupt + return nil, hostarch.NoAccess, platform.ErrContextInterrupt } // Set the active address space. diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 92c05a9ad..aac0fdffe 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -20,11 +20,11 @@ import ( "os" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // userMemoryRegion is a region of physical memory. @@ -146,13 +146,13 @@ func (*KVM) MapUnit() uint64 { } // MinUserAddress returns the lowest available address. -func (*KVM) MinUserAddress() usermem.Addr { - return usermem.PageSize +func (*KVM) MinUserAddress() hostarch.Addr { + return hostarch.PageSize } // MaxUserAddress returns the first address that may not be used. -func (*KVM) MaxUserAddress() usermem.Addr { - return usermem.Addr(ring0.MaximumUserAddress) +func (*KVM) MaxUserAddress() hostarch.Addr { + return hostarch.Addr(ring0.MaximumUserAddress) } // NewAddressSpace returns a new pagetable root. diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 5bce16dde..ceff09a60 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -22,6 +22,7 @@ import ( "time" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -29,7 +30,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" ktime "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/usermem" ) var dummyFPState = fpu.NewState() @@ -142,8 +142,8 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func // done for regular user code, but is fine for test // purposes.) applyPhysicalRegions(func(pr physicalRegion) bool { - pt.Map(usermem.Addr(pr.virtual), pr.length, pagetables.MapOpts{ - AccessType: usermem.AnyAccess, + pt.Map(hostarch.Addr(pr.virtual), pr.length, pagetables.MapOpts{ + AccessType: hostarch.AnyAccess, User: true, }, pr.physical) return true // Keep iterating. @@ -351,7 +351,7 @@ func TestInvalidate(t *testing.T) { break // Done. } // Unmap the page containing data & invalidate. - pt.Unmap(usermem.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(usermem.PageSize-1)), usermem.PageSize) + pt.Unmap(hostarch.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(hostarch.PageSize-1)), hostarch.PageSize) for { var si arch.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 0e4cf01e1..b3d4188a3 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -21,13 +21,13 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" ktime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // machine contains state associated with the VM as a whole. @@ -227,9 +227,9 @@ func newMachine(vm int) (*machine, error) { applyPhysicalRegions(func(pr physicalRegion) bool { // Map everything in the lower half. m.kernel.PageTables.Map( - usermem.Addr(pr.virtual), + hostarch.Addr(pr.virtual), pr.length, - pagetables.MapOpts{AccessType: usermem.AnyAccess}, + pagetables.MapOpts{AccessType: hostarch.AnyAccess}, pr.physical) return true // Keep iterating. @@ -436,7 +436,7 @@ func (m *machine) Get() *vCPU { } // The vCPU is not be able to transition to - // vCPUGuest|vCPUUser or to vCPUUser because that + // vCPUGuest|vCPUWaiter or to vCPUUser because that // transition requires holding the machine mutex, as we // do now. There is no path to register a waiter on // just the vCPUReady state. diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 3af96c7e5..e8e209249 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -24,13 +24,13 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" ktime "gvisor.dev/gvisor/pkg/sentry/time" - "gvisor.dev/gvisor/pkg/usermem" ) // initArchState initializes architecture-specific state. @@ -41,7 +41,7 @@ func (m *machine) initArchState() error { unix.SYS_IOCTL, uintptr(m.fd), _KVM_SET_TSS_ADDR, - uintptr(reservedMemory-(3*usermem.PageSize))); errno != 0 { + uintptr(reservedMemory-(3*hostarch.PageSize))); errno != 0 { return errno } @@ -256,19 +256,19 @@ func (c *vCPU) setSystemTime() error { // nonCanonical generates a canonical address return. // //go:nosplit -func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (usermem.AccessType, error) { +func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) { *info = arch.SignalInfo{ Signo: signal, Code: arch.SignalInfoKernel, } info.SetAddr(addr) // Include address. - return usermem.NoAccess, platform.ErrContextSignal + return hostarch.NoAccess, platform.ErrContextSignal } // fault generates an appropriate fault return. // //go:nosplit -func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, error) { +func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) { bluepill(c) // Probably no-op, but may not be. faultAddr := ring0.ReadCR2() code, user := c.ErrorCode() @@ -276,12 +276,12 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e // The last fault serviced by this CPU was not a user // fault, so we can't reliably trust the faultAddr or // the code provided here. We need to re-execute. - return usermem.NoAccess, platform.ErrContextInterrupt + return hostarch.NoAccess, platform.ErrContextInterrupt } // Reset the pointed SignalInfo. *info = arch.SignalInfo{Signo: signal} info.SetAddr(uint64(faultAddr)) - accessType := usermem.AccessType{ + accessType := hostarch.AccessType{ Read: code&(1<<1) == 0, Write: code&(1<<1) != 0, Execute: code&(1<<4) != 0, @@ -310,14 +310,14 @@ func loadByte(ptr *byte) byte { //go:nosplit func prefaultFloatingPointState(data *fpu.State) { size := len(*data) - for i := 0; i < size; i += usermem.PageSize { + for i := 0; i < size; i += hostarch.PageSize { loadByte(&(*data)[i]) } loadByte(&(*data)[size-1]) } // SwitchToUser unpacks architectural-details. -func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { +func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) { // Check for canonical addresses. if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Rip) { return nonCanonical(regs.Rip, int32(unix.SIGSEGV), info) @@ -353,7 +353,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) switch vector { case ring0.Syscall, ring0.SyscallInt80: // Fast path: system call executed. - return usermem.NoAccess, nil + return hostarch.NoAccess, nil case ring0.PageFault: return c.fault(int32(unix.SIGSEGV), info) @@ -364,7 +364,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 1, // TRAP_BRKPT (breakpoint). } info.SetAddr(switchOpts.Registers.Rip) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.GeneralProtectionFault, ring0.SegmentNotPresent, @@ -380,9 +380,9 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) // When CPUID faulting is enabled, we will generate a #GP(0) when // userspace executes a CPUID instruction. This is handled above, // because we need to be able to map and read user memory. - return usermem.AccessType{}, platform.ErrContextSignalCPUID + return hostarch.AccessType{}, platform.ErrContextSignalCPUID } - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.InvalidOpcode: *info = arch.SignalInfo{ @@ -390,7 +390,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 1, // ILL_ILLOPC (illegal opcode). } info.SetAddr(switchOpts.Registers.Rip) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.DivideByZero: *info = arch.SignalInfo{ @@ -398,7 +398,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 1, // FPE_INTDIV (divide by zero). } info.SetAddr(switchOpts.Registers.Rip) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.Overflow: *info = arch.SignalInfo{ @@ -406,7 +406,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 2, // FPE_INTOVF (integer overflow). } info.SetAddr(switchOpts.Registers.Rip) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.X87FloatingPointException, ring0.SIMDFloatingPointException: @@ -415,17 +415,17 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 7, // FPE_FLTINV (invalid operation). } info.SetAddr(switchOpts.Registers.Rip) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.Vector(bounce): // ring0.VirtualizationException - return usermem.NoAccess, platform.ErrContextInterrupt + return hostarch.NoAccess, platform.ErrContextInterrupt case ring0.AlignmentCheck: *info = arch.SignalInfo{ Signo: int32(unix.SIGBUS), Code: 2, // BUS_ADRERR (physical address does not exist). } - return usermem.NoAccess, platform.ErrContextSignal + return hostarch.NoAccess, platform.ErrContextSignal case ring0.NMI: // An NMI is generated only when a fault is not servicable by @@ -471,9 +471,9 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) { panic("impossible translation") } pageTable.Map( - usermem.Addr(ring0.KernelStartAddress|r.virtual), + hostarch.Addr(ring0.KernelStartAddress|r.virtual), r.length, - pagetables.MapOpts{AccessType: usermem.Execute}, + pagetables.MapOpts{AccessType: hostarch.Execute}, physical) } }) @@ -484,9 +484,9 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) { panic("impossible translation") } pageTable.Map( - usermem.Addr(ring0.KernelStartAddress|start), + hostarch.Addr(ring0.KernelStartAddress|start), regionLen, - pagetables.MapOpts{AccessType: usermem.ReadWrite}, + pagetables.MapOpts{AccessType: hostarch.ReadWrite}, physical) } } diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index 2055e61a7..cd912f922 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -17,12 +17,12 @@ package kvm import ( + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/usermem" ) type vCPUArchState struct { @@ -53,9 +53,9 @@ const ( func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) { applyPhysicalRegions(func(pr physicalRegion) bool { pageTable.Map( - usermem.Addr(ring0.KernelStartAddress|pr.virtual), + hostarch.Addr(ring0.KernelStartAddress|pr.virtual), pr.length, - pagetables.MapOpts{AccessType: usermem.AnyAccess, Global: true}, + pagetables.MapOpts{AccessType: hostarch.AnyAccess, Global: true}, pr.physical) return true // Keep iterating. @@ -117,13 +117,13 @@ func availableRegionsForSetMem() (phyRegions []physicalRegion) { // nonCanonical generates a canonical address return. // //go:nosplit -func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (usermem.AccessType, error) { +func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) { *info = arch.SignalInfo{ Signo: signal, Code: arch.SignalInfoKernel, } info.SetAddr(addr) // Include address. - return usermem.NoAccess, platform.ErrContextSignal + return hostarch.NoAccess, platform.ErrContextSignal } // isInstructionAbort returns true if it is an instruction abort. @@ -148,7 +148,7 @@ func isWriteFault(code uint64) bool { // fault generates an appropriate fault return. // //go:nosplit -func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, error) { +func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) { bluepill(c) // Probably no-op, but may not be. faultAddr := c.GetFaultAddr() code, user := c.ErrorCode() @@ -157,7 +157,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e // The last fault serviced by this CPU was not a user // fault, so we can't reliably trust the faultAddr or // the code provided here. We need to re-execute. - return usermem.NoAccess, platform.ErrContextInterrupt + return hostarch.NoAccess, platform.ErrContextInterrupt } // Reset the pointed SignalInfo. @@ -174,7 +174,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e info.Code = 2 } - accessType := usermem.AccessType{ + accessType := hostarch.AccessType{ Read: !isWriteFault(uint64(code)), Write: isWriteFault(uint64(code)), Execute: isInstructionAbort(uint64(code)), diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index e7d5f3193..634e55ec0 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -23,12 +23,12 @@ import ( "unsafe" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/usermem" ) type kvmVcpuInit struct { @@ -209,7 +209,7 @@ func (c *vCPU) getOneRegister(reg *kvmOneReg) error { } // SwitchToUser unpacks architectural-details. -func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { +func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) { // Check for canonical addresses. if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Pc) { return nonCanonical(regs.Pc, int32(unix.SIGSEGV), info) @@ -246,13 +246,13 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) switch vector { case ring0.Syscall: // Fast path: system call executed. - return usermem.NoAccess, nil + return hostarch.NoAccess, nil case ring0.PageFault: return c.fault(int32(unix.SIGSEGV), info) case ring0.El0ErrNMI: return c.fault(int32(unix.SIGBUS), info) case ring0.Vector(bounce): // ring0.VirtualizationException. - return usermem.NoAccess, platform.ErrContextInterrupt + return hostarch.NoAccess, platform.ErrContextInterrupt case ring0.El0SyncUndef: return c.fault(int32(unix.SIGILL), info) case ring0.El0SyncDbg: @@ -261,16 +261,16 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) Code: 1, // TRAP_BRKPT (breakpoint). } info.SetAddr(switchOpts.Registers.Pc) // Include address. - return usermem.AccessType{}, platform.ErrContextSignal + return hostarch.AccessType{}, platform.ErrContextSignal case ring0.El0SyncSpPc: *info = arch.SignalInfo{ Signo: int32(unix.SIGBUS), Code: 2, // BUS_ADRERR (physical address does not exist). } - return usermem.NoAccess, platform.ErrContextSignal + return hostarch.NoAccess, platform.ErrContextSignal case ring0.El0SyncSys, ring0.El0SyncWfx: - return usermem.NoAccess, nil // skip for now. + return hostarch.NoAccess, nil // skip for now. default: panic(fmt.Sprintf("unexpected vector: 0x%x", vector)) } diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go index 7376d8b8d..d812e6c26 100644 --- a/pkg/sentry/platform/kvm/physical_map.go +++ b/pkg/sentry/platform/kvm/physical_map.go @@ -19,9 +19,9 @@ import ( "sort" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/ring0" - "gvisor.dev/gvisor/pkg/usermem" ) type region struct { @@ -81,7 +81,7 @@ func fillAddressSpace() (excludedRegions []region) { // faultBlockSize, potentially causing up to faultBlockSize bytes in // internal fragmentation for each physical region. So we need to // account for this properly during allocation. - requiredAddr, ok := usermem.Addr(vSize - pSize + faultBlockSize).RoundUp() + requiredAddr, ok := hostarch.Addr(vSize - pSize + faultBlockSize).RoundUp() if !ok { panic(fmt.Sprintf( "overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)", @@ -99,7 +99,7 @@ func fillAddressSpace() (excludedRegions []region) { 0, 0) if errno != 0 { // Attempt half the size; overflow not possible. - currentAddr, _ := usermem.Addr(current >> 1).RoundUp() + currentAddr, _ := hostarch.Addr(current >> 1).RoundUp() current = uintptr(currentAddr) continue } @@ -134,8 +134,8 @@ func computePhysicalRegions(excludedRegions []region) (physicalRegions []physica return } if virtual == 0 { - virtual += usermem.PageSize - length -= usermem.PageSize + virtual += hostarch.PageSize + length -= hostarch.PageSize } if end := virtual + length; end > ring0.MaximumUserAddress { length -= (end - ring0.MaximumUserAddress) diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go index 4dcdbf8a7..01d9eb39d 100644 --- a/pkg/sentry/platform/kvm/virtual_map.go +++ b/pkg/sentry/platform/kvm/virtual_map.go @@ -22,12 +22,12 @@ import ( "regexp" "strconv" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) type virtualRegion struct { region - accessType usermem.AccessType + accessType hostarch.AccessType shared bool offset uintptr filename string @@ -92,7 +92,7 @@ func applyVirtualRegions(fn func(vr virtualRegion)) error { virtual: uintptr(start), length: uintptr(end - start), }, - accessType: usermem.AccessType{ + accessType: hostarch.AccessType{ Read: read, Write: write, Execute: execute, diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go index 9b4545fdd..1f4a774f3 100644 --- a/pkg/sentry/platform/kvm/virtual_map_test.go +++ b/pkg/sentry/platform/kvm/virtual_map_test.go @@ -18,12 +18,12 @@ import ( "testing" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) type checker struct { ok bool - accessType usermem.AccessType + accessType hostarch.AccessType } func (c *checker) Containing(addr uintptr) func(virtualRegion) { @@ -46,7 +46,7 @@ func TestParseMaps(t *testing.T) { // MMap a new page. addr, _, errno := unix.RawSyscall6( - unix.SYS_MMAP, 0, usermem.PageSize, + unix.SYS_MMAP, 0, hostarch.PageSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_ANONYMOUS|unix.MAP_PRIVATE, 0, 0) if errno != 0 { @@ -55,19 +55,19 @@ func TestParseMaps(t *testing.T) { // Re-parse maps. if err := applyVirtualRegions(c.Containing(addr)); err != nil { - unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0) + unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) t.Fatalf("unexpected error: %v", err) } // Assert that it now does contain the region. if !c.ok { - unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0) + unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) t.Fatalf("updated map does not contain 0x%08x, expected true", addr) } // Map the region as PROT_NONE. newAddr, _, errno := unix.RawSyscall6( - unix.SYS_MMAP, addr, usermem.PageSize, + unix.SYS_MMAP, addr, hostarch.PageSize, unix.PROT_NONE, unix.MAP_ANONYMOUS|unix.MAP_FIXED|unix.MAP_PRIVATE, 0, 0) if errno != 0 { @@ -89,5 +89,5 @@ func TestParseMaps(t *testing.T) { } // Unmap the region. - unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0) + unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0) } diff --git a/pkg/sentry/platform/mmap_min_addr.go b/pkg/sentry/platform/mmap_min_addr.go index 091c2e365..7335bd802 100644 --- a/pkg/sentry/platform/mmap_min_addr.go +++ b/pkg/sentry/platform/mmap_min_addr.go @@ -20,7 +20,7 @@ import ( "strconv" "strings" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // systemMMapMinAddrSource is the source file. @@ -30,8 +30,8 @@ const systemMMapMinAddrSource = "/proc/sys/vm/mmap_min_addr" var systemMMapMinAddr uint64 // SystemMMapMinAddr returns the minimum system address. -func SystemMMapMinAddr() usermem.Addr { - return usermem.Addr(systemMMapMinAddr) +func SystemMMapMinAddr() hostarch.Addr { + return hostarch.Addr(systemMMapMinAddr) } // MMapMinAddr is a size zero struct that implements MinUserAddress based on @@ -41,7 +41,7 @@ type MMapMinAddr struct { } // MinUserAddress implements platform.MinUserAddresss. -func (*MMapMinAddr) MinUserAddress() usermem.Addr { +func (*MMapMinAddr) MinUserAddress() hostarch.Addr { return SystemMMapMinAddr() } diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index dcfe839a7..ef7814a6f 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/hostmm" @@ -62,16 +63,16 @@ type Platform interface { // for AddressSpace.MapFile. As a special case, a MapUnit of 0 indicates // that the cost of AddressSpace.MapFile is effectively independent of the // number of pages mapped. If MapUnit is non-zero, it must be a power-of-2 - // multiple of usermem.PageSize. + // multiple of hostarch.PageSize. MapUnit() uint64 // MinUserAddress returns the minimum mappable address on this // platform. - MinUserAddress() usermem.Addr + MinUserAddress() hostarch.Addr // MaxUserAddress returns the maximum mappable address on this // platform. - MaxUserAddress() usermem.Addr + MaxUserAddress() hostarch.Addr // NewAddressSpace returns a new memory context for this platform. // @@ -172,7 +173,7 @@ type MemoryManager interface { //usermem.IO provides access to the contents of a virtual memory space. usermem.IO // MMap establishes a memory mapping. - MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error) + MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) // AddressSpace returns the AddressSpace bound to mm. AddressSpace() AddressSpace } @@ -195,7 +196,7 @@ type Context interface { // // - ErrContextSignal: The Context was interrupted by a signal. The // returned *arch.SignalInfo contains information about the signal. If - // arch.SignalInfo.Signo == SIGSEGV, the returned usermem.AccessType + // arch.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType // contains the access type of the triggering fault. The caller owns // the returned SignalInfo. // @@ -206,7 +207,7 @@ type Context interface { // concurrent call to Switch(). // // - ErrContextCPUPreempted: See the definition of that error for details. - Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) + Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error) // PullFullState() pulls a full state of the application thread. // @@ -302,14 +303,14 @@ type AddressSpace interface { // * at.Any() == true. // * At least one reference must be held on all pages in fr, and must // continue to be held as long as pages are mapped. - MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error + MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error // Unmap unmaps the given range. // // Preconditions: // * addr is page-aligned. // * length > 0. - Unmap(addr usermem.Addr, length uint64) + Unmap(addr hostarch.Addr, length uint64) // Release releases this address space. After releasing, a new AddressSpace // must be acquired via platform.NewAddressSpace(). @@ -337,67 +338,67 @@ type AddressSpaceIO interface { // CopyOut copies len(src) bytes from src to the memory mapped at addr. It // returns the number of bytes copied. If the number of bytes copied is < // len(src), it returns a non-nil error explaining why. - CopyOut(addr usermem.Addr, src []byte) (int, error) + CopyOut(addr hostarch.Addr, src []byte) (int, error) // CopyIn copies len(dst) bytes from the memory mapped at addr to dst. // It returns the number of bytes copied. If the number of bytes copied is // < len(dst), it returns a non-nil error explaining why. - CopyIn(addr usermem.Addr, dst []byte) (int, error) + CopyIn(addr hostarch.Addr, dst []byte) (int, error) // ZeroOut sets toZero bytes to 0, starting at addr. It returns the number // of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a // non-nil error explaining why. - ZeroOut(addr usermem.Addr, toZero uintptr) (uintptr, error) + ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error) // SwapUint32 atomically sets the uint32 value at addr to new and returns // the previous value. // // Preconditions: addr must be aligned to a 4-byte boundary. - SwapUint32(addr usermem.Addr, new uint32) (uint32, error) + SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) // CompareAndSwapUint32 atomically compares the uint32 value at addr to // old; if they are equal, the value in memory is replaced by new. In // either case, the previous value stored in memory is returned. // // Preconditions: addr must be aligned to a 4-byte boundary. - CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) + CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) // LoadUint32 atomically loads the uint32 value at addr and returns it. // // Preconditions: addr must be aligned to a 4-byte boundary. - LoadUint32(addr usermem.Addr) (uint32, error) + LoadUint32(addr hostarch.Addr) (uint32, error) } // NoAddressSpaceIO implements AddressSpaceIO methods by panicking. type NoAddressSpaceIO struct{} // CopyOut implements AddressSpaceIO.CopyOut. -func (NoAddressSpaceIO) CopyOut(addr usermem.Addr, src []byte) (int, error) { +func (NoAddressSpaceIO) CopyOut(addr hostarch.Addr, src []byte) (int, error) { panic("This platform does not support AddressSpaceIO") } // CopyIn implements AddressSpaceIO.CopyIn. -func (NoAddressSpaceIO) CopyIn(addr usermem.Addr, dst []byte) (int, error) { +func (NoAddressSpaceIO) CopyIn(addr hostarch.Addr, dst []byte) (int, error) { panic("This platform does not support AddressSpaceIO") } // ZeroOut implements AddressSpaceIO.ZeroOut. -func (NoAddressSpaceIO) ZeroOut(addr usermem.Addr, toZero uintptr) (uintptr, error) { +func (NoAddressSpaceIO) ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error) { panic("This platform does not support AddressSpaceIO") } // SwapUint32 implements AddressSpaceIO.SwapUint32. -func (NoAddressSpaceIO) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { +func (NoAddressSpaceIO) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) { panic("This platform does not support AddressSpaceIO") } // CompareAndSwapUint32 implements AddressSpaceIO.CompareAndSwapUint32. -func (NoAddressSpaceIO) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { +func (NoAddressSpaceIO) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) { panic("This platform does not support AddressSpaceIO") } // LoadUint32 implements AddressSpaceIO.LoadUint32. -func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) { +func (NoAddressSpaceIO) LoadUint32(addr hostarch.Addr) (uint32, error) { panic("This platform does not support AddressSpaceIO") } @@ -406,7 +407,7 @@ func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) { // permissions. type SegmentationFault struct { // Addr is the address at which the fault occurred. - Addr usermem.Addr + Addr hostarch.Addr } // Error implements error.Error. diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 47efde6a2..d101f2f53 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -25,6 +25,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/procid", "//pkg/safecopy", @@ -35,7 +36,6 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", "//pkg/sync", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index 571bfcc2e..828458ce2 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -49,11 +49,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" pkgcontext "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) var ( @@ -88,28 +88,28 @@ type context struct { // lastFaultAddr is the last faulting address; this is only meaningful if // lastFaultSP is non-nil. - lastFaultAddr usermem.Addr + lastFaultAddr hostarch.Addr // lastFaultIP is the address of the last faulting instruction; // this is also only meaningful if lastFaultSP is non-nil. - lastFaultIP usermem.Addr + lastFaultIP hostarch.Addr } // Switch runs the provided context in the given address space. -func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) { +func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error) { as := mm.AddressSpace() s := as.(*subprocess) isSyscall := s.switchToApp(c, ac) var ( faultSP *subprocess - faultAddr usermem.Addr - faultIP usermem.Addr + faultAddr hostarch.Addr + faultIP hostarch.Addr ) if !isSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV { faultSP = s - faultAddr = usermem.Addr(c.signalInfo.Addr()) - faultIP = usermem.Addr(ac.IP()) + faultAddr = hostarch.Addr(c.signalInfo.Addr()) + faultIP = hostarch.Addr(ac.IP()) } // Update the context to reflect the outcome of this context switch. @@ -140,14 +140,14 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a } if isSyscall { - return nil, usermem.NoAccess, nil + return nil, hostarch.NoAccess, nil } si := c.signalInfo if faultSP == nil { // Non-fault signal. - return &si, usermem.NoAccess, platform.ErrContextSignal + return &si, hostarch.NoAccess, platform.ErrContextSignal } // Got a page fault. Ideally, we'd get real fault type here, but ptrace @@ -157,7 +157,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a // pointer. // // It was a write fault if the fault is immediately repeated. - at := usermem.Read + at := hostarch.Read if faultAddr == faultIP { at.Execute = true } @@ -235,8 +235,8 @@ func (*PTrace) MapUnit() uint64 { // MaxUserAddress returns the first address that may not be used by user // applications. -func (*PTrace) MaxUserAddress() usermem.Addr { - return usermem.Addr(stubStart) +func (*PTrace) MaxUserAddress() hostarch.Addr { + return hostarch.Addr(stubStart) } // NewAddressSpace returns a new subprocess. diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go index 01e73b019..facb96011 100644 --- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go @@ -19,9 +19,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" - "gvisor.dev/gvisor/pkg/usermem" ) // getRegs gets the general purpose register set. @@ -122,7 +122,7 @@ func (t *thread) getSignalInfo(si *arch.SignalInfo) error { // // Precondition: the OS thread must be locked and own t. func (t *thread) clone() (*thread, error) { - r, ok := usermem.Addr(stackPointer(&t.initRegs)).RoundUp() + r, ok := hostarch.Addr(stackPointer(&t.initRegs)).RoundUp() if !ok { return nil, unix.EINVAL } diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go index 780227248..5c9b7784f 100644 --- a/pkg/sentry/platform/ptrace/stub_unsafe.go +++ b/pkg/sentry/platform/ptrace/stub_unsafe.go @@ -19,8 +19,8 @@ import ( "unsafe" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safecopy" - "gvisor.dev/gvisor/pkg/usermem" ) // stub is defined in arch-specific assembly. @@ -45,8 +45,8 @@ func stubInit() { stubLen := int(safecopy.FindEndAddress(stubBegin) - stubBegin) stubSlice := unsafeSlice(stubBegin, stubLen) mapLen := uintptr(stubLen) - if offset := mapLen % usermem.PageSize; offset != 0 { - mapLen += usermem.PageSize - offset + if offset := mapLen % hostarch.PageSize; offset != 0 { + mapLen += hostarch.PageSize - offset } for stubStart > 0 { @@ -70,7 +70,7 @@ func stubInit() { } // Attempt to begin at a lower address. - stubStart -= uintptr(usermem.PageSize) + stubStart -= uintptr(hostarch.PageSize) continue } diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index acccbfe2e..9c73a725a 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -20,13 +20,13 @@ import ( "runtime" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" ) // Linux kernel errnos which "should never be seen by user programs", but will @@ -69,7 +69,7 @@ type thread struct { // threadPool is a collection of threads. type threadPool struct { // mu protects below. - mu sync.Mutex + mu sync.RWMutex // threads is the collection of threads. // @@ -85,30 +85,42 @@ type threadPool struct { // // Precondition: the runtime OS thread must be locked. func (tp *threadPool) lookupOrCreate(currentTID int32, newThread func() *thread) *thread { - tp.mu.Lock() + // The overwhelming common case is that the thread is already created. + // Optimistically attempt the lookup by only locking for reading. + tp.mu.RLock() t, ok := tp.threads[currentTID] - if !ok { - // Before creating a new thread, see if we can find a thread - // whose system tid has disappeared. - // - // TODO(b/77216482): Other parts of this package depend on - // threads never exiting. - for origTID, t := range tp.threads { - // Signal zero is an easy existence check. - if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil { - // This thread has been abandoned; reuse it. - delete(tp.threads, origTID) - tp.threads[currentTID] = t - tp.mu.Unlock() - return t - } - } + tp.mu.RUnlock() + if ok { + return t + } - // Create a new thread. - t = newThread() - tp.threads[currentTID] = t + tp.mu.Lock() + defer tp.mu.Unlock() + + // Another goroutine might have created the thread for currentTID in between + // mu.RUnlock() and mu.Lock(). + if t, ok = tp.threads[currentTID]; ok { + return t + } + + // Before creating a new thread, see if we can find a thread + // whose system tid has disappeared. + // + // TODO(b/77216482): Other parts of this package depend on + // threads never exiting. + for origTID, t := range tp.threads { + // Signal zero is an easy existence check. + if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil { + // This thread has been abandoned; reuse it. + delete(tp.threads, origTID) + tp.threads[currentTID] = t + return t + } } - tp.mu.Unlock() + + // Create a new thread. + t = newThread() + tp.threads[currentTID] = t return t } @@ -228,7 +240,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) { func (s *subprocess) unmap() { s.Unmap(0, uint64(stubStart)) if maximumUserAddress != stubEnd { - s.Unmap(usermem.Addr(stubEnd), uint64(maximumUserAddress-stubEnd)) + s.Unmap(hostarch.Addr(stubEnd), uint64(maximumUserAddress-stubEnd)) } } @@ -615,7 +627,7 @@ func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintp } // MapFile implements platform.AddressSpace.MapFile. -func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error { +func (s *subprocess) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error { var flags int if precommit { flags |= unix.MAP_POPULATE @@ -632,7 +644,7 @@ func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRan } // Unmap implements platform.AddressSpace.Unmap. -func (s *subprocess) Unmap(addr usermem.Addr, length uint64) { +func (s *subprocess) Unmap(addr hostarch.Addr, length uint64) { ar, ok := addr.ToRange(length) if !ok { panic(fmt.Sprintf("addr %#x + length %#x overflows", addr, length)) diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 0ce42b6cc..080859125 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -10,6 +10,7 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/hostarch", "//pkg/marshal", "//pkg/sentry/device", "//pkg/sentry/fs", diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index ebcc891b3..0e0e82365 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", @@ -23,7 +24,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/syserror", - "//pkg/usermem", ], ) @@ -35,8 +35,8 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/hostarch", "//pkg/sentry/socket", - "//pkg/usermem", "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 65b556489..45a05cd63 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -20,13 +20,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) const maxInt = int(^uint(0) >> 1) @@ -181,12 +181,12 @@ func (c *scmCredentials) Equals(oc transport.CredentialsControlMessage) bool { } func putUint64(buf []byte, n uint64) []byte { - usermem.ByteOrder.PutUint64(buf[len(buf):len(buf)+8], n) + hostarch.ByteOrder.PutUint64(buf[len(buf):len(buf)+8], n) return buf[:len(buf)+8] } func putUint32(buf []byte, n uint32) []byte { - usermem.ByteOrder.PutUint32(buf[len(buf):len(buf)+4], n) + hostarch.ByteOrder.PutUint32(buf[len(buf):len(buf)+4], n) return buf[:len(buf)+4] } @@ -242,7 +242,7 @@ func putCmsgStruct(buf []byte, msgLevel, msgType uint32, align uint, data interf hdrBuf := buf - buf = binary.Marshal(buf, usermem.ByteOrder, data) + buf = binary.Marshal(buf, hostarch.ByteOrder, data) // If the control message data brought us over capacity, omit it. if cap(buf) != cap(ob) { @@ -475,7 +475,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) } var h linux.ControlMessageHeader - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], usermem.ByteOrder, &h) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], hostarch.ByteOrder, &h) if h.Length < uint64(linux.SizeOfControlMessageHeader) { return socket.ControlMessages{}, syserror.EINVAL @@ -499,7 +499,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) } for j := i; j < i+rightsSize; j += linux.SizeOfControlMessageRight { - fds = append(fds, int32(usermem.ByteOrder.Uint32(buf[j:j+linux.SizeOfControlMessageRight]))) + fds = append(fds, int32(hostarch.ByteOrder.Uint32(buf[j:j+linux.SizeOfControlMessageRight]))) } i += binary.AlignUp(length, width) @@ -510,7 +510,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) } var creds linux.ControlMessageCredentials - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], usermem.ByteOrder, &creds) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], hostarch.ByteOrder, &creds) scmCreds, err := NewSCMCredentials(t, creds) if err != nil { return socket.ControlMessages{}, err @@ -523,7 +523,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) return socket.ControlMessages{}, syserror.EINVAL } var ts linux.Timeval - binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], usermem.ByteOrder, &ts) + binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], hostarch.ByteOrder, &ts) cmsgs.IP.Timestamp = ts.ToNsecCapped() cmsgs.IP.HasTimestamp = true i += binary.AlignUp(length, width) @@ -539,7 +539,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) return socket.ControlMessages{}, syserror.EINVAL } cmsgs.IP.HasTOS = true - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], usermem.ByteOrder, &cmsgs.IP.TOS) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], hostarch.ByteOrder, &cmsgs.IP.TOS) i += binary.AlignUp(length, width) case linux.IP_PKTINFO: @@ -549,7 +549,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) cmsgs.IP.HasIPPacketInfo = true var packetInfo linux.ControlMessageIPPacketInfo - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], hostarch.ByteOrder, &packetInfo) cmsgs.IP.PacketInfo = packetInfo i += binary.AlignUp(length, width) @@ -559,7 +559,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) if length < addr.SizeBytes() { return socket.ControlMessages{}, syserror.EINVAL } - binary.Unmarshal(buf[i:i+addr.SizeBytes()], usermem.ByteOrder, &addr) + binary.Unmarshal(buf[i:i+addr.SizeBytes()], hostarch.ByteOrder, &addr) cmsgs.IP.OriginalDstAddress = &addr i += binary.AlignUp(length, width) @@ -583,7 +583,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) return socket.ControlMessages{}, syserror.EINVAL } cmsgs.IP.HasTClass = true - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], usermem.ByteOrder, &cmsgs.IP.TClass) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], hostarch.ByteOrder, &cmsgs.IP.TClass) i += binary.AlignUp(length, width) case linux.IPV6_RECVORIGDSTADDR: @@ -591,7 +591,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) if length < addr.SizeBytes() { return socket.ControlMessages{}, syserror.EINVAL } - binary.Unmarshal(buf[i:i+addr.SizeBytes()], usermem.ByteOrder, &addr) + binary.Unmarshal(buf[i:i+addr.SizeBytes()], hostarch.ByteOrder, &addr) cmsgs.IP.OriginalDstAddress = &addr i += binary.AlignUp(length, width) diff --git a/pkg/sentry/socket/control/control_test.go b/pkg/sentry/socket/control/control_test.go index d40a4cc85..7e28a0cef 100644 --- a/pkg/sentry/socket/control/control_test.go +++ b/pkg/sentry/socket/control/control_test.go @@ -22,8 +22,8 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/usermem" ) func TestParse(t *testing.T) { @@ -35,12 +35,12 @@ func TestParse(t *testing.T) { Type: linux.SO_TIMESTAMP, } buf := make([]byte, 0, length) - buf = binary.Marshal(buf, usermem.ByteOrder, &hdr) + buf = binary.Marshal(buf, hostarch.ByteOrder, &hdr) ts := linux.Timeval{ Sec: 2401, Usec: 343, } - buf = binary.Marshal(buf, usermem.ByteOrder, &ts) + buf = binary.Marshal(buf, hostarch.ByteOrder, &ts) cmsg, err := Parse(nil, nil, buf, 8 /* width */) if err != nil { diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index a8e6f172b..a5c2155a2 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/binary", "//pkg/context", "//pkg/fdnotifier", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 2d9dbbdba..a784e23b5 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -321,7 +322,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error { } // GetSockOpt implements socket.Socket.GetSockOpt. -func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { if outLen < 0 { return nil, syserr.ErrInvalidArgument } @@ -527,24 +528,24 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s switch unixCmsg.Header.Type { case linux.SO_TIMESTAMP: controlMessages.IP.HasTimestamp = true - binary.Unmarshal(unixCmsg.Data[:linux.SizeOfTimeval], usermem.ByteOrder, &controlMessages.IP.Timestamp) + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfTimeval], hostarch.ByteOrder, &controlMessages.IP.Timestamp) } case linux.SOL_IP: switch unixCmsg.Header.Type { case linux.IP_TOS: controlMessages.IP.HasTOS = true - binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], usermem.ByteOrder, &controlMessages.IP.TOS) + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], hostarch.ByteOrder, &controlMessages.IP.TOS) case linux.IP_PKTINFO: controlMessages.IP.HasIPPacketInfo = true var packetInfo linux.ControlMessageIPPacketInfo - binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo) + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], hostarch.ByteOrder, &packetInfo) controlMessages.IP.PacketInfo = packetInfo case linux.IP_RECVORIGDSTADDR: var addr linux.SockAddrInet - binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], usermem.ByteOrder, &addr) + binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], hostarch.ByteOrder, &addr) controlMessages.IP.OriginalDstAddress = &addr case unix.IP_RECVERR: @@ -557,11 +558,11 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s switch unixCmsg.Header.Type { case linux.IPV6_TCLASS: controlMessages.IP.HasTClass = true - binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTClass], usermem.ByteOrder, &controlMessages.IP.TClass) + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTClass], hostarch.ByteOrder, &controlMessages.IP.TClass) case linux.IPV6_RECVORIGDSTADDR: var addr linux.SockAddrInet6 - binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], usermem.ByteOrder, &addr) + binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], hostarch.ByteOrder, &addr) controlMessages.IP.OriginalDstAddress = &addr case unix.IPV6_RECVERR: @@ -574,7 +575,7 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s switch unixCmsg.Header.Type { case linux.TCP_INQ: controlMessages.IP.HasInq = true - binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageInq], usermem.ByteOrder, &controlMessages.IP.Inq) + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageInq], hostarch.ByteOrder, &controlMessages.IP.Inq) } } } @@ -688,7 +689,7 @@ func (s *socketOpsCommon) State() uint32 { return 0 } - binary.Unmarshal(buf, usermem.ByteOrder, &info) + binary.Unmarshal(buf, hostarch.ByteOrder, &info) return uint32(info.State) } diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index 2890e640d..d3be2d825 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" @@ -61,7 +62,7 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, args arch.SyscallArgument return 0, translateIOSyscallError(errno) } var buf [4]byte - usermem.ByteOrder.PutUint32(buf[:], uint32(val)) + hostarch.ByteOrder.PutUint32(buf[:], uint32(val)) _, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{ AddressSpaceActive: true, }) diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 5bcf92e14..26e8ae17a 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -22,11 +22,13 @@ import ( "reflect" "strconv" "strings" + "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserr" @@ -146,7 +148,7 @@ func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.Netli return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid data length (%d bytes, expected at least %d bytes)", len(link.Data), unix.SizeofIfInfomsg) } var ifinfo unix.IfInfomsg - binary.Unmarshal(link.Data[:unix.SizeofIfInfomsg], usermem.ByteOrder, &ifinfo) + binary.Unmarshal(link.Data[:unix.SizeofIfInfomsg], hostarch.ByteOrder, &ifinfo) inetIF := inet.Interface{ DeviceType: ifinfo.Type, Flags: ifinfo.Flags, @@ -177,7 +179,7 @@ func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.Netli return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid data length (%d bytes, expected at least %d bytes)", len(addr.Data), unix.SizeofIfAddrmsg) } var ifaddr unix.IfAddrmsg - binary.Unmarshal(addr.Data[:unix.SizeofIfAddrmsg], usermem.ByteOrder, &ifaddr) + binary.Unmarshal(addr.Data[:unix.SizeofIfAddrmsg], hostarch.ByteOrder, &ifaddr) inetAddr := inet.InterfaceAddr{ Family: ifaddr.Family, PrefixLen: ifaddr.Prefixlen, @@ -209,7 +211,7 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error) } var ifRoute unix.RtMsg - binary.Unmarshal(routeMsg.Data[:unix.SizeofRtMsg], usermem.ByteOrder, &ifRoute) + binary.Unmarshal(routeMsg.Data[:unix.SizeofRtMsg], hostarch.ByteOrder, &ifRoute) inetRoute := inet.Route{ Family: ifRoute.Family, DstLen: ifRoute.Dst_len, @@ -243,7 +245,7 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error) if len(attr.Value) != expected { return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected) } - binary.Unmarshal(attr.Value, usermem.ByteOrder, &inetRoute.OutputInterface) + binary.Unmarshal(attr.Value, hostarch.ByteOrder, &inetRoute.OutputInterface) } } diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index 8aea0200f..4381dfa06 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -20,12 +20,12 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/hostarch", "//pkg/log", "//pkg/sentry/kernel", "//pkg/syserr", "//pkg/tcpip", "//pkg/tcpip/header", "//pkg/tcpip/stack", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go index e339f9bea..4bd305a44 100644 --- a/pkg/sentry/socket/netfilter/extensions.go +++ b/pkg/sentry/socket/netfilter/extensions.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) // TODO(gvisor.dev/issue/170): The following per-matcher params should be @@ -89,7 +89,7 @@ func marshalEntryMatch(name string, data []byte) []byte { copy(matcher.Name[:], name) buf := make([]byte, 0, size) - buf = binary.Marshal(buf, usermem.ByteOrder, matcher) + buf = binary.Marshal(buf, hostarch.ByteOrder, matcher) return append(buf, make([]byte, size-len(buf))...) } diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go index 2f913787b..1fc4cb651 100644 --- a/pkg/sentry/socket/netfilter/ipv4.go +++ b/pkg/sentry/socket/netfilter/ipv4.go @@ -19,11 +19,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) // emptyIPv4Filter is for comparison with a rule's filters to determine whether @@ -142,7 +142,7 @@ func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, } var entry linux.IPTEntry buf := optVal[:linux.SizeOfIPTEntry] - binary.Unmarshal(buf, usermem.ByteOrder, &entry) + binary.Unmarshal(buf, hostarch.ByteOrder, &entry) initialOptValLen := len(optVal) optVal = optVal[linux.SizeOfIPTEntry:] diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go index 263d9d3b5..67a52b628 100644 --- a/pkg/sentry/socket/netfilter/ipv6.go +++ b/pkg/sentry/socket/netfilter/ipv6.go @@ -19,11 +19,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) // emptyIPv6Filter is for comparison with a rule's filters to determine whether @@ -145,7 +145,7 @@ func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, } var entry linux.IP6TEntry buf := optVal[:linux.SizeOfIP6TEntry] - binary.Unmarshal(buf, usermem.ByteOrder, &entry) + binary.Unmarshal(buf, hostarch.ByteOrder, &entry) initialOptValLen := len(optVal) optVal = optVal[linux.SizeOfIP6TEntry:] diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 7ae18b2a3..5200e08ed 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -23,12 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) // enableLogging controls whether to log the (de)serialization of netfilter @@ -83,7 +83,7 @@ func DefaultLinuxTables() *stack.IPTables { } // GetInfo returns information about iptables. -func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) { +func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) { // Read in the struct and table name. var info linux.IPTGetinfo if _, err := info.CopyIn(t, outPtr); err != nil { @@ -106,7 +106,7 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) } // GetEntries4 returns netstack's iptables rules. -func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) { +func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) { // Read in the struct and table name. var userEntries linux.IPTGetEntries if _, err := userEntries.CopyIn(t, outPtr); err != nil { @@ -130,7 +130,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen } // GetEntries6 returns netstack's ip6tables rules. -func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) { +func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) { // Read in the struct and table name. IPv4 and IPv6 utilize structs // with the same layout. var userEntries linux.IPTGetEntries @@ -179,7 +179,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { var replace linux.IPTReplace replaceBuf := optVal[:linux.SizeOfIPTReplace] optVal = optVal[linux.SizeOfIPTReplace:] - binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) + binary.Unmarshal(replaceBuf, hostarch.ByteOrder, &replace) // TODO(gvisor.dev/issue/170): Support other tables. var table stack.Table @@ -310,7 +310,7 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher, } var match linux.XTEntryMatch buf := optVal[:linux.SizeOfXTEntryMatch] - binary.Unmarshal(buf, usermem.ByteOrder, &match) + binary.Unmarshal(buf, hostarch.ByteOrder, &match) nflog("set entries: parsed entry match %q: %+v", match.Name.String(), match) // Check some invariants. @@ -381,7 +381,7 @@ func hookFromLinux(hook int) stack.Hook { // TargetRevision returns a linux.XTGetRevision for a given target. It sets // Revision to the highest supported value, unless the provided revision number // is larger. -func TargetRevision(t *kernel.Task, revPtr usermem.Addr, netProto tcpip.NetworkProtocolNumber) (linux.XTGetRevision, *syserr.Error) { +func TargetRevision(t *kernel.Task, revPtr hostarch.Addr, netProto tcpip.NetworkProtocolNumber) (linux.XTGetRevision, *syserr.Error) { // Read in the target name and version. var rev linux.XTGetRevision if _, err := rev.CopyIn(t, revPtr); err != nil { diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go index 5f80d82ea..b2cc6be20 100644 --- a/pkg/sentry/socket/netfilter/owner_matcher.go +++ b/pkg/sentry/socket/netfilter/owner_matcher.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) const matcherNameOwner = "owner" @@ -60,7 +60,7 @@ func (ownerMarshaler) marshal(mr matcher) []byte { } buf := make([]byte, 0, linux.SizeOfIPTOwnerInfo) - return marshalEntryMatch(matcherNameOwner, binary.Marshal(buf, usermem.ByteOrder, iptOwnerInfo)) + return marshalEntryMatch(matcherNameOwner, binary.Marshal(buf, hostarch.ByteOrder, iptOwnerInfo)) } // unmarshal implements matchMaker.unmarshal. @@ -72,7 +72,7 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack. // For alignment reasons, the match's total size may // exceed what's strictly necessary to hold matchData. var matchData linux.IPTOwnerInfo - binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], usermem.ByteOrder, &matchData) + binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], hostarch.ByteOrder, &matchData) nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData) var owner OwnerMatcher diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go index f2653d523..80f8c6430 100644 --- a/pkg/sentry/socket/netfilter/targets.go +++ b/pkg/sentry/socket/netfilter/targets.go @@ -19,11 +19,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) // ErrorTargetName is used to mark targets as error targets. Error targets @@ -167,7 +167,7 @@ func (*standardTargetMaker) marshal(target target) []byte { } ret := make([]byte, 0, linux.SizeOfXTStandardTarget) - return binary.Marshal(ret, usermem.ByteOrder, xt) + return binary.Marshal(ret, hostarch.ByteOrder, xt) } func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { @@ -177,7 +177,7 @@ func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) ( } var standardTarget linux.XTStandardTarget buf = buf[:linux.SizeOfXTStandardTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &standardTarget) + binary.Unmarshal(buf, hostarch.ByteOrder, &standardTarget) if standardTarget.Verdict < 0 { // A Verdict < 0 indicates a non-jump verdict. @@ -223,7 +223,7 @@ func (*errorTargetMaker) marshal(target target) []byte { copy(xt.Target.Name[:], ErrorTargetName) ret := make([]byte, 0, linux.SizeOfXTErrorTarget) - return binary.Marshal(ret, usermem.ByteOrder, xt) + return binary.Marshal(ret, hostarch.ByteOrder, xt) } func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { @@ -233,7 +233,7 @@ func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (tar } var errTgt linux.XTErrorTarget buf = buf[:linux.SizeOfXTErrorTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &errTgt) + binary.Unmarshal(buf, hostarch.ByteOrder, &errTgt) // Error targets are used in 2 cases: // * An actual error case. These rules have an error named @@ -281,7 +281,7 @@ func (*redirectTargetMaker) marshal(target target) []byte { xt.NfRange.RangeIPV4.Flags |= linux.NF_NAT_RANGE_PROTO_SPECIFIED xt.NfRange.RangeIPV4.MinPort = htons(rt.Port) xt.NfRange.RangeIPV4.MaxPort = xt.NfRange.RangeIPV4.MinPort - return binary.Marshal(ret, usermem.ByteOrder, xt) + return binary.Marshal(ret, hostarch.ByteOrder, xt) } func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { @@ -297,7 +297,7 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) ( var rt linux.XTRedirectTarget buf = buf[:linux.SizeOfXTRedirectTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &rt) + binary.Unmarshal(buf, hostarch.ByteOrder, &rt) // Copy linux.XTRedirectTarget to stack.RedirectTarget. target := redirectTarget{RedirectTarget: stack.RedirectTarget{ @@ -372,7 +372,7 @@ func (*nfNATTargetMaker) marshal(target target) []byte { nt.Range.MaxProto = nt.Range.MinProto ret := make([]byte, 0, nfNATMarhsalledSize) - return binary.Marshal(ret, usermem.ByteOrder, nt) + return binary.Marshal(ret, hostarch.ByteOrder, nt) } func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { @@ -388,7 +388,7 @@ func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (tar var natRange linux.NFNATRange buf = buf[linux.SizeOfXTEntryTarget:nfNATMarhsalledSize] - binary.Unmarshal(buf, usermem.ByteOrder, &natRange) + binary.Unmarshal(buf, hostarch.ByteOrder, &natRange) // We don't support port or address ranges. if natRange.MinAddr != natRange.MaxAddr { @@ -454,7 +454,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte, ipv6 bool) (stack.T } var target linux.XTEntryTarget buf := optVal[:linux.SizeOfXTEntryTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &target) + binary.Unmarshal(buf, hostarch.ByteOrder, &target) return unmarshalTarget(target, filter, optVal) } @@ -487,11 +487,11 @@ func (jt *JumpTarget) Action(*stack.PacketBuffer, *stack.ConnTrack, stack.Hook, func ntohs(port uint16) uint16 { buf := make([]byte, 2) binary.BigEndian.PutUint16(buf, port) - return usermem.ByteOrder.Uint16(buf) + return hostarch.ByteOrder.Uint16(buf) } func htons(port uint16) uint16 { buf := make([]byte, 2) - usermem.ByteOrder.PutUint16(buf, port) + hostarch.ByteOrder.PutUint16(buf, port) return binary.BigEndian.Uint16(buf) } diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go index 678d6b578..69557f515 100644 --- a/pkg/sentry/socket/netfilter/tcp_matcher.go +++ b/pkg/sentry/socket/netfilter/tcp_matcher.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) const matcherNameTCP = "tcp" @@ -48,7 +48,7 @@ func (tcpMarshaler) marshal(mr matcher) []byte { DestinationPortEnd: matcher.destinationPortEnd, } buf := make([]byte, 0, linux.SizeOfXTTCP) - return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp)) + return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, hostarch.ByteOrder, xttcp)) } // unmarshal implements matchMaker.unmarshal. @@ -60,7 +60,7 @@ func (tcpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma // For alignment reasons, the match's total size may // exceed what's strictly necessary to hold matchData. var matchData linux.XTTCP - binary.Unmarshal(buf[:linux.SizeOfXTTCP], usermem.ByteOrder, &matchData) + binary.Unmarshal(buf[:linux.SizeOfXTTCP], hostarch.ByteOrder, &matchData) nflog("parseMatchers: parsed XTTCP: %+v", matchData) if matchData.Option != 0 || diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go index f8568873f..6a60e6bd6 100644 --- a/pkg/sentry/socket/netfilter/udp_matcher.go +++ b/pkg/sentry/socket/netfilter/udp_matcher.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/usermem" ) const matcherNameUDP = "udp" @@ -48,7 +48,7 @@ func (udpMarshaler) marshal(mr matcher) []byte { DestinationPortEnd: matcher.destinationPortEnd, } buf := make([]byte, 0, linux.SizeOfXTUDP) - return marshalEntryMatch(matcherNameUDP, binary.Marshal(buf, usermem.ByteOrder, xtudp)) + return marshalEntryMatch(matcherNameUDP, binary.Marshal(buf, hostarch.ByteOrder, xtudp)) } // unmarshal implements matchMaker.unmarshal. @@ -60,7 +60,7 @@ func (udpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma // For alignment reasons, the match's total size may exceed what's // strictly necessary to hold matchData. var matchData linux.XTUDP - binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData) + binary.Unmarshal(buf[:linux.SizeOfXTUDP], hostarch.ByteOrder, &matchData) nflog("parseMatchers: parsed XTUDP: %+v", matchData) if matchData.InverseFlags != 0 { diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 9313e1167..171b95c63 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/hostarch", "//pkg/marshal", "//pkg/marshal/primitive", "//pkg/sentry/arch", diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go index 0899c61d1..ab0e68af7 100644 --- a/pkg/sentry/socket/netlink/message.go +++ b/pkg/sentry/socket/netlink/message.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // alignPad returns the length of padding required for alignment. @@ -42,7 +42,7 @@ type Message struct { func NewMessage(hdr linux.NetlinkMessageHeader) *Message { return &Message{ hdr: hdr, - buf: binary.Marshal(nil, usermem.ByteOrder, hdr), + buf: binary.Marshal(nil, hostarch.ByteOrder, hdr), } } @@ -58,7 +58,7 @@ func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) { return } var hdr linux.NetlinkMessageHeader - binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + binary.Unmarshal(hdrBytes, hostarch.ByteOrder, &hdr) // Msg portion. totalMsgLen := int(hdr.Length) @@ -105,7 +105,7 @@ func (m *Message) GetData(msg interface{}) (AttrsView, bool) { if !ok { return nil, false } - binary.Unmarshal(msgBytes, usermem.ByteOrder, msg) + binary.Unmarshal(msgBytes, hostarch.ByteOrder, msg) numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO) // Linux permits the last message not being aligned, just consume all of it. @@ -126,7 +126,7 @@ func (m *Message) GetData(msg interface{}) (AttrsView, bool) { // calling Finalize. func (m *Message) Finalize() []byte { // Update length, which is the first 4 bytes of the header. - usermem.ByteOrder.PutUint32(m.buf, uint32(len(m.buf))) + hostarch.ByteOrder.PutUint32(m.buf, uint32(len(m.buf))) // Align the message. Note that the message length in the header (set // above) is the useful length of the message, not the total aligned @@ -146,7 +146,7 @@ func (m *Message) putZeros(n int) { // Put serializes v into the message. func (m *Message) Put(v interface{}) { - m.buf = binary.Marshal(m.buf, usermem.ByteOrder, v) + m.buf = binary.Marshal(m.buf, hostarch.ByteOrder, v) } // PutAttr adds v to the message as a netlink attribute. @@ -251,7 +251,7 @@ func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest if !ok { return } - binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + binary.Unmarshal(hdrBytes, hostarch.ByteOrder, &hdr) value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize) if !ok { diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index d5ffc75ce..30c297149 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -222,7 +223,7 @@ func ExtractSockAddr(b []byte) (*linux.SockAddrNetlink, *syserr.Error) { } var sa linux.SockAddrNetlink - binary.Unmarshal(b[:linux.SockAddrNetlinkSize], usermem.ByteOrder, &sa) + binary.Unmarshal(b[:linux.SockAddrNetlinkSize], hostarch.ByteOrder, &sa) if sa.Family != linux.AF_NETLINK { return nil, syserr.ErrInvalidArgument @@ -327,7 +328,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error { } // GetSockOpt implements socket.Socket.GetSockOpt. -func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { switch level { case linux.SOL_SOCKET: switch name { @@ -388,7 +389,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt [] if len(opt) < sizeOfInt32 { return syserr.ErrInvalidArgument } - size := usermem.ByteOrder.Uint32(opt) + size := hostarch.ByteOrder.Uint32(opt) if size < minSendBufferSize { size = minSendBufferSize } else if size > maxSendBufferSize { @@ -411,7 +412,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt [] if len(opt) < sizeOfInt32 { return syserr.ErrInvalidArgument } - passcred := usermem.ByteOrder.Uint32(opt) + passcred := hostarch.ByteOrder.Uint32(opt) s.ep.SocketOptions().SetPassCred(passcred != 0) return nil diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 244d99436..0b39a5b67 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 64e70ab9d..ed6572bab 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -600,7 +601,7 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { return syserr.ErrInvalidArgument } - family := usermem.ByteOrder.Uint16(sockaddr) + family := hostarch.ByteOrder.Uint16(sockaddr) var addr tcpip.FullAddress // Bind for AF_PACKET requires only family, protocol and ifindex. @@ -611,7 +612,7 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if len(sockaddr) < sockAddrLinkSize { return syserr.ErrInvalidArgument } - binary.Unmarshal(sockaddr[:sockAddrLinkSize], usermem.ByteOrder, &a) + binary.Unmarshal(sockaddr[:sockAddrLinkSize], hostarch.ByteOrder, &a) if a.Protocol != uint16(s.protocol) { return syserr.ErrInvalidArgument @@ -757,7 +758,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error { // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // tcpip.Endpoint. -func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is // implemented specifically for netstack.SocketOperations rather than // commonEndpoint. commonEndpoint should be extended to support socket @@ -793,7 +794,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us // GetSockOpt can be used to implement the linux syscall getsockopt(2) for // sockets backed by a commonEndpoint. -func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { switch level { case linux.SOL_SOCKET: return getSockOptSocket(t, s, ep, family, skType, name, outLen) @@ -1244,7 +1245,7 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name, } // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6. -func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { if _, ok := ep.(tcpip.Endpoint); !ok { log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name) return nil, syserr.ErrUnknownProtocolOption @@ -1392,7 +1393,7 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name } // getSockOptIP implements GetSockOpt when level is SOL_IP. -func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) { +func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) { if _, ok := ep.(tcpip.Endpoint); !ok { log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name) return nil, syserr.ErrUnknownProtocolOption @@ -1602,7 +1603,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa } s.readMu.Lock() defer s.readMu.Unlock() - s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0 + s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0 return nil } if level == linux.SOL_TCP && name == linux.TCP_INQ { @@ -1611,7 +1612,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa } s.readMu.Lock() defer s.readMu.Unlock() - s.sockOptInq = usermem.ByteOrder.Uint32(optVal) != 0 + s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0 return nil } @@ -1659,7 +1660,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetSendBufferSize(int64(v), true) return nil @@ -1668,7 +1669,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, int(v))) case linux.SO_REUSEADDR: @@ -1676,7 +1677,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetReuseAddress(v != 0) return nil @@ -1685,7 +1686,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetReusePort(v != 0) return nil @@ -1714,7 +1715,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetBroadcast(v != 0) return nil @@ -1723,7 +1724,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetPassCred(v != 0) return nil @@ -1732,7 +1733,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetKeepAlive(v != 0) return nil @@ -1742,7 +1743,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam } var v linux.Timeval - binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v) + binary.Unmarshal(optVal[:linux.SizeOfTimeval], hostarch.ByteOrder, &v) if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { return syserr.ErrDomain } @@ -1755,7 +1756,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam } var v linux.Timeval - binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v) + binary.Unmarshal(optVal[:linux.SizeOfTimeval], hostarch.ByteOrder, &v) if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { return syserr.ErrDomain } @@ -1767,7 +1768,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) if v == 0 { socket.SetSockOptEmitUnimplementedEvent(t, name) @@ -1781,7 +1782,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetNoChecksum(v != 0) return nil @@ -1791,7 +1792,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam } var v linux.Linger - binary.Unmarshal(optVal[:linux.SizeOfLinger], usermem.ByteOrder, &v) + binary.Unmarshal(optVal[:linux.SizeOfLinger], hostarch.ByteOrder, &v) ep.SocketOptions().SetLinger(tcpip.LingerOption{ Enabled: v.OnOff != 0, @@ -1824,7 +1825,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetDelayOption(v == 0) return nil @@ -1833,7 +1834,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetCorkOption(v != 0) return nil @@ -1842,7 +1843,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetQuickAck(v != 0) return nil @@ -1851,7 +1852,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MaxSegOption, int(v))) case linux.TCP_KEEPIDLE: @@ -1859,7 +1860,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) if v < 1 || v > linux.MAX_TCP_KEEPIDLE { return syserr.ErrInvalidArgument } @@ -1871,7 +1872,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) if v < 1 || v > linux.MAX_TCP_KEEPINTVL { return syserr.ErrInvalidArgument } @@ -1883,7 +1884,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) if v < 1 || v > linux.MAX_TCP_KEEPCNT { return syserr.ErrInvalidArgument } @@ -1894,7 +1895,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := int32(usermem.ByteOrder.Uint32(optVal)) + v := int32(hostarch.ByteOrder.Uint32(optVal)) if v < 0 { return syserr.ErrInvalidArgument } @@ -1913,7 +1914,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i return syserr.ErrInvalidArgument } - v := int32(usermem.ByteOrder.Uint32(optVal)) + v := int32(hostarch.ByteOrder.Uint32(optVal)) opt := tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v)) return syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) @@ -1921,7 +1922,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument } - v := int32(usermem.ByteOrder.Uint32(optVal)) + v := int32(hostarch.ByteOrder.Uint32(optVal)) if v < 0 { v = 0 } @@ -1932,7 +1933,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v))) @@ -1940,7 +1941,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v))) @@ -1978,7 +1979,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name return syserr.ErrInvalidEndpointState } - v := usermem.ByteOrder.Uint32(optVal) + v := hostarch.ByteOrder.Uint32(optVal) ep.SocketOptions().SetV6Only(v != 0) return nil @@ -2024,7 +2025,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument } - v := int32(usermem.ByteOrder.Uint32(optVal)) + v := int32(hostarch.ByteOrder.Uint32(optVal)) ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0) return nil @@ -2033,7 +2034,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument } - v := int32(usermem.ByteOrder.Uint32(optVal)) + v := int32(hostarch.ByteOrder.Uint32(optVal)) if v < -1 || v > 255 { return syserr.ErrInvalidArgument } @@ -2117,12 +2118,12 @@ func copyInMulticastRequest(optVal []byte, allowAddr bool) (linux.InetMulticastR if len(optVal) >= inetMulticastRequestWithNICSize { var req linux.InetMulticastRequestWithNIC - binary.Unmarshal(optVal[:inetMulticastRequestWithNICSize], usermem.ByteOrder, &req) + binary.Unmarshal(optVal[:inetMulticastRequestWithNICSize], hostarch.ByteOrder, &req) return req, nil } var req linux.InetMulticastRequestWithNIC - binary.Unmarshal(optVal[:inetMulticastRequestSize], usermem.ByteOrder, &req.InetMulticastRequest) + binary.Unmarshal(optVal[:inetMulticastRequestSize], hostarch.ByteOrder, &req.InetMulticastRequest) return req, nil } @@ -2132,7 +2133,7 @@ func copyInMulticastV6Request(optVal []byte) (linux.Inet6MulticastRequest, *syse } var req linux.Inet6MulticastRequest - binary.Unmarshal(optVal[:inet6MulticastRequestSize], usermem.ByteOrder, &req) + binary.Unmarshal(optVal[:inet6MulticastRequestSize], hostarch.ByteOrder, &req) return req, nil } @@ -2145,7 +2146,7 @@ func parseIntOrChar(buf []byte) (int32, *syserr.Error) { } if len(buf) >= sizeOfInt32 { - return int32(usermem.ByteOrder.Uint32(buf)), nil + return int32(hostarch.ByteOrder.Uint32(buf)), nil } return int32(buf[0]), nil @@ -3007,7 +3008,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe if arg == linux.SIOCGIFNAME { // Gets the name of the interface given the interface index // stored in ifr_ifindex. - index = int32(usermem.ByteOrder.Uint32(ifr.Data[:4])) + index = int32(hostarch.ByteOrder.Uint32(ifr.Data[:4])) if iface, ok := stack.Interfaces()[index]; ok { ifr.SetName(iface.Name) return nil @@ -3029,7 +3030,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe switch arg { case linux.SIOCGIFINDEX: // Copy out the index to the data. - usermem.ByteOrder.PutUint32(ifr.Data[:], uint32(index)) + hostarch.ByteOrder.PutUint32(ifr.Data[:], uint32(index)) case linux.SIOCGIFHWADDR: // Copy the hardware address out. @@ -3042,7 +3043,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe // sockaddr. sa_family contains the ARPHRD_* device type, // sa_data the L2 hardware address starting from byte 0. Setting // the hardware address is a privileged operation. - usermem.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType) + hostarch.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType) n := copy(ifr.Data[2:], iface.Addr) for i := 2 + n; i < len(ifr.Data); i++ { ifr.Data[i] = 0 // Clear padding. @@ -3055,7 +3056,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe } // Drop the flags that don't fit in the size that we need to return. This // matches Linux behavior. - usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(f)) + hostarch.ByteOrder.PutUint16(ifr.Data[:2], uint16(f)) case linux.SIOCGIFADDR: // Copy the IPv4 address out. @@ -3071,11 +3072,11 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe case linux.SIOCGIFMETRIC: // Gets the metric of the device. As per netdevice(7), this // always just sets ifr_metric to 0. - usermem.ByteOrder.PutUint32(ifr.Data[:4], 0) + hostarch.ByteOrder.PutUint32(ifr.Data[:4], 0) case linux.SIOCGIFMTU: // Gets the MTU of the device. - usermem.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU) + hostarch.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU) case linux.SIOCGIFMAP: // Gets the hardware parameters of the device. @@ -3101,8 +3102,8 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe continue } // Populate ifr.ifr_netmask (type sockaddr). - usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(linux.AF_INET)) - usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0) + hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(linux.AF_INET)) + hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0) var mask uint32 = 0xffffffff << (32 - addr.PrefixLen) // Netmask is expected to be returned as a big endian // value. @@ -3157,14 +3158,14 @@ func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux. // Populate ifr.ifr_addr. ifr := linux.IFReq{} ifr.SetName(iface.Name) - usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) - usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0) + hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) + hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0) copy(ifr.Data[4:8], ifaceAddr.Addr[:4]) // Copy the ifr to userspace. dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) ifc.Len += int32(linux.SizeOfIFReq) - if _, err := ifr.CopyOut(t, usermem.Addr(dst)); err != nil { + if _, err := ifr.CopyOut(t, hostarch.Addr(dst)); err != nil { return err } } diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index fc29f8f13..30f3ad153 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -17,6 +17,7 @@ package netstack import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -197,7 +198,7 @@ func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // tcpip.Endpoint. -func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is // implemented specifically for netstack.SocketVFS2 rather than // commonEndpoint. commonEndpoint should be extended to support socket @@ -245,7 +246,7 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by } s.readMu.Lock() defer s.readMu.Unlock() - s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0 + s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0 return nil } if level == linux.SOL_TCP && name == linux.TCP_INQ { @@ -254,7 +255,7 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by } s.readMu.Lock() defer s.readMu.Unlock() - s.sockOptInq = usermem.ByteOrder.Uint32(optVal) != 0 + s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0 return nil } diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 909341dcf..4c3d48096 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -216,7 +217,7 @@ type SocketOps interface { Shutdown(t *kernel.Task, how int) *syserr.Error // GetSockOpt implements the getsockopt(2) linux unix. - GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) + GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) // SetSockOpt implements the setsockopt(2) linux unix. SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error @@ -356,7 +357,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent { Type: fs.Socket, DeviceID: d.DeviceID(), InodeID: ino, - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, }) // Dirent name matches net/socket.c:sockfs_dname. @@ -571,19 +572,19 @@ func UnmarshalSockAddr(family int, data []byte) linux.SockAddr { switch family { case unix.AF_INET: var addr linux.SockAddrInet - binary.Unmarshal(data[:unix.SizeofSockaddrInet4], usermem.ByteOrder, &addr) + binary.Unmarshal(data[:unix.SizeofSockaddrInet4], hostarch.ByteOrder, &addr) return &addr case unix.AF_INET6: var addr linux.SockAddrInet6 - binary.Unmarshal(data[:unix.SizeofSockaddrInet6], usermem.ByteOrder, &addr) + binary.Unmarshal(data[:unix.SizeofSockaddrInet6], hostarch.ByteOrder, &addr) return &addr case unix.AF_UNIX: var addr linux.SockAddrUnix - binary.Unmarshal(data[:unix.SizeofSockaddrUnix], usermem.ByteOrder, &addr) + binary.Unmarshal(data[:unix.SizeofSockaddrUnix], hostarch.ByteOrder, &addr) return &addr case unix.AF_NETLINK: var addr linux.SockAddrNetlink - binary.Unmarshal(data[:unix.SizeofSockaddrNetlink], usermem.ByteOrder, &addr) + binary.Unmarshal(data[:unix.SizeofSockaddrNetlink], hostarch.ByteOrder, &addr) return &addr default: panic(fmt.Sprintf("Unsupported socket family %v", family)) @@ -693,7 +694,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { } // Get the rest of the fields based on the address family. - switch family := usermem.ByteOrder.Uint16(addr); family { + switch family := hostarch.ByteOrder.Uint16(addr); family { case linux.AF_UNIX: path := addr[2:] if len(path) > linux.UnixPathMax { @@ -715,7 +716,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { if len(addr) < sockAddrInetSize { return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument } - binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a) + binary.Unmarshal(addr[:sockAddrInetSize], hostarch.ByteOrder, &a) out := tcpip.FullAddress{ Addr: BytesToIPAddress(a.Addr[:]), @@ -728,7 +729,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { if len(addr) < sockAddrInet6Size { return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument } - binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a) + binary.Unmarshal(addr[:sockAddrInet6Size], hostarch.ByteOrder, &a) out := tcpip.FullAddress{ Addr: BytesToIPAddress(a.Addr[:]), @@ -744,7 +745,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { if len(addr) < sockAddrLinkSize { return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument } - binary.Unmarshal(addr[:sockAddrLinkSize], usermem.ByteOrder, &a) + binary.Unmarshal(addr[:sockAddrLinkSize], hostarch.ByteOrder, &a) if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize { return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument } diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index ff53a26b7..c9cbefb3a 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -40,6 +40,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/refs", diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index b22f7973a..db7b1affe 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -192,7 +193,7 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // a transport.Endpoint. -func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen) } diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 7890d1048..c39e317ff 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" @@ -112,7 +113,7 @@ func (s *SocketVFS2) Release(ctx context.Context) { // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // a transport.Endpoint. -func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { +func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen) } diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD index 1b7fd2232..2ebd77f82 100644 --- a/pkg/sentry/strace/BUILD +++ b/pkg/sentry/strace/BUILD @@ -28,6 +28,7 @@ go_library( "//pkg/binary", "//pkg/bits", "//pkg/eventchannel", + "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/seccomp", "//pkg/sentry/arch", @@ -35,7 +36,6 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/netlink", "//pkg/sentry/syscalls/linux", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/strace/epoll.go b/pkg/sentry/strace/epoll.go index ae3b998c8..48650e3f9 100644 --- a/pkg/sentry/strace/epoll.go +++ b/pkg/sentry/strace/epoll.go @@ -21,10 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) -func epollEvent(t *kernel.Task, eventAddr usermem.Addr) string { +func epollEvent(t *kernel.Task, eventAddr hostarch.Addr) string { var e linux.EpollEvent if _, err := e.CopyIn(t, eventAddr); err != nil { return fmt.Sprintf("%#x {error reading event: %v}", eventAddr, err) @@ -35,7 +36,7 @@ func epollEvent(t *kernel.Task, eventAddr usermem.Addr) string { return sb.String() } -func epollEvents(t *kernel.Task, eventsAddr usermem.Addr, numEvents, maxBytes uint64) string { +func epollEvents(t *kernel.Task, eventsAddr hostarch.Addr, numEvents, maxBytes uint64) string { var sb strings.Builder fmt.Fprintf(&sb, "%#x {", eventsAddr) addr := eventsAddr diff --git a/pkg/sentry/strace/poll.go b/pkg/sentry/strace/poll.go index 074e80f9b..572a8b50b 100644 --- a/pkg/sentry/strace/poll.go +++ b/pkg/sentry/strace/poll.go @@ -22,7 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // PollEventSet is the set of poll(2) event flags. @@ -52,7 +53,7 @@ func pollFD(t *kernel.Task, pfd *linux.PollFD, post bool) string { return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, pfd.FD), PollEventSet.Parse(uint64(pfd.Events)), revents) } -func pollFDs(t *kernel.Task, addr usermem.Addr, nfds uint, post bool) string { +func pollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint, post bool) string { if addr == 0 { return "null" } diff --git a/pkg/sentry/strace/select.go b/pkg/sentry/strace/select.go index 3a4c32aa0..e6e928157 100644 --- a/pkg/sentry/strace/select.go +++ b/pkg/sentry/strace/select.go @@ -19,7 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) func fdsFromSet(t *kernel.Task, set []byte) []int { @@ -35,7 +36,7 @@ func fdsFromSet(t *kernel.Task, set []byte) []int { return fds } -func fdSet(t *kernel.Task, nfds int, addr usermem.Addr) string { +func fdSet(t *kernel.Task, nfds int, addr hostarch.Addr) string { if nfds < 0 { return fmt.Sprintf("%#x (negative nfds)", addr) } diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go index c41f36e3f..e5b379a20 100644 --- a/pkg/sentry/strace/signal.go +++ b/pkg/sentry/strace/signal.go @@ -21,7 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // signalNames contains the names of all named signals. @@ -100,7 +101,7 @@ var sigActionFlags = abi.FlagSet{ }, } -func sigSet(t *kernel.Task, addr usermem.Addr) string { +func sigSet(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -110,7 +111,7 @@ func sigSet(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x (error copying sigset: %v)", addr, err) } - set := linux.SignalSet(usermem.ByteOrder.Uint64(b[:])) + set := linux.SignalSet(hostarch.ByteOrder.Uint64(b[:])) return fmt.Sprintf("%#x %s", addr, formatSigSet(set)) } @@ -124,7 +125,7 @@ func formatSigSet(set linux.SignalSet) string { return fmt.Sprintf("[%v]", strings.Join(signals, " ")) } -func sigAction(t *kernel.Task, addr usermem.Addr) string { +func sigAction(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index d943a7cb1..e5b7f9b96 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -26,7 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netlink" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // SocketFamily are the possible socket(2) families. @@ -161,7 +162,7 @@ var controlMessageType = map[int32]string{ linux.SO_TIMESTAMP: "SO_TIMESTAMP", } -func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) string { +func cmsghdr(t *kernel.Task, addr hostarch.Addr, length uint64, maxBytes uint64) string { if length > maxBytes { return fmt.Sprintf("%#x (error decoding control: invalid length (%d))", addr, length) } @@ -180,7 +181,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) } var h linux.ControlMessageHeader - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], usermem.ByteOrder, &h) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], hostarch.ByteOrder, &h) var skipData bool level := "SOL_SOCKET" @@ -230,7 +231,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) numRights := rightsSize / linux.SizeOfControlMessageRight fds := make(linux.ControlMessageRights, numRights) - binary.Unmarshal(buf[i:i+rightsSize], usermem.ByteOrder, &fds) + binary.Unmarshal(buf[i:i+rightsSize], hostarch.ByteOrder, &fds) rights := make([]string, 0, len(fds)) for _, fd := range fds { @@ -257,7 +258,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) } var creds linux.ControlMessageCredentials - binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], usermem.ByteOrder, &creds) + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], hostarch.ByteOrder, &creds) strs = append(strs, fmt.Sprintf( "{level=%s, type=%s, length=%d, pid: %d, uid: %d, gid: %d}", @@ -281,7 +282,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) } var tv linux.Timeval - binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], usermem.ByteOrder, &tv) + binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], hostarch.ByteOrder, &tv) strs = append(strs, fmt.Sprintf( "{level=%s, type=%s, length=%d, Sec: %d, Usec: %d}", @@ -301,7 +302,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) return fmt.Sprintf("%#x %s", addr, strings.Join(strs, ", ")) } -func msghdr(t *kernel.Task, addr usermem.Addr, printContent bool, maxBytes uint64) string { +func msghdr(t *kernel.Task, addr hostarch.Addr, printContent bool, maxBytes uint64) string { var msg slinux.MessageHeader64 if _, err := msg.CopyIn(t, addr); err != nil { return fmt.Sprintf("%#x (error decoding msghdr: %v)", addr, err) @@ -311,17 +312,17 @@ func msghdr(t *kernel.Task, addr usermem.Addr, printContent bool, maxBytes uint6 addr, msg.Name, msg.NameLen, - iovecs(t, usermem.Addr(msg.Iov), int(msg.IovLen), printContent, maxBytes), + iovecs(t, hostarch.Addr(msg.Iov), int(msg.IovLen), printContent, maxBytes), ) if printContent { - s = fmt.Sprintf("%s, control={%s}", s, cmsghdr(t, usermem.Addr(msg.Control), msg.ControlLen, maxBytes)) + s = fmt.Sprintf("%s, control={%s}", s, cmsghdr(t, hostarch.Addr(msg.Control), msg.ControlLen, maxBytes)) } else { s = fmt.Sprintf("%s, control=%#x, control_len=%d", s, msg.Control, msg.ControlLen) } return fmt.Sprintf("%s, flags=%d}", s, msg.Flags) } -func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string { +func sockAddr(t *kernel.Task, addr hostarch.Addr, length uint32) string { if addr == 0 { return "null" } @@ -335,7 +336,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string { if len(b) < 2 { return fmt.Sprintf("%#x {address too short: %d bytes}", addr, len(b)) } - family := usermem.ByteOrder.Uint16(b) + family := hostarch.ByteOrder.Uint16(b) familyStr := SocketFamily.Parse(uint64(family)) @@ -362,7 +363,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string { } } -func postSockAddr(t *kernel.Task, addr usermem.Addr, lengthPtr usermem.Addr) string { +func postSockAddr(t *kernel.Task, addr hostarch.Addr, lengthPtr hostarch.Addr) string { if addr == 0 { return "null" } @@ -379,14 +380,14 @@ func postSockAddr(t *kernel.Task, addr usermem.Addr, lengthPtr usermem.Addr) str return sockAddr(t, addr, l) } -func copySockLen(t *kernel.Task, addr usermem.Addr) (uint32, error) { +func copySockLen(t *kernel.Task, addr hostarch.Addr) (uint32, error) { // socklen_t is 32-bits. var l primitive.Uint32 _, err := l.CopyIn(t, addr) return uint32(l), err } -func sockLenPointer(t *kernel.Task, addr usermem.Addr) string { +func sockLenPointer(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -420,7 +421,7 @@ func sockFlags(flags int32) string { return SocketFlagSet.Parse(uint64(flags)) } -func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen usermem.Addr, maximumBlobSize uint, rval uintptr) string { +func getSockOptVal(t *kernel.Task, level, optname uint64, optVal hostarch.Addr, optLen hostarch.Addr, maximumBlobSize uint, rval uintptr) string { if int(rval) < 0 { return hexNum(uint64(optVal)) } @@ -434,7 +435,7 @@ func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, o return sockOptVal(t, level, optname, optVal, uint64(l), maximumBlobSize) } -func sockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen uint64, maximumBlobSize uint) string { +func sockOptVal(t *kernel.Task, level, optname uint64, optVal hostarch.Addr, optLen uint64, maximumBlobSize uint) string { switch optLen { case 1: var v primitive.Uint8 diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 396744597..ec5d5f846 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -32,7 +32,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" pb "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // DefaultLogMaximumSize is the default LogMaximumSize. @@ -62,7 +63,7 @@ func hexArg(arg arch.SyscallArgument) string { return hexNum(arg.Uint64()) } -func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, maxBytes uint64) string { +func iovecs(t *kernel.Task, addr hostarch.Addr, iovcnt int, printContent bool, maxBytes uint64) string { if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV { return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr) } @@ -107,7 +108,7 @@ func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, ma return fmt.Sprintf("%#x %s", addr, strings.Join(iovs, ", ")) } -func dump(t *kernel.Task, addr usermem.Addr, size uint, maximumBlobSize uint) string { +func dump(t *kernel.Task, addr hostarch.Addr, size uint, maximumBlobSize uint) string { origSize := size if size > maximumBlobSize { size = maximumBlobSize @@ -131,7 +132,7 @@ func dump(t *kernel.Task, addr usermem.Addr, size uint, maximumBlobSize uint) st return fmt.Sprintf("%#x %q%s", addr, b[:amt], dot) } -func path(t *kernel.Task, addr usermem.Addr) string { +func path(t *kernel.Task, addr hostarch.Addr) string { path, err := t.CopyInString(addr, linux.PATH_MAX) if err != nil { return fmt.Sprintf("%#x (error decoding path: %s)", addr, err) @@ -196,7 +197,7 @@ func fdVFS2(t *kernel.Task, fd int32) string { return fmt.Sprintf("%#x %s", fd, name) } -func fdpair(t *kernel.Task, addr usermem.Addr) string { +func fdpair(t *kernel.Task, addr hostarch.Addr) string { var fds [2]int32 _, err := primitive.CopyInt32SliceIn(t, addr, fds[:]) if err != nil { @@ -206,7 +207,7 @@ func fdpair(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x [%d %d]", addr, fds[0], fds[1]) } -func uname(t *kernel.Task, addr usermem.Addr) string { +func uname(t *kernel.Task, addr hostarch.Addr) string { var u linux.UtsName if _, err := u.CopyIn(t, addr); err != nil { return fmt.Sprintf("%#x (error decoding utsname: %s)", addr, err) @@ -215,7 +216,7 @@ func uname(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x %s", addr, u) } -func utimensTimespec(t *kernel.Task, addr usermem.Addr) string { +func utimensTimespec(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -237,7 +238,7 @@ func utimensTimespec(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {sec=%v nsec=%s}", addr, tim.Sec, ns) } -func timespec(t *kernel.Task, addr usermem.Addr) string { +func timespec(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -249,7 +250,7 @@ func timespec(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {sec=%v nsec=%v}", addr, tim.Sec, tim.Nsec) } -func timeval(t *kernel.Task, addr usermem.Addr) string { +func timeval(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -262,7 +263,7 @@ func timeval(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {sec=%v usec=%v}", addr, tim.Sec, tim.Usec) } -func utimbuf(t *kernel.Task, addr usermem.Addr) string { +func utimbuf(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -275,7 +276,7 @@ func utimbuf(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {actime=%v, modtime=%v}", addr, utim.Actime, utim.Modtime) } -func stat(t *kernel.Task, addr usermem.Addr) string { +func stat(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -287,27 +288,27 @@ func stat(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {dev=%d, ino=%d, mode=%s, nlink=%d, uid=%d, gid=%d, rdev=%d, size=%d, blksize=%d, blocks=%d, atime=%s, mtime=%s, ctime=%s}", addr, stat.Dev, stat.Ino, linux.FileMode(stat.Mode), stat.Nlink, stat.UID, stat.GID, stat.Rdev, stat.Size, stat.Blksize, stat.Blocks, time.Unix(stat.ATime.Sec, stat.ATime.Nsec), time.Unix(stat.MTime.Sec, stat.MTime.Nsec), time.Unix(stat.CTime.Sec, stat.CTime.Nsec)) } -func itimerval(t *kernel.Task, addr usermem.Addr) string { +func itimerval(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } interval := timeval(t, addr) - value := timeval(t, addr+usermem.Addr((*linux.Timeval)(nil).SizeBytes())) + value := timeval(t, addr+hostarch.Addr((*linux.Timeval)(nil).SizeBytes())) return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value) } -func itimerspec(t *kernel.Task, addr usermem.Addr) string { +func itimerspec(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } interval := timespec(t, addr) - value := timespec(t, addr+usermem.Addr((*linux.Timespec)(nil).SizeBytes())) + value := timespec(t, addr+hostarch.Addr((*linux.Timespec)(nil).SizeBytes())) return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value) } -func stringVector(t *kernel.Task, addr usermem.Addr) string { +func stringVector(t *kernel.Task, addr hostarch.Addr) string { vec, err := t.CopyInVector(addr, slinux.ExecMaxElemSize, slinux.ExecMaxTotalSize) if err != nil { return fmt.Sprintf("%#x {error copying vector: %v}", addr, err) @@ -323,7 +324,7 @@ func stringVector(t *kernel.Task, addr usermem.Addr) string { return s } -func rusage(t *kernel.Task, addr usermem.Addr) string { +func rusage(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -335,7 +336,7 @@ func rusage(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x %+v", addr, ru) } -func capHeader(t *kernel.Task, addr usermem.Addr) string { +func capHeader(t *kernel.Task, addr hostarch.Addr) string { if addr == 0 { return "null" } @@ -360,7 +361,7 @@ func capHeader(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x {Version: %s, Pid: %d}", addr, version, hdr.Pid) } -func capData(t *kernel.Task, hdrAddr, dataAddr usermem.Addr) string { +func capData(t *kernel.Task, hdrAddr, dataAddr hostarch.Addr) string { if dataAddr == 0 { return "null" } diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 3dcf36a96..408a6c422 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -64,6 +64,7 @@ go_library( "//pkg/abi/linux", "//pkg/bpf", "//pkg/context", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index ac53a0c0e..2d2212605 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -18,11 +18,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) const ( @@ -405,7 +405,7 @@ var AMD64 = &kernel.SyscallTable{ 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), }, - Emulate: map[usermem.Addr]uintptr{ + Emulate: map[hostarch.Addr]uintptr{ 0xffffffffff600000: 96, // vsyscall gettimeofday(2) 0xffffffffff600400: 201, // vsyscall time(2) 0xffffffffff600800: 309, // vsyscall getcpu(2) @@ -723,7 +723,7 @@ var ARM64 = &kernel.SyscallTable{ 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), }, - Emulate: map[usermem.Addr]uintptr{}, + Emulate: map[hostarch.Addr]uintptr{}, Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { t.Kernel().EmitUnimplementedEvent(t) return 0, syserror.ENOSYS diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index 434559b80..e8c2d8f9e 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -16,9 +16,9 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // CopyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and @@ -27,7 +27,7 @@ import ( // TODO(gvisor.dev/issue/1624): This is only exported because // syscalls/vfs2/signal.go depends on it. Once vfs1 is deleted and the vfs2 // syscalls are moved into this package, then they can be unexported. -func CopyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.SignalSet, error) { +func CopyInSigSet(t *kernel.Task, sigSetAddr hostarch.Addr, size uint) (linux.SignalSet, error) { if size != linux.SignalSetSize { return 0, syserror.EINVAL } @@ -35,14 +35,14 @@ func CopyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.Sig if _, err := t.CopyInBytes(sigSetAddr, b); err != nil { return 0, err } - mask := usermem.ByteOrder.Uint64(b[:]) + mask := hostarch.ByteOrder.Uint64(b[:]) return linux.SignalSet(mask) &^ kernel.UnblockableSignals, nil } // copyOutSigSet copies out a sigset_t. -func copyOutSigSet(t *kernel.Task, sigSetAddr usermem.Addr, mask linux.SignalSet) error { +func copyOutSigSet(t *kernel.Task, sigSetAddr hostarch.Addr, mask linux.SignalSet) error { b := t.CopyScratchBuffer(8) - usermem.ByteOrder.PutUint64(b, uint64(mask)) + hostarch.ByteOrder.PutUint64(b, uint64(mask)) _, err := t.CopyOutBytes(sigSetAddr, b) return err } @@ -55,15 +55,15 @@ func copyOutSigSet(t *kernel.Task, sigSetAddr usermem.Addr, mask linux.SignalSet // }; // // and returns sigset_addr and size. -func copyInSigSetWithSize(t *kernel.Task, addr usermem.Addr) (usermem.Addr, uint, error) { +func copyInSigSetWithSize(t *kernel.Task, addr hostarch.Addr) (hostarch.Addr, uint, error) { switch t.Arch().Width() { case 8: in := t.CopyScratchBuffer(16) if _, err := t.CopyInBytes(addr, in); err != nil { return 0, 0, err } - maskAddr := usermem.Addr(usermem.ByteOrder.Uint64(in[0:])) - maskSize := uint(usermem.ByteOrder.Uint64(in[8:])) + maskAddr := hostarch.Addr(hostarch.ByteOrder.Uint64(in[0:])) + maskSize := uint(hostarch.ByteOrder.Uint64(in[8:])) return maskAddr, maskSize, nil default: return 0, 0, syserror.ENOSYS diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index c2285f796..70e8569a8 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -17,6 +17,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -152,7 +153,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S } // Keep rolling. - eventsAddr += usermem.Addr(linux.IOEventSize) + eventsAddr += hostarch.Addr(linux.IOEventSize) } // Everything finished. @@ -191,12 +192,12 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) // I/O. switch cb.OpCode { case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE: - return t.SingleIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{ + return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ AddressSpaceActive: false, }) case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV: - return t.IovecsIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{ + return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ AddressSpaceActive: false, }) @@ -219,7 +220,7 @@ func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // LINT.IfChange -func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback { +func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback { return func(ctx context.Context) { if actx.Dead() { actx.CancelPendingRequest() @@ -264,7 +265,7 @@ func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linu } // submitCallback processes a single callback. -func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr usermem.Addr) error { +func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error { file := t.GetFile(cb.FD) if file == nil { // File not found. @@ -339,7 +340,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := int32(0); i < nrEvents; i++ { // Copy in the callback address. - var cbAddr usermem.Addr + var cbAddr hostarch.Addr switch t.Arch().Width() { case 8: var cbAddrP primitive.Uint64 @@ -351,7 +352,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Nothing done. return 0, nil, err } - cbAddr = usermem.Addr(cbAddrP) + cbAddr = hostarch.Addr(cbAddrP) default: return 0, nil, syserror.ENOSYS } @@ -379,7 +380,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Advance to the next one. - addr += usermem.Addr(t.Arch().Width()) + addr += hostarch.Addr(t.Arch().Width()) } return uintptr(nrEvents), nil, nil diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index fd9649340..9cd238efd 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -18,6 +18,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -29,7 +30,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // fileOpAt performs an operation on the second last component in the path. @@ -115,7 +115,7 @@ func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(ro } // copyInPath copies a path in. -func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string, dirPath bool, err error) { +func copyInPath(t *kernel.Task, addr hostarch.Addr, allowEmpty bool) (path string, dirPath bool, err error) { path, err = t.CopyInString(addr, linux.PATH_MAX) if err != nil { return "", false, err @@ -133,7 +133,7 @@ func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string // LINT.IfChange -func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uintptr, err error) { +func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -208,7 +208,7 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint return fd, err // Use result in frame. } -func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { +func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -301,7 +301,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mknodAt(t, dirFD, path, mode) } -func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { +func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -515,7 +515,7 @@ func (ac accessContext) Value(key interface{}) interface{} { } } -func accessAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode uint) error { +func accessAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode uint) error { const rOK = 4 const wOK = 2 const xOK = 1 @@ -694,7 +694,7 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } // Top it off with a terminator. - _, err = t.CopyOutBytes(addr+usermem.Addr(bytes), []byte("\x00")) + _, err = t.CopyOutBytes(addr+hostarch.Addr(bytes), []byte("\x00")) return uintptr(bytes + 1), nil, err } @@ -1164,7 +1164,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // LINT.IfChange -func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { +func mkdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1216,7 +1216,7 @@ func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mkdirAt(t, dirFD, addr, mode) } -func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { +func rmdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1256,7 +1256,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr) } -func symlinkAt(t *kernel.Task, dirFD int32, newAddr usermem.Addr, oldAddr usermem.Addr) error { +func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hostarch.Addr) error { newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err @@ -1341,7 +1341,7 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error { // linkAt creates a hard link to the target specified by oldDirFD and oldAddr, // specified by newDirFD and newAddr. If resolve is true, then the symlinks // will be followed when evaluating the target. -func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr, resolve, allowEmpty bool) error { +func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr, resolve, allowEmpty bool) error { oldPath, _, err := copyInPath(t, oldAddr, allowEmpty) if err != nil { return err @@ -1448,7 +1448,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // LINT.IfChange -func readlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) { +func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarch.Addr, size uint) (copied uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -1511,7 +1511,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // LINT.IfChange -func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { +func unlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1728,7 +1728,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { return nil } -func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { +func chownAt(t *kernel.Task, fd int32, addr hostarch.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { path, _, err := copyInPath(t, addr, allowEmpty) if err != nil { return err @@ -1815,7 +1815,7 @@ func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error { return nil } -func chmodAt(t *kernel.Task, fd int32, addr usermem.Addr, mode linux.FileMode) error { +func chmodAt(t *kernel.Task, fd int32, addr hostarch.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1866,7 +1866,7 @@ func defaultSetToSystemTimeSpec() fs.TimeSpec { } } -func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error { +func utimes(t *kernel.Task, dirFD int32, addr hostarch.Addr, ts fs.TimeSpec, resolve bool) error { setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Does the task own the file? if !d.Inode.CheckOwnership(t) { @@ -2030,7 +2030,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // LINT.IfChange -func renameAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr) error { +func renameAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr) error { newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index f39ce0639..eeea1613b 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -18,11 +18,11 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // futexWaitRestartBlock encapsulates the state required to restart futex(2) @@ -41,7 +41,7 @@ type futexWaitRestartBlock struct { // Restart implements kernel.SyscallRestartBlock.Restart. func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) { - return futexWaitDuration(t, f.duration, false, usermem.Addr(f.addr), f.private, f.val, f.mask) + return futexWaitDuration(t, f.duration, false, hostarch.Addr(f.addr), f.private, f.val, f.mask) } // futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is @@ -51,7 +51,7 @@ func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) { // // If blocking is interrupted, the syscall is restarted with the original // arguments. -func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) { +func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool, val, mask uint32) (uintptr, error) { w := t.FutexWaiter() err := t.Futex().WaitPrepare(w, t, addr, private, val, mask) if err != nil { @@ -87,7 +87,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo // syscall. If forever is true, the syscall is restarted with the original // arguments. If forever is false, duration is a relative timeout and the // syscall is restarted with the remaining timeout. -func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) { +func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr hostarch.Addr, private bool, val, mask uint32) (uintptr, error) { w := t.FutexWaiter() err := t.Futex().WaitPrepare(w, t, addr, private, val, mask) if err != nil { @@ -124,7 +124,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add return 0, syserror.ERESTART_RESTARTBLOCK } -func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error { +func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool) error { w := t.FutexWaiter() locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, false) if err != nil { @@ -152,7 +152,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.A return syserror.ConvertIntr(err, syserror.ERESTARTSYS) } -func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error { +func tryLockPI(t *kernel.Task, addr hostarch.Addr, private bool) error { w := t.FutexWaiter() locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, true) if err != nil { diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index b25f7d881..bbba71d8f 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -19,6 +19,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -62,7 +63,7 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // getdents implements the core of getdents(2)/getdents64(2). // f is the syscall implementation dirent serialization function. -func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { +func getdents(t *kernel.Task, fd int32, addr hostarch.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { dir := t.GetFile(fd) if dir == nil { return 0, syserror.EBADF diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index 9b4a5c3f1..6d27f4292 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -31,7 +32,7 @@ const ( allowedNodemask = (1 << maxNodes) - 1 ) -func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, error) { +func copyInNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32) (uint64, error) { // "nodemask points to a bit mask of node IDs that contains up to maxnode // bits. The bit mask size is rounded to the next multiple of // sizeof(unsigned long), but the kernel will use bits only up to maxnode. @@ -41,7 +42,7 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, // because of what appears to be a bug: mm/mempolicy.c:get_nodes() uses // maxnode-1, not maxnode, as the number of bits. bits := maxnode - 1 - if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0 + if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0 return 0, syserror.EINVAL } if bits == 0 { @@ -53,7 +54,7 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, if _, err := t.CopyInBytes(addr, buf); err != nil { return 0, err } - val := usermem.ByteOrder.Uint64(buf) + val := hostarch.ByteOrder.Uint64(buf) // Check that only allowed bits in the first unsigned long in the nodemask // are set. if val&^allowedNodemask != 0 { @@ -68,11 +69,11 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, return val, nil } -func copyOutNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32, val uint64) error { +func copyOutNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32, val uint64) error { // mm/mempolicy.c:copy_nodes_to_user() also uses maxnode-1 as the number of // bits. bits := maxnode - 1 - if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0 + if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0 return syserror.EINVAL } if bits == 0 { @@ -80,7 +81,7 @@ func copyOutNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32, val uint } // Copy out the first unsigned long in the nodemask. buf := t.CopyScratchBuffer(8) - usermem.ByteOrder.PutUint64(buf, val) + hostarch.ByteOrder.PutUint64(buf, val) if _, err := t.CopyOutBytes(addr, buf); err != nil { return err } @@ -258,7 +259,7 @@ func Mbind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } -func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nodemask usermem.Addr, maxnode uint32) (linux.NumaPolicy, uint64, error) { +func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nodemask hostarch.Addr, maxnode uint32) (linux.NumaPolicy, uint64, error) { flags := linux.NumaPolicy(modeWithFlags & linux.MPOL_MODE_FLAGS) mode := linux.NumaPolicy(modeWithFlags &^ linux.MPOL_MODE_FLAGS) if flags == linux.MPOL_MODE_FLAGS { diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index cd8dfdfa4..70da0707d 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -23,7 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Brk implements linux syscall brk(2). @@ -61,12 +62,12 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC Unmap: fixed, Map32Bit: map32bit, Private: private, - Perms: usermem.AccessType{ + Perms: hostarch.AccessType{ Read: linux.PROT_READ&prot != 0, Write: linux.PROT_WRITE&prot != 0, Execute: linux.PROT_EXEC&prot != 0, }, - MaxPerms: usermem.AnyAccess, + MaxPerms: hostarch.AnyAccess, GrowsDown: linux.MAP_GROWSDOWN&flags != 0, Precommit: linux.MAP_POPULATE&flags != 0, } @@ -160,7 +161,7 @@ func Mremap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal func Mprotect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { length := args[1].Uint64() prot := args[2].Int() - err := t.MemoryManager().MProtect(args[0].Pointer(), length, usermem.AccessType{ + err := t.MemoryManager().MProtect(args[0].Pointer(), length, hostarch.AccessType{ Read: linux.PROT_READ&prot != 0, Write: linux.PROT_WRITE&prot != 0, Execute: linux.PROT_EXEC&prot != 0, @@ -183,7 +184,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, nil } // Not explicitly stated: length need not be page-aligned. - lenAddr, ok := usermem.Addr(length).RoundUp() + lenAddr, ok := hostarch.Addr(length).RoundUp() if !ok { return 0, nil, syserror.EINVAL } @@ -232,7 +233,7 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // "The length argument need not be a multiple of the page size, but since // residency information is returned for whole pages, length is effectively // rounded up to the next multiple of the page size." - mincore(2) - la, ok := usermem.Addr(length).RoundUp() + la, ok := hostarch.Addr(length).RoundUp() if !ok { return 0, nil, syserror.ENOMEM } @@ -247,7 +248,7 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if mapped != uint64(la) { return 0, nil, syserror.ENOMEM } - resident := bytes.Repeat([]byte{1}, int(mapped/usermem.PageSize)) + resident := bytes.Repeat([]byte{1}, int(mapped/hostarch.PageSize)) _, err := t.CopyOutBytes(vec, resident) return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index bd0633564..864d2138c 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -20,7 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Mount implements Linux syscall mount(2). @@ -31,7 +32,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall flags := args[3].Uint64() dataAddr := args[4].Pointer() - fsType, err := t.CopyInString(typeAddr, usermem.PageSize) + fsType, err := t.CopyInString(typeAddr, hostarch.PageSize) if err != nil { return 0, nil, err } @@ -52,7 +53,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // character placement, and the address is passed to each file system. // Most file systems always treat this data as a string, though, and so // do all of the ones we implement. - data, err = t.CopyInString(dataAddr, usermem.PageSize) + data, err = t.CopyInString(dataAddr, hostarch.PageSize) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index f7135ea46..d95034347 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -16,19 +16,19 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange // pipe2 implements the actual system call with flags. -func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { +func pipe2(t *kernel.Task, addr hostarch.Addr, flags uint) (uintptr, error) { if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { return 0, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index 254f4c9f9..da548a14a 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -18,13 +18,13 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -155,7 +155,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. } // CopyInPollFDs copies an array of struct pollfd unless nfds exceeds the max. -func CopyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD, error) { +func CopyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { return nil, syserror.EINVAL } @@ -170,7 +170,7 @@ func CopyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD return pfd, nil } -func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { +func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { pfd, err := CopyInPollFDs(t, addr, nfds) if err != nil { return timeout, 0, err @@ -198,7 +198,7 @@ func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) } // CopyInFDSet copies an fd set from select(2)/pselect(2). -func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) { +func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) { set := make([]byte, nBytes) if addr != 0 { @@ -215,7 +215,7 @@ func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialBy return set, nil } -func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Addr, timeout time.Duration) (uintptr, error) { +func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) { if nfds < 0 || nfds > fileCap { return 0, syserror.EINVAL } @@ -365,7 +365,7 @@ func timeoutRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration) // copyOutTimespecRemaining copies the time remaining in timeout to timespecAddr. // // startNs must be from CLOCK_MONOTONIC. -func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr usermem.Addr) error { +func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr hostarch.Addr) error { if timeout <= 0 { return nil } @@ -377,7 +377,7 @@ func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.D // copyOutTimevalRemaining copies the time remaining in timeout to timevalAddr. // // startNs must be from CLOCK_MONOTONIC. -func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr usermem.Addr) error { +func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr hostarch.Addr) error { if timeout <= 0 { return nil } @@ -391,7 +391,7 @@ func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Du // // +stateify savable type pollRestartBlock struct { - pfdAddr usermem.Addr + pfdAddr hostarch.Addr nfds uint timeout time.Duration } @@ -401,7 +401,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) { return poll(t, p.pfdAddr, p.nfds, p.timeout) } -func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (uintptr, error) { +func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) { remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout) // On an interrupt poll(2) is restarted with the remaining timeout. if err == syserror.EINTR { diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index c0aa0fd60..ae545f80f 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -24,6 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) const ( @@ -64,7 +66,7 @@ func GetRandom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if min > 256 { min = 256 } - n, err := t.MemoryManager().CopyOutFrom(t, usermem.AddrRangeSeqOf(ar), safemem.FromIOReader{&randReader{-1, min}}, usermem.IOOpts{ + n, err := t.MemoryManager().CopyOutFrom(t, hostarch.AddrRangeSeqOf(ar), safemem.FromIOReader{&randReader{-1, min}}, usermem.IOOpts{ AddressSpaceActive: true, }) if n >= int64(min) { diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index 88cd234d1..e64246d57 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -16,12 +16,12 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // rlimit describes an implementation of 'struct rlimit', which may vary from @@ -67,12 +67,12 @@ func (r *rlimit64) fromLimit(lim limits.Limit) { } } -func (r *rlimit64) copyIn(t *kernel.Task, addr usermem.Addr) error { +func (r *rlimit64) copyIn(t *kernel.Task, addr hostarch.Addr) error { _, err := r.CopyIn(t, addr) return err } -func (r *rlimit64) copyOut(t *kernel.Task, addr usermem.Addr) error { +func (r *rlimit64) copyOut(t *kernel.Task, addr hostarch.Addr) error { _, err := r.CopyOut(t, addr) return err } diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go index 4fdb4463c..e16d6ff3f 100644 --- a/pkg/sentry/syscalls/linux/sys_seccomp.go +++ b/pkg/sentry/syscalls/linux/sys_seccomp.go @@ -17,10 +17,10 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // userSockFprog is equivalent to Linux's struct sock_fprog on amd64. @@ -33,14 +33,14 @@ type userSockFprog struct { _ [6]byte // padding for alignment // Filter is a user pointer to the struct sock_filter array that makes up - // the filter program. Filter is a uint64 rather than a usermem.Addr - // because usermem.Addr is actually uintptr, which is not a fixed-size + // the filter program. Filter is a uint64 rather than a hostarch.Addr + // because hostarch.Addr is actually uintptr, which is not a fixed-size // type. Filter uint64 } // seccomp applies a seccomp policy to the current task. -func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error { +func seccomp(t *kernel.Task, mode, flags uint64, addr hostarch.Addr) error { // We only support SECCOMP_SET_MODE_FILTER at the moment. if mode != linux.SECCOMP_SET_MODE_FILTER { // Unsupported mode. @@ -60,7 +60,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error { return err } filter := make([]linux.BPFInstruction, int(fprog.Len)) - if _, err := linux.CopyBPFInstructionSliceIn(t, usermem.Addr(fprog.Filter), filter); err != nil { + if _, err := linux.CopyBPFInstructionSliceIn(t, hostarch.Addr(fprog.Filter), filter); err != nil { return err } compiledFilter, err := bpf.Compile(filter) diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index f0570d927..c84260080 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -19,13 +19,13 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) const opsMax = 500 // SEMOPM @@ -310,7 +310,7 @@ func setVal(t *kernel.Task, id int32, num int32, val int16) error { return set.SetVal(t, num, val, creds, int32(pid)) } -func setValAll(t *kernel.Task, id int32, array usermem.Addr) error { +func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { @@ -335,7 +335,7 @@ func getVal(t *kernel.Task, id int32, num int32) (int16, error) { return set.GetVal(num, creds) } -func getValAll(t *kernel.Task, id int32, array usermem.Addr) error { +func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index d639c9bf7..53b12dc41 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -19,12 +19,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/signalfd" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // "For a process to have permission to send a signal it must @@ -516,7 +516,7 @@ func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne } // sharedSignalfd is shared between the two calls. -func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) { +func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) { // Copy in the signal mask. mask, err := CopyInSigSet(t, sigset, sigsetsize) if err != nil { diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index c6adfe06b..0141e8a96 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -34,12 +35,6 @@ import ( // LINT.IfChange -// minListenBacklog is the minimum reasonable backlog for listening sockets. -const minListenBacklog = 8 - -// maxListenBacklog is the maximum allowed backlog for listening sockets. -const maxListenBacklog = 1024 - // maxAddrLen is the maximum socket address length we're willing to accept. const maxAddrLen = 200 @@ -117,7 +112,7 @@ type multipleMessageHeader64 struct { // CaptureAddress allocates memory for and copies a socket address structure // from the untrusted address space range. -func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) { +func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) { if addrlen > maxAddrLen { return nil, syserror.EINVAL } @@ -133,7 +128,7 @@ func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, // writeAddress writes a sockaddr structure and its length to an output buffer // in the unstrusted address space range. If the address is bigger than the // buffer, it is truncated. -func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error { +func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr hostarch.Addr, addrLenPtr hostarch.Addr) error { // Get the buffer length. var bufLen uint32 if _, err := primitive.CopyUint32In(t, addrLenPtr, &bufLen); err != nil { @@ -276,7 +271,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // accept is the implementation of the accept syscall. It is called by accept // and accept4 syscall handlers. -func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { +func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { return 0, syserror.EINVAL @@ -381,14 +376,6 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, syserror.ENOTSOCK } - // Per Linux, the backlog is silently capped to reasonable values. - if backlog <= 0 { - backlog = minListenBacklog - } - if backlog > maxListenBacklog { - backlog = maxListenBacklog - } - return 0, nil, s.Listen(t, int(backlog)).ToError() } @@ -472,7 +459,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // getSockOpt tries to handle common socket options, or dispatches to a specific // socket implementation. -func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) { +func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr hostarch.Addr, len int) (marshal.Marshallable, *syserr.Error) { if level == linux.SOL_SOCKET { switch name { case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: @@ -735,7 +722,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return uintptr(count), nil, nil } -func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { +func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { // Capture the message header and io vectors. var msg MessageHeader64 if _, err := msg.CopyIn(t, msgPtr); err != nil { @@ -745,7 +732,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i if msg.IovLen > linux.UIO_MAXIOV { return 0, syserror.EMSGSIZE } - dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, }) if err != nil { @@ -796,7 +783,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i // Copy the address to the caller. if msg.NameLen != 0 { - if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil { + if err := writeAddress(t, sender, senderLen, hostarch.Addr(msg.Name), hostarch.Addr(msgPtr+nameLenOffset)); err != nil { return 0, err } } @@ -806,7 +793,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i return 0, err } if len(controlData) > 0 { - if _, err := t.CopyOutBytes(usermem.Addr(msg.Control), controlData); err != nil { + if _, err := t.CopyOutBytes(hostarch.Addr(msg.Control), controlData); err != nil { return 0, err } } @@ -821,7 +808,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i // recvFrom is the implementation of the recvfrom syscall. It is called by // recvfrom and recv syscall handlers. -func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { +func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) { if int(bufLen) < 0 { return 0, syserror.EINVAL } @@ -997,7 +984,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return uintptr(count), nil, nil } -func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr usermem.Addr, flags int32) (uintptr, error) { +func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostarch.Addr, flags int32) (uintptr, error) { // Capture the message header. var msg MessageHeader64 if _, err := msg.CopyIn(t, msgPtr); err != nil { @@ -1011,7 +998,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme return 0, syserror.ENOBUFS } controlData = make([]byte, msg.ControlLen) - if _, err := t.CopyInBytes(usermem.Addr(msg.Control), controlData); err != nil { + if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil { return 0, err } } @@ -1020,7 +1007,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme var to []byte if msg.NameLen != 0 { var err error - to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen) + to, err = CaptureAddress(t, hostarch.Addr(msg.Name), msg.NameLen) if err != nil { return 0, err } @@ -1030,7 +1017,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme if msg.IovLen > linux.UIO_MAXIOV { return 0, syserror.EMSGSIZE } - src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, }) if err != nil { @@ -1064,7 +1051,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme // sendTo is the implementation of the sendto syscall. It is called by sendto // and send syscall handlers. -func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { +func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { return 0, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index cda29a8b5..2338ba44b 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -16,11 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -106,7 +106,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } // stat implements stat from the given *fs.Dirent. -func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr usermem.Addr) error { +func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr hostarch.Addr) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -120,7 +120,7 @@ func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr usermem.Addr) err } // fstat implements fstat for the given *fs.File. -func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error { +func fstat(t *kernel.Task, f *fs.File, statAddr hostarch.Addr) error { uattr, err := f.UnstableAttr(t) if err != nil { return err @@ -180,7 +180,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall }) } -func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error { +func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr hostarch.Addr) error { // "[T]he kernel may return fields that weren't requested and may fail to // return fields that were requested, depending on what the backing // filesystem supports. @@ -257,7 +257,7 @@ func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // statfsImpl implements the linux syscall statfs and fstatfs based on a Dirent, // copying the statfs structure out to addr on success, otherwise an error is // returned. -func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error { +func statfsImpl(t *kernel.Task, d *fs.Dirent, addr hostarch.Addr) error { info, err := d.Inode.StatFS(t) if err != nil { return err diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index b5f920949..3185ea527 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -19,6 +19,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -46,7 +47,7 @@ var ( ExecMaxTotalSize = 2 * 1024 * 1024 // ExecMaxElemSize is the maximum length of a single argv or envv entry. - ExecMaxElemSize = 32 * usermem.PageSize + ExecMaxElemSize = 32 * hostarch.PageSize ) // Getppid implements linux syscall getppid(2). @@ -88,7 +89,7 @@ func Execveat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return execveat(t, dirFD, pathnameAddr, argvAddr, envvAddr, flags) } -func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr usermem.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { +func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { pathname, err := t.CopyInString(pathnameAddr, linux.PATH_MAX) if err != nil { return 0, nil, err @@ -199,7 +200,7 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } // clone is used by Clone, Fork, and VFork. -func clone(t *kernel.Task, flags int, stack usermem.Addr, parentTID usermem.Addr, childTID usermem.Addr, tls usermem.Addr) (uintptr, *kernel.SyscallControl, error) { +func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) { opts := kernel.CloneOptions{ SharingOptions: kernel.SharingOptions{ NewAddressSpace: flags&linux.CLONE_VM == 0, @@ -274,7 +275,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error { } // wait4 waits for the given child process to exit. -func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) { +func wait4(t *kernel.Task, pid int, statusAddr hostarch.Addr, options int, rusageAddr hostarch.Addr) (uintptr, error) { if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { return 0, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index c5054d2f1..83b777bbd 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ -19,12 +19,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // The most significant 29 bits hold either a pid or a file descriptor. @@ -165,7 +165,7 @@ func Time(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC addr := args[0].Pointer() r := t.Kernel().RealtimeClock().Now().TimeT() - if addr == usermem.Addr(0) { + if addr == hostarch.Addr(0) { return uintptr(r), nil, nil } @@ -182,7 +182,7 @@ func Time(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC type clockNanosleepRestartBlock struct { c ktime.Clock duration time.Duration - rem usermem.Addr + rem hostarch.Addr } // Restart implements kernel.SyscallRestartBlock.Restart. @@ -221,7 +221,7 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, ts linux.Timespec) error // // If blocking is interrupted, the syscall is restarted with the remaining // duration timeout. -func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem usermem.Addr) error { +func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem hostarch.Addr) error { timer, start, tchan := ktime.After(c, dur) err := t.BlockWithTimer(nil, tchan) @@ -324,14 +324,14 @@ func Gettimeofday(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. tv := args[0].Pointer() tz := args[1].Pointer() - if tv != usermem.Addr(0) { + if tv != hostarch.Addr(0) { nowTv := t.Kernel().RealtimeClock().Now().Timeval() if err := copyTimevalOut(t, tv, &nowTv); err != nil { return 0, nil, err } } - if tz != usermem.Addr(0) { + if tz != hostarch.Addr(0) { // Ask the time package for the timezone. _, offset := time.Now().Zone() // This int32 array mimics linux's struct timezone. diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 97474fd3c..28ad6a60e 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -18,11 +18,11 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -87,7 +87,7 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink } // getXattr implements getxattr(2) from the given *fs.Dirent. -func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64) (int, error) { +func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, size uint64) (int, error) { name, err := copyInXattrName(t, nameAddr) if err != nil { return 0, err @@ -180,7 +180,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink } // setXattr implements setxattr(2) from the given *fs.Dirent. -func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64, flags uint32) error { +func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, size uint64, flags uint32) error { if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { return syserror.EINVAL } @@ -214,7 +214,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return nil } -func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { +func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) { name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) if err != nil { if err == syserror.ENAMETOOLONG { @@ -306,7 +306,7 @@ func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlin return uintptr(n), nil, nil } -func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) { +func listXattr(t *kernel.Task, d *fs.Dirent, addr hostarch.Addr, size uint64) (int, error) { if !xattrFileTypeOk(d.Inode) { return 0, nil } @@ -408,7 +408,7 @@ func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSyml } // removeXattr implements removexattr(2) from the given *fs.Dirent. -func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { +func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr hostarch.Addr) error { name, err := copyInXattrName(t, nameAddr) if err != nil { return err diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index ddc3ee26e..3edc922eb 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -18,13 +18,13 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // copyTimespecIn copies a Timespec from the untrusted app range to the kernel. -func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) { +func copyTimespecIn(t *kernel.Task, addr hostarch.Addr) (linux.Timespec, error) { switch t.Arch().Width() { case 8: ts := linux.Timespec{} @@ -33,8 +33,8 @@ func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) { if err != nil { return ts, err } - ts.Sec = int64(usermem.ByteOrder.Uint64(in[0:])) - ts.Nsec = int64(usermem.ByteOrder.Uint64(in[8:])) + ts.Sec = int64(hostarch.ByteOrder.Uint64(in[0:])) + ts.Nsec = int64(hostarch.ByteOrder.Uint64(in[8:])) return ts, nil default: return linux.Timespec{}, syserror.ENOSYS @@ -42,12 +42,12 @@ func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) { } // copyTimespecOut copies a Timespec to the untrusted app range. -func copyTimespecOut(t *kernel.Task, addr usermem.Addr, ts *linux.Timespec) error { +func copyTimespecOut(t *kernel.Task, addr hostarch.Addr, ts *linux.Timespec) error { switch t.Arch().Width() { case 8: out := t.CopyScratchBuffer(16) - usermem.ByteOrder.PutUint64(out[0:], uint64(ts.Sec)) - usermem.ByteOrder.PutUint64(out[8:], uint64(ts.Nsec)) + hostarch.ByteOrder.PutUint64(out[0:], uint64(ts.Sec)) + hostarch.ByteOrder.PutUint64(out[8:], uint64(ts.Nsec)) _, err := t.CopyOutBytes(addr, out) return err default: @@ -56,7 +56,7 @@ func copyTimespecOut(t *kernel.Task, addr usermem.Addr, ts *linux.Timespec) erro } // copyTimevalIn copies a Timeval from the untrusted app range to the kernel. -func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) { +func copyTimevalIn(t *kernel.Task, addr hostarch.Addr) (linux.Timeval, error) { switch t.Arch().Width() { case 8: tv := linux.Timeval{} @@ -65,8 +65,8 @@ func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) { if err != nil { return tv, err } - tv.Sec = int64(usermem.ByteOrder.Uint64(in[0:])) - tv.Usec = int64(usermem.ByteOrder.Uint64(in[8:])) + tv.Sec = int64(hostarch.ByteOrder.Uint64(in[0:])) + tv.Usec = int64(hostarch.ByteOrder.Uint64(in[8:])) return tv, nil default: return linux.Timeval{}, syserror.ENOSYS @@ -74,12 +74,12 @@ func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) { } // copyTimevalOut copies a Timeval to the untrusted app range. -func copyTimevalOut(t *kernel.Task, addr usermem.Addr, tv *linux.Timeval) error { +func copyTimevalOut(t *kernel.Task, addr hostarch.Addr, tv *linux.Timeval) error { switch t.Arch().Width() { case 8: out := t.CopyScratchBuffer(16) - usermem.ByteOrder.PutUint64(out[0:], uint64(tv.Sec)) - usermem.ByteOrder.PutUint64(out[8:], uint64(tv.Usec)) + hostarch.ByteOrder.PutUint64(out[0:], uint64(tv.Sec)) + hostarch.ByteOrder.PutUint64(out[8:], uint64(tv.Usec)) _, err := t.CopyOutBytes(addr, out) return err default: @@ -94,7 +94,7 @@ func copyTimevalOut(t *kernel.Task, addr usermem.Addr, tv *linux.Timeval) error // returned value is the maximum that Duration will allow. // // If timespecAddr is NULL, the returned value is negative. -func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.Duration, error) { +func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time.Duration, error) { // Use a negative Duration to indicate "no timeout". timeout := time.Duration(-1) if timespecAddr != 0 { diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index 2e59bd5b1..5ce0bc714 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -43,6 +43,7 @@ go_library( "//pkg/context", "//pkg/fspath", "//pkg/gohacks", + "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index de6789a65..fd1863ef3 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -26,6 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // IoSubmit implements linux syscall io_submit(2). @@ -40,7 +42,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := int32(0); i < nrEvents; i++ { // Copy in the callback address. - var cbAddr usermem.Addr + var cbAddr hostarch.Addr switch t.Arch().Width() { case 8: var cbAddrP primitive.Uint64 @@ -52,7 +54,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Nothing done. return 0, nil, err } - cbAddr = usermem.Addr(cbAddrP) + cbAddr = hostarch.Addr(cbAddrP) default: return 0, nil, syserror.ENOSYS } @@ -79,14 +81,14 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Advance to the next one. - addr += usermem.Addr(t.Arch().Width()) + addr += hostarch.Addr(t.Arch().Width()) } return uintptr(nrEvents), nil, nil } // submitCallback processes a single callback. -func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr usermem.Addr) error { +func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error { if cb.Reserved2 != 0 { return syserror.EINVAL } @@ -148,7 +150,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user return nil } -func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback { +func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback { return func(ctx context.Context) { // Release references after completing the callback. defer fd.DecRef(ctx) @@ -206,12 +208,12 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) // I/O. switch cb.OpCode { case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE: - return t.SingleIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{ + return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ AddressSpaceActive: false, }) case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV: - return t.IovecsIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{ + return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ AddressSpaceActive: false, }) diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go index 7a409620d..3315398a4 100644 --- a/pkg/sentry/syscalls/linux/vfs2/execve.go +++ b/pkg/sentry/syscalls/linux/vfs2/execve.go @@ -24,7 +24,8 @@ import ( slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Execve implements linux syscall execve(2). @@ -45,7 +46,7 @@ func Execveat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return execveat(t, dirfd, pathnameAddr, argvAddr, envvAddr, flags) } -func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr usermem.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { +func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index 01e0f9010..36aa1d3ae 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -20,7 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Link implements Linux syscall link(2). @@ -40,7 +41,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, linkat(t, olddirfd, oldpathAddr, newdirfd, newpathAddr, flags) } -func linkat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd int32, newpathAddr usermem.Addr, flags int32) error { +func linkat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd int32, newpathAddr hostarch.Addr, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_FOLLOW) != 0 { return syserror.EINVAL } @@ -86,7 +87,7 @@ func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mkdirat(t, dirfd, addr, mode) } -func mkdirat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode uint) error { +func mkdirat(t *kernel.Task, dirfd int32, addr hostarch.Addr, mode uint) error { path, err := copyInPath(t, addr) if err != nil { return err @@ -118,7 +119,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mknodat(t, dirfd, addr, linux.FileMode(mode), dev) } -func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode linux.FileMode, dev uint32) error { +func mknodat(t *kernel.Task, dirfd int32, addr hostarch.Addr, mode linux.FileMode, dev uint32) error { path, err := copyInPath(t, addr) if err != nil { return err @@ -165,7 +166,7 @@ func Creat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return openat(t, linux.AT_FDCWD, addr, linux.O_WRONLY|linux.O_CREAT|linux.O_TRUNC, mode) } -func openat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, flags uint32, mode uint) (uintptr, *kernel.SyscallControl, error) { +func openat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, flags uint32, mode uint) (uintptr, *kernel.SyscallControl, error) { path, err := copyInPath(t, pathAddr) if err != nil { return 0, nil, err @@ -217,7 +218,7 @@ func Renameat2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, renameat(t, olddirfd, oldpathAddr, newdirfd, newpathAddr, flags) } -func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd int32, newpathAddr usermem.Addr, flags uint32) error { +func renameat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd int32, newpathAddr hostarch.Addr, flags uint32) error { oldpath, err := copyInPath(t, oldpathAddr) if err != nil { return err @@ -250,7 +251,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, rmdirat(t, linux.AT_FDCWD, pathAddr) } -func rmdirat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error { +func rmdirat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr) error { path, err := copyInPath(t, pathAddr) if err != nil { return err @@ -269,7 +270,7 @@ func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, unlinkat(t, linux.AT_FDCWD, pathAddr) } -func unlinkat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error { +func unlinkat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr) error { path, err := copyInPath(t, pathAddr) if err != nil { return err @@ -313,7 +314,7 @@ func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, symlinkat(t, targetAddr, newdirfd, linkpathAddr) } -func symlinkat(t *kernel.Task, targetAddr usermem.Addr, newdirfd int32, linkpathAddr usermem.Addr) error { +func symlinkat(t *kernel.Task, targetAddr hostarch.Addr, newdirfd int32, linkpathAddr hostarch.Addr) error { target, err := t.CopyInString(targetAddr, linux.PATH_MAX) if err != nil { return err diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index 5517595b5..b41a3056a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -22,7 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Getdents implements Linux syscall getdents(2). @@ -58,7 +59,7 @@ func getdents(t *kernel.Task, args arch.SyscallArguments, isGetdents64 bool) (ui type getdentsCallback struct { t *kernel.Task - addr usermem.Addr + addr hostarch.Addr remaining int isGetdents64 bool } @@ -69,7 +70,7 @@ var getdentsCallbackPool = sync.Pool{ }, } -func getGetdentsCallback(t *kernel.Task, addr usermem.Addr, size int, isGetdents64 bool) *getdentsCallback { +func getGetdentsCallback(t *kernel.Task, addr hostarch.Addr, size int, isGetdents64 bool) *getdentsCallback { cb := getdentsCallbackPool.Get().(*getdentsCallback) *cb = getdentsCallback{ t: t, @@ -102,9 +103,9 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { return syserror.EINVAL } buf = cb.t.CopyScratchBuffer(size) - usermem.ByteOrder.PutUint64(buf[0:8], dirent.Ino) - usermem.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff)) - usermem.ByteOrder.PutUint16(buf[16:18], uint16(size)) + hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino) + hostarch.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff)) + hostarch.ByteOrder.PutUint16(buf[16:18], uint16(size)) buf[18] = dirent.Type copy(buf[19:], dirent.Name) // Zero out all remaining bytes in buf, including the NUL terminator @@ -136,9 +137,9 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { return syserror.EINVAL } buf = cb.t.CopyScratchBuffer(size) - usermem.ByteOrder.PutUint64(buf[0:8], dirent.Ino) - usermem.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff)) - usermem.ByteOrder.PutUint16(buf[16:18], uint16(size)) + hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino) + hostarch.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff)) + hostarch.ByteOrder.PutUint16(buf[16:18], uint16(size)) copy(buf[18:], dirent.Name) // Zero out all remaining bytes in buf, including the NUL terminator // after dirent.Name and the zero padding byte between the name and @@ -155,7 +156,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { // cb.remaining. return err } - cb.addr += usermem.Addr(n) + cb.addr += hostarch.Addr(n) cb.remaining -= n return nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go index 9d9dbf775..c961545f6 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mmap.go +++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go @@ -21,7 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Mmap implements Linux syscall mmap(2). @@ -48,12 +49,12 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC Unmap: fixed, Map32Bit: map32bit, Private: private, - Perms: usermem.AccessType{ + Perms: hostarch.AccessType{ Read: linux.PROT_READ&prot != 0, Write: linux.PROT_WRITE&prot != 0, Execute: linux.PROT_EXEC&prot != 0, }, - MaxPerms: usermem.AnyAccess, + MaxPerms: hostarch.AnyAccess, GrowsDown: linux.MAP_GROWSDOWN&flags != 0, Precommit: linux.MAP_POPULATE&flags != 0, } diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go index 769c9b92f..dd93430e2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mount.go +++ b/pkg/sentry/syscalls/linux/vfs2/mount.go @@ -20,7 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Mount implements Linux syscall mount(2). @@ -33,11 +34,11 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // For null-terminated strings related to mount(2), Linux copies in at most // a page worth of data. See fs/namespace.c:copy_mount_string(). - fsType, err := t.CopyInString(typeAddr, usermem.PageSize) + fsType, err := t.CopyInString(typeAddr, hostarch.PageSize) if err != nil { return 0, nil, err } - source, err := t.CopyInString(sourceAddr, usermem.PageSize) + source, err := t.CopyInString(sourceAddr, hostarch.PageSize) if err != nil { return 0, nil, err } @@ -53,7 +54,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // character placement, and the address is passed to each file system. // Most file systems always treat this data as a string, though, and so // do all of the ones we implement. - data, err = t.CopyInString(dataAddr, usermem.PageSize) + data, err = t.CopyInString(dataAddr, hostarch.PageSize) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go index 90a511d9a..2aaf1ed74 100644 --- a/pkg/sentry/syscalls/linux/vfs2/path.go +++ b/pkg/sentry/syscalls/linux/vfs2/path.go @@ -20,10 +20,11 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) -func copyInPath(t *kernel.Task, addr usermem.Addr) (fspath.Path, error) { +func copyInPath(t *kernel.Task, addr hostarch.Addr) (fspath.Path, error) { pathname, err := t.CopyInString(addr, linux.PATH_MAX) if err != nil { return fspath.Path{}, err diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go index 6986e39fe..c6fc1954c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/pipe.go +++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go @@ -22,7 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Pipe implements Linux syscall pipe(2). @@ -38,7 +39,7 @@ func Pipe2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, pipe2(t, addr, flags) } -func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error { +func pipe2(t *kernel.Task, addr hostarch.Addr, flags int32) error { if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { return syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go index c22e4ce54..a69c80edd 100644 --- a/pkg/sentry/syscalls/linux/vfs2/poll.go +++ b/pkg/sentry/syscalls/linux/vfs2/poll.go @@ -25,8 +25,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + + "gvisor.dev/gvisor/pkg/hostarch" ) // fileCap is the maximum allowable files for poll & select. This has no @@ -158,7 +159,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. } // copyInPollFDs copies an array of struct pollfd unless nfds exceeds the max. -func copyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD, error) { +func copyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { return nil, syserror.EINVAL } @@ -173,7 +174,7 @@ func copyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD return pfd, nil } -func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { +func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { pfd, err := copyInPollFDs(t, addr, nfds) if err != nil { return timeout, 0, err @@ -201,7 +202,7 @@ func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) } // CopyInFDSet copies an fd set from select(2)/pselect(2). -func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) { +func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) { set := make([]byte, nBytes) if addr != 0 { @@ -218,7 +219,7 @@ func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialBy return set, nil } -func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Addr, timeout time.Duration) (uintptr, error) { +func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) { if nfds < 0 || nfds > fileCap { return 0, syserror.EINVAL } @@ -368,7 +369,7 @@ func timeoutRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration) // copyOutTimespecRemaining copies the time remaining in timeout to timespecAddr. // // startNs must be from CLOCK_MONOTONIC. -func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr usermem.Addr) error { +func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr hostarch.Addr) error { if timeout <= 0 { return nil } @@ -381,7 +382,7 @@ func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.D // copyOutTimevalRemaining copies the time remaining in timeout to timevalAddr. // // startNs must be from CLOCK_MONOTONIC. -func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr usermem.Addr) error { +func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr hostarch.Addr) error { if timeout <= 0 { return nil } @@ -396,7 +397,7 @@ func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Du // // +stateify savable type pollRestartBlock struct { - pfdAddr usermem.Addr + pfdAddr hostarch.Addr nfds uint timeout time.Duration } @@ -406,7 +407,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) { return poll(t, p.pfdAddr, p.nfds, p.timeout) } -func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (uintptr, error) { +func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) { remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout) // On an interrupt poll(2) is restarted with the remaining timeout. if err == syserror.EINTR { @@ -530,7 +531,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if _, err := maskStruct.CopyIn(t, maskWithSizeAddr); err != nil { return 0, nil, err } - if err := setTempSignalSet(t, usermem.Addr(maskStruct.sigsetAddr), uint(maskStruct.sizeofSigset)); err != nil { + if err := setTempSignalSet(t, hostarch.Addr(maskStruct.sigsetAddr), uint(maskStruct.sizeofSigset)); err != nil { return 0, nil, err } } @@ -551,7 +552,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // returned value is the maximum that Duration will allow. // // If timespecAddr is NULL, the returned value is negative. -func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.Duration, error) { +func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time.Duration, error) { // Use a negative Duration to indicate "no timeout". timeout := time.Duration(-1) if timespecAddr != 0 { @@ -567,7 +568,7 @@ func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.D return timeout, nil } -func setTempSignalSet(t *kernel.Task, maskAddr usermem.Addr, maskSize uint) error { +func setTempSignalSet(t *kernel.Task, maskAddr hostarch.Addr, maskSize uint) error { if maskAddr == 0 { return nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 903169dc2..c6330c21a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -23,7 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) const chmodMask = 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX @@ -43,7 +44,7 @@ func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, fchmodat(t, dirfd, pathAddr, mode) } -func fchmodat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, mode uint) error { +func fchmodat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, mode uint) error { path, err := copyInPath(t, pathAddr) if err != nil { return err @@ -102,7 +103,7 @@ func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, fchownat(t, dirfd, pathAddr, owner, group, flags) } -func fchownat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, owner, group, flags int32) error { +func fchownat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, owner, group, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { return syserror.EINVAL } @@ -327,7 +328,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, followFinalSymlink, &opts) } -func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { +func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr hostarch.Addr, opts *vfs.SetStatOptions) error { if timesAddr == 0 { opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME opts.Stat.Atime.Nsec = linux.UTIME_NOW @@ -391,7 +392,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, shouldFollowFinalSymlink(flags&linux.AT_SYMLINK_NOFOLLOW == 0), &opts) } -func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { +func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr hostarch.Addr, opts *vfs.SetStatOptions) error { if timesAddr == 0 { opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME opts.Stat.Atime.Nsec = linux.UTIME_NOW diff --git a/pkg/sentry/syscalls/linux/vfs2/signal.go b/pkg/sentry/syscalls/linux/vfs2/signal.go index b89f34cdb..6163da103 100644 --- a/pkg/sentry/syscalls/linux/vfs2/signal.go +++ b/pkg/sentry/syscalls/linux/vfs2/signal.go @@ -21,11 +21,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // sharedSignalfd is shared between the two calls. -func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) { +func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) { // Copy in the signal mask. mask, err := slinux.CopyInSigSet(t, sigset, sigsetsize) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index 346fd1cea..7cc0be892 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -31,13 +31,9 @@ import ( "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" -) - -// minListenBacklog is the minimum reasonable backlog for listening sockets. -const minListenBacklog = 8 -// maxListenBacklog is the maximum allowed backlog for listening sockets. -const maxListenBacklog = 1024 + "gvisor.dev/gvisor/pkg/hostarch" +) // maxAddrLen is the maximum socket address length we're willing to accept. const maxAddrLen = 200 @@ -116,7 +112,7 @@ type multipleMessageHeader64 struct { // CaptureAddress allocates memory for and copies a socket address structure // from the untrusted address space range. -func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) { +func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) { if addrlen > maxAddrLen { return nil, syserror.EINVAL } @@ -132,7 +128,7 @@ func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, // writeAddress writes a sockaddr structure and its length to an output buffer // in the unstrusted address space range. If the address is bigger than the // buffer, it is truncated. -func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error { +func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr hostarch.Addr, addrLenPtr hostarch.Addr) error { // Get the buffer length. var bufLen uint32 if _, err := primitive.CopyUint32In(t, addrLenPtr, &bufLen); err != nil { @@ -279,7 +275,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // accept is the implementation of the accept syscall. It is called by accept // and accept4 syscall handlers. -func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { +func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { return 0, syserror.EINVAL @@ -384,14 +380,6 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, syserror.ENOTSOCK } - // Per Linux, the backlog is silently capped to reasonable values. - if backlog <= 0 { - backlog = minListenBacklog - } - if backlog > maxListenBacklog { - backlog = maxListenBacklog - } - return 0, nil, s.Listen(t, int(backlog)).ToError() } @@ -475,7 +463,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // getSockOpt tries to handle common socket options, or dispatches to a specific // socket implementation. -func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) { +func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr hostarch.Addr, len int) (marshal.Marshallable, *syserr.Error) { if level == linux.SOL_SOCKET { switch name { case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: @@ -738,7 +726,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return uintptr(count), nil, nil } -func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { +func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { // Capture the message header and io vectors. var msg MessageHeader64 if _, err := msg.CopyIn(t, msgPtr); err != nil { @@ -748,7 +736,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla if msg.IovLen > linux.UIO_MAXIOV { return 0, syserror.EMSGSIZE } - dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, }) if err != nil { @@ -799,7 +787,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla // Copy the address to the caller. if msg.NameLen != 0 { - if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil { + if err := writeAddress(t, sender, senderLen, hostarch.Addr(msg.Name), hostarch.Addr(msgPtr+nameLenOffset)); err != nil { return 0, err } } @@ -809,7 +797,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla return 0, err } if len(controlData) > 0 { - if _, err := t.CopyOutBytes(usermem.Addr(msg.Control), controlData); err != nil { + if _, err := t.CopyOutBytes(hostarch.Addr(msg.Control), controlData); err != nil { return 0, err } } @@ -824,7 +812,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla // recvFrom is the implementation of the recvfrom syscall. It is called by // recvfrom and recv syscall handlers. -func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { +func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) { if int(bufLen) < 0 { return 0, syserror.EINVAL } @@ -1000,7 +988,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return uintptr(count), nil, nil } -func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr usermem.Addr, flags int32) (uintptr, error) { +func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr hostarch.Addr, flags int32) (uintptr, error) { // Capture the message header. var msg MessageHeader64 if _, err := msg.CopyIn(t, msgPtr); err != nil { @@ -1014,7 +1002,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio return 0, syserror.ENOBUFS } controlData = make([]byte, msg.ControlLen) - if _, err := t.CopyInBytes(usermem.Addr(msg.Control), controlData); err != nil { + if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil { return 0, err } } @@ -1023,7 +1011,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio var to []byte if msg.NameLen != 0 { var err error - to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen) + to, err = CaptureAddress(t, hostarch.Addr(msg.Name), msg.NameLen) if err != nil { return 0, err } @@ -1033,7 +1021,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio if msg.IovLen > linux.UIO_MAXIOV { return 0, syserror.EMSGSIZE } - src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, }) if err != nil { @@ -1067,7 +1055,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio // sendTo is the implementation of the sendto syscall. It is called by sendto // and send syscall handlers. -func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { +func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { return 0, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index 0f5d5189c..69e77fa99 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -24,7 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // Stat implements Linux syscall stat(2). @@ -50,7 +51,7 @@ func Newfstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, fstatat(t, dirfd, pathAddr, statAddr, flags) } -func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags int32) error { +func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { return syserror.EINVAL } @@ -264,7 +265,7 @@ func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, accessAt(t, dirfd, addr, mode) } -func accessAt(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, mode uint) error { +func accessAt(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, mode uint) error { const rOK = 4 const wOK = 2 const xOK = 1 @@ -312,7 +313,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return readlinkat(t, dirfd, pathAddr, bufAddr, size) } -func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr usermem.Addr, size uint) (uintptr, *kernel.SyscallControl, error) { +func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr hostarch.Addr, size uint) (uintptr, *kernel.SyscallControl, error) { if int(size) <= 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go index e05723ef9..c261050c6 100644 --- a/pkg/sentry/syscalls/linux/vfs2/xattr.go +++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go @@ -23,7 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" + + "gvisor.dev/gvisor/pkg/hostarch" ) // ListXattr implements Linux syscall listxattr(2). @@ -291,7 +292,7 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. return 0, nil, file.RemoveXattr(t, name) } -func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { +func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) { name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) if err != nil { if err == syserror.ENAMETOOLONG { @@ -305,7 +306,7 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { return name, nil } -func copyOutXattrNameList(t *kernel.Task, listAddr usermem.Addr, size uint, names []string) (int, error) { +func copyOutXattrNameList(t *kernel.Task, listAddr hostarch.Addr, size uint, names []string) (int, error) { if size > linux.XATTR_LIST_MAX { size = linux.XATTR_LIST_MAX } @@ -327,7 +328,7 @@ func copyOutXattrNameList(t *kernel.Task, listAddr usermem.Addr, size uint, name return t.CopyOutBytes(listAddr, buf.Bytes()) } -func copyInXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint) (string, error) { +func copyInXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint) (string, error) { if size > linux.XATTR_SIZE_MAX { return "", syserror.E2BIG } @@ -338,7 +339,7 @@ func copyInXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint) (string return gohacks.StringFromImmutableBytes(buf), nil } -func copyOutXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint, value string) (int, error) { +func copyOutXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint, value string) (int, error) { if size > linux.XATTR_SIZE_MAX { size = linux.XATTR_SIZE_MAX } diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index df4990854..ac60fe8bf 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -99,6 +99,7 @@ go_library( "//pkg/fdnotifier", "//pkg/fspath", "//pkg/gohacks", + "//pkg/hostarch", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 3caf417ca..f48817132 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -20,10 +20,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // NewAnonVirtualDentry returns a VirtualDentry with the given synthetic name, @@ -43,7 +43,7 @@ func (vfs *VirtualFilesystem) NewAnonVirtualDentry(name string) VirtualDentry { } const ( - anonfsBlockSize = usermem.PageSize // via fs/libfs.c:pseudo_fs_fill_super() + anonfsBlockSize = hostarch.PageSize // via fs/libfs.c:pseudo_fs_fill_super() // Mode, UID, and GID for a generic anonfs file. anonFileMode = 0600 // no type is correct diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 1556b41a3..b87d9690a 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -252,6 +252,9 @@ type WritableDynamicBytesSource interface { // are backed by a bytes.Buffer that is regenerated when necessary, consistent // with Linux's fs/seq_file.c:single_open(). // +// If data additionally implements WritableDynamicBytesSource, writes are +// dispatched to the implementer. The source data is not automatically modified. +// // DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first // use. // diff --git a/pkg/sentry/vfs/filesystem_impl_util.go b/pkg/sentry/vfs/filesystem_impl_util.go index 2620cf975..15b234d61 100644 --- a/pkg/sentry/vfs/filesystem_impl_util.go +++ b/pkg/sentry/vfs/filesystem_impl_util.go @@ -18,7 +18,7 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // GenericParseMountOptions parses a comma-separated list of options of the @@ -50,7 +50,7 @@ func GenericParseMountOptions(str string) map[string]string { func GenericStatFS(fsMagic uint64) linux.Statfs { return linux.Statfs{ Type: fsMagic, - BlockSize: usermem.PageSize, + BlockSize: hostarch.PageSize, NameLength: linux.NAME_MAX, } } diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 32fa01578..49d29e20b 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" @@ -256,7 +257,7 @@ func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallAr n += uint32(e.sizeOf()) } var buf [4]byte - usermem.ByteOrder.PutUint32(buf[:], n) + hostarch.ByteOrder.PutUint32(buf[:], n) _, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{}) return 0, err @@ -683,10 +684,10 @@ func (e *Event) sizeOf() int { // construct the output. We use a buffer allocated ahead of time for // performance. buf must be at least inotifyEventBaseSize bytes. func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) { - usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd)) - usermem.ByteOrder.PutUint32(buf[4:], e.mask) - usermem.ByteOrder.PutUint32(buf[8:], e.cookie) - usermem.ByteOrder.PutUint32(buf[12:], e.len) + hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd)) + hostarch.ByteOrder.PutUint32(buf[4:], e.mask) + hostarch.ByteOrder.PutUint32(buf[8:], e.cookie) + hostarch.ByteOrder.PutUint32(buf[12:], e.len) writeLen := 0 diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 922f9e697..7cdab6945 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -970,17 +970,22 @@ func superBlockOpts(mountPath string, mnt *Mount) string { opts += "," + mopts } - // NOTE(b/147673608): If the mount is a cgroup, we also need to include - // the cgroup name in the options. For now we just read that from the - // path. + // NOTE(b/147673608): If the mount is a ramdisk-based fake cgroupfs, we also + // need to include the cgroup name in the options. For now we just read that + // from the path. Note that this is only possible when "cgroup" isn't + // registered as a valid filesystem type. // - // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we - // should get this value from the cgroup itself, and not rely on the - // path. + // TODO(gvisor.dev/issue/190): Once we removed fake cgroupfs support, we + // should remove this. + if cgroupfs := mnt.vfs.getFilesystemType("cgroup"); cgroupfs != nil && cgroupfs.opts.AllowUserMount { + // Real cgroupfs available. + return opts + } if mnt.fs.FilesystemType().Name() == "cgroup" { splitPath := strings.Split(mountPath, "/") cgroupType := splitPath[len(splitPath)-1] opts += "," + cgroupType } + return opts } diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index f588311e0..85bd164cd 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -178,6 +178,26 @@ const ( IPv4FlagDontFragment ) +// ipv4LinkLocalUnicastSubnet is the IPv4 link local unicast subnet as defined +// by RFC 3927 section 1. +var ipv4LinkLocalUnicastSubnet = func() tcpip.Subnet { + subnet, err := tcpip.NewSubnet("\xa9\xfe\x00\x00", tcpip.AddressMask("\xff\xff\x00\x00")) + if err != nil { + panic(err) + } + return subnet +}() + +// ipv4LinkLocalMulticastSubnet is the IPv4 link local multicast subnet as +// defined by RFC 5771 section 4. +var ipv4LinkLocalMulticastSubnet = func() tcpip.Subnet { + subnet, err := tcpip.NewSubnet("\xe0\x00\x00\x00", tcpip.AddressMask("\xff\xff\xff\x00")) + if err != nil { + panic(err) + } + return subnet +}() + // IPv4EmptySubnet is the empty IPv4 subnet. var IPv4EmptySubnet = func() tcpip.Subnet { subnet, err := tcpip.NewSubnet(IPv4Any, tcpip.AddressMask(IPv4Any)) @@ -423,6 +443,18 @@ func (b IPv4) IsValid(pktSize int) bool { return true } +// IsV4LinkLocalUnicastAddress determines if the provided address is an IPv4 +// link-local unicast address. +func IsV4LinkLocalUnicastAddress(addr tcpip.Address) bool { + return ipv4LinkLocalUnicastSubnet.Contains(addr) +} + +// IsV4LinkLocalMulticastAddress determines if the provided address is an IPv4 +// link-local multicast address. +func IsV4LinkLocalMulticastAddress(addr tcpip.Address) bool { + return ipv4LinkLocalMulticastSubnet.Contains(addr) +} + // IsV4MulticastAddress determines if the provided address is an IPv4 multicast // address (range 224.0.0.0 to 239.255.255.255). The four most significant bits // will be 1110 = 0xe0. diff --git a/pkg/tcpip/header/ipv4_test.go b/pkg/tcpip/header/ipv4_test.go index 6475cd694..c02fe898b 100644 --- a/pkg/tcpip/header/ipv4_test.go +++ b/pkg/tcpip/header/ipv4_test.go @@ -18,6 +18,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -177,3 +178,77 @@ func TestIPv4EncodeOptions(t *testing.T) { }) } } + +func TestIsV4LinkLocalUnicastAddress(t *testing.T) { + tests := []struct { + name string + addr tcpip.Address + expected bool + }{ + { + name: "Valid (lowest)", + addr: "\xa9\xfe\x00\x00", + expected: true, + }, + { + name: "Valid (highest)", + addr: "\xa9\xfe\xff\xff", + expected: true, + }, + { + name: "Invalid (before subnet)", + addr: "\xa9\xfd\xff\xff", + expected: false, + }, + { + name: "Invalid (after subnet)", + addr: "\xa9\xff\x00\x00", + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := header.IsV4LinkLocalUnicastAddress(test.addr); got != test.expected { + t.Errorf("got header.IsV4LinkLocalUnicastAddress(%s) = %t, want = %t", test.addr, got, test.expected) + } + }) + } +} + +func TestIsV4LinkLocalMulticastAddress(t *testing.T) { + tests := []struct { + name string + addr tcpip.Address + expected bool + }{ + { + name: "Valid (lowest)", + addr: "\xe0\x00\x00\x00", + expected: true, + }, + { + name: "Valid (highest)", + addr: "\xe0\x00\x00\xff", + expected: true, + }, + { + name: "Invalid (before subnet)", + addr: "\xdf\xff\xff\xff", + expected: false, + }, + { + name: "Invalid (after subnet)", + addr: "\xe0\x00\x01\x00", + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := header.IsV4LinkLocalMulticastAddress(test.addr); got != test.expected { + t.Errorf("got header.IsV4LinkLocalMulticastAddress(%s) = %t, want = %t", test.addr, got, test.expected) + } + }) + } +} diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index f2403978c..fa6ccff30 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -98,12 +98,27 @@ const ( // The address is ff02::1. IPv6AllNodesMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" - // IPv6AllRoutersMulticastAddress is a link-local multicast group that - // all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets + // IPv6AllRoutersInterfaceLocalMulticastAddress is an interface-local + // multicast group that all IPv6 routers MUST join, as per RFC 4291, section + // 2.8. Packets destined to this address will reach the router on an + // interface. + // + // The address is ff01::2. + IPv6AllRoutersInterfaceLocalMulticastAddress tcpip.Address = "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" + + // IPv6AllRoutersLinkLocalMulticastAddress is a link-local multicast group + // that all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets // destined to this address will reach all routers on a link. // // The address is ff02::2. - IPv6AllRoutersMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" + IPv6AllRoutersLinkLocalMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" + + // IPv6AllRoutersSiteLocalMulticastAddress is a site-local multicast group + // that all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets + // destined to this address will reach all routers in a site. + // + // The address is ff05::2. + IPv6AllRoutersSiteLocalMulticastAddress tcpip.Address = "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" // IPv6MinimumMTU is the minimum MTU required by IPv6, per RFC 8200, // section 5: @@ -142,11 +157,6 @@ const ( // ipv6MulticastAddressScopeMask is the mask for the scope (scop) field, // within the byte holding the field, as per RFC 4291 section 2.7. ipv6MulticastAddressScopeMask = 0xF - - // ipv6LinkLocalMulticastScope is the value of the scope (scop) field within - // a multicast IPv6 address that indicates the address has link-local scope, - // as per RFC 4291 section 2.7. - ipv6LinkLocalMulticastScope = 2 ) // IPv6EmptySubnet is the empty IPv6 subnet. It may also be known as the @@ -399,7 +409,7 @@ func IsV6LoopbackAddress(addr tcpip.Address) bool { // IsV6LinkLocalMulticastAddress determines if the provided address is an IPv6 // link-local multicast address. func IsV6LinkLocalMulticastAddress(addr tcpip.Address) bool { - return IsV6MulticastAddress(addr) && addr[ipv6MulticastAddressScopeByteIdx]&ipv6MulticastAddressScopeMask == ipv6LinkLocalMulticastScope + return IsV6MulticastAddress(addr) && V6MulticastScope(addr) == IPv6LinkLocalMulticastScope } // AppendOpaqueInterfaceIdentifier appends a 64 bit opaque interface identifier @@ -520,3 +530,45 @@ func GenerateTempIPv6SLAACAddr(tempIIDHistory []byte, stableAddr tcpip.Address) PrefixLen: IIDOffsetInIPv6Address * 8, } } + +// IPv6MulticastScope is the scope of a multicast IPv6 address. +type IPv6MulticastScope uint8 + +// The various values for IPv6 multicast scopes, as per RFC 7346 section 2: +// +// +------+--------------------------+-------------------------+ +// | scop | NAME | REFERENCE | +// +------+--------------------------+-------------------------+ +// | 0 | Reserved | [RFC4291], RFC 7346 | +// | 1 | Interface-Local scope | [RFC4291], RFC 7346 | +// | 2 | Link-Local scope | [RFC4291], RFC 7346 | +// | 3 | Realm-Local scope | [RFC4291], RFC 7346 | +// | 4 | Admin-Local scope | [RFC4291], RFC 7346 | +// | 5 | Site-Local scope | [RFC4291], RFC 7346 | +// | 6 | Unassigned | | +// | 7 | Unassigned | | +// | 8 | Organization-Local scope | [RFC4291], RFC 7346 | +// | 9 | Unassigned | | +// | A | Unassigned | | +// | B | Unassigned | | +// | C | Unassigned | | +// | D | Unassigned | | +// | E | Global scope | [RFC4291], RFC 7346 | +// | F | Reserved | [RFC4291], RFC 7346 | +// +------+--------------------------+-------------------------+ +const ( + IPv6Reserved0MulticastScope = IPv6MulticastScope(0x0) + IPv6InterfaceLocalMulticastScope = IPv6MulticastScope(0x1) + IPv6LinkLocalMulticastScope = IPv6MulticastScope(0x2) + IPv6RealmLocalMulticastScope = IPv6MulticastScope(0x3) + IPv6AdminLocalMulticastScope = IPv6MulticastScope(0x4) + IPv6SiteLocalMulticastScope = IPv6MulticastScope(0x5) + IPv6OrganizationLocalMulticastScope = IPv6MulticastScope(0x8) + IPv6GlobalMulticastScope = IPv6MulticastScope(0xE) + IPv6ReservedFMulticastScope = IPv6MulticastScope(0xF) +) + +// V6MulticastScope returns the scope of a multicast address. +func V6MulticastScope(addr tcpip.Address) IPv6MulticastScope { + return IPv6MulticastScope(addr[ipv6MulticastAddressScopeByteIdx] & ipv6MulticastAddressScopeMask) +} diff --git a/pkg/tcpip/header/ipv6_test.go b/pkg/tcpip/header/ipv6_test.go index f10f446a6..38b6dbc18 100644 --- a/pkg/tcpip/header/ipv6_test.go +++ b/pkg/tcpip/header/ipv6_test.go @@ -373,3 +373,83 @@ func TestSolicitedNodeAddr(t *testing.T) { }) } } + +func TestV6MulticastScope(t *testing.T) { + tests := []struct { + addr tcpip.Address + want header.IPv6MulticastScope + }{ + { + addr: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6Reserved0MulticastScope, + }, + { + addr: "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6InterfaceLocalMulticastScope, + }, + { + addr: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6LinkLocalMulticastScope, + }, + { + addr: "\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6RealmLocalMulticastScope, + }, + { + addr: "\xff\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6AdminLocalMulticastScope, + }, + { + addr: "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6SiteLocalMulticastScope, + }, + { + addr: "\xff\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(6), + }, + { + addr: "\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(7), + }, + { + addr: "\xff\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6OrganizationLocalMulticastScope, + }, + { + addr: "\xff\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(9), + }, + { + addr: "\xff\x0a\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(10), + }, + { + addr: "\xff\x0b\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(11), + }, + { + addr: "\xff\x0c\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(12), + }, + { + addr: "\xff\x0d\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6MulticastScope(13), + }, + { + addr: "\xff\x0e\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6GlobalMulticastScope, + }, + { + addr: "\xff\x0f\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", + want: header.IPv6ReservedFMulticastScope, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s", test.addr), func(t *testing.T) { + if got := header.V6MulticastScope(test.addr); got != test.want { + t.Fatalf("got header.V6MulticastScope(%s) = %d, want = %d", test.addr, got, test.want) + } + }) + } +} diff --git a/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go b/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go index b9f129728..ac35d81e7 100644 --- a/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go +++ b/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go @@ -156,14 +156,6 @@ type GenericMulticastProtocolOptions struct { // // Unsolicited reports are transmitted when a group is newly joined. MaxUnsolicitedReportDelay time.Duration - - // AllNodesAddress is a multicast address that all nodes on a network should - // be a member of. - // - // This address will not have the generic multicast protocol performed on it; - // it will be left in the non member/listener state, and packets will never - // be sent for it. - AllNodesAddress tcpip.Address } // MulticastGroupProtocol is a multicast group protocol whose core state machine @@ -188,6 +180,10 @@ type MulticastGroupProtocol interface { // SendLeave sends a multicast leave for the specified group address. SendLeave(groupAddress tcpip.Address) tcpip.Error + + // ShouldPerformProtocol returns true iff the protocol should be performed for + // the specified group. + ShouldPerformProtocol(tcpip.Address) bool } // GenericMulticastProtocolState is the per interface generic multicast protocol @@ -455,20 +451,7 @@ func (g *GenericMulticastProtocolState) initializeNewMemberLocked(groupAddress t info.lastToSendReport = false - if groupAddress == g.opts.AllNodesAddress { - // As per RFC 2236 section 6 page 10 (for IGMPv2), - // - // The all-systems group (address 224.0.0.1) is handled as a special - // case. The host starts in Idle Member state for that group on every - // interface, never transitions to another state, and never sends a - // report for that group. - // - // As per RFC 2710 section 5 page 10 (for MLDv1), - // - // The link-scope all-nodes address (FF02::1) is handled as a special - // case. The node starts in Idle Listener state for that address on - // every interface, never transitions to another state, and never sends - // a Report or Done for that address. + if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) { info.state = idleMember return } @@ -537,20 +520,7 @@ func (g *GenericMulticastProtocolState) maybeSendLeave(groupAddress tcpip.Addres return } - if groupAddress == g.opts.AllNodesAddress { - // As per RFC 2236 section 6 page 10 (for IGMPv2), - // - // The all-systems group (address 224.0.0.1) is handled as a special - // case. The host starts in Idle Member state for that group on every - // interface, never transitions to another state, and never sends a - // report for that group. - // - // As per RFC 2710 section 5 page 10 (for MLDv1), - // - // The link-scope all-nodes address (FF02::1) is handled as a special - // case. The node starts in Idle Listener state for that address on - // every interface, never transitions to another state, and never sends - // a Report or Done for that address. + if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) { return } @@ -627,20 +597,7 @@ func (g *GenericMulticastProtocolState) setDelayTimerForAddressRLocked(groupAddr return } - if groupAddress == g.opts.AllNodesAddress { - // As per RFC 2236 section 6 page 10 (for IGMPv2), - // - // The all-systems group (address 224.0.0.1) is handled as a special - // case. The host starts in Idle Member state for that group on every - // interface, never transitions to another state, and never sends a - // report for that group. - // - // As per RFC 2710 section 5 page 10 (for MLDv1), - // - // The link-scope all-nodes address (FF02::1) is handled as a special - // case. The node starts in Idle Listener state for that address on - // every interface, never transitions to another state, and never sends - // a Report or Done for that address. + if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) { return } diff --git a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go index 381460c82..0b51563cd 100644 --- a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go +++ b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go @@ -43,6 +43,8 @@ type mockMulticastGroupProtocolProtectedFields struct { type mockMulticastGroupProtocol struct { t *testing.T + skipProtocolAddress tcpip.Address + mu mockMulticastGroupProtocolProtectedFields } @@ -165,6 +167,11 @@ func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) tcpip return nil } +// ShouldPerformProtocol implements ip.MulticastGroupProtocol. +func (m *mockMulticastGroupProtocol) ShouldPerformProtocol(groupAddress tcpip.Address) bool { + return groupAddress != m.skipProtocolAddress +} + func (m *mockMulticastGroupProtocol) check(sendReportGroupAddresses []tcpip.Address, sendLeaveGroupAddresses []tcpip.Address) string { m.mu.Lock() defer m.mu.Unlock() @@ -193,10 +200,11 @@ func (m *mockMulticastGroupProtocol) check(sendReportGroupAddresses []tcpip.Addr cmp.FilterPath( func(p cmp.Path) bool { switch p.Last().String() { - case ".RWMutex", ".t", ".makeQueuePackets", ".disabled", ".genericMulticastGroup": + case ".RWMutex", ".t", ".makeQueuePackets", ".disabled", ".genericMulticastGroup", ".skipProtocolAddress": return true + default: + return false } - return false }, cmp.Ignore(), ), @@ -225,14 +233,13 @@ func TestJoinGroup(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - mgp := mockMulticastGroupProtocol{t: t} + mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr2} clock := faketime.NewManualClock() mgp.init(ip.GenericMulticastProtocolOptions{ Rand: rand.New(rand.NewSource(0)), Clock: clock, MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay, - AllNodesAddress: addr2, }) // Joining a group should send a report immediately and another after @@ -279,14 +286,13 @@ func TestLeaveGroup(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - mgp := mockMulticastGroupProtocol{t: t} + mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr2} clock := faketime.NewManualClock() mgp.init(ip.GenericMulticastProtocolOptions{ Rand: rand.New(rand.NewSource(1)), Clock: clock, MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay, - AllNodesAddress: addr2, }) mgp.joinGroup(test.addr) @@ -356,14 +362,13 @@ func TestHandleReport(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - mgp := mockMulticastGroupProtocol{t: t} + mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3} clock := faketime.NewManualClock() mgp.init(ip.GenericMulticastProtocolOptions{ Rand: rand.New(rand.NewSource(2)), Clock: clock, MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay, - AllNodesAddress: addr3, }) mgp.joinGroup(addr1) @@ -446,14 +451,13 @@ func TestHandleQuery(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - mgp := mockMulticastGroupProtocol{t: t} + mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3} clock := faketime.NewManualClock() mgp.init(ip.GenericMulticastProtocolOptions{ Rand: rand.New(rand.NewSource(3)), Clock: clock, MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay, - AllNodesAddress: addr3, }) mgp.joinGroup(addr1) @@ -574,14 +578,13 @@ func TestJoinCount(t *testing.T) { } func TestMakeAllNonMemberAndInitialize(t *testing.T) { - mgp := mockMulticastGroupProtocol{t: t} + mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3} clock := faketime.NewManualClock() mgp.init(ip.GenericMulticastProtocolOptions{ Rand: rand.New(rand.NewSource(3)), Clock: clock, MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay, - AllNodesAddress: addr3, }) mgp.joinGroup(addr1) diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index a4edc69c7..58fd18af8 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -15,6 +15,7 @@ package ip_test import ( + "fmt" "strings" "testing" @@ -1938,3 +1939,80 @@ func TestICMPInclusionSize(t *testing.T) { }) } } + +func TestJoinLeaveAllRoutersGroup(t *testing.T) { + const nicID = 1 + + tests := []struct { + name string + netProto tcpip.NetworkProtocolNumber + protoFactory stack.NetworkProtocolFactory + allRoutersAddr tcpip.Address + }{ + { + name: "IPv4", + netProto: ipv4.ProtocolNumber, + protoFactory: ipv4.NewProtocol, + allRoutersAddr: header.IPv4AllRoutersGroup, + }, + { + name: "IPv6 Interface Local", + netProto: ipv6.ProtocolNumber, + protoFactory: ipv6.NewProtocol, + allRoutersAddr: header.IPv6AllRoutersInterfaceLocalMulticastAddress, + }, + { + name: "IPv6 Link Local", + netProto: ipv6.ProtocolNumber, + protoFactory: ipv6.NewProtocol, + allRoutersAddr: header.IPv6AllRoutersLinkLocalMulticastAddress, + }, + { + name: "IPv6 Site Local", + netProto: ipv6.ProtocolNumber, + protoFactory: ipv6.NewProtocol, + allRoutersAddr: header.IPv6AllRoutersSiteLocalMulticastAddress, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + for _, nicDisabled := range [...]bool{true, false} { + t.Run(fmt.Sprintf("NIC Disabled = %t", nicDisabled), func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol}, + TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol}, + }) + opts := stack.NICOptions{Disabled: nicDisabled} + if err := s.CreateNICWithOptions(nicID, channel.New(0, 0, ""), opts); err != nil { + t.Fatalf("CreateNICWithOptions(%d, _, %#v) = %s", nicID, opts, err) + } + + if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil { + t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err) + } else if got { + t.Fatalf("got s.IsInGroup(%d, %s) = true, want = false", nicID, test.allRoutersAddr) + } + + if err := s.SetForwarding(test.netProto, true); err != nil { + t.Fatalf("s.SetForwarding(%d, true): %s", test.netProto, err) + } + if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil { + t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err) + } else if !got { + t.Fatalf("got s.IsInGroup(%d, %s) = false, want = true", nicID, test.allRoutersAddr) + } + + if err := s.SetForwarding(test.netProto, false); err != nil { + t.Fatalf("s.SetForwarding(%d, false): %s", test.netProto, err) + } + if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil { + t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err) + } else if got { + t.Fatalf("got s.IsInGroup(%d, %s) = true, want = false", nicID, test.allRoutersAddr) + } + }) + } + }) + } +} diff --git a/pkg/tcpip/network/ipv4/igmp.go b/pkg/tcpip/network/ipv4/igmp.go index f3fc1c87e..b1ac29294 100644 --- a/pkg/tcpip/network/ipv4/igmp.go +++ b/pkg/tcpip/network/ipv4/igmp.go @@ -126,6 +126,17 @@ func (igmp *igmpState) SendLeave(groupAddress tcpip.Address) tcpip.Error { return err } +// ShouldPerformProtocol implements ip.MulticastGroupProtocol. +func (igmp *igmpState) ShouldPerformProtocol(groupAddress tcpip.Address) bool { + // As per RFC 2236 section 6 page 10, + // + // The all-systems group (address 224.0.0.1) is handled as a special + // case. The host starts in Idle Member state for that group on every + // interface, never transitions to another state, and never sends a + // report for that group. + return groupAddress != header.IPv4AllSystems +} + // init sets up an igmpState struct, and is required to be called before using // a new igmpState. // @@ -137,7 +148,6 @@ func (igmp *igmpState) init(ep *endpoint) { Clock: ep.protocol.stack.Clock(), Protocol: igmp, MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax, - AllNodesAddress: header.IPv4AllSystems, }) igmp.igmpV1Present = igmpV1PresentDefault igmp.igmpV1Job = ep.protocol.stack.NewJob(&ep.mu, func() { diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 1a5661ca4..2e44f8523 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -150,6 +150,38 @@ func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { delete(p.mu.eps, nicID) } +// transitionForwarding transitions the endpoint's forwarding status to +// forwarding. +// +// Must only be called when the forwarding status changes. +func (e *endpoint) transitionForwarding(forwarding bool) { + e.mu.Lock() + defer e.mu.Unlock() + + if forwarding { + // There does not seem to be an RFC requirement for a node to join the all + // routers multicast address but + // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml + // specifies the address as a group for all routers on a subnet so we join + // the group here. + if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil { + // joinGroupLocked only returns an error if the group address is not a + // valid IPv4 multicast address. + panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) + } + + return + } + + switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) { + case nil: + case *tcpip.ErrBadLocalAddress: + // The endpoint may have already left the multicast group. + default: + panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) + } +} + // Enable implements stack.NetworkEndpoint. func (e *endpoint) Enable() tcpip.Error { e.mu.Lock() @@ -226,7 +258,7 @@ func (e *endpoint) disableLocked() { } // The endpoint may have already left the multicast group. - switch err := e.leaveGroupLocked(header.IPv4AllSystems); err.(type) { + switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) { case nil, *tcpip.ErrBadLocalAddress: default: panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) @@ -551,6 +583,22 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu // forwardPacket attempts to forward a packet to its final destination. func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { h := header.IPv4(pkt.NetworkHeader().View()) + + dstAddr := h.DestinationAddress() + if header.IsV4LinkLocalUnicastAddress(h.SourceAddress()) || header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) { + // As per RFC 3927 section 7, + // + // A router MUST NOT forward a packet with an IPv4 Link-Local source or + // destination address, irrespective of the router's default route + // configuration or routes obtained from dynamic routing protocols. + // + // A router which receives a packet with an IPv4 Link-Local source or + // destination address MUST NOT forward the packet. This prevents + // forwarding of packets back onto the network segment from which they + // originated, or to any other segment. + return nil + } + ttl := h.TTL() if ttl == 0 { // As per RFC 792 page 6, Time Exceeded Message, @@ -589,8 +637,6 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { } } - dstAddr := h.DestinationAddress() - // Check if the destination is owned by the stack. if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { ep.handleValidatedPacket(h, pkt) @@ -1168,12 +1214,27 @@ func (p *protocol) Forwarding() bool { return uint8(atomic.LoadUint32(&p.forwarding)) == 1 } +// setForwarding sets the forwarding status for the protocol. +// +// Returns true if the forwarding status was updated. +func (p *protocol) setForwarding(v bool) bool { + if v { + return atomic.CompareAndSwapUint32(&p.forwarding, 0 /* old */, 1 /* new */) + } + return atomic.CompareAndSwapUint32(&p.forwarding, 1 /* old */, 0 /* new */) +} + // SetForwarding implements stack.ForwardingNetworkProtocol. func (p *protocol) SetForwarding(v bool) { - if v { - atomic.StoreUint32(&p.forwarding, 1) - } else { - atomic.StoreUint32(&p.forwarding, 0) + p.mu.Lock() + defer p.mu.Unlock() + + if !p.setForwarding(v) { + return + } + + for _, ep := range p.mu.eps { + ep.transitionForwarding(v) } } diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index c6d9d8f0d..7ee7be0f9 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -410,22 +410,65 @@ func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr t // // Must only be called when the forwarding status changes. func (e *endpoint) transitionForwarding(forwarding bool) { + allRoutersGroups := [...]tcpip.Address{ + header.IPv6AllRoutersInterfaceLocalMulticastAddress, + header.IPv6AllRoutersLinkLocalMulticastAddress, + header.IPv6AllRoutersSiteLocalMulticastAddress, + } + e.mu.Lock() defer e.mu.Unlock() - if !e.Enabled() { - return - } - if forwarding { // When transitioning into an IPv6 router, host-only state (NDP discovered // routers, discovered on-link prefixes, and auto-generated addresses) is // cleaned up/invalidated and NDP router solicitations are stopped. e.mu.ndp.stopSolicitingRouters() e.mu.ndp.cleanupState(true /* hostOnly */) - } else { - // When transitioning into an IPv6 host, NDP router solicitations are - // started. + + // As per RFC 4291 section 2.8: + // + // A router is required to recognize all addresses that a host is + // required to recognize, plus the following addresses as identifying + // itself: + // + // o The All-Routers multicast addresses defined in Section 2.7.1. + // + // As per RFC 4291 section 2.7.1, + // + // All Routers Addresses: FF01:0:0:0:0:0:0:2 + // FF02:0:0:0:0:0:0:2 + // FF05:0:0:0:0:0:0:2 + // + // The above multicast addresses identify the group of all IPv6 routers, + // within scope 1 (interface-local), 2 (link-local), or 5 (site-local). + for _, g := range allRoutersGroups { + if err := e.joinGroupLocked(g); err != nil { + // joinGroupLocked only returns an error if the group address is not a + // valid IPv6 multicast address. + panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err)) + } + } + + return + } + + for _, g := range allRoutersGroups { + switch err := e.leaveGroupLocked(g).(type) { + case nil: + case *tcpip.ErrBadLocalAddress: + // The endpoint may have already left the multicast group. + default: + panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err)) + } + } + + // When transitioning into an IPv6 host, NDP router solicitations are + // started if the endpoint is enabled. + // + // If the endpoint is not currently enabled, routers will be solicited when + // the endpoint becomes enabled (if it is still a host). + if e.Enabled() { e.mu.ndp.startSolicitingRouters() } } @@ -573,7 +616,7 @@ func (e *endpoint) disableLocked() { e.mu.ndp.cleanupState(false /* hostOnly */) // The endpoint may have already left the multicast group. - switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err.(type) { + switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) { case nil, *tcpip.ErrBadLocalAddress: default: panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err)) @@ -869,6 +912,16 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu // forwardPacket attempts to forward a packet to its final destination. func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { h := header.IPv6(pkt.NetworkHeader().View()) + + dstAddr := h.DestinationAddress() + if header.IsV6LinkLocalAddress(h.SourceAddress()) || header.IsV6LinkLocalAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) { + // As per RFC 4291 section 2.5.6, + // + // Routers must not forward any packets with Link-Local source or + // destination addresses to other links. + return nil + } + hopLimit := h.HopLimit() if hopLimit <= 1 { // As per RFC 4443 section 3.3, @@ -881,8 +934,6 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { return e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt) } - dstAddr := h.DestinationAddress() - // Check if the destination is owned by the stack. if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { ep.handleValidatedPacket(h, pkt) @@ -1979,9 +2030,9 @@ func (p *protocol) Forwarding() bool { // Returns true if the forwarding status was updated. func (p *protocol) setForwarding(v bool) bool { if v { - return atomic.SwapUint32(&p.forwarding, 1) == 0 + return atomic.CompareAndSwapUint32(&p.forwarding, 0 /* old */, 1 /* new */) } - return atomic.SwapUint32(&p.forwarding, 0) == 1 + return atomic.CompareAndSwapUint32(&p.forwarding, 1 /* old */, 0 /* new */) } // SetForwarding implements stack.ForwardingNetworkProtocol. diff --git a/pkg/tcpip/network/ipv6/mld.go b/pkg/tcpip/network/ipv6/mld.go index dd153466d..165b7d2d2 100644 --- a/pkg/tcpip/network/ipv6/mld.go +++ b/pkg/tcpip/network/ipv6/mld.go @@ -76,10 +76,29 @@ func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) // // Precondition: mld.ep.mu must be read locked. func (mld *mldState) SendLeave(groupAddress tcpip.Address) tcpip.Error { - _, err := mld.writePacket(header.IPv6AllRoutersMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone) + _, err := mld.writePacket(header.IPv6AllRoutersLinkLocalMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone) return err } +// ShouldPerformProtocol implements ip.MulticastGroupProtocol. +func (mld *mldState) ShouldPerformProtocol(groupAddress tcpip.Address) bool { + // As per RFC 2710 section 5 page 10, + // + // The link-scope all-nodes address (FF02::1) is handled as a special + // case. The node starts in Idle Listener state for that address on + // every interface, never transitions to another state, and never sends + // a Report or Done for that address. + // + // MLD messages are never sent for multicast addresses whose scope is 0 + // (reserved) or 1 (node-local). + if groupAddress == header.IPv6AllNodesMulticastAddress { + return false + } + + scope := header.V6MulticastScope(groupAddress) + return scope != header.IPv6Reserved0MulticastScope && scope != header.IPv6InterfaceLocalMulticastScope +} + // init sets up an mldState struct, and is required to be called before using // a new mldState. // @@ -91,7 +110,6 @@ func (mld *mldState) init(ep *endpoint) { Clock: ep.protocol.stack.Clock(), Protocol: mld, MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax, - AllNodesAddress: header.IPv6AllNodesMulticastAddress, }) } diff --git a/pkg/tcpip/network/ipv6/mld_test.go b/pkg/tcpip/network/ipv6/mld_test.go index 85a8f9944..146b300f1 100644 --- a/pkg/tcpip/network/ipv6/mld_test.go +++ b/pkg/tcpip/network/ipv6/mld_test.go @@ -93,7 +93,7 @@ func TestIPv6JoinLeaveSolicitedNodeAddressPerformsMLD(t *testing.T) { if p, ok := e.Read(); !ok { t.Fatal("expected a done message to be sent") } else { - validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, header.IPv6AllRoutersMulticastAddress, header.ICMPv6MulticastListenerDone, linkLocalAddrSNMC) + validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, header.IPv6AllRoutersLinkLocalMulticastAddress, header.ICMPv6MulticastListenerDone, linkLocalAddrSNMC) } } @@ -464,3 +464,141 @@ func TestMLDPacketValidation(t *testing.T) { }) } } + +func TestMLDSkipProtocol(t *testing.T) { + const nicID = 1 + + tests := []struct { + name string + group tcpip.Address + expectReport bool + }{ + { + name: "Reserverd0", + group: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: false, + }, + { + name: "Interface Local", + group: "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: false, + }, + { + name: "Link Local", + group: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Realm Local", + group: "\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Admin Local", + group: "\xff\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Site Local", + group: "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(6)", + group: "\xff\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(7)", + group: "\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Organization Local", + group: "\xff\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(9)", + group: "\xff\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(A)", + group: "\xff\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(B)", + group: "\xff\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(C)", + group: "\xff\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Unassigned(D)", + group: "\xff\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "Global", + group: "\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + { + name: "ReservedF", + group: "\xff\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11", + expectReport: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{ + MLD: ipv6.MLDOptions{ + Enabled: true, + }, + })}, + }) + e := channel.New(1, header.IPv6MinimumMTU, "") + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _): %s", nicID, err) + } + if err := s.AddAddress(nicID, ipv6.ProtocolNumber, linkLocalAddr); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ipv6.ProtocolNumber, linkLocalAddr, err) + } + if p, ok := e.Read(); !ok { + t.Fatal("expected a report message to be sent") + } else { + validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), linkLocalAddr, linkLocalAddrSNMC, header.ICMPv6MulticastListenerReport, linkLocalAddrSNMC) + } + + if err := s.JoinGroup(ipv6.ProtocolNumber, nicID, test.group); err != nil { + t.Fatalf("s.JoinGroup(%d, %d, %s): %s", ipv6.ProtocolNumber, nicID, test.group, err) + } + if isInGroup, err := s.IsInGroup(nicID, test.group); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.group, err) + } else if !isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.group) + } + + if !test.expectReport { + if p, ok := e.Read(); ok { + t.Fatalf("got e.Read() = (%#v, true), want = (_, false)", p) + } + + return + } + + if p, ok := e.Read(); !ok { + t.Fatal("expected a report message to be sent") + } else { + validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), linkLocalAddr, test.group, header.ICMPv6MulticastListenerReport, test.group) + } + }) + } +} diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go index 536493f87..dd7f6a126 100644 --- a/pkg/tcpip/network/ipv6/ndp.go +++ b/pkg/tcpip/network/ipv6/ndp.go @@ -1703,7 +1703,7 @@ func (ndp *ndpState) startSolicitingRouters() { // the unspecified address if no address is assigned // to the sending interface. localAddr := header.IPv6Any - if addressEndpoint := ndp.ep.AcquireOutgoingPrimaryAddress(header.IPv6AllRoutersMulticastAddress, false); addressEndpoint != nil { + if addressEndpoint := ndp.ep.AcquireOutgoingPrimaryAddress(header.IPv6AllRoutersLinkLocalMulticastAddress, false); addressEndpoint != nil { localAddr = addressEndpoint.AddressWithPrefix().Address addressEndpoint.DecRef() } @@ -1730,7 +1730,7 @@ func (ndp *ndpState) startSolicitingRouters() { icmpData.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ Header: icmpData, Src: localAddr, - Dst: header.IPv6AllRoutersMulticastAddress, + Dst: header.IPv6AllRoutersLinkLocalMulticastAddress, })) pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ @@ -1739,14 +1739,14 @@ func (ndp *ndpState) startSolicitingRouters() { }) sent := ndp.ep.stats.icmp.packetsSent - if err := addIPHeader(localAddr, header.IPv6AllRoutersMulticastAddress, pkt, stack.NetworkHeaderParams{ + if err := addIPHeader(localAddr, header.IPv6AllRoutersLinkLocalMulticastAddress, pkt, stack.NetworkHeaderParams{ Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, }, nil /* extensionHeaders */); err != nil { panic(fmt.Sprintf("failed to add IP header: %s", err)) } - if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress), nil /* gso */, ProtocolNumber, pkt); err != nil { + if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersLinkLocalMulticastAddress), nil /* gso */, ProtocolNumber, pkt); err != nil { sent.dropped.Increment() // Don't send any more messages if we had an error. remaining = 0 diff --git a/pkg/tcpip/network/multicast_group_test.go b/pkg/tcpip/network/multicast_group_test.go index ecd5003a7..2aa4e6d75 100644 --- a/pkg/tcpip/network/multicast_group_test.go +++ b/pkg/tcpip/network/multicast_group_test.go @@ -194,7 +194,7 @@ func checkInitialIPv6Groups(t *testing.T, e *channel.Endpoint, s *stack.Stack, c if p, ok := e.Read(); !ok { t.Fatal("expected a report message to be sent") } else { - validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6AddrSNMC) + validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, ipv6AddrSNMC) } // Should not send any more packets. @@ -606,7 +606,7 @@ func TestMGPLeaveGroup(t *testing.T) { validateLeave: func(t *testing.T, p channel.PacketInfo) { t.Helper() - validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6MulticastAddr1) + validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, ipv6MulticastAddr1) }, checkInitialGroups: checkInitialIPv6Groups, }, @@ -1014,7 +1014,7 @@ func TestMGPWithNICLifecycle(t *testing.T) { validateLeave: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) { t.Helper() - validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, addr) + validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, addr) }, getAndCheckGroupAddress: func(t *testing.T, seen map[tcpip.Address]bool, p channel.PacketInfo) tcpip.Address { t.Helper() diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 14124ae66..a869cce38 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -5204,13 +5204,13 @@ func TestRouterSolicitation(t *testing.T) { } // Make sure the right remote link address is used. - if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); p.Route.RemoteLinkAddress != want { + if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersLinkLocalMulticastAddress); p.Route.RemoteLinkAddress != want { t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want) } checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()), checker.SrcAddr(test.expectedSrcAddr), - checker.DstAddr(header.IPv6AllRoutersMulticastAddress), + checker.DstAddr(header.IPv6AllRoutersLinkLocalMulticastAddress), checker.TTL(header.NDPHopLimit), checker.NDPRS(checker.NDPRSOptions(test.expectedNDPOpts)), ) @@ -5362,7 +5362,7 @@ func TestStopStartSolicitingRouters(t *testing.T) { } checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()), checker.SrcAddr(header.IPv6Any), - checker.DstAddr(header.IPv6AllRoutersMulticastAddress), + checker.DstAddr(header.IPv6AllRoutersLinkLocalMulticastAddress), checker.TTL(header.NDPHopLimit), checker.NDPRS()) } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 87ea09a5e..60de16579 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -786,6 +786,13 @@ func (*TCPRecovery) isGettableTransportProtocolOption() {} func (*TCPRecovery) isSettableTransportProtocolOption() {} +// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies. +type TCPAlwaysUseSynCookies bool + +func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {} + +func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {} + const ( // TCPRACKLossDetection indicates RACK is used for loss detection and // recovery. @@ -1020,19 +1027,6 @@ func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {} func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {} -// TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify -// the number of endpoints that can be in SYN-RCVD state before the stack -// switches to using SYN cookies. -type TCPSynRcvdCountThresholdOption uint64 - -func (*TCPSynRcvdCountThresholdOption) isGettableSocketOption() {} - -func (*TCPSynRcvdCountThresholdOption) isSettableSocketOption() {} - -func (*TCPSynRcvdCountThresholdOption) isGettableTransportProtocolOption() {} - -func (*TCPSynRcvdCountThresholdOption) isSettableTransportProtocolOption() {} - // TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide // default for number of times SYN is retransmitted before aborting a connect. type TCPSynRetriesOption uint8 diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD index 3cc8c36f1..3b51e4be0 100644 --- a/pkg/tcpip/tests/integration/BUILD +++ b/pkg/tcpip/tests/integration/BUILD @@ -9,6 +9,8 @@ go_test( deps = [ "//pkg/tcpip", "//pkg/tcpip/checker", + "//pkg/tcpip/header", + "//pkg/tcpip/link/channel", "//pkg/tcpip/network/arp", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/network/ipv6", diff --git a/pkg/tcpip/tests/integration/forward_test.go b/pkg/tcpip/tests/integration/forward_test.go index d10ae05c2..0de5079e8 100644 --- a/pkg/tcpip/tests/integration/forward_test.go +++ b/pkg/tcpip/tests/integration/forward_test.go @@ -21,6 +21,8 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" @@ -312,3 +314,193 @@ func TestForwarding(t *testing.T) { }) } } + +func TestMulticastForwarding(t *testing.T) { + const ( + nicID1 = 1 + nicID2 = 2 + + ipv4LinkLocalUnicastAddr = tcpip.Address("\xa9\xfe\x00\x0a") + ipv4LinkLocalMulticastAddr = tcpip.Address("\xe0\x00\x00\x0a") + ipv4GlobalMulticastAddr = tcpip.Address("\xe0\x00\x01\x0a") + + ipv6LinkLocalUnicastAddr = tcpip.Address("\xfe\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a") + ipv6LinkLocalMulticastAddr = tcpip.Address("\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a") + ipv6GlobalMulticastAddr = tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a") + + ttl = 64 + ) + + rxICMPv4EchoRequest := func(e *channel.Endpoint, src, dst tcpip.Address) { + utils.RxICMPv4EchoRequest(e, src, dst, ttl) + } + + rxICMPv6EchoRequest := func(e *channel.Endpoint, src, dst tcpip.Address) { + utils.RxICMPv6EchoRequest(e, src, dst, ttl) + } + + v4Checker := func(t *testing.T, b []byte, src, dst tcpip.Address) { + checker.IPv4(t, b, + checker.SrcAddr(src), + checker.DstAddr(dst), + checker.TTL(ttl-1), + checker.ICMPv4( + checker.ICMPv4Type(header.ICMPv4Echo))) + } + + v6Checker := func(t *testing.T, b []byte, src, dst tcpip.Address) { + checker.IPv6(t, b, + checker.SrcAddr(src), + checker.DstAddr(dst), + checker.TTL(ttl-1), + checker.ICMPv6( + checker.ICMPv6Type(header.ICMPv6EchoRequest))) + } + + tests := []struct { + name string + srcAddr, dstAddr tcpip.Address + rx func(*channel.Endpoint, tcpip.Address, tcpip.Address) + expectForward bool + checker func(*testing.T, []byte) + }{ + { + name: "IPv4 link-local multicast destination", + srcAddr: utils.RemoteIPv4Addr, + dstAddr: ipv4LinkLocalMulticastAddr, + rx: rxICMPv4EchoRequest, + expectForward: false, + }, + { + name: "IPv4 link-local source", + srcAddr: ipv4LinkLocalUnicastAddr, + dstAddr: utils.RemoteIPv4Addr, + rx: rxICMPv4EchoRequest, + expectForward: false, + }, + { + name: "IPv4 link-local destination", + srcAddr: utils.RemoteIPv4Addr, + dstAddr: ipv4LinkLocalUnicastAddr, + rx: rxICMPv4EchoRequest, + expectForward: false, + }, + { + name: "IPv4 non-link-local unicast", + srcAddr: utils.RemoteIPv4Addr, + dstAddr: utils.Ipv4Addr2.AddressWithPrefix.Address, + rx: rxICMPv4EchoRequest, + expectForward: true, + checker: func(t *testing.T, b []byte) { + v4Checker(t, b, utils.RemoteIPv4Addr, utils.Ipv4Addr2.AddressWithPrefix.Address) + }, + }, + { + name: "IPv4 non-link-local multicast", + srcAddr: utils.RemoteIPv4Addr, + dstAddr: ipv4GlobalMulticastAddr, + rx: rxICMPv4EchoRequest, + expectForward: true, + checker: func(t *testing.T, b []byte) { + v4Checker(t, b, utils.RemoteIPv4Addr, ipv4GlobalMulticastAddr) + }, + }, + + { + name: "IPv6 link-local multicast destination", + srcAddr: utils.RemoteIPv6Addr, + dstAddr: ipv6LinkLocalMulticastAddr, + rx: rxICMPv6EchoRequest, + expectForward: false, + }, + { + name: "IPv6 link-local source", + srcAddr: ipv6LinkLocalUnicastAddr, + dstAddr: utils.RemoteIPv6Addr, + rx: rxICMPv6EchoRequest, + expectForward: false, + }, + { + name: "IPv6 link-local destination", + srcAddr: utils.RemoteIPv6Addr, + dstAddr: ipv6LinkLocalUnicastAddr, + rx: rxICMPv6EchoRequest, + expectForward: false, + }, + { + name: "IPv6 non-link-local unicast", + srcAddr: utils.RemoteIPv6Addr, + dstAddr: utils.Ipv6Addr2.AddressWithPrefix.Address, + rx: rxICMPv6EchoRequest, + expectForward: true, + checker: func(t *testing.T, b []byte) { + v6Checker(t, b, utils.RemoteIPv6Addr, utils.Ipv6Addr2.AddressWithPrefix.Address) + }, + }, + { + name: "IPv6 non-link-local multicast", + srcAddr: utils.RemoteIPv6Addr, + dstAddr: ipv6GlobalMulticastAddr, + rx: rxICMPv6EchoRequest, + expectForward: true, + checker: func(t *testing.T, b []byte) { + v6Checker(t, b, utils.RemoteIPv6Addr, ipv6GlobalMulticastAddr) + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol}, + TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol}, + }) + + e1 := channel.New(1, header.IPv6MinimumMTU, "") + if err := s.CreateNIC(nicID1, e1); err != nil { + t.Fatalf("s.CreateNIC(%d, _): %s", nicID1, err) + } + + e2 := channel.New(1, header.IPv6MinimumMTU, "") + if err := s.CreateNIC(nicID2, e2); err != nil { + t.Fatalf("s.CreateNIC(%d, _): %s", nicID2, err) + } + + if err := s.AddAddress(nicID2, ipv4.ProtocolNumber, utils.Ipv4Addr.Address); err != nil { + t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID2, ipv4.ProtocolNumber, utils.Ipv4Addr.Address, err) + } + if err := s.AddAddress(nicID2, ipv6.ProtocolNumber, utils.Ipv6Addr.Address); err != nil { + t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID2, ipv6.ProtocolNumber, utils.Ipv6Addr.Address, err) + } + + if err := s.SetForwarding(ipv4.ProtocolNumber, true); err != nil { + t.Fatalf("s.SetForwarding(%d, true): %s", ipv4.ProtocolNumber, err) + } + if err := s.SetForwarding(ipv6.ProtocolNumber, true); err != nil { + t.Fatalf("s.SetForwarding(%d, true): %s", ipv6.ProtocolNumber, err) + } + + s.SetRouteTable([]tcpip.Route{ + { + Destination: header.IPv4EmptySubnet, + NIC: nicID2, + }, + { + Destination: header.IPv6EmptySubnet, + NIC: nicID2, + }, + }) + + test.rx(e1, test.srcAddr, test.dstAddr) + + p, ok := e2.Read() + if ok != test.expectForward { + t.Fatalf("got e2.Read() = (%#v, %t), want = (_, %t)", p, ok, test.expectForward) + } + + if test.expectForward { + test.checker(t, stack.PayloadSince(p.Pkt.NetworkHeader())) + } + }) + } +} diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go index 2c538a43e..82c2e11ab 100644 --- a/pkg/tcpip/tests/integration/loopback_test.go +++ b/pkg/tcpip/tests/integration/loopback_test.go @@ -513,22 +513,23 @@ func TestExternalLoopbackTraffic(t *testing.T) { ipv4Loopback = tcpip.Address("\x7f\x00\x00\x01") numPackets = 1 + ttl = 64 ) loopbackSourcedICMPv4 := func(e *channel.Endpoint) { - utils.RxICMPv4EchoRequest(e, ipv4Loopback, utils.Ipv4Addr.Address) + utils.RxICMPv4EchoRequest(e, ipv4Loopback, utils.Ipv4Addr.Address, ttl) } loopbackSourcedICMPv6 := func(e *channel.Endpoint) { - utils.RxICMPv6EchoRequest(e, header.IPv6Loopback, utils.Ipv6Addr.Address) + utils.RxICMPv6EchoRequest(e, header.IPv6Loopback, utils.Ipv6Addr.Address, ttl) } loopbackDestinedICMPv4 := func(e *channel.Endpoint) { - utils.RxICMPv4EchoRequest(e, utils.RemoteIPv4Addr, ipv4Loopback) + utils.RxICMPv4EchoRequest(e, utils.RemoteIPv4Addr, ipv4Loopback, ttl) } loopbackDestinedICMPv6 := func(e *channel.Endpoint) { - utils.RxICMPv6EchoRequest(e, utils.RemoteIPv6Addr, header.IPv6Loopback) + utils.RxICMPv6EchoRequest(e, utils.RemoteIPv6Addr, header.IPv6Loopback, ttl) } invalidSrcAddrStat := func(s tcpip.IPStats) *tcpip.StatCounter { diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go index c6a9c2393..09ff3b892 100644 --- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go +++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go @@ -43,12 +43,15 @@ const ( // to a multicast or broadcast address uses a unicast source address for the // reply. func TestPingMulticastBroadcast(t *testing.T) { - const nicID = 1 + const ( + nicID = 1 + ttl = 64 + ) tests := []struct { name string protoNum tcpip.NetworkProtocolNumber - rxICMP func(*channel.Endpoint, tcpip.Address, tcpip.Address) + rxICMP func(*channel.Endpoint, tcpip.Address, tcpip.Address, uint8) srcAddr tcpip.Address dstAddr tcpip.Address expectedSrc tcpip.Address @@ -136,7 +139,7 @@ func TestPingMulticastBroadcast(t *testing.T) { }, }) - test.rxICMP(e, test.srcAddr, test.dstAddr) + test.rxICMP(e, test.srcAddr, test.dstAddr, ttl) pkt, ok := e.Read() if !ok { t.Fatal("expected ICMP response") diff --git a/pkg/tcpip/tests/utils/utils.go b/pkg/tcpip/tests/utils/utils.go index d1c9f3a94..8fd9be32b 100644 --- a/pkg/tcpip/tests/utils/utils.go +++ b/pkg/tcpip/tests/utils/utils.go @@ -48,10 +48,6 @@ const ( LinkAddr4 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x09") ) -const ( - ttl = 255 -) - // Common IP addresses used by tests. var ( Ipv4Addr = tcpip.AddressWithPrefix{ @@ -322,7 +318,7 @@ func SetupRoutedStacks(t *testing.T, host1Stack, routerStack, host2Stack *stack. // RxICMPv4EchoRequest constructs and injects an ICMPv4 echo request packet on // the provided endpoint. -func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) { +func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address, ttl uint8) { totalLen := header.IPv4MinimumSize + header.ICMPv4MinimumSize hdr := buffer.NewPrependable(totalLen) pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) @@ -347,7 +343,7 @@ func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) { // RxICMPv6EchoRequest constructs and injects an ICMPv6 echo request packet on // the provided endpoint. -func RxICMPv6EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) { +func RxICMPv6EchoRequest(e *channel.Endpoint, src, dst tcpip.Address, ttl uint8) { totalLen := header.IPv6MinimumSize + header.ICMPv6MinimumSize hdr := buffer.NewPrependable(totalLen) pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize)) diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 025b134e2..7372ebc08 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -51,11 +51,6 @@ const ( // timestamp and the current timestamp. If the difference is greater // than maxTSDiff, the cookie is expired. maxTSDiff = 2 - - // SynRcvdCountThreshold is the default global maximum number of - // connections that are allowed to be in SYN-RCVD state before TCP - // starts using SYN cookies to accept connections. - SynRcvdCountThreshold uint64 = 1000 ) var ( @@ -80,9 +75,6 @@ func encodeMSS(mss uint16) uint32 { type listenContext struct { stack *stack.Stack - // synRcvdCount is a reference to the stack level synRcvdCount. - synRcvdCount *synRcvdCounter - // rcvWnd is the receive window that is sent by this listening context // in the initial SYN-ACK. rcvWnd seqnum.Size @@ -138,11 +130,6 @@ func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, listenEP: listenEP, pendingEndpoints: make(map[stack.TransportEndpointID]*endpoint), } - p, ok := stk.TransportProtocolInstance(ProtocolNumber).(*protocol) - if !ok { - panic(fmt.Sprintf("unable to get TCP protocol instance from stack: %+v", stk)) - } - l.synRcvdCount = p.SynRcvdCounter() rand.Read(l.nonce[0][:]) rand.Read(l.nonce[1][:]) @@ -199,6 +186,14 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu return (v - l.cookieHash(id, cookieTS, 1)) & hashMask, true } +func (l *listenContext) useSynCookies() bool { + var alwaysUseSynCookies tcpip.TCPAlwaysUseSynCookies + if err := l.stack.TransportProtocolOption(header.TCPProtocolNumber, &alwaysUseSynCookies); err != nil { + panic(fmt.Sprintf("TransportProtocolOption(%d, %T) = %s", header.TCPProtocolNumber, alwaysUseSynCookies, err)) + } + return bool(alwaysUseSynCookies) || (l.listenEP != nil && l.listenEP.synRcvdBacklogFull()) +} + // createConnectingEndpoint creates a new endpoint in a connecting state, with // the connection parameters given by the arguments. func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, tcpip.Error) { @@ -307,6 +302,7 @@ func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, q // Initialize and start the handshake. h := ep.newPassiveHandshake(isn, irs, opts, deferAccept) + h.listenEP = l.listenEP h.start() return h, nil } @@ -485,7 +481,6 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header } go func() { - defer ctx.synRcvdCount.dec() if err := h.complete(); err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() @@ -497,24 +492,29 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header h.ep.startAcceptedLoop() e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() e.deliverAccepted(h.ep, false /*withSynCookie*/) - }() // S/R-SAFE: synRcvdCount is the barrier. + }() return nil } -func (e *endpoint) incSynRcvdCount() bool { +func (e *endpoint) synRcvdBacklogFull() bool { e.acceptMu.Lock() - canInc := int(atomic.LoadInt32(&e.synRcvdCount)) < cap(e.acceptedChan) + acceptedChanCap := cap(e.acceptedChan) e.acceptMu.Unlock() - if canInc { - atomic.AddInt32(&e.synRcvdCount, 1) - } - return canInc + // The allocated accepted channel size would always be one greater than the + // listen backlog. But, the SYNRCVD connections count is always checked + // against the listen backlog value for Linux parity reason. + // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280 + // + // We maintain an equality check here as the synRcvdCount is incremented + // and compared only from a single listener context and the capacity of + // the accepted channel can only increase by a new listen call. + return int(atomic.LoadInt32(&e.synRcvdCount)) == acceptedChanCap-1 } func (e *endpoint) acceptQueueIsFull() bool { e.acceptMu.Lock() - full := len(e.acceptedChan)+int(atomic.LoadInt32(&e.synRcvdCount)) >= cap(e.acceptedChan) + full := len(e.acceptedChan) == cap(e.acceptedChan) e.acceptMu.Unlock() return full } @@ -539,17 +539,13 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err switch { case s.flags == header.TCPFlagSyn: opts := parseSynSegmentOptions(s) - if ctx.synRcvdCount.inc() { - // Only handle the syn if the following conditions hold - // - accept queue is not full. - // - number of connections in synRcvd state is less than the - // backlog. - if !e.acceptQueueIsFull() && e.incSynRcvdCount() { + if !ctx.useSynCookies() { + if !e.acceptQueueIsFull() { s.incRef() + atomic.AddInt32(&e.synRcvdCount, 1) _ = e.handleSynSegment(ctx, s, &opts) return nil } - ctx.synRcvdCount.dec() e.stack.Stats().TCP.ListenOverflowSynDrop.Increment() e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment() e.stack.Stats().DroppedPackets.Increment() @@ -615,25 +611,6 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err return nil } - if !ctx.synRcvdCount.synCookiesInUse() { - // When not using SYN cookies, as per RFC 793, section 3.9, page 64: - // Any acknowledgment is bad if it arrives on a connection still in - // the LISTEN state. An acceptable reset segment should be formed - // for any arriving ACK-bearing segment. The RST should be - // formatted as follows: - // - // <SEQ=SEG.ACK><CTL=RST> - // - // Send a reset as this is an ACK for which there is no - // half open connections and we are not using cookies - // yet. - // - // The only time we should reach here when a connection - // was opened and closed really quickly and a delayed - // ACK was received from the sender. - return replyWithReset(e.stack, s, e.sendTOS, e.ttl) - } - iss := s.ackNumber - 1 irs := s.sequenceNumber - 1 @@ -651,7 +628,23 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err if !ok || int(data) >= len(mssTable) { e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment() e.stack.Stats().DroppedPackets.Increment() - return nil + + // When not using SYN cookies, as per RFC 793, section 3.9, page 64: + // Any acknowledgment is bad if it arrives on a connection still in + // the LISTEN state. An acceptable reset segment should be formed + // for any arriving ACK-bearing segment. The RST should be + // formatted as follows: + // + // <SEQ=SEG.ACK><CTL=RST> + // + // Send a reset as this is an ACK for which there is no + // half open connections and we are not using cookies + // yet. + // + // The only time we should reach here when a connection + // was opened and closed really quickly and a delayed + // ACK was received from the sender. + return replyWithReset(e.stack, s, e.sendTOS, e.ttl) } e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment() // Create newly accepted endpoint and deliver it. diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index a9e978cf6..8f0f0c3e9 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -65,11 +65,12 @@ const ( // NOTE: handshake.ep.mu is held during handshake processing. It is released if // we are going to block and reacquired when we start processing an event. type handshake struct { - ep *endpoint - state handshakeState - active bool - flags header.TCPFlags - ackNum seqnum.Value + ep *endpoint + listenEP *endpoint + state handshakeState + active bool + flags header.TCPFlags + ackNum seqnum.Value // iss is the initial send sequence number, as defined in RFC 793. iss seqnum.Value @@ -394,6 +395,15 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error { return nil } + // Drop the ACK if the accept queue is full. + // https://github.com/torvalds/linux/blob/7acac4b3196/net/ipv4/tcp_ipv4.c#L1523 + // We could abort the connection as well with a tunable as in + // https://github.com/torvalds/linux/blob/7acac4b3196/net/ipv4/tcp_minisocks.c#L788 + if listenEP := h.listenEP; listenEP != nil && listenEP.acceptQueueIsFull() { + listenEP.stack.Stats().DroppedPackets.Increment() + return nil + } + // Update timestamp if required. See RFC7323, section-4.3. if h.ep.sendTSOk && s.parsedOptions.TS { h.ep.updateRecentTimestamp(s.parsedOptions.TSVal, h.ackNum, s.sequenceNumber) diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go index f6a16f96e..d6d68f128 100644 --- a/pkg/tcpip/transport/tcp/dual_stack_test.go +++ b/pkg/tcpip/transport/tcp/dual_stack_test.go @@ -565,17 +565,15 @@ func TestV4AcceptOnV4(t *testing.T) { } func testV4ListenClose(t *testing.T, c *context.Context) { - // Set the SynRcvd threshold to zero to force a syn cookie based accept - // to happen. - var opt tcpip.TCPSynRcvdCountThresholdOption + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("setting TCPSynRcvdCountThresholdOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } - const n = uint16(32) + const n = 32 // Start listening. - if err := c.EP.Listen(int(tcp.SynRcvdCountThreshold + 1)); err != nil { + if err := c.EP.Listen(n); err != nil { t.Fatalf("Listen failed: %v", err) } @@ -591,9 +589,9 @@ func testV4ListenClose(t *testing.T, c *context.Context) { }) } - // Each of these ACK's will cause a syn-cookie based connection to be + // Each of these ACKs will cause a syn-cookie based connection to be // accepted and delivered to the listening endpoint. - for i := uint16(0); i < n; i++ { + for i := 0; i < n; i++ { b := c.GetPacket() tcp := header.TCP(header.IPv4(b).Payload()) iss := seqnum.Value(tcp.SequenceNumber()) diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index c5daba232..9438056f9 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2474,6 +2474,20 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) tcpip.Error { // Listen puts the endpoint in "listen" mode, which allows it to accept // new connections. func (e *endpoint) Listen(backlog int) tcpip.Error { + if uint32(backlog) > MaxListenBacklog { + // Linux treats incoming backlog as uint with a limit defined by + // sysctl_somaxconn. + // https://github.com/torvalds/linux/blob/7acac4b3196/net/socket.c#L1666 + // + // We use the backlog to allocate a channel of that size, hence enforce + // a hard limit for the backlog. + backlog = MaxListenBacklog + } else { + // Accept one more than the configured listen backlog to keep in parity with + // Linux. Ref, because of missing equality check here: + // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/sock.h#L937 + backlog++ + } err := e.listen(backlog) if err != nil { if !err.IgnoreStats() { diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 2a4667906..230fa6ebe 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -68,6 +68,9 @@ const ( // DefaultSynRetries is the default value for the number of SYN retransmits // before a connect is aborted. DefaultSynRetries = 6 + + // MaxListenBacklog is the maximum limit of listen backlog supported. + MaxListenBacklog = 1024 ) const ( @@ -75,63 +78,6 @@ const ( ccCubic = "cubic" ) -// syncRcvdCounter tracks the number of endpoints in the SYN-RCVD state. The -// value is protected by a mutex so that we can increment only when it's -// guaranteed not to go above a threshold. -type synRcvdCounter struct { - sync.Mutex - value uint64 - pending sync.WaitGroup - threshold uint64 -} - -// inc tries to increment the global number of endpoints in SYN-RCVD state. It -// succeeds if the increment doesn't make the count go beyond the threshold, and -// fails otherwise. -func (s *synRcvdCounter) inc() bool { - s.Lock() - defer s.Unlock() - if s.value >= s.threshold { - return false - } - - s.pending.Add(1) - s.value++ - - return true -} - -// dec atomically decrements the global number of endpoints in SYN-RCVD -// state. It must only be called if a previous call to inc succeeded. -func (s *synRcvdCounter) dec() { - s.Lock() - defer s.Unlock() - s.value-- - s.pending.Done() -} - -// synCookiesInUse returns true if the synRcvdCount is greater than -// SynRcvdCountThreshold. -func (s *synRcvdCounter) synCookiesInUse() bool { - s.Lock() - defer s.Unlock() - return s.value >= s.threshold -} - -// SetThreshold sets synRcvdCounter.Threshold to ths new threshold. -func (s *synRcvdCounter) SetThreshold(threshold uint64) { - s.Lock() - defer s.Unlock() - s.threshold = threshold -} - -// Threshold returns the current value of synRcvdCounter.Threhsold. -func (s *synRcvdCounter) Threshold() uint64 { - s.Lock() - defer s.Unlock() - return s.threshold -} - type protocol struct { stack *stack.Stack @@ -139,6 +85,7 @@ type protocol struct { sackEnabled bool recovery tcpip.TCPRecovery delayEnabled bool + alwaysUseSynCookies bool sendBufferSize tcpip.TCPSendBufferSizeRangeOption recvBufferSize tcpip.TCPReceiveBufferSizeRangeOption congestionControl string @@ -150,7 +97,6 @@ type protocol struct { minRTO time.Duration maxRTO time.Duration maxRetries uint32 - synRcvdCount synRcvdCounter synRetries uint8 dispatcher dispatcher } @@ -373,9 +319,9 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip p.mu.Unlock() return nil - case *tcpip.TCPSynRcvdCountThresholdOption: + case *tcpip.TCPAlwaysUseSynCookies: p.mu.Lock() - p.synRcvdCount.SetThreshold(uint64(*v)) + p.alwaysUseSynCookies = bool(*v) p.mu.Unlock() return nil @@ -480,9 +426,9 @@ func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) tcpip.Er p.mu.RUnlock() return nil - case *tcpip.TCPSynRcvdCountThresholdOption: + case *tcpip.TCPAlwaysUseSynCookies: p.mu.RLock() - *v = tcpip.TCPSynRcvdCountThresholdOption(p.synRcvdCount.Threshold()) + *v = tcpip.TCPAlwaysUseSynCookies(p.alwaysUseSynCookies) p.mu.RUnlock() return nil @@ -507,12 +453,6 @@ func (p *protocol) Wait() { p.dispatcher.wait() } -// SynRcvdCounter returns a reference to the synRcvdCount for this protocol -// instance. -func (p *protocol) SynRcvdCounter() *synRcvdCounter { - return &p.synRcvdCount -} - // Parse implements stack.TransportProtocol.Parse. func (*protocol) Parse(pkt *stack.PacketBuffer) bool { return parse.TCP(pkt) @@ -537,7 +477,6 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol { lingerTimeout: DefaultTCPLingerTimeout, timeWaitTimeout: DefaultTCPTimeWaitTimeout, timeWaitReuse: tcpip.TCPTimeWaitReuseLoopbackOnly, - synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold}, synRetries: DefaultSynRetries, minRTO: MinRTO, maxRTO: MaxRTO, diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index 81f800cad..20c9761f2 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -160,12 +160,9 @@ func TestSackPermittedAccept(t *testing.T) { defer c.Cleanup() if tc.cookieEnabled { - // Set the SynRcvd threshold to - // zero to force a syn cookie - // based accept to happen. - var opt tcpip.TCPSynRcvdCountThresholdOption + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } } setStackSACKPermitted(t, c, sackEnabled) @@ -235,12 +232,9 @@ func TestSackDisabledAccept(t *testing.T) { defer c.Cleanup() if tc.cookieEnabled { - // Set the SynRcvd threshold to - // zero to force a syn cookie - // based accept to happen. - var opt tcpip.TCPSynRcvdCountThresholdOption + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 9c23469f2..5605a4390 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -955,11 +955,7 @@ func TestUserSuppliedMSSOnConnect(t *testing.T) { // when completing the handshake for a new TCP connection from a TCP // listening socket. It should be present in the sent TCP SYN-ACK segment. func TestUserSuppliedMSSOnListenAccept(t *testing.T) { - const ( - nonSynCookieAccepts = 2 - totalAccepts = 4 - mtu = 5000 - ) + const mtu = 5000 ips := []struct { name string @@ -1033,12 +1029,6 @@ func TestUserSuppliedMSSOnListenAccept(t *testing.T) { ip.createEP(c) - // Set the SynRcvd threshold to force a syn cookie based accept to happen. - opt := tcpip.TCPSynRcvdCountThresholdOption(nonSynCookieAccepts) - if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) - } - if err := c.EP.SetSockOptInt(tcpip.MaxSegOption, int(test.setMSS)); err != nil { t.Fatalf("SetSockOptInt(MaxSegOption, %d): %s", test.setMSS, err) } @@ -1048,13 +1038,17 @@ func TestUserSuppliedMSSOnListenAccept(t *testing.T) { t.Fatalf("Bind(%+v): %s:", bindAddr, err) } - if err := c.EP.Listen(totalAccepts); err != nil { - t.Fatalf("Listen(%d): %s:", totalAccepts, err) + backlog := 5 + // Keep the number of client requests twice to the backlog + // such that half of the connections do not use syncookies + // and the other half does. + clientConnects := backlog * 2 + + if err := c.EP.Listen(backlog); err != nil { + t.Fatalf("Listen(%d): %s:", backlog, err) } - // The first nonSynCookieAccepts packets sent will trigger a gorooutine - // based accept. The rest will trigger a cookie based accept. - for i := 0; i < totalAccepts; i++ { + for i := 0; i < clientConnects; i++ { // Send a SYN requests. iss := seqnum.Value(i) srcPort := context.TestPort + uint16(i) @@ -3087,11 +3081,9 @@ func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) { c := context.New(t, mtu) defer c.Cleanup() - // Set the SynRcvd threshold to zero to force a syn cookie based accept - // to happen. - opt := tcpip.TCPSynRcvdCountThresholdOption(0) + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } // Create EP and start listening. @@ -5363,7 +5355,7 @@ func TestListenBacklogFull(t *testing.T) { } lastPortOffset := uint16(0) - for ; int(lastPortOffset) < listenBacklog; lastPortOffset++ { + for ; int(lastPortOffset) < listenBacklog+1; lastPortOffset++ { executeHandshake(t, c, context.TestPort+lastPortOffset, false /*synCookieInUse */) } @@ -5671,15 +5663,13 @@ func TestListenSynRcvdQueueFull(t *testing.T) { } // Test acceptance. - // Start listening. - listenBacklog := 1 - if err := c.EP.Listen(listenBacklog); err != nil { + if err := c.EP.Listen(0); err != nil { t.Fatalf("Listen failed: %s", err) } // Send two SYN's the first one should get a SYN-ACK, the // second one should not get any response and is dropped as - // the synRcvd count will be equal to backlog. + // the accept queue is full. irs := seqnum.Value(context.TestInitialSequenceNumber) c.SendPacket(nil, &context.Headers{ SrcPort: context.TestPort, @@ -5701,23 +5691,7 @@ func TestListenSynRcvdQueueFull(t *testing.T) { } checker.IPv4(t, b, checker.TCP(tcpCheckers...)) - // Now execute send one more SYN. The stack should not respond as the backlog - // is full at this point. - // - // NOTE: we did not complete the handshake for the previous one so the - // accept backlog should be empty and there should be one connection in - // synRcvd state. - c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + 1, - DstPort: context.StackPort, - Flags: header.TCPFlagSyn, - SeqNum: seqnum.Value(889), - RcvWnd: 30000, - }) - c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) - - // Now complete the previous connection and verify that there is a connection - // to accept. + // Now complete the previous connection. // Send ACK. c.SendPacket(nil, &context.Headers{ SrcPort: context.TestPort, @@ -5728,11 +5702,24 @@ func TestListenSynRcvdQueueFull(t *testing.T) { RcvWnd: 30000, }) - // Try to accept the connections in the backlog. + // Verify if that is delivered to the accept queue. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.ReadableEvents) defer c.WQ.EventUnregister(&we) + <-ch + + // Now execute send one more SYN. The stack should not respond as the backlog + // is full at this point. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + 1, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(889), + RcvWnd: 30000, + }) + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) + // Try to accept the connections in the backlog. newEP, _, err := c.EP.Accept(nil) if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. @@ -5764,11 +5751,6 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() - opt := tcpip.TCPSynRcvdCountThresholdOption(1) - if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) - } - // Create TCP endpoint. var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) @@ -5781,9 +5763,8 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { t.Fatalf("Bind failed: %s", err) } - // Start listening. - listenBacklog := 1 - if err := c.EP.Listen(listenBacklog); err != nil { + // Test for SynCookies usage after filling up the backlog. + if err := c.EP.Listen(0); err != nil { t.Fatalf("Listen failed: %s", err) } @@ -6066,7 +6047,7 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { if err := c.EP.Bind(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } - if err := c.EP.Listen(1); err != nil { + if err := c.EP.Listen(0); err != nil { t.Fatalf("Listen failed: %s", err) } diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go index 2949588ce..1deb1fe4d 100644 --- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go @@ -139,9 +139,9 @@ func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndS defer c.Cleanup() if cookieEnabled { - var opt tcpip.TCPSynRcvdCountThresholdOption + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } } @@ -202,9 +202,9 @@ func timeStampDisabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wnd defer c.Cleanup() if cookieEnabled { - var opt tcpip.TCPSynRcvdCountThresholdOption + opt := tcpip.TCPAlwaysUseSynCookies(true) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err) + t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) } } diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index 41fcf4978..06152a444 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -434,7 +434,14 @@ func (c *Container) Wait(ctx context.Context) error { select { case err := <-errChan: return err - case <-statusChan: + case res := <-statusChan: + if res.StatusCode != 0 { + var msg string + if res.Error != nil { + msg = res.Error.Message + } + return fmt.Errorf("container returned non-zero status: %d, msg: %q", res.StatusCode, msg) + } return nil } } diff --git a/pkg/usermem/BUILD b/pkg/usermem/BUILD index 054269b59..3dba36f12 100644 --- a/pkg/usermem/BUILD +++ b/pkg/usermem/BUILD @@ -1,42 +1,22 @@ load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) -go_template_instance( - name = "addr_range", - out = "addr_range.go", - package = "usermem", - prefix = "Addr", - template = "//pkg/segment:generic_range", - types = { - "T": "Addr", - }, -) - go_library( name = "usermem", srcs = [ - "access_type.go", - "addr.go", - "addr_range.go", - "addr_range_seq_unsafe.go", "bytes_io.go", "bytes_io_unsafe.go", "usermem.go", - "usermem_arm64.go", - "usermem_x86.go", ], visibility = ["//:sandbox"], deps = [ "//pkg/atomicbitops", - "//pkg/binary", "//pkg/context", "//pkg/gohacks", - "//pkg/log", + "//pkg/hostarch", "//pkg/safemem", "//pkg/syserror", - "@org_golang_x_sys//unix:go_default_library", ], ) @@ -44,12 +24,12 @@ go_test( name = "usermem_test", size = "small", srcs = [ - "addr_range_seq_test.go", "usermem_test.go", ], library = ":usermem", deps = [ "//pkg/context", + "//pkg/hostarch", "//pkg/safemem", "//pkg/syserror", ], diff --git a/pkg/usermem/bytes_io.go b/pkg/usermem/bytes_io.go index e177d30eb..3da3c0294 100644 --- a/pkg/usermem/bytes_io.go +++ b/pkg/usermem/bytes_io.go @@ -16,6 +16,7 @@ package usermem import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/syserror" ) @@ -30,7 +31,7 @@ type BytesIO struct { } // CopyOut implements IO.CopyOut. -func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) { +func (b *BytesIO) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts IOOpts) (int, error) { rngN, rngErr := b.rangeCheck(addr, len(src)) if rngN == 0 { return 0, rngErr @@ -39,7 +40,7 @@ func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpt } // CopyIn implements IO.CopyIn. -func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) { +func (b *BytesIO) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts IOOpts) (int, error) { rngN, rngErr := b.rangeCheck(addr, len(dst)) if rngN == 0 { return 0, rngErr @@ -48,7 +49,7 @@ func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts } // ZeroOut implements IO.ZeroOut. -func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) { +func (b *BytesIO) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts IOOpts) (int64, error) { if toZero > int64(maxInt) { return 0, syserror.EINVAL } @@ -64,7 +65,7 @@ func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOO } // CopyOutFrom implements IO.CopyOutFrom. -func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) { +func (b *BytesIO) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) { dsts, rngErr := b.blocksFromAddrRanges(ars) n, err := src.ReadToBlocks(dsts) if err != nil { @@ -74,7 +75,7 @@ func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem } // CopyInTo implements IO.CopyInTo. -func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) { +func (b *BytesIO) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) { srcs, rngErr := b.blocksFromAddrRanges(ars) n, err := dst.WriteFromBlocks(srcs) if err != nil { @@ -83,14 +84,14 @@ func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Wr return int64(n), rngErr } -func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) { +func (b *BytesIO) rangeCheck(addr hostarch.Addr, length int) (int, error) { if length == 0 { return 0, nil } if length < 0 { return 0, syserror.EINVAL } - max := Addr(len(b.Bytes)) + max := hostarch.Addr(len(b.Bytes)) if addr >= max { return 0, syserror.EFAULT } @@ -101,7 +102,7 @@ func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) { return length, nil } -func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, error) { +func (b *BytesIO) blocksFromAddrRanges(ars hostarch.AddrRangeSeq) (safemem.BlockSeq, error) { switch ars.NumRanges() { case 0: return safemem.BlockSeq{}, nil @@ -124,7 +125,7 @@ func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, erro } } -func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) { +func (b *BytesIO) blockFromAddrRange(ar hostarch.AddrRange) (safemem.Block, error) { n, err := b.rangeCheck(ar.Start, int(ar.Length())) if n == 0 { return safemem.Block{}, err @@ -136,6 +137,6 @@ func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) { func BytesIOSequence(buf []byte) IOSequence { return IOSequence{ IO: &BytesIO{buf}, - Addrs: AddrRangeSeqOf(AddrRange{0, Addr(len(buf))}), + Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(len(buf))}), } } diff --git a/pkg/usermem/bytes_io_unsafe.go b/pkg/usermem/bytes_io_unsafe.go index 20de5037d..dcd5c81d1 100644 --- a/pkg/usermem/bytes_io_unsafe.go +++ b/pkg/usermem/bytes_io_unsafe.go @@ -20,10 +20,11 @@ import ( "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" ) // SwapUint32 implements IO.SwapUint32. -func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) { +func (b *BytesIO) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts IOOpts) (uint32, error) { if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { return 0, rngErr } @@ -31,7 +32,7 @@ func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IO } // CompareAndSwapUint32 implements IO.CompareAndSwapUint32. -func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) { +func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts IOOpts) (uint32, error) { if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { return 0, rngErr } @@ -39,7 +40,7 @@ func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new } // LoadUint32 implements IO.LoadUint32. -func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) { +func (b *BytesIO) LoadUint32(ctx context.Context, addr hostarch.Addr, opts IOOpts) (uint32, error) { if _, err := b.rangeCheck(addr, 4); err != nil { return 0, err } diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index dc2571154..0d6d25e50 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -25,6 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/gohacks" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/syserror" + + "gvisor.dev/gvisor/pkg/hostarch" ) // IO provides access to the contents of a virtual memory space. @@ -37,7 +39,7 @@ type IO interface { // any following locks in the lock order. // // Postconditions: CopyOut does not retain src. - CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) + CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts IOOpts) (int, error) // CopyIn copies len(dst) bytes from the memory mapped at addr to dst. // It returns the number of bytes copied. If the number of bytes copied is @@ -47,7 +49,7 @@ type IO interface { // any following locks in the lock order. // // Postconditions: CopyIn does not retain dst. - CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) + CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts IOOpts) (int, error) // ZeroOut sets toZero bytes to 0, starting at addr. It returns the number // of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a @@ -57,7 +59,7 @@ type IO interface { // * The caller must not hold mm.MemoryManager.mappingMu or any // following locks in the lock order. // * toZero >= 0. - ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) + ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts IOOpts) (int64, error) // CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at // ars. It returns the number of bytes copied, which may be less than the @@ -72,7 +74,7 @@ type IO interface { // following locks in the lock order. // * src.ReadToBlocks must not block on mm.MemoryManager.activeMu or // any preceding locks in the lock order. - CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) + CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) // CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to // dst. It returns the number of bytes copied. CopyInTo may return a @@ -86,7 +88,7 @@ type IO interface { // following locks in the lock order. // * dst.WriteFromBlocks must not block on mm.MemoryManager.activeMu or // any preceding locks in the lock order. - CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) + CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) // TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst // at most once, which is unnecessary in most cases, forces implementations @@ -101,7 +103,7 @@ type IO interface { // * The caller must not hold mm.MemoryManager.mappingMu or any // following locks in the lock order. // * addr must be aligned to a 4-byte boundary. - SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) + SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts IOOpts) (uint32, error) // CompareAndSwapUint32 atomically compares the uint32 value at addr to // old; if they are equal, the value in memory is replaced by new. In @@ -111,7 +113,7 @@ type IO interface { // * The caller must not hold mm.MemoryManager.mappingMu or any // following locks in the lock order. // * addr must be aligned to a 4-byte boundary. - CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) + CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts IOOpts) (uint32, error) // LoadUint32 atomically loads the uint32 value at addr and returns it. // @@ -119,7 +121,7 @@ type IO interface { // * The caller must not hold mm.MemoryManager.mappingMu or any // following locks in the lock order. // * addr must be aligned to a 4-byte boundary. - LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) + LoadUint32(ctx context.Context, addr hostarch.Addr, opts IOOpts) (uint32, error) } // IOOpts contains options applicable to all IO methods. @@ -142,7 +144,7 @@ type IOOpts struct { type IOReadWriter struct { Ctx context.Context IO IO - Addr Addr + Addr hostarch.Addr Opts IOOpts } @@ -159,7 +161,7 @@ func (rw *IOReadWriter) Read(dst []byte) (int, error) { rw.Addr = end } else { // Disallow wraparound. - rw.Addr = ^Addr(0) + rw.Addr = ^hostarch.Addr(0) if err != nil { err = syserror.EFAULT } @@ -175,7 +177,7 @@ func (rw *IOReadWriter) Write(src []byte) (int, error) { rw.Addr = end } else { // Disallow wraparound. - rw.Addr = ^Addr(0) + rw.Addr = ^hostarch.Addr(0) if err != nil { err = syserror.EFAULT } @@ -197,7 +199,7 @@ const ( // // Preconditions: Same as IO.CopyFromUser, plus: // * maxlen >= 0. -func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) { +func CopyStringIn(ctx context.Context, uio IO, addr hostarch.Addr, maxlen int, opts IOOpts) (string, error) { initLen := maxlen if initLen > copyStringMaxInitBufLen { initLen = copyStringMaxInitBufLen @@ -251,12 +253,12 @@ func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpt // the maximum, it returns a non-nil error explaining why. // // Preconditions: Same as IO.CopyOut. -func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) { +func CopyOutVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, src []byte, opts IOOpts) (int, error) { var done int for !ars.IsEmpty() && done < len(src) { ar := ars.Head() cplen := len(src) - done - if Addr(cplen) >= ar.Length() { + if hostarch.Addr(cplen) >= ar.Length() { cplen = int(ar.Length()) } n, err := uio.CopyOut(ctx, ar.Start, src[done:done+cplen], opts) @@ -275,12 +277,12 @@ func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts // maximum, it returns a non-nil error explaining why. // // Preconditions: Same as IO.CopyIn. -func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) { +func CopyInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dst []byte, opts IOOpts) (int, error) { var done int for !ars.IsEmpty() && done < len(dst) { ar := ars.Head() cplen := len(dst) - done - if Addr(cplen) >= ar.Length() { + if hostarch.Addr(cplen) >= ar.Length() { cplen = int(ar.Length()) } n, err := uio.CopyIn(ctx, ar.Start, dst[done:done+cplen], opts) @@ -299,12 +301,12 @@ func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts I // maximum, it returns a non-nil error explaining why. // // Preconditions: Same as IO.ZeroOut. -func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) { +func ZeroOutVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) { var done int64 for !ars.IsEmpty() && done < toZero { ar := ars.Head() cplen := toZero - done - if Addr(cplen) >= ar.Length() { + if hostarch.Addr(cplen) >= ar.Length() { cplen = int64(ar.Length()) } n, err := uio.ZeroOut(ctx, ar.Start, cplen, opts) @@ -352,7 +354,7 @@ func isASCIIWhitespace(b byte) bool { // - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0. // // Preconditions: Same as CopyInVec. -func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) { +func CopyInt32StringsInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) { if len(dsts) == 0 { return 0, nil } @@ -403,7 +405,7 @@ func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts [ // CopyInt32StringInVec is equivalent to CopyInt32StringsInVec, but copies at // most one int32. -func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) { +func CopyInt32StringInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) { dsts := [1]int32{*dst} n, err := CopyInt32StringsInVec(ctx, uio, ars, dsts[:], opts) *dst = dsts[0] @@ -413,7 +415,7 @@ func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *in // IOSequence holds arguments to IO methods. type IOSequence struct { IO IO - Addrs AddrRangeSeq + Addrs hostarch.AddrRangeSeq Opts IOOpts } @@ -444,28 +446,28 @@ func (s IOSequence) NumBytes() int64 { // DropFirst returns a copy of s with s.Addrs.DropFirst(n). // -// Preconditions: Same as AddrRangeSeq.DropFirst. +// Preconditions: Same as hostarch.AddrRangeSeq.DropFirst. func (s IOSequence) DropFirst(n int) IOSequence { return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts} } // DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n). // -// Preconditions: Same as AddrRangeSeq.DropFirst64. +// Preconditions: Same as hostarch.AddrRangeSeq.DropFirst64. func (s IOSequence) DropFirst64(n int64) IOSequence { return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts} } // TakeFirst returns a copy of s with s.Addrs.TakeFirst(n). // -// Preconditions: Same as AddrRangeSeq.TakeFirst. +// Preconditions: Same as hostarch.AddrRangeSeq.TakeFirst. func (s IOSequence) TakeFirst(n int) IOSequence { return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts} } // TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n). // -// Preconditions: Same as AddrRangeSeq.TakeFirst64. +// Preconditions: Same as hostarch.AddrRangeSeq.TakeFirst64. func (s IOSequence) TakeFirst64(n int64) IOSequence { return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts} } diff --git a/pkg/usermem/usermem_test.go b/pkg/usermem/usermem_test.go index da60b0cc7..9b697b593 100644 --- a/pkg/usermem/usermem_test.go +++ b/pkg/usermem/usermem_test.go @@ -22,6 +22,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/syserror" ) @@ -106,7 +107,7 @@ func TestBytesIOZeroOutFailure(t *testing.T) { func TestBytesIOCopyOutFromSuccess(t *testing.T) { b := newBytesIOString("ABCDEFGH") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + n, err := b.CopyOutFrom(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{ {Start: 4, End: 7}, {Start: 1, End: 4}, }), safemem.FromIOReader{bytes.NewBufferString("barfoo")}, IOOpts{}) @@ -120,7 +121,7 @@ func TestBytesIOCopyOutFromSuccess(t *testing.T) { func TestBytesIOCopyOutFromFailure(t *testing.T) { b := newBytesIOString("ABCDE") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + n, err := b.CopyOutFrom(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{ {Start: 1, End: 4}, {Start: 4, End: 7}, }), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{}) @@ -135,7 +136,7 @@ func TestBytesIOCopyOutFromFailure(t *testing.T) { func TestBytesIOCopyInToSuccess(t *testing.T) { b := newBytesIOString("AfoobarH") var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + n, err := b.CopyInTo(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{ {Start: 4, End: 7}, {Start: 1, End: 4}, }), safemem.FromIOWriter{&dst}, IOOpts{}) @@ -150,7 +151,7 @@ func TestBytesIOCopyInToSuccess(t *testing.T) { func TestBytesIOCopyInToFailure(t *testing.T) { b := newBytesIOString("Afoob") var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + n, err := b.CopyInTo(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{ {Start: 1, End: 4}, {Start: 4, End: 7}, }), safemem.FromIOWriter{&dst}, IOOpts{}) diff --git a/runsc/BUILD b/runsc/BUILD index 3b91b984a..e99404eb1 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -9,6 +9,7 @@ go_binary( "version.go", ], pure = True, + tags = ["staging"], visibility = [ "//visibility:public", ], diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 67307ab3c..579edaa2c 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -57,6 +57,7 @@ go_library( "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/cgroupfs", "//pkg/sentry/fsimpl/devpts", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/fuse", @@ -66,6 +67,7 @@ go_library( "//pkg/sentry/fsimpl/proc", "//pkg/sentry/fsimpl/sys", "//pkg/sentry/fsimpl/tmpfs", + "//pkg/sentry/fsimpl/verity", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 1ae76d7d7..05b721b28 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -400,7 +400,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Set up the restore environment. ctx := k.SupervisorContext() - mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) + mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) if kernel.VFS2Enabled { ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) if err != nil { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 32adde643..3c0cef6db 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -31,6 +31,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" @@ -103,17 +104,22 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // compileMounts returns the supported mounts from the mount spec, adding any // mandatory mounts that are required by the OCI specification. -func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount { +func compileMounts(spec *specs.Spec, conf *config.Config, vfs2Enabled bool) []specs.Mount { // Keep track of whether proc and sys were mounted. var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount // Mount all submounts from the spec. for _, m := range spec.Mounts { - if !vfs2Enabled && !specutils.IsVFS1SupportedDevMount(m) { + if !specutils.IsSupportedDevMount(m, vfs2Enabled) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } + // Unconditionally drop any cgroupfs mounts. If requested, we'll add our + // own below. + if m.Type == cgroupfs.Name { + continue + } switch filepath.Clean(m.Destination) { case "/proc": procMounted = true @@ -132,6 +138,24 @@ func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount { // Mount proc and sys even if the user did not ask for it, as the spec // says we SHOULD. var mandatoryMounts []specs.Mount + + if conf.Cgroupfs { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: tmpfsvfs2.Name, + Destination: "/sys/fs/cgroup", + }) + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: cgroupfs.Name, + Destination: "/sys/fs/cgroup/memory", + Options: []string{"memory"}, + }) + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: cgroupfs.Name, + Destination: "/sys/fs/cgroup/cpu", + Options: []string{"cpu"}, + }) + } + if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ Type: procvfs2.Name, @@ -248,6 +272,10 @@ func isSupportedMountFlag(fstype, opt string) bool { ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } + if fstype == cgroupfs.Name { + ok, err := parseMountOption(opt, cgroupfs.SupportedMountOptions...) + return ok && err == nil + } return false } @@ -572,11 +600,11 @@ type containerMounter struct { hints *podMountHints } -func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter { +func newContainerMounter(info *containerInfo, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter { return &containerMounter{ - root: spec.Root, - mounts: compileMounts(spec, vfs2Enabled), - fds: fdDispenser{fds: goferFDs}, + root: info.spec.Root, + mounts: compileMounts(info.spec, info.conf, vfs2Enabled), + fds: fdDispenser{fds: info.goferFDs}, k: k, hints: hints, } @@ -795,7 +823,13 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly - + case cgroupfs.Name: + fsName = m.Type + var err error + opts, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) + if err != nil { + return "", nil, false, err + } default: log.Warningf("ignoring unknown filesystem type %q", m.Type) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 774621970..95daf1f00 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -752,7 +752,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn // Setup the child container file system. l.startGoferMonitor(cid, info.goferFDs) - mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints, kernel.VFS2Enabled) + mntr := newContainerMounter(info, l.k, l.mountHints, kernel.VFS2Enabled) if root { if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil { return nil, nil, nil, err diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 8b39bc59a..93c476971 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -439,7 +439,13 @@ func TestCreateMountNamespace(t *testing.T) { } defer cleanup() - mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}, false /* vfs2Enabled */) + info := containerInfo{ + conf: conf, + spec: &tc.spec, + goferFDs: []*fd.FD{fd.New(sandEnd)}, + } + + mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */) mns, err := mntr.createMountNamespace(ctx, conf) if err != nil { t.Fatalf("failed to create mount namespace: %v", err) @@ -479,7 +485,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { defer l.Destroy() defer loaderCleanup() - mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints, true /* vfs2Enabled */) + mntr := newContainerMounter(&l.root, l.k, l.mountHints, true /* vfs2Enabled */) if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil { t.Fatalf("failed process hints: %v", err) } @@ -702,7 +708,12 @@ func TestRestoreEnvironment(t *testing.T) { for _, ioFD := range tc.ioFDs { ioFDs = append(ioFDs, fd.New(ioFD)) } - mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}, false /* vfs2Enabled */) + info := containerInfo{ + conf: conf, + spec: tc.spec, + goferFDs: ioFDs, + } + mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */) actualRenv, err := mntr.createRestoreEnvironment(conf) if !tc.errorExpected && err != nil { t.Fatalf("could not create restore environment for test:%s", tc.name) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 9b3dacf46..7d8fd0483 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -16,6 +16,7 @@ package boot import ( "fmt" + "path" "sort" "strings" @@ -29,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/devices/ttydev" "gvisor.dev/gvisor/pkg/sentry/devices/tundev" "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse" @@ -37,6 +39,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/verity" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -50,6 +53,10 @@ func registerFilesystems(k *kernel.Kernel) error { creds := auth.NewRootCredentials(k.RootUserNamespace()) vfsObj := k.VFS() + vfsObj.MustRegisterFilesystemType(cgroupfs.Name, &cgroupfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, // TODO(b/29356795): Users may mount this once the terminals are in a @@ -60,6 +67,10 @@ func registerFilesystems(k *kernel.Kernel) error { AllowUserMount: true, AllowUserList: true, }) + vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, }) @@ -79,9 +90,9 @@ func registerFilesystems(k *kernel.Kernel) error { AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, + vfsObj.MustRegisterFilesystemType(verity.Name, &verity.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, + AllowUserMount: true, }) // Setup files in devtmpfs. @@ -472,6 +483,12 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo var data []string var iopts interface{} + verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options) + if err != nil { + return "", nil, false, err + } + m.Options = remainingMOpts + // Find filesystem name and FS specific data field. switch m.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: @@ -502,6 +519,13 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + case cgroupfs.Name: + var err error + data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) + if err != nil { + return "", nil, false, err + } + default: log.Warningf("ignoring unknown filesystem type %q", m.Type) return "", nil, false, nil @@ -530,9 +554,75 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo } } + if verityRequested { + verityData = verityData + "root_name=" + path.Base(m.Mount.Destination) + verityOpts.LowerName = fsName + verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions + fsName = verity.Name + opts = &vfs.MountOptions{ + GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: verityData, + InternalData: verityOpts, + }, + InternalMount: true, + } + } + return fsName, opts, useOverlay, nil } +func parseKeyValue(s string) (string, string, bool) { + tokens := strings.SplitN(s, "=", 2) + if len(tokens) < 2 { + return "", "", false + } + return strings.TrimSpace(tokens[0]), strings.TrimSpace(tokens[1]), true +} + +// parseAndFilterOptions scans the provided mount options for verity-related +// mount options. It returns the parsed set of verity mount options, as well as +// the filtered set of mount options unrelated to verity. +func parseVerityMountOptions(mopts []string) (string, verity.InternalFilesystemOptions, bool, []string, error) { + nonVerity := []string{} + found := false + var rootHash string + verityOpts := verity.InternalFilesystemOptions{ + Action: verity.PanicOnViolation, + } + for _, o := range mopts { + if !strings.HasPrefix(o, "verity.") { + nonVerity = append(nonVerity, o) + continue + } + + k, v, ok := parseKeyValue(o) + if !ok { + return "", verityOpts, found, nonVerity, fmt.Errorf("invalid verity mount option with no value: %q", o) + } + + found = true + switch k { + case "verity.roothash": + rootHash = v + case "verity.action": + switch v { + case "error": + verityOpts.Action = verity.ErrorOnViolation + case "panic": + verityOpts.Action = verity.PanicOnViolation + default: + log.Warningf("Invalid verity action %q", v) + verityOpts.Action = verity.PanicOnViolation + } + default: + return "", verityOpts, found, nonVerity, fmt.Errorf("unknown verity mount option: %q", k) + } + } + verityOpts.AllowRuntimeEnable = len(rootHash) == 0 + verityData := "root_hash=" + rootHash + "," + return verityData, verityOpts, found, nonVerity, nil +} + // mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. // Technically we don't have to mount tmpfs at /tmp, as we could just rely on // the host /tmp, but this is a nice optimization, and fixes some apps that call diff --git a/runsc/cli/main.go b/runsc/cli/main.go index a3c515f4b..6db6614cc 100644 --- a/runsc/cli/main.go +++ b/runsc/cli/main.go @@ -86,6 +86,7 @@ func Main(version string) { subcommands.Register(new(cmd.Symbolize), "") subcommands.Register(new(cmd.Wait), "") subcommands.Register(new(cmd.Mitigate), "") + subcommands.Register(new(cmd.VerityPrepare), "") // Register internal commands with the internal group name. This causes // them to be sorted below the user-facing commands with empty group. diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 2c3b4058b..4b9987cf6 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -35,6 +35,7 @@ go_library( "statefile.go", "symbolize.go", "syscalls.go", + "verity_prepare.go", "wait.go", ], visibility = [ diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 455c57692..5485db149 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -126,9 +126,8 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su Hostname: hostname, } - specutils.LogSpec(spec) - cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000)) + if conf.Network == config.NetworkNone { addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace}) @@ -154,55 +153,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su } } - out, err := json.Marshal(spec) - if err != nil { - return Errorf("Error to marshal spec: %v", err) - } - tmpDir, err := ioutil.TempDir("", "runsc-do") - if err != nil { - return Errorf("Error to create tmp dir: %v", err) - } - defer os.RemoveAll(tmpDir) - - log.Infof("Changing configuration RootDir to %q", tmpDir) - conf.RootDir = tmpDir - - cfgPath := filepath.Join(tmpDir, "config.json") - if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil { - return Errorf("Error write spec: %v", err) - } - - containerArgs := container.Args{ - ID: cid, - Spec: spec, - BundleDir: tmpDir, - Attached: true, - } - ct, err := container.New(conf, containerArgs) - if err != nil { - return Errorf("creating container: %v", err) - } - defer ct.Destroy() - - if err := ct.Start(conf); err != nil { - return Errorf("starting container: %v", err) - } - - // Forward signals to init in the container. Thus if we get SIGINT from - // ^C, the container gracefully exit, and we can clean up. - // - // N.B. There is a still a window before this where a signal may kill - // this process, skipping cleanup. - stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */) - defer stopForwarding() - - ws, err := ct.Wait() - if err != nil { - return Errorf("waiting for container: %v", err) - } - - *waitStatus = ws - return subcommands.ExitSuccess + return startContainerAndWait(spec, conf, cid, waitStatus) } func addNamespace(spec *specs.Spec, ns specs.LinuxNamespace) { @@ -397,3 +348,58 @@ func calculatePeerIP(ip string) (string, error) { } return fmt.Sprintf("%s.%s.%s.%d", parts[0], parts[1], parts[2], n), nil } + +func startContainerAndWait(spec *specs.Spec, conf *config.Config, cid string, waitStatus *unix.WaitStatus) subcommands.ExitStatus { + specutils.LogSpec(spec) + + out, err := json.Marshal(spec) + if err != nil { + return Errorf("Error to marshal spec: %v", err) + } + tmpDir, err := ioutil.TempDir("", "runsc-do") + if err != nil { + return Errorf("Error to create tmp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + log.Infof("Changing configuration RootDir to %q", tmpDir) + conf.RootDir = tmpDir + + cfgPath := filepath.Join(tmpDir, "config.json") + if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil { + return Errorf("Error write spec: %v", err) + } + + containerArgs := container.Args{ + ID: cid, + Spec: spec, + BundleDir: tmpDir, + Attached: true, + } + + ct, err := container.New(conf, containerArgs) + if err != nil { + return Errorf("creating container: %v", err) + } + defer ct.Destroy() + + if err := ct.Start(conf); err != nil { + return Errorf("starting container: %v", err) + } + + // Forward signals to init in the container. Thus if we get SIGINT from + // ^C, the container gracefully exit, and we can clean up. + // + // N.B. There is a still a window before this where a signal may kill + // this process, skipping cleanup. + stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */) + defer stopForwarding() + + ws, err := ct.Wait() + if err != nil { + return Errorf("waiting for container: %v", err) + } + + *waitStatus = ws + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 4cb0164dd..6a755ecb6 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -176,7 +176,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) mountIdx := 1 // first one is the root for _, m := range spec.Mounts { - if specutils.Is9PMount(m) { + if specutils.Is9PMount(m, conf.VFS2) { cfg := fsgofer.Config{ ROMount: isReadonlyMount(m.Options) || conf.Overlay, HostUDS: conf.FSGoferHostUDS, @@ -350,7 +350,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // creates directories as needed. func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { for _, m := range mounts { - if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) { + if !specutils.Is9PMount(m, conf.VFS2) { continue } @@ -390,7 +390,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) { cleanMounts := make([]specs.Mount, 0, len(mounts)) for _, m := range mounts { - if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) { + if !specutils.Is9PMount(m, conf.VFS2) { cleanMounts = append(cleanMounts, m) continue } diff --git a/runsc/cmd/verity_prepare.go b/runsc/cmd/verity_prepare.go new file mode 100644 index 000000000..66128b2a3 --- /dev/null +++ b/runsc/cmd/verity_prepare.go @@ -0,0 +1,108 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "fmt" + "math/rand" + "os" + + "github.com/google/subcommands" + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/flag" + "gvisor.dev/gvisor/runsc/specutils" +) + +// VerityPrepare implements subcommands.Commands for the "verity-prepare" +// command. It sets up a sandbox with a writable verity mount mapped to "--dir", +// and executes the verity measure tool specified by "--tool" in the sandbox. It +// is intended to prepare --dir to be mounted as a verity filesystem. +type VerityPrepare struct { + root string + tool string + dir string +} + +// Name implements subcommands.Command.Name. +func (*VerityPrepare) Name() string { + return "verity-prepare" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*VerityPrepare) Synopsis() string { + return "Generates the data structures necessary to enable verityfs on a filesystem." +} + +// Usage implements subcommands.Command.Usage. +func (*VerityPrepare) Usage() string { + return "verity-prepare --tool=<measure_tool> --dir=<path>" +} + +// SetFlags implements subcommands.Command.SetFlags. +func (c *VerityPrepare) SetFlags(f *flag.FlagSet) { + f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`) + f.StringVar(&c.tool, "tool", "", "path to the verity measure_tool") + f.StringVar(&c.dir, "dir", "", "path to the directory to be hashed") +} + +// Execute implements subcommands.Command.Execute. +func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + conf := args[0].(*config.Config) + waitStatus := args[1].(*unix.WaitStatus) + + hostname, err := os.Hostname() + if err != nil { + return Errorf("Error to retrieve hostname: %v", err) + } + + // Map the entire host file system. + absRoot, err := resolvePath(c.root) + if err != nil { + return Errorf("Error resolving root: %v", err) + } + + spec := &specs.Spec{ + Root: &specs.Root{ + Path: absRoot, + }, + Process: &specs.Process{ + Cwd: absRoot, + Args: []string{c.tool, "--path", "/verityroot"}, + Env: os.Environ(), + Capabilities: specutils.AllCapabilities(), + }, + Hostname: hostname, + Mounts: []specs.Mount{ + specs.Mount{ + Source: c.dir, + Destination: "/verityroot", + Type: "bind", + Options: []string{"verity.roothash="}, + }, + }, + } + + cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000)) + + // Force no networking, it is not necessary to run the verity measure tool. + conf.Network = config.NetworkNone + + conf.Verity = true + + return startContainerAndWait(spec, conf, cid, waitStatus) +} diff --git a/runsc/config/config.go b/runsc/config/config.go index 1e5858837..0b2b97cc5 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -172,6 +172,9 @@ type Config struct { // Enables seccomp inside the sandbox. OCISeccomp bool `flag:"oci-seccomp"` + // Mounts the cgroup filesystem backed by the sentry's cgroupfs. + Cgroupfs bool `flag:"cgroupfs"` + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 1d996c841..13a1a0163 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -75,6 +75,7 @@ func RegisterFlags() { flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") + flag.Bool("cgroupfs", false, "Automatically mount cgroupfs.") // Flags that control sandbox runtime behavior: network related. flag.Var(networkTypePtr(NetworkSandbox), "network", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") diff --git a/runsc/container/container.go b/runsc/container/container.go index f9d83c118..e72ada311 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -886,7 +886,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu // Add root mount and then add any other additional mounts. mountCount := 1 for _, m := range spec.Mounts { - if specutils.Is9PMount(m) { + if specutils.Is9PMount(m, conf.VFS2) { mountCount++ } } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 450f92645..47da2dd10 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -486,7 +486,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn } if deviceFile, err := gPlatform.OpenDevice(); err != nil { - return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err) + return fmt.Errorf("opening device file for platform %q: %v", conf.Platform, err) } else if deviceFile != nil { defer deviceFile.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile) @@ -1174,7 +1174,7 @@ func deviceFileForPlatform(name string) (*os.File, error) { f, err := p.OpenDevice() if err != nil { - return nil, fmt.Errorf("opening device file for platform %q: %v", p, err) + return nil, fmt.Errorf("opening device file for platform %q: %w", name, err) } return f, nil } diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index b62504a8c..9ecd0fde6 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -18,6 +18,7 @@ import ( "fmt" "math/bits" "path" + "strings" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" @@ -64,6 +65,12 @@ var optionsMap = map[string]mapping{ "sync": {set: true, val: unix.MS_SYNCHRONOUS}, } +// verityMountOptions is the set of valid verity mount option keys. +var verityMountOptions = map[string]struct{}{ + "verity.roothash": struct{}{}, + "verity.action": struct{}{}, +} + // propOptionsMap is similar to optionsMap, but it lists propagation options // that cannot be used together with other flags. var propOptionsMap = map[string]mapping{ @@ -117,6 +124,14 @@ func validateMount(mnt *specs.Mount) error { return nil } +func moptKey(opt string) string { + if len(opt) == 0 { + return opt + } + // Guaranteed to have at least one token, since opt is not empty. + return strings.SplitN(opt, "=", 2)[0] +} + // ValidateMountOptions validates that mount options are correct. func ValidateMountOptions(opts []string) error { for _, o := range opts { @@ -125,7 +140,8 @@ func ValidateMountOptions(opts []string) error { } _, ok1 := optionsMap[o] _, ok2 := propOptionsMap[o] - if !ok1 && !ok2 { + _, ok3 := verityMountOptions[moptKey(o)] + if !ok1 && !ok2 && !ok3 { return fmt.Errorf("unknown mount option %q", o) } if err := validatePropagation(o); err != nil { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 45856fd58..e5e66546c 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -332,14 +332,20 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. return auth.CapabilitySetOfMany(caps), nil } -// Is9PMount returns true if the given mount can be mounted as an external gofer. -func Is9PMount(m specs.Mount) bool { - return m.Type == "bind" && m.Source != "" && IsVFS1SupportedDevMount(m) +// Is9PMount returns true if the given mount can be mounted as an external +// gofer. +func Is9PMount(m specs.Mount, vfs2Enabled bool) bool { + return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled) } -// IsVFS1SupportedDevMount returns true if m.Destination does not specify a +// IsSupportedDevMount returns true if m.Destination does not specify a // path that is hardcoded by VFS1's implementation of /dev. -func IsVFS1SupportedDevMount(m specs.Mount) bool { +func IsSupportedDevMount(m specs.Mount, vfs2Enabled bool) bool { + // VFS2 has no hardcoded files under /dev, so everything is allowed. + if vfs2Enabled { + return true + } + // See pkg/sentry/fs/dev/dev.go. var existingDevices = []string{ "/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr", diff --git a/shim/BUILD b/shim/BUILD index 434269d31..695f61eb9 100644 --- a/shim/BUILD +++ b/shim/BUILD @@ -6,6 +6,7 @@ go_binary( name = "containerd-shim-runsc-v1", srcs = ["main.go"], static = True, + tags = ["staging"], visibility = [ "//visibility:public", ], diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD index 697ab5837..a5a3cf2c1 100644 --- a/test/benchmarks/base/BUILD +++ b/test/benchmarks/base/BUILD @@ -17,7 +17,6 @@ go_library( benchmark_test( name = "startup_test", - size = "enormous", srcs = ["startup_test.go"], visibility = ["//:sandbox"], deps = [ @@ -29,7 +28,6 @@ benchmark_test( benchmark_test( name = "size_test", - size = "enormous", srcs = ["size_test.go"], visibility = ["//:sandbox"], deps = [ @@ -42,7 +40,6 @@ benchmark_test( benchmark_test( name = "sysbench_test", - size = "enormous", srcs = ["sysbench_test.go"], visibility = ["//:sandbox"], deps = [ diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD index 0b1743603..fee2695ff 100644 --- a/test/benchmarks/database/BUILD +++ b/test/benchmarks/database/BUILD @@ -11,7 +11,6 @@ go_library( benchmark_test( name = "redis_test", - size = "enormous", srcs = ["redis_test.go"], library = ":database", visibility = ["//:sandbox"], diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index dc82e63b2..c2b981a07 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -4,7 +4,6 @@ package(licenses = ["notice"]) benchmark_test( name = "bazel_test", - size = "enormous", srcs = ["bazel_test.go"], visibility = ["//:sandbox"], deps = [ @@ -18,7 +17,6 @@ benchmark_test( benchmark_test( name = "fio_test", - size = "enormous", srcs = ["fio_test.go"], visibility = ["//:sandbox"], deps = [ diff --git a/test/benchmarks/media/BUILD b/test/benchmarks/media/BUILD index 380783f0b..ad2ef3a55 100644 --- a/test/benchmarks/media/BUILD +++ b/test/benchmarks/media/BUILD @@ -11,7 +11,6 @@ go_library( benchmark_test( name = "ffmpeg_test", - size = "enormous", srcs = ["ffmpeg_test.go"], library = ":media", visibility = ["//:sandbox"], diff --git a/test/benchmarks/ml/BUILD b/test/benchmarks/ml/BUILD index 3425b8dad..56a4d4f39 100644 --- a/test/benchmarks/ml/BUILD +++ b/test/benchmarks/ml/BUILD @@ -11,7 +11,6 @@ go_library( benchmark_test( name = "tensorflow_test", - size = "enormous", srcs = ["tensorflow_test.go"], library = ":ml", visibility = ["//:sandbox"], diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index 2741570f5..e047020bf 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -18,7 +18,6 @@ go_library( benchmark_test( name = "iperf_test", - size = "enormous", srcs = [ "iperf_test.go", ], @@ -34,7 +33,6 @@ benchmark_test( benchmark_test( name = "node_test", - size = "enormous", srcs = [ "node_test.go", ], @@ -49,7 +47,6 @@ benchmark_test( benchmark_test( name = "ruby_test", - size = "enormous", srcs = [ "ruby_test.go", ], @@ -64,7 +61,6 @@ benchmark_test( benchmark_test( name = "nginx_test", - size = "enormous", srcs = [ "nginx_test.go", ], @@ -79,7 +75,6 @@ benchmark_test( benchmark_test( name = "httpd_test", - size = "enormous", srcs = [ "httpd_test.go", ], diff --git a/test/e2e/BUILD b/test/e2e/BUILD index 29a84f184..3b3dadf04 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -8,7 +8,6 @@ go_test( srcs = [ "exec_test.go", "integration_test.go", - "regression_test.go", ], library = ":integration", tags = [ diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 49cd74887..1accc3b3b 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -168,13 +168,6 @@ func TestCheckpointRestore(t *testing.T) { t.Skip("Pause/resume is not supported.") } - // TODO(gvisor.dev/issue/3373): Remove after implementing. - if usingVFS2, err := dockerutil.UsingVFS2(); usingVFS2 { - t.Skip("CheckpointRestore not implemented in VFS2.") - } else if err != nil { - t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err) - } - ctx := context.Background() d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) @@ -399,15 +392,15 @@ func TestTmpFile(t *testing.T) { // TestTmpMount checks that mounts inside '/tmp' are not overridden. func TestTmpMount(t *testing.T) { - ctx := context.Background() dir, err := ioutil.TempDir(testutil.TmpDir(), "tmp-mount") if err != nil { t.Fatalf("TempDir(): %v", err) } - want := "123" + const want = "123" if err := ioutil.WriteFile(filepath.Join(dir, "file.txt"), []byte("123"), 0666); err != nil { t.Fatalf("WriteFile(): %v", err) } + ctx := context.Background() d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) @@ -430,6 +423,48 @@ func TestTmpMount(t *testing.T) { } } +// Test that it is allowed to mount a file on top of /dev files, e.g. +// /dev/random. +func TestMountOverDev(t *testing.T) { + if usingVFS2, err := dockerutil.UsingVFS2(); !usingVFS2 { + t.Skip("VFS1 doesn't allow /dev/random to be mounted.") + } else if err != nil { + t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } + + random, err := ioutil.TempFile(testutil.TmpDir(), "random") + if err != nil { + t.Fatal("ioutil.TempFile() failed:", err) + } + const want = "123" + if _, err := random.WriteString(want); err != nil { + t.Fatalf("WriteString() to %q: %v", random.Name(), err) + } + + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + opts := dockerutil.RunOpts{ + Image: "basic/alpine", + Mounts: []mount.Mount{ + { + Type: mount.TypeBind, + Source: random.Name(), + Target: "/dev/random", + }, + }, + } + cmd := "dd count=1 bs=5 if=/dev/random 2> /dev/null" + got, err := d.Run(ctx, opts, "sh", "-c", cmd) + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + if want != got { + t.Errorf("invalid file content, want: %q, got: %q", want, got) + } +} + // TestSyntheticDirs checks that submounts can be created inside a readonly // mount even if the target path does not exist. func TestSyntheticDirs(t *testing.T) { @@ -550,6 +585,30 @@ func runIntegrationTest(t *testing.T, capAdd []string, args ...string) { } } +// Test that UDS can be created using overlay when parent directory is in lower +// layer only (b/134090485). +// +// Prerequisite: the directory where the socket file is created must not have +// been open for write before bind(2) is called. +func TestBindOverlay(t *testing.T) { + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + // Run the container. + got, err := d.Run(ctx, dockerutil.RunOpts{ + Image: "basic/ubuntu", + }, "bash", "-c", "nc -q -1 -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -q 0 -U /var/run/sock && wait $p") + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + + // Check the output contains what we want. + if want := "foobar-asdf"; !strings.Contains(got, want) { + t.Fatalf("docker run output is missing %q: %s", want, got) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() diff --git a/test/e2e/regression_test.go b/test/e2e/regression_test.go deleted file mode 100644 index 84564cdaa..000000000 --- a/test/e2e/regression_test.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package integration - -import ( - "context" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/test/dockerutil" -) - -// Test that UDS can be created using overlay when parent directory is in lower -// layer only (b/134090485). -// -// Prerequisite: the directory where the socket file is created must not have -// been open for write before bind(2) is called. -func TestBindOverlay(t *testing.T) { - ctx := context.Background() - d := dockerutil.MakeContainer(ctx, t) - defer d.CleanUp(ctx) - - // Run the container. - got, err := d.Run(ctx, dockerutil.RunOpts{ - Image: "basic/ubuntu", - }, "bash", "-c", "nc -q -1 -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -q 0 -U /var/run/sock && wait $p") - if err != nil { - t.Fatalf("docker run failed: %v", err) - } - - // Check the output contains what we want. - if want := "foobar-asdf"; !strings.Contains(got, want) { - t.Fatalf("docker run output is missing %q: %s", want, got) - } -} diff --git a/test/fsstress/BUILD b/test/fsstress/BUILD index d262c8554..e74e7fff2 100644 --- a/test/fsstress/BUILD +++ b/test/fsstress/BUILD @@ -14,9 +14,7 @@ go_test( "manual", "local", ], - deps = [ - "//pkg/test/dockerutil", - ], + deps = ["//pkg/test/dockerutil"], ) go_library( diff --git a/test/fsstress/fsstress_test.go b/test/fsstress/fsstress_test.go index 300c21ceb..d53c8f90d 100644 --- a/test/fsstress/fsstress_test.go +++ b/test/fsstress/fsstress_test.go @@ -17,7 +17,9 @@ package fsstress import ( "context" + "flag" "math/rand" + "os" "strconv" "strings" "testing" @@ -30,33 +32,44 @@ func init() { rand.Seed(int64(time.Now().Nanosecond())) } -func fsstress(t *testing.T, dir string) { +func TestMain(m *testing.M) { + dockerutil.EnsureSupportedDockerVersion() + flag.Parse() + os.Exit(m.Run()) +} + +type config struct { + operations string + processes string + target string +} + +func fsstress(t *testing.T, conf config) { ctx := context.Background() d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) - const ( - operations = "10000" - processes = "100" - image = "basic/fsstress" - ) + const image = "basic/fsstress" seed := strconv.FormatUint(uint64(rand.Uint32()), 10) - args := []string{"-d", dir, "-n", operations, "-p", processes, "-s", seed, "-X"} - t.Logf("Repro: docker run --rm --runtime=runsc %s %s", image, strings.Join(args, "")) + args := []string{"-d", conf.target, "-n", conf.operations, "-p", conf.processes, "-s", seed, "-X"} + t.Logf("Repro: docker run --rm --runtime=%s gvisor.dev/images/%s %s", dockerutil.Runtime(), image, strings.Join(args, " ")) out, err := d.Run(ctx, dockerutil.RunOpts{Image: image}, args...) if err != nil { t.Fatalf("docker run failed: %v\noutput: %s", err, out) } - lines := strings.SplitN(out, "\n", 2) - if len(lines) > 1 || !strings.HasPrefix(out, "seed =") { + // This is to catch cases where fsstress spews out error messages during clean + // up but doesn't return error. + if len(out) > 0 { t.Fatalf("unexpected output: %s", out) } } -func TestFsstressGofer(t *testing.T) { - fsstress(t, "/test") -} - func TestFsstressTmpfs(t *testing.T) { - fsstress(t, "/tmp") + // This takes between 10s to run on my machine. Adjust as needed. + cfg := config{ + operations: "5000", + processes: "20", + target: "/tmp", + } + fsstress(t, cfg) } diff --git a/test/image/image_test.go b/test/image/image_test.go index 968e62f63..952264173 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -183,7 +183,10 @@ func TestMysql(t *testing.T) { // Start the container. if err := server.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/mysql", - Env: []string{"MYSQL_ROOT_PASSWORD=foobar123"}, + Env: []string{ + "MYSQL_ROOT_PASSWORD=foobar123", + "MYSQL_ROOT_HOST=%", // Allow anyone to connect to the server. + }, }); err != nil { t.Fatalf("docker run failed: %v", err) } diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 94d4ca2d4..9805665ac 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -16,8 +16,8 @@ go_library( visibility = ["//test/iptables:__subpackages__"], deps = [ "//pkg/binary", + "//pkg/hostarch", "//pkg/test/testutil", - "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 70d8a1832..0776639a7 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -22,7 +22,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) const redirectPort = 42 @@ -848,7 +848,7 @@ func recvOrigDstAddr4(sockfd int) (unix.RawSockaddrInet4, error) { return unix.RawSockaddrInet4{}, err } var addr unix.RawSockaddrInet4 - binary.Unmarshal(buf, usermem.ByteOrder, &addr) + binary.Unmarshal(buf, hostarch.ByteOrder, &addr) return addr, nil } @@ -858,7 +858,7 @@ func recvOrigDstAddr6(sockfd int) (unix.RawSockaddrInet6, error) { return unix.RawSockaddrInet6{}, err } var addr unix.RawSockaddrInet6 - binary.Unmarshal(buf, usermem.ByteOrder, &addr) + binary.Unmarshal(buf, hostarch.ByteOrder, &addr) return addr, nil } diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index 34e83ec49..634c15727 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -246,6 +246,12 @@ ALL_TESTS = [ expect_netstack_failure = True, ), PacketimpactTestInfo( + name = "tcp_listen_backlog", + ), + PacketimpactTestInfo( + name = "tcp_syncookie", + ), + PacketimpactTestInfo( name = "icmpv6_param_problem", ), PacketimpactTestInfo( diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 43b4c7ca1..616215dc3 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -16,11 +16,11 @@ go_library( ], visibility = ["//test/packetimpact:__subpackages__"], deps = [ + "//pkg/hostarch", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", "//pkg/tcpip/seqnum", - "//pkg/usermem", "//test/packetimpact/proto:posix_server_go_proto", "@com_github_google_go_cmp//cmp:go_default_library", "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 1ac96626a..feeb0888a 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -23,7 +23,7 @@ import ( "time" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" ) // Sniffer can sniff raw packets on the wire. @@ -34,7 +34,7 @@ type Sniffer struct { func htons(x uint16) uint16 { buf := [2]byte{} binary.BigEndian.PutUint16(buf[:], x) - return usermem.ByteOrder.Uint16(buf[:]) + return hostarch.ByteOrder.Uint16(buf[:]) } // NewSniffer creates a Sniffer connected to *device. diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index c0deb33e5..83ff70951 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -105,8 +105,8 @@ packetimpact_testbench( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/hostarch", "//pkg/tcpip/header", - "//pkg/usermem", "//test/packetimpact/testbench", "@org_golang_x_sys//unix:go_default_library", ], @@ -354,9 +354,9 @@ packetimpact_testbench( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/hostarch", "//pkg/tcpip/header", "//pkg/tcpip/seqnum", - "//pkg/usermem", "//test/packetimpact/testbench", "@org_golang_x_sys//unix:go_default_library", ], @@ -368,8 +368,8 @@ packetimpact_testbench( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/hostarch", "//pkg/tcpip/header", - "//pkg/usermem", "//test/packetimpact/testbench", "@org_golang_x_sys//unix:go_default_library", ], @@ -385,6 +385,26 @@ packetimpact_testbench( ], ) +packetimpact_testbench( + name = "tcp_listen_backlog", + srcs = ["tcp_listen_backlog_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_testbench( + name = "tcp_syncookie", + srcs = ["tcp_syncookie_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + validate_all_tests() [packetimpact_go_test( diff --git a/test/packetimpact/tests/tcp_info_test.go b/test/packetimpact/tests/tcp_info_test.go index 3fc2c7fe5..93f58ec49 100644 --- a/test/packetimpact/tests/tcp_info_test.go +++ b/test/packetimpact/tests/tcp_info_test.go @@ -22,8 +22,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/test/packetimpact/testbench" ) @@ -58,7 +58,7 @@ func TestTCPInfo(t *testing.T) { if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want { t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want) } - binary.Unmarshal(infoBytes, usermem.ByteOrder, &info) + binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info) rtt := time.Duration(info.RTT) * time.Microsecond rttvar := time.Duration(info.RTTVar) * time.Microsecond @@ -99,7 +99,7 @@ func TestTCPInfo(t *testing.T) { if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want { t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want) } - binary.Unmarshal(infoBytes, usermem.ByteOrder, &info) + binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info) if info.CaState != linux.TCP_CA_Loss { t.Errorf("expected the connection to be in loss recovery, got: %v want: %v", info.CaState, linux.TCP_CA_Loss) } diff --git a/test/packetimpact/tests/tcp_listen_backlog_test.go b/test/packetimpact/tests/tcp_listen_backlog_test.go new file mode 100644 index 000000000..26c812d0a --- /dev/null +++ b/test/packetimpact/tests/tcp_listen_backlog_test.go @@ -0,0 +1,86 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_listen_backlog_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.Initialize(flag.CommandLine) +} + +// TestTCPListenBacklog tests for a listening endpoint behavior: +// (1) reply to more SYNs than what is configured as listen backlog +// (2) ignore ACKs (that complete a handshake) when the accept queue is full +// (3) ignore incoming SYNs when the accept queue is full +func TestTCPListenBacklog(t *testing.T) { + dut := testbench.NewDUT(t) + + // Listening endpoint accepts one more connection than the listen backlog. + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 0 /*backlog*/) + + var establishedConn testbench.TCPIPv4 + var incompleteConn testbench.TCPIPv4 + + // Test if the DUT listener replies to more SYNs than listen backlog+1 + for i, conn := range []*testbench.TCPIPv4{&establishedConn, &incompleteConn} { + *conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + // Expect dut connection to have transitioned to SYN-RCVD state. + conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN-ACK for %d connection, %s", i, err) + } + } + defer establishedConn.Close(t) + defer incompleteConn.Close(t) + + // Send the ACK to complete handshake. + establishedConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)}) + dut.PollOne(t, listenFd, unix.POLLIN, time.Second) + + // Send the ACK to complete handshake, expect this to be ignored by the + // listener. + incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)}) + + // Drain the accept queue to enable poll for subsequent connections on the + // listener. + dut.Accept(t, listenFd) + + // The ACK for the incomplete connection should be ignored by the + // listening endpoint and the poll on listener should now time out. + if pfds := dut.Poll(t, []unix.PollFd{{Fd: listenFd, Events: unix.POLLIN}}, time.Second); len(pfds) != 0 { + t.Fatalf("got dut.Poll(...) = %#v", pfds) + } + + // Re-send the ACK to complete handshake and re-fill the accept-queue. + incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)}) + dut.PollOne(t, listenFd, unix.POLLIN, time.Second) + + // Now initiate a new connection when the accept queue is full. + connectingConn := dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer connectingConn.Close(t) + // Expect dut connection to drop the SYN and let the client stay in SYN_SENT state. + connectingConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)}) + if got, err := connectingConn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err == nil { + t.Fatalf("expected no SYN-ACK, but got %s", got) + } +} diff --git a/test/packetimpact/tests/tcp_rack_test.go b/test/packetimpact/tests/tcp_rack_test.go index 0a5b0f12b..ff1431bbf 100644 --- a/test/packetimpact/tests/tcp_rack_test.go +++ b/test/packetimpact/tests/tcp_rack_test.go @@ -22,9 +22,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/test/packetimpact/testbench" ) @@ -74,7 +74,7 @@ func getRTTAndRTO(t *testing.T, dut testbench.DUT, acceptFd int32) (rtt, rto tim if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want { t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want) } - binary.Unmarshal(infoBytes, usermem.ByteOrder, &info) + binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info) return time.Duration(info.RTT) * time.Microsecond, time.Duration(info.RTO) * time.Microsecond } @@ -407,7 +407,7 @@ func TestRACKWithLostRetransmission(t *testing.T) { if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want { t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want) } - binary.Unmarshal(infoBytes, usermem.ByteOrder, &info) + binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info) if info.CaState != linux.TCP_CA_Recovery { t.Fatalf("expected connection to be in fast recovery, want: %v got: %v", linux.TCP_CA_Recovery, info.CaState) } diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go index 3dc8f63ab..1eafe20c3 100644 --- a/test/packetimpact/tests/tcp_retransmits_test.go +++ b/test/packetimpact/tests/tcp_retransmits_test.go @@ -23,8 +23,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/test/packetimpact/testbench" ) @@ -38,7 +38,7 @@ func getRTO(t *testing.T, dut testbench.DUT, acceptFd int32) (rto time.Duration) if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want { t.Fatalf("unexpected size for TCP_INFO, got %d bytes want %d bytes", got, want) } - binary.Unmarshal(infoBytes, usermem.ByteOrder, &info) + binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info) return time.Duration(info.RTO) * time.Microsecond } diff --git a/test/packetimpact/tests/tcp_syncookie_test.go b/test/packetimpact/tests/tcp_syncookie_test.go new file mode 100644 index 000000000..1c21c62ff --- /dev/null +++ b/test/packetimpact/tests/tcp_syncookie_test.go @@ -0,0 +1,70 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_syncookie_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.Initialize(flag.CommandLine) +} + +// TestSynCookie test if the DUT listener is replying back using syn cookies. +// The test does not complete the handshake by not sending the ACK to SYNACK. +// When syncookies are not used, this forces the listener to retransmit SYNACK. +// And when syncookies are being used, there is no such retransmit. +func TestTCPSynCookie(t *testing.T) { + dut := testbench.NewDUT(t) + + // Listening endpoint accepts one more connection than the listen backlog. + _, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/) + + var withoutSynCookieConn testbench.TCPIPv4 + var withSynCookieConn testbench.TCPIPv4 + + // Test if the DUT listener replies to more SYNs than listen backlog+1 + for _, conn := range []*testbench.TCPIPv4{&withoutSynCookieConn, &withSynCookieConn} { + *conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + } + defer withoutSynCookieConn.Close(t) + defer withSynCookieConn.Close(t) + + checkSynAck := func(t *testing.T, conn *testbench.TCPIPv4, expectRetransmit bool) { + // Expect dut connection to have transitioned to SYN-RCVD state. + conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN-ACK, but got %s", err) + } + + // If the DUT listener is using syn cookies, it will not retransmit SYNACK + got, err := conn.ExpectData(t, &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1)), Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, 2*time.Second) + if expectRetransmit && err != nil { + t.Fatalf("expected retransmitted SYN-ACK, but got %s", err) + } + if !expectRetransmit && err == nil { + t.Fatalf("expected no retransmitted SYN-ACK, but got %s", got) + } + } + + t.Run("without syncookies", func(t *testing.T) { checkSynAck(t, &withoutSynCookieConn, true /*expectRetransmit*/) }) + t.Run("with syncookies", func(t *testing.T) { checkSynAck(t, &withSynCookieConn, false /*expectRetransmit*/) }) +} diff --git a/test/perf/BUILD b/test/perf/BUILD index ed899ac22..71982fc4d 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -35,7 +35,7 @@ syscall_test( ) syscall_test( - size = "enormous", + size = "large", debug = False, tags = ["nogotsan"], test = "//test/perf/linux:getdents_benchmark", @@ -48,7 +48,7 @@ syscall_test( ) syscall_test( - size = "enormous", + size = "large", debug = False, tags = ["nogotsan"], test = "//test/perf/linux:gettid_benchmark", @@ -106,7 +106,7 @@ syscall_test( ) syscall_test( - size = "enormous", + size = "large", debug = False, test = "//test/perf/linux:signal_benchmark", ) @@ -124,9 +124,10 @@ syscall_test( ) syscall_test( - size = "enormous", + size = "large", add_overlay = True, debug = False, + tags = ["nogotsan"], test = "//test/perf/linux:unlink_benchmark", ) diff --git a/test/perf/linux/getpid_benchmark.cc b/test/perf/linux/getpid_benchmark.cc index db74cb264..047a034bd 100644 --- a/test/perf/linux/getpid_benchmark.cc +++ b/test/perf/linux/getpid_benchmark.cc @@ -31,6 +31,24 @@ void BM_Getpid(benchmark::State& state) { BENCHMARK(BM_Getpid); +#ifdef __x86_64__ + +#define SYSNO_STR1(x) #x +#define SYSNO_STR(x) SYSNO_STR1(x) + +// BM_GetpidOpt uses the most often pattern of calling system calls: +// mov $SYS_XXX, %eax; syscall. +void BM_GetpidOpt(benchmark::State& state) { + for (auto s : state) { + __asm__("movl $" SYSNO_STR(SYS_getpid) ", %%eax\n" + "syscall\n" + : : : "rax", "rcx", "r11"); + } +} + +BENCHMARK(BM_GetpidOpt); +#endif // __x86_64__ + } // namespace } // namespace testing diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index 702522d86..2550b61a3 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -75,7 +75,6 @@ def runtime_test(name, **kwargs): "local", "manual", ], - size = "enormous", **kwargs ) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index ef299799e..affcae8fd 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -244,6 +244,10 @@ syscall_test( ) syscall_test( + test = "//test/syscalls/linux:verity_ioctl_test", +) + +syscall_test( test = "//test/syscalls/linux:iptables_test", ) @@ -318,6 +322,10 @@ syscall_test( ) syscall_test( + test = "//test/syscalls/linux:verity_mount_test", +) + +syscall_test( size = "medium", test = "//test/syscalls/linux:mremap_test", ) @@ -772,8 +780,7 @@ syscall_test( ) syscall_test( - # NOTE(b/116636318): Large sendmsg may stall a long time. - size = "enormous", + flaky = 1, # NOTE(b/116636318): Large sendmsg may stall a long time. shard_count = more_shards, test = "//test/syscalls/linux:socket_unix_dgram_local_test", ) @@ -791,8 +798,7 @@ syscall_test( ) syscall_test( - # NOTE(b/116636318): Large sendmsg may stall a long time. - size = "enormous", + flaky = 1, # NOTE(b/116636318): Large sendmsg may stall a long time. shard_count = more_shards, test = "//test/syscalls/linux:socket_unix_seqpacket_local_test", ) @@ -995,3 +1001,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:processes_test", ) + +syscall_test( + test = "//test/syscalls/linux:cgroup_test", +) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 043ada583..bc2c7c0e3 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1014,6 +1014,22 @@ cc_binary( ], ) +cc_binary( + name = "verity_ioctl_test", + testonly = 1, + srcs = ["verity_ioctl.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + gtest, + "//test/util:fs_util", + "//test/util:mount_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "iptables_types", testonly = 1, @@ -1304,6 +1320,20 @@ cc_binary( ) cc_binary( + name = "verity_mount_test", + testonly = 1, + srcs = ["verity_mount.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:capability_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( name = "mremap_test", testonly = 1, srcs = ["mremap.cc"], @@ -4205,3 +4235,24 @@ cc_binary( "//test/util:test_util", ], ) + +cc_binary( + name = "cgroup_test", + testonly = 1, + srcs = ["cgroup.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cgroup_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + ], +) diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc new file mode 100644 index 000000000..a1006a978 --- /dev/null +++ b/test/syscalls/linux/cgroup.cc @@ -0,0 +1,421 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// All tests in this file rely on being about to mount and unmount cgroupfs, +// which isn't expected to work, or be safe on a general linux system. + +#include <sys/mount.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/strings/str_split.h" +#include "test/util/capability_util.h" +#include "test/util/cgroup_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::testing::_; +using ::testing::Ge; +using ::testing::Gt; + +std::vector<std::string> known_controllers = {"cpu", "cpuset", "cpuacct", + "memory"}; + +bool CgroupsAvailable() { + return IsRunningOnGvisor() && !IsRunningWithVFS1() && + TEST_CHECK_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)); +} + +TEST(Cgroup, MountSucceeds) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + EXPECT_NO_ERRNO(c.ContainsCallingProcess()); +} + +TEST(Cgroup, SeparateMounts) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + + for (const auto& ctl : known_controllers) { + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(ctl)); + EXPECT_NO_ERRNO(c.ContainsCallingProcess()); + } +} + +TEST(Cgroup, AllControllersImplicit) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + + absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + for (const auto& ctl : known_controllers) { + EXPECT_TRUE(cgroups_entries.contains(ctl)) + << absl::StreamFormat("ctl=%s", ctl); + } + EXPECT_EQ(cgroups_entries.size(), known_controllers.size()); +} + +TEST(Cgroup, AllControllersExplicit) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("all")); + + absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + for (const auto& ctl : known_controllers) { + EXPECT_TRUE(cgroups_entries.contains(ctl)) + << absl::StreamFormat("ctl=%s", ctl); + } + EXPECT_EQ(cgroups_entries.size(), known_controllers.size()); +} + +TEST(Cgroup, ProcsAndTasks) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + absl::flat_hash_set<pid_t> pids = ASSERT_NO_ERRNO_AND_VALUE(c.Procs()); + absl::flat_hash_set<pid_t> tids = ASSERT_NO_ERRNO_AND_VALUE(c.Tasks()); + + EXPECT_GE(tids.size(), pids.size()) << "Found more processes than threads"; + + // Pids should be a strict subset of tids. + for (auto it = pids.begin(); it != pids.end(); ++it) { + EXPECT_TRUE(tids.contains(*it)) + << absl::StreamFormat("Have pid %d, but no such tid", *it); + } +} + +TEST(Cgroup, ControllersMustBeInUniqueHierarchy) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + // Hierarchy #1: all controllers. + Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + // Hierarchy #2: memory. + // + // This should conflict since memory is already in hierarchy #1, and the two + // hierarchies have different sets of controllers, so this mount can't be a + // view into hierarchy #1. + EXPECT_THAT(m.MountCgroupfs("memory"), PosixErrorIs(EBUSY, _)) + << "Memory controller mounted on two hierarchies"; + EXPECT_THAT(m.MountCgroupfs("cpu"), PosixErrorIs(EBUSY, _)) + << "CPU controller mounted on two hierarchies"; +} + +TEST(Cgroup, UnmountFreesControllers) { + SKIP_IF(!CgroupsAvailable()); + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + // All controllers are now attached to all's hierarchy. Attempting new mount + // with any individual controller should fail. + EXPECT_THAT(m.MountCgroupfs("memory"), PosixErrorIs(EBUSY, _)) + << "Memory controller mounted on two hierarchies"; + + // Unmount the "all" hierarchy. This should enable any controller to be + // mounted on a new hierarchy again. + ASSERT_NO_ERRNO(m.Unmount(all)); + EXPECT_NO_ERRNO(m.MountCgroupfs("memory")); + EXPECT_NO_ERRNO(m.MountCgroupfs("cpu")); +} + +TEST(Cgroup, OnlyContainsControllerSpecificFiles) { + SKIP_IF(!CgroupsAvailable()); + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup mem = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory")); + EXPECT_THAT(Exists(mem.Relpath("memory.usage_in_bytes")), + IsPosixErrorOkAndHolds(true)); + // CPU files shouldn't exist in memory cgroups. + EXPECT_THAT(Exists(mem.Relpath("cpu.cfs_period_us")), + IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(mem.Relpath("cpu.cfs_quota_us")), + IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(mem.Relpath("cpu.shares")), IsPosixErrorOkAndHolds(false)); + + Cgroup cpu = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu")); + EXPECT_THAT(Exists(cpu.Relpath("cpu.cfs_period_us")), + IsPosixErrorOkAndHolds(true)); + EXPECT_THAT(Exists(cpu.Relpath("cpu.cfs_quota_us")), + IsPosixErrorOkAndHolds(true)); + EXPECT_THAT(Exists(cpu.Relpath("cpu.shares")), IsPosixErrorOkAndHolds(true)); + // Memory files shouldn't exist in cpu cgroups. + EXPECT_THAT(Exists(cpu.Relpath("memory.usage_in_bytes")), + IsPosixErrorOkAndHolds(false)); +} + +TEST(Cgroup, InvalidController) { + SKIP_IF(!CgroupsAvailable()); + + TempPath mountpoint = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string mopts = "this-controller-is-invalid"; + EXPECT_THAT( + mount("none", mountpoint.path().c_str(), "cgroup", 0, mopts.c_str()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Cgroup, MoptAllMustBeExclusive) { + SKIP_IF(!CgroupsAvailable()); + + TempPath mountpoint = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string mopts = "all,cpu"; + EXPECT_THAT( + mount("none", mountpoint.path().c_str(), "cgroup", 0, mopts.c_str()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MemoryCgroup, MemoryUsageInBytes) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory")); + EXPECT_THAT(c.ReadIntegerControlFile("memory.usage_in_bytes"), + IsPosixErrorOkAndHolds(Gt(0))); +} + +TEST(CPUCgroup, ControlFilesHaveDefaultValues) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu")); + EXPECT_THAT(c.ReadIntegerControlFile("cpu.cfs_quota_us"), + IsPosixErrorOkAndHolds(-1)); + EXPECT_THAT(c.ReadIntegerControlFile("cpu.cfs_period_us"), + IsPosixErrorOkAndHolds(100000)); + EXPECT_THAT(c.ReadIntegerControlFile("cpu.shares"), + IsPosixErrorOkAndHolds(1024)); +} + +TEST(CPUAcctCgroup, CPUAcctUsage) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpuacct")); + + const int64_t usage = + ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage")); + const int64_t usage_user = + ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage_user")); + const int64_t usage_sys = + ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage_sys")); + + EXPECT_GE(usage, 0); + EXPECT_GE(usage_user, 0); + EXPECT_GE(usage_sys, 0); + + EXPECT_GE(usage_user + usage_sys, usage); +} + +TEST(CPUAcctCgroup, CPUAcctStat) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpuacct")); + + std::string stat = + ASSERT_NO_ERRNO_AND_VALUE(c.ReadControlFile("cpuacct.stat")); + + // We're expecting the contents of "cpuacct.stat" to look similar to this: + // + // user 377986 + // system 220662 + + std::vector<absl::string_view> lines = + absl::StrSplit(stat, '\n', absl::SkipEmpty()); + ASSERT_EQ(lines.size(), 2); + + std::vector<absl::string_view> user_tokens = + StrSplit(lines[0], absl::ByChar(' ')); + EXPECT_EQ(user_tokens[0], "user"); + EXPECT_THAT(Atoi<int64_t>(user_tokens[1]), IsPosixErrorOkAndHolds(Ge(0))); + + std::vector<absl::string_view> sys_tokens = + StrSplit(lines[1], absl::ByChar(' ')); + EXPECT_EQ(sys_tokens[0], "system"); + EXPECT_THAT(Atoi<int64_t>(sys_tokens[1]), IsPosixErrorOkAndHolds(Ge(0))); +} + +TEST(ProcCgroups, Empty) { + SKIP_IF(!CgroupsAvailable()); + + absl::flat_hash_map<std::string, CgroupsEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + // No cgroups mounted yet, we should have no entries. + EXPECT_TRUE(entries.empty()); +} + +TEST(ProcCgroups, ProcCgroupsEntries) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + + Cgroup mem = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory")); + absl::flat_hash_map<std::string, CgroupsEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + EXPECT_EQ(entries.size(), 1); + ASSERT_TRUE(entries.contains("memory")); + CgroupsEntry mem_e = entries["memory"]; + EXPECT_EQ(mem_e.subsys_name, "memory"); + EXPECT_GE(mem_e.hierarchy, 1); + // Expect a single root cgroup. + EXPECT_EQ(mem_e.num_cgroups, 1); + // Cgroups are currently always enabled when mounted. + EXPECT_TRUE(mem_e.enabled); + + // Add a second cgroup, and check for new entry. + + Cgroup cpu = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu")); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + EXPECT_EQ(entries.size(), 2); + EXPECT_TRUE(entries.contains("memory")); // Still have memory entry. + ASSERT_TRUE(entries.contains("cpu")); + CgroupsEntry cpu_e = entries["cpu"]; + EXPECT_EQ(cpu_e.subsys_name, "cpu"); + EXPECT_GE(cpu_e.hierarchy, 1); + EXPECT_EQ(cpu_e.num_cgroups, 1); + EXPECT_TRUE(cpu_e.enabled); + + // Separate hierarchies, since controllers were mounted separately. + EXPECT_NE(mem_e.hierarchy, cpu_e.hierarchy); +} + +TEST(ProcCgroups, UnmountRemovesEntries) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup cg = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu,memory")); + absl::flat_hash_map<std::string, CgroupsEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + EXPECT_EQ(entries.size(), 2); + + ASSERT_NO_ERRNO(m.Unmount(cg)); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + EXPECT_TRUE(entries.empty()); +} + +TEST(ProcPIDCgroup, Empty) { + SKIP_IF(!CgroupsAvailable()); + + absl::flat_hash_map<std::string, PIDCgroupEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + EXPECT_TRUE(entries.empty()); +} + +TEST(ProcPIDCgroup, Entries) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory")); + + absl::flat_hash_map<std::string, PIDCgroupEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + EXPECT_EQ(entries.size(), 1); + PIDCgroupEntry mem_e = entries["memory"]; + EXPECT_GE(mem_e.hierarchy, 1); + EXPECT_EQ(mem_e.controllers, "memory"); + EXPECT_EQ(mem_e.path, "/"); + + Cgroup c1 = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu")); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + EXPECT_EQ(entries.size(), 2); + EXPECT_TRUE(entries.contains("memory")); // Still have memory entry. + PIDCgroupEntry cpu_e = entries["cpu"]; + EXPECT_GE(cpu_e.hierarchy, 1); + EXPECT_EQ(cpu_e.controllers, "cpu"); + EXPECT_EQ(cpu_e.path, "/"); + + // Separate hierarchies, since controllers were mounted separately. + EXPECT_NE(mem_e.hierarchy, cpu_e.hierarchy); +} + +TEST(ProcPIDCgroup, UnmountRemovesEntries) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + + absl::flat_hash_map<std::string, PIDCgroupEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + EXPECT_GT(entries.size(), 0); + + ASSERT_NO_ERRNO(m.Unmount(all)); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + EXPECT_TRUE(entries.empty()); +} + +TEST(ProcCgroup, PIDCgroupMatchesCgroups) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory")); + Cgroup c1 = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu")); + + absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + absl::flat_hash_map<std::string, PIDCgroupEntry> pid_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + + CgroupsEntry cgroup_mem = cgroups_entries["memory"]; + PIDCgroupEntry pid_mem = pid_entries["memory"]; + + EXPECT_EQ(cgroup_mem.hierarchy, pid_mem.hierarchy); + + CgroupsEntry cgroup_cpu = cgroups_entries["cpu"]; + PIDCgroupEntry pid_cpu = pid_entries["cpu"]; + + EXPECT_EQ(cgroup_cpu.hierarchy, pid_cpu.hierarchy); + EXPECT_NE(cgroup_mem.hierarchy, cgroup_cpu.hierarchy); + EXPECT_NE(pid_mem.hierarchy, pid_cpu.hierarchy); +} + +TEST(ProcCgroup, MultiControllerHierarchy) { + SKIP_IF(!CgroupsAvailable()); + + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory,cpu")); + + absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries()); + + CgroupsEntry mem_e = cgroups_entries["memory"]; + CgroupsEntry cpu_e = cgroups_entries["cpu"]; + + // Both controllers should have the same hierarchy ID. + EXPECT_EQ(mem_e.hierarchy, cpu_e.hierarchy); + + absl::flat_hash_map<std::string, PIDCgroupEntry> pid_entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid())); + + // Expecting an entry listing both controllers, that matches the previous + // hierarchy ID. Note that the controllers are listed in alphabetical order. + PIDCgroupEntry pid_e = pid_entries["cpu,memory"]; + EXPECT_EQ(pid_e.hierarchy, mem_e.hierarchy); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc index 28f51a3bf..8c5732147 100644 --- a/test/syscalls/linux/semaphore.cc +++ b/test/syscalls/linux/semaphore.cc @@ -234,14 +234,6 @@ TEST(SemaphoreTest, SemTimedOpBlock) { AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); ASSERT_THAT(sem.get(), SyscallSucceeds()); - ScopedThread th([&sem] { - absl::SleepFor(absl::Milliseconds(100)); - - struct sembuf buf = {}; - buf.sem_op = 1; - ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); - }); - struct sembuf buf = {}; buf.sem_op = -1; struct timespec timeout = {}; diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 597b5bcb1..d391363fb 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -489,13 +489,6 @@ void TestListenWhileConnect(const TestParam& param, TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; - constexpr int kBacklog = 2; - // Linux completes one more connection than the listen backlog argument. - // To ensure that there is at least one client connection that stays in - // connecting state, keep 2 more client connections than the listen backlog. - // gVisor differs in this behavior though, gvisor.dev/issue/3153. - constexpr int kClients = kBacklog + 2; - // Create the listening socket. FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); @@ -503,6 +496,13 @@ void TestListenWhileConnect(const TestParam& param, ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len), SyscallSucceeds()); + // This test is only interested in deterministically getting a socket in + // connecting state. For that, we use a listen backlog of zero which would + // mean there is exactly one connection that gets established and is enqueued + // to the accept queue. We poll on the listener to ensure that is enqueued. + // After that the subsequent client connect will stay in connecting state as + // the accept queue is full. + constexpr int kBacklog = 0; ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); // Get the port bound by the listening socket. @@ -515,42 +515,49 @@ void TestListenWhileConnect(const TestParam& param, sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - std::vector<FileDescriptor> clients; - for (int i = 0; i < kClients; i++) { - FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( - Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); - int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr), - connector.addr_len); - if (ret != 0) { - EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); - clients.push_back(std::move(client)); - } + FileDescriptor established_client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT( + connect(established_client.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Ensure that the accept queue has the completed connection. + constexpr int kTimeout = 10000; + pollfd pfd = { + .fd = listen_fd.get(), + .events = POLLIN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN); + + FileDescriptor connecting_client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + // Keep the last client in connecting state. + int ret = + connect(connecting_client.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); } stopListen(listen_fd); - for (auto& client : clients) { - constexpr int kTimeout = 10000; + std::array<std::pair<int, int>, 2> sockets = { + std::make_pair(established_client.get(), ECONNRESET), + std::make_pair(connecting_client.get(), ECONNREFUSED), + }; + for (size_t i = 0; i < sockets.size(); i++) { + SCOPED_TRACE(absl::StrCat("i=", i)); + auto [fd, expected_errno] = sockets[i]; pollfd pfd = { - .fd = client.get(), - .events = POLLIN, + .fd = fd, }; - // When the listening socket is closed, then we expect the remote to reset - // the connection. - ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); - ASSERT_EQ(pfd.revents, POLLIN | POLLHUP | POLLERR); + // When the listening socket is closed, the peer would reset the connection. + EXPECT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + EXPECT_EQ(pfd.revents, POLLHUP | POLLERR); char c; - // Subsequent read can fail with: - // ECONNRESET: If the client connection was established and was reset by the - // remote. - // ECONNREFUSED: If the client connection failed to be established. - ASSERT_THAT(read(client.get(), &c, sizeof(c)), - AnyOf(SyscallFailsWithErrno(ECONNRESET), - SyscallFailsWithErrno(ECONNREFUSED))); - // The last client connection would be in connecting (SYN_SENT) state. - if (client.get() == clients[kClients - 1].get()) { - ASSERT_EQ(errno, ECONNREFUSED) << strerror(errno); - } + EXPECT_THAT(read(fd, &c, sizeof(c)), SyscallFailsWithErrno(expected_errno)); } } @@ -570,7 +577,59 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdownWhileConnect) { // random save as established connections which can't be delivered to the accept // queue because the queue is full are not correctly delivered after restore // causing the last accept to timeout on the restore. -TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) { +TEST_P(SocketInetLoopbackTest, TCPAcceptBacklogSizes_NoRandomSave) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + std::array<int, 3> backlogs = {-1, 0, 1}; + for (auto& backlog : backlogs) { + ASSERT_THAT(listen(listen_fd.get(), backlog), SyscallSucceeds()); + + int expected_accepts; + if (backlog < 0) { + expected_accepts = 1024; + } else { + expected_accepts = backlog + 1; + } + for (int i = 0; i < expected_accepts; i++) { + SCOPED_TRACE(absl::StrCat("i=", i)); + // Connect to the listening socket. + const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT( + RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<struct sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + const FileDescriptor accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + } + } +} + +// TODO(b/157236388): Remove _NoRandomSave once bug is fixed. Test fails w/ +// random save as established connections which can't be delivered to the accept +// queue because the queue is full are not correctly delivered after restore +// causing the last accept to timeout on the restore. +TEST_P(SocketInetLoopbackTest, TCPBacklog_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; @@ -595,6 +654,7 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) { ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); int i = 0; while (1) { + SCOPED_TRACE(absl::StrCat("i=", i)); int ret; // Connect to the listening socket. @@ -620,103 +680,133 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) { i++; } + int client_conns = i; + int accepted_conns = 0; for (; i != 0; i--) { - // Accept the connection. - // - // We have to assign a name to the accepted socket, as unamed temporary - // objects are destructed upon full evaluation of the expression it is in, - // potentially causing the connecting socket to fail to shutdown properly. - auto accepted = - ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + SCOPED_TRACE(absl::StrCat("i=", i)); + pollfd pfd = { + .fd = listen_fd.get(), + .events = POLLIN, + }; + // Look for incoming connections to accept. The last connect request could + // be established from the client side, but the ACK of the handshake could + // be dropped by the listener if the accept queue was filled up by the + // previous connect. + int ret; + ASSERT_THAT(ret = poll(&pfd, 1, 3000), SyscallSucceeds()); + if (ret == 0) break; + if (pfd.revents == POLLIN) { + // Accept the connection. + // + // We have to assign a name to the accepted socket, as unamed temporary + // objects are destructed upon full evaluation of the expression it is in, + // potentially causing the connecting socket to fail to shutdown properly. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + accepted_conns++; + } } + // We should accept at least listen backlog + 1 connections. As the stack is + // enqueuing established connections to the accept queue, newer SYNs could + // still be replied to causing those client connections would be accepted as + // we start dequeuing the queue. + ASSERT_GE(accepted_conns, kBacklogSize + 1); + ASSERT_GE(client_conns, accepted_conns); } -// Test if the stack completes atmost listen backlog number of client -// connections. It exercises the path of the stack that enqueues completed -// connections to accept queue vs new incoming SYNs. -TEST_P(SocketInetLoopbackTest, TCPConnectBacklog_NoRandomSave) { - const auto& param = GetParam(); - const TestAddress& listener = param.listener; - const TestAddress& connector = param.connector; +// TODO(b/157236388): Remove _NoRandomSave once bug is fixed. Test fails w/ +// random save as established connections which can't be delivered to the accept +// queue because the queue is full are not correctly delivered after restore +// causing the last accept to timeout on the restore. +TEST_P(SocketInetLoopbackTest, TCPBacklogAcceptAll_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); constexpr int kBacklog = 1; - // Keep the number of client connections more than the listen backlog. - // Linux completes one more connection than the listen backlog argument. - // gVisor differs in this behavior though, gvisor.dev/issue/3153. - int kClients = kBacklog + 2; - if (IsRunningOnGvisor()) { - kClients--; - } + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); - // Run the following test for few iterations to test race between accept queue - // getting filled with incoming SYNs. - for (int num = 0; num < 10; num++) { - FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( - Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); - sockaddr_storage listen_addr = listener.addr; - ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), - listener.addr_len), - SyscallSucceeds()); - ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); - socklen_t addrlen = listener.addr_len; - ASSERT_THAT( - getsockname(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), - &addrlen), - SyscallSucceeds()); - uint16_t const port = - ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); - sockaddr_storage conn_addr = connector.addr; - ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - std::vector<FileDescriptor> clients; - // Issue multiple non-blocking client connects. - for (int i = 0; i < kClients; i++) { - FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( - Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); - int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr), - connector.addr_len); - if (ret != 0) { - EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); - } - clients.push_back(std::move(client)); + // Fill up the accept queue and trigger more client connections which would be + // waiting to be accepted. + std::array<FileDescriptor, kBacklog + 1> established_clients; + for (auto& fd : established_clients) { + fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(connect(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + } + std::array<FileDescriptor, kBacklog> waiting_clients; + for (auto& fd : waiting_clients) { + fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + int ret = connect(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); } + } - // Now that client connects are issued, wait for the accept queue to get - // filled and ensure no new client connection is completed. - for (int i = 0; i < kClients; i++) { - pollfd pfd = { - .fd = clients[i].get(), - .events = POLLOUT, - }; - if (i < kClients - 1) { - // Poll for client side connection completions with a large timeout. - // We cannot poll on the listener side without calling accept as poll - // stays level triggered with non-zero accept queue length. - // - // Client side poll would not guarantee that the completed connection - // has been enqueued in to the acccept queue, but the fact that the - // listener ACKd the SYN, means that it cannot complete any new incoming - // SYNs when it has already ACKd for > backlog number of SYNs. - ASSERT_THAT(poll(&pfd, 1, 10000), SyscallSucceedsWithValue(1)) - << "num=" << num << " i=" << i << " kClients=" << kClients; - ASSERT_EQ(pfd.revents, POLLOUT) << "num=" << num << " i=" << i; - } else { - // Now that we expect accept queue filled up, ensure that the last - // client connection never completes with a smaller poll timeout. - ASSERT_THAT(poll(&pfd, 1, 1000), SyscallSucceedsWithValue(0)) - << "num=" << num << " i=" << i; - } + auto accept_connection = [&]() { + constexpr int kTimeout = 10000; + pollfd pfd = { + .fd = listen_fd.get(), + .events = POLLIN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN); + // Accept the connection. + // + // We have to assign a name to the accepted socket, as unamed temporary + // objects are destructed upon full evaluation of the expression it is in, + // potentially causing the connecting socket to fail to shutdown properly. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + }; - ASSERT_THAT(close(clients[i].release()), SyscallSucceedsWithValue(0)) - << "num=" << num << " i=" << i; - } - clients.clear(); - // We close the listening side and open a new listener. We could instead - // drain the accept queue by calling accept() and reuse the listener, but - // that is racy as the retransmitted SYNs could get ACKd as we make room in - // the accept queue. - ASSERT_THAT(close(listen_fd.release()), SyscallSucceedsWithValue(0)); + // Ensure that we accept all client connections. The waiting connections would + // get enqueued as we drain the accept queue. + for (int i = 0; i < std::size(established_clients); i++) { + SCOPED_TRACE(absl::StrCat("established clients i=", i)); + accept_connection(); + } + + // The waiting client connections could be in one of these 2 states: + // (1) SYN_SENT: if the SYN was dropped because accept queue was full + // (2) ESTABLISHED: if the listener sent back a SYNACK, but may have dropped + // the ACK from the client if the accept queue was full (send out a data to + // re-send that ACK, to address that case). + for (int i = 0; i < std::size(waiting_clients); i++) { + SCOPED_TRACE(absl::StrCat("waiting clients i=", i)); + constexpr int kTimeout = 10000; + pollfd pfd = { + .fd = waiting_clients[i].get(), + .events = POLLOUT, + }; + EXPECT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + EXPECT_EQ(pfd.revents, POLLOUT); + char c; + EXPECT_THAT(RetryEINTR(send)(waiting_clients[i].get(), &c, sizeof(c), 0), + SyscallSucceedsWithValue(sizeof(c))); + accept_connection(); } } diff --git a/test/syscalls/linux/verity_ioctl.cc b/test/syscalls/linux/verity_ioctl.cc new file mode 100644 index 000000000..dcd28f2c3 --- /dev/null +++ b/test/syscalls/linux/verity_ioctl.cc @@ -0,0 +1,133 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mount.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef FS_IOC_ENABLE_VERITY +#define FS_IOC_ENABLE_VERITY 1082156677 +#endif + +#ifndef FS_IOC_MEASURE_VERITY +#define FS_IOC_MEASURE_VERITY 3221513862 +#endif + +#ifndef FS_VERITY_FL +#define FS_VERITY_FL 1048576 +#endif + +#ifndef FS_IOC_GETFLAGS +#define FS_IOC_GETFLAGS 2148034049 +#endif + +struct fsverity_digest { + __u16 digest_algorithm; + __u16 digest_size; /* input/output */ + __u8 digest[]; +}; + +const int fsverity_max_digest_size = 64; +const int fsverity_default_digest_size = 32; + +class IoctlTest : public ::testing::Test { + protected: + void SetUp() override { + // Verity is implemented in VFS2. + SKIP_IF(IsRunningWithVFS1()); + + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + // Mount a tmpfs file system, to be wrapped by a verity fs. + tmpfs_dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(mount("", tmpfs_dir_.path().c_str(), "tmpfs", 0, ""), + SyscallSucceeds()); + + // Create a new file in the tmpfs mount. + constexpr char kContents[] = "foobarbaz"; + file_ = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(tmpfs_dir_.path(), kContents, 0777)); + filename_ = Basename(file_.path()); + } + + TempPath tmpfs_dir_; + TempPath file_; + std::string filename_; +}; + +TEST_F(IoctlTest, Enable) { + // mount a verity fs on the existing tmpfs mount. + std::string mount_opts = "lower_path=" + tmpfs_dir_.path(); + auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT( + mount("", verity_dir.path().c_str(), "verity", 0, mount_opts.c_str()), + SyscallSucceeds()); + + printf("verity path: %s, filename: %s\n", verity_dir.path().c_str(), + filename_.c_str()); + fflush(nullptr); + // Confirm that the verity flag is absent. + int flag = 0; + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(verity_dir.path(), filename_), O_RDONLY, 0777)); + ASSERT_THAT(ioctl(fd.get(), FS_IOC_GETFLAGS, &flag), SyscallSucceeds()); + EXPECT_EQ(flag & FS_VERITY_FL, 0); + + // Enable the file and confirm that the verity flag is present. + ASSERT_THAT(ioctl(fd.get(), FS_IOC_ENABLE_VERITY), SyscallSucceeds()); + ASSERT_THAT(ioctl(fd.get(), FS_IOC_GETFLAGS, &flag), SyscallSucceeds()); + EXPECT_EQ(flag & FS_VERITY_FL, FS_VERITY_FL); +} + +TEST_F(IoctlTest, Measure) { + // mount a verity fs on the existing tmpfs mount. + std::string mount_opts = "lower_path=" + tmpfs_dir_.path(); + auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT( + mount("", verity_dir.path().c_str(), "verity", 0, mount_opts.c_str()), + SyscallSucceeds()); + + // Confirm that the file cannot be measured. + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(verity_dir.path(), filename_), O_RDONLY, 0777)); + int digest_size = sizeof(struct fsverity_digest) + fsverity_max_digest_size; + struct fsverity_digest *digest = + reinterpret_cast<struct fsverity_digest *>(malloc(digest_size)); + memset(digest, 0, digest_size); + digest->digest_size = fsverity_max_digest_size; + ASSERT_THAT(ioctl(fd.get(), FS_IOC_MEASURE_VERITY, digest), + SyscallFailsWithErrno(ENODATA)); + + // Enable the file and confirm that the file can be measured. + ASSERT_THAT(ioctl(fd.get(), FS_IOC_ENABLE_VERITY), SyscallSucceeds()); + ASSERT_THAT(ioctl(fd.get(), FS_IOC_MEASURE_VERITY, digest), + SyscallSucceeds()); + EXPECT_EQ(digest->digest_size, fsverity_default_digest_size); + free(digest); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/verity_mount.cc b/test/syscalls/linux/verity_mount.cc new file mode 100644 index 000000000..e73dd5599 --- /dev/null +++ b/test/syscalls/linux/verity_mount.cc @@ -0,0 +1,53 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mount.h> + +#include <iomanip> +#include <sstream> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Mount verity file system on an existing gofer mount. +TEST(MountTest, MountExisting) { + // Verity is implemented in VFS2. + SKIP_IF(IsRunningWithVFS1()); + + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // Mount a new tmpfs file system. + auto const tmpfs_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(mount("", tmpfs_dir.path().c_str(), "tmpfs", 0, ""), + SyscallSucceeds()); + + // Mount a verity file system on the existing gofer mount. + auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string opts = "lower_path=" + tmpfs_dir.path(); + EXPECT_THAT(mount("", verity_dir.path().c_str(), "verity", 0, opts.c_str()), + SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/util/BUILD b/test/util/BUILD index e561f3daa..383de00ed 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -94,6 +94,7 @@ cc_library( ":file_descriptor", ":posix_error", "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", gtest, ], ) @@ -368,3 +369,20 @@ cc_library( testonly = 1, hdrs = ["temp_umask.h"], ) + +cc_library( + name = "cgroup_util", + testonly = 1, + srcs = ["cgroup_util.cc"], + hdrs = ["cgroup_util.h"], + deps = [ + ":cleanup", + ":fs_util", + ":mount_util", + ":posix_error", + ":temp_path", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + ], +) diff --git a/test/util/cgroup_util.cc b/test/util/cgroup_util.cc new file mode 100644 index 000000000..65d9c4986 --- /dev/null +++ b/test/util/cgroup_util.cc @@ -0,0 +1,223 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/util/cgroup_util.h" + +#include <sys/syscall.h> +#include <unistd.h> + +#include "absl/strings/str_split.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" + +namespace gvisor { +namespace testing { + +Cgroup::Cgroup(std::string path) : cgroup_path_(path) { + id_ = ++Cgroup::next_id_; + std::cerr << absl::StreamFormat("[cg#%d] <= %s", id_, cgroup_path_) + << std::endl; +} + +PosixErrorOr<std::string> Cgroup::ReadControlFile( + absl::string_view name) const { + std::string buf; + RETURN_IF_ERRNO(GetContents(Relpath(name), &buf)); + + const std::string alias_path = absl::StrFormat("[cg#%d]/%s", id_, name); + std::cerr << absl::StreamFormat("<contents of %s>", alias_path) << std::endl; + std::cerr << buf; + std::cerr << absl::StreamFormat("<end of %s>", alias_path) << std::endl; + + return buf; +} + +PosixErrorOr<int64_t> Cgroup::ReadIntegerControlFile( + absl::string_view name) const { + ASSIGN_OR_RETURN_ERRNO(const std::string buf, ReadControlFile(name)); + ASSIGN_OR_RETURN_ERRNO(const int64_t val, Atoi<int64_t>(buf)); + return val; +} + +PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::Procs() const { + ASSIGN_OR_RETURN_ERRNO(std::string buf, ReadControlFile("cgroup.procs")); + return ParsePIDList(buf); +} + +PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::Tasks() const { + ASSIGN_OR_RETURN_ERRNO(std::string buf, ReadControlFile("tasks")); + return ParsePIDList(buf); +} + +PosixError Cgroup::ContainsCallingProcess() const { + ASSIGN_OR_RETURN_ERRNO(const absl::flat_hash_set<pid_t> procs, Procs()); + ASSIGN_OR_RETURN_ERRNO(const absl::flat_hash_set<pid_t> tasks, Tasks()); + const pid_t pid = getpid(); + const pid_t tid = syscall(SYS_gettid); + if (!procs.contains(pid)) { + return PosixError( + ENOENT, absl::StrFormat("Cgroup doesn't contain process %d", pid)); + } + if (!tasks.contains(tid)) { + return PosixError(ENOENT, + absl::StrFormat("Cgroup doesn't contain task %d", tid)); + } + return NoError(); +} + +PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::ParsePIDList( + absl::string_view data) const { + absl::flat_hash_set<pid_t> res; + std::vector<absl::string_view> lines = absl::StrSplit(data, '\n'); + for (const std::string_view& line : lines) { + if (line.empty()) { + continue; + } + ASSIGN_OR_RETURN_ERRNO(const int32_t pid, Atoi<int32_t>(line)); + res.insert(static_cast<pid_t>(pid)); + } + return res; +} + +int64_t Cgroup::next_id_ = 0; + +PosixErrorOr<Cgroup> Mounter::MountCgroupfs(std::string mopts) { + ASSIGN_OR_RETURN_ERRNO(TempPath mountpoint, + TempPath::CreateDirIn(root_.path())); + ASSIGN_OR_RETURN_ERRNO( + Cleanup mount, Mount("none", mountpoint.path(), "cgroup", 0, mopts, 0)); + const std::string mountpath = mountpoint.path(); + std::cerr << absl::StreamFormat( + "Mount(\"none\", \"%s\", \"cgroup\", 0, \"%s\", 0) => OK", + mountpath, mopts) + << std::endl; + Cgroup cg = Cgroup(mountpath); + mountpoints_[cg.id()] = std::move(mountpoint); + mounts_[cg.id()] = std::move(mount); + return cg; +} + +PosixError Mounter::Unmount(const Cgroup& c) { + auto mount = mounts_.find(c.id()); + auto mountpoint = mountpoints_.find(c.id()); + + if (mount == mounts_.end() || mountpoint == mountpoints_.end()) { + return PosixError( + ESRCH, absl::StrFormat("No mount found for cgroupfs containing cg#%d", + c.id())); + } + + std::cerr << absl::StreamFormat("Unmount([cg#%d])", c.id()) << std::endl; + + // Simply delete the entries, their destructors will unmount and delete the + // mountpoint. Note the order is important to avoid errors: mount then + // mountpoint. + mounts_.erase(mount); + mountpoints_.erase(mountpoint); + + return NoError(); +} + +constexpr char kProcCgroupsHeader[] = + "#subsys_name\thierarchy\tnum_cgroups\tenabled"; + +PosixErrorOr<absl::flat_hash_map<std::string, CgroupsEntry>> +ProcCgroupsEntries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/cgroups", &content)); + + bool found_header = false; + absl::flat_hash_map<std::string, CgroupsEntry> entries; + std::vector<std::string> lines = absl::StrSplit(content, '\n'); + std::cerr << "<contents of /proc/cgroups>" << std::endl; + for (const std::string& line : lines) { + std::cerr << line << std::endl; + + if (!found_header) { + EXPECT_EQ(line, kProcCgroupsHeader); + found_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/cgroups. + // + // Example entries, fields are tab separated in the real file: + // + // #subsys_name hierarchy num_cgroups enabled + // cpuset 12 35 1 + // cpu 3 222 1 + // ^ ^ ^ ^ + // 0 1 2 3 + + CgroupsEntry entry; + std::vector<std::string> fields = + StrSplit(line, absl::ByAnyChar(": \t"), absl::SkipEmpty()); + + entry.subsys_name = fields[0]; + ASSIGN_OR_RETURN_ERRNO(entry.hierarchy, Atoi<uint32_t>(fields[1])); + ASSIGN_OR_RETURN_ERRNO(entry.num_cgroups, Atoi<uint64_t>(fields[2])); + ASSIGN_OR_RETURN_ERRNO(const int enabled, Atoi<int>(fields[3])); + entry.enabled = enabled != 0; + + entries[entry.subsys_name] = entry; + } + std::cerr << "<end of /proc/cgroups>" << std::endl; + + return entries; +} + +PosixErrorOr<absl::flat_hash_map<std::string, PIDCgroupEntry>> +ProcPIDCgroupEntries(pid_t pid) { + const std::string path = absl::StrFormat("/proc/%d/cgroup", pid); + std::string content; + RETURN_IF_ERRNO(GetContents(path, &content)); + + absl::flat_hash_map<std::string, PIDCgroupEntry> entries; + std::vector<std::string> lines = absl::StrSplit(content, '\n'); + + std::cerr << absl::StreamFormat("<contents of %s>", path) << std::endl; + for (const std::string& line : lines) { + std::cerr << line << std::endl; + + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/<pid>/cgroup. + // + // Example entries: + // + // 2:cpu:/path/to/cgroup + // 1:memory:/ + + PIDCgroupEntry entry; + std::vector<std::string> fields = + absl::StrSplit(line, absl::ByChar(':'), absl::SkipEmpty()); + + ASSIGN_OR_RETURN_ERRNO(entry.hierarchy, Atoi<uint32_t>(fields[0])); + entry.controllers = fields[1]; + entry.path = fields[2]; + + entries[entry.controllers] = entry; + } + std::cerr << absl::StreamFormat("<end of %s>", path) << std::endl; + + return entries; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/util/cgroup_util.h b/test/util/cgroup_util.h new file mode 100644 index 000000000..b049559df --- /dev/null +++ b/test/util/cgroup_util.h @@ -0,0 +1,111 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_CGROUP_UTIL_H_ +#define GVISOR_TEST_UTIL_CGROUP_UTIL_H_ + +#include <unistd.h> + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/strings/string_view.h" +#include "test/util/cleanup.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" + +namespace gvisor { +namespace testing { + +// Cgroup represents a cgroup directory on a mounted cgroupfs. +class Cgroup { + public: + Cgroup(std::string path); + + uint64_t id() const { return id_; } + + std::string Relpath(absl::string_view leaf) const { + return JoinPath(cgroup_path_, leaf); + } + + // Returns the contents of a cgroup control file with the given name. + PosixErrorOr<std::string> ReadControlFile(absl::string_view name) const; + + // Reads the contents of a cgroup control with the given name, and attempts + // to parse it as an integer. + PosixErrorOr<int64_t> ReadIntegerControlFile(absl::string_view name) const; + + // Returns the thread ids of the leaders of thread groups managed by this + // cgroup. + PosixErrorOr<absl::flat_hash_set<pid_t>> Procs() const; + + PosixErrorOr<absl::flat_hash_set<pid_t>> Tasks() const; + + // ContainsCallingProcess checks whether the calling process is part of the + PosixError ContainsCallingProcess() const; + + private: + PosixErrorOr<absl::flat_hash_set<pid_t>> ParsePIDList( + absl::string_view data) const; + + static int64_t next_id_; + int64_t id_; + const std::string cgroup_path_; +}; + +// Mounter is a utility for creating cgroupfs mounts. It automatically manages +// the lifetime of created mounts. +class Mounter { + public: + Mounter(TempPath root) : root_(std::move(root)) {} + + PosixErrorOr<Cgroup> MountCgroupfs(std::string mopts); + + PosixError Unmount(const Cgroup& c); + + private: + // The destruction order of these members avoids errors during cleanup. We + // first unmount (by executing the mounts_ cleanups), then delete the + // mountpoint subdirs, then delete the root. + TempPath root_; + absl::flat_hash_map<int64_t, TempPath> mountpoints_; + absl::flat_hash_map<int64_t, Cleanup> mounts_; +}; + +// Represents a line from /proc/cgroups. +struct CgroupsEntry { + std::string subsys_name; + uint32_t hierarchy; + uint64_t num_cgroups; + bool enabled; +}; + +// Returns a parsed representation of /proc/cgroups. +PosixErrorOr<absl::flat_hash_map<std::string, CgroupsEntry>> +ProcCgroupsEntries(); + +// Represents a line from /proc/<pid>/cgroup. +struct PIDCgroupEntry { + uint32_t hierarchy; + std::string controllers; + std::string path; +}; + +// Returns a parsed representation of /proc/<pid>/cgroup. +PosixErrorOr<absl::flat_hash_map<std::string, PIDCgroupEntry>> +ProcPIDCgroupEntries(pid_t pid); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_CGROUP_UTIL_H_ diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 5f1ce0d8a..483ae848d 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -28,6 +28,8 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" @@ -366,6 +368,48 @@ PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath, return files; } +PosixError DirContains(absl::string_view path, + const std::vector<std::string>& expect, + const std::vector<std::string>& exclude) { + ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); + + for (auto& expected_entry : expect) { + auto cursor = std::find(listing.begin(), listing.end(), expected_entry); + if (cursor == listing.end()) { + return PosixError(ENOENT, absl::StrFormat("Failed to find '%s' in '%s'", + expected_entry, path)); + } + } + for (auto& excluded_entry : exclude) { + auto cursor = std::find(listing.begin(), listing.end(), excluded_entry); + if (cursor != listing.end()) { + return PosixError(ENOENT, absl::StrCat("File '", excluded_entry, + "' found in path '", path, "'")); + } + } + return NoError(); +} + +PosixError EventuallyDirContains(absl::string_view path, + const std::vector<std::string>& expect, + const std::vector<std::string>& exclude) { + constexpr int kRetryCount = 100; + const absl::Duration kRetryDelay = absl::Milliseconds(100); + + for (int i = 0; i < kRetryCount; ++i) { + auto res = DirContains(path, expect, exclude); + if (res.ok()) { + return res; + } + if (i < kRetryCount - 1) { + // Sleep if this isn't the final iteration. + absl::SleepFor(kRetryDelay); + } + } + return PosixError(ETIMEDOUT, + "Timed out while waiting for directory to contain files "); +} + PosixError RecursivelyDelete(absl::string_view path, int* undeleted_dirs, int* undeleted_files) { ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(path)); diff --git a/test/util/fs_util.h b/test/util/fs_util.h index 2190c3bca..bb2d1d3c8 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -129,6 +129,18 @@ PosixError WalkTree( PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath, bool skipdots); +// Check that a directory contains children nodes named in expect, and does not +// contain any children nodes named in exclude. +PosixError DirContains(absl::string_view path, + const std::vector<std::string>& expect, + const std::vector<std::string>& exclude); + +// Same as DirContains, but adds a retry. Suitable for checking a directory +// being modified asynchronously. +PosixError EventuallyDirContains(absl::string_view path, + const std::vector<std::string>& expect, + const std::vector<std::string>& exclude); + // Attempt to recursively delete a directory or file. Returns an error and // the number of undeleted directories and files. If either // undeleted_dirs or undeleted_files is nullptr then it will not be used. diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl index f44f83eab..e23901815 100644 --- a/tools/go_marshal/defs.bzl +++ b/tools/go_marshal/defs.bzl @@ -58,7 +58,7 @@ go_marshal = rule( marshal_deps = [ "//pkg/gohacks", "//pkg/safecopy", - "//pkg/usermem", + "//pkg/hostarch", "//pkg/marshal", ] diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 39394d2a7..0e2d752cb 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -113,7 +113,7 @@ func NewGenerator(srcs []string, out, outTest, outTestUnconditional, pkg string, g.imports.add("unsafe") g.imports.add("gvisor.dev/gvisor/pkg/gohacks") g.imports.add("gvisor.dev/gvisor/pkg/safecopy") - g.imports.add("gvisor.dev/gvisor/pkg/usermem") + g.imports.add("gvisor.dev/gvisor/pkg/hostarch") g.imports.add("gvisor.dev/gvisor/pkg/marshal") return &g, nil diff --git a/tools/go_marshal/gomarshal/generator_interfaces.go b/tools/go_marshal/gomarshal/generator_interfaces.go index 65f5ea34d..3e643e77f 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces.go +++ b/tools/go_marshal/gomarshal/generator_interfaces.go @@ -120,16 +120,16 @@ func (g *interfaceGenerator) marshalScalar(accessor, typ, bufVar string) { g.emit("%s[0] = byte(%s)\n", bufVar, accessor) g.shift(bufVar, 1) case "int16", "uint16": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint16(%s[:2], uint16(%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint16(%s[:2], uint16(%s))\n", bufVar, accessor) g.shift(bufVar, 2) case "int32", "uint32": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint32(%s[:4], uint32(%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint32(%s[:4], uint32(%s))\n", bufVar, accessor) g.shift(bufVar, 4) case "int64", "uint64": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint64(%s[:8], uint64(%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint64(%s[:8], uint64(%s))\n", bufVar, accessor) g.shift(bufVar, 8) default: g.emit("%s.MarshalBytes(%s[:%s.SizeBytes()])\n", accessor, bufVar, accessor) @@ -147,16 +147,16 @@ func (g *interfaceGenerator) unmarshalScalar(accessor, typ, bufVar string) { g.emit("%s = %s(%s[0])\n", accessor, typ, bufVar) g.shift(bufVar, 1) case "int16", "uint16": - g.recordUsedImport("usermem") - g.emit("%s = %s(usermem.ByteOrder.Uint16(%s[:2]))\n", accessor, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("%s = %s(hostarch.ByteOrder.Uint16(%s[:2]))\n", accessor, typ, bufVar) g.shift(bufVar, 2) case "int32", "uint32": - g.recordUsedImport("usermem") - g.emit("%s = %s(usermem.ByteOrder.Uint32(%s[:4]))\n", accessor, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("%s = %s(hostarch.ByteOrder.Uint32(%s[:4]))\n", accessor, typ, bufVar) g.shift(bufVar, 4) case "int64", "uint64": - g.recordUsedImport("usermem") - g.emit("%s = %s(usermem.ByteOrder.Uint64(%s[:8]))\n", accessor, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("%s = %s(hostarch.ByteOrder.Uint64(%s[:8]))\n", accessor, typ, bufVar) g.shift(bufVar, 8) default: g.emit("%s.UnmarshalBytes(%s[:%s.SizeBytes()])\n", accessor, bufVar, accessor) diff --git a/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go b/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go index 7525b52da..32afece2e 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go +++ b/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go @@ -39,7 +39,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as g.recordUsedImport("runtime") g.recordUsedImport("safecopy") g.recordUsedImport("unsafe") - g.recordUsedImport("usermem") + g.recordUsedImport("hostarch") lenExpr := g.arrayLenExpr(a) @@ -102,7 +102,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r)) @@ -114,7 +114,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r) }) @@ -122,7 +122,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r)) diff --git a/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go b/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go index b1a8622cd..345020ddc 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go +++ b/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go @@ -46,8 +46,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() { g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName()) g.emit("buf := cc.CopyScratchBuffer(%s.SizeBytes()) // escapes: okay.\n", g.r) @@ -59,8 +59,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() { g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r) }) @@ -69,8 +69,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() { g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("// Type %s doesn't have a packed layout in memory, fall back to UnmarshalBytes.\n", g.typeName()) g.emit("buf := cc.CopyScratchBuffer(%s.SizeBytes()) // escapes: okay.\n", g.r) diff --git a/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go b/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go index 7edaf666c..05f0e0db4 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go +++ b/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go @@ -29,14 +29,14 @@ func (g *interfaceGenerator) marshalPrimitiveScalar(accessor, typ, bufVar string case "int8", "uint8", "byte": g.emit("%s[0] = byte(*%s)\n", bufVar, accessor) case "int16", "uint16": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint16(%s[:2], uint16(*%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint16(%s[:2], uint16(*%s))\n", bufVar, accessor) case "int32", "uint32": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint32(%s[:4], uint32(*%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint32(%s[:4], uint32(*%s))\n", bufVar, accessor) case "int64", "uint64": - g.recordUsedImport("usermem") - g.emit("usermem.ByteOrder.PutUint64(%s[:8], uint64(*%s))\n", bufVar, accessor) + g.recordUsedImport("hostarch") + g.emit("hostarch.ByteOrder.PutUint64(%s[:8], uint64(*%s))\n", bufVar, accessor) default: g.emit("// Explicilty cast to the underlying type before dispatching to\n") g.emit("// MarshalBytes, so we don't recursively call %s.MarshalBytes\n", accessor) @@ -53,14 +53,14 @@ func (g *interfaceGenerator) unmarshalPrimitiveScalar(accessor, typ, bufVar, typ case "int8", "uint8": g.emit("*%s = %s(%s(%s[0]))\n", accessor, typeCast, typ, bufVar) case "int16", "uint16": - g.recordUsedImport("usermem") - g.emit("*%s = %s(%s(usermem.ByteOrder.Uint16(%s[:2])))\n", accessor, typeCast, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint16(%s[:2])))\n", accessor, typeCast, typ, bufVar) case "int32", "uint32": - g.recordUsedImport("usermem") - g.emit("*%s = %s(%s(usermem.ByteOrder.Uint32(%s[:4])))\n", accessor, typeCast, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint32(%s[:4])))\n", accessor, typeCast, typ, bufVar) case "int64", "uint64": - g.recordUsedImport("usermem") - g.emit("*%s = %s(%s(usermem.ByteOrder.Uint64(%s[:8])))\n", accessor, typeCast, typ, bufVar) + g.recordUsedImport("hostarch") + g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint64(%s[:8])))\n", accessor, typeCast, typ, bufVar) default: g.emit("// Explicilty cast to the underlying type before dispatching to\n") g.emit("// UnmarshalBytes, so we don't recursively call %s.UnmarshalBytes\n", accessor) @@ -101,7 +101,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident) g.recordUsedImport("runtime") g.recordUsedImport("safecopy") g.recordUsedImport("unsafe") - g.recordUsedImport("usermem") + g.recordUsedImport("hostarch") g.emit("// SizeBytes implements marshal.Marshallable.SizeBytes.\n") g.emit("//go:nosplit\n") @@ -154,7 +154,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident) g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r)) @@ -166,7 +166,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident) g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r) }) @@ -174,7 +174,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident) g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n") g.emit("//go:nosplit\n") - g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r)) @@ -199,7 +199,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident) func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Ident, slice *sliceAPI) { g.recordUsedImport("marshal") - g.recordUsedImport("usermem") + g.recordUsedImport("hostarch") g.recordUsedImport("reflect") g.recordUsedImport("runtime") g.recordUsedImport("unsafe") @@ -211,7 +211,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Id g.emit("// Copy%sIn copies in a slice of %s objects from the task's memory.\n", slice.ident, eltType) g.emit("//go:nosplit\n") - g.emit("func Copy%sIn(cc marshal.CopyContext, addr usermem.Addr, dst []%s) (int, error) {\n", slice.ident, eltType) + g.emit("func Copy%sIn(cc marshal.CopyContext, addr hostarch.Addr, dst []%s) (int, error) {\n", slice.ident, eltType) g.inIndent(func() { g.emit("count := len(dst)\n") g.emit("if count == 0 {\n") @@ -231,7 +231,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Id g.emit("// Copy%sOut copies a slice of %s objects to the task's memory.\n", slice.ident, eltType) g.emit("//go:nosplit\n") - g.emit("func Copy%sOut(cc marshal.CopyContext, addr usermem.Addr, src []%s) (int, error) {\n", slice.ident, eltType) + g.emit("func Copy%sOut(cc marshal.CopyContext, addr hostarch.Addr, src []%s) (int, error) {\n", slice.ident, eltType) g.inIndent(func() { g.emit("count := len(src)\n") g.emit("if count == 0 {\n") diff --git a/tools/go_marshal/gomarshal/generator_interfaces_struct.go b/tools/go_marshal/gomarshal/generator_interfaces_struct.go index 5f6306b8f..72df1ab64 100644 --- a/tools/go_marshal/gomarshal/generator_interfaces_struct.go +++ b/tools/go_marshal/gomarshal/generator_interfaces_struct.go @@ -319,8 +319,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) { g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { fallback := func() { g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName()) @@ -352,8 +352,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) { g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r) }) @@ -362,8 +362,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) { g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n") g.emit("//go:nosplit\n") g.recordUsedImport("marshal") - g.recordUsedImport("usermem") - g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName()) + g.recordUsedImport("hostarch") + g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName()) g.inIndent(func() { fallback := func() { g.emit("// Type %s doesn't have a packed layout in memory, fall back to UnmarshalBytes.\n", g.typeName()) @@ -436,10 +436,10 @@ func (g *interfaceGenerator) emitMarshallableSliceForStruct(st *ast.StructType, } g.recordUsedImport("marshal") - g.recordUsedImport("usermem") + g.recordUsedImport("hostarch") g.emit("// Copy%sIn copies in a slice of %s objects from the task's memory.\n", slice.ident, g.typeName()) - g.emit("func Copy%sIn(cc marshal.CopyContext, addr usermem.Addr, dst []%s) (int, error) {\n", slice.ident, g.typeName()) + g.emit("func Copy%sIn(cc marshal.CopyContext, addr hostarch.Addr, dst []%s) (int, error) {\n", slice.ident, g.typeName()) g.inIndent(func() { g.emit("count := len(dst)\n") g.emit("if count == 0 {\n") @@ -496,7 +496,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForStruct(st *ast.StructType, g.emit("}\n\n") g.emit("// Copy%sOut copies a slice of %s objects to the task's memory.\n", slice.ident, g.typeName()) - g.emit("func Copy%sOut(cc marshal.CopyContext, addr usermem.Addr, src []%s) (int, error) {\n", slice.ident, g.typeName()) + g.emit("func Copy%sOut(cc marshal.CopyContext, addr hostarch.Addr, src []%s) (int, error) {\n", slice.ident, g.typeName()) g.inIndent(func() { g.emit("count := len(src)\n") g.emit("if count == 0 {\n") diff --git a/tools/go_marshal/gomarshal/generator_tests.go b/tools/go_marshal/gomarshal/generator_tests.go index 6cf00843f..8f93a1de5 100644 --- a/tools/go_marshal/gomarshal/generator_tests.go +++ b/tools/go_marshal/gomarshal/generator_tests.go @@ -32,7 +32,7 @@ var standardImports = []string{ var sliceAPIImports = []string{ "encoding/binary", - "gvisor.dev/gvisor/pkg/usermem", + "gvisor.dev/gvisor/pkg/hostarch", } type testGenerator struct { @@ -143,7 +143,7 @@ func (g *testGenerator) emitTestMarshalUnmarshalPreservesData() { } func (g *testGenerator) emitTestMarshalUnmarshalSlicePreservesData(slice *sliceAPI) { - for _, name := range []string{"binary", "usermem"} { + for _, name := range []string{"binary", "hostarch"} { if !g.imports.markUsed(name) { panic(fmt.Sprintf("Generated test for '%s' referenced a non-existent import with local name '%s'", g.typeName(), name)) } @@ -155,7 +155,7 @@ func (g *testGenerator) emitTestMarshalUnmarshalSlicePreservesData(slice *sliceA g.emit("size := (*%s)(nil).SizeBytes() * len(x)\n", g.typeName()) g.emit("buf := bytes.NewBuffer(make([]byte, size))\n") g.emit("buf.Reset()\n") - g.emit("if err := binary.Write(buf, usermem.ByteOrder, x[:]); err != nil {\n") + g.emit("if err := binary.Write(buf, hostarch.ByteOrder, x[:]); err != nil {\n") g.inIndent(func() { g.emit("t.Fatal(fmt.Sprintf(\"binary.Write failed: %v\", err))\n") }) diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index 5bceacd32..e872560a9 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -15,7 +15,7 @@ go_test( deps = [ ":test", "//pkg/binary", - "//pkg/usermem", + "//pkg/hostarch", "//tools/go_marshal/analysis", ], ) @@ -41,6 +41,7 @@ go_test( srcs = ["marshal_test.go"], deps = [ ":test", + "//pkg/hostarch", "//pkg/marshal", "//pkg/marshal/primitive", "//pkg/syserror", diff --git a/tools/go_marshal/test/benchmark_test.go b/tools/go_marshal/test/benchmark_test.go index 224d308c7..16f478ff7 100644 --- a/tools/go_marshal/test/benchmark_test.go +++ b/tools/go_marshal/test/benchmark_test.go @@ -22,7 +22,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/tools/go_marshal/analysis" "gvisor.dev/gvisor/tools/go_marshal/test" ) @@ -39,10 +39,10 @@ func BenchmarkEncodingBinary(b *testing.B) { for n := 0; n < b.N; n++ { buf := bytes.NewBuffer(make([]byte, size)) buf.Reset() - if err := encbin.Write(buf, usermem.ByteOrder, &s1); err != nil { + if err := encbin.Write(buf, hostarch.ByteOrder, &s1); err != nil { b.Error("Write:", err) } - if err := encbin.Read(buf, usermem.ByteOrder, &s2); err != nil { + if err := encbin.Read(buf, hostarch.ByteOrder, &s2); err != nil { b.Error("Read:", err) } } @@ -66,8 +66,8 @@ func BenchmarkBinary(b *testing.B) { for n := 0; n < b.N; n++ { buf := make([]byte, 0, size) - buf = binary.Marshal(buf, usermem.ByteOrder, &s1) - binary.Unmarshal(buf, usermem.ByteOrder, &s2) + buf = binary.Marshal(buf, hostarch.ByteOrder, &s1) + binary.Unmarshal(buf, hostarch.ByteOrder, &s2) } b.StopTimer() @@ -89,42 +89,42 @@ func BenchmarkMarshalManual(b *testing.B) { buf := make([]byte, 0, s1.SizeBytes()) // Marshal - buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Dev) - buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Ino) - buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Nlink) - buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.Mode) - buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.UID) - buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.GID) - buf = binary.AppendUint32(buf, usermem.ByteOrder, 0) - buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Rdev) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Size)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Blksize)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Blocks)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.ATime.Sec)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.ATime.Nsec)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.MTime.Sec)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.MTime.Nsec)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.CTime.Sec)) - buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.CTime.Nsec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Dev) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Ino) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Nlink) + buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.Mode) + buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.UID) + buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.GID) + buf = binary.AppendUint32(buf, hostarch.ByteOrder, 0) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Rdev) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Size)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Blksize)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Blocks)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.ATime.Sec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.ATime.Nsec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.MTime.Sec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.MTime.Nsec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.CTime.Sec)) + buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.CTime.Nsec)) // Unmarshal - s2.Dev = usermem.ByteOrder.Uint64(buf[0:8]) - s2.Ino = usermem.ByteOrder.Uint64(buf[8:16]) - s2.Nlink = usermem.ByteOrder.Uint64(buf[16:24]) - s2.Mode = usermem.ByteOrder.Uint32(buf[24:28]) - s2.UID = usermem.ByteOrder.Uint32(buf[28:32]) - s2.GID = usermem.ByteOrder.Uint32(buf[32:36]) + s2.Dev = hostarch.ByteOrder.Uint64(buf[0:8]) + s2.Ino = hostarch.ByteOrder.Uint64(buf[8:16]) + s2.Nlink = hostarch.ByteOrder.Uint64(buf[16:24]) + s2.Mode = hostarch.ByteOrder.Uint32(buf[24:28]) + s2.UID = hostarch.ByteOrder.Uint32(buf[28:32]) + s2.GID = hostarch.ByteOrder.Uint32(buf[32:36]) // Padding: buf[36:40] - s2.Rdev = usermem.ByteOrder.Uint64(buf[40:48]) - s2.Size = int64(usermem.ByteOrder.Uint64(buf[48:56])) - s2.Blksize = int64(usermem.ByteOrder.Uint64(buf[56:64])) - s2.Blocks = int64(usermem.ByteOrder.Uint64(buf[64:72])) - s2.ATime.Sec = int64(usermem.ByteOrder.Uint64(buf[72:80])) - s2.ATime.Nsec = int64(usermem.ByteOrder.Uint64(buf[80:88])) - s2.MTime.Sec = int64(usermem.ByteOrder.Uint64(buf[88:96])) - s2.MTime.Nsec = int64(usermem.ByteOrder.Uint64(buf[96:104])) - s2.CTime.Sec = int64(usermem.ByteOrder.Uint64(buf[104:112])) - s2.CTime.Nsec = int64(usermem.ByteOrder.Uint64(buf[112:120])) + s2.Rdev = hostarch.ByteOrder.Uint64(buf[40:48]) + s2.Size = int64(hostarch.ByteOrder.Uint64(buf[48:56])) + s2.Blksize = int64(hostarch.ByteOrder.Uint64(buf[56:64])) + s2.Blocks = int64(hostarch.ByteOrder.Uint64(buf[64:72])) + s2.ATime.Sec = int64(hostarch.ByteOrder.Uint64(buf[72:80])) + s2.ATime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[80:88])) + s2.MTime.Sec = int64(hostarch.ByteOrder.Uint64(buf[88:96])) + s2.MTime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[96:104])) + s2.CTime.Sec = int64(hostarch.ByteOrder.Uint64(buf[104:112])) + s2.CTime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[112:120])) } b.StopTimer() @@ -187,8 +187,8 @@ func BenchmarkBinarySlice(b *testing.B) { for n := 0; n < b.N; n++ { buf := make([]byte, 0, size) - buf = binary.Marshal(buf, usermem.ByteOrder, &s1) - binary.Unmarshal(buf, usermem.ByteOrder, &s2) + buf = binary.Marshal(buf, hostarch.ByteOrder, &s1) + binary.Unmarshal(buf, hostarch.ByteOrder, &s2) } b.StopTimer() diff --git a/tools/go_marshal/test/escape/BUILD b/tools/go_marshal/test/escape/BUILD index 2981ef196..62e0b4665 100644 --- a/tools/go_marshal/test/escape/BUILD +++ b/tools/go_marshal/test/escape/BUILD @@ -7,8 +7,8 @@ go_library( testonly = 1, srcs = ["escape.go"], deps = [ + "//pkg/hostarch", "//pkg/marshal", - "//pkg/usermem", "//tools/go_marshal/test", ], ) diff --git a/tools/go_marshal/test/escape/escape.go b/tools/go_marshal/test/escape/escape.go index df14ae98e..1ac606862 100644 --- a/tools/go_marshal/test/escape/escape.go +++ b/tools/go_marshal/test/escape/escape.go @@ -16,8 +16,8 @@ package escape import ( + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" - "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/tools/go_marshal/test" ) @@ -29,21 +29,21 @@ func (*dummyCopyContext) CopyScratchBuffer(size int) []byte { return make([]byte, size) } -func (*dummyCopyContext) CopyOutBytes(addr usermem.Addr, b []byte) (int, error) { +func (*dummyCopyContext) CopyOutBytes(addr hostarch.Addr, b []byte) (int, error) { return len(b), nil } -func (*dummyCopyContext) CopyInBytes(addr usermem.Addr, b []byte) (int, error) { +func (*dummyCopyContext) CopyInBytes(addr hostarch.Addr, b []byte) (int, error) { return len(b), nil } -func (t *dummyCopyContext) MarshalBytes(addr usermem.Addr, marshallable marshal.Marshallable) { +func (t *dummyCopyContext) MarshalBytes(addr hostarch.Addr, marshallable marshal.Marshallable) { buf := t.CopyScratchBuffer(marshallable.SizeBytes()) marshallable.MarshalBytes(buf) t.CopyOutBytes(addr, buf) } -func (t *dummyCopyContext) MarshalUnsafe(addr usermem.Addr, marshallable marshal.Marshallable) { +func (t *dummyCopyContext) MarshalUnsafe(addr hostarch.Addr, marshallable marshal.Marshallable) { buf := t.CopyScratchBuffer(marshallable.SizeBytes()) marshallable.MarshalUnsafe(buf) t.CopyOutBytes(addr, buf) @@ -53,14 +53,14 @@ func (t *dummyCopyContext) MarshalUnsafe(addr usermem.Addr, marshallable marshal //go:nosplit func doCopyIn(t *dummyCopyContext) { var stat test.Stat - stat.CopyIn(t, usermem.Addr(0xf000ba12)) + stat.CopyIn(t, hostarch.Addr(0xf000ba12)) } // +checkescape:all //go:nosplit func doCopyOut(t *dummyCopyContext) { var stat test.Stat - stat.CopyOut(t, usermem.Addr(0xf000ba12)) + stat.CopyOut(t, hostarch.Addr(0xf000ba12)) } // +mustescape:builtin @@ -70,7 +70,7 @@ func doMarshalBytesDirect(t *dummyCopyContext) { var stat test.Stat buf := t.CopyScratchBuffer(stat.SizeBytes()) stat.MarshalBytes(buf) - t.CopyOutBytes(usermem.Addr(0xf000ba12), buf) + t.CopyOutBytes(hostarch.Addr(0xf000ba12), buf) } // +mustescape:builtin @@ -80,7 +80,7 @@ func doMarshalUnsafeDirect(t *dummyCopyContext) { var stat test.Stat buf := t.CopyScratchBuffer(stat.SizeBytes()) stat.MarshalUnsafe(buf) - t.CopyOutBytes(usermem.Addr(0xf000ba12), buf) + t.CopyOutBytes(hostarch.Addr(0xf000ba12), buf) } // +mustescape:local,heap @@ -88,7 +88,7 @@ func doMarshalUnsafeDirect(t *dummyCopyContext) { //go:nosplit func doMarshalBytesViaMarshallable(t *dummyCopyContext) { var stat test.Stat - t.MarshalBytes(usermem.Addr(0xf000ba12), &stat) + t.MarshalBytes(hostarch.Addr(0xf000ba12), &stat) } // +mustescape:local,heap @@ -96,5 +96,5 @@ func doMarshalBytesViaMarshallable(t *dummyCopyContext) { //go:nosplit func doMarshalUnsafeViaMarshallable(t *dummyCopyContext) { var stat test.Stat - t.MarshalUnsafe(usermem.Addr(0xf000ba12), &stat) + t.MarshalUnsafe(hostarch.Addr(0xf000ba12), &stat) } diff --git a/tools/go_marshal/test/marshal_test.go b/tools/go_marshal/test/marshal_test.go index 733689c79..43bafbf96 100644 --- a/tools/go_marshal/test/marshal_test.go +++ b/tools/go_marshal/test/marshal_test.go @@ -27,6 +27,7 @@ import ( "unsafe" "github.com/google/go-cmp/cmp" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/syserror" @@ -47,7 +48,7 @@ func (t *mockCopyContext) populate(val interface{}) { var buf bytes.Buffer // Use binary.Write so we aren't testing go-marshal against its own // potentially buggy implementation. - if err := binary.Write(&buf, usermem.ByteOrder, val); err != nil { + if err := binary.Write(&buf, hostarch.ByteOrder, val); err != nil { panic(err) } t.taskMem.Bytes = buf.Bytes() @@ -71,14 +72,14 @@ func (t *mockCopyContext) CopyScratchBuffer(size int) []byte { // CopyOutBytes implements marshal.CopyContext.CopyOutBytes. The implementation // completely ignores the target address and stores a copy of b in its // internally buffer, overriding any previous contents. -func (t *mockCopyContext) CopyOutBytes(_ usermem.Addr, b []byte) (int, error) { +func (t *mockCopyContext) CopyOutBytes(_ hostarch.Addr, b []byte) (int, error) { return t.taskMem.CopyOut(nil, 0, b, usermem.IOOpts{}) } // CopyInBytes implements marshal.CopyContext.CopyInBytes. The implementation // completely ignores the source address and always fills b from the begining of // its internal buffer. -func (t *mockCopyContext) CopyInBytes(_ usermem.Addr, b []byte) (int, error) { +func (t *mockCopyContext) CopyInBytes(_ hostarch.Addr, b []byte) (int, error) { return t.taskMem.CopyIn(nil, 0, b, usermem.IOOpts{}) } @@ -91,7 +92,7 @@ func unsafeMemory(m marshal.Marshallable) []byte { // since the layout isn't packed. Allocate a temporary buffer // and marshal instead. var buf bytes.Buffer - if err := binary.Write(&buf, usermem.ByteOrder, m); err != nil { + if err := binary.Write(&buf, hostarch.ByteOrder, m); err != nil { panic(err) } return buf.Bytes() @@ -130,7 +131,7 @@ func unsafeMemorySlice(m interface{}, elt marshal.Marshallable) []byte { // since the layout isn't packed. Allocate a temporary buffer // and marshal instead. var buf bytes.Buffer - if err := binary.Write(&buf, usermem.ByteOrder, m); err != nil { + if err := binary.Write(&buf, hostarch.ByteOrder, m); err != nil { panic(err) } return buf.Bytes() @@ -176,7 +177,7 @@ func limitedCopyIn(t *testing.T, src, dst marshal.Marshallable, limit int) { cc.populate(src) cc.setLimit(limit) - n, err := dst.CopyIn(&cc, usermem.Addr(0)) + n, err := dst.CopyIn(&cc, hostarch.Addr(0)) if n != limit { t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n) } @@ -206,7 +207,7 @@ func limitedCopyOut(t *testing.T, src marshal.Marshallable, limit int) { var cc mockCopyContext cc.setLimit(limit) - n, err := src.CopyOut(&cc, usermem.Addr(0)) + n, err := src.CopyOut(&cc, hostarch.Addr(0)) if n != limit { t.Errorf("CopyOut copied unexpected number of bytes, expected %d, got %d", limit, n) } @@ -227,7 +228,7 @@ func copyOutN(t *testing.T, src marshal.Marshallable, limit int) { var cc mockCopyContext cc.setLimit(limit) - n, err := src.CopyOutN(&cc, usermem.Addr(0), limit) + n, err := src.CopyOutN(&cc, hostarch.Addr(0), limit) if err != nil { t.Errorf("CopyOut returned unexpected error: %v", err) } @@ -304,18 +305,18 @@ func TestLimitedMarshalling(t *testing.T) { func TestLimitedSliceMarshalling(t *testing.T) { types := []struct { arrayPtrType reflect.Type - copySliceIn func(cc marshal.CopyContext, addr usermem.Addr, dstSlice interface{}) (int, error) - copySliceOut func(cc marshal.CopyContext, addr usermem.Addr, srcSlice interface{}) (int, error) + copySliceIn func(cc marshal.CopyContext, addr hostarch.Addr, dstSlice interface{}) (int, error) + copySliceOut func(cc marshal.CopyContext, addr hostarch.Addr, srcSlice interface{}) (int, error) unsafeMemory func(arrPtr interface{}) []byte }{ // Packed types. { reflect.TypeOf((*[20]test.Stat)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[20]test.Stat)[:] return test.CopyStatSliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[20]test.Stat)[:] return test.CopyStatSliceOut(cc, addr, slice) }, @@ -326,11 +327,11 @@ func TestLimitedSliceMarshalling(t *testing.T) { }, { reflect.TypeOf((*[1]test.Stat)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[1]test.Stat)[:] return test.CopyStatSliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[1]test.Stat)[:] return test.CopyStatSliceOut(cc, addr, slice) }, @@ -341,11 +342,11 @@ func TestLimitedSliceMarshalling(t *testing.T) { }, { reflect.TypeOf((*[5]test.SignalSetAlias)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[5]test.SignalSetAlias)[:] return test.CopySignalSetAliasSliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[5]test.SignalSetAlias)[:] return test.CopySignalSetAliasSliceOut(cc, addr, slice) }, @@ -357,11 +358,11 @@ func TestLimitedSliceMarshalling(t *testing.T) { // Non-packed types. { reflect.TypeOf((*[20]test.Type1)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[20]test.Type1)[:] return test.CopyType1SliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[20]test.Type1)[:] return test.CopyType1SliceOut(cc, addr, slice) }, @@ -372,11 +373,11 @@ func TestLimitedSliceMarshalling(t *testing.T) { }, { reflect.TypeOf((*[1]test.Type1)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[1]test.Type1)[:] return test.CopyType1SliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[1]test.Type1)[:] return test.CopyType1SliceOut(cc, addr, slice) }, @@ -387,11 +388,11 @@ func TestLimitedSliceMarshalling(t *testing.T) { }, { reflect.TypeOf((*[7]test.Type8)(nil)), - func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) { slice := dst.(*[7]test.Type8)[:] return test.CopyType8SliceIn(cc, addr, slice) }, - func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) { + func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) { slice := src.(*[7]test.Type8)[:] return test.CopyType8SliceOut(cc, addr, slice) }, @@ -444,7 +445,7 @@ func TestLimitedSliceMarshalling(t *testing.T) { cc.populate(expected) cc.setLimit(limit) - n, err := tt.copySliceIn(&cc, usermem.Addr(0), actual) + n, err := tt.copySliceIn(&cc, hostarch.Addr(0), actual) if n != limit { t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n) } @@ -498,7 +499,7 @@ func TestLimitedSliceMarshalling(t *testing.T) { cc.populate(expected) cc.setLimit(limit) - n, err := tt.copySliceOut(&cc, usermem.Addr(0), expected) + n, err := tt.copySliceOut(&cc, hostarch.Addr(0), expected) if n != limit { t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n) } @@ -523,14 +524,14 @@ func TestDynamicTypeStruct(t *testing.T) { var cc mockCopyContext cc.setLimit(t12.SizeBytes()) - if _, err := t12.CopyOut(&cc, usermem.Addr(0)); err != nil { + if _, err := t12.CopyOut(&cc, hostarch.Addr(0)); err != nil { t.Fatalf("cc.CopyOut faile: %v", err) } res := test.Type12Dynamic{ Y: make([]primitive.Int64, len(t12.Y)), } - res.CopyIn(&cc, usermem.Addr(0)) + res.CopyIn(&cc, hostarch.Addr(0)) if !reflect.DeepEqual(t12, res) { t.Errorf("dynamic type is not same after marshalling and unmarshalling: before = %+v, after = %+v", t12, res) } @@ -541,12 +542,12 @@ func TestDynamicTypeIdentifier(t *testing.T) { var cc mockCopyContext cc.setLimit(s.SizeBytes()) - if _, err := s.CopyOut(&cc, usermem.Addr(0)); err != nil { + if _, err := s.CopyOut(&cc, hostarch.Addr(0)); err != nil { t.Fatalf("cc.CopyOut faile: %v", err) } res := test.Type13Dynamic(make([]byte, len(s))) - res.CopyIn(&cc, usermem.Addr(0)) + res.CopyIn(&cc, hostarch.Addr(0)) if res != s { t.Errorf("dynamic type is not same after marshalling and unmarshalling: before = %s, after = %s", s, res) } diff --git a/tools/nogo/analyzers.go b/tools/nogo/analyzers.go index 8b4bff3b6..2b3c03fec 100644 --- a/tools/nogo/analyzers.go +++ b/tools/nogo/analyzers.go @@ -83,11 +83,6 @@ var AllAnalyzers = []*analysis.Analyzer{ checklocks.Analyzer, } -// EscapeAnalyzers is a list of escape-related analyzers. -var EscapeAnalyzers = []*analysis.Analyzer{ - checkescape.EscapeAnalyzer, -} - func register(all []*analysis.Analyzer) { // Register all fact types. // @@ -129,5 +124,4 @@ func init() { // Register lists. register(AllAnalyzers) - register(EscapeAnalyzers) } diff --git a/tools/nogo/check/main.go b/tools/nogo/check/main.go index 69bdfe502..4194770be 100644 --- a/tools/nogo/check/main.go +++ b/tools/nogo/check/main.go @@ -31,7 +31,6 @@ var ( stdlibFile = flag.String("stdlib", "", "stdlib configuration file (in JSON format)") findingsOutput = flag.String("findings", "", "output file (or stdout, if not specified)") factsOutput = flag.String("facts", "", "output file for facts (optional)") - escapesOutput = flag.String("escapes", "", "output file for escapes (optional)") ) func loadConfig(file string, config interface{}) interface{} { @@ -66,25 +65,13 @@ func main() { // Run the configuration. if *stdlibFile != "" { - // Perform basic analysis. + // Perform stdlib analysis. c := loadConfig(*stdlibFile, new(nogo.StdlibConfig)).(*nogo.StdlibConfig) findings, factData, err = nogo.CheckStdlib(c, nogo.AllAnalyzers) - } else if *packageFile != "" { - // Perform basic analysis. + // Perform standard analysis. c := loadConfig(*packageFile, new(nogo.PackageConfig)).(*nogo.PackageConfig) findings, factData, err = nogo.CheckPackage(c, nogo.AllAnalyzers, nil) - - // Do we need to do escape analysis? - if *escapesOutput != "" { - escapes, _, err := nogo.CheckPackage(c, nogo.EscapeAnalyzers, nil) - if err != nil { - log.Fatalf("error performing escape analysis: %v", err) - } - if err := nogo.WriteFindingsToFile(escapes, *escapesOutput); err != nil { - log.Fatalf("error writing escapes to %q: %v", *escapesOutput, err) - } - } } else { log.Fatalf("please provide at least one of package or stdlib!") } diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl index 0c48a7a5a..cb407a736 100644 --- a/tools/nogo/defs.bzl +++ b/tools/nogo/defs.bzl @@ -174,7 +174,6 @@ NogoInfo = provider( fields = { "facts": "serialized package facts", "raw_findings": "raw package findings (if relevant)", - "escapes": "escape-only findings (if relevant)", "importpath": "package import path", "binaries": "package binary files", "srcs": "srcs (for go_test support)", @@ -281,7 +280,6 @@ def _nogo_aspect_impl(target, ctx): go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch) facts = ctx.actions.declare_file(target.label.name + ".facts") raw_findings = ctx.actions.declare_file(target.label.name + ".raw_findings") - escapes = ctx.actions.declare_file(target.label.name + ".escapes") config = struct( ImportPath = importpath, GoFiles = [src.path for src in srcs if src.path.endswith(".go")], @@ -298,7 +296,7 @@ def _nogo_aspect_impl(target, ctx): inputs.append(config_file) ctx.actions.run( inputs = inputs, - outputs = [facts, raw_findings, escapes], + outputs = [facts, raw_findings], tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool), executable = ctx.files._nogo_check[0], mnemonic = "NogoAnalysis", @@ -309,7 +307,6 @@ def _nogo_aspect_impl(target, ctx): "-package=%s" % config_file.path, "-findings=%s" % raw_findings.path, "-facts=%s" % facts.path, - "-escapes=%s" % escapes.path, ], ) @@ -322,15 +319,16 @@ def _nogo_aspect_impl(target, ctx): all_raw_findings = [stdlib_info.raw_findings] + depset(all_raw_findings).to_list() + [raw_findings] # Return the package facts as output. - return [NogoInfo( - facts = facts, - raw_findings = all_raw_findings, - escapes = escapes, - importpath = importpath, - binaries = binaries, - srcs = srcs, - deps = deps, - )] + return [ + NogoInfo( + facts = facts, + raw_findings = all_raw_findings, + importpath = importpath, + binaries = binaries, + srcs = srcs, + deps = deps, + ), + ] nogo_aspect = go_rule( aspect, @@ -367,7 +365,6 @@ def _nogo_test_impl(ctx): if len(ctx.attr.deps) != 1: fail("nogo_test requires exactly one dep.") raw_findings = ctx.attr.deps[0][NogoInfo].raw_findings - escapes = ctx.attr.deps[0][NogoInfo].escapes # Build a step that applies the configuration. config_srcs = ctx.attr.config[NogoConfigInfo].srcs @@ -409,8 +406,6 @@ def _nogo_test_impl(ctx): # pays attention to the mnemoic above, so this must be # what is expected by the tooling. nogo_findings = depset([findings]), - # Expose all escape analysis findings (see above). - nogo_escapes = depset([escapes]), )] nogo_test = rule( @@ -432,3 +427,18 @@ nogo_test = rule( }, test = True, ) + +def _nogo_aspect_tricorder_impl(target, ctx): + if ctx.rule.kind != "nogo_test" or OutputGroupInfo not in target: + return [] + if not hasattr(target[OutputGroupInfo], "nogo_findings"): + return [] + return [ + OutputGroupInfo(tricorder = target[OutputGroupInfo].nogo_findings), + ] + +# Trivial aspect that forwards the findings from a nogo_test rule to +# go/tricorder, which reads from the `tricorder` output group. +nogo_aspect_tricorder = aspect( + implementation = _nogo_aspect_tricorder_impl, +) |