diff options
941 files changed, 13572 insertions, 7134 deletions
diff --git a/Dockerfile b/Dockerfile index 32ac76f1b..6e9d870db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:bionic -RUN apt-get update && apt-get install -y curl gnupg2 git +RUN apt-get update && apt-get install -y curl gnupg2 git python3 RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - RUN apt-get update && apt-get install -y bazel && apt-get clean @@ -53,15 +53,6 @@ Make sure the following dependencies are installed: * [Docker version 17.09.0 or greater][docker] * Gold linker (e.g. `binutils-gold` package on Ubuntu) -### Getting the source - -Clone the repository: - -``` -git clone https://gvisor.googlesource.com/gvisor gvisor -cd gvisor -``` - ### Building Build and install the `runsc` binary: @@ -116,6 +107,24 @@ Then invoke bazel with the following flags: You can also add those flags to your local ~/.bazelrc to avoid needing to specify them each time on the command line. +### Using `go get` + +This project uses [bazel][bazel] to build and manage dependencies. A synthetic +`go` branch is maintained that is compatible with standard `go` tooling for +convenience. + +For example, to build `runsc` directly from this branch: + +``` +echo "module runsc" > go.mod +GO111MODULE=on go get gvisor.dev/gvisor/runsc@go +CGO_ENABLED=0 GO111MODULE=on go install gvisor.dev/gvisor/runsc +``` + +Note that this branch is supported in a best effort capacity, and direct +development on this branch is not supported. Development should occur on the +`master` branch, which is then reflected into the `go` branch. + ## Community & Governance The governance model is documented in our [community][community] repository. diff --git a/g3doc/README.md b/g3doc/README.md new file mode 100644 index 000000000..49d58cdae --- /dev/null +++ b/g3doc/README.md @@ -0,0 +1,2 @@ +The gVisor logo files are licensed under CC BY-SA 4.0 (Creative Commons +Attribution-ShareAlike 4.0 International). @@ -1,4 +1,4 @@ -module gvisor.googlesource.com/gvisor +module gvisor.dev/gvisor go 1.12 @@ -0,0 +1,19 @@ +github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422/go.mod h1:b6Nc7NRH5C4aCISLry0tLnTjcuTEvoiqcWDdsU0sOGM= +github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/subcommands v0.0.0-20190508160503-636abe8753b8/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/google/uuid v0.0.0-20171129191014-dec09d789f3d/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= +github.com/vishvananda/netlink v1.0.1-0.20190318003149-adb577d4a45e/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= +github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD index 323263ebf..32c601a03 100644 --- a/pkg/abi/BUILD +++ b/pkg/abi/BUILD @@ -9,6 +9,6 @@ go_library( "abi_linux.go", "flag.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/abi", + importpath = "gvisor.dev/gvisor/pkg/abi", visibility = ["//:sandbox"], ) diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index fbd0e4674..cd405aa96 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -1,4 +1,4 @@ -# Package linux contains the constants and types needed to inferface with a +# Package linux contains the constants and types needed to interface with a # Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix # when the host OS may not be Linux. @@ -10,9 +10,7 @@ go_library( name = "linux", srcs = [ "aio.go", - "ashmem.go", "audit.go", - "binder.go", "bpf.go", "capability.go", "dev.go", @@ -54,7 +52,7 @@ go_library( "utsname.go", "wait.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/abi/linux", + importpath = "gvisor.dev/gvisor/pkg/abi/linux", visibility = ["//visibility:public"], deps = [ "//pkg/abi", diff --git a/pkg/abi/linux/capability.go b/pkg/abi/linux/capability.go index 65dd77e6e..965f74663 100644 --- a/pkg/abi/linux/capability.go +++ b/pkg/abi/linux/capability.go @@ -60,13 +60,14 @@ const ( CAP_BLOCK_SUSPEND = Capability(36) CAP_AUDIT_READ = Capability(37) - // MaxCapability is the highest-numbered capability. - MaxCapability = CAP_AUDIT_READ + // CAP_LAST_CAP is the highest-numbered capability. + // Seach for "CAP_LAST_CAP" to find other places that need to change. + CAP_LAST_CAP = CAP_AUDIT_READ ) // Ok returns true if cp is a supported capability. func (cp Capability) Ok() bool { - return cp >= 0 && cp <= MaxCapability + return cp >= 0 && cp <= CAP_LAST_CAP } // String returns the capability name. diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go index b30350193..30902a8ca 100644 --- a/pkg/abi/linux/fcntl.go +++ b/pkg/abi/linux/fcntl.go @@ -14,7 +14,7 @@ package linux -// Comands from linux/fcntl.h. +// Commands from linux/fcntl.h. const ( F_DUPFD = 0 F_GETFD = 1 diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 81ff9fe9e..426003eb7 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -18,8 +18,8 @@ import ( "fmt" "strings" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/binary" ) // Constants for open(2). @@ -181,6 +181,57 @@ type Stat struct { // SizeOfStat is the size of a Stat struct. var SizeOfStat = binary.Size(Stat{}) +// Flags for statx. +const ( + AT_STATX_SYNC_TYPE = 0x6000 + AT_STATX_SYNC_AS_STAT = 0x0000 + AT_STATX_FORCE_SYNC = 0x2000 + AT_STATX_DONT_SYNC = 0x4000 +) + +// Mask values for statx. +const ( + STATX_TYPE = 0x00000001 + STATX_MODE = 0x00000002 + STATX_NLINK = 0x00000004 + STATX_UID = 0x00000008 + STATX_GID = 0x00000010 + STATX_ATIME = 0x00000020 + STATX_MTIME = 0x00000040 + STATX_CTIME = 0x00000080 + STATX_INO = 0x00000100 + STATX_SIZE = 0x00000200 + STATX_BLOCKS = 0x00000400 + STATX_BASIC_STATS = 0x000007ff + STATX_BTIME = 0x00000800 + STATX_ALL = 0x00000fff + STATX__RESERVED = 0x80000000 +) + +// Statx represents struct statx. +type Statx struct { + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + UID uint32 + GID uint32 + Mode uint16 + _ uint16 + Ino uint64 + Size uint64 + Blocks uint64 + AttributesMask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + RdevMajor uint32 + RdevMinor uint32 + DevMajor uint32 + DevMinor uint32 +} + // FileMode represents a mode_t. type FileMode uint diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 04bb767dc..0e18db9ef 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -72,28 +72,3 @@ const ( SIOCGMIIPHY = 0x8947 SIOCGMIIREG = 0x8948 ) - -// ioctl(2) requests provided by uapi/linux/android/binder.h -const ( - BinderWriteReadIoctl = 0xc0306201 - BinderSetIdleTimeoutIoctl = 0x40086203 - BinderSetMaxThreadsIoctl = 0x40046205 - BinderSetIdlePriorityIoctl = 0x40046206 - BinderSetContextMgrIoctl = 0x40046207 - BinderThreadExitIoctl = 0x40046208 - BinderVersionIoctl = 0xc0046209 -) - -// ioctl(2) requests provided by drivers/staging/android/uapi/ashmem.h -const ( - AshmemSetNameIoctl = 0x41007701 - AshmemGetNameIoctl = 0x81007702 - AshmemSetSizeIoctl = 0x40087703 - AshmemGetSizeIoctl = 0x00007704 - AshmemSetProtMaskIoctl = 0x40087705 - AshmemGetProtMaskIoctl = 0x00007706 - AshmemPinIoctl = 0x40087707 - AshmemUnpinIoctl = 0x40087708 - AshmemGetPinStatusIoctl = 0x00007709 - AshmemPurgeAllCachesIoctl = 0x0000770a -) diff --git a/pkg/abi/linux/linux.go b/pkg/abi/linux/linux.go index 8a8f831cd..281acdbde 100644 --- a/pkg/abi/linux/linux.go +++ b/pkg/abi/linux/linux.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package linux contains the constants and types needed to inferface with a Linux kernel. +// Package linux contains the constants and types needed to interface with a Linux kernel. package linux // NumSoftIRQ is the number of software IRQs, exposed via /proc/stat. diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go index aef1acf75..7866352b4 100644 --- a/pkg/abi/linux/netdevice.go +++ b/pkg/abi/linux/netdevice.go @@ -14,7 +14,7 @@ package linux -import "gvisor.googlesource.com/gvisor/pkg/binary" +import "gvisor.dev/gvisor/pkg/binary" const ( // IFNAMSIZ is the size of the name field for IFReq. diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go index 9cbd77dda..c69b04ea9 100644 --- a/pkg/abi/linux/signal.go +++ b/pkg/abi/linux/signal.go @@ -15,7 +15,7 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/bits" ) const ( diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index a714ac86d..6d22002c4 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -14,7 +14,7 @@ package linux -import "gvisor.googlesource.com/gvisor/pkg/binary" +import "gvisor.dev/gvisor/pkg/binary" // Address families, from linux/socket.h. const ( diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go index fa9ee27e1..e727066d7 100644 --- a/pkg/abi/linux/time.go +++ b/pkg/abi/linux/time.go @@ -226,3 +226,18 @@ type Tms struct { // TimerID represents type timer_t, which identifies a POSIX per-process // interval timer. type TimerID int32 + +// StatxTimestamp represents struct statx_timestamp. +type StatxTimestamp struct { + Sec int64 + Nsec uint32 + _ int32 +} + +// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp. +func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) { + return StatxTimestamp{ + Sec: nsec / 1e9, + Nsec: uint32(nsec % 1e9), + } +} diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index bdb6e8f2c..39d253b98 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "amutex", srcs = ["amutex.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/amutex", + importpath = "gvisor.dev/gvisor/pkg/amutex", visibility = ["//:sandbox"], ) @@ -14,5 +14,4 @@ go_test( size = "small", srcs = ["amutex_test.go"], embed = [":amutex"], - tags = ["flaky"], ) diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go index 4f7759b87..1c4fd1784 100644 --- a/pkg/amutex/amutex.go +++ b/pkg/amutex/amutex.go @@ -21,13 +21,13 @@ import ( ) // Sleeper must be implemented by users of the abortable mutex to allow for -// cancelation of waits. +// cancellation of waits. type Sleeper interface { // SleepStart is called by the AbortableMutex.Lock() function when the // mutex is contended and the goroutine is about to sleep. // // A channel can be returned that causes the sleep to be canceled if - // it's readable. If no cancelation is desired, nil can be returned. + // it's readable. If no cancellation is desired, nil can be returned. SleepStart() <-chan struct{} // SleepFinish is called by AbortableMutex.Lock() once a contended mutex diff --git a/pkg/amutex/amutex_test.go b/pkg/amutex/amutex_test.go index 211bdda4b..1d7f45641 100644 --- a/pkg/amutex/amutex_test.go +++ b/pkg/amutex/amutex_test.go @@ -47,7 +47,7 @@ func TestMutualExclusion(t *testing.T) { // goroutines ran concurrently within the critical section. // // If one of the goroutines doesn't complete, it's likely a bug that - // causes to to wait forever. + // causes it to wait forever. const gr = 1000 const iters = 100000 v := 0 diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD index 9555bf645..47ab65346 100644 --- a/pkg/atomicbitops/BUILD +++ b/pkg/atomicbitops/BUILD @@ -9,7 +9,7 @@ go_library( "atomic_bitops_amd64.s", "atomic_bitops_common.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/atomicbitops", + importpath = "gvisor.dev/gvisor/pkg/atomicbitops", visibility = ["//:sandbox"], ) diff --git a/pkg/binary/BUILD b/pkg/binary/BUILD index bd37376b0..09d6c2c1f 100644 --- a/pkg/binary/BUILD +++ b/pkg/binary/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "binary", srcs = ["binary.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/binary", + importpath = "gvisor.dev/gvisor/pkg/binary", visibility = ["//:sandbox"], ) diff --git a/pkg/bits/BUILD b/pkg/bits/BUILD index 5214b2c24..0c2dde4f8 100644 --- a/pkg/bits/BUILD +++ b/pkg/bits/BUILD @@ -14,7 +14,7 @@ go_library( "uint64_arch_amd64_asm.s", "uint64_arch_generic.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/bits", + importpath = "gvisor.dev/gvisor/pkg/bits", visibility = ["//:sandbox"], ) diff --git a/pkg/bpf/BUILD b/pkg/bpf/BUILD index 3c7ae3103..b692aa3b1 100644 --- a/pkg/bpf/BUILD +++ b/pkg/bpf/BUILD @@ -11,7 +11,7 @@ go_library( "interpreter.go", "program_builder.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/bpf", + importpath = "gvisor.dev/gvisor/pkg/bpf", visibility = ["//visibility:public"], deps = ["//pkg/abi/linux"], ) diff --git a/pkg/bpf/bpf.go b/pkg/bpf/bpf.go index eb546f48f..b8b8ad372 100644 --- a/pkg/bpf/bpf.go +++ b/pkg/bpf/bpf.go @@ -17,7 +17,7 @@ // https://www.freebsd.org/cgi/man.cgi?bpf(4) package bpf -import "gvisor.googlesource.com/gvisor/pkg/abi/linux" +import "gvisor.dev/gvisor/pkg/abi/linux" const ( // MaxInstructions is the maximum number of instructions in a BPF program, diff --git a/pkg/bpf/decoder.go b/pkg/bpf/decoder.go index 45c192215..c8ee0c3b1 100644 --- a/pkg/bpf/decoder.go +++ b/pkg/bpf/decoder.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // DecodeProgram translates an array of BPF instructions into text format. diff --git a/pkg/bpf/decoder_test.go b/pkg/bpf/decoder_test.go index 8c4bdad21..6a023f0c0 100644 --- a/pkg/bpf/decoder_test.go +++ b/pkg/bpf/decoder_test.go @@ -17,7 +17,7 @@ package bpf import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) func TestDecode(t *testing.T) { diff --git a/pkg/bpf/interpreter.go b/pkg/bpf/interpreter.go index 86de523a2..ed27abb9b 100644 --- a/pkg/bpf/interpreter.go +++ b/pkg/bpf/interpreter.go @@ -17,7 +17,7 @@ package bpf import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // Possible values for ProgramError.Code. diff --git a/pkg/bpf/interpreter_test.go b/pkg/bpf/interpreter_test.go index 67b00ffe3..547921d0a 100644 --- a/pkg/bpf/interpreter_test.go +++ b/pkg/bpf/interpreter_test.go @@ -17,8 +17,8 @@ package bpf import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" ) func TestCompilationErrors(t *testing.T) { diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go index fc9d27203..7992044d0 100644 --- a/pkg/bpf/program_builder.go +++ b/pkg/bpf/program_builder.go @@ -18,7 +18,7 @@ import ( "fmt" "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) const ( diff --git a/pkg/bpf/program_builder_test.go b/pkg/bpf/program_builder_test.go index 5b2ad67de..92ca5f4c3 100644 --- a/pkg/bpf/program_builder_test.go +++ b/pkg/bpf/program_builder_test.go @@ -18,7 +18,7 @@ import ( "fmt" "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) func validate(p *ProgramBuilder, expected []linux.BPFInstruction) error { diff --git a/pkg/compressio/BUILD b/pkg/compressio/BUILD index 3a0ac64e6..cdec96df1 100644 --- a/pkg/compressio/BUILD +++ b/pkg/compressio/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "compressio", srcs = ["compressio.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/compressio", + importpath = "gvisor.dev/gvisor/pkg/compressio", visibility = ["//:sandbox"], deps = ["//pkg/binary"], ) diff --git a/pkg/compressio/compressio.go b/pkg/compressio/compressio.go index 8c14ccbfa..3b0bb086e 100644 --- a/pkg/compressio/compressio.go +++ b/pkg/compressio/compressio.go @@ -54,7 +54,7 @@ import ( "runtime" "sync" - "gvisor.googlesource.com/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/binary" ) var bufPool = sync.Pool{ diff --git a/pkg/control/client/BUILD b/pkg/control/client/BUILD index 22a4a4a5a..066d7b1a1 100644 --- a/pkg/control/client/BUILD +++ b/pkg/control/client/BUILD @@ -7,7 +7,7 @@ go_library( srcs = [ "client.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/control/client", + importpath = "gvisor.dev/gvisor/pkg/control/client", visibility = ["//:sandbox"], deps = [ "//pkg/unet", diff --git a/pkg/control/client/client.go b/pkg/control/client/client.go index 3fec27846..41807cd45 100644 --- a/pkg/control/client/client.go +++ b/pkg/control/client/client.go @@ -16,8 +16,8 @@ package client import ( - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" ) // ConnectTo attempts to connect to the sandbox with the given address. diff --git a/pkg/control/server/BUILD b/pkg/control/server/BUILD index 76b2e9787..21adf3adf 100644 --- a/pkg/control/server/BUILD +++ b/pkg/control/server/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "server", srcs = ["server.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/control/server", + importpath = "gvisor.dev/gvisor/pkg/control/server", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go index 1a15da1a8..a56152d10 100644 --- a/pkg/control/server/server.go +++ b/pkg/control/server/server.go @@ -24,9 +24,9 @@ import ( "os" "sync" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" ) // curUID is the unix user ID of the user that the control server is running as. diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD index 29cc38778..830e19e07 100644 --- a/pkg/cpuid/BUILD +++ b/pkg/cpuid/BUILD @@ -8,7 +8,7 @@ go_library( "cpu_amd64.s", "cpuid.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/cpuid", + importpath = "gvisor.dev/gvisor/pkg/cpuid", visibility = ["//:sandbox"], deps = ["//pkg/log"], ) diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go index 3eb2bcd2b..3fabaf445 100644 --- a/pkg/cpuid/cpuid.go +++ b/pkg/cpuid/cpuid.go @@ -36,7 +36,7 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // Common references for CPUID leaves and bits: @@ -620,7 +620,7 @@ func vendorIDFromRegs(bx, cx, dx uint32) string { // state includes floating point registers, and other cpu state that's not // associated with the normal task context. // -// Note: We can save some space here with an optimiazation where we use a +// Note: We can save some space here with an optimization where we use a // smaller chunk of memory depending on features that are actually enabled. // Currently we just use the largest possible size for simplicity (which is // about 2.5K worst case, with avx512). diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index d1483db0a..4c336ea84 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -8,7 +8,7 @@ go_library( srcs = [ "event.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/eventchannel", + importpath = "gvisor.dev/gvisor/pkg/eventchannel", visibility = ["//:sandbox"], deps = [ ":eventchannel_go_proto", @@ -26,7 +26,7 @@ proto_library( go_proto_library( name = "eventchannel_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/eventchannel/eventchannel_go_proto", + importpath = "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto", proto = ":eventchannel_proto", visibility = ["//:sandbox"], ) diff --git a/pkg/eventchannel/event.go b/pkg/eventchannel/event.go index 4c8ae573b..f6d26532b 100644 --- a/pkg/eventchannel/event.go +++ b/pkg/eventchannel/event.go @@ -27,9 +27,9 @@ import ( "github.com/golang/protobuf/proto" "github.com/golang/protobuf/ptypes" - pb "gvisor.googlesource.com/gvisor/pkg/eventchannel/eventchannel_go_proto" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" + pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" ) // Emitter emits a proto message. @@ -144,7 +144,7 @@ type debugEmitter struct { inner Emitter } -// DebugEmitterFrom creates a new event channel emitter by wraping an existing +// DebugEmitterFrom creates a new event channel emitter by wrapping an existing // raw emitter. func DebugEmitterFrom(inner Emitter) Emitter { return &debugEmitter{ diff --git a/pkg/fd/BUILD b/pkg/fd/BUILD index ab1109157..785c685a0 100644 --- a/pkg/fd/BUILD +++ b/pkg/fd/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "fd", srcs = ["fd.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/fd", + importpath = "gvisor.dev/gvisor/pkg/fd", visibility = ["//visibility:public"], ) diff --git a/pkg/fd/fd.go b/pkg/fd/fd.go index 2785243a2..83bcfe220 100644 --- a/pkg/fd/fd.go +++ b/pkg/fd/fd.go @@ -167,7 +167,7 @@ func NewFromFile(file *os.File) (*FD, error) { return New(fd), nil } -// Open is equivallent to open(2). +// Open is equivalent to open(2). func Open(path string, openmode int, perm uint32) (*FD, error) { f, err := syscall.Open(path, openmode|syscall.O_LARGEFILE, perm) if err != nil { @@ -176,7 +176,7 @@ func Open(path string, openmode int, perm uint32) (*FD, error) { return New(f), nil } -// OpenAt is equivallent to openat(2). +// OpenAt is equivalent to openat(2). func OpenAt(dir *FD, path string, flags int, mode uint32) (*FD, error) { f, err := syscall.Openat(dir.FD(), path, flags, mode) if err != nil { diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD new file mode 100644 index 000000000..e54e7371c --- /dev/null +++ b/pkg/fdchannel/BUILD @@ -0,0 +1,17 @@ +load("//tools/go_stateify:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "fdchannel", + srcs = ["fdchannel_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/fdchannel", + visibility = ["//visibility:public"], +) + +go_test( + name = "fdchannel_test", + size = "small", + srcs = ["fdchannel_test.go"], + embed = [":fdchannel"], +) diff --git a/pkg/fdchannel/fdchannel_test.go b/pkg/fdchannel/fdchannel_test.go new file mode 100644 index 000000000..5d01dc636 --- /dev/null +++ b/pkg/fdchannel/fdchannel_test.go @@ -0,0 +1,131 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fdchannel + +import ( + "io/ioutil" + "os" + "sync" + "syscall" + "testing" + "time" +) + +func TestSendRecvFD(t *testing.T) { + sendFile, err := ioutil.TempFile("", "fdchannel_test_") + if err != nil { + t.Fatalf("failed to create temporary file: %v", err) + } + defer sendFile.Close() + + chanFDs, err := NewConnectedSockets() + if err != nil { + t.Fatalf("failed to create fdchannel sockets: %v", err) + } + sendEP := NewEndpoint(chanFDs[0]) + defer sendEP.Destroy() + recvEP := NewEndpoint(chanFDs[1]) + defer recvEP.Destroy() + + recvFD, err := recvEP.RecvFDNonblock() + if err != syscall.EAGAIN && err != syscall.EWOULDBLOCK { + t.Errorf("RecvFDNonblock before SendFD: got (%d, %v), wanted (<unspecified>, EAGAIN or EWOULDBLOCK", recvFD, err) + } + + if err := sendEP.SendFD(int(sendFile.Fd())); err != nil { + t.Fatalf("SendFD failed: %v", err) + } + recvFD, err = recvEP.RecvFD() + if err != nil { + t.Fatalf("RecvFD failed: %v", err) + } + recvFile := os.NewFile(uintptr(recvFD), "received file") + defer recvFile.Close() + + sendInfo, err := sendFile.Stat() + if err != nil { + t.Fatalf("failed to stat sent file: %v", err) + } + sendInfoSys := sendInfo.Sys() + sendStat, ok := sendInfoSys.(*syscall.Stat_t) + if !ok { + t.Fatalf("sent file's FileInfo is backed by unknown type %T", sendInfoSys) + } + + recvInfo, err := recvFile.Stat() + if err != nil { + t.Fatalf("failed to stat received file: %v", err) + } + recvInfoSys := recvInfo.Sys() + recvStat, ok := recvInfoSys.(*syscall.Stat_t) + if !ok { + t.Fatalf("received file's FileInfo is backed by unknown type %T", recvInfoSys) + } + + if sendStat.Dev != recvStat.Dev || sendStat.Ino != recvStat.Ino { + t.Errorf("sent file (dev=%d, ino=%d) does not match received file (dev=%d, ino=%d)", sendStat.Dev, sendStat.Ino, recvStat.Dev, recvStat.Ino) + } +} + +func TestShutdownThenRecvFD(t *testing.T) { + sendFile, err := ioutil.TempFile("", "fdchannel_test_") + if err != nil { + t.Fatalf("failed to create temporary file: %v", err) + } + defer sendFile.Close() + + chanFDs, err := NewConnectedSockets() + if err != nil { + t.Fatalf("failed to create fdchannel sockets: %v", err) + } + sendEP := NewEndpoint(chanFDs[0]) + defer sendEP.Destroy() + recvEP := NewEndpoint(chanFDs[1]) + defer recvEP.Destroy() + + recvEP.Shutdown() + if _, err := recvEP.RecvFD(); err == nil { + t.Error("RecvFD succeeded unexpectedly") + } +} + +func TestRecvFDThenShutdown(t *testing.T) { + sendFile, err := ioutil.TempFile("", "fdchannel_test_") + if err != nil { + t.Fatalf("failed to create temporary file: %v", err) + } + defer sendFile.Close() + + chanFDs, err := NewConnectedSockets() + if err != nil { + t.Fatalf("failed to create fdchannel sockets: %v", err) + } + sendEP := NewEndpoint(chanFDs[0]) + defer sendEP.Destroy() + recvEP := NewEndpoint(chanFDs[1]) + defer recvEP.Destroy() + + var receiverWG sync.WaitGroup + receiverWG.Add(1) + go func() { + defer receiverWG.Done() + if _, err := recvEP.RecvFD(); err == nil { + t.Error("RecvFD succeeded unexpectedly") + } + }() + defer receiverWG.Wait() + time.Sleep(time.Second) // to ensure recvEP.RecvFD() has blocked + recvEP.Shutdown() +} diff --git a/pkg/fdchannel/fdchannel_unsafe.go b/pkg/fdchannel/fdchannel_unsafe.go new file mode 100644 index 000000000..367235be5 --- /dev/null +++ b/pkg/fdchannel/fdchannel_unsafe.go @@ -0,0 +1,146 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris + +// Package fdchannel implements passing file descriptors between processes over +// Unix domain sockets. +package fdchannel + +import ( + "fmt" + "reflect" + "sync/atomic" + "syscall" + "unsafe" +) + +// int32 is the real type of a file descriptor. +const sizeofInt32 = int(unsafe.Sizeof(int32(0))) + +// NewConnectedSockets returns a pair of file descriptors, owned by the caller, +// representing connected sockets that may be passed to separate calls to +// NewEndpoint to create connected Endpoints. +func NewConnectedSockets() ([2]int, error) { + return syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) +} + +// Endpoint sends file descriptors to, and receives them from, another +// connected Endpoint. +// +// Endpoint is not copyable or movable by value. +type Endpoint struct { + sockfd int32 // accessed using atomic memory operations + msghdr syscall.Msghdr + cmsg *syscall.Cmsghdr // followed by sizeofInt32 bytes of data +} + +// Init must be called on zero-value Endpoints before first use. sockfd must be +// a blocking AF_UNIX SOCK_SEQPACKET socket. +func (ep *Endpoint) Init(sockfd int) { + // "Datagram sockets in various domains (e.g., the UNIX and Internet + // domains) permit zero-length datagrams." - recv(2). Experimentally, + // sendmsg+recvmsg for a zero-length datagram is slightly faster than + // sendmsg+recvmsg for a single byte over a stream socket. + cmsgSlice := make([]byte, syscall.CmsgSpace(sizeofInt32)) + cmsgReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&cmsgSlice)) + ep.sockfd = int32(sockfd) + ep.msghdr.Control = (*byte)((unsafe.Pointer)(cmsgReflect.Data)) + ep.cmsg = (*syscall.Cmsghdr)((unsafe.Pointer)(cmsgReflect.Data)) + // ep.msghdr.Controllen and ep.cmsg.* are mutated by recvmsg(2), so they're + // set before calling sendmsg/recvmsg. +} + +// NewEndpoint is a convenience function that returns an initialized Endpoint +// allocated on the heap. +func NewEndpoint(sockfd int) *Endpoint { + ep := &Endpoint{} + ep.Init(sockfd) + return ep +} + +// Destroy releases resources owned by ep. No other Endpoint methods may be +// called after Destroy. +func (ep *Endpoint) Destroy() { + // These need not use sync/atomic since there must not be any concurrent + // calls to Endpoint methods. + if ep.sockfd >= 0 { + syscall.Close(int(ep.sockfd)) + ep.sockfd = -1 + } +} + +// Shutdown causes concurrent and future calls to ep.SendFD(), ep.RecvFD(), and +// ep.RecvFDNonblock(), as well as the same calls in the connected Endpoint, to +// unblock and return errors. It does not wait for concurrent calls to return. +// +// Shutdown is the only Endpoint method that may be called concurrently with +// other methods. +func (ep *Endpoint) Shutdown() { + if sockfd := int(atomic.SwapInt32(&ep.sockfd, -1)); sockfd >= 0 { + syscall.Shutdown(sockfd, syscall.SHUT_RDWR) + syscall.Close(sockfd) + } +} + +// SendFD sends the open file description represented by the given file +// descriptor to the connected Endpoint. +func (ep *Endpoint) SendFD(fd int) error { + cmsgLen := syscall.CmsgLen(sizeofInt32) + ep.cmsg.Level = syscall.SOL_SOCKET + ep.cmsg.Type = syscall.SCM_RIGHTS + ep.cmsg.SetLen(cmsgLen) + *ep.cmsgData() = int32(fd) + ep.msghdr.SetControllen(cmsgLen) + _, _, e := syscall.Syscall(syscall.SYS_SENDMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), 0) + if e != 0 { + return e + } + return nil +} + +// RecvFD receives an open file description from the connected Endpoint and +// returns a file descriptor representing it, owned by the caller. +func (ep *Endpoint) RecvFD() (int, error) { + return ep.recvFD(0) +} + +// RecvFDNonblock receives an open file description from the connected Endpoint +// and returns a file descriptor representing it, owned by the caller. If there +// are no pending receivable open file descriptions, RecvFDNonblock returns +// (<unspecified>, EAGAIN or EWOULDBLOCK). +func (ep *Endpoint) RecvFDNonblock() (int, error) { + return ep.recvFD(syscall.MSG_DONTWAIT) +} + +func (ep *Endpoint) recvFD(flags uintptr) (int, error) { + cmsgLen := syscall.CmsgLen(sizeofInt32) + ep.msghdr.SetControllen(cmsgLen) + _, _, e := syscall.Syscall(syscall.SYS_RECVMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), flags|syscall.MSG_TRUNC) + if e != 0 { + return -1, e + } + if int(ep.msghdr.Controllen) != cmsgLen { + return -1, fmt.Errorf("received control message has incorrect length: got %d, wanted %d", ep.msghdr.Controllen, cmsgLen) + } + if ep.cmsg.Level != syscall.SOL_SOCKET || ep.cmsg.Type != syscall.SCM_RIGHTS { + return -1, fmt.Errorf("received control message has incorrect (level, type): got (%v, %v), wanted (%v, %v)", ep.cmsg.Level, ep.cmsg.Type, syscall.SOL_SOCKET, syscall.SCM_RIGHTS) + } + return int(*ep.cmsgData()), nil +} + +func (ep *Endpoint) cmsgData() *int32 { + // syscall.CmsgLen(0) == syscall.cmsgAlignOf(syscall.SizeofCmsghdr) + return (*int32)((unsafe.Pointer)(uintptr((unsafe.Pointer)(ep.cmsg)) + uintptr(syscall.CmsgLen(0)))) +} diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD index 8c8d193cc..d0552c06e 100644 --- a/pkg/fdnotifier/BUILD +++ b/pkg/fdnotifier/BUILD @@ -8,7 +8,7 @@ go_library( "fdnotifier.go", "poll_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/fdnotifier", + importpath = "gvisor.dev/gvisor/pkg/fdnotifier", visibility = ["//:sandbox"], deps = ["//pkg/waiter"], ) diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go index f0b028b0b..58529f99f 100644 --- a/pkg/fdnotifier/fdnotifier.go +++ b/pkg/fdnotifier/fdnotifier.go @@ -25,7 +25,7 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/waiter" ) type fdInfo struct { diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go index bc5e0ac44..ab8857b5e 100644 --- a/pkg/fdnotifier/poll_unsafe.go +++ b/pkg/fdnotifier/poll_unsafe.go @@ -20,7 +20,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/waiter" ) // NonBlockingPoll polls the given FD in non-blocking fashion. It is used just diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD new file mode 100644 index 000000000..7126fc45f --- /dev/null +++ b/pkg/flipcall/BUILD @@ -0,0 +1,31 @@ +load("//tools/go_stateify:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "flipcall", + srcs = [ + "endpoint_futex.go", + "endpoint_unsafe.go", + "flipcall.go", + "futex_linux.go", + "packet_window_allocator.go", + ], + importpath = "gvisor.dev/gvisor/pkg/flipcall", + visibility = ["//visibility:public"], + deps = [ + "//pkg/abi/linux", + "//pkg/log", + "//pkg/memutil", + ], +) + +go_test( + name = "flipcall_test", + size = "small", + srcs = [ + "flipcall_example_test.go", + "flipcall_test.go", + ], + embed = [":flipcall"], +) diff --git a/pkg/flipcall/endpoint_futex.go b/pkg/flipcall/endpoint_futex.go new file mode 100644 index 000000000..5cab02b1d --- /dev/null +++ b/pkg/flipcall/endpoint_futex.go @@ -0,0 +1,45 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flipcall + +import ( + "fmt" +) + +type endpointControlState struct{} + +func (ep *Endpoint) initControlState(ctrlMode ControlMode) error { + if ctrlMode != ControlModeFutex { + return fmt.Errorf("unsupported control mode: %v", ctrlMode) + } + return nil +} + +func (ep *Endpoint) doRoundTrip() error { + return ep.doFutexRoundTrip() +} + +func (ep *Endpoint) doWaitFirst() error { + return ep.doFutexWaitFirst() +} + +func (ep *Endpoint) doNotifyLast() error { + return ep.doFutexNotifyLast() +} + +// Preconditions: ep.isShutdown() == true. +func (ep *Endpoint) interruptForShutdown() { + ep.doFutexInterruptForShutdown() +} diff --git a/pkg/flipcall/endpoint_unsafe.go b/pkg/flipcall/endpoint_unsafe.go new file mode 100644 index 000000000..8319955e0 --- /dev/null +++ b/pkg/flipcall/endpoint_unsafe.go @@ -0,0 +1,238 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flipcall + +import ( + "fmt" + "math" + "reflect" + "sync/atomic" + "syscall" + "unsafe" +) + +// An Endpoint provides the ability to synchronously transfer data and control +// to a connected peer Endpoint, which may be in another process. +// +// Since the Endpoint control transfer model is synchronous, at any given time +// one Endpoint "has control" (designated the *active* Endpoint), and the other +// is "waiting for control" (designated the *inactive* Endpoint). Users of the +// flipcall package arbitrarily designate one Endpoint as initially-active, and +// the other as initially-inactive; in a client/server protocol, the client +// Endpoint is usually initially-active (able to send a request) and the server +// Endpoint is usually initially-inactive (waiting for a request). The +// initially-active Endpoint writes data to be sent to Endpoint.Data(), and +// then synchronously transfers control to the inactive Endpoint by calling +// Endpoint.SendRecv(), becoming the inactive Endpoint in the process. The +// initially-inactive Endpoint waits for control by calling +// Endpoint.RecvFirst(); receiving control causes it to become the active +// Endpoint. After this, the protocol is symmetric: the active Endpoint reads +// data sent by the peer by reading from Endpoint.Data(), writes data to be +// sent to the peer into Endpoint.Data(), and then calls Endpoint.SendRecv() to +// exchange roles with the peer, which blocks until the peer has done the same. +type Endpoint struct { + // shutdown is non-zero if Endpoint.Shutdown() has been called. shutdown is + // accessed using atomic memory operations. + shutdown uint32 + + // dataCap is the size of the datagram part of the packet window in bytes. + // dataCap is immutable. + dataCap uint32 + + // packet is the beginning of the packet window. packet is immutable. + packet unsafe.Pointer + + ctrl endpointControlState +} + +// Init must be called on zero-value Endpoints before first use. If it +// succeeds, Destroy() must be called once the Endpoint is no longer in use. +// +// ctrlMode specifies how connected Endpoints will exchange control. Both +// connected Endpoints must specify the same value for ctrlMode. +// +// pwd represents the packet window used to exchange data with the peer +// Endpoint. FD may differ between Endpoints if they are in different +// processes, but must represent the same file. The packet window must +// initially be filled with zero bytes. +func (ep *Endpoint) Init(ctrlMode ControlMode, pwd PacketWindowDescriptor) error { + if pwd.Length < pageSize { + return fmt.Errorf("packet window size (%d) less than minimum (%d)", pwd.Length, pageSize) + } + if pwd.Length > math.MaxUint32 { + return fmt.Errorf("packet window size (%d) exceeds maximum (%d)", pwd.Length, math.MaxUint32) + } + m, _, e := syscall.Syscall6(syscall.SYS_MMAP, 0, uintptr(pwd.Length), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset)) + if e != 0 { + return fmt.Errorf("failed to mmap packet window: %v", e) + } + ep.dataCap = uint32(pwd.Length) - uint32(packetHeaderBytes) + ep.packet = (unsafe.Pointer)(m) + if err := ep.initControlState(ctrlMode); err != nil { + ep.unmapPacket() + return err + } + return nil +} + +// NewEndpoint is a convenience function that returns an initialized Endpoint +// allocated on the heap. +func NewEndpoint(ctrlMode ControlMode, pwd PacketWindowDescriptor) (*Endpoint, error) { + var ep Endpoint + if err := ep.Init(ctrlMode, pwd); err != nil { + return nil, err + } + return &ep, nil +} + +func (ep *Endpoint) unmapPacket() { + syscall.Syscall(syscall.SYS_MUNMAP, uintptr(ep.packet), uintptr(ep.dataCap)+packetHeaderBytes, 0) + ep.dataCap = 0 + ep.packet = nil +} + +// Destroy releases resources owned by ep. No other Endpoint methods may be +// called after Destroy. +func (ep *Endpoint) Destroy() { + ep.unmapPacket() +} + +// Packets consist of an 8-byte header followed by an arbitrarily-sized +// datagram. The header consists of: +// +// - A 4-byte native-endian sequence number, which is incremented by the active +// Endpoint after it finishes writing to the packet window. The sequence number +// is needed to handle spurious wakeups. +// +// - A 4-byte native-endian datagram length in bytes. +const ( + sizeofUint32 = unsafe.Sizeof(uint32(0)) + packetHeaderBytes = 2 * sizeofUint32 +) + +func (ep *Endpoint) seq() *uint32 { + return (*uint32)(ep.packet) +} + +func (ep *Endpoint) dataLen() *uint32 { + return (*uint32)((unsafe.Pointer)(uintptr(ep.packet) + sizeofUint32)) +} + +// DataCap returns the maximum datagram size supported by ep in bytes. +func (ep *Endpoint) DataCap() uint32 { + return ep.dataCap +} + +func (ep *Endpoint) data() unsafe.Pointer { + return unsafe.Pointer(uintptr(ep.packet) + packetHeaderBytes) +} + +// Data returns the datagram part of ep's packet window as a byte slice. +// +// Note that the packet window is shared with the potentially-untrusted peer +// Endpoint, which may concurrently mutate the contents of the packet window. +// Thus: +// +// - Readers must not assume that two reads of the same byte in Data() will +// return the same result. In other words, readers should read any given byte +// in Data() at most once. +// +// - Writers must not assume that they will read back the same data that they +// have written. In other words, writers should avoid reading from Data() at +// all. +func (ep *Endpoint) Data() []byte { + var bs []byte + bsReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&bs)) + bsReflect.Data = uintptr(ep.data()) + bsReflect.Len = int(ep.DataCap()) + bsReflect.Cap = bsReflect.Len + return bs +} + +// SendRecv transfers control to the peer Endpoint, causing its call to +// Endpoint.SendRecv() or Endpoint.RecvFirst() to return with the given +// datagram length, then blocks until the peer Endpoint calls +// Endpoint.SendRecv() or Endpoint.SendLast(). +// +// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has +// returned an error. ep.SendLast() has never been called. +func (ep *Endpoint) SendRecv(dataLen uint32) (uint32, error) { + dataCap := ep.DataCap() + if dataLen > dataCap { + return 0, fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap) + } + atomic.StoreUint32(ep.dataLen(), dataLen) + if err := ep.doRoundTrip(); err != nil { + return 0, err + } + recvDataLen := atomic.LoadUint32(ep.dataLen()) + if recvDataLen > dataCap { + return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap) + } + return recvDataLen, nil +} + +// RecvFirst blocks until the peer Endpoint calls Endpoint.SendRecv(), then +// returns the datagram length specified by that call. +// +// Preconditions: ep.SendRecv(), ep.RecvFirst(), and ep.SendLast() have never +// been called. +func (ep *Endpoint) RecvFirst() (uint32, error) { + if err := ep.doWaitFirst(); err != nil { + return 0, err + } + recvDataLen := atomic.LoadUint32(ep.dataLen()) + if dataCap := ep.DataCap(); recvDataLen > dataCap { + return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap) + } + return recvDataLen, nil +} + +// SendLast causes the peer Endpoint's call to Endpoint.SendRecv() or +// Endpoint.RecvFirst() to return with the given datagram length. +// +// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has +// returned an error. ep.SendLast() has never been called. +func (ep *Endpoint) SendLast(dataLen uint32) error { + dataCap := ep.DataCap() + if dataLen > dataCap { + return fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap) + } + atomic.StoreUint32(ep.dataLen(), dataLen) + if err := ep.doNotifyLast(); err != nil { + return err + } + return nil +} + +// Shutdown causes concurrent and future calls to ep.SendRecv(), +// ep.RecvFirst(), and ep.SendLast() to unblock and return errors. It does not +// wait for concurrent calls to return. +func (ep *Endpoint) Shutdown() { + if atomic.SwapUint32(&ep.shutdown, 1) == 0 { + ep.interruptForShutdown() + } +} + +func (ep *Endpoint) isShutdown() bool { + return atomic.LoadUint32(&ep.shutdown) != 0 +} + +type endpointShutdownError struct{} + +// Error implements error.Error. +func (endpointShutdownError) Error() string { + return "Endpoint.Shutdown() has been called" +} diff --git a/pkg/flipcall/flipcall.go b/pkg/flipcall/flipcall.go new file mode 100644 index 000000000..79a1e418a --- /dev/null +++ b/pkg/flipcall/flipcall.go @@ -0,0 +1,32 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package flipcall implements a protocol providing Fast Local Interprocess +// Procedure Calls. +package flipcall + +// ControlMode defines how control is exchanged across a connection. +type ControlMode uint8 + +const ( + // ControlModeInvalid is invalid, and exists so that ControlMode fields in + // structs must be explicitly initialized. + ControlModeInvalid ControlMode = iota + + // ControlModeFutex uses shared futex operations on packet control words. + ControlModeFutex + + // controlModeCount is the number of ControlModes in this list. + controlModeCount +) diff --git a/pkg/flipcall/flipcall_example_test.go b/pkg/flipcall/flipcall_example_test.go new file mode 100644 index 000000000..572a1f119 --- /dev/null +++ b/pkg/flipcall/flipcall_example_test.go @@ -0,0 +1,106 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flipcall + +import ( + "bytes" + "fmt" +) + +func Example() { + const ( + reqPrefix = "request " + respPrefix = "response " + count = 3 + maxMessageLen = len(respPrefix) + 1 // 1 digit + ) + + pwa, err := NewPacketWindowAllocator() + if err != nil { + panic(err) + } + defer pwa.Destroy() + pwd, err := pwa.Allocate(PacketWindowLengthForDataCap(uint32(maxMessageLen))) + if err != nil { + panic(err) + } + clientEP, err := NewEndpoint(ControlModeFutex, pwd) + if err != nil { + panic(err) + } + defer clientEP.Destroy() + serverEP, err := NewEndpoint(ControlModeFutex, pwd) + if err != nil { + panic(err) + } + defer serverEP.Destroy() + + serverDone := make(chan struct{}) + go func() { + defer func() { serverDone <- struct{}{} }() + i := 0 + var buf bytes.Buffer + // wait for first request + n, err := serverEP.RecvFirst() + if err != nil { + return + } + for { + // read request + buf.Reset() + buf.Write(serverEP.Data()[:n]) + fmt.Println(buf.String()) + // write response + buf.Reset() + fmt.Fprintf(&buf, "%s%d", respPrefix, i) + copy(serverEP.Data(), buf.Bytes()) + // send response and wait for next request + n, err = serverEP.SendRecv(uint32(buf.Len())) + if err != nil { + return + } + i++ + } + }() + defer func() { + serverEP.Shutdown() + <-serverDone + }() + + var buf bytes.Buffer + for i := 0; i < count; i++ { + // write request + buf.Reset() + fmt.Fprintf(&buf, "%s%d", reqPrefix, i) + copy(clientEP.Data(), buf.Bytes()) + // send request and wait for response + n, err := clientEP.SendRecv(uint32(buf.Len())) + if err != nil { + panic(err) + } + // read response + buf.Reset() + buf.Write(clientEP.Data()[:n]) + fmt.Println(buf.String()) + } + + // Output: + // request 0 + // response 0 + // request 1 + // response 1 + // request 2 + // response 2 +} diff --git a/pkg/flipcall/flipcall_test.go b/pkg/flipcall/flipcall_test.go new file mode 100644 index 000000000..20d3002f0 --- /dev/null +++ b/pkg/flipcall/flipcall_test.go @@ -0,0 +1,211 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flipcall + +import ( + "testing" + "time" +) + +var testPacketWindowSize = pageSize + +func testSendRecv(t *testing.T, ctrlMode ControlMode) { + pwa, err := NewPacketWindowAllocator() + if err != nil { + t.Fatalf("failed to create PacketWindowAllocator: %v", err) + } + defer pwa.Destroy() + pwd, err := pwa.Allocate(testPacketWindowSize) + if err != nil { + t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err) + } + + sendEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + t.Fatalf("failed to create Endpoint: %v", err) + } + defer sendEP.Destroy() + recvEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + t.Fatalf("failed to create Endpoint: %v", err) + } + defer recvEP.Destroy() + + otherThreadDone := make(chan struct{}) + go func() { + defer func() { otherThreadDone <- struct{}{} }() + t.Logf("initially-inactive Endpoint waiting for packet 1") + if _, err := recvEP.RecvFirst(); err != nil { + t.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err) + } + t.Logf("initially-inactive Endpoint got packet 1, sending packet 2 and waiting for packet 3") + if _, err := recvEP.SendRecv(0); err != nil { + t.Fatalf("initially-inactive Endpoint.SendRecv() failed: %v", err) + } + t.Logf("initially-inactive Endpoint got packet 3") + }() + defer func() { + t.Logf("waiting for initially-inactive Endpoint goroutine to complete") + <-otherThreadDone + }() + + t.Logf("initially-active Endpoint sending packet 1 and waiting for packet 2") + if _, err := sendEP.SendRecv(0); err != nil { + t.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err) + } + t.Logf("initially-active Endpoint got packet 2, sending packet 3") + if err := sendEP.SendLast(0); err != nil { + t.Fatalf("initially-active Endpoint.SendLast() failed: %v", err) + } +} + +func TestFutexSendRecv(t *testing.T) { + testSendRecv(t, ControlModeFutex) +} + +func testRecvFirstShutdown(t *testing.T, ctrlMode ControlMode) { + pwa, err := NewPacketWindowAllocator() + if err != nil { + t.Fatalf("failed to create PacketWindowAllocator: %v", err) + } + defer pwa.Destroy() + pwd, err := pwa.Allocate(testPacketWindowSize) + if err != nil { + t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err) + } + + ep, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + t.Fatalf("failed to create Endpoint: %v", err) + } + defer ep.Destroy() + + otherThreadDone := make(chan struct{}) + go func() { + defer func() { otherThreadDone <- struct{}{} }() + _, err := ep.RecvFirst() + if err == nil { + t.Errorf("Endpoint.RecvFirst() succeeded unexpectedly") + } + }() + + time.Sleep(time.Second) // to ensure ep.RecvFirst() has blocked + ep.Shutdown() + <-otherThreadDone +} + +func TestFutexRecvFirstShutdown(t *testing.T) { + testRecvFirstShutdown(t, ControlModeFutex) +} + +func testSendRecvShutdown(t *testing.T, ctrlMode ControlMode) { + pwa, err := NewPacketWindowAllocator() + if err != nil { + t.Fatalf("failed to create PacketWindowAllocator: %v", err) + } + defer pwa.Destroy() + pwd, err := pwa.Allocate(testPacketWindowSize) + if err != nil { + t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err) + } + + sendEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + t.Fatalf("failed to create Endpoint: %v", err) + } + defer sendEP.Destroy() + recvEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + t.Fatalf("failed to create Endpoint: %v", err) + } + defer recvEP.Destroy() + + otherThreadDone := make(chan struct{}) + go func() { + defer func() { otherThreadDone <- struct{}{} }() + if _, err := recvEP.RecvFirst(); err != nil { + t.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err) + } + if _, err := recvEP.SendRecv(0); err == nil { + t.Errorf("initially-inactive Endpoint.SendRecv() succeeded unexpectedly") + } + }() + + if _, err := sendEP.SendRecv(0); err != nil { + t.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err) + } + time.Sleep(time.Second) // to ensure recvEP.SendRecv() has blocked + recvEP.Shutdown() + <-otherThreadDone +} + +func TestFutexSendRecvShutdown(t *testing.T) { + testSendRecvShutdown(t, ControlModeFutex) +} + +func benchmarkSendRecv(b *testing.B, ctrlMode ControlMode) { + pwa, err := NewPacketWindowAllocator() + if err != nil { + b.Fatalf("failed to create PacketWindowAllocator: %v", err) + } + defer pwa.Destroy() + pwd, err := pwa.Allocate(testPacketWindowSize) + if err != nil { + b.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err) + } + + sendEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + b.Fatalf("failed to create Endpoint: %v", err) + } + defer sendEP.Destroy() + recvEP, err := NewEndpoint(ctrlMode, pwd) + if err != nil { + b.Fatalf("failed to create Endpoint: %v", err) + } + defer recvEP.Destroy() + + otherThreadDone := make(chan struct{}) + go func() { + defer func() { otherThreadDone <- struct{}{} }() + if b.N == 0 { + return + } + if _, err := recvEP.RecvFirst(); err != nil { + b.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err) + } + for i := 1; i < b.N; i++ { + if _, err := recvEP.SendRecv(0); err != nil { + b.Fatalf("initially-inactive Endpoint.SendRecv() failed: %v", err) + } + } + if err := recvEP.SendLast(0); err != nil { + b.Fatalf("initially-inactive Endpoint.SendLast() failed: %v", err) + } + }() + defer func() { <-otherThreadDone }() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := sendEP.SendRecv(0); err != nil { + b.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err) + } + } + b.StopTimer() +} + +func BenchmarkFutexSendRecv(b *testing.B) { + benchmarkSendRecv(b, ControlModeFutex) +} diff --git a/pkg/flipcall/futex_linux.go b/pkg/flipcall/futex_linux.go new file mode 100644 index 000000000..3f592ad16 --- /dev/null +++ b/pkg/flipcall/futex_linux.go @@ -0,0 +1,94 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package flipcall + +import ( + "fmt" + "math" + "sync/atomic" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" +) + +func (ep *Endpoint) doFutexRoundTrip() error { + ourSeq, err := ep.doFutexNotifySeq() + if err != nil { + return err + } + return ep.doFutexWaitSeq(ourSeq) +} + +func (ep *Endpoint) doFutexWaitFirst() error { + return ep.doFutexWaitSeq(0) +} + +func (ep *Endpoint) doFutexNotifyLast() error { + _, err := ep.doFutexNotifySeq() + return err +} + +func (ep *Endpoint) doFutexNotifySeq() (uint32, error) { + ourSeq := atomic.AddUint32(ep.seq(), 1) + if err := ep.futexWake(1); err != nil { + return ourSeq, fmt.Errorf("failed to FUTEX_WAKE peer Endpoint: %v", err) + } + return ourSeq, nil +} + +func (ep *Endpoint) doFutexWaitSeq(prevSeq uint32) error { + nextSeq := prevSeq + 1 + for { + if ep.isShutdown() { + return endpointShutdownError{} + } + if err := ep.futexWait(prevSeq); err != nil { + return fmt.Errorf("failed to FUTEX_WAIT for peer Endpoint: %v", err) + } + seq := atomic.LoadUint32(ep.seq()) + if seq == nextSeq { + return nil + } + if seq != prevSeq { + return fmt.Errorf("invalid packet sequence number %d (expected %d or %d)", seq, prevSeq, nextSeq) + } + } +} + +func (ep *Endpoint) doFutexInterruptForShutdown() { + // Wake MaxInt32 threads to prevent a malicious or broken peer from + // swallowing our wakeup by FUTEX_WAITing from multiple threads. + if err := ep.futexWake(math.MaxInt32); err != nil { + log.Warningf("failed to FUTEX_WAKE Endpoint: %v", err) + } +} + +func (ep *Endpoint) futexWake(numThreads int32) error { + if _, _, e := syscall.RawSyscall(syscall.SYS_FUTEX, uintptr(ep.packet), linux.FUTEX_WAKE, uintptr(numThreads)); e != 0 { + return e + } + return nil +} + +func (ep *Endpoint) futexWait(seq uint32) error { + _, _, e := syscall.Syscall6(syscall.SYS_FUTEX, uintptr(ep.packet), linux.FUTEX_WAIT, uintptr(seq), 0, 0, 0) + if e != 0 && e != syscall.EAGAIN && e != syscall.EINTR { + return e + } + return nil +} diff --git a/pkg/flipcall/packet_window_allocator.go b/pkg/flipcall/packet_window_allocator.go new file mode 100644 index 000000000..7b455b24d --- /dev/null +++ b/pkg/flipcall/packet_window_allocator.go @@ -0,0 +1,166 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flipcall + +import ( + "fmt" + "math/bits" + "os" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/memutil" +) + +var ( + pageSize = os.Getpagesize() + pageMask = pageSize - 1 +) + +func init() { + if bits.OnesCount(uint(pageSize)) != 1 { + // This is depended on by roundUpToPage(). + panic(fmt.Sprintf("system page size (%d) is not a power of 2", pageSize)) + } + if uintptr(pageSize) < packetHeaderBytes { + // This is required since Endpoint.Init() imposes a minimum packet + // window size of 1 page. + panic(fmt.Sprintf("system page size (%d) is less than packet header size (%d)", pageSize, packetHeaderBytes)) + } +} + +// PacketWindowDescriptor represents a packet window, a range of pages in a +// shared memory file that is used to exchange packets between partner +// Endpoints. +type PacketWindowDescriptor struct { + // FD is the file descriptor representing the shared memory file. + FD int + + // Offset is the offset into the shared memory file at which the packet + // window begins. + Offset int64 + + // Length is the size of the packet window in bytes. + Length int +} + +// PacketWindowLengthForDataCap returns the minimum packet window size required +// to accommodate datagrams of the given size in bytes. +func PacketWindowLengthForDataCap(dataCap uint32) int { + return roundUpToPage(int(dataCap) + int(packetHeaderBytes)) +} + +func roundUpToPage(x int) int { + return (x + pageMask) &^ pageMask +} + +// A PacketWindowAllocator owns a shared memory file, and allocates packet +// windows from it. +type PacketWindowAllocator struct { + fd int + nextAlloc int64 + fileSize int64 +} + +// Init must be called on zero-value PacketWindowAllocators before first use. +// If it succeeds, Destroy() must be called once the PacketWindowAllocator is +// no longer in use. +func (pwa *PacketWindowAllocator) Init() error { + fd, err := memutil.CreateMemFD("flipcall_packet_windows", linux.MFD_CLOEXEC|linux.MFD_ALLOW_SEALING) + if err != nil { + return fmt.Errorf("failed to create memfd: %v", err) + } + // Apply F_SEAL_SHRINK to prevent either party from causing SIGBUS in the + // other by truncating the file, and F_SEAL_SEAL to prevent either party + // from applying F_SEAL_GROW or F_SEAL_WRITE. + if _, _, e := syscall.RawSyscall(syscall.SYS_FCNTL, uintptr(fd), linux.F_ADD_SEALS, linux.F_SEAL_SHRINK|linux.F_SEAL_SEAL); e != 0 { + syscall.Close(fd) + return fmt.Errorf("failed to apply memfd seals: %v", e) + } + pwa.fd = fd + return nil +} + +// NewPacketWindowAllocator is a convenience function that returns an +// initialized PacketWindowAllocator allocated on the heap. +func NewPacketWindowAllocator() (*PacketWindowAllocator, error) { + var pwa PacketWindowAllocator + if err := pwa.Init(); err != nil { + return nil, err + } + return &pwa, nil +} + +// Destroy releases resources owned by pwa. This invalidates file descriptors +// previously returned by pwa.FD() and pwd.Allocate(). +func (pwa *PacketWindowAllocator) Destroy() { + syscall.Close(pwa.fd) +} + +// FD represents the file descriptor of the shared memory file backing pwa. +func (pwa *PacketWindowAllocator) FD() int { + return pwa.fd +} + +// Allocate allocates a new packet window of at least the given size and +// returns a PacketWindowDescriptor representing it. +// +// Preconditions: size > 0. +func (pwa *PacketWindowAllocator) Allocate(size int) (PacketWindowDescriptor, error) { + if size <= 0 { + return PacketWindowDescriptor{}, fmt.Errorf("invalid size: %d", size) + } + // Page-align size to ensure that pwa.nextAlloc remains page-aligned. + size = roundUpToPage(size) + if size <= 0 { + return PacketWindowDescriptor{}, fmt.Errorf("size %d overflows after rounding up to page size", size) + } + end := pwa.nextAlloc + int64(size) // overflow checked by ensureFileSize + if err := pwa.ensureFileSize(end); err != nil { + return PacketWindowDescriptor{}, err + } + start := pwa.nextAlloc + pwa.nextAlloc = end + return PacketWindowDescriptor{ + FD: pwa.fd, + Offset: start, + Length: size, + }, nil +} + +func (pwa *PacketWindowAllocator) ensureFileSize(min int64) error { + if min <= 0 { + return fmt.Errorf("file size would overflow") + } + if pwa.fileSize >= min { + return nil + } + newSize := 2 * pwa.fileSize + if newSize == 0 { + newSize = int64(pageSize) + } + for newSize < min { + newNewSize := newSize * 2 + if newNewSize <= 0 { + return fmt.Errorf("file size would overflow") + } + newSize = newNewSize + } + if err := syscall.Ftruncate(pwa.fd, newSize); err != nil { + return fmt.Errorf("ftruncate failed: %v", err) + } + pwa.fileSize = newSize + return nil +} diff --git a/pkg/gate/BUILD b/pkg/gate/BUILD index 83679f2da..e6a8dbd02 100644 --- a/pkg/gate/BUILD +++ b/pkg/gate/BUILD @@ -7,7 +7,7 @@ go_library( srcs = [ "gate.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/gate", + importpath = "gvisor.dev/gvisor/pkg/gate", visibility = ["//visibility:public"], ) diff --git a/pkg/gate/gate_test.go b/pkg/gate/gate_test.go index 7467e7d07..5dbd8d712 100644 --- a/pkg/gate/gate_test.go +++ b/pkg/gate/gate_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/gate" + "gvisor.dev/gvisor/pkg/gate" ) func TestBasicEnter(t *testing.T) { diff --git a/pkg/ilist/BUILD b/pkg/ilist/BUILD index dbd65ab12..8f3defa25 100644 --- a/pkg/ilist/BUILD +++ b/pkg/ilist/BUILD @@ -8,7 +8,7 @@ go_library( srcs = [ "interface_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/ilist", + importpath = "gvisor.dev/gvisor/pkg/ilist", visibility = ["//visibility:public"], ) diff --git a/pkg/linewriter/BUILD b/pkg/linewriter/BUILD index d1aa2e7d6..c8e923a74 100644 --- a/pkg/linewriter/BUILD +++ b/pkg/linewriter/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "linewriter", srcs = ["linewriter.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/linewriter", + importpath = "gvisor.dev/gvisor/pkg/linewriter", visibility = ["//visibility:public"], ) diff --git a/pkg/log/BUILD b/pkg/log/BUILD index b2d18eddb..12615240c 100644 --- a/pkg/log/BUILD +++ b/pkg/log/BUILD @@ -11,7 +11,7 @@ go_library( "json_k8s.go", "log.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/log", + importpath = "gvisor.dev/gvisor/pkg/log", visibility = [ "//visibility:public", ], diff --git a/pkg/log/log.go b/pkg/log/log.go index 7d563241e..9387586e6 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -30,7 +30,7 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/linewriter" + "gvisor.dev/gvisor/pkg/linewriter" ) // Level is the log level. @@ -50,6 +50,19 @@ const ( Debug ) +func (l Level) String() string { + switch l { + case Warning: + return "Warning" + case Info: + return "Info" + case Debug: + return "Debug" + default: + return fmt.Sprintf("Invalid level: %d", l) + } +} + // Emitter is the final destination for logs. type Emitter interface { // Emit emits the given log statement. This allows for control over the @@ -209,7 +222,7 @@ var logMu sync.Mutex // log is the default logger. var log atomic.Value -// Log retieves the global logger. +// Log retrieves the global logger. func Log() *BasicLogger { return log.Load().(*BasicLogger) } diff --git a/pkg/memutil/BUILD b/pkg/memutil/BUILD index 71b48a972..7b50e2b28 100644 --- a/pkg/memutil/BUILD +++ b/pkg/memutil/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "memutil", srcs = ["memutil_unsafe.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/memutil", + importpath = "gvisor.dev/gvisor/pkg/memutil", visibility = ["//visibility:public"], deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD index 4b2c7a00e..3b8a691f4 100644 --- a/pkg/metric/BUILD +++ b/pkg/metric/BUILD @@ -6,7 +6,7 @@ package(licenses = ["notice"]) go_library( name = "metric", srcs = ["metric.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/metric", + importpath = "gvisor.dev/gvisor/pkg/metric", visibility = ["//:sandbox"], deps = [ ":metric_go_proto", @@ -23,7 +23,7 @@ proto_library( go_proto_library( name = "metric_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto", + importpath = "gvisor.dev/gvisor/pkg/metric/metric_go_proto", proto = ":metric_proto", visibility = ["//:sandbox"], ) diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index 803709cc4..eadde06e4 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -21,9 +21,9 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - pb "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto" ) var ( diff --git a/pkg/metric/metric_test.go b/pkg/metric/metric_test.go index b8b124c83..34969385a 100644 --- a/pkg/metric/metric_test.go +++ b/pkg/metric/metric_test.go @@ -18,8 +18,8 @@ import ( "testing" "github.com/golang/protobuf/proto" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - pb "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto" + "gvisor.dev/gvisor/pkg/eventchannel" + pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto" ) // sliceEmitter implements eventchannel.Emitter by appending all messages to a diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD index 36b2ec5f6..c6737bf97 100644 --- a/pkg/p9/BUILD +++ b/pkg/p9/BUILD @@ -21,7 +21,7 @@ go_library( "transport.go", "version.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/p9", + importpath = "gvisor.dev/gvisor/pkg/p9", deps = [ "//pkg/fd", "//pkg/log", diff --git a/pkg/p9/client.go b/pkg/p9/client.go index 56587e2cf..7dc20aeef 100644 --- a/pkg/p9/client.go +++ b/pkg/p9/client.go @@ -20,8 +20,8 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" ) // ErrOutOfTags indicates no tags are available. diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go index 258080f67..a6cc0617e 100644 --- a/pkg/p9/client_file.go +++ b/pkg/p9/client_file.go @@ -21,8 +21,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" ) // Attach attaches to a server. diff --git a/pkg/p9/client_test.go b/pkg/p9/client_test.go index fc49729d8..87b2dd61e 100644 --- a/pkg/p9/client_test.go +++ b/pkg/p9/client_test.go @@ -18,7 +18,7 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/unet" ) // TestVersion tests the version negotiation. diff --git a/pkg/p9/file.go b/pkg/p9/file.go index a456e8b3d..907445e15 100644 --- a/pkg/p9/file.go +++ b/pkg/p9/file.go @@ -17,7 +17,7 @@ package p9 import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fd" ) // Attacher is provided by the server. diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index f32368763..999b4f684 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -23,8 +23,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" ) // ExtractErrno extracts a syscall.Errno from a error, best effort. @@ -224,7 +224,7 @@ func (t *Tattach) handle(cs *connState) message { file: sf, refs: 1, mode: attr.Mode.FileType(), - pathNode: &cs.server.pathTree, + pathNode: cs.server.pathTree, } defer root.DecRef() @@ -545,15 +545,15 @@ func (t *Tunlinkat) handle(cs *connState) message { // Before we do the unlink itself, we need to ensure that there // are no operations in flight on associated path node. The // child's path node lock must be held to ensure that the - // unlink at marking the child deleted below is atomic with + // unlinkat marking the child deleted below is atomic with // respect to any other read or write operations. // // This is one case where we have a lock ordering issue, but // since we always acquire deeper in the hierarchy, we know // that we are free of lock cycles. childPathNode := ref.pathNode.pathNodeFor(t.Name) - childPathNode.mu.Lock() - defer childPathNode.mu.Unlock() + childPathNode.opMu.Lock() + defer childPathNode.opMu.Unlock() // Do the unlink. err = ref.file.UnlinkAt(t.Name, t.Flags) diff --git a/pkg/p9/local_server/BUILD b/pkg/p9/local_server/BUILD deleted file mode 100644 index aa6db186c..000000000 --- a/pkg/p9/local_server/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") - -package(licenses = ["notice"]) - -go_binary( - name = "local_server", - srcs = ["local_server.go"], - deps = [ - "//pkg/fd", - "//pkg/log", - "//pkg/p9", - "//pkg/unet", - ], -) diff --git a/pkg/p9/local_server/local_server.go b/pkg/p9/local_server/local_server.go deleted file mode 100644 index 9546b3de5..000000000 --- a/pkg/p9/local_server/local_server.go +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary local_server provides a local 9P2000.L server for the p9 package. -// -// To use, first start the server: -// local_server /tmp/my_bind_addr -// -// Then, connect using the Linux 9P filesystem: -// mount -t 9p -o trans=unix /tmp/my_bind_addr /mnt -// -// This package also serves as an examplar. -package main - -import ( - "os" - "path" - "syscall" - - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/unet" -) - -// local wraps a local file. -type local struct { - p9.DefaultWalkGetAttr - - path string - file *os.File -} - -// info constructs a QID for this file. -func (l *local) info() (p9.QID, os.FileInfo, error) { - var ( - qid p9.QID - fi os.FileInfo - err error - ) - - // Stat the file. - if l.file != nil { - fi, err = l.file.Stat() - } else { - fi, err = os.Lstat(l.path) - } - if err != nil { - log.Warningf("error stating %#v: %v", l, err) - return qid, nil, err - } - - // Construct the QID type. - qid.Type = p9.ModeFromOS(fi.Mode()).QIDType() - - // Save the path from the Ino. - qid.Path = fi.Sys().(*syscall.Stat_t).Ino - return qid, fi, nil -} - -// Attach implements p9.Attacher.Attach. -func (l *local) Attach() (p9.File, error) { - return &local{path: "/"}, nil -} - -// Walk implements p9.File.Walk. -func (l *local) Walk(names []string) ([]p9.QID, p9.File, error) { - var qids []p9.QID - last := &local{path: l.path} - for _, name := range names { - c := &local{path: path.Join(last.path, name)} - qid, _, err := c.info() - if err != nil { - return nil, nil, err - } - qids = append(qids, qid) - last = c - } - return qids, last, nil -} - -// StatFS implements p9.File.StatFS. -// -// Not implemented. -func (l *local) StatFS() (p9.FSStat, error) { - return p9.FSStat{}, syscall.ENOSYS -} - -// FSync implements p9.File.FSync. -func (l *local) FSync() error { - return l.file.Sync() -} - -// GetAttr implements p9.File.GetAttr. -// -// Not fully implemented. -func (l *local) GetAttr(req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) { - qid, fi, err := l.info() - if err != nil { - return qid, p9.AttrMask{}, p9.Attr{}, err - } - - stat := fi.Sys().(*syscall.Stat_t) - attr := p9.Attr{ - Mode: p9.FileMode(stat.Mode), - UID: p9.UID(stat.Uid), - GID: p9.GID(stat.Gid), - NLink: stat.Nlink, - RDev: stat.Rdev, - Size: uint64(stat.Size), - BlockSize: uint64(stat.Blksize), - Blocks: uint64(stat.Blocks), - ATimeSeconds: uint64(stat.Atim.Sec), - ATimeNanoSeconds: uint64(stat.Atim.Nsec), - MTimeSeconds: uint64(stat.Mtim.Sec), - MTimeNanoSeconds: uint64(stat.Mtim.Nsec), - CTimeSeconds: uint64(stat.Ctim.Sec), - CTimeNanoSeconds: uint64(stat.Ctim.Nsec), - } - valid := p9.AttrMask{ - Mode: true, - UID: true, - GID: true, - NLink: true, - RDev: true, - Size: true, - Blocks: true, - ATime: true, - MTime: true, - CTime: true, - } - - return qid, valid, attr, nil -} - -// SetAttr implements p9.File.SetAttr. -// -// Not implemented. -func (l *local) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { - return syscall.ENOSYS -} - -// Remove implements p9.File.Remove. -// -// Not implemented. -func (l *local) Remove() error { - return syscall.ENOSYS -} - -// Rename implements p9.File.Rename. -// -// Not implemented. -func (l *local) Rename(directory p9.File, name string) error { - return syscall.ENOSYS -} - -// Close implements p9.File.Close. -func (l *local) Close() error { - if l.file != nil { - return l.file.Close() - } - return nil -} - -// Open implements p9.File.Open. -func (l *local) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { - qid, _, err := l.info() - if err != nil { - return nil, qid, 0, err - } - - // Do the actual open. - f, err := os.OpenFile(l.path, int(mode), 0) - if err != nil { - return nil, qid, 0, err - } - l.file = f - - // Note: we don't send the local file for this server. - return nil, qid, 4096, nil -} - -// Read implements p9.File.Read. -func (l *local) ReadAt(p []byte, offset uint64) (int, error) { - return l.file.ReadAt(p, int64(offset)) -} - -// Write implements p9.File.Write. -func (l *local) WriteAt(p []byte, offset uint64) (int, error) { - return l.file.WriteAt(p, int64(offset)) -} - -// Create implements p9.File.Create. -func (l *local) Create(name string, mode p9.OpenFlags, permissions p9.FileMode, _ p9.UID, _ p9.GID) (*fd.FD, p9.File, p9.QID, uint32, error) { - f, err := os.OpenFile(l.path, int(mode)|syscall.O_CREAT|syscall.O_EXCL, os.FileMode(permissions)) - if err != nil { - return nil, nil, p9.QID{}, 0, err - } - - l2 := &local{path: path.Join(l.path, name), file: f} - qid, _, err := l2.info() - if err != nil { - l2.Close() - return nil, nil, p9.QID{}, 0, err - } - - return nil, l2, qid, 4096, nil -} - -// Mkdir implements p9.File.Mkdir. -// -// Not properly implemented. -func (l *local) Mkdir(name string, permissions p9.FileMode, _ p9.UID, _ p9.GID) (p9.QID, error) { - if err := os.Mkdir(path.Join(l.path, name), os.FileMode(permissions)); err != nil { - return p9.QID{}, err - } - - // Blank QID. - return p9.QID{}, nil -} - -// Symlink implements p9.File.Symlink. -// -// Not properly implemented. -func (l *local) Symlink(oldname string, newname string, _ p9.UID, _ p9.GID) (p9.QID, error) { - if err := os.Symlink(oldname, path.Join(l.path, newname)); err != nil { - return p9.QID{}, err - } - - // Blank QID. - return p9.QID{}, nil -} - -// Link implements p9.File.Link. -// -// Not properly implemented. -func (l *local) Link(target p9.File, newname string) error { - return os.Link(target.(*local).path, path.Join(l.path, newname)) -} - -// Mknod implements p9.File.Mknod. -// -// Not implemented. -func (l *local) Mknod(name string, mode p9.FileMode, major uint32, minor uint32, _ p9.UID, _ p9.GID) (p9.QID, error) { - return p9.QID{}, syscall.ENOSYS -} - -// RenameAt implements p9.File.RenameAt. -// -// Not implemented. -func (l *local) RenameAt(oldname string, newdir p9.File, newname string) error { - return syscall.ENOSYS -} - -// UnlinkAt implements p9.File.UnlinkAt. -// -// Not implemented. -func (l *local) UnlinkAt(name string, flags uint32) error { - return syscall.ENOSYS -} - -// Readdir implements p9.File.Readdir. -func (l *local) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) { - // We only do *all* dirents in single shot. - const maxDirentBuffer = 1024 * 1024 - buf := make([]byte, maxDirentBuffer) - n, err := syscall.ReadDirent(int(l.file.Fd()), buf) - if err != nil { - // Return zero entries. - return nil, nil - } - - // Parse the entries; note that we read up to offset+count here. - _, newCount, newNames := syscall.ParseDirent(buf[:n], int(offset)+int(count), nil) - var dirents []p9.Dirent - for i := int(offset); i >= 0 && i < newCount; i++ { - entry := local{path: path.Join(l.path, newNames[i])} - qid, _, err := entry.info() - if err != nil { - continue - } - dirents = append(dirents, p9.Dirent{ - QID: qid, - Type: qid.Type, - Name: newNames[i], - Offset: uint64(i + 1), - }) - } - - return dirents, nil -} - -// Readlink implements p9.File.Readlink. -// -// Not properly implemented. -func (l *local) Readlink() (string, error) { - return os.Readlink(l.path) -} - -// Flush implements p9.File.Flush. -func (l *local) Flush() error { - return nil -} - -// Connect implements p9.File.Connect. -func (l *local) Connect(p9.ConnectFlags) (*fd.FD, error) { - return nil, syscall.ECONNREFUSED -} - -// Renamed implements p9.File.Renamed. -func (l *local) Renamed(parent p9.File, newName string) { - l.path = path.Join(parent.(*local).path, newName) -} - -// Allocate implements p9.File.Allocate. -func (l *local) Allocate(mode p9.AllocateMode, offset, length uint64) error { - return syscall.Fallocate(int(l.file.Fd()), mode.ToLinux(), int64(offset), int64(length)) -} - -func main() { - log.SetLevel(log.Debug) - - if len(os.Args) != 2 { - log.Warningf("usage: %s <bind-addr>", os.Args[0]) - os.Exit(1) - } - - // Bind and listen on the socket. - serverSocket, err := unet.BindAndListen(os.Args[1], false) - if err != nil { - log.Warningf("err binding: %v", err) - os.Exit(1) - } - - // Run the server. - s := p9.NewServer(&local{}) - s.Serve(serverSocket) -} - -var ( - _ p9.File = &local{} -) diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go index 75d6bc832..fd9eb1c5d 100644 --- a/pkg/p9/messages.go +++ b/pkg/p9/messages.go @@ -18,7 +18,7 @@ import ( "fmt" "math" - "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fd" ) // ErrInvalidMsgType is returned when an unsupported message type is found. diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 4039862e6..e12831dbd 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -92,7 +92,7 @@ func (o OpenFlags) String() string { } } -// Tag is a messsage tag. +// Tag is a message tag. type Tag uint16 // FID is a file identifier. diff --git a/pkg/p9/p9test/BUILD b/pkg/p9/p9test/BUILD index cf22edde8..6e939a49a 100644 --- a/pkg/p9/p9test/BUILD +++ b/pkg/p9/p9test/BUILD @@ -8,7 +8,7 @@ alias( actual = "@com_github_golang_mock//mockgen:mockgen", ) -MOCK_SRC_PACKAGE = "gvisor.googlesource.com/gvisor/pkg/p9" +MOCK_SRC_PACKAGE = "gvisor.dev/gvisor/pkg/p9" # mockgen_reflect is a source file that contains mock generation code that # imports the p9 package and generates a specification via reflection. The @@ -64,7 +64,7 @@ go_library( "mocks.go", "p9test.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/p9/p9test", + importpath = "gvisor.dev/gvisor/pkg/p9/p9test", visibility = ["//:sandbox"], deps = [ "//pkg/fd", diff --git a/pkg/p9/p9test/client_test.go b/pkg/p9/p9test/client_test.go index e00dd03ab..fe649c2e8 100644 --- a/pkg/p9/p9test/client_test.go +++ b/pkg/p9/p9test/client_test.go @@ -28,8 +28,8 @@ import ( "time" "github.com/golang/mock/gomock" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/p9" ) func TestPanic(t *testing.T) { @@ -269,14 +269,14 @@ type fileGenerator func(*Harness, string, p9.File) (*Mock, *Mock, p9.File) func walkHelper(h *Harness, name string, dir p9.File) (parentBackend *Mock, walkedBackend *Mock, walked p9.File) { _, parent, err := dir.Walk(nil) if err != nil { - h.t.Fatalf("got walk err %v, want nil", err) + h.t.Fatalf("Walk(nil) got err %v, want nil", err) } defer parent.Close() parentBackend = h.Pop(parent) _, walked, err = parent.Walk([]string{name}) if err != nil { - h.t.Fatalf("got walk err %v, want nil", err) + h.t.Fatalf("Walk(%s) got err %v, want nil", name, err) } walkedBackend = h.Pop(walked) @@ -389,7 +389,7 @@ func checkDeleted(h *Harness, file p9.File) { _, newFile, err := file.Walk(nil) if err == syscall.EBUSY { // We can't walk from here because this reference is open - // aleady. Okay, we will also have unopened cases through + // already. Okay, we will also have unopened cases through // TestUnlink, just skip the remove operation for now. return } else if err != nil { @@ -854,6 +854,62 @@ func TestRenameAtInvalid(t *testing.T) { } } +// TestRenameSecondOrder tests that indirect rename targets continue to receive +// Renamed calls after a rename of its renamed parent. i.e., +// +// 1. Create /one/file +// 2. Create /directory +// 3. Rename /one -> /directory/one +// 4. Rename /directory -> /three/foo +// 5. file from (1) should still receive Renamed. +// +// This is a regression test for b/135219260. +func TestRenameSecondOrder(t *testing.T) { + h, c := NewHarness(t) + defer h.Finish() + + rootBackend, root := newRoot(h, c) + defer root.Close() + + // Walk to /one. + _, oneBackend, oneFile := walkHelper(h, "one", root) + defer oneFile.Close() + + // Walk to and generate /one/file. + // + // walkHelper re-walks to oneFile, so we need the second backend, + // which will also receive Renamed calls. + oneSecondBackend, fileBackend, fileFile := walkHelper(h, "file", oneFile) + defer fileFile.Close() + + // Walk to and generate /directory. + _, directoryBackend, directoryFile := walkHelper(h, "directory", root) + defer directoryFile.Close() + + // Rename /one to /directory/one. + rootBackend.EXPECT().RenameAt("one", directoryBackend, "one").Return(nil) + expectRenamed(oneBackend, []string{}, directoryBackend, "one") + expectRenamed(oneSecondBackend, []string{}, directoryBackend, "one") + expectRenamed(fileBackend, []string{}, oneBackend, "file") + if err := renameAt(h, root, directoryFile, "one", "one", false); err != nil { + h.t.Fatalf("got rename err %v, want nil", err) + } + + // Walk to /three. + _, threeBackend, threeFile := walkHelper(h, "three", root) + defer threeFile.Close() + + // Rename /directory to /three/foo. + rootBackend.EXPECT().RenameAt("directory", threeBackend, "foo").Return(nil) + expectRenamed(directoryBackend, []string{}, threeBackend, "foo") + expectRenamed(oneBackend, []string{}, directoryBackend, "one") + expectRenamed(oneSecondBackend, []string{}, directoryBackend, "one") + expectRenamed(fileBackend, []string{}, oneBackend, "file") + if err := renameAt(h, root, threeFile, "directory", "foo", false); err != nil { + h.t.Fatalf("got rename err %v, want nil", err) + } +} + func TestReadlink(t *testing.T) { for name := range newTypeMap(nil) { t.Run(name, func(t *testing.T) { diff --git a/pkg/p9/p9test/p9test.go b/pkg/p9/p9test/p9test.go index 1c8eff200..95846e5f7 100644 --- a/pkg/p9/p9test/p9test.go +++ b/pkg/p9/p9test/p9test.go @@ -23,8 +23,8 @@ import ( "testing" "github.com/golang/mock/gomock" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/unet" ) // Harness is an attacher mock. diff --git a/pkg/p9/path_tree.go b/pkg/p9/path_tree.go index f37ad4ab2..865459411 100644 --- a/pkg/p9/path_tree.go +++ b/pkg/p9/path_tree.go @@ -23,87 +23,199 @@ import ( // // These are shared by all fidRefs that point to the same path. // -// These are not synchronized because we allow certain operations (file walk) -// to proceed without having to acquire a write lock. The lock in this -// structure exists to synchronize high-level, semantic operations, such as the -// simultaneous creation and deletion of a file. +// Lock ordering: +// opMu +// childMu // -// (+) below is the path component string. +// Two different pathNodes may only be locked if Server.renameMu is held for +// write, in which case they can be acquired in any order. type pathNode struct { - mu sync.RWMutex // See above. - fidRefs sync.Map // => map[*fidRef]string(+) - children sync.Map // => map[string(+)]*pathNode - count int64 + // opMu synchronizes high-level, sematic operations, such as the + // simultaneous creation and deletion of a file. + // + // opMu does not directly protect any fields in pathNode. + opMu sync.RWMutex + + // childMu protects the fields below. + childMu sync.RWMutex + + // childNodes maps child path component names to their pathNode. + childNodes map[string]*pathNode + + // childRefs maps child path component names to all of the their + // references. + childRefs map[string]map[*fidRef]struct{} + + // childRefNames maps child references back to their path component + // name. + childRefNames map[*fidRef]string +} + +func newPathNode() *pathNode { + return &pathNode{ + childNodes: make(map[string]*pathNode), + childRefs: make(map[string]map[*fidRef]struct{}), + childRefNames: make(map[*fidRef]string), + } +} + +// forEachChildRef calls fn for each child reference. +func (p *pathNode) forEachChildRef(fn func(ref *fidRef, name string)) { + p.childMu.RLock() + defer p.childMu.RUnlock() + + for name, m := range p.childRefs { + for ref := range m { + fn(ref, name) + } + } +} + +// forEachChildNode calls fn for each child pathNode. +func (p *pathNode) forEachChildNode(fn func(pn *pathNode)) { + p.childMu.RLock() + defer p.childMu.RUnlock() + + for _, pn := range p.childNodes { + fn(pn) + } } // pathNodeFor returns the path node for the given name, or a new one. -// -// Precondition: mu must be held in a readable fashion. func (p *pathNode) pathNodeFor(name string) *pathNode { - // Load the existing path node. - if pn, ok := p.children.Load(name); ok { - return pn.(*pathNode) + p.childMu.RLock() + // Fast path, node already exists. + if pn, ok := p.childNodes[name]; ok { + p.childMu.RUnlock() + return pn + } + p.childMu.RUnlock() + + // Slow path, create a new pathNode for shared use. + p.childMu.Lock() + + // Re-check after re-lock. + if pn, ok := p.childNodes[name]; ok { + p.childMu.Unlock() + return pn } - // Create a new pathNode for shared use. - pn, _ := p.children.LoadOrStore(name, new(pathNode)) - return pn.(*pathNode) + pn := newPathNode() + p.childNodes[name] = pn + p.childMu.Unlock() + return pn } // nameFor returns the name for the given fidRef. // -// Precondition: mu must be held in a readable fashion. +// Precondition: addChild is called for ref before nameFor. func (p *pathNode) nameFor(ref *fidRef) string { - if s, ok := p.fidRefs.Load(ref); ok { - return s.(string) + p.childMu.RLock() + n, ok := p.childRefNames[ref] + p.childMu.RUnlock() + + if !ok { + // This should not happen, don't proceed. + panic(fmt.Sprintf("expected name for %+v, none found", ref)) } - // This should not happen, don't proceed. - panic(fmt.Sprintf("expected name for %+v, none found", ref)) + return n } -// addChild adds a child to the given pathNode. -// -// This applies only to an individual fidRef. +// addChildLocked adds a child reference to p. // -// Precondition: mu must be held in a writable fashion. -func (p *pathNode) addChild(ref *fidRef, name string) { - if s, ok := p.fidRefs.Load(ref); ok { +// Precondition: As addChild, plus childMu is locked for write. +func (p *pathNode) addChildLocked(ref *fidRef, name string) { + if n, ok := p.childRefNames[ref]; ok { // This should not happen, don't proceed. - panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, s, name)) + panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, n, name)) } - p.fidRefs.Store(ref, name) + p.childRefNames[ref] = name + + m, ok := p.childRefs[name] + if !ok { + m = make(map[*fidRef]struct{}) + p.childRefs[name] = m + } + + m[ref] = struct{}{} } -// removeChild removes the given child. +// addChild adds a child reference to p. // -// This applies only to an individual fidRef. +// Precondition: ref may only be added once at a time. +func (p *pathNode) addChild(ref *fidRef, name string) { + p.childMu.Lock() + p.addChildLocked(ref, name) + p.childMu.Unlock() +} + +// removeChild removes the given child. // -// Precondition: mu must be held in a writable fashion. +// This applies only to an individual fidRef, which is not required to exist. func (p *pathNode) removeChild(ref *fidRef) { - p.fidRefs.Delete(ref) + p.childMu.Lock() + + // This ref may not exist anymore. This can occur, e.g., in unlink, + // where a removeWithName removes the ref, and then a DecRef on the ref + // attempts to remove again. + if name, ok := p.childRefNames[ref]; ok { + m, ok := p.childRefs[name] + if !ok { + // This should not happen, don't proceed. + p.childMu.Unlock() + panic(fmt.Sprintf("name %s missing from childfidRefs", name)) + } + + delete(m, ref) + if len(m) == 0 { + delete(p.childRefs, name) + } + } + + delete(p.childRefNames, ref) + + p.childMu.Unlock() } -// removeWithName removes all references with the given name. +// addPathNodeFor adds an existing pathNode as the node for name. // -// The original pathNode is returned by this function, and removed from this -// pathNode. Any operations on the removed tree must use this value. +// Preconditions: newName does not exist. +func (p *pathNode) addPathNodeFor(name string, pn *pathNode) { + p.childMu.Lock() + + if opn, ok := p.childNodes[name]; ok { + p.childMu.Unlock() + panic(fmt.Sprintf("unexpected pathNode %+v with path %q", opn, name)) + } + + p.childNodes[name] = pn + p.childMu.Unlock() +} + +// removeWithName removes all references with the given name. // -// The provided function is executed after removal. +// The provided function is executed after reference removal. The only method +// it may (transitively) call on this pathNode is addChildLocked. // -// Precondition: mu must be held in a writable fashion. +// If a child pathNode for name exists, it is removed from this pathNode and +// returned by this function. Any operations on the removed tree must use this +// value. func (p *pathNode) removeWithName(name string, fn func(ref *fidRef)) *pathNode { - p.fidRefs.Range(func(key, value interface{}) bool { - if value.(string) == name { - p.fidRefs.Delete(key) - fn(key.(*fidRef)) + p.childMu.Lock() + defer p.childMu.Unlock() + + if m, ok := p.childRefs[name]; ok { + for ref := range m { + delete(m, ref) + delete(p.childRefNames, ref) + fn(ref) } - return true - }) + } - // Return the original path node. - origPathNode := p.pathNodeFor(name) - p.children.Delete(name) + // Return the original path node, if it exists. + origPathNode := p.childNodes[name] + delete(p.childNodes, name) return origPathNode } diff --git a/pkg/p9/server.go b/pkg/p9/server.go index f377a6557..b294efbb0 100644 --- a/pkg/p9/server.go +++ b/pkg/p9/server.go @@ -21,8 +21,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" ) // Server is a 9p2000.L server. @@ -35,7 +35,7 @@ type Server struct { // These may be across different connections, but rename operations // must be serialized globally for safely. There is a single pathTree // for the entire server, and not per connection. - pathTree pathNode + pathTree *pathNode // renameMu is a global lock protecting rename operations. With this // lock, we can be certain that any given rename operation can safely @@ -49,6 +49,7 @@ type Server struct { func NewServer(attacher Attacher) *Server { return &Server{ attacher: attacher, + pathTree: newPathNode(), } } @@ -201,19 +202,16 @@ func (f *fidRef) maybeParent() *fidRef { // notifyDelete marks all fidRefs as deleted. // -// Precondition: the write lock must be held on the given pathNode. +// Precondition: this must be called via safelyWrite or safelyGlobal. func notifyDelete(pn *pathNode) { // Call on all local references. - pn.fidRefs.Range(func(key, _ interface{}) bool { - ref := key.(*fidRef) + pn.forEachChildRef(func(ref *fidRef, _ string) { atomic.StoreUint32(&ref.deleted, 1) - return true }) // Call on all subtrees. - pn.children.Range(func(_, value interface{}) bool { - notifyDelete(value.(*pathNode)) - return true + pn.forEachChildNode(func(pn *pathNode) { + notifyDelete(pn) }) } @@ -225,28 +223,26 @@ func (f *fidRef) markChildDeleted(name string) { atomic.StoreUint32(&ref.deleted, 1) }) - // Mark everything below as deleted. - notifyDelete(origPathNode) + if origPathNode != nil { + // Mark all children as deleted. + notifyDelete(origPathNode) + } } // notifyNameChange calls the relevant Renamed method on all nodes in the path, // recursively. Note that this applies only for subtrees, as these // notifications do not apply to the actual file whose name has changed. // -// Precondition: the write lock must be held on the given pathNode. +// Precondition: this must be called via safelyGlobal. func notifyNameChange(pn *pathNode) { // Call on all local references. - pn.fidRefs.Range(func(key, value interface{}) bool { - ref := key.(*fidRef) - name := value.(string) + pn.forEachChildRef(func(ref *fidRef, name string) { ref.file.Renamed(ref.parent.file, name) - return true }) // Call on all subtrees. - pn.children.Range(func(_, value interface{}) bool { - notifyNameChange(value.(*pathNode)) - return true + pn.forEachChildNode(func(pn *pathNode) { + notifyNameChange(pn) }) } @@ -256,18 +252,25 @@ func notifyNameChange(pn *pathNode) { func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) { target.markChildDeleted(newName) origPathNode := f.pathNode.removeWithName(oldName, func(ref *fidRef) { + // N.B. DecRef can take f.pathNode's parent's childMu. This is + // allowed because renameMu is held for write via safelyGlobal. ref.parent.DecRef() // Drop original reference. ref.parent = target // Change parent. ref.parent.IncRef() // Acquire new one. - target.pathNode.addChild(ref, newName) + if f.pathNode == target.pathNode { + target.pathNode.addChildLocked(ref, newName) + } else { + target.pathNode.addChild(ref, newName) + } ref.file.Renamed(target.file, newName) }) - // Replace the previous (now deleted) path node. - f.pathNode.children.Store(newName, origPathNode) - - // Call Renamed on everything above. - notifyNameChange(origPathNode) + if origPathNode != nil { + // Replace the previous (now deleted) path node. + target.pathNode.addPathNodeFor(newName, origPathNode) + // Call Renamed on all children. + notifyNameChange(origPathNode) + } } // safelyRead executes the given operation with the local path node locked. @@ -275,8 +278,8 @@ func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) { func (f *fidRef) safelyRead(fn func() error) (err error) { f.server.renameMu.RLock() defer f.server.renameMu.RUnlock() - f.pathNode.mu.RLock() - defer f.pathNode.mu.RUnlock() + f.pathNode.opMu.RLock() + defer f.pathNode.opMu.RUnlock() return fn() } @@ -285,8 +288,8 @@ func (f *fidRef) safelyRead(fn func() error) (err error) { func (f *fidRef) safelyWrite(fn func() error) (err error) { f.server.renameMu.RLock() defer f.server.renameMu.RUnlock() - f.pathNode.mu.Lock() - defer f.pathNode.mu.Unlock() + f.pathNode.opMu.Lock() + defer f.pathNode.opMu.Unlock() return fn() } diff --git a/pkg/p9/transport.go b/pkg/p9/transport.go index ef59077ff..5648df589 100644 --- a/pkg/p9/transport.go +++ b/pkg/p9/transport.go @@ -22,9 +22,9 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" ) // ErrSocket is returned in cases of a socket issue. diff --git a/pkg/p9/transport_test.go b/pkg/p9/transport_test.go index 0f88ff249..cdb3bc841 100644 --- a/pkg/p9/transport_test.go +++ b/pkg/p9/transport_test.go @@ -19,8 +19,8 @@ import ( "os" "testing" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/unet" ) const ( @@ -179,6 +179,51 @@ func TestSendClosed(t *testing.T) { } } +func BenchmarkSendRecv(b *testing.B) { + server, client, err := unet.SocketPair(false) + if err != nil { + b.Fatalf("socketpair got err %v expected nil", err) + } + defer server.Close() + defer client.Close() + + // Exchange Rflush messages since these contain no data and therefore incur + // no additional marshaling overhead. + go func() { + for i := 0; i < b.N; i++ { + tag, m, err := recv(server, maximumLength, msgRegistry.get) + if err != nil { + b.Fatalf("recv got err %v expected nil", err) + } + if tag != Tag(1) { + b.Fatalf("got tag %v expected 1", tag) + } + if _, ok := m.(*Rflush); !ok { + b.Fatalf("got message %T expected *Rflush", m) + } + if err := send(server, Tag(2), &Rflush{}); err != nil { + b.Fatalf("send got err %v expected nil", err) + } + } + }() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := send(client, Tag(1), &Rflush{}); err != nil { + b.Fatalf("send got err %v expected nil", err) + } + tag, m, err := recv(client, maximumLength, msgRegistry.get) + if err != nil { + b.Fatalf("recv got err %v expected nil", err) + } + if tag != Tag(2) { + b.Fatalf("got tag %v expected 2", tag) + } + if _, ok := m.(*Rflush); !ok { + b.Fatalf("got message %v expected *Rflush", m) + } + } +} + func init() { msgRegistry.register(MsgTypeBadDecode, func() message { return &badDecode{} }) } diff --git a/pkg/procid/BUILD b/pkg/procid/BUILD index 7c22b763a..697e7a2f4 100644 --- a/pkg/procid/BUILD +++ b/pkg/procid/BUILD @@ -9,7 +9,7 @@ go_library( "procid_amd64.s", "procid_arm64.s", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/procid", + importpath = "gvisor.dev/gvisor/pkg/procid", visibility = ["//visibility:public"], ) diff --git a/pkg/rand/BUILD b/pkg/rand/BUILD index 4eec3a4dd..f4f2001f3 100644 --- a/pkg/rand/BUILD +++ b/pkg/rand/BUILD @@ -8,7 +8,7 @@ go_library( "rand.go", "rand_linux.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/rand", + importpath = "gvisor.dev/gvisor/pkg/rand", visibility = ["//:sandbox"], deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD index fc562f821..9c08452fc 100644 --- a/pkg/refs/BUILD +++ b/pkg/refs/BUILD @@ -22,8 +22,11 @@ go_library( "refcounter_state.go", "weak_ref_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/refs", + importpath = "gvisor.dev/gvisor/pkg/refs", visibility = ["//:sandbox"], + deps = [ + "//pkg/log", + ], ) go_test( diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go index 20f515391..6ecbace9e 100644 --- a/pkg/refs/refcounter.go +++ b/pkg/refs/refcounter.go @@ -17,9 +17,14 @@ package refs import ( + "bytes" + "fmt" "reflect" + "runtime" "sync" "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" ) // RefCounter is the interface to be implemented by objects that are reference @@ -189,6 +194,16 @@ type AtomicRefCount struct { // how these fields are used. refCount int64 + // name is the name of the type which owns this ref count. + // + // name is immutable after EnableLeakCheck is called. + name string + + // stack optionally records the caller of EnableLeakCheck. + // + // stack is immutable after EnableLeakCheck is called. + stack []uintptr + // mu protects the list below. mu sync.Mutex `state:"nosave"` @@ -196,6 +211,148 @@ type AtomicRefCount struct { weakRefs weakRefList `state:"nosave"` } +// LeakMode configures the leak checker. +type LeakMode uint32 + +const ( + // uninitializedLeakChecking indicates that the leak checker has not yet been initialized. + uninitializedLeakChecking LeakMode = iota + + // NoLeakChecking indicates that no effort should be made to check for + // leaks. + NoLeakChecking + + // LeaksLogWarning indicates that a warning should be logged when leaks + // are found. + LeaksLogWarning + + // LeaksLogTraces indicates that a trace collected during allocation + // should be logged when leaks are found. + LeaksLogTraces +) + +// leakMode stores the current mode for the reference leak checker. +// +// Values must be one of the LeakMode values. +// +// leakMode must be accessed atomically. +var leakMode uint32 + +// SetLeakMode configures the reference leak checker. +func SetLeakMode(mode LeakMode) { + atomic.StoreUint32(&leakMode, uint32(mode)) +} + +const maxStackFrames = 40 + +type fileLine struct { + file string + line int +} + +// A stackKey is a representation of a stack frame for use as a map key. +// +// The fileLine type is used as PC values seem to vary across collections, even +// for the same call stack. +type stackKey [maxStackFrames]fileLine + +var stackCache = struct { + sync.Mutex + entries map[stackKey][]uintptr +}{entries: map[stackKey][]uintptr{}} + +func makeStackKey(pcs []uintptr) stackKey { + frames := runtime.CallersFrames(pcs) + var key stackKey + keySlice := key[:0] + for { + frame, more := frames.Next() + keySlice = append(keySlice, fileLine{frame.File, frame.Line}) + + if !more || len(keySlice) == len(key) { + break + } + } + return key +} + +func recordStack() []uintptr { + pcs := make([]uintptr, maxStackFrames) + n := runtime.Callers(1, pcs) + if n == 0 { + // No pcs available. Stop now. + // + // This can happen if the first argument to runtime.Callers + // is large. + return nil + } + pcs = pcs[:n] + key := makeStackKey(pcs) + stackCache.Lock() + v, ok := stackCache.entries[key] + if !ok { + // Reallocate to prevent pcs from escaping. + v = append([]uintptr(nil), pcs...) + stackCache.entries[key] = v + } + stackCache.Unlock() + return v +} + +func formatStack(pcs []uintptr) string { + frames := runtime.CallersFrames(pcs) + var trace bytes.Buffer + for { + frame, more := frames.Next() + fmt.Fprintf(&trace, "%s:%d: %s\n", frame.File, frame.Line, frame.Function) + + if !more { + break + } + } + return trace.String() +} + +func (r *AtomicRefCount) finalize() { + var note string + switch LeakMode(atomic.LoadUint32(&leakMode)) { + case NoLeakChecking: + return + case uninitializedLeakChecking: + note = "(Leak checker uninitialized): " + } + if n := r.ReadRefs(); n != 0 { + msg := fmt.Sprintf("%sAtomicRefCount %p owned by %q garbage collected with ref count of %d (want 0)", note, r, r.name, n) + if len(r.stack) != 0 { + msg += ":\nCaller:\n" + formatStack(r.stack) + } + log.Warningf(msg) + } +} + +// EnableLeakCheck checks for reference leaks when the AtomicRefCount gets +// garbage collected. +// +// This function adds a finalizer to the AtomicRefCount, so the AtomicRefCount +// must be at the beginning of its parent. +// +// name is a friendly name that will be listed as the owner of the +// AtomicRefCount in logs. It should be the name of the parent type, including +// package. +func (r *AtomicRefCount) EnableLeakCheck(name string) { + if name == "" { + panic("invalid name") + } + switch LeakMode(atomic.LoadUint32(&leakMode)) { + case NoLeakChecking: + return + case LeaksLogTraces: + r.stack = recordStack() + } + r.name = name + runtime.SetFinalizer(r, (*AtomicRefCount).finalize) +} + // ReadRefs returns the current number of references. The returned count is // inherently racy and is unsafe to use without external synchronization. func (r *AtomicRefCount) ReadRefs() int64 { @@ -208,6 +365,8 @@ func (r *AtomicRefCount) ReadRefs() int64 { // // The sanity check here is limited to real references, since if they have // dropped beneath zero then the object should have been destroyed. +// +//go:nosplit func (r *AtomicRefCount) IncRef() { if v := atomic.AddInt64(&r.refCount, 1); v <= 0 { panic("Incrementing non-positive ref count") @@ -222,6 +381,8 @@ func (r *AtomicRefCount) IncRef() { // To do this safely without a loop, a speculative reference is first acquired // on the object. This allows multiple concurrent TryIncRef calls to // distinguish other TryIncRef calls from genuine references held. +// +//go:nosplit func (r *AtomicRefCount) TryIncRef() bool { const speculativeRef = 1 << 32 v := atomic.AddInt64(&r.refCount, speculativeRef) @@ -263,6 +424,7 @@ func (r *AtomicRefCount) dropWeakRef(w *WeakRef) { // B: DecRef [real decrease] // A: TryIncRef [transform speculative to real] // +//go:nosplit func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { switch v := atomic.AddInt64(&r.refCount, -1); { case v < -1: @@ -298,6 +460,8 @@ func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { } // DecRef decrements this object's reference count. +// +//go:nosplit func (r *AtomicRefCount) DecRef() { r.DecRefWithDestructor(nil) } diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 2a59ebbce..d1024e49d 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -27,7 +27,7 @@ go_library( "seccomp_rules.go", "seccomp_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp", + importpath = "gvisor.dev/gvisor/pkg/seccomp", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index cc142a497..c7503f2cc 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -20,9 +20,9 @@ import ( "reflect" "sort" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/log" ) const ( @@ -244,7 +244,7 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc return nil } -// buildBSTProgram converts a binary tree started in 'root' into BPF code. The ouline of the code +// buildBSTProgram converts a binary tree started in 'root' into BPF code. The outline of the code // is as follows: // // // SYS_PIPE(22), root diff --git a/pkg/seccomp/seccomp_amd64.go b/pkg/seccomp/seccomp_amd64.go index 02dfb8d9f..00bf332c1 100644 --- a/pkg/seccomp/seccomp_amd64.go +++ b/pkg/seccomp/seccomp_amd64.go @@ -17,7 +17,7 @@ package seccomp import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) const ( diff --git a/pkg/seccomp/seccomp_arm64.go b/pkg/seccomp/seccomp_arm64.go index b575bcdbf..b62133f21 100644 --- a/pkg/seccomp/seccomp_arm64.go +++ b/pkg/seccomp/seccomp_arm64.go @@ -17,7 +17,7 @@ package seccomp import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) const ( diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index 47ecac6f7..353686ed3 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -27,9 +27,9 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/bpf" ) type seccompData struct { diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go index afc2f755f..62ae1fd9f 100644 --- a/pkg/seccomp/seccomp_test_victim.go +++ b/pkg/seccomp/seccomp_test_victim.go @@ -22,7 +22,7 @@ import ( "os" "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) func main() { diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index ebb6397e8..0a3d92854 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -18,7 +18,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // sockFprog is sock_fprog taken from <linux/filter.h>. diff --git a/pkg/secio/BUILD b/pkg/secio/BUILD index 2b4b87c61..f38fb39f3 100644 --- a/pkg/secio/BUILD +++ b/pkg/secio/BUILD @@ -8,7 +8,7 @@ go_library( "full_reader.go", "secio.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/secio", + importpath = "gvisor.dev/gvisor/pkg/secio", visibility = ["//pkg/sentry:internal"], ) diff --git a/pkg/segment/set.go b/pkg/segment/set.go index 982eb3fdd..03e4f258f 100644 --- a/pkg/segment/set.go +++ b/pkg/segment/set.go @@ -1288,7 +1288,7 @@ func (s *Set) String() string { return s.root.String() } -// String stringifes a node (and all of its children) for debugging. +// String stringifies a node (and all of its children) for debugging. func (n *node) String() string { var buf bytes.Buffer n.writeDebugString(&buf, "") diff --git a/pkg/segment/test/BUILD b/pkg/segment/test/BUILD index 81e929b8c..694486296 100644 --- a/pkg/segment/test/BUILD +++ b/pkg/segment/test/BUILD @@ -38,7 +38,7 @@ go_library( "int_set.go", "set_functions.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/segment/segment", + importpath = "gvisor.dev/gvisor/pkg/segment/segment", deps = [ "//pkg/state", ], diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 0c044bc33..7aace2d7b 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -21,7 +21,7 @@ go_library( "stack.go", "syscalls_amd64.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/arch", + importpath = "gvisor.dev/gvisor/pkg/sentry/arch", visibility = ["//:sandbox"], deps = [ ":registers_go_proto", @@ -44,7 +44,7 @@ proto_library( go_proto_library( name = "registers_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto", proto = ":registers_proto", visibility = ["//visibility:public"], ) diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go index 53f0c9018..ace7d5b18 100644 --- a/pkg/sentry/arch/arch.go +++ b/pkg/sentry/arch/arch.go @@ -20,11 +20,11 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Arch describes an architecture. diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index 135c2ee1f..9e7db8b30 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -22,10 +22,10 @@ import ( "math/rand" "syscall" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Host specifies the host architecture. diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index bb52d8db0..9061fcc86 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -18,8 +18,8 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // ErrFloatingPoint indicates a failed restore due to unusable floating point diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 4d167ce98..9294ac773 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -22,12 +22,12 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // System-related constants for x86. diff --git a/pkg/sentry/arch/auxv.go b/pkg/sentry/arch/auxv.go index 80c923103..4546b2ef9 100644 --- a/pkg/sentry/arch/auxv.go +++ b/pkg/sentry/arch/auxv.go @@ -15,7 +15,7 @@ package arch import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // An AuxEntry represents an entry in an ELF auxiliary vector. diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index aa030fd70..febd6f9b9 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -21,9 +21,9 @@ import ( "math" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // SignalAct represents the action that should be taken when a signal is diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go index a442f9fdc..5a3228113 100644 --- a/pkg/sentry/arch/signal_stack.go +++ b/pkg/sentry/arch/signal_stack.go @@ -17,7 +17,7 @@ package arch import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go index 7e6324e82..7472c3c61 100644 --- a/pkg/sentry/arch/stack.go +++ b/pkg/sentry/arch/stack.go @@ -18,8 +18,8 @@ import ( "encoding/binary" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Stack is a simple wrapper around a usermem.IO and an address. diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD index a3c8d0177..8dc1a77b1 100644 --- a/pkg/sentry/context/BUILD +++ b/pkg/sentry/context/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "context", srcs = ["context.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/context", + importpath = "gvisor.dev/gvisor/pkg/sentry/context", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/amutex", diff --git a/pkg/sentry/context/context.go b/pkg/sentry/context/context.go index d70f3a5c3..dfd62cbdb 100644 --- a/pkg/sentry/context/context.go +++ b/pkg/sentry/context/context.go @@ -16,8 +16,8 @@ package context import ( - "gvisor.googlesource.com/gvisor/pkg/amutex" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/log" ) type contextID int diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD index d17b1bdcf..3b6841b7e 100644 --- a/pkg/sentry/context/contexttest/BUILD +++ b/pkg/sentry/context/contexttest/BUILD @@ -6,7 +6,7 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest", + importpath = "gvisor.dev/gvisor/pkg/sentry/context/contexttest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/memutil", diff --git a/pkg/sentry/context/contexttest/contexttest.go b/pkg/sentry/context/contexttest/contexttest.go index 83da40711..15cf086a9 100644 --- a/pkg/sentry/context/contexttest/contexttest.go +++ b/pkg/sentry/context/contexttest/contexttest.go @@ -21,15 +21,15 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/memutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" ) // Context returns a Context that may be used in tests. Uses ptrace as the @@ -59,6 +59,7 @@ func Context(tb testing.TB) context.Context { l: limits.NewLimitSet(), mf: mf, platform: p, + creds: auth.NewAnonymousCredentials(), otherValues: make(map[interface{}]interface{}), } } @@ -70,6 +71,7 @@ type TestContext struct { l *limits.LimitSet mf *pgalloc.MemoryFile platform platform.Platform + creds *auth.Credentials otherValues map[interface{}]interface{} } @@ -108,6 +110,8 @@ func (t *TestContext) RegisterValue(key, value interface{}) { // Value implements context.Context. func (t *TestContext) Value(key interface{}) interface{} { switch key { + case auth.CtxCredentials: + return t.creds case limits.CtxLimits: return t.l case pgalloc.CtxMemoryFile: diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index 5052bcc0d..bf802d1b6 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -6,11 +6,12 @@ go_library( name = "control", srcs = [ "control.go", + "logging.go", "pprof.go", "proc.go", "state.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/control", + importpath = "gvisor.dev/gvisor/pkg/sentry/control", visibility = [ "//pkg/sentry:internal", ], @@ -22,12 +23,13 @@ go_library( "//pkg/sentry/fs/host", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/state", + "//pkg/sentry/strace", "//pkg/sentry/usage", "//pkg/sentry/watchdog", + "//pkg/tcpip/link/sniffer", "//pkg/urpc", ], ) diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go new file mode 100644 index 000000000..811f24324 --- /dev/null +++ b/pkg/sentry/control/logging.go @@ -0,0 +1,136 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/strace" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" +) + +// LoggingArgs are the arguments to use for changing the logging +// level and strace list. +type LoggingArgs struct { + // SetLevel is a flag used to indicate that we should update + // the logging level. We should be able to change the strace + // list without affecting the logging level and vice versa. + SetLevel bool + + // Level is the log level that will be set if SetLevel is true. + Level log.Level + + // SetLogPackets indicates that we should update the log packets flag. + SetLogPackets bool + + // LogPackets is the actual value to set for LogPackets. + // SetLogPackets must be enabled to indicate that we're changing + // the value. + LogPackets bool + + // SetStrace is a flag used to indicate that strace related + // arguments were passed in. + SetStrace bool + + // EnableStrace is a flag from the CLI that specifies whether to + // enable strace at all. If this flag is false then a completely + // pristine copy of the syscall table will be swapped in. This + // approach is used to remain consistent with an empty strace + // whitelist meaning trace all system calls. + EnableStrace bool + + // Strace is the whitelist of syscalls to trace to log. If this + // and StraceEventWhitelist are empty trace all system calls. + StraceWhitelist []string + + // SetEventStrace is a flag used to indicate that event strace + // related arguments were passed in. + SetEventStrace bool + + // StraceEventWhitelist is the whitelist of syscalls to trace + // to event log. + StraceEventWhitelist []string +} + +// Logging provides functions related to logging. +type Logging struct{} + +// Change will change the log level and strace arguments. Although +// this functions signature requires an error it never acctually +// return san error. It's required by the URPC interface. +// Additionally, it may look odd that this is the only method +// attached to an empty struct but this is also part of how +// URPC dispatches. +func (l *Logging) Change(args *LoggingArgs, code *int) error { + if args.SetLevel { + // Logging uses an atomic for the level so this is thread safe. + log.SetLevel(args.Level) + } + + if args.SetLogPackets { + if args.LogPackets { + atomic.StoreUint32(&sniffer.LogPackets, 1) + } else { + atomic.StoreUint32(&sniffer.LogPackets, 0) + } + log.Infof("LogPackets set to: %v", atomic.LoadUint32(&sniffer.LogPackets)) + } + + if args.SetStrace { + if err := l.configureStrace(args); err != nil { + return fmt.Errorf("error configuring strace: %v", err) + } + } + + if args.SetEventStrace { + if err := l.configureEventStrace(args); err != nil { + return fmt.Errorf("error configuring event strace: %v", err) + } + } + + return nil +} + +func (l *Logging) configureStrace(args *LoggingArgs) error { + if args.EnableStrace { + // Install the whitelist specified. + if len(args.StraceWhitelist) > 0 { + if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil { + return err + } + } else { + // For convenience, if strace is enabled but whitelist + // is empty, enable everything to log. + strace.EnableAll(strace.SinkTypeLog) + } + } else { + // Uninstall all strace functions. + strace.Disable(strace.SinkTypeLog) + } + return nil +} + +func (l *Logging) configureEventStrace(args *LoggingArgs) error { + if len(args.StraceEventWhitelist) > 0 { + if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil { + return err + } + } else { + strace.Disable(strace.SinkTypeEvent) + } + return nil +} diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go index d63916600..1f78d54a2 100644 --- a/pkg/sentry/control/pprof.go +++ b/pkg/sentry/control/pprof.go @@ -21,8 +21,8 @@ import ( "runtime/trace" "sync" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/urpc" ) var errNoOutput = errors.New("no output writer provided") diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index f7f02a3e1..6ae60c5cb 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -23,16 +23,15 @@ import ( "text/tabwriter" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/urpc" ) // Proc includes task-related functions. @@ -123,9 +122,8 @@ func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID // TTYFileOperations that wraps the TTY is also returned. func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) { // Import file descriptors. - l := limits.NewLimitSet() - fdm := proc.Kernel.NewFDMap() - defer fdm.DecRef() + fdTable := proc.Kernel.NewFDTable() + defer fdTable.DecRef() // No matter what happens, we should close all files in the FilePayload // before returning. Any files that are imported will be duped. @@ -149,9 +147,9 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI WorkingDirectory: args.WorkingDirectory, Root: args.Root, Credentials: creds, - FDMap: fdm, + FDTable: fdTable, Umask: 0022, - Limits: l, + Limits: limits.NewLimitSet(), MaxSymlinkTraversals: linux.MaxSymlinkTraversals, UTSNamespace: proc.Kernel.RootUTSNamespace(), IPCNamespace: proc.Kernel.RootIPCNamespace(), @@ -212,7 +210,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI } // Add the file to the FD map. - if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { return nil, 0, nil, err } } diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go index b7895d03c..d8ada2694 100644 --- a/pkg/sentry/control/proc_test.go +++ b/pkg/sentry/control/proc_test.go @@ -17,9 +17,9 @@ package control import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/log" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/log" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" ) func init() { diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go index 11efcaba1..41feeffe3 100644 --- a/pkg/sentry/control/state.go +++ b/pkg/sentry/control/state.go @@ -17,11 +17,11 @@ package control import ( "errors" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/state" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/state" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/urpc" ) // ErrInvalidFiles is returned when the urpc call to Save does not include an diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD index 4ccf0674d..7e8918722 100644 --- a/pkg/sentry/device/BUILD +++ b/pkg/sentry/device/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/device", + importpath = "gvisor.dev/gvisor/pkg/sentry/device", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/abi/linux"], ) diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go index 458d03b30..47945d1a7 100644 --- a/pkg/sentry/device/device.go +++ b/pkg/sentry/device/device.go @@ -22,7 +22,7 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // Registry tracks all simple devices and related state on the system for diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 142a00840..d7259b47b 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -43,7 +43,7 @@ go_library( "splice.go", "sync.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -69,6 +69,7 @@ go_library( "//pkg/state", "//pkg/syserror", "//pkg/waiter", + "//third_party/gvsync", ], ) diff --git a/pkg/sentry/fs/README.md b/pkg/sentry/fs/README.md index f53ed3eaa..db4a1b730 100644 --- a/pkg/sentry/fs/README.md +++ b/pkg/sentry/fs/README.md @@ -69,7 +69,7 @@ Specifically this state is: - An `fs.MountManager` containing mount points. -- A `kernel.FDMap` containing pointers to open files. +- A `kernel.FDTable` containing pointers to open files. Anything else managed by the VFS that can be easily loaded into memory from a filesystem is synced back to those filesystems and is not saved. Examples are @@ -126,7 +126,7 @@ A mount point is restored in two steps: - Second, during state.Load, each `fs.MountedFilesystem` optionally searches for a mount in the `fs.RestoreEnvironment` that matches its saved device - name. The `fs.MountedFilesystem` then restablishes a pointer to the root of + name. The `fs.MountedFilesystem` then reestablishes a pointer to the root of the mounted filesystem. For example, the mount specification provides the network connection for a mounted remote filesystem client to communicate with its remote file server. The `fs.MountedFilesystem` also trivially loads @@ -158,7 +158,7 @@ Otherwise an `fs.File` restores flags, an offset, and a unique identifier (only used internally). It may use the `fs.Inode`, which it indirectly holds a reference on through the -`fs.Dirent`, to restablish an open file handle on the backing filesystem (e.g. +`fs.Dirent`, to reestablish an open file handle on the backing filesystem (e.g. to continue reading and writing). ## Overlay diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD index 2111df2e8..ae1c9cf76 100644 --- a/pkg/sentry/fs/anon/BUILD +++ b/pkg/sentry/fs/anon/BUILD @@ -8,7 +8,7 @@ go_library( "anon.go", "device.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/anon", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/anon/anon.go b/pkg/sentry/fs/anon/anon.go index a6ea8b9e7..7323c7222 100644 --- a/pkg/sentry/fs/anon/anon.go +++ b/pkg/sentry/fs/anon/anon.go @@ -17,11 +17,11 @@ package anon import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // NewInode constructs an anonymous Inode that is not associated @@ -33,7 +33,7 @@ func NewInode(ctx context.Context) *fs.Inode { User: fs.PermMask{Read: true, Write: true}, }, linux.ANON_INODE_FS_MAGIC), } - return fs.NewInode(iops, fs.NewPseudoMountSource(), fs.StableAttr{ + return fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{ Type: fs.Anonymous, DeviceID: PseudoDevice.DeviceID(), InodeID: PseudoDevice.NextIno(), diff --git a/pkg/sentry/fs/anon/device.go b/pkg/sentry/fs/anon/device.go index 5927bd11e..d9ac14956 100644 --- a/pkg/sentry/fs/anon/device.go +++ b/pkg/sentry/fs/anon/device.go @@ -15,7 +15,7 @@ package anon import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/device" ) // PseudoDevice is the device on which all anonymous inodes reside. diff --git a/pkg/sentry/fs/ashmem/BUILD b/pkg/sentry/fs/ashmem/BUILD deleted file mode 100644 index ef1c31a3e..000000000 --- a/pkg/sentry/fs/ashmem/BUILD +++ /dev/null @@ -1,65 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_library( - name = "ashmem", - srcs = [ - "area.go", - "device.go", - "pin_board.go", - "uint64_range.go", - "uint64_set.go", - ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ashmem", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) - -go_test( - name = "ashmem_test", - size = "small", - srcs = ["pin_board_test.go"], - embed = [":ashmem"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/usermem", - ], -) - -go_template_instance( - name = "uint64_range", - out = "uint64_range.go", - package = "ashmem", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "uint64_set", - out = "uint64_set.go", - package = "ashmem", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "Range", - "Value": "noValue", - "Functions": "setFunctions", - }, -) diff --git a/pkg/sentry/fs/ashmem/area.go b/pkg/sentry/fs/ashmem/area.go deleted file mode 100644 index b4b0cc08b..000000000 --- a/pkg/sentry/fs/ashmem/area.go +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ashmem - -import ( - "sync" - - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" -) - -const ( - // namePrefix is the name prefix assumed and forced by the Linux implementation. - namePrefix = "dev/ashmem" - - // nameLen is the maximum name length. - nameLen = 256 -) - -// Area implements fs.FileOperations. -// -// +stateify savable -type Area struct { - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` - - ad *Device - - // mu protects fields below. - mu sync.Mutex `state:"nosave"` - tmpfsFile *fs.File - name string - size uint64 - perms usermem.AccessType - pb *PinBoard -} - -// Release implements fs.FileOperations.Release. -func (a *Area) Release() { - a.mu.Lock() - defer a.mu.Unlock() - if a.tmpfsFile != nil { - a.tmpfsFile.DecRef() - a.tmpfsFile = nil - } -} - -// Seek implements fs.FileOperations.Seek. -func (a *Area) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) { - a.mu.Lock() - defer a.mu.Unlock() - if a.size == 0 { - return 0, syserror.EINVAL - } - if a.tmpfsFile == nil { - return 0, syserror.EBADF - } - return a.tmpfsFile.FileOperations.Seek(ctx, file, whence, offset) -} - -// Read implements fs.FileOperations.Read. -func (a *Area) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - a.mu.Lock() - defer a.mu.Unlock() - if a.size == 0 { - return 0, nil - } - if a.tmpfsFile == nil { - return 0, syserror.EBADF - } - return a.tmpfsFile.FileOperations.Read(ctx, file, dst, offset) -} - -// Write implements fs.FileOperations.Write. -func (a *Area) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { - return 0, syserror.ENOSYS -} - -// ConfigureMMap implements fs.FileOperations.ConfigureMMap. -func (a *Area) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error { - a.mu.Lock() - defer a.mu.Unlock() - if a.size == 0 { - return syserror.EINVAL - } - - if !a.perms.SupersetOf(opts.Perms) { - return syserror.EPERM - } - opts.MaxPerms = opts.MaxPerms.Intersect(a.perms) - - if a.tmpfsFile == nil { - tmpfsInodeOps := tmpfs.NewInMemoryFile(ctx, usage.Tmpfs, fs.UnstableAttr{}) - tmpfsInode := fs.NewInode(tmpfsInodeOps, fs.NewPseudoMountSource(), fs.StableAttr{}) - dirent := fs.NewDirent(tmpfsInode, namePrefix+"/"+a.name) - tmpfsFile, err := tmpfsInode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}) - // Drop the extra reference on the Dirent. - dirent.DecRef() - - if err != nil { - return err - } - - // Truncate to the size set by ASHMEM_SET_SIZE ioctl. - err = tmpfsInodeOps.Truncate(ctx, tmpfsInode, int64(a.size)) - if err != nil { - return err - } - a.tmpfsFile = tmpfsFile - a.pb = NewPinBoard() - } - - return a.tmpfsFile.ConfigureMMap(ctx, opts) -} - -// Ioctl implements fs.FileOperations.Ioctl. -func (a *Area) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - // Switch on ioctl request. - switch args[1].Uint() { - case linux.AshmemSetNameIoctl: - name, err := usermem.CopyStringIn(ctx, io, args[2].Pointer(), nameLen-1, usermem.IOOpts{ - AddressSpaceActive: true, - }) - if err != nil { - return 0, err - } - - a.mu.Lock() - defer a.mu.Unlock() - - // Cannot set name for already mapped ashmem. - if a.tmpfsFile != nil { - return 0, syserror.EINVAL - } - a.name = name - return 0, nil - - case linux.AshmemGetNameIoctl: - a.mu.Lock() - var local []byte - if a.name != "" { - nameLen := len([]byte(a.name)) - local = make([]byte, nameLen, nameLen+1) - copy(local, []byte(a.name)) - local = append(local, 0) - } else { - nameLen := len([]byte(namePrefix)) - local = make([]byte, nameLen, nameLen+1) - copy(local, []byte(namePrefix)) - local = append(local, 0) - } - a.mu.Unlock() - - if _, err := io.CopyOut(ctx, args[2].Pointer(), local, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, syserror.EFAULT - } - return 0, nil - - case linux.AshmemSetSizeIoctl: - a.mu.Lock() - defer a.mu.Unlock() - - // Cannot set size for already mapped ashmem. - if a.tmpfsFile != nil { - return 0, syserror.EINVAL - } - a.size = uint64(args[2].SizeT()) - return 0, nil - - case linux.AshmemGetSizeIoctl: - return uintptr(a.size), nil - - case linux.AshmemPinIoctl, linux.AshmemUnpinIoctl, linux.AshmemGetPinStatusIoctl: - // Locking and unlocking is ok since once tmpfsFile is set, it won't be nil again - // even after unmapping! Unlocking is needed in order to avoid a deadlock on - // usermem.CopyObjectIn. - - // Cannot execute pin-related ioctls before mapping. - a.mu.Lock() - if a.tmpfsFile == nil { - a.mu.Unlock() - return 0, syserror.EINVAL - } - a.mu.Unlock() - - var pin linux.AshmemPin - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pin, usermem.IOOpts{ - AddressSpaceActive: true, - }) - if err != nil { - return 0, syserror.EFAULT - } - - a.mu.Lock() - defer a.mu.Unlock() - return a.pinOperation(pin, args[1].Uint()) - - case linux.AshmemPurgeAllCachesIoctl: - return 0, nil - - case linux.AshmemSetProtMaskIoctl: - prot := uint64(args[2].ModeT()) - perms := usermem.AccessType{ - Read: prot&linux.PROT_READ != 0, - Write: prot&linux.PROT_WRITE != 0, - Execute: prot&linux.PROT_EXEC != 0, - } - - a.mu.Lock() - defer a.mu.Unlock() - - // Can only narrow prot mask. - if !a.perms.SupersetOf(perms) { - return 0, syserror.EINVAL - } - - // TODO(b/30946773,gvisor.dev/issue/153): If personality flag - // READ_IMPLIES_EXEC is set, set PROT_EXEC if PORT_READ is set. - - a.perms = perms - return 0, nil - - case linux.AshmemGetProtMaskIoctl: - return uintptr(a.perms.Prot()), nil - default: - // Ioctls irrelevant to Ashmem. - return 0, syserror.EINVAL - } -} - -// pinOperation should only be called while holding a.mu. -func (a *Area) pinOperation(pin linux.AshmemPin, op uint32) (uintptr, error) { - // Page-align a.size for checks. - pageAlignedSize, ok := usermem.Addr(a.size).RoundUp() - if !ok { - return 0, syserror.EINVAL - } - // Len 0 means everything onward. - if pin.Len == 0 { - pin.Len = uint32(pageAlignedSize) - pin.Offset - } - // Both Offset and Len have to be page-aligned. - if pin.Offset%uint32(usermem.PageSize) != 0 { - return 0, syserror.EINVAL - } - if pin.Len%uint32(usermem.PageSize) != 0 { - return 0, syserror.EINVAL - } - // Adding Offset and Len must not cause an uint32 overflow. - if end := pin.Offset + pin.Len; end < pin.Offset { - return 0, syserror.EINVAL - } - // Pin range must not exceed a's size. - if uint32(pageAlignedSize) < pin.Offset+pin.Len { - return 0, syserror.EINVAL - } - // Handle each operation. - r := RangeFromAshmemPin(pin) - switch op { - case linux.AshmemPinIoctl: - if a.pb.PinRange(r) { - return linux.AshmemWasPurged, nil - } - return linux.AshmemNotPurged, nil - - case linux.AshmemUnpinIoctl: - // TODO(b/30946773): Implement purge on unpin. - a.pb.UnpinRange(r) - return 0, nil - - case linux.AshmemGetPinStatusIoctl: - if a.pb.RangePinnedStatus(r) { - return linux.AshmemIsPinned, nil - } - return linux.AshmemIsUnpinned, nil - - default: - panic("unreachable") - } - -} diff --git a/pkg/sentry/fs/ashmem/device.go b/pkg/sentry/fs/ashmem/device.go deleted file mode 100644 index 22e1530e9..000000000 --- a/pkg/sentry/fs/ashmem/device.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package ashmem implements Android ashmem module (Anonymus Shared Memory). -package ashmem - -import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" -) - -// Device implements fs.InodeOperations. -// -// +stateify savable -type Device struct { - fsutil.InodeGenericChecker `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` - fsutil.InodeNoopAllocate `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` - fsutil.InodeNoopTruncate `state:"nosave"` - fsutil.InodeNoopWriteOut `state:"nosave"` - fsutil.InodeNotDirectory `state:"nosave"` - fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeVirtual `state:"nosave"` - - fsutil.InodeSimpleAttributes -} - -var _ fs.InodeOperations = (*Device)(nil) - -// NewDevice creates and intializes a Device structure. -func NewDevice(ctx context.Context, owner fs.FileOwner, fp fs.FilePermissions) *Device { - return &Device{ - InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fp, linux.ANON_INODE_FS_MAGIC), - } -} - -// GetFile implements fs.InodeOperations.GetFile. -func (ad *Device) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return fs.NewFile(ctx, d, flags, &Area{ - ad: ad, - tmpfsFile: nil, - perms: usermem.AnyAccess, - }), nil -} diff --git a/pkg/sentry/fs/ashmem/pin_board.go b/pkg/sentry/fs/ashmem/pin_board.go deleted file mode 100644 index bdf23b371..000000000 --- a/pkg/sentry/fs/ashmem/pin_board.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ashmem - -import "gvisor.googlesource.com/gvisor/pkg/abi/linux" - -const maxUint64 = ^uint64(0) - -// setFunctions implements segment.Functions generated from segment.Functions for -// uint64 Key and noValue Value. For more information, see the build file and -// segment set implementation at pkg/segment/set.go. -type setFunctions struct{} - -// noValue is a type of range attached value, which is irrelevant here. -type noValue struct{} - -// MinKey implements segment.Functions.MinKey. -func (setFunctions) MinKey() uint64 { - return 0 -} - -// MaxKey implements segment.Functions.MaxKey. -func (setFunctions) MaxKey() uint64 { - return maxUint64 -} - -// ClearValue implements segment.Functions.ClearValue. -func (setFunctions) ClearValue(*noValue) { - return -} - -// Merge implements segment.Functions.Merge. -func (setFunctions) Merge(Range, noValue, Range, noValue) (noValue, bool) { - return noValue{}, true -} - -// Split implements segment.Functions.Split. -func (setFunctions) Split(Range, noValue, uint64) (noValue, noValue) { - return noValue{}, noValue{} -} - -// PinBoard represents a set of pinned ranges in ashmem. -// -// segment.Set is used for implementation where segments represent -// ranges of pinned bytes, while gaps represent ranges of unpinned -// bytes. All ranges are page-aligned. -// -// +stateify savable -type PinBoard struct { - Set -} - -// NewPinBoard creates a new pin board with all pages pinned. -func NewPinBoard() *PinBoard { - var pb PinBoard - pb.PinRange(Range{0, maxUint64}) - return &pb -} - -// PinRange pins all pages in the specified range and returns true -// if there are any newly pinned pages. -func (pb *PinBoard) PinRange(r Range) bool { - pinnedPages := false - for gap := pb.LowerBoundGap(r.Start); gap.Ok() && gap.Start() < r.End; { - common := gap.Range().Intersect(r) - if common.Length() == 0 { - gap = gap.NextGap() - continue - } - pinnedPages = true - gap = pb.Insert(gap, common, noValue{}).NextGap() - } - return pinnedPages -} - -// UnpinRange unpins all pages in the specified range. -func (pb *PinBoard) UnpinRange(r Range) { - for seg := pb.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; { - common := seg.Range().Intersect(r) - if common.Length() == 0 { - seg = seg.NextSegment() - continue - } - seg = pb.RemoveRange(common).NextSegment() - } -} - -// RangePinnedStatus returns false if there's at least one unpinned page in the -// specified range. -func (pb *PinBoard) RangePinnedStatus(r Range) bool { - for gap := pb.LowerBoundGap(r.Start); gap.Ok() && gap.Start() < r.End; { - common := gap.Range().Intersect(r) - if common.Length() == 0 { - gap = gap.NextGap() - continue - } - return false - } - return true -} - -// RangeFromAshmemPin converts ashmem's original pin structure -// to Range. -func RangeFromAshmemPin(ap linux.AshmemPin) Range { - if ap.Len == 0 { - return Range{ - uint64(ap.Offset), - maxUint64, - } - } - return Range{ - uint64(ap.Offset), - uint64(ap.Offset) + uint64(ap.Len), - } -} diff --git a/pkg/sentry/fs/ashmem/pin_board_test.go b/pkg/sentry/fs/ashmem/pin_board_test.go deleted file mode 100644 index 24f5d86d6..000000000 --- a/pkg/sentry/fs/ashmem/pin_board_test.go +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ashmem - -import ( - "testing" - - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" -) - -func TestPinBoard(t *testing.T) { - pb := NewPinBoard() - - // Confirm that all pages are pinned. - if !pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) { - t.Errorf("RangePinnedStatus(all pages) returned false (unpinned) at start.") - } - - // Unpin pages [1, 11) (counting from 0) - pb.UnpinRange(RangeFromAshmemPin(linux.AshmemPin{ - usermem.PageSize, - usermem.PageSize * 10, - })) - - // Confirm that pages [1, 11) are unpinned and that page 0 and pages - // larger than 10 are pinned. - pinned := []linux.AshmemPin{ - { - 0, - usermem.PageSize, - }, { - usermem.PageSize * 11, - 0, - }, - } - - for _, pin := range pinned { - if !pb.RangePinnedStatus(RangeFromAshmemPin(pin)) { - t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned false (unpinned).", - pin.Offset, pin.Len) - } - } - - unpinned := []linux.AshmemPin{ - { - usermem.PageSize, - usermem.PageSize * 10, - }, - } - - for _, pin := range unpinned { - if pb.RangePinnedStatus(RangeFromAshmemPin(pin)) { - t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned true (pinned).", - pin.Offset, pin.Len) - } - } - - // Pin pages [2, 6). - pb.PinRange(RangeFromAshmemPin(linux.AshmemPin{ - usermem.PageSize * 2, - usermem.PageSize * 4, - })) - - // Confirm that pages 0, [2, 6) and pages larger than 10 are pinned - // while others remain unpinned. - pinned = []linux.AshmemPin{ - { - 0, - usermem.PageSize, - }, - { - usermem.PageSize * 2, - usermem.PageSize * 4, - }, - { - usermem.PageSize * 11, - 0, - }, - } - - for _, pin := range pinned { - if !pb.RangePinnedStatus(RangeFromAshmemPin(pin)) { - t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned false (unpinned).", - pin.Offset, pin.Len) - } - } - - unpinned = []linux.AshmemPin{ - { - usermem.PageSize, - usermem.PageSize, - }, { - usermem.PageSize * 6, - usermem.PageSize * 5, - }, - } - - for _, pin := range unpinned { - if pb.RangePinnedStatus(RangeFromAshmemPin(pin)) { - t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned true (pinned).", - pin.Offset, pin.Len) - } - } - - // Status of a partially pinned range is unpinned. - if pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) { - t.Errorf("RangePinnedStatus(all pages) returned true (pinned).") - } - - // Pin the whole range again. - pb.PinRange(RangeFromAshmemPin(linux.AshmemPin{0, 0})) - - // Confirm that all pages are pinned. - if !pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) { - t.Errorf("RangePinnedStatus(all pages) returned false (unpinned) at start.") - } -} diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index 591e35e6a..9fc6a5bc2 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -19,11 +19,11 @@ import ( "os" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) // InodeType enumerates types of Inodes. @@ -89,6 +89,28 @@ func (n InodeType) String() string { } } +// LinuxType returns the linux file type for this inode type. +func (n InodeType) LinuxType() uint32 { + switch n { + case RegularFile, SpecialFile: + return linux.ModeRegular + case Directory, SpecialDirectory: + return linux.ModeDirectory + case Symlink: + return linux.ModeSymlink + case Pipe: + return linux.ModeNamedPipe + case CharacterDevice: + return linux.ModeCharacterDevice + case BlockDevice: + return linux.ModeBlockDevice + case Socket: + return linux.ModeSocket + default: + return 0 + } +} + // StableAttr contains Inode attributes that will be stable throughout the // lifetime of the Inode. // diff --git a/pkg/sentry/fs/binder/BUILD b/pkg/sentry/fs/binder/BUILD deleted file mode 100644 index 3710664d3..000000000 --- a/pkg/sentry/fs/binder/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -package(licenses = ["notice"]) - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "binder", - srcs = [ - "binder.go", - ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/binder", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/syserror", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/binder/binder.go b/pkg/sentry/fs/binder/binder.go deleted file mode 100644 index c78f1fc40..000000000 --- a/pkg/sentry/fs/binder/binder.go +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package binder implements Android Binder IPC module. -package binder - -import ( - "sync" - - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" -) - -const ( - currentProtocolVersion = 8 - - // mmapSizeLimit is the upper limit for mapped memory size in Binder. - mmapSizeLimit = 4 * 1024 * 1024 // 4MB -) - -// Device implements fs.InodeOperations. -// -// +stateify savable -type Device struct { - fsutil.InodeGenericChecker `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` - fsutil.InodeNoopAllocate `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` - fsutil.InodeNoopTruncate `state:"nosave"` - fsutil.InodeNoopWriteOut `state:"nosave"` - fsutil.InodeNotDirectory `state:"nosave"` - fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeVirtual `state:"nosave"` - - fsutil.InodeSimpleAttributes -} - -var _ fs.InodeOperations = (*Device)(nil) - -// NewDevice creates and intializes a Device structure. -func NewDevice(ctx context.Context, owner fs.FileOwner, fp fs.FilePermissions) *Device { - return &Device{ - InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fp, 0), - } -} - -// GetFile implements fs.InodeOperations.GetFile. -// -// TODO(b/30946773): Add functionality to GetFile: Additional fields will be -// needed in the Device structure, initialize them here. Also, Device will need -// to keep track of the created Procs in order to implement BINDER_READ_WRITE -// ioctl. -func (bd *Device) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return fs.NewFile(ctx, d, flags, &Proc{ - bd: bd, - task: kernel.TaskFromContext(ctx), - mfp: pgalloc.MemoryFileProviderFromContext(ctx), - }), nil -} - -// Proc implements fs.FileOperations and fs.IoctlGetter. -// -// +stateify savable -type Proc struct { - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` - - bd *Device - task *kernel.Task - mfp pgalloc.MemoryFileProvider - - // mu protects fr. - mu sync.Mutex `state:"nosave"` - - // mapped is memory allocated from mfp.MemoryFile() by AddMapping. - mapped platform.FileRange -} - -// Release implements fs.FileOperations.Release. -func (bp *Proc) Release() { - bp.mu.Lock() - defer bp.mu.Unlock() - if bp.mapped.Length() != 0 { - bp.mfp.MemoryFile().DecRef(bp.mapped) - } -} - -// Seek implements fs.FileOperations.Seek. -// -// Binder doesn't support seek operation (unless in debug mode). -func (bp *Proc) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) { - return offset, syserror.EOPNOTSUPP -} - -// Read implements fs.FileOperations.Read. -// -// Binder doesn't support read operation (unless in debug mode). -func (bp *Proc) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - return 0, syserror.EOPNOTSUPP -} - -// Write implements fs.FileOperations.Write. -// -// Binder doesn't support write operation. -func (bp *Proc) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { - return 0, syserror.EOPNOTSUPP -} - -// Flush implements fs.FileOperations.Flush. -// -// TODO(b/30946773): Implement. -func (bp *Proc) Flush(ctx context.Context, file *fs.File) error { - return nil -} - -// ConfigureMMap implements fs.FileOperations.ConfigureMMap. -func (bp *Proc) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error { - // Compare drivers/android/binder.c:binder_mmap(). - if caller := kernel.TaskFromContext(ctx); caller != bp.task { - return syserror.EINVAL - } - if opts.Length > mmapSizeLimit { - opts.Length = mmapSizeLimit - } - opts.MaxPerms.Write = false - - // TODO(b/30946773): Binder sets VM_DONTCOPY, preventing the created vma - // from being copied across fork(), but we don't support this yet. As - // a result, MMs containing a Binder mapping cannot be forked (MM.Fork will - // fail when AddMapping returns EBUSY). - - return fsutil.GenericConfigureMMap(file, bp, opts) -} - -// Ioctl implements fs.FileOperations.Ioctl. -// -// TODO(b/30946773): Implement. -func (bp *Proc) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - // Switch on ioctl request. - switch uint32(args[1].Int()) { - case linux.BinderVersionIoctl: - ver := &linux.BinderVersion{ - ProtocolVersion: currentProtocolVersion, - } - // Copy result to user-space. - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ver, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err - case linux.BinderWriteReadIoctl: - // TODO(b/30946773): Implement. - fallthrough - case linux.BinderSetIdleTimeoutIoctl: - // TODO(b/30946773): Implement. - fallthrough - case linux.BinderSetMaxThreadsIoctl: - // TODO(b/30946773): Implement. - fallthrough - case linux.BinderSetIdlePriorityIoctl: - // TODO(b/30946773): Implement. - fallthrough - case linux.BinderSetContextMgrIoctl: - // TODO(b/30946773): Implement. - fallthrough - case linux.BinderThreadExitIoctl: - // TODO(b/30946773): Implement. - return 0, syserror.ENOSYS - default: - // Ioctls irrelevant to Binder. - return 0, syserror.EINVAL - } -} - -// AddMapping implements memmap.Mappable.AddMapping. -func (bp *Proc) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, _ bool) error { - bp.mu.Lock() - defer bp.mu.Unlock() - if bp.mapped.Length() != 0 { - // mmap has been called before, which binder_mmap() doesn't like. - return syserror.EBUSY - } - // Binder only allocates and maps a single page up-front - // (drivers/android/binder.c:binder_mmap() => binder_update_page_range()). - fr, err := bp.mfp.MemoryFile().Allocate(usermem.PageSize, usage.Anonymous) - if err != nil { - return err - } - bp.mapped = fr - return nil -} - -// RemoveMapping implements memmap.Mappable.RemoveMapping. -func (*Proc) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) { - // Nothing to do. Notably, we don't free bp.mapped to allow another mmap. -} - -// CopyMapping implements memmap.Mappable.CopyMapping. -func (bp *Proc) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, _ bool) error { - // Nothing to do. Notably, this is one case where CopyMapping isn't - // equivalent to AddMapping, as AddMapping would return EBUSY. - return nil -} - -// Translate implements memmap.Mappable.Translate. -func (bp *Proc) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { - // TODO(b/30946773): In addition to the page initially allocated and mapped - // in AddMapping (Linux: binder_mmap), Binder allocates and maps pages for - // each transaction (Linux: binder_ioctl => binder_ioctl_write_read => - // binder_thread_write => binder_transaction => binder_alloc_buf => - // binder_update_page_range). Since we don't actually implement - // BinderWriteReadIoctl (Linux: BINDER_WRITE_READ), we only ever have the - // first page. - var err error - if required.End > usermem.PageSize { - err = &memmap.BusError{syserror.EFAULT} - } - if required.Start == 0 { - return []memmap.Translation{ - { - Source: memmap.MappableRange{0, usermem.PageSize}, - File: bp.mfp.MemoryFile(), - Offset: bp.mapped.Start, - Perms: usermem.AnyAccess, - }, - }, err - } - return nil, err -} - -// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. -func (bp *Proc) InvalidateUnsavable(ctx context.Context) error { - return nil -} diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go index c80ea0175..51b4c7ee1 100644 --- a/pkg/sentry/fs/context.go +++ b/pkg/sentry/fs/context.go @@ -15,9 +15,9 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) // contextID is the fs package's type for context.Context.Value keys. diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index 41265704c..9ac62c84d 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -19,11 +19,11 @@ import ( "io" "sync" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // copyUp copies a file in an overlay from a lower filesystem to an diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go index 54810afca..1d80bf15a 100644 --- a/pkg/sentry/fs/copy_up_test.go +++ b/pkg/sentry/fs/copy_up_test.go @@ -22,10 +22,10 @@ import ( "sync" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/fs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( @@ -102,7 +102,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { if err != nil { t.Fatalf("failed to mount tmpfs: %v", err) } - lowerRoot := fs.NewDirent(lower, "") + lowerRoot := fs.NewDirent(ctx, lower, "") // Make a deep set of subdirectories that everyone shares. next := lowerRoot diff --git a/pkg/sentry/fs/dentry.go b/pkg/sentry/fs/dentry.go index 7a2d4b180..6b2699f15 100644 --- a/pkg/sentry/fs/dentry.go +++ b/pkg/sentry/fs/dentry.go @@ -17,7 +17,7 @@ package fs import ( "sort" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/device" ) // DentAttr is the metadata of a directory entry. It is a subset of StableAttr. diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 6c4fdaba9..59de615fb 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -12,7 +12,7 @@ go_library( "null.go", "random.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/dev", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -20,8 +20,6 @@ go_library( "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", - "//pkg/sentry/fs/ashmem", - "//pkg/sentry/fs/binder", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index 34ac01173..d4bbd9807 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -18,13 +18,11 @@ package dev import ( "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ashmem" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/binder" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Memory device numbers are from Linux's drivers/char/mem.c @@ -40,8 +38,8 @@ const ( urandomDevMinor uint32 = 9 ) -func newCharacterDevice(iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { - return fs.NewInode(iops, msrc, fs.StableAttr{ +func newCharacterDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { + return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), BlockSize: usermem.PageSize, @@ -49,8 +47,8 @@ func newCharacterDevice(iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode }) } -func newMemDevice(iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) *fs.Inode { - return fs.NewInode(iops, msrc, fs.StableAttr{ +func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) *fs.Inode { + return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), BlockSize: usermem.PageSize, @@ -62,7 +60,7 @@ func newMemDevice(iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) * func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode { iops := ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) - return fs.NewInode(iops, msrc, fs.StableAttr{ + return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), BlockSize: usermem.PageSize, @@ -72,7 +70,7 @@ func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode { func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.Inode { iops := ramfs.NewSymlink(ctx, fs.RootOwner, target) - return fs.NewInode(iops, msrc, fs.StableAttr{ + return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), BlockSize: usermem.PageSize, @@ -81,24 +79,24 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In } // New returns the root node of a device filesystem. -func New(ctx context.Context, msrc *fs.MountSource, binderEnabled bool, ashmemEnabled bool) *fs.Inode { +func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { contents := map[string]*fs.Inode{ "fd": newSymlink(ctx, "/proc/self/fd", msrc), "stdin": newSymlink(ctx, "/proc/self/fd/0", msrc), "stdout": newSymlink(ctx, "/proc/self/fd/1", msrc), "stderr": newSymlink(ctx, "/proc/self/fd/2", msrc), - "null": newMemDevice(newNullDevice(ctx, fs.RootOwner, 0666), msrc, nullDevMinor), - "zero": newMemDevice(newZeroDevice(ctx, fs.RootOwner, 0666), msrc, zeroDevMinor), - "full": newMemDevice(newFullDevice(ctx, fs.RootOwner, 0666), msrc, fullDevMinor), + "null": newMemDevice(ctx, newNullDevice(ctx, fs.RootOwner, 0666), msrc, nullDevMinor), + "zero": newMemDevice(ctx, newZeroDevice(ctx, fs.RootOwner, 0666), msrc, zeroDevMinor), + "full": newMemDevice(ctx, newFullDevice(ctx, fs.RootOwner, 0666), msrc, fullDevMinor), // This is not as good as /dev/random in linux because go // runtime uses sys_random and /dev/urandom internally. // According to 'man 4 random', this will be sufficient unless // application uses this to generate long-lived GPG/SSL/SSH // keys. - "random": newMemDevice(newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor), - "urandom": newMemDevice(newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor), + "random": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor), + "urandom": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor), "shm": tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc), @@ -118,18 +116,8 @@ func New(ctx context.Context, msrc *fs.MountSource, binderEnabled bool, ashmemEn "ptmx": newSymlink(ctx, "pts/ptmx", msrc), } - if binderEnabled { - binder := binder.NewDevice(ctx, fs.RootOwner, fs.FilePermsFromMode(0666)) - contents["binder"] = newCharacterDevice(binder, msrc) - } - - if ashmemEnabled { - ashmem := ashmem.NewDevice(ctx, fs.RootOwner, fs.FilePermsFromMode(0666)) - contents["ashmem"] = newCharacterDevice(ashmem, msrc) - } - iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return fs.NewInode(iops, msrc, fs.StableAttr{ + return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), BlockSize: usermem.PageSize, diff --git a/pkg/sentry/fs/dev/device.go b/pkg/sentry/fs/dev/device.go index 9f4e41fc9..a0493474e 100644 --- a/pkg/sentry/fs/dev/device.go +++ b/pkg/sentry/fs/dev/device.go @@ -14,7 +14,7 @@ package dev -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // devDevice is the pseudo-filesystem device. var devDevice = device.NewAnonDevice() diff --git a/pkg/sentry/fs/dev/fs.go b/pkg/sentry/fs/dev/fs.go index 6096a40f8..55f8af704 100644 --- a/pkg/sentry/fs/dev/fs.go +++ b/pkg/sentry/fs/dev/fs.go @@ -15,19 +15,10 @@ package dev import ( - "strconv" - - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) -// Optional key containing boolean flag which specifies if Android Binder IPC should be enabled. -const binderEnabledKey = "binder_enabled" - -// Optional key containing boolean flag which specifies if Android ashmem should be enabled. -const ashmemEnabledKey = "ashmem_enabled" - // filesystem is a devtmpfs. // // +stateify savable @@ -39,7 +30,7 @@ func init() { fs.RegisterFilesystem(&filesystem{}) } -// FilesystemName is the name underwhich the filesystem is registered. +// FilesystemName is the name under which the filesystem is registered. // Name matches drivers/base/devtmpfs.c:dev_fs_type.name. const FilesystemName = "devtmpfs" @@ -67,33 +58,7 @@ func (*filesystem) Flags() fs.FilesystemFlags { // Mount returns a devtmpfs root that can be positioned in the vfs. func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) { - // device is always ignored. // devtmpfs backed by ramfs ignores bad options. See fs/ramfs/inode.c:ramfs_parse_options. // -> we should consider parsing the mode and backing devtmpfs by this. - - // Parse generic comma-separated key=value options. - options := fs.GenericMountSourceOptions(data) - - // binerEnabledKey is optional and binder is disabled by default. - binderEnabled := false - if beStr, exists := options[binderEnabledKey]; exists { - var err error - binderEnabled, err = strconv.ParseBool(beStr) - if err != nil { - return nil, syserror.EINVAL - } - } - - // ashmemEnabledKey is optional and ashmem is disabled by default. - ashmemEnabled := false - if aeStr, exists := options[ashmemEnabledKey]; exists { - var err error - ashmemEnabled, err = strconv.ParseBool(aeStr) - if err != nil { - return nil, syserror.EINVAL - } - } - - // Construct the devtmpfs root. - return New(ctx, fs.NewNonCachingMountSource(f, flags), binderEnabled, ashmemEnabled), nil + return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags)), nil } diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go index 8f6c6da2d..07e0ea010 100644 --- a/pkg/sentry/fs/dev/full.go +++ b/pkg/sentry/fs/dev/full.go @@ -15,13 +15,13 @@ package dev import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // fullDevice is used to implement /dev/full. diff --git a/pkg/sentry/fs/dev/null.go b/pkg/sentry/fs/dev/null.go index 3f1accef8..4404b97ef 100644 --- a/pkg/sentry/fs/dev/null.go +++ b/pkg/sentry/fs/dev/null.go @@ -15,14 +15,14 @@ package dev import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -97,6 +97,7 @@ func newZeroDevice(ctx context.Context, owner fs.FileOwner, mode linux.FileMode) func (zd *zeroDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { flags.Pread = true flags.Pwrite = true + flags.NonSeekable = true return fs.NewFile(ctx, dirent, flags, &zeroFileOperations{}), nil } diff --git a/pkg/sentry/fs/dev/random.go b/pkg/sentry/fs/dev/random.go index e5a01a906..49cb92f6e 100644 --- a/pkg/sentry/fs/dev/random.go +++ b/pkg/sentry/fs/dev/random.go @@ -15,14 +15,14 @@ package dev import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index a0a35c242..fbca06761 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -22,13 +22,13 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/syserror" ) type globalDirentMap struct { @@ -206,7 +206,7 @@ type Dirent struct { // NewDirent returns a new root Dirent, taking the caller's reference on inode. The caller // holds the only reference to the Dirent. Parents may call hashChild to parent this Dirent. -func NewDirent(inode *Inode, name string) *Dirent { +func NewDirent(ctx context.Context, inode *Inode, name string) *Dirent { d := newDirent(inode, name) allDirents.add(d) d.userVisible = true @@ -229,11 +229,13 @@ func newDirent(inode *Inode, name string) *Dirent { if inode != nil { inode.MountSource.IncDirentRefs() } - return &Dirent{ + d := Dirent{ Inode: inode, name: name, children: make(map[string]*refs.WeakRef), } + d.EnableLeakCheck("fs.Dirent") + return &d } // NewNegativeDirent returns a new root negative Dirent. Otherwise same as NewDirent. @@ -918,7 +920,7 @@ type DirIterator interface { // calls, and must start with the given offset. // // The caller must ensure that this operation is permitted. - IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) + IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error) } // DirentReaddir serializes the directory entries of d including "." and "..". @@ -948,9 +950,6 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, if dirCtx.Serializer == nil { panic("Dirent.Readdir: serializer must not be nil") } - if d.frozen { - return d.readdirFrozen(root, offset, dirCtx) - } // Check that this is actually a directory before emitting anything. // Once we have written entries for "." and "..", future errors from @@ -959,6 +958,16 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, return 0, syserror.ENOTDIR } + // This is a special case for lseek(fd, 0, SEEK_END). + // See SeekWithDirCursor for more details. + if offset == FileMaxOffset { + return offset, nil + } + + if d.frozen { + return d.readdirFrozen(root, offset, dirCtx) + } + // Collect attrs for "." and "..". dot, dotdot := d.GetDotAttrs(root) @@ -981,7 +990,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, // it.IterateDir should be passed an offset that does not include the // initial dot elements. We will add them back later. offset -= 2 - newOffset, err := it.IterateDir(ctx, dirCtx, int(offset)) + newOffset, err := it.IterateDir(ctx, d, dirCtx, int(offset)) if int64(newOffset) < offset { panic(fmt.Sprintf("node.Readdir returned offset %v less than input offset %v", newOffset, offset)) } @@ -1068,7 +1077,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err // // Note that NewDirent returns with one reference taken; the reference // is donated to the caller as the mount reference. - replacement := NewDirent(inode, d.name) + replacement := NewDirent(ctx, inode, d.name) replacement.mounted = true weakRef, ok := d.parent.hashChild(replacement) diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go index 71f2d11de..60a15a275 100644 --- a/pkg/sentry/fs/dirent_cache.go +++ b/pkg/sentry/fs/dirent_cache.go @@ -146,7 +146,7 @@ func (c *DirentCache) contains(d *Dirent) bool { return c.list.Front() == d } -// Invalidate removes all Dirents from the cache, caling DecRef on each. +// Invalidate removes all Dirents from the cache, calling DecRef on each. func (c *DirentCache) Invalidate() { if c == nil { return @@ -159,7 +159,7 @@ func (c *DirentCache) Invalidate() { } // setMaxSize sets cache max size. If current size is larger than max size, the -// cache shrinks to acommodate the new max. +// cache shrinks to accommodate the new max. func (c *DirentCache) setMaxSize(max uint64) { c.mu.Lock() c.maxSize = max diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go index db88d850e..884e3ff06 100644 --- a/pkg/sentry/fs/dirent_refs_test.go +++ b/pkg/sentry/fs/dirent_refs_test.go @@ -18,8 +18,8 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" ) func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode { @@ -31,7 +31,7 @@ func TestWalkPositive(t *testing.T) { // refs == -1 -> has been destroyed. ctx := contexttest.Context(t) - root := NewDirent(newMockDirInode(ctx, nil), "root") + root := NewDirent(ctx, newMockDirInode(ctx, nil), "root") if got := root.ReadRefs(); got != 1 { t.Fatalf("root has a ref count of %d, want %d", got, 1) @@ -73,7 +73,7 @@ func TestWalkNegative(t *testing.T) { // refs == -1 -> has been destroyed. ctx := contexttest.Context(t) - root := NewDirent(NewEmptyDir(ctx, nil), "root") + root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") mn := root.Inode.InodeOperations.(*mockInodeOperationsLookupNegative) if got := root.ReadRefs(); got != 1 { @@ -144,7 +144,7 @@ type mockInodeOperationsLookupNegative struct { func NewEmptyDir(ctx context.Context, cache *DirentCache) *Inode { m := NewMockMountSource(cache) - return NewInode(&mockInodeOperationsLookupNegative{ + return NewInode(ctx, &mockInodeOperationsLookupNegative{ MockInodeOperations: NewMockInodeOperations(ctx), }, m, StableAttr{Type: Directory}) } @@ -162,7 +162,7 @@ func TestHashNegativeToPositive(t *testing.T) { // refs == -1 -> has been destroyed. ctx := contexttest.Context(t) - root := NewDirent(NewEmptyDir(ctx, nil), "root") + root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") name := "d" _, err := root.walk(ctx, root, name, false) @@ -215,7 +215,6 @@ func TestRevalidate(t *testing.T) { // refs == 0 -> one reference. // refs == -1 -> has been destroyed. - ctx := contexttest.Context(t) for _, test := range []struct { // desc is the test's description. desc string @@ -233,7 +232,8 @@ func TestRevalidate(t *testing.T) { }, } { t.Run(test.desc, func(t *testing.T) { - root := NewDirent(NewMockInodeRevalidate(ctx, test.makeNegative), "root") + ctx := contexttest.Context(t) + root := NewDirent(ctx, NewMockInodeRevalidate(ctx, test.makeNegative), "root") name := "d" d1, err := root.walk(ctx, root, name, false) @@ -263,7 +263,7 @@ func NewMockInodeRevalidate(ctx context.Context, makeNegative bool) *Inode { mn := NewMockInodeOperations(ctx) m := NewMockMountSource(nil) m.MountSourceOperations.(*MockMountSourceOps).revalidate = true - return NewInode(&MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory}) + return NewInode(ctx, &MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory}) } func (m *MockInodeOperationsRevalidate) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { @@ -290,12 +290,12 @@ func TestCreateExtraRefs(t *testing.T) { }{ { desc: "Create caching", - root: NewDirent(NewEmptyDir(ctx, NewDirentCache(1)), "root"), + root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), refs: 2, }, { desc: "Create not caching", - root: NewDirent(NewEmptyDir(ctx, nil), "root"), + root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), refs: 1, }, } { @@ -328,11 +328,11 @@ func TestRemoveExtraRefs(t *testing.T) { }{ { desc: "Remove caching", - root: NewDirent(NewEmptyDir(ctx, NewDirentCache(1)), "root"), + root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), }, { desc: "Remove not caching", - root: NewDirent(NewEmptyDir(ctx, nil), "root"), + root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), }, } { t.Run(test.desc, func(t *testing.T) { @@ -366,7 +366,6 @@ func TestRenameExtraRefs(t *testing.T) { // refs == 0 -> one reference. // refs == -1 -> has been destroyed. - ctx := contexttest.Context(t) for _, test := range []struct { // desc is the test's description. desc string @@ -384,10 +383,12 @@ func TestRenameExtraRefs(t *testing.T) { }, } { t.Run(test.desc, func(t *testing.T) { + ctx := contexttest.Context(t) + dirAttr := StableAttr{Type: Directory} - oldParent := NewDirent(NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent") - newParent := NewDirent(NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent") + oldParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent") + newParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent") renamed, err := oldParent.Walk(ctx, oldParent, "old_child") if err != nil { diff --git a/pkg/sentry/fs/dirent_state.go b/pkg/sentry/fs/dirent_state.go index 18652b809..f623d6c0e 100644 --- a/pkg/sentry/fs/dirent_state.go +++ b/pkg/sentry/fs/dirent_state.go @@ -18,7 +18,7 @@ import ( "fmt" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/refs" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/fs/ext4/BUILD b/pkg/sentry/fs/ext4/BUILD index 9df9084c3..9dce67635 100644 --- a/pkg/sentry/fs/ext4/BUILD +++ b/pkg/sentry/fs/ext4/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "ext4", srcs = ["fs.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ext4", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext4", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/ext4/disklayout/BUILD b/pkg/sentry/fs/ext4/disklayout/BUILD new file mode 100644 index 000000000..d1cbab275 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/BUILD @@ -0,0 +1,40 @@ +package(licenses = ["notice"]) + +load("//tools/go_stateify:defs.bzl", "go_library", "go_test") + +go_library( + name = "disklayout", + srcs = [ + "block_group.go", + "block_group_32.go", + "block_group_64.go", + "disklayout.go", + "inode.go", + "inode_new.go", + "inode_old.go", + "superblock.go", + "superblock_32.go", + "superblock_64.go", + "superblock_old.go", + "test_utils.go", + ], + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext4/disklayout", + deps = [ + "//pkg/abi/linux", + "//pkg/binary", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", + ], +) + +go_test( + name = "disklayout_test", + size = "small", + srcs = [ + "block_group_test.go", + "inode_test.go", + "superblock_test.go", + ], + embed = [":disklayout"], + deps = ["//pkg/sentry/kernel/time"], +) diff --git a/pkg/sentry/fs/ext4/disklayout/block_group.go b/pkg/sentry/fs/ext4/disklayout/block_group.go new file mode 100644 index 000000000..32ea3d97d --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group.go @@ -0,0 +1,127 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// BlockGroup represents a Linux ext block group descriptor. An ext file system +// is split into a series of block groups. This provides an access layer to +// information needed to access and use a block group. +// +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors. +type BlockGroup interface { + // InodeTable returns the absolute block number of the block containing the + // inode table. This points to an array of Inode structs. Inode tables are + // statically allocated at mkfs time. The superblock records the number of + // inodes per group (length of this table) and the size of each inode struct. + InodeTable() uint64 + + // BlockBitmap returns the absolute block number of the block containing the + // block bitmap. This bitmap tracks the usage of data blocks within this block + // group and has its own checksum. + BlockBitmap() uint64 + + // InodeBitmap returns the absolute block number of the block containing the + // inode bitmap. This bitmap tracks the usage of this group's inode table + // entries and has its own checksum. + InodeBitmap() uint64 + + // ExclusionBitmap returns the absolute block number of the snapshot exclusion + // bitmap. + ExclusionBitmap() uint64 + + // FreeBlocksCount returns the number of free blocks in the group. + FreeBlocksCount() uint32 + + // FreeInodesCount returns the number of free inodes in the group. + FreeInodesCount() uint32 + + // DirectoryCount returns the number of inodes that represent directories + // under this block group. + DirectoryCount() uint32 + + // UnusedInodeCount returns the number of unused inodes beyond the last used + // inode in this group's inode table. As a result, we needn’t scan past the + // (InodesPerGroup - UnusedInodeCount())th entry in the inode table. + UnusedInodeCount() uint32 + + // BlockBitmapChecksum returns the block bitmap checksum. This is calculated + // using crc32c(FS UUID + group number + entire bitmap). + BlockBitmapChecksum() uint32 + + // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated + // using crc32c(FS UUID + group number + entire bitmap). + InodeBitmapChecksum() uint32 + + // Checksum returns this block group's checksum. + // + // If SbMetadataCsum feature is set: + // - checksum is crc32c(FS UUID + group number + group descriptor + // structure) & 0xFFFF. + // + // If SbGdtCsum feature is set: + // - checksum is crc16(FS UUID + group number + group descriptor + // structure). + // + // SbMetadataCsum and SbGdtCsum should not be both set. + // If they are, Linux warns and asks to run fsck. + Checksum() uint16 + + // Flags returns BGFlags which represents the block group flags. + Flags() BGFlags +} + +// These are the different block group flags. +const ( + // BgInodeUninit indicates that inode table and bitmap are not initialized. + BgInodeUninit uint16 = 0x1 + + // BgBlockUninit indicates that block bitmap is not initialized. + BgBlockUninit uint16 = 0x2 + + // BgInodeZeroed indicates that inode table is zeroed. + BgInodeZeroed uint16 = 0x4 +) + +// BGFlags represents all the different combinations of block group flags. +type BGFlags struct { + InodeUninit bool + BlockUninit bool + InodeZeroed bool +} + +// ToInt converts a BGFlags struct back to its 16-bit representation. +func (f BGFlags) ToInt() uint16 { + var res uint16 + + if f.InodeUninit { + res |= BgInodeUninit + } + if f.BlockUninit { + res |= BgBlockUninit + } + if f.InodeZeroed { + res |= BgInodeZeroed + } + + return res +} + +// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct. +func BGFlagsFromInt(flags uint16) BGFlags { + return BGFlags{ + InodeUninit: flags&BgInodeUninit > 0, + BlockUninit: flags&BgBlockUninit > 0, + InodeZeroed: flags&BgInodeZeroed > 0, + } +} diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_32.go b/pkg/sentry/fs/ext4/disklayout/block_group_32.go new file mode 100644 index 000000000..3e16c76db --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group_32.go @@ -0,0 +1,72 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// BlockGroup32Bit emulates the first half of struct ext4_group_desc in +// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and +// 32-bit ext4 filesystems. It implements BlockGroup interface. +type BlockGroup32Bit struct { + BlockBitmapLo uint32 + InodeBitmapLo uint32 + InodeTableLo uint32 + FreeBlocksCountLo uint16 + FreeInodesCountLo uint16 + UsedDirsCountLo uint16 + FlagsRaw uint16 + ExcludeBitmapLo uint32 + BlockBitmapChecksumLo uint16 + InodeBitmapChecksumLo uint16 + ItableUnusedLo uint16 + ChecksumRaw uint16 +} + +// Compiles only if BlockGroup32Bit implements BlockGroup. +var _ BlockGroup = (*BlockGroup32Bit)(nil) + +// InodeTable implements BlockGroup.InodeTable. +func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) } + +// BlockBitmap implements BlockGroup.BlockBitmap. +func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) } + +// InodeBitmap implements BlockGroup.InodeBitmap. +func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) } + +// ExclusionBitmap implements BlockGroup.ExclusionBitmap. +func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) } + +// FreeBlocksCount implements BlockGroup.FreeBlocksCount. +func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) } + +// FreeInodesCount implements BlockGroup.FreeInodesCount. +func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) } + +// DirectoryCount implements BlockGroup.DirectoryCount. +func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) } + +// UnusedInodeCount implements BlockGroup.UnusedInodeCount. +func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) } + +// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. +func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) } + +// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. +func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) } + +// Checksum implements BlockGroup.Checksum. +func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw } + +// Flags implements BlockGroup.Flags. +func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) } diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_64.go b/pkg/sentry/fs/ext4/disklayout/block_group_64.go new file mode 100644 index 000000000..9a809197a --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group_64.go @@ -0,0 +1,93 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h. +// It is the block group descriptor struct for 64-bit ext4 filesystems. +// It implements BlockGroup interface. It is an extension of the 32-bit +// version of BlockGroup. +type BlockGroup64Bit struct { + // We embed the 32-bit struct here because 64-bit version is just an extension + // of the 32-bit version. + BlockGroup32Bit + + // 64-bit specific fields. + BlockBitmapHi uint32 + InodeBitmapHi uint32 + InodeTableHi uint32 + FreeBlocksCountHi uint16 + FreeInodesCountHi uint16 + UsedDirsCountHi uint16 + ItableUnusedHi uint16 + ExcludeBitmapHi uint32 + BlockBitmapChecksumHi uint16 + InodeBitmapChecksumHi uint16 + _ uint32 // Padding to 64 bytes. +} + +// Compiles only if BlockGroup64Bit implements BlockGroup. +var _ BlockGroup = (*BlockGroup64Bit)(nil) + +// Methods to override. Checksum() and Flags() are not overridden. + +// InodeTable implements BlockGroup.InodeTable. +func (bg *BlockGroup64Bit) InodeTable() uint64 { + return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo) +} + +// BlockBitmap implements BlockGroup.BlockBitmap. +func (bg *BlockGroup64Bit) BlockBitmap() uint64 { + return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo) +} + +// InodeBitmap implements BlockGroup.InodeBitmap. +func (bg *BlockGroup64Bit) InodeBitmap() uint64 { + return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo) +} + +// ExclusionBitmap implements BlockGroup.ExclusionBitmap. +func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 { + return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo) +} + +// FreeBlocksCount implements BlockGroup.FreeBlocksCount. +func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 { + return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo) +} + +// FreeInodesCount implements BlockGroup.FreeInodesCount. +func (bg *BlockGroup64Bit) FreeInodesCount() uint32 { + return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo) +} + +// DirectoryCount implements BlockGroup.DirectoryCount. +func (bg *BlockGroup64Bit) DirectoryCount() uint32 { + return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo) +} + +// UnusedInodeCount implements BlockGroup.UnusedInodeCount. +func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 { + return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo) +} + +// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. +func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 { + return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo) +} + +// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. +func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 { + return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo) +} diff --git a/pkg/abi/linux/binder.go b/pkg/sentry/fs/ext4/disklayout/block_group_test.go index 63b08324a..0ef4294c0 100644 --- a/pkg/abi/linux/binder.go +++ b/pkg/sentry/fs/ext4/disklayout/block_group_test.go @@ -1,4 +1,4 @@ -// Copyright 2018 The gVisor Authors. +// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -package linux +package disklayout -// BinderVersion structure is used for BINDER_VERSION ioctl. -type BinderVersion struct { - ProtocolVersion int32 +import ( + "testing" +) + +// TestBlockGroupSize tests that the block group descriptor structs are of the +// correct size. +func TestBlockGroupSize(t *testing.T) { + assertSize(t, BlockGroup32Bit{}, 32) + assertSize(t, BlockGroup64Bit{}, 64) } diff --git a/pkg/sentry/fs/ext4/disklayout/disklayout.go b/pkg/sentry/fs/ext4/disklayout/disklayout.go new file mode 100644 index 000000000..bdf4e2132 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/disklayout.go @@ -0,0 +1,50 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package disklayout provides Linux ext file system's disk level structures +// which can be directly read into from the underlying device. Structs aim to +// emulate structures `exactly` how they are layed out on disk. +// +// This library aims to be compatible with all ext(2/3/4) systems so it +// provides a generic interface for all major structures and various +// implementations (for different versions). The user code is responsible for +// using appropriate implementations based on the underlying device. +// +// Interfacing all major structures here serves a few purposes: +// - Abstracts away the complexity of the underlying structure from client +// code. The client only has to figure out versioning on set up and then +// can use these as black boxes and pass it higher up the stack. +// - Having pointer receivers forces the user to use pointers to these +// heavy structs. Hence, prevents the client code from unintentionally +// copying these by value while passing the interface around. +// - Version-based implementation selection is resolved on set up hence +// avoiding per call overhead of choosing implementation. +// - All interface methods are pretty light weight (do not take in any +// parameters by design). Passing pointer arguments to interface methods +// can lead to heap allocation as the compiler won't be able to perform +// escape analysis on an unknown implementation at compile time. +// +// Notes: +// - All fields in these structs are exported because binary.Read would +// panic otherwise. +// - All structures on disk are in little-endian order. Only jbd2 (journal) +// structures are in big-endian order. +// - All OS dependent fields in these structures will be interpretted using +// the Linux version of that field. +// - The suffix `Lo` in field names stands for lower bits of that field. +// - The suffix `Hi` in field names stands for upper bits of that field. +// - The suffix `Raw` has been added to indicate that the field is not split +// into Lo and Hi fields and also to resolve name collision with the +// respective interface. +package disklayout diff --git a/pkg/sentry/fs/ext4/disklayout/inode.go b/pkg/sentry/fs/ext4/disklayout/inode.go new file mode 100644 index 000000000..b48001910 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/inode.go @@ -0,0 +1,267 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" +) + +// The Inode interface must be implemented by structs representing ext inodes. +// The inode stores all the metadata pertaining to the file (except for the +// file name which is held by the directory entry). It does NOT expose all +// fields and should be extended if need be. +// +// Some file systems (e.g. FAT) use the directory entry to store all this +// information. Ext file systems do not so that they can support hard links. +// However, ext4 cheats a little bit and duplicates the file type in the +// directory entry for performance gains. +// +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes. +type Inode interface { + // Mode returns the linux file mode which is majorly used to extract + // information like: + // - File permissions (read/write/execute by user/group/others). + // - Sticky, set UID and GID bits. + // - File type. + // + // Masks to extract this information are provided in pkg/abi/linux/file.go. + Mode() linux.FileMode + + // UID returns the owner UID. + UID() auth.KUID + + // GID returns the owner GID. + GID() auth.KGID + + // Size returns the size of the file in bytes. + Size() uint64 + + // InodeSize returns the size of this inode struct in bytes. + // In ext2 and ext3, the inode struct and inode disk record size was fixed at + // 128 bytes. Ext4 makes it possible for the inode struct to be bigger. + // However, accessing any field beyond the 128 bytes marker must be verified + // using this method. + InodeSize() uint16 + + // AccessTime returns the last access time. Shows when the file was last read. + // + // If InExtendedAttr is set, then this should NOT be used because the + // underlying field is used to store the extended attribute value checksum. + AccessTime() time.Time + + // ChangeTime returns the last change time. Shows when the file meta data + // (like permissions) was last changed. + // + // If InExtendedAttr is set, then this should NOT be used because the + // underlying field is used to store the lower 32 bits of the attribute + // value’s reference count. + ChangeTime() time.Time + + // ModificationTime returns the last modification time. Shows when the file + // content was last modified. + // + // If InExtendedAttr is set, then this should NOT be used because + // the underlying field contains the number of the inode that owns the + // extended attribute. + ModificationTime() time.Time + + // DeletionTime returns the deletion time. Inodes are marked as deleted by + // writing to the underlying field. FS tools can restore files until they are + // actually overwritten. + DeletionTime() time.Time + + // LinksCount returns the number of hard links to this inode. + // + // Normally there is an upper limit on the number of hard links: + // - ext2/ext3 = 32,000 + // - ext4 = 65,000 + // + // This implies that an ext4 directory cannot have more than 64,998 + // subdirectories because each subdirectory will have a hard link to the + // directory via the `..` entry. The directory has hard link via the `.` entry + // of its own. And finally the inode is initiated with 1 hard link (itself). + // + // The underlying value is reset to 1 if all the following hold: + // - Inode is a directory. + // - SbDirNlink is enabled. + // - Number of hard links is incremented past 64,999. + // Hard link value of 1 for a directory would indicate that the number of hard + // links is unknown because a directory can have minimum 2 hard links (itself + // and `.` entry). + LinksCount() uint16 + + // Flags returns InodeFlags which represents the inode flags. + Flags() InodeFlags + + // Blocks returns the underlying inode.i_block array. This field is special + // and is used to store various kinds of things depending on the filesystem + // version and inode type. + // - In ext2/ext3, it contains the block map. + // - In ext4, it contains the extent tree. + // - For inline files, it contains the file contents. + // - For symlinks, it contains the link path (if it fits here). + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block. + Blocks() [60]byte +} + +// Inode flags. This is not comprehensive and flags which were not used in +// the Linux kernel have been excluded. +const ( + // InSync indicates that all writes to the file must be synchronous. + InSync = 0x8 + + // InImmutable indicates that this file is immutable. + InImmutable = 0x10 + + // InAppend indicates that this file can only be appended to. + InAppend = 0x20 + + // InNoDump indicates that teh dump(1) utility should not dump this file. + InNoDump = 0x40 + + // InNoAccessTime indicates that the access time of this inode must not be + // updated. + InNoAccessTime = 0x80 + + // InIndex indicates that this directory has hashed indexes. + InIndex = 0x1000 + + // InJournalData indicates that file data must always be written through a + // journal device. + InJournalData = 0x4000 + + // InDirSync indicates that all the directory entiry data must be written + // synchronously. + InDirSync = 0x10000 + + // InTopDir indicates that this inode is at the top of the directory hierarchy. + InTopDir = 0x20000 + + // InHugeFile indicates that this is a huge file. + InHugeFile = 0x40000 + + // InExtents indicates that this inode uses extents. + InExtents = 0x80000 + + // InExtendedAttr indicates that this inode stores a large extended attribute + // value in its data blocks. + InExtendedAttr = 0x200000 + + // InInline indicates that this inode has inline data. + InInline = 0x10000000 + + // InReserved indicates that this inode is reserved for the ext4 library. + InReserved = 0x80000000 +) + +// InodeFlags represents all possible combinations of inode flags. It aims to +// cover the bit masks and provide a more user-friendly interface. +type InodeFlags struct { + Sync bool + Immutable bool + Append bool + NoDump bool + NoAccessTime bool + Index bool + JournalData bool + DirSync bool + TopDir bool + HugeFile bool + Extents bool + ExtendedAttr bool + Inline bool + Reserved bool +} + +// ToInt converts inode flags back to its 32-bit rep. +func (f InodeFlags) ToInt() uint32 { + var res uint32 + + if f.Sync { + res |= InSync + } + if f.Immutable { + res |= InImmutable + } + if f.Append { + res |= InAppend + } + if f.NoDump { + res |= InNoDump + } + if f.NoAccessTime { + res |= InNoAccessTime + } + if f.Index { + res |= InIndex + } + if f.JournalData { + res |= InJournalData + } + if f.DirSync { + res |= InDirSync + } + if f.TopDir { + res |= InTopDir + } + if f.HugeFile { + res |= InHugeFile + } + if f.Extents { + res |= InExtents + } + if f.ExtendedAttr { + res |= InExtendedAttr + } + if f.Inline { + res |= InInline + } + if f.Reserved { + res |= InReserved + } + + return res +} + +// InodeFlagsFromInt converts the integer representation of inode flags to +// a InodeFlags struct. +func InodeFlagsFromInt(f uint32) InodeFlags { + return InodeFlags{ + Sync: f&InSync > 0, + Immutable: f&InImmutable > 0, + Append: f&InAppend > 0, + NoDump: f&InNoDump > 0, + NoAccessTime: f&InNoAccessTime > 0, + Index: f&InIndex > 0, + JournalData: f&InJournalData > 0, + DirSync: f&InDirSync > 0, + TopDir: f&InTopDir > 0, + HugeFile: f&InHugeFile > 0, + Extents: f&InExtents > 0, + ExtendedAttr: f&InExtendedAttr > 0, + Inline: f&InInline > 0, + Reserved: f&InReserved > 0, + } +} + +// These masks define how users can view/modify inode flags. The rest of the +// flags are for internal kernel usage only. +const ( + InUserReadFlagMask = 0x4BDFFF + InUserWriteFlagMask = 0x4B80FF +) diff --git a/pkg/sentry/fs/ext4/disklayout/inode_new.go b/pkg/sentry/fs/ext4/disklayout/inode_new.go new file mode 100644 index 000000000..4f5348372 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/inode_new.go @@ -0,0 +1,96 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import "gvisor.dev/gvisor/pkg/sentry/kernel/time" + +// InodeNew represents ext4 inode structure which can be bigger than +// OldInodeSize. The actual size of this struct should be determined using +// inode.ExtraInodeSize. Accessing any field here should be verified with the +// actual size. The extra space between the end of the inode struct and end of +// the inode record can be used to store extended attr. +// +// If the TimeExtra fields are in scope, the lower 2 bits of those are used +// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits +// are used to provide nanoscond precision. Hence, these timestamps will now +// overflow in May 2446. +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. +type InodeNew struct { + InodeOld + + ExtraInodeSize uint16 + ChecksumHi uint16 + ChangeTimeExtra uint32 + ModificationTimeExtra uint32 + AccessTimeExtra uint32 + CreationTime uint32 + CreationTimeExtra uint32 + VersionHi uint32 + ProjectID uint32 +} + +// Compiles only if InodeNew implements Inode. +var _ Inode = (*InodeNew)(nil) + +// fromExtraTime decodes the extra time and constructs the kernel time struct +// with nanosecond precision. +func fromExtraTime(lo int32, extra uint32) time.Time { + // See description above InodeNew for format. + seconds := (int64(extra&0x3) << 32) + int64(lo) + nanoseconds := int64(extra >> 2) + return time.FromUnix(seconds, nanoseconds) +} + +// Only override methods which change due to ext4 specific fields. + +// Size implements Inode.Size. +func (in *InodeNew) Size() uint64 { + return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo) +} + +// InodeSize implements Inode.InodeSize. +func (in *InodeNew) InodeSize() uint16 { + return oldInodeSize + in.ExtraInodeSize +} + +// ChangeTime implements Inode.ChangeTime. +func (in *InodeNew) ChangeTime() time.Time { + // Apply new timestamp logic if inode.ChangeTimeExtra is in scope. + if in.ExtraInodeSize >= 8 { + return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra) + } + + return in.InodeOld.ChangeTime() +} + +// ModificationTime implements Inode.ModificationTime. +func (in *InodeNew) ModificationTime() time.Time { + // Apply new timestamp logic if inode.ModificationTimeExtra is in scope. + if in.ExtraInodeSize >= 12 { + return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra) + } + + return in.InodeOld.ModificationTime() +} + +// AccessTime implements Inode.AccessTime. +func (in *InodeNew) AccessTime() time.Time { + // Apply new timestamp logic if inode.AccessTimeExtra is in scope. + if in.ExtraInodeSize >= 16 { + return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra) + } + + return in.InodeOld.AccessTime() +} diff --git a/pkg/sentry/fs/ext4/disklayout/inode_old.go b/pkg/sentry/fs/ext4/disklayout/inode_old.go new file mode 100644 index 000000000..dc4c9d8e4 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/inode_old.go @@ -0,0 +1,117 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" +) + +const ( + // oldInodeSize is the inode size in ext2/ext3. + oldInodeSize = 128 +) + +// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct. +// Inode struct size and record size are both 128 bytes for this. +// +// All fields representing time are in seconds since the epoch. Which means that +// they will overflow in January 2038. +type InodeOld struct { + ModeRaw uint16 + UIDLo uint16 + SizeLo uint32 + + // The time fields are signed integers because they could be negative to + // represent time before the epoch. + AccessTimeRaw int32 + ChangeTimeRaw int32 + ModificationTimeRaw int32 + DeletionTimeRaw int32 + + GIDLo uint16 + LinksCountRaw uint16 + BlocksCountLo uint32 + FlagsRaw uint32 + VersionLo uint32 // This is OS dependent. + BlocksRaw [60]byte + Generation uint32 + FileACLLo uint32 + SizeHi uint32 + ObsoFaddr uint32 + + // OS dependent fields have been inlined here. + BlocksCountHi uint16 + FileACLHi uint16 + UIDHi uint16 + GIDHi uint16 + ChecksumLo uint16 + _ uint16 +} + +// Compiles only if InodeOld implements Inode. +var _ Inode = (*InodeOld)(nil) + +// Mode implements Inode.Mode. +func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) } + +// UID implements Inode.UID. +func (in *InodeOld) UID() auth.KUID { + return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo)) +} + +// GID implements Inode.GID. +func (in *InodeOld) GID() auth.KGID { + return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo)) +} + +// Size implements Inode.Size. +func (in *InodeOld) Size() uint64 { + // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL. + return uint64(in.SizeLo) +} + +// InodeSize implements Inode.InodeSize. +func (in *InodeOld) InodeSize() uint16 { return oldInodeSize } + +// AccessTime implements Inode.AccessTime. +func (in *InodeOld) AccessTime() time.Time { + return time.FromUnix(int64(in.AccessTimeRaw), 0) +} + +// ChangeTime implements Inode.ChangeTime. +func (in *InodeOld) ChangeTime() time.Time { + return time.FromUnix(int64(in.ChangeTimeRaw), 0) +} + +// ModificationTime implements Inode.ModificationTime. +func (in *InodeOld) ModificationTime() time.Time { + return time.FromUnix(int64(in.ModificationTimeRaw), 0) +} + +// DeletionTime implements Inode.DeletionTime. +func (in *InodeOld) DeletionTime() time.Time { + return time.FromUnix(int64(in.DeletionTimeRaw), 0) +} + +// LinksCount implements Inode.LinksCount. +func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw } + +// Flags implements Inode.Flags. +func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) } + +// Blocks implements Inode.Blocks. +func (in *InodeOld) Blocks() [60]byte { return in.BlocksRaw } diff --git a/pkg/sentry/fs/ext4/disklayout/inode_test.go b/pkg/sentry/fs/ext4/disklayout/inode_test.go new file mode 100644 index 000000000..9cae9e4f0 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/inode_test.go @@ -0,0 +1,222 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "fmt" + "strconv" + "testing" + + "gvisor.dev/gvisor/pkg/sentry/kernel/time" +) + +// TestInodeSize tests that the inode structs are of the correct size. +func TestInodeSize(t *testing.T) { + assertSize(t, InodeOld{}, oldInodeSize) + + // This was updated from 156 bytes to 160 bytes in Oct 2015. + assertSize(t, InodeNew{}, 160) +} + +// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in +// ext4 inode structs are decoded correctly. +// +// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps. +func TestTimestampSeconds(t *testing.T) { + type timestampTest struct { + // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set. + // If this is set then the 32-bit time is negative. + msbSet bool + + // lowerBound tells if we should take the lowest possible value of + // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to + // false it tells to take the highest possible value. + lowerBound bool + + // extraBits is InodeNew.[X]TimeExtra. + extraBits uint32 + + // want is the kernel time struct that is expected. + want time.Time + } + + tests := []timestampTest{ + // 1901-12-13 + { + msbSet: true, + lowerBound: true, + extraBits: 0, + want: time.FromUnix(int64(-0x80000000), 0), + }, + + // 1969-12-31 + { + msbSet: true, + lowerBound: false, + extraBits: 0, + want: time.FromUnix(int64(-1), 0), + }, + + // 1970-01-01 + { + msbSet: false, + lowerBound: true, + extraBits: 0, + want: time.FromUnix(int64(0), 0), + }, + + // 2038-01-19 + { + msbSet: false, + lowerBound: false, + extraBits: 0, + want: time.FromUnix(int64(0x7fffffff), 0), + }, + + // 2038-01-19 + { + msbSet: true, + lowerBound: true, + extraBits: 1, + want: time.FromUnix(int64(0x80000000), 0), + }, + + // 2106-02-07 + { + msbSet: true, + lowerBound: false, + extraBits: 1, + want: time.FromUnix(int64(0xffffffff), 0), + }, + + // 2106-02-07 + { + msbSet: false, + lowerBound: true, + extraBits: 1, + want: time.FromUnix(int64(0x100000000), 0), + }, + + // 2174-02-25 + { + msbSet: false, + lowerBound: false, + extraBits: 1, + want: time.FromUnix(int64(0x17fffffff), 0), + }, + + // 2174-02-25 + { + msbSet: true, + lowerBound: true, + extraBits: 2, + want: time.FromUnix(int64(0x180000000), 0), + }, + + // 2242-03-16 + { + msbSet: true, + lowerBound: false, + extraBits: 2, + want: time.FromUnix(int64(0x1ffffffff), 0), + }, + + // 2242-03-16 + { + msbSet: false, + lowerBound: true, + extraBits: 2, + want: time.FromUnix(int64(0x200000000), 0), + }, + + // 2310-04-04 + { + msbSet: false, + lowerBound: false, + extraBits: 2, + want: time.FromUnix(int64(0x27fffffff), 0), + }, + + // 2310-04-04 + { + msbSet: true, + lowerBound: true, + extraBits: 3, + want: time.FromUnix(int64(0x280000000), 0), + }, + + // 2378-04-22 + { + msbSet: true, + lowerBound: false, + extraBits: 3, + want: time.FromUnix(int64(0x2ffffffff), 0), + }, + + // 2378-04-22 + { + msbSet: false, + lowerBound: true, + extraBits: 3, + want: time.FromUnix(int64(0x300000000), 0), + }, + + // 2446-05-10 + { + msbSet: false, + lowerBound: false, + extraBits: 3, + want: time.FromUnix(int64(0x37fffffff), 0), + }, + } + + lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000 + upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111 + lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000 + upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111 + + get32BitTime := func(test timestampTest) int32 { + if test.msbSet { + if test.lowerBound { + return lowerMSB1 + } + + return upperMSB1 + } + + if test.lowerBound { + return lowerMSB0 + } + + return upperMSB0 + } + + getTestName := func(test timestampTest) string { + return fmt.Sprintf( + "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t", + strconv.FormatInt(int64(test.extraBits), 2), + test.msbSet, + test.lowerBound, + ) + } + + for _, test := range tests { + t.Run(getTestName(test), func(t *testing.T) { + if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want { + t.Errorf("Expected: %v, Got: %v", test.want, got) + } + }) + } +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock.go b/pkg/sentry/fs/ext4/disklayout/superblock.go new file mode 100644 index 000000000..e4b8f46fb --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock.go @@ -0,0 +1,468 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock should be implemented by structs representing the ext superblock. +// The superblock holds a lot of information about the enclosing filesystem. +// This interface aims to provide access methods to important information held +// by the superblock. It does NOT expose all fields of the superblock, only the +// ones necessary. This can be expanded when need be. +// +// Location and replication: +// - The superblock is located at offset 1024 in block group 0. +// - Redundant copies of the superblock and group descriptors are kept in +// all groups if SbSparse feature flag is NOT set. If it is set, the +// replicas only exist in groups whose group number is either 0 or a +// power of 3, 5, or 7. +// - There is also a sparse superblock feature v2 in which there are just +// two replicas saved in the block groups pointed by sb.s_backup_bgs. +// +// Replicas should eventually be updated if the superblock is updated. +// +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block. +type SuperBlock interface { + // InodesCount returns the total number of inodes in this filesystem. + InodesCount() uint32 + + // BlocksCount returns the total number of data blocks in this filesystem. + BlocksCount() uint64 + + // FreeBlocksCount returns the number of free blocks in this filesystem. + FreeBlocksCount() uint64 + + // FreeInodesCount returns the number of free inodes in this filesystem. + FreeInodesCount() uint32 + + // MountCount returns the number of mounts since the last fsck. + MountCount() uint16 + + // MaxMountCount returns the number of mounts allowed beyond which a fsck is + // needed. + MaxMountCount() uint16 + + // FirstDataBlock returns the absolute block number of the first data block, + // which contains the super block itself. + // + // If the filesystem has 1kb data blocks then this should return 1. For all + // other configurations, this typically returns 0. + // + // The first block group descriptor is in (FirstDataBlock() + 1)th block. + FirstDataBlock() uint32 + + // BlockSize returns the size of one data block in this filesystem. + // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that + // the smallest block size is 1kb. + BlockSize() uint64 + + // BlocksPerGroup returns the number of data blocks in a block group. + BlocksPerGroup() uint32 + + // ClusterSize returns block cluster size (set during mkfs time by admin). + // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that + // the smallest cluster size is 1kb. + // + // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature + // is NOT set and consequently BlockSize() = ClusterSize() in that case. + ClusterSize() uint64 + + // ClustersPerGroup returns: + // - number of clusters per group if bigalloc is enabled. + // - BlocksPerGroup() otherwise. + ClustersPerGroup() uint32 + + // InodeSize returns the size of the inode disk record size in bytes. Use this + // to iterate over inode arrays on disk. + // + // In ext2 and ext3: + // - Each inode had a disk record of 128 bytes. + // - The inode struct size was fixed at 128 bytes. + // + // In ext4 its possible to allocate larger on-disk inodes: + // - Inode disk record size = sb.s_inode_size (function return value). + // = 256 (default) + // - Inode struct size = 128 + inode.i_extra_isize. + // = 128 + 32 = 160 (default) + InodeSize() uint16 + + // InodesPerGroup returns the number of inodes in a block group. + InodesPerGroup() uint32 + + // BgDescSize returns the size of the block group descriptor struct. + // + // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor + // is only 32 bytes long. + // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST + // 64 bytes. It might be bigger than that. + BgDescSize() uint16 + + // CompatibleFeatures returns the CompatFeatures struct which holds all the + // compatible features this fs supports. + CompatibleFeatures() CompatFeatures + + // IncompatibleFeatures returns the CompatFeatures struct which holds all the + // incompatible features this fs supports. + IncompatibleFeatures() IncompatFeatures + + // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the + // readonly compatible features this fs supports. + ReadOnlyCompatibleFeatures() RoCompatFeatures + + // Magic() returns the magic signature which must be 0xef53. + Magic() uint16 + + // Revision returns the superblock revision. Superblock struct fields from + // offset 0x54 till 0x150 should only be used if superblock has DynamicRev. + Revision() SbRevision +} + +// SbRevision is the type for superblock revisions. +type SbRevision int + +// Super block revisions. +const ( + // OldRev is the good old (original) format. + OldRev SbRevision = 0 + + // DynamicRev is v2 format w/ dynamic inode sizes. + DynamicRev SbRevision = 1 +) + +// Superblock compatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbDirPrealloc indicates directory preallocation. + SbDirPrealloc = 0x1 + + // SbHasJournal indicates the presence of a journal. jbd2 should only work + // with this being set. + SbHasJournal = 0x4 + + // SbExtAttr indicates extended attributes support. + SbExtAttr = 0x8 + + // SbResizeInode indicates that the fs has reserved GDT blocks (right after + // group descriptors) for fs expansion. + SbResizeInode = 0x10 + + // SbDirIndex indicates that the fs has directory indices. + SbDirIndex = 0x20 + + // SbSparseV2 stands for Sparse superblock version 2. + SbSparseV2 = 0x200 +) + +// CompatFeatures represents a superblock's compatible feature set. If the +// kernel does not understand any of these feature, it can still read/write +// to this fs. +type CompatFeatures struct { + DirPrealloc bool + HasJournal bool + ExtAttr bool + ResizeInode bool + DirIndex bool + SparseV2 bool +} + +// ToInt converts superblock compatible features back to its 32-bit rep. +func (f CompatFeatures) ToInt() uint32 { + var res uint32 + + if f.DirPrealloc { + res |= SbDirPrealloc + } + if f.HasJournal { + res |= SbHasJournal + } + if f.ExtAttr { + res |= SbExtAttr + } + if f.ResizeInode { + res |= SbResizeInode + } + if f.DirIndex { + res |= SbDirIndex + } + if f.SparseV2 { + res |= SbSparseV2 + } + + return res +} + +// CompatFeaturesFromInt converts the integer representation of superblock +// compatible features to CompatFeatures struct. +func CompatFeaturesFromInt(f uint32) CompatFeatures { + return CompatFeatures{ + DirPrealloc: f&SbDirPrealloc > 0, + HasJournal: f&SbHasJournal > 0, + ExtAttr: f&SbExtAttr > 0, + ResizeInode: f&SbResizeInode > 0, + DirIndex: f&SbDirIndex > 0, + SparseV2: f&SbSparseV2 > 0, + } +} + +// Superblock incompatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbDirentFileType indicates that directory entries record the file type. + // We should use struct ext4_dir_entry_2 for dirents then. + SbDirentFileType = 0x2 + + // SbRecovery indicates that the filesystem needs recovery. + SbRecovery = 0x4 + + // SbJournalDev indicates that the filesystem has a separate journal device. + SbJournalDev = 0x8 + + // SbMetaBG indicates that the filesystem is using Meta block groups. Moves + // the group descriptors from the congested first block group into the first + // group of each metablock group to increase the maximum block groups limit + // and hence support much larger filesystems. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups. + SbMetaBG = 0x10 + + // SbExtents indicates that the filesystem uses extents. Must be set in ext4 + // filesystems. + SbExtents = 0x40 + + // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits. + // Hence can support 2^64 data blocks. + SbIs64Bit = 0x80 + + // SbMMP indicates that this filesystem has multiple mount protection. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection. + SbMMP = 0x100 + + // SbFlexBg indicates that this filesystem has flexible block groups. Several + // block groups are tied into one logical block group so that all the metadata + // for the block groups (bitmaps and inode tables) are close together for + // faster loading. Consequently, large files will be continuous on disk. + // However, this does not affect the placement of redundant superblocks and + // group descriptors. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups. + SbFlexBg = 0x200 + + // SbLargeDir shows that large directory enabled. Directory htree can be 3 + // levels deep. Directory htrees are allowed to be 2 levels deep otherwise. + SbLargeDir = 0x4000 + + // SbInlineData allows inline data in inodes for really small files. + SbInlineData = 0x8000 + + // SbEncrypted indicates that this fs contains encrypted inodes. + SbEncrypted = 0x10000 +) + +// IncompatFeatures represents a superblock's incompatible feature set. If the +// kernel does not understand any of these feature, it should refuse to mount. +type IncompatFeatures struct { + DirentFileType bool + Recovery bool + JournalDev bool + MetaBG bool + Extents bool + Is64Bit bool + MMP bool + FlexBg bool + LargeDir bool + InlineData bool + Encrypted bool +} + +// ToInt converts superblock incompatible features back to its 32-bit rep. +func (f IncompatFeatures) ToInt() uint32 { + var res uint32 + + if f.DirentFileType { + res |= SbDirentFileType + } + if f.Recovery { + res |= SbRecovery + } + if f.JournalDev { + res |= SbJournalDev + } + if f.MetaBG { + res |= SbMetaBG + } + if f.Extents { + res |= SbExtents + } + if f.Is64Bit { + res |= SbIs64Bit + } + if f.MMP { + res |= SbMMP + } + if f.FlexBg { + res |= SbFlexBg + } + if f.LargeDir { + res |= SbLargeDir + } + if f.InlineData { + res |= SbInlineData + } + if f.Encrypted { + res |= SbEncrypted + } + + return res +} + +// IncompatFeaturesFromInt converts the integer representation of superblock +// incompatible features to IncompatFeatures struct. +func IncompatFeaturesFromInt(f uint32) IncompatFeatures { + return IncompatFeatures{ + DirentFileType: f&SbDirentFileType > 0, + Recovery: f&SbRecovery > 0, + JournalDev: f&SbJournalDev > 0, + MetaBG: f&SbMetaBG > 0, + Extents: f&SbExtents > 0, + Is64Bit: f&SbIs64Bit > 0, + MMP: f&SbMMP > 0, + FlexBg: f&SbFlexBg > 0, + LargeDir: f&SbLargeDir > 0, + InlineData: f&SbInlineData > 0, + Encrypted: f&SbEncrypted > 0, + } +} + +// Superblock readonly compatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbSparse indicates sparse superblocks. Only groups with number either 0 or + // a power of 3, 5, or 7 will have redundant copies of the superblock and + // block descriptors. + SbSparse = 0x1 + + // SbLargeFile indicates that this fs has been used to store a file >= 2GiB. + SbLargeFile = 0x2 + + // SbHugeFile indicates that this fs contains files whose sizes are + // represented in units of logicals blocks, not 512-byte sectors. + SbHugeFile = 0x8 + + // SbGdtCsum indicates that group descriptors have checksums. + SbGdtCsum = 0x10 + + // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a + // 32,000 subdirectory limit. + SbDirNlink = 0x20 + + // SbExtraIsize indicates that large inodes exist on this filesystem. + SbExtraIsize = 0x40 + + // SbHasSnapshot indicates the existence of a snapshot. + SbHasSnapshot = 0x80 + + // SbQuota enables usage tracking for all quota types. + SbQuota = 0x100 + + // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation + // unit becomes a cluster rather than a data block. Then block bitmaps track + // clusters, not data blocks. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc. + SbBigalloc = 0x200 + + // SbMetadataCsum indicates that the fs supports metadata checksumming. + SbMetadataCsum = 0x400 + + // SbReadOnly marks this filesystem as readonly. Should refuse to mount in + // read/write mode. + SbReadOnly = 0x1000 +) + +// RoCompatFeatures represents a superblock's readonly compatible feature set. +// If the kernel does not understand any of these feature, it can still mount +// readonly. But if the user wants to mount read/write, the kernel should +// refuse to mount. +type RoCompatFeatures struct { + Sparse bool + LargeFile bool + HugeFile bool + GdtCsum bool + DirNlink bool + ExtraIsize bool + HasSnapshot bool + Quota bool + Bigalloc bool + MetadataCsum bool + ReadOnly bool +} + +// ToInt converts superblock readonly compatible features to its 32-bit rep. +func (f RoCompatFeatures) ToInt() uint32 { + var res uint32 + + if f.Sparse { + res |= SbSparse + } + if f.LargeFile { + res |= SbLargeFile + } + if f.HugeFile { + res |= SbHugeFile + } + if f.GdtCsum { + res |= SbGdtCsum + } + if f.DirNlink { + res |= SbDirNlink + } + if f.ExtraIsize { + res |= SbExtraIsize + } + if f.HasSnapshot { + res |= SbHasSnapshot + } + if f.Quota { + res |= SbQuota + } + if f.Bigalloc { + res |= SbBigalloc + } + if f.MetadataCsum { + res |= SbMetadataCsum + } + if f.ReadOnly { + res |= SbReadOnly + } + + return res +} + +// RoCompatFeaturesFromInt converts the integer representation of superblock +// readonly compatible features to RoCompatFeatures struct. +func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures { + return RoCompatFeatures{ + Sparse: f&SbSparse > 0, + LargeFile: f&SbLargeFile > 0, + HugeFile: f&SbHugeFile > 0, + GdtCsum: f&SbGdtCsum > 0, + DirNlink: f&SbDirNlink > 0, + ExtraIsize: f&SbExtraIsize > 0, + HasSnapshot: f&SbHasSnapshot > 0, + Quota: f&SbQuota > 0, + Bigalloc: f&SbBigalloc > 0, + MetadataCsum: f&SbMetadataCsum > 0, + ReadOnly: f&SbReadOnly > 0, + } +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_32.go b/pkg/sentry/fs/ext4/disklayout/superblock_32.go new file mode 100644 index 000000000..587e4afaa --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_32.go @@ -0,0 +1,75 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of +// the ext4_super_block struct in fs/ext4/ext4.h. +type SuperBlock32Bit struct { + // We embed the old superblock struct here because the 32-bit version is just + // an extension of the old version. + SuperBlockOld + + FirstInode uint32 + InodeSizeRaw uint16 + BlockGroupNumber uint16 + FeatureCompat uint32 + FeatureIncompat uint32 + FeatureRoCompat uint32 + UUID [16]byte + VolumeName [16]byte + LastMounted [64]byte + AlgoUsageBitmap uint32 + PreallocBlocks uint8 + PreallocDirBlocks uint8 + ReservedGdtBlocks uint16 + JournalUUID [16]byte + JournalInum uint32 + JournalDev uint32 + LastOrphan uint32 + HashSeed [4]uint32 + DefaultHashVersion uint8 + JnlBackupType uint8 + BgDescSizeRaw uint16 + DefaultMountOpts uint32 + FirstMetaBg uint32 + MkfsTime uint32 + JnlBlocks [17]uint32 +} + +// Compiles only if SuperBlock32Bit implements SuperBlock. +var _ SuperBlock = (*SuperBlock32Bit)(nil) + +// Only override methods which change based on the additional fields above. +// Not overriding SuperBlock.BgDescSize because it would still return 32 here. + +// InodeSize implements SuperBlock.InodeSize. +func (sb *SuperBlock32Bit) InodeSize() uint16 { + return sb.InodeSizeRaw +} + +// CompatibleFeatures implements SuperBlock.CompatibleFeatures. +func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures { + return CompatFeaturesFromInt(sb.FeatureCompat) +} + +// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. +func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures { + return IncompatFeaturesFromInt(sb.FeatureIncompat) +} + +// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. +func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures { + return RoCompatFeaturesFromInt(sb.FeatureRoCompat) +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_64.go b/pkg/sentry/fs/ext4/disklayout/superblock_64.go new file mode 100644 index 000000000..a2c2278fb --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_64.go @@ -0,0 +1,94 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of +// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly +// 1024 bytes (smallest possible block size) and hence the superblock always +// fits in no more than one data block. +type SuperBlock64Bit struct { + // We embed the 32-bit struct here because 64-bit version is just an extension + // of the 32-bit version. + SuperBlock32Bit + + BlocksCountHi uint32 + ReservedBlocksCountHi uint32 + FreeBlocksCountHi uint32 + MinInodeSize uint16 + WantInodeSize uint16 + Flags uint32 + RaidStride uint16 + MmpInterval uint16 + MmpBlock uint64 + RaidStripeWidth uint32 + LogGroupsPerFlex uint8 + ChecksumType uint8 + _ uint16 + KbytesWritten uint64 + SnapshotInum uint32 + SnapshotID uint32 + SnapshotRsrvBlocksCount uint64 + SnapshotList uint32 + ErrorCount uint32 + FirstErrorTime uint32 + FirstErrorInode uint32 + FirstErrorBlock uint64 + FirstErrorFunction [32]byte + FirstErrorLine uint32 + LastErrorTime uint32 + LastErrorInode uint32 + LastErrorLine uint32 + LastErrorBlock uint64 + LastErrorFunction [32]byte + MountOpts [64]byte + UserQuotaInum uint32 + GroupQuotaInum uint32 + OverheadBlocks uint32 + BackupBgs [2]uint32 + EncryptAlgos [4]uint8 + EncryptPwSalt [16]uint8 + LostFoundInode uint32 + ProjectQuotaInode uint32 + ChecksumSeed uint32 + WtimeHi uint8 + MtimeHi uint8 + MkfsTimeHi uint8 + LastCheckHi uint8 + FirstErrorTimeHi uint8 + LastErrorTimeHi uint8 + _ [2]uint8 + Encoding uint16 + EncodingFlags uint16 + _ [95]uint32 + Checksum uint32 +} + +// Compiles only if SuperBlock64Bit implements SuperBlock. +var _ SuperBlock = (*SuperBlock64Bit)(nil) + +// Only override methods which change based on the 64-bit feature. + +// BlocksCount implements SuperBlock.BlocksCount. +func (sb *SuperBlock64Bit) BlocksCount() uint64 { + return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo) +} + +// FreeBlocksCount implements SuperBlock.FreeBlocksCount. +func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 { + return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo) +} + +// BgDescSize implements SuperBlock.BgDescSize. +func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw } diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_old.go b/pkg/sentry/fs/ext4/disklayout/superblock_old.go new file mode 100644 index 000000000..c74953610 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_old.go @@ -0,0 +1,102 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlockOld implements SuperBlock and represents the old version of the +// superblock struct in ext2 and ext3 systems. +type SuperBlockOld struct { + InodesCountRaw uint32 + BlocksCountLo uint32 + ReservedBlocksCount uint32 + FreeBlocksCountLo uint32 + FreeInodesCountRaw uint32 + FirstDataBlockRaw uint32 + LogBlockSize uint32 + LogClusterSize uint32 + BlocksPerGroupRaw uint32 + ClustersPerGroupRaw uint32 + InodesPerGroupRaw uint32 + Mtime uint32 + Wtime uint32 + MountCountRaw uint16 + MaxMountCountRaw uint16 + MagicRaw uint16 + State uint16 + Errors uint16 + MinorRevLevel uint16 + LastCheck uint32 + CheckInterval uint32 + CreatorOS uint32 + RevLevel uint32 + DefResUID uint16 + DefResGID uint16 +} + +// InodesCount implements SuperBlock.InodesCount. +func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw } + +// BlocksCount implements SuperBlock.BlocksCount. +func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) } + +// FreeBlocksCount implements SuperBlock.FreeBlocksCount. +func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) } + +// FreeInodesCount implements SuperBlock.FreeInodesCount. +func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw } + +// MountCount implements SuperBlock.MountCount. +func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw } + +// MaxMountCount implements SuperBlock.MaxMountCount. +func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw } + +// FirstDataBlock implements SuperBlock.FirstDataBlock. +func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw } + +// BlockSize implements SuperBlock.BlockSize. +func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) } + +// BlocksPerGroup implements SuperBlock.BlocksPerGroup. +func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw } + +// ClusterSize implements SuperBlock.ClusterSize. +func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) } + +// ClustersPerGroup implements SuperBlock.ClustersPerGroup. +func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw } + +// InodeSize implements SuperBlock.InodeSize. +func (sb *SuperBlockOld) InodeSize() uint16 { return oldInodeSize } + +// InodesPerGroup implements SuperBlock.InodesPerGroup. +func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw } + +// BgDescSize implements SuperBlock.BgDescSize. +func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 } + +// CompatibleFeatures implements SuperBlock.CompatibleFeatures. +func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} } + +// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. +func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} } + +// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. +func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} } + +// Magic implements SuperBlock.Magic. +func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw } + +// Revision implements SuperBlock.Revision. +func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) } diff --git a/pkg/abi/linux/ashmem.go b/pkg/sentry/fs/ext4/disklayout/superblock_test.go index 2a722abe0..463b5ba21 100644 --- a/pkg/abi/linux/ashmem.go +++ b/pkg/sentry/fs/ext4/disklayout/superblock_test.go @@ -1,4 +1,4 @@ -// Copyright 2018 The gVisor Authors. +// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,18 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -package linux +package disklayout -// Constants used by ashmem in pin-related ioctls. -const ( - AshmemNotPurged = 0 - AshmemWasPurged = 1 - AshmemIsUnpinned = 0 - AshmemIsPinned = 1 +import ( + "testing" ) -// AshmemPin structure is used for pin-related ioctls. -type AshmemPin struct { - Offset uint32 - Len uint32 +// TestSuperBlockSize tests that the superblock structs are of the correct +// size. +func TestSuperBlockSize(t *testing.T) { + assertSize(t, SuperBlockOld{}, 84) + assertSize(t, SuperBlock32Bit{}, 336) + assertSize(t, SuperBlock64Bit{}, 1024) } diff --git a/pkg/sentry/kernel/kdefs/kdefs.go b/pkg/sentry/fs/ext4/disklayout/test_utils.go index 304da2032..9c63f04c0 100644 --- a/pkg/sentry/kernel/kdefs/kdefs.go +++ b/pkg/sentry/fs/ext4/disklayout/test_utils.go @@ -1,4 +1,4 @@ -// Copyright 2018 The gVisor Authors. +// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,9 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package kdefs defines common kernel definitions. -// -package kdefs +package disklayout + +import ( + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/binary" +) + +func assertSize(t *testing.T, v interface{}, want uintptr) { + t.Helper() -// FD is a File Descriptor. -type FD int32 + if got := binary.Size(v); got != want { + t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got) + } +} diff --git a/pkg/sentry/fs/ext4/fs.go b/pkg/sentry/fs/ext4/fs.go index de5f0ef63..5c7274821 100644 --- a/pkg/sentry/fs/ext4/fs.go +++ b/pkg/sentry/fs/ext4/fs.go @@ -16,8 +16,8 @@ package ext4 import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // filesystem implements fs.Filesystem for ext4. diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index 098463e97..bf00b9c09 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -9,8 +9,8 @@ go_library( "pipe_opener.go", "pipe_state.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe", - imports = ["gvisor.googlesource.com/gvisor/pkg/sentry/fs"], + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe", + imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/fd", diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index 4ef7ea08a..5a0a67eab 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -20,17 +20,17 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/secio" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // pipeOperations are the fs.FileOperations of a host pipe. diff --git a/pkg/sentry/fs/fdpipe/pipe_opener.go b/pkg/sentry/fs/fdpipe/pipe_opener.go index 0cabe2e18..64b558975 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener.go @@ -20,10 +20,10 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/syserror" ) // NonBlockingOpener is a generic host file opener used to retry opening host diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 8c8b1b40c..8e4d839e1 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -25,12 +25,12 @@ import ( "time" "github.com/google/uuid" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) type hostOpener struct { @@ -359,7 +359,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) { inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, }) - file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, pipeOps) + file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, pipeOps) // Check that the file we opened points to a pipe with a non-empty read ahead buffer. bufsize := len(pipeOps.readAheadBuffer) diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go index 8b347aa11..29175fb3d 100644 --- a/pkg/sentry/fs/fdpipe/pipe_state.go +++ b/pkg/sentry/fs/fdpipe/pipe_state.go @@ -19,8 +19,8 @@ import ( "io/ioutil" "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index b59a6aa0e..69abc1e71 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -21,12 +21,12 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) func singlePipeFD() (int, error) { @@ -50,11 +50,11 @@ func mockPipeDirent(t *testing.T) *fs.Dirent { User: fs.PermMask{Read: true, Write: true}, }, } - inode := fs.NewInode(node, fs.NewMockMountSource(nil), fs.StableAttr{ + inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, BlockSize: usermem.PageSize, }) - return fs.NewDirent(inode, "") + return fs.NewDirent(ctx, inode, "") } func TestNewPipe(t *testing.T) { @@ -285,7 +285,7 @@ func TestPipeRequest(t *testing.T) { defer p.Release() inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p) + file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) // Issue request via the appropriate function. switch c := test.context.(type) { @@ -339,7 +339,7 @@ func TestPipeReadAheadBuffer(t *testing.T) { inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, }) - file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p) + file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) // In total we expect to read data + buffered. total := append(buffered, data...) @@ -358,9 +358,9 @@ func TestPipeReadAheadBuffer(t *testing.T) { } } -// This is very important for pipes in general because they can return EWOULDBLOCK and for -// those that block they must continue until they have read all of the data (and report it -// as such. +// This is very important for pipes in general because they can return +// EWOULDBLOCK and for those that block they must continue until they have read +// all of the data (and report it as such). func TestPipeReadsAccumulate(t *testing.T) { fds := make([]int, 2) if err := syscall.Pipe(fds); err != nil { @@ -385,7 +385,7 @@ func TestPipeReadsAccumulate(t *testing.T) { inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, }) - file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p) + file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) // Write some some bytes to the pipe. data := []byte("some message") @@ -393,8 +393,8 @@ func TestPipeReadsAccumulate(t *testing.T) { t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) } - // Construct a segment vec that is a bit more than we have written so we trigger - // an EWOULDBLOCK. + // Construct a segment vec that is a bit more than we have written so we + // trigger an EWOULDBLOCK. wantBytes := len(data) + 1 readBuffer := make([]byte, wantBytes) iov := usermem.BytesIOSequence(readBuffer) @@ -446,41 +446,57 @@ func TestPipeWritesAccumulate(t *testing.T) { wfile.Close() t.Fatalf("newPipeOperations got error %v, want nil", err) } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests will - // likely be borked, because it's global :( + // Don't forget to remove the fd from the fd notifier. Otherwise other tests + // will likely be borked, because it's global :( defer p.Release() inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, }) - file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p) + file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) + + pipeSize, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(wfile.FD()), syscall.F_GETPIPE_SZ, 0) + if errno != 0 { + t.Fatalf("fcntl(F_GETPIPE_SZ) failed: %v", errno) + } + t.Logf("Pipe buffer size: %d", pipeSize) - // Construct a segment vec that is larger than the pipe size to trigger an EWOULDBLOCK. - wantBytes := 65536 * 2 + // Construct a segment vec that is larger than the pipe size to trigger an + // EWOULDBLOCK. + wantBytes := int(pipeSize) * 2 writeBuffer := make([]byte, wantBytes) for i := 0; i < wantBytes; i++ { writeBuffer[i] = 'a' } iov := usermem.BytesIOSequence(writeBuffer) n, err := p.Write(ctx, file, iov, 0) - total := n - iov = iov.DropFirst64(n) if err != syserror.ErrWouldBlock { t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock) } + if n != int64(pipeSize) { + t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) + } + total := n + iov = iov.DropFirst64(n) // Read the entire pipe buf size to make space for the second half. - throwAway := make([]byte, 65536) - if n, err := syscall.Read(fds[0], throwAway); n != len(throwAway) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(throwAway)) + readBuffer := make([]byte, n) + if n, err := syscall.Read(fds[0], readBuffer); n != len(readBuffer) || err != nil { + t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(readBuffer)) + } + if !bytes.Equal(readBuffer, writeBuffer[:len(readBuffer)]) { + t.Fatalf("wrong data read from pipe, got: %v, want: %v", readBuffer, writeBuffer) } // This time we should not block. n, err = p.Write(ctx, file, iov, 0) - total += n if err != nil { t.Fatalf("Writev got error %v, want nil", err) } + if n != int64(pipeSize) { + t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) + } + total += n // Assert that the result we got back is cumulative. if total != int64(wantBytes) { diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index f64954457..bb8117f89 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -20,17 +20,17 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/amutex" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) var ( @@ -130,14 +130,15 @@ type File struct { // to false respectively. func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOperations) *File { dirent.IncRef() - f := &File{ + f := File{ UniqueID: uniqueid.GlobalFromContext(ctx), Dirent: dirent, FileOperations: fops, flags: flags, } f.mu.Init() - return f + f.EnableLeakCheck("fs.File") + return &f } // DecRef destroys the File when it is no longer referenced. @@ -267,7 +268,7 @@ func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error) reads.Increment() n, err := f.FileOperations.Read(ctx, f, dst, f.offset) - if n > 0 { + if n > 0 && !f.flags.NonSeekable { atomic.AddInt64(&f.offset, n) } f.mu.Unlock() @@ -310,9 +311,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error return 0, syserror.ErrInterrupted } + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) // Handle append mode. if f.Flags().Append { if err := f.offsetForAppend(ctx, &f.offset); err != nil { + unlockAppendMu() f.mu.Unlock() return 0, err } @@ -322,6 +325,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error limit, ok := f.checkLimit(ctx, f.offset) switch { case ok && limit == 0: + unlockAppendMu() f.mu.Unlock() return 0, syserror.ErrExceedsFileSizeLimit case ok: @@ -330,9 +334,10 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error // We must hold the lock during the write. n, err := f.FileOperations.Write(ctx, f, src, f.offset) - if n >= 0 { + if n >= 0 && !f.flags.NonSeekable { atomic.StoreInt64(&f.offset, f.offset+n) } + unlockAppendMu() f.mu.Unlock() return n, err } @@ -348,13 +353,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // However, on Linux, if a file is opened with O_APPEND, pwrite() // appends data to the end of the file, regardless of the value of // offset." + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) + defer unlockAppendMu() + if f.Flags().Append { - if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted - } - defer f.mu.Unlock() if err := f.offsetForAppend(ctx, &offset); err != nil { - f.mu.Unlock() return 0, err } } @@ -373,7 +376,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // offsetForAppend sets the given offset to the end of the file. // -// Precondition: the underlying file mutex should be held. +// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing. func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { uattr, err := f.Dirent.Inode.UnstableAttr(ctx) if err != nil { diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go index 0f2dfa273..d86f5bf45 100644 --- a/pkg/sentry/fs/file_operations.go +++ b/pkg/sentry/fs/file_operations.go @@ -15,11 +15,11 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // SpliceOpts define how a splice works. @@ -155,5 +155,16 @@ type FileOperations interface { // refer. // // Preconditions: The AddressSpace (if any) that io refers to is activated. - Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) + Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) +} + +// FifoSizer is an interface for setting and getting the size of a pipe. +type FifoSizer interface { + // FifoSize returns the pipe capacity in bytes. + FifoSize(ctx context.Context, file *File) (int64, error) + + // SetFifoSize sets the new pipe capacity in bytes. + // + // The new size is returned (which may be capped). + SetFifoSize(size int64) (int64, error) } diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 273de1e14..9820f0b13 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -17,13 +17,13 @@ package fs import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // overlayFile gets a handle to a file from the upper or lower filesystem @@ -85,12 +85,6 @@ type overlayFileOperations struct { // protected by File.mu of the owning file, which is held during // Readdir and Seek calls. dirCursor string - - // dirCacheMu protects dirCache. - dirCacheMu sync.RWMutex `state:"nosave"` - - // dirCache is cache of DentAttrs from upper and lower Inodes. - dirCache *SortedDentryMap } // Release implements FileOperations.Release. @@ -171,53 +165,68 @@ func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, seriali if root != nil { defer root.DecRef() } + dirCtx := &DirCtx{ Serializer: serializer, DirCursor: &f.dirCursor, } + return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset()) +} - // If the directory dirent is frozen, then DirentReaddir will calculate - // the children based off the frozen dirent tree. There is no need to - // call readdir on the upper/lower layers. - if file.Dirent.frozen { - return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset()) +// IterateDir implements DirIterator.IterateDir. +func (f *overlayFileOperations) IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error) { + o := d.Inode.overlay + + if !d.Inode.MountSource.CacheReaddir() { + // Can't use the dirCache. Simply read the entries. + entries, err := readdirEntries(ctx, o) + if err != nil { + return offset, err + } + n, err := GenericReaddir(dirCtx, entries) + return offset + n, err } - // Otherwise proceed with usual overlay readdir. - o := file.Dirent.Inode.overlay + // Otherwise, use or create cached entries. + + o.dirCacheMu.RLock() + if o.dirCache != nil { + n, err := GenericReaddir(dirCtx, o.dirCache) + o.dirCacheMu.RUnlock() + return offset + n, err + } + o.dirCacheMu.RUnlock() // readdirEntries holds o.copyUpMu to ensure that copy-up does not - // occur while calculating the readir results. + // occur while calculating the readdir results. // // However, it is possible for a copy-up to occur after the call to - // readdirEntries, but before setting f.dirCache. This is OK, since - // copy-up only does not change the children in a way that would affect - // the children returned in dirCache. Copy-up only moves - // files/directories between layers in the overlay. + // readdirEntries, but before setting o.dirCache. This is OK, since + // copy-up does not change the children in a way that would affect the + // children returned in dirCache. Copy-up only moves files/directories + // between layers in the overlay. // - // It is also possible for Readdir to race with a Create operation - // (which may trigger a copy-up during it's execution). Depending on - // whether the Create happens before or after the readdirEntries call, - // the newly created file may or may not appear in the readdir results. - // But this can only be caused by a real race between readdir and - // create syscalls, so it's also OK. - dirCache, err := readdirEntries(ctx, o) - if err != nil { - return file.Offset(), err + // We must hold dirCacheMu around both readdirEntries and setting + // o.dirCache to synchronize with dirCache invalidations done by + // Create, Remove, Rename. + o.dirCacheMu.Lock() + + // We expect dirCache to be nil (we just checked above), but there is a + // chance that a racing call managed to just set it, in which case we + // can use that new value. + if o.dirCache == nil { + dirCache, err := readdirEntries(ctx, o) + if err != nil { + o.dirCacheMu.Unlock() + return offset, err + } + o.dirCache = dirCache } - f.dirCacheMu.Lock() - f.dirCache = dirCache - f.dirCacheMu.Unlock() + o.dirCacheMu.DowngradeLock() + n, err := GenericReaddir(dirCtx, o.dirCache) + o.dirCacheMu.RUnlock() - return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset()) -} - -// IterateDir implements DirIterator.IterateDir. -func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) { - f.dirCacheMu.RLock() - n, err := GenericReaddir(dirCtx, f.dirCache) - f.dirCacheMu.RUnlock() return offset + n, err } @@ -338,13 +347,14 @@ func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opt // preventing us from saving a proper inode mapping for the // file. file.IncRef() - id := &overlayMappingIdentity{ + id := overlayMappingIdentity{ id: opts.MappingIdentity, overlayFile: file, } + id.EnableLeakCheck("fs.overlayMappingIdentity") // Swap out the old MappingIdentity for the wrapped one. - opts.MappingIdentity = id + opts.MappingIdentity = &id return nil } @@ -388,9 +398,49 @@ func (f *overlayFileOperations) UnstableAttr(ctx context.Context, file *File) (U return f.lower.UnstableAttr(ctx) } -// Ioctl implements fs.FileOperations.Ioctl and always returns ENOTTY. -func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - return 0, syserror.ENOTTY +// Ioctl implements fs.FileOperations.Ioctl. +func (f *overlayFileOperations) Ioctl(ctx context.Context, overlayFile *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + f.upperMu.Lock() + defer f.upperMu.Unlock() + + if f.upper == nil { + // It's possible that ioctl changes the file. Since we don't know all + // possible ioctls, only allow them to propagate to the upper. Triggering a + // copy up on any ioctl would be too drastic. In the future, it can have a + // list of ioctls that are safe to send to lower and a list that triggers a + // copy up. + return 0, syserror.ENOTTY + } + return f.upper.FileOperations.Ioctl(ctx, f.upper, io, args) +} + +// FifoSize implements FifoSizer.FifoSize. +func (f *overlayFileOperations) FifoSize(ctx context.Context, overlayFile *File) (rv int64, err error) { + err = f.onTop(ctx, overlayFile, func(file *File, ops FileOperations) error { + sz, ok := ops.(FifoSizer) + if !ok { + return syserror.EINVAL + } + rv, err = sz.FifoSize(ctx, file) + return err + }) + return +} + +// SetFifoSize implements FifoSizer.SetFifoSize. +func (f *overlayFileOperations) SetFifoSize(size int64) (rv int64, err error) { + f.upperMu.Lock() + defer f.upperMu.Unlock() + + if f.upper == nil { + // Named pipes cannot be copied up and changes to the lower are prohibited. + return 0, syserror.EINVAL + } + sz, ok := f.upper.FileOperations.(FifoSizer) + if !ok { + return 0, syserror.EINVAL + } + return sz.SetFifoSize(size) } // readdirEntries returns a sorted map of directory entries from the diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index 6a2b8007c..2fb824d5c 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -18,18 +18,18 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" ) func TestReaddir(t *testing.T) { ctx := contexttest.Context(t) ctx = &rootContext{ Context: ctx, - root: fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root"), + root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), } for _, test := range []struct { // Test description. @@ -103,7 +103,7 @@ func TestReaddir(t *testing.T) { }, } { t.Run(test.desc, func(t *testing.T) { - openDir, err := test.dir.GetFile(ctx, fs.NewDirent(test.dir, "stub"), fs.FileFlags{Read: true}) + openDir, err := test.dir.GetFile(ctx, fs.NewDirent(ctx, test.dir, "stub"), fs.FileFlags{Read: true}) if err != nil { t.Fatalf("GetFile got error %v, want nil", err) } @@ -126,7 +126,7 @@ func TestReaddirRevalidation(t *testing.T) { ctx := contexttest.Context(t) ctx = &rootContext{ Context: ctx, - root: fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root"), + root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), } // Create an overlay with two directories, each with one file. @@ -139,7 +139,7 @@ func TestReaddirRevalidation(t *testing.T) { upperDir := upper.InodeOperations.(*dir).InodeOperations.(*ramfs.Dir) // Check that overlay returns the files from both upper and lower. - openDir, err := overlay.GetFile(ctx, fs.NewDirent(overlay, "stub"), fs.FileFlags{Read: true}) + openDir, err := overlay.GetFile(ctx, fs.NewDirent(ctx, overlay, "stub"), fs.FileFlags{Read: true}) if err != nil { t.Fatalf("GetFile got error %v, want nil", err) } @@ -156,7 +156,7 @@ func TestReaddirRevalidation(t *testing.T) { if err := upperDir.Remove(ctx, upper, "a"); err != nil { t.Fatalf("error removing child: %v", err) } - upperDir.AddChild(ctx, "c", fs.NewInode(fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0), + upperDir.AddChild(ctx, "c", fs.NewInode(ctx, fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0), upper.MountSource, fs.StableAttr{Type: fs.RegularFile})) // Seek to beginning of the directory and do the readdir again. @@ -186,7 +186,7 @@ func TestReaddirOverlayFrozen(t *testing.T) { overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false) // Set that overlay as the root. - root := fs.NewDirent(overlayInode, "root") + root := fs.NewDirent(ctx, overlayInode, "root") ctx = &rootContext{ Context: ctx, root: root, diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go index acd84dfcc..b157fd228 100644 --- a/pkg/sentry/fs/filesystems.go +++ b/pkg/sentry/fs/filesystems.go @@ -20,7 +20,7 @@ import ( "strings" "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // FilesystemFlags matches include/linux/fs.h:file_system_type.fs_flags. diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD index 05ca72aa0..a9d6d9301 100644 --- a/pkg/sentry/fs/filetest/BUILD +++ b/pkg/sentry/fs/filetest/BUILD @@ -6,7 +6,7 @@ go_library( name = "filetest", testonly = 1, srcs = ["filetest.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/filetest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go index c0b1b088d..22270a494 100644 --- a/pkg/sentry/fs/filetest/filetest.go +++ b/pkg/sentry/fs/filetest/filetest.go @@ -19,13 +19,13 @@ import ( "fmt" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // TestFileOperations is an implementation of the File interface. It provides all @@ -46,7 +46,7 @@ type TestFileOperations struct { // NewTestFile creates and initializes a new test file. func NewTestFile(tb testing.TB) *fs.File { ctx := contexttest.Context(tb) - dirent := fs.NewDirent(anon.NewInode(ctx), "test") + dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "test") return fs.NewFile(ctx, dirent, fs.FileFlags{}, &TestFileOperations{}) } diff --git a/pkg/sentry/fs/flags.go b/pkg/sentry/fs/flags.go index 5c8cb773f..1278f9c78 100644 --- a/pkg/sentry/fs/flags.go +++ b/pkg/sentry/fs/flags.go @@ -15,7 +15,7 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // FileFlags encodes file flags. @@ -57,6 +57,9 @@ type FileFlags struct { // Linux sets this flag for all files. Since gVisor is only compatible // with 64-bit Linux, it also sets this flag for all files. LargeFile bool + + // NonSeekable indicates that file.offset isn't used. + NonSeekable bool } // SettableFileFlags is a subset of FileFlags above that can be changed diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index 632055cce..8b2a5e6b2 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -56,8 +56,8 @@ package fs import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" ) var ( diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 44f43b965..6499f87ac 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -7,8 +7,8 @@ go_template_instance( name = "dirty_set_impl", out = "dirty_set_impl.go", imports = { - "memmap": "gvisor.googlesource.com/gvisor/pkg/sentry/memmap", - "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", prefix = "Dirty", @@ -25,7 +25,7 @@ go_template_instance( name = "frame_ref_set_impl", out = "frame_ref_set_impl.go", imports = { - "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", prefix = "frameRef", @@ -42,8 +42,8 @@ go_template_instance( name = "file_range_set_impl", out = "file_range_set_impl.go", imports = { - "memmap": "gvisor.googlesource.com/gvisor/pkg/sentry/memmap", - "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", prefix = "FileRange", @@ -74,7 +74,7 @@ go_library( "inode.go", "inode_cached.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fsutil", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go index f1451d77a..12132680b 100644 --- a/pkg/sentry/fs/fsutil/dirty_set.go +++ b/pkg/sentry/fs/fsutil/dirty_set.go @@ -17,11 +17,11 @@ package fsutil import ( "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go index d9c68baa3..75575d994 100644 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ b/pkg/sentry/fs/fsutil/dirty_set_test.go @@ -18,8 +18,8 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func TestDirtySet(t *testing.T) { diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go index 9381963d0..626b9126a 100644 --- a/pkg/sentry/fs/fsutil/file.go +++ b/pkg/sentry/fs/fsutil/file.go @@ -15,13 +15,13 @@ package fsutil import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // FileNoopRelease implements fs.FileOperations.Release for files that have no @@ -219,7 +219,7 @@ func GenericConfigureMMap(file *fs.File, m memmap.Mappable, opts *memmap.MMapOpt type FileNoIoctl struct{} // Ioctl implements fs.FileOperations.Ioctl. -func (FileNoIoctl) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (FileNoIoctl) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) { return 0, syserror.ENOTTY } @@ -285,7 +285,7 @@ func NewStaticDirFileOperations(dentries *fs.SortedDentryMap) *StaticDirFileOper } // IterateDir implements DirIterator.IterateDir. -func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) { +func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) { n, err := fs.GenericReaddir(dirCtx, sdfo.dentryMap) return offset + n, err } diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index b5ac6c71c..0a5466b0a 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -19,13 +19,13 @@ import ( "io" "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // FileRangeSet maps offsets into a memmap.Mappable to offsets into a diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go index 6565c28c8..dd63db32b 100644 --- a/pkg/sentry/fs/fsutil/frame_ref_set.go +++ b/pkg/sentry/fs/fsutil/frame_ref_set.go @@ -17,7 +17,7 @@ package fsutil import ( "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform" ) type frameRefSetFunctions struct{} diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index 2bdfc0db6..e239f12a5 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -19,11 +19,11 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // HostFileMapper caches mappings of an arbitrary host file descriptor. It is diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go index 7167be263..ad11a0573 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go @@ -17,7 +17,7 @@ package fsutil import ( "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/safemem" ) func (*HostFileMapper) unsafeBlockFromChunkMapping(addr uintptr) safemem.Block { diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index ad0518b8f..d2495cb83 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -18,12 +18,12 @@ import ( "math" "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // HostMappable implements memmap.Mappable and platform.File over a diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 925887335..4e100a402 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -17,13 +17,13 @@ package fsutil import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // SimpleFileInode is a simple implementation of InodeOperations. diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 7bee2eb5f..ed62049a9 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -19,17 +19,17 @@ import ( "io" "sync" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Lock order (compare the lock order model in mm/mm.go): diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go index be3d4b6fc..dc19255ed 100644 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ b/pkg/sentry/fs/fsutil/inode_cached_test.go @@ -19,14 +19,14 @@ import ( "io" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) type noopBackingFile struct{} @@ -253,11 +253,11 @@ func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateO } func anonInode(ctx context.Context) *fs.Inode { - return fs.NewInode(&SimpleFileInode{ + return fs.NewInode(ctx, &SimpleFileInode{ InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{ User: fs.PermMask{Read: true, Write: true}, }, 0), - }, fs.NewPseudoMountSource(), fs.StableAttr{ + }, fs.NewPseudoMountSource(ctx), fs.StableAttr{ Type: fs.Anonymous, BlockSize: usermem.PageSize, }) @@ -276,7 +276,7 @@ func TestRead(t *testing.T) { // Construct a 3-page file. buf := pagesOf('a', 'b', 'c') - file := fs.NewFile(ctx, fs.NewDirent(anonInode(ctx), "anon"), fs.FileFlags{}, nil) + file := fs.NewFile(ctx, fs.NewDirent(ctx, anonInode(ctx), "anon"), fs.FileFlags{}, nil) uattr := fs.UnstableAttr{ Size: int64(len(buf)), } diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md index 1e99a3357..71a577d9d 100644 --- a/pkg/sentry/fs/g3doc/inotify.md +++ b/pkg/sentry/fs/g3doc/inotify.md @@ -9,7 +9,7 @@ For the most part, the sentry implementation of inotify mirrors the Linux architecture. Inotify instances (i.e. the fd returned by inotify_init(2)) are backed by a pseudo-filesystem. Events are generated from various places in the sentry, including the [syscall layer][syscall_dir], the [vfs layer][dirent] and -the [process fd table][fd_map]. Watches are stored in inodes and generated +the [process fd table][fd_table]. Watches are stored in inodes and generated events are queued to the inotify instance owning the watches for delivery to the user. @@ -112,11 +112,11 @@ attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used `Inotify.mu` to also protect the event queue, this would violate the above lock ordering. -[dirent]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/dirent.go -[event]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify_event.go -[fd_map]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/kernel/fd_map.go -[inode]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inode.go -[inode_watches]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/syscalls/linux/ -[watch]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify_watch.go +[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go +[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go +[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go +[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go +[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go +[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go +[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/ +[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index f2c79b475..6b993928c 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -21,7 +21,7 @@ go_library( "socket.go", "util.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/gofer", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index c572f3396..4848e2374 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -17,12 +17,12 @@ package gofer import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // getattr returns the 9p attributes of the p9.File. On success, Mode, Size, and RDev diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index c59344589..cc11c6339 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -17,8 +17,8 @@ package gofer import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // cachePolicy is a 9p cache policy. It has methods that determine what to diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index be53ac4d9..44b72582a 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -15,9 +15,9 @@ package gofer import ( - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextFile is a wrapper around p9.File that notifies the context that diff --git a/pkg/sentry/fs/gofer/device.go b/pkg/sentry/fs/gofer/device.go index 1de6c247c..cbd3c5da2 100644 --- a/pkg/sentry/fs/gofer/device.go +++ b/pkg/sentry/fs/gofer/device.go @@ -14,7 +14,7 @@ package gofer -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // goferDevice is the gofer virtual device. var goferDevice = device.NewAnonMultiDevice() diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go index fb4f50113..9e2e412cd 100644 --- a/pkg/sentry/fs/gofer/file.go +++ b/pkg/sentry/fs/gofer/file.go @@ -19,17 +19,17 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) var ( @@ -137,7 +137,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer } // IterateDir implements fs.DirIterator.IterateDir. -func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) { +func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) { f.inodeOperations.readdirMu.Lock() defer f.inodeOperations.readdirMu.Unlock() diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index 31264e065..9aa68a70e 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -17,8 +17,8 @@ package gofer import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // afterLoad is invoked by stateify. diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index 6ab89fcc2..69999dc28 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -20,9 +20,9 @@ import ( "fmt" "strconv" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // The following are options defined by the Linux 9p client that we support, diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 29d34da7e..7fc3c32ae 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -20,11 +20,11 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/p9/p9test" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/p9/p9test" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // rootTest runs a test with a p9 mock and an fs.InodeOperations created from @@ -62,8 +62,8 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ file: rootFile, }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */) - m := fs.NewMountSource(s, &filesystem{}, fs.MountSourceFlags{}) - rootInode := fs.NewInode(rootInodeOperations, m, sattr) + m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) + rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) // Ensure that the cache is fully invalidated, so that any // close actions actually take place before the full harness is @@ -207,7 +207,7 @@ func TestRevalidation(t *testing.T) { name := fmt.Sprintf("cachepolicy=%s", test.cachePolicy) rootTest(t, name, test.cachePolicy, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { // Wrap in a dirent object. - rootDir := fs.NewDirent(rootInode, "root") + rootDir := fs.NewDirent(ctx, rootInode, "root") // Create a mock file a child of the root. We save when // this is generated, so that when the time changed, we diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index c7098cd36..27eeae3d9 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -17,14 +17,14 @@ package gofer import ( "io" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/secio" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/safemem" ) // handles are the open handles of a gofer file. They are reference counted to @@ -79,11 +79,12 @@ func newHandles(ctx context.Context, file contextFile, flags fs.FileFlags) (*han newFile.close(ctx) return nil, err } - h := &handles{ + h := handles{ File: newFile, Host: hostFile, } - return h, nil + h.EnableLeakCheck("gofer.handles") + return &h, nil } type handleReadWriter struct { diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index dcb3b2880..95b064aea 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -19,19 +19,19 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/syserror" ) // inodeOperations implements fs.InodeOperations. diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index ac22ee4b1..0b2eedb7c 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -20,11 +20,11 @@ import ( "path/filepath" "strings" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) // Some fs implementations may not support atime, ctime, or mtime in getattr. diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 092f8b586..8c17603f8 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -18,13 +18,13 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" ) // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's @@ -73,7 +73,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) // Construct a positive Dirent. - return fs.NewDirent(fs.NewInode(node, dir.MountSource, sattr), name), nil + return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil } // Creates a new Inode at name and returns its File based on the session's cache policy. @@ -141,20 +141,21 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) // Construct the positive Dirent. - d := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) + d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) defer d.DecRef() // Construct the new file, caching the handles if allowed. - h := &handles{ + h := handles{ File: newFile, Host: hostFile, } + h.EnableLeakCheck("gofer.handles") if iops.fileState.canShareHandles() { iops.fileState.handlesMu.Lock() - iops.fileState.setSharedHandlesLocked(flags, h) + iops.fileState.setSharedHandlesLocked(flags, &h) iops.fileState.handlesMu.Unlock() } - return NewFile(ctx, d, name, flags, iops, h), nil + return NewFile(ctx, d, name, flags, iops, &h), nil } // CreateLink uses Create to create a symlink between oldname and newname. @@ -277,7 +278,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) // Construct the positive Dirent. - childDir := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) + childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) i.session().endpoints.add(key, childDir, ep) return childDir, nil } diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 085a358fe..69d08a627 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -18,18 +18,18 @@ import ( "fmt" "sync" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/unet" ) // DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can -// be adjusted independentely from the other dirent caches. +// be adjusted independently from the other dirent caches. var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize // +stateify savable @@ -145,16 +145,21 @@ func (s *session) Destroy() { s.client.Close() } -// Revalidate implements MountSource.Revalidate. +// Revalidate implements MountSourceOperations.Revalidate. func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool { return s.cachePolicy.revalidate(ctx, name, parent, child) } -// Keep implements MountSource.Keep. +// Keep implements MountSourceOperations.Keep. func (s *session) Keep(d *fs.Dirent) bool { return s.cachePolicy.keep(d) } +// CacheReaddir implements MountSourceOperations.CacheReaddir. +func (s *session) CacheReaddir() bool { + return s.cachePolicy.cacheReaddir() +} + // ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. func (s *session) ResetInodeMappings() { s.inodeMappings = make(map[uint64]string) @@ -236,7 +241,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF } // Construct the session. - s := &session{ + s := session{ connID: dev, msize: o.msize, version: o.version, @@ -245,13 +250,14 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF superBlockFlags: superBlockFlags, mounter: mounter, } + s.EnableLeakCheck("gofer.session") if o.privateunixsocket { s.endpoints = newEndpointMaps() } // Construct the MountSource with the session and superBlockFlags. - m := fs.NewMountSource(s, filesystem, superBlockFlags) + m := fs.NewMountSource(ctx, &s, filesystem, superBlockFlags) // Given that gofer files can consume host FDs, restrict the number // of files that can be held by the cache. @@ -285,8 +291,8 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr, false) - return fs.NewInode(iops, m, sattr), nil + sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false) + return fs.NewInode(ctx, iops, m, sattr), nil } // newEndpointMaps creates a new endpointMaps. diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 68fbf3417..d045e04ff 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -17,10 +17,10 @@ package gofer import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/unet" ) // beforeSave is invoked by stateify. @@ -111,5 +111,4 @@ func (s *session) afterLoad() { panic("failed to restore endpoint maps: " + err.Error()) } } - } diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 7ac0a421f..a45a8f36c 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -15,14 +15,15 @@ package gofer import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/waiter" ) // BoundEndpoint returns a gofer-backed transport.BoundEndpoint. @@ -75,7 +76,7 @@ func sockTypeToP9(t linux.SockType) (p9.ConnectFlags, bool) { } // BidirectionalConnect implements ConnectableEndpoint.BidirectionalConnect. -func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error { +func (e *endpoint) BidirectionalConnect(ctx context.Context, ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error { cf, ok := sockTypeToP9(ce.Type()) if !ok { return syserr.ErrConnectionRefused @@ -100,7 +101,7 @@ func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnC return syserr.ErrConnectionRefused } - c, serr := host.NewConnectedEndpoint(hostFile, ce.WaiterQueue(), e.path) + c, serr := host.NewConnectedEndpoint(ctx, hostFile, ce.WaiterQueue(), e.path) if serr != nil { ce.Unlock() log.Warningf("Gofer returned invalid host socket for BidirectionalConnect; file %+v flags %+v: %v", e.file, cf, serr) @@ -116,13 +117,13 @@ func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnC // UnidirectionalConnect implements // transport.BoundEndpoint.UnidirectionalConnect. -func (e *endpoint) UnidirectionalConnect() (transport.ConnectedEndpoint, *syserr.Error) { +func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.ConnectedEndpoint, *syserr.Error) { hostFile, err := e.file.Connect(p9.DgramSocket) if err != nil { return nil, syserr.ErrConnectionRefused } - c, serr := host.NewConnectedEndpoint(hostFile, &waiter.Queue{}, e.path) + c, serr := host.NewConnectedEndpoint(ctx, hostFile, &waiter.Queue{}, e.path) if serr != nil { log.Warningf("Gofer returned invalid host socket for UnidirectionalConnect; file %+v: %v", e.file, serr) return nil, serr diff --git a/pkg/sentry/fs/gofer/util.go b/pkg/sentry/fs/gofer/util.go index d0e1096ce..848e6812b 100644 --- a/pkg/sentry/fs/gofer/util.go +++ b/pkg/sentry/fs/gofer/util.go @@ -17,9 +17,9 @@ package gofer import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) func utimes(ctx context.Context, file contextFile, ts fs.TimeSpec) error { diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index ea2ca11bf..b1080fb1a 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -22,7 +22,7 @@ go_library( "util.go", "util_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 9ebb9bbb3..5532ff5a0 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -17,10 +17,10 @@ package host import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) type scmRights struct { diff --git a/pkg/sentry/fs/host/descriptor.go b/pkg/sentry/fs/host/descriptor.go index ffcd57a94..2a4d1b291 100644 --- a/pkg/sentry/fs/host/descriptor.go +++ b/pkg/sentry/fs/host/descriptor.go @@ -19,9 +19,9 @@ import ( "path" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/waiter" ) // descriptor wraps a host fd. diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go index ff08e43af..4205981f5 100644 --- a/pkg/sentry/fs/host/descriptor_test.go +++ b/pkg/sentry/fs/host/descriptor_test.go @@ -20,8 +20,8 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/waiter" ) func TestDescriptorRelease(t *testing.T) { diff --git a/pkg/sentry/fs/host/device.go b/pkg/sentry/fs/host/device.go index 055024c44..484f0b58b 100644 --- a/pkg/sentry/fs/host/device.go +++ b/pkg/sentry/fs/host/device.go @@ -15,7 +15,7 @@ package host import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/device" ) // hostFileDevice is the host file virtual device. diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index ad0a3ec85..f6c626f2c 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -18,18 +18,18 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/secio" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // fileOperations implements fs.FileOperations for a host file descriptor. @@ -109,7 +109,7 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner iops := inode.InodeOperations.(*inodeOperations) name := fmt.Sprintf("host:[%d]", inode.StableAttr.InodeID) - dirent := fs.NewDirent(inode, name) + dirent := fs.NewDirent(ctx, inode, name) defer dirent.DecRef() if isTTY { @@ -179,7 +179,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer } // IterateDir implements fs.DirIterator.IterateDir. -func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) { +func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) { if f.dirinfo == nil { f.dirinfo = new(dirInfo) f.dirinfo.buf = make([]byte, usermem.PageSize) diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go index b1b8dc0b6..68d2697c0 100644 --- a/pkg/sentry/fs/host/fs.go +++ b/pkg/sentry/fs/host/fs.go @@ -23,9 +23,9 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // FilesystemName is the name under which Filesystem is registered. @@ -262,7 +262,7 @@ func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr { // newMountSource constructs a new host fs.MountSource // relative to a root path. The root should match the mount point. func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource { - return fs.NewMountSource(&superOperations{ + return fs.NewMountSource(ctx, &superOperations{ root: root, inodeMappings: make(map[uint64]string), mounter: mounter, diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go index 16c89ddf1..c6852ee30 100644 --- a/pkg/sentry/fs/host/fs_test.go +++ b/pkg/sentry/fs/host/fs_test.go @@ -23,9 +23,9 @@ import ( "sort" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // newTestMountNamespace creates a MountNamespace with a ramfs root. diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 7a230e426..679d8321a 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -18,18 +18,18 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/secio" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // inodeOperations implements fs.InodeOperations for an fs.Inodes backed @@ -205,7 +205,7 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, } // Return the fs.Inode. - return fs.NewInode(iops, msrc, fileState.sattr), nil + return fs.NewInode(ctx, iops, msrc, fileState.sattr), nil } // Mappable implements fs.InodeOperations.Mappable. @@ -245,7 +245,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string } // Return the fs.Dirent. - return fs.NewDirent(inode, name), nil + return fs.NewDirent(ctx, inode, name), nil } // Create implements fs.InodeOperations.Create. @@ -265,7 +265,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string return nil, err } - d := fs.NewDirent(inode, name) + d := fs.NewDirent(ctx, inode, name) defer d.DecRef() return inode.GetFile(ctx, d, flags) } diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go index 26cc755bc..b267ec305 100644 --- a/pkg/sentry/fs/host/inode_state.go +++ b/pkg/sentry/fs/host/inode_state.go @@ -18,9 +18,9 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index ad1878b5a..2d959f10d 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -21,8 +21,8 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // TestMultipleReaddir verifies that multiple Readdir calls return the same @@ -56,7 +56,7 @@ func TestMultipleReaddir(t *testing.T) { t.Fatalf("Failed to create inode: %v", err) } - dirent := fs.NewDirent(n, "readdir") + dirent := fs.NewDirent(ctx, n, "readdir") openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true}) if err != nil { t.Fatalf("Failed to get file: %v", err) @@ -64,12 +64,12 @@ func TestMultipleReaddir(t *testing.T) { defer openFile.DecRef() c1 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c1, 0); err != nil { + if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil { t.Fatalf("First Readdir failed: %v", err) } c2 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c2, 0); err != nil { + if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil { t.Errorf("Second Readdir failed: %v", err) } diff --git a/pkg/sentry/fs/host/ioctl_unsafe.go b/pkg/sentry/fs/host/ioctl_unsafe.go index b5a85c4d9..271582e54 100644 --- a/pkg/sentry/fs/host/ioctl_unsafe.go +++ b/pkg/sentry/fs/host/ioctl_unsafe.go @@ -18,7 +18,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) func ioctlGetTermios(fd int) (*linux.Termios, error) { diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 305eea718..44c4ee5f2 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -19,22 +19,22 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - unixsocket "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/waiter" ) // maxSendBufferSize is the maximum host send buffer size allowed for endpoint. @@ -47,12 +47,12 @@ const maxSendBufferSize = 8 << 20 // // +stateify savable type ConnectedEndpoint struct { - queue *waiter.Queue - path string - // ref keeps track of references to a connectedEndpoint. ref refs.AtomicRefCount + queue *waiter.Queue + path string + // If srfd >= 0, it is the host FD that file was imported from. srfd int `state:"wait"` @@ -118,7 +118,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error { // The caller is responsible for calling Init(). Additionaly, Release needs to // be called twice because ConnectedEndpoint is both a transport.Receiver and // transport.ConnectedEndpoint. -func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *syserr.Error) { +func NewConnectedEndpoint(ctx context.Context, file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *syserr.Error) { e := ConnectedEndpoint{ path: path, queue: queue, @@ -133,6 +133,8 @@ func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*Conne // AtomicRefCounters start off with a single reference. We need two. e.ref.IncRef() + e.ref.EnableLeakCheck("host.ConnectedEndpoint") + return &e, nil } @@ -151,7 +153,7 @@ func (c *ConnectedEndpoint) Init() { func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.FileFlags) (*fs.File, error) { f2 := fd.New(f.FD()) var q waiter.Queue - e, err := NewConnectedEndpoint(f2, &q, "" /* path */) + e, err := NewConnectedEndpoint(ctx, f2, &q, "" /* path */) if err != nil { f2.Release() return nil, err.ToError() @@ -162,7 +164,7 @@ func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.F e.Init() - ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) + ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) return unixsocket.NewWithDirent(ctx, d, ep, e.stype, flags), nil } @@ -181,7 +183,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) } f := fd.New(ownedfd) var q waiter.Queue - e, err := NewConnectedEndpoint(f, &q, "" /* path */) + e, err := NewConnectedEndpoint(ctx, f, &q, "" /* path */) if err != nil { if saveable { f.Close() @@ -194,7 +196,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) e.srfd = srfd e.Init() - ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) + ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) return unixsocket.New(ctx, ep, e.stype), nil } diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go index 5efbb3ae8..05d7c79ad 100644 --- a/pkg/sentry/fs/host/socket_iovec.go +++ b/pkg/sentry/fs/host/socket_iovec.go @@ -17,8 +17,8 @@ package host import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" ) // maxIovs is the maximum number of iovecs to pass to the host. diff --git a/pkg/sentry/fs/host/socket_state.go b/pkg/sentry/fs/host/socket_state.go index 5676c451a..498018f0a 100644 --- a/pkg/sentry/fs/host/socket_state.go +++ b/pkg/sentry/fs/host/socket_state.go @@ -18,7 +18,7 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fd" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index bc3ce5627..68b38fd1c 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -19,16 +19,16 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/waiter" ) var ( diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index e45b339f5..2526412a4 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -17,14 +17,14 @@ package host import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // TTYFileOperations implements fs.FileOperations for a host file descriptor @@ -114,7 +114,7 @@ func (t *TTYFileOperations) Release() { } // Ioctl implements fs.FileOperations.Ioctl. -func (t *TTYFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { // Ignore arg[0]. This is the real FD: fd := t.fileOperations.iops.fileState.FD() ioctl := args[1].Uint64() diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index 94ff7708e..bad61a9a1 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -19,13 +19,13 @@ import ( "path" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) func open(parent *inodeOperations, name string) (int, error) { diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go index b95a57c3f..2b76f1065 100644 --- a/pkg/sentry/fs/host/util_unsafe.go +++ b/pkg/sentry/fs/host/util_unsafe.go @@ -18,9 +18,9 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) // NulByte is a single NUL byte. It is passed to readlinkat as an empty string. diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go index afcb74724..88d24d693 100644 --- a/pkg/sentry/fs/host/wait_test.go +++ b/pkg/sentry/fs/host/wait_test.go @@ -19,9 +19,9 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/waiter" ) func TestWait(t *testing.T) { diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 0b54c2e77..f4ddfa406 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -15,16 +15,18 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" ) var opens = metric.MustCreateNewUint64Metric("/fs/opens", false /* sync */, "Number of file opens.") @@ -55,6 +57,12 @@ type Inode struct { // overlay is the overlay entry for this Inode. overlay *overlayEntry + + // appendMu is used to synchronize write operations into files which + // have been opened with O_APPEND. Operations which change a file size + // have to take this lock for read. Write operations to files with + // O_APPEND have to take this lock for write. + appendMu sync.RWMutex `state:"nosave"` } // LockCtx is an Inode's lock context and contains different personalities of locks; both @@ -76,14 +84,16 @@ type LockCtx struct { // NewInode constructs an Inode from InodeOperations, a MountSource, and stable attributes. // // NewInode takes a reference on msrc. -func NewInode(iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode { +func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode { msrc.IncRef() - return &Inode{ + i := Inode{ InodeOperations: iops, StableAttr: sattr, Watches: newWatches(), MountSource: msrc, } + i.EnableLeakCheck("fs.Inode") + return &i } // DecRef drops a reference on the Inode. @@ -337,6 +347,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { if i.overlay != nil { return overlayTruncate(ctx, i.overlay, d, size) } + i.appendMu.RLock() + defer i.appendMu.RUnlock() return i.InodeOperations.Truncate(ctx, i, size) } @@ -438,3 +450,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool { } return creds.HasCapability(cp) } + +func (i *Inode) lockAppendMu(appendMode bool) func() { + if appendMode { + i.appendMu.Lock() + return i.appendMu.Unlock + } + i.appendMu.RLock() + return i.appendMu.RUnlock +} diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index ea089dfae..5cde9d215 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -17,10 +17,10 @@ package fs import ( "errors" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/context" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) var ( diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 06506fb20..24b769cfc 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -17,11 +17,11 @@ package fs import ( "strings" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" ) func overlayHasWhiteout(parent *Inode, name string) bool { @@ -111,7 +111,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name parent.copyMu.RUnlock() return nil, false, err } - d, err := NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil + d, err := NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil parent.copyMu.RUnlock() return d, true, err } @@ -201,7 +201,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name parent.copyMu.RUnlock() return nil, false, err } - d, err := NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil + d, err := NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil parent.copyMu.RUnlock() return d, upperInode != nil, err } @@ -217,6 +217,9 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st return nil, err } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + // Take another reference on the upper file's inode, which will be // owned by the overlay entry. upperFile.Dirent.Inode.IncRef() @@ -245,7 +248,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st // overlay file. overlayInode := newOverlayInode(ctx, entry, parent.Inode.MountSource) // d will own the inode reference. - overlayDirent := NewDirent(overlayInode, name) + overlayDirent := NewDirent(ctx, overlayInode, name) // The overlay file created below with NewFile will take a reference on // the overlayDirent, and it should be the only thing holding a // reference at the time of creation, so we must drop this reference. @@ -265,7 +268,12 @@ func overlayCreateDirectory(ctx context.Context, o *overlayEntry, parent *Dirent if err := copyUpLockedForRename(ctx, parent); err != nil { return err } - return o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm) + if err := o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm); err != nil { + return err + } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + return nil } func overlayCreateLink(ctx context.Context, o *overlayEntry, parent *Dirent, oldname string, newname string) error { @@ -273,7 +281,12 @@ func overlayCreateLink(ctx context.Context, o *overlayEntry, parent *Dirent, old if err := copyUpLockedForRename(ctx, parent); err != nil { return err } - return o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname) + if err := o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname); err != nil { + return err + } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + return nil } func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent, target *Dirent, name string) error { @@ -285,7 +298,12 @@ func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent, if err := copyUpLockedForRename(ctx, target); err != nil { return err } - return o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name) + if err := o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name); err != nil { + return err + } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + return nil } func overlayCreateFifo(ctx context.Context, o *overlayEntry, parent *Dirent, name string, perm FilePermissions) error { @@ -293,7 +311,12 @@ func overlayCreateFifo(ctx context.Context, o *overlayEntry, parent *Dirent, nam if err := copyUpLockedForRename(ctx, parent); err != nil { return err } - return o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm) + if err := o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm); err != nil { + return err + } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + return nil } func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child *Dirent) error { @@ -318,6 +341,8 @@ func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child * if child.Inode.overlay.lowerExists { return overlayCreateWhiteout(o.upper, child.name) } + // We've removed from the directory so we must drop the cache. + o.markDirectoryDirty() return nil } @@ -395,6 +420,8 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena if renamed.Inode.overlay.lowerExists { return overlayCreateWhiteout(oldParent.Inode.overlay.upper, oldName) } + // We've changed the directory so we must drop the cache. + o.markDirectoryDirty() return nil } @@ -411,6 +438,9 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri return nil, err } + // We've added to the directory so we must drop the cache. + o.markDirectoryDirty() + // Grab the inode and drop the dirent, we don't need it. inode := d.Inode inode.IncRef() @@ -422,7 +452,7 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri inode.DecRef() return nil, err } - return NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil + return NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil } func overlayBoundEndpoint(o *overlayEntry, path string) transport.BoundEndpoint { @@ -648,13 +678,13 @@ func NewTestOverlayDir(ctx context.Context, upper, lower *Inode, revalidate bool fs := &overlayFilesystem{} var upperMsrc *MountSource if revalidate { - upperMsrc = NewRevalidatingMountSource(fs, MountSourceFlags{}) + upperMsrc = NewRevalidatingMountSource(ctx, fs, MountSourceFlags{}) } else { - upperMsrc = NewNonCachingMountSource(fs, MountSourceFlags{}) + upperMsrc = NewNonCachingMountSource(ctx, fs, MountSourceFlags{}) } - msrc := NewMountSource(&overlayMountSourceOperations{ + msrc := NewMountSource(ctx, &overlayMountSourceOperations{ upper: upperMsrc, - lower: NewNonCachingMountSource(fs, MountSourceFlags{}), + lower: NewNonCachingMountSource(ctx, fs, MountSourceFlags{}), }, fs, MountSourceFlags{}) overlay := &overlayEntry{ upper: upper, diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index 52ce1d29e..8935aad65 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -17,12 +17,12 @@ package fs_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/syserror" ) func TestLookup(t *testing.T) { @@ -275,12 +275,12 @@ func TestLookupRevalidation(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { - root := fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root") + root := fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root") ctx = &rootContext{ Context: ctx, root: root, } - overlay := fs.NewDirent(fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay") + overlay := fs.NewDirent(ctx, fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay") // Lookup the file twice through the overlay. first, err := overlay.Walk(ctx, root, fileName) if err != nil { @@ -442,7 +442,7 @@ func (f *dirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySeria } func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - inode := fs.NewInode(&inode{ + inode := fs.NewInode(ctx, &inode{ InodeStaticFileGetter: fsutil.InodeStaticFileGetter{ Contents: []byte("foobar"), }, @@ -451,7 +451,7 @@ func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []string) *fs.Inode { - msrc := fs.NewPseudoMountSource() + msrc := fs.NewPseudoMountSource(ctx) contents := make(map[string]*fs.Inode) for _, c := range contains { if c.dir { @@ -463,7 +463,7 @@ func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []stri dops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermissions{ User: fs.PermMask{Read: true, Execute: true}, }) - return fs.NewInode(&dir{ + return fs.NewInode(ctx, &dir{ InodeOperations: dops, negative: negative, }, msrc, fs.StableAttr{Type: fs.Directory}) diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index 7dfd31020..c7f4e2d13 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -18,14 +18,14 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // Inotify represents an inotify instance created by inotify_init(2) or @@ -202,7 +202,7 @@ func (i *Inotify) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, e } // Ioctl implements fs.FileOperations.Ioctl. -func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { switch args[1].Int() { case linux.FIONREAD: i.evMu.Lock() diff --git a/pkg/sentry/fs/inotify_event.go b/pkg/sentry/fs/inotify_event.go index d52f956e4..9f70a3e82 100644 --- a/pkg/sentry/fs/inotify_event.go +++ b/pkg/sentry/fs/inotify_event.go @@ -18,8 +18,8 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // inotifyEventBaseSize is the base size of linux's struct inotify_event. This diff --git a/pkg/sentry/fs/inotify_watch.go b/pkg/sentry/fs/inotify_watch.go index a0b488467..0aa0a5e9b 100644 --- a/pkg/sentry/fs/inotify_watch.go +++ b/pkg/sentry/fs/inotify_watch.go @@ -18,7 +18,7 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // Watch represent a particular inotify watch created by inotify_add_watch. diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD index 7164744b8..08d7c0c57 100644 --- a/pkg/sentry/fs/lock/BUILD +++ b/pkg/sentry/fs/lock/BUILD @@ -39,7 +39,7 @@ go_library( "lock_set.go", "lock_set_functions.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/lock", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index f2aee4512..636484424 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -55,7 +55,7 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/waiter" ) // LockType is a type of regional file lock. @@ -134,7 +134,7 @@ const ( // LockRegion attempts to acquire a typed lock for the uid on a region // of a file. Returns true if successful in locking the region. If false // is returned, the caller should normally interpret this as "try again later" if -// accquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode. +// acquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode. // Blocker is the interface used to provide blocking behavior, passing a nil Blocker // will result in non-blocking behavior. func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) bool { diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go index ff04e9b22..7a24c6f1b 100644 --- a/pkg/sentry/fs/mock.go +++ b/pkg/sentry/fs/mock.go @@ -15,8 +15,8 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserror" ) // MockInodeOperations implements InodeOperations for testing Inodes. @@ -34,7 +34,7 @@ type MockInodeOperations struct { // NewMockInode returns a mock *Inode using MockInodeOperations. func NewMockInode(ctx context.Context, msrc *MountSource, sattr StableAttr) *Inode { - return NewInode(NewMockInodeOperations(ctx), msrc, sattr) + return NewInode(ctx, NewMockInodeOperations(ctx), msrc, sattr) } // NewMockInodeOperations returns a *MockInodeOperations. @@ -75,6 +75,12 @@ func (n *MockMountSourceOps) Keep(dirent *Dirent) bool { return n.keep } +// CacheReaddir implements fs.MountSourceOperations.CacheReaddir. +func (n *MockMountSourceOps) CacheReaddir() bool { + // Common case: cache readdir results if there is a dirent cache. + return n.keep +} + // WriteOut implements fs.InodeOperations.WriteOut. func (n *MockInodeOperations) WriteOut(context.Context, *Inode) error { return nil @@ -93,7 +99,7 @@ func (n *MockInodeOperations) IsVirtual() bool { // Lookup implements fs.InodeOperations.Lookup. func (n *MockInodeOperations) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { n.walkCalled = true - return NewDirent(NewInode(&MockInodeOperations{}, dir.MountSource, StableAttr{}), p), nil + return NewDirent(ctx, NewInode(ctx, &MockInodeOperations{}, dir.MountSource, StableAttr{}), p), nil } // SetPermissions implements fs.InodeOperations.SetPermissions. @@ -114,7 +120,7 @@ func (n *MockInodeOperations) SetTimestamps(context.Context, *Inode, TimeSpec) e // Create implements fs.InodeOperations.Create. func (n *MockInodeOperations) Create(ctx context.Context, dir *Inode, p string, flags FileFlags, perms FilePermissions) (*File, error) { n.createCalled = true - d := NewDirent(NewInode(&MockInodeOperations{}, dir.MountSource, StableAttr{}), p) + d := NewDirent(ctx, NewInode(ctx, &MockInodeOperations{}, dir.MountSource, StableAttr{}), p) return &File{Dirent: d}, nil } diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go index 41e0d285b..7a9692800 100644 --- a/pkg/sentry/fs/mount.go +++ b/pkg/sentry/fs/mount.go @@ -19,12 +19,12 @@ import ( "fmt" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" ) -// DirentOperations provide file systems greater control over how long a Dirent stays pinned -// in core. Implementations must not take Dirent.mu. +// DirentOperations provide file systems greater control over how long a Dirent +// stays pinned in core. Implementations must not take Dirent.mu. type DirentOperations interface { // Revalidate is called during lookup each time we encounter a Dirent // in the cache. Implementations may update stale properties of the @@ -37,6 +37,12 @@ type DirentOperations interface { // Keep returns true if the Dirent should be kept in memory for as long // as possible beyond any active references. Keep(dirent *Dirent) bool + + // CacheReaddir returns true if directory entries returned by + // FileOperations.Readdir may be cached for future use. + // + // Postconditions: This method must always return the same value. + CacheReaddir() bool } // MountSourceOperations contains filesystem specific operations. @@ -127,17 +133,19 @@ const DefaultDirentCacheSize uint64 = 1000 // NewMountSource returns a new MountSource. Filesystem may be nil if there is no // filesystem backing the mount. -func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags MountSourceFlags) *MountSource { +func NewMountSource(ctx context.Context, mops MountSourceOperations, filesystem Filesystem, flags MountSourceFlags) *MountSource { fsType := "none" if filesystem != nil { fsType = filesystem.Name() } - return &MountSource{ + msrc := MountSource{ MountSourceOperations: mops, Flags: flags, FilesystemType: fsType, fscache: NewDirentCache(DefaultDirentCacheSize), } + msrc.EnableLeakCheck("fs.MountSource") + return &msrc } // DirentRefs returns the current mount direntRefs. @@ -188,36 +196,40 @@ func (msrc *MountSource) SetDirentCacheLimiter(l *DirentCacheLimiter) { // NewCachingMountSource returns a generic mount that will cache dirents // aggressively. -func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { - return NewMountSource(&SimpleMountSourceOperations{ - keep: true, - revalidate: false, +func NewCachingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource { + return NewMountSource(ctx, &SimpleMountSourceOperations{ + keep: true, + revalidate: false, + cacheReaddir: true, }, filesystem, flags) } // NewNonCachingMountSource returns a generic mount that will never cache dirents. -func NewNonCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { - return NewMountSource(&SimpleMountSourceOperations{ - keep: false, - revalidate: false, +func NewNonCachingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource { + return NewMountSource(ctx, &SimpleMountSourceOperations{ + keep: false, + revalidate: false, + cacheReaddir: false, }, filesystem, flags) } // NewRevalidatingMountSource returns a generic mount that will cache dirents, -// but will revalidate them on each lookup. -func NewRevalidatingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource { - return NewMountSource(&SimpleMountSourceOperations{ - keep: true, - revalidate: true, +// but will revalidate them on each lookup and always perform uncached readdir. +func NewRevalidatingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource { + return NewMountSource(ctx, &SimpleMountSourceOperations{ + keep: true, + revalidate: true, + cacheReaddir: false, }, filesystem, flags) } // NewPseudoMountSource returns a "pseudo" mount source that is not backed by // an actual filesystem. It is always non-caching. -func NewPseudoMountSource() *MountSource { - return NewMountSource(&SimpleMountSourceOperations{ - keep: false, - revalidate: false, +func NewPseudoMountSource(ctx context.Context) *MountSource { + return NewMountSource(ctx, &SimpleMountSourceOperations{ + keep: false, + revalidate: false, + cacheReaddir: false, }, nil, MountSourceFlags{}) } @@ -225,8 +237,9 @@ func NewPseudoMountSource() *MountSource { // // +stateify savable type SimpleMountSourceOperations struct { - keep bool - revalidate bool + keep bool + revalidate bool + cacheReaddir bool } // Revalidate implements MountSourceOperations.Revalidate. @@ -239,6 +252,11 @@ func (smo *SimpleMountSourceOperations) Keep(*Dirent) bool { return smo.keep } +// CacheReaddir implements MountSourceOperations.CacheReaddir. +func (smo *SimpleMountSourceOperations) CacheReaddir() bool { + return smo.cacheReaddir +} + // ResetInodeMappings implements MountSourceOperations.ResetInodeMappings. func (*SimpleMountSourceOperations) ResetInodeMappings() {} diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go index 535f812c8..4fcdd6c01 100644 --- a/pkg/sentry/fs/mount_overlay.go +++ b/pkg/sentry/fs/mount_overlay.go @@ -15,7 +15,7 @@ package fs import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // overlayMountSourceOperations implements MountSourceOperations for an overlay @@ -28,10 +28,10 @@ type overlayMountSourceOperations struct { lower *MountSource } -func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource { +func newOverlayMountSource(ctx context.Context, upper, lower *MountSource, flags MountSourceFlags) *MountSource { upper.IncRef() lower.IncRef() - msrc := NewMountSource(&overlayMountSourceOperations{ + msrc := NewMountSource(ctx, &overlayMountSourceOperations{ upper: upper, lower: lower, }, &overlayFilesystem{}, flags) @@ -81,6 +81,17 @@ func (o *overlayMountSourceOperations) Keep(dirent *Dirent) bool { return o.upper.Keep(dirent) } +// CacheReaddir implements MountSourceOperations.CacheReaddir for an overlay by +// performing the logical AND of the upper and lower filesystems' CacheReaddir +// methods. +// +// N.B. This is fs-global instead of inode-specific because it must always +// return the same value. If it was inode-specific, we couldn't guarantee that +// property across copy up. +func (o *overlayMountSourceOperations) CacheReaddir() bool { + return o.lower.CacheReaddir() && o.upper.CacheReaddir() +} + // ResetInodeMappings propagates the call to both upper and lower MountSource. func (o *overlayMountSourceOperations) ResetInodeMappings() { o.upper.ResetInodeMappings() diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go index 2e2716643..0b84732aa 100644 --- a/pkg/sentry/fs/mount_test.go +++ b/pkg/sentry/fs/mount_test.go @@ -18,7 +18,7 @@ import ( "fmt" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" ) // cacheReallyContains iterates through the dirent cache to determine whether diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index a5c52d7ba..693ffc760 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -22,12 +22,12 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) // DefaultTraversalLimit provides a sensible default traversal limit that may @@ -124,7 +124,16 @@ func (m *Mount) IsUndo() bool { return false } -// MountNamespace defines a collection of mounts. +// MountNamespace defines a VFS root. It contains collection of Mounts that are +// mounted inside the Dirent tree rooted at the Root Dirent. It provides +// methods for traversing the Dirent, and for mounting/unmounting in the tree. +// +// Note that this does not correspond to a "mount namespace" in the Linux. It +// is more like a unique VFS instance. +// +// It's possible for different processes to have different MountNamespaces. In +// this case, the file systems exposed to the processes are completely +// distinct. // // +stateify savable type MountNamespace struct { @@ -166,18 +175,20 @@ func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error // Set the root dirent and id on the root mount. The reference returned from // NewDirent will be donated to the MountNamespace constructed below. - d := NewDirent(root, "/") + d := NewDirent(ctx, root, "/") mnts := map[*Dirent]*Mount{ d: newRootMount(1, d), } - return &MountNamespace{ + mns := MountNamespace{ userns: creds.UserNamespace, root: d, mounts: mnts, mountID: 2, - }, nil + } + mns.EnableLeakCheck("fs.MountNamespace") + return &mns, nil } // UserNamespace returns the user namespace associated with this mount manager. @@ -652,6 +663,11 @@ func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name s } defer d.DecRef() + // Check that it is a regular file. + if !IsRegular(d.Inode.StableAttr) { + continue + } + // Check whether we can read and execute the found file. if err := d.Inode.CheckPermission(ctx, PermMask{Read: true, Execute: true}); err != nil { log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err) diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go index 56d726dd1..c4c771f2c 100644 --- a/pkg/sentry/fs/mounts_test.go +++ b/pkg/sentry/fs/mounts_test.go @@ -17,11 +17,11 @@ package fs_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" ) // Creates a new MountNamespace with filesystem: @@ -30,17 +30,17 @@ import ( // |-bar (file) func createMountNamespace(ctx context.Context) (*fs.MountNamespace, error) { perms := fs.FilePermsFromMode(0777) - m := fs.NewPseudoMountSource() + m := fs.NewPseudoMountSource(ctx) barFile := fsutil.NewSimpleFileInode(ctx, fs.RootOwner, perms, 0) fooDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "bar": fs.NewInode(barFile, m, fs.StableAttr{Type: fs.RegularFile}), + "bar": fs.NewInode(ctx, barFile, m, fs.StableAttr{Type: fs.RegularFile}), }, fs.RootOwner, perms) rootDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "foo": fs.NewInode(fooDir, m, fs.StableAttr{Type: fs.Directory}), + "foo": fs.NewInode(ctx, fooDir, m, fs.StableAttr{Type: fs.Directory}), }, fs.RootOwner, perms) - return fs.NewMountNamespace(ctx, fs.NewInode(rootDir, m, fs.StableAttr{Type: fs.Directory})) + return fs.NewMountNamespace(ctx, fs.NewInode(ctx, rootDir, m, fs.StableAttr{Type: fs.Directory})) } func TestFindLink(t *testing.T) { diff --git a/pkg/sentry/fs/offset.go b/pkg/sentry/fs/offset.go index 3f68da149..f7d844ce7 100644 --- a/pkg/sentry/fs/offset.go +++ b/pkg/sentry/fs/offset.go @@ -17,7 +17,7 @@ package fs import ( "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // OffsetPageEnd returns the file offset rounded up to the nearest diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index db89a5f70..1d3ff39e0 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -19,11 +19,12 @@ import ( "strings" "sync" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/third_party/gvsync" ) // The virtual filesystem implements an overlay configuration. For a high-level @@ -104,7 +105,7 @@ func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags Mount return nil, fmt.Errorf("cannot nest overlay in upper file of another overlay") } - msrc := newOverlayMountSource(upper.MountSource, lower.MountSource, flags) + msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags) overlay, err := newOverlayEntry(ctx, upper, lower, true) if err != nil { msrc.DecRef() @@ -127,7 +128,7 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, if !IsRegular(lower.StableAttr) { return nil, fmt.Errorf("lower Inode is not a regular file") } - msrc := newOverlayMountSource(upperMS, lower.MountSource, flags) + msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags) overlay, err := newOverlayEntry(ctx, nil, lower, true) if err != nil { msrc.DecRef() @@ -140,9 +141,9 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, func newOverlayInode(ctx context.Context, o *overlayEntry, msrc *MountSource) *Inode { var inode *Inode if o.upper != nil { - inode = NewInode(nil, msrc, o.upper.StableAttr) + inode = NewInode(ctx, nil, msrc, o.upper.StableAttr) } else { - inode = NewInode(nil, msrc, o.lower.StableAttr) + inode = NewInode(ctx, nil, msrc, o.lower.StableAttr) } inode.overlay = o return inode @@ -196,6 +197,12 @@ type overlayEntry struct { // these locks is sufficient to read upper; holding all three for writing // is required to mutate it. upper *Inode + + // dirCacheMu protects dirCache. + dirCacheMu gvsync.DowngradableRWMutex `state:"nosave"` + + // dirCache is cache of DentAttrs from upper and lower Inodes. + dirCache *SortedDentryMap } // newOverlayEntry returns a new overlayEntry. @@ -258,6 +265,17 @@ func (o *overlayEntry) isMappableLocked() bool { return o.inodeLocked().Mappable() != nil } +// markDirectoryDirty marks any cached data dirty for this directory. This is +// necessary in order to ensure that this node does not retain stale state +// throughout its lifetime across multiple open directory handles. +// +// Currently this means invalidating any readdir caches. +func (o *overlayEntry) markDirectoryDirty() { + o.dirCacheMu.Lock() + o.dirCache = nil + o.dirCacheMu.Unlock() +} + // AddMapping implements memmap.Mappable.AddMapping. func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { o.mapsMu.Lock() diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 1728fe0b5..70ed854a8 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -27,10 +27,11 @@ go_library( "uptime.go", "version.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/binary", "//pkg/log", "//pkg/sentry/context", "//pkg/sentry/fs", @@ -41,7 +42,6 @@ go_library( "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/mm", diff --git a/pkg/sentry/fs/proc/cgroup.go b/pkg/sentry/fs/proc/cgroup.go index 1019f862a..05e31c55d 100644 --- a/pkg/sentry/fs/proc/cgroup.go +++ b/pkg/sentry/fs/proc/cgroup.go @@ -17,8 +17,8 @@ package proc import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) func newCGroupInode(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string]string) *fs.Inode { diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go index 15031234e..3edf36780 100644 --- a/pkg/sentry/fs/proc/cpuinfo.go +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -15,9 +15,9 @@ package proc import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD index 64b0c5a3a..0394451d4 100644 --- a/pkg/sentry/fs/proc/device/BUILD +++ b/pkg/sentry/fs/proc/device/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/device", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sentry/device"], ) diff --git a/pkg/sentry/fs/proc/device/device.go b/pkg/sentry/fs/proc/device/device.go index 0de466c73..bbe66e796 100644 --- a/pkg/sentry/fs/proc/device/device.go +++ b/pkg/sentry/fs/proc/device/device.go @@ -16,7 +16,7 @@ package device import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/device" ) // ProcDevice is the kernel proc device. diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go index cb28f6bc3..1d3a2d426 100644 --- a/pkg/sentry/fs/proc/exec_args.go +++ b/pkg/sentry/fs/proc/exec_args.go @@ -19,14 +19,14 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // execArgType enumerates the types of exec arguments that are exposed through @@ -64,7 +64,7 @@ func newExecArgInode(t *kernel.Task, msrc *fs.MountSource, arg execArgType) *fs. arg: arg, t: t, } - return newProcInode(f, msrc, fs.SpecialFile, t) + return newProcInode(t, f, msrc, fs.SpecialFile, t) } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index 744b31c74..bee421d76 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -19,14 +19,13 @@ import ( "sort" "strconv" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // walkDescriptors finds the descriptor (file-flag pair) for the fd identified @@ -42,8 +41,8 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF var file *fs.File var fdFlags kernel.FDFlags t.WithMuLocked(func(t *kernel.Task) { - if fdm := t.FDMap(); fdm != nil { - file, fdFlags = fdm.GetDescriptor(kdefs.FD(n)) + if fdTable := t.FDTable(); fdTable != nil { + file, fdFlags = fdTable.Get(int32(n)) } }) if file == nil { @@ -56,36 +55,31 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF // toDentAttr callback for each to get a DentAttr, which it then emits. This is // a helper for implementing fs.InodeOperations.Readdir. func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) { - var fds kernel.FDs + var fds []int32 t.WithMuLocked(func(t *kernel.Task) { - if fdm := t.FDMap(); fdm != nil { - fds = fdm.GetFDs() + if fdTable := t.FDTable(); fdTable != nil { + fds = fdTable.GetFDs() } }) - fdInts := make([]int, 0, len(fds)) - for _, fd := range fds { - fdInts = append(fdInts, int(fd)) - } - - // Find the fd to start at. - idx := sort.SearchInts(fdInts, int(offset)) - if idx == len(fdInts) { + // Find the appropriate starting point. + idx := sort.Search(len(fds), func(i int) bool { return fds[i] >= int32(offset) }) + if idx == len(fds) { return offset, nil } - fdInts = fdInts[idx:] + fds = fds[idx:] - var fd int - for _, fd = range fdInts { + // Serialize all FDs. + for _, fd := range fds { name := strconv.FormatUint(uint64(fd), 10) - if err := c.DirEmit(name, toDentAttr(fd)); err != nil { + if err := c.DirEmit(name, toDentAttr(int(fd))); err != nil { // Returned offset is the next fd to serialize. return int64(fd), err } } // We serialized them all. Next offset should be higher than last // serialized fd. - return int64(fd + 1), nil + return int64(fds[len(fds)-1] + 1), nil } // fd implements fs.InodeOperations for a file in /proc/TID/fd/. @@ -105,7 +99,7 @@ func newFd(t *kernel.Task, f *fs.File, msrc *fs.MountSource) *fs.Inode { Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), file: f, } - return newProcInode(fd, msrc, fs.Symlink, t) + return newProcInode(t, fd, msrc, fs.Symlink, t) } // GetFile returns the fs.File backing this fd. The dirent and flags @@ -154,9 +148,9 @@ func (f *fd) Close() error { type fdDir struct { ramfs.Dir - // We hold a reference on the task's fdmap but only keep an indirect - // task pointer to avoid Dirent loading circularity caused by fdmap's - // potential back pointers into the dirent tree. + // We hold a reference on the task's FDTable but only keep an indirect + // task pointer to avoid Dirent loading circularity caused by the + // table's back pointers into the dirent tree. t *kernel.Task } @@ -168,7 +162,7 @@ func newFdDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermissions{User: fs.PermMask{Read: true, Execute: true}}), t: t, } - return newProcInode(f, msrc, fs.SpecialDirectory, t) + return newProcInode(t, f, msrc, fs.SpecialDirectory, t) } // Check implements InodeOperations.Check. @@ -198,7 +192,7 @@ func (f *fdDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent if err != nil { return nil, err } - return fs.NewDirent(n, p), nil + return fs.NewDirent(ctx, n, p), nil } // GetFile implements fs.FileOperations.GetFile. @@ -252,7 +246,7 @@ func newFdInfoDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermsFromMode(0500)), t: t, } - return newProcInode(fdid, msrc, fs.SpecialDirectory, t) + return newProcInode(t, fdid, msrc, fs.SpecialDirectory, t) } // Lookup loads an fd in /proc/TID/fdinfo into a Dirent. @@ -272,7 +266,7 @@ func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs if err != nil { return nil, err } - return fs.NewDirent(inode, p), nil + return fs.NewDirent(ctx, inode, p), nil } // GetFile implements fs.FileOperations.GetFile. diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go index 7bb081d0e..e9250c51c 100644 --- a/pkg/sentry/fs/proc/filesystems.go +++ b/pkg/sentry/fs/proc/filesystems.go @@ -18,9 +18,9 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) // filesystemsData backs /proc/filesystems. diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go index d57d6cc5d..f14833805 100644 --- a/pkg/sentry/fs/proc/fs.go +++ b/pkg/sentry/fs/proc/fs.go @@ -17,8 +17,8 @@ package proc import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // filesystem is a procfs. @@ -30,7 +30,7 @@ func init() { fs.RegisterFilesystem(&filesystem{}) } -// FilesystemName is the name underwhich the filesystem is registered. +// FilesystemName is the name under which the filesystem is registered. // Name matches fs/proc/root.c:proc_fs_type.name. const FilesystemName = "proc" @@ -77,5 +77,5 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou // Construct the procfs root. Since procfs files are all virtual, we // never want them cached. - return New(ctx, fs.NewNonCachingMountSource(f, flags), cgroups) + return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags), cgroups) } diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index 986bc0a45..0c04f81fa 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -15,15 +15,15 @@ package proc import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr @@ -115,11 +115,11 @@ func newStaticProcInode(ctx context.Context, msrc *fs.MountSource, contents []by Contents: contents, }, } - return newProcInode(iops, msrc, fs.SpecialFile, nil) + return newProcInode(ctx, iops, msrc, fs.SpecialFile, nil) } // newProcInode creates a new inode from the given inode operations. -func newProcInode(iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeType, t *kernel.Task) *fs.Inode { +func newProcInode(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeType, t *kernel.Task) *fs.Inode { sattr := fs.StableAttr{ DeviceID: device.ProcDevice.DeviceID(), InodeID: device.ProcDevice.NextIno(), @@ -129,5 +129,5 @@ func newProcInode(iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeTyp if t != nil { iops = &taskOwnedInodeOps{iops, t} } - return fs.NewInode(iops, msrc, sattr) + return fs.NewInode(ctx, iops, msrc, sattr) } diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go index 2dfe7089a..8602b7426 100644 --- a/pkg/sentry/fs/proc/loadavg.go +++ b/pkg/sentry/fs/proc/loadavg.go @@ -18,8 +18,8 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) // loadavgData backs /proc/loadavg. diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index d2b9b92c7..495f3e3ba 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -18,11 +18,11 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // meminfoData backs /proc/meminfo. diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index 1f7817947..e33c4a460 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -19,10 +19,10 @@ import ( "fmt" "sort" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // forEachMountSource runs f for the process root mount and each mount that is a diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 034950158..37694620c 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -19,17 +19,18 @@ import ( "fmt" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) // newNet creates a new proc net entry. @@ -55,9 +56,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")), "route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")), - "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")), - - "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), + "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc), + "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), } @@ -70,7 +70,7 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo } } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } // ifinet6 implements seqfile.SeqSource for /proc/net/if_inet6. @@ -210,10 +210,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } var buf bytes.Buffer - // Header - fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n") - - // Entries for _, se := range n.k.ListSockets() { s := se.Sock.Get() if s == nil { @@ -222,6 +218,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } sfile := s.(*fs.File) if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() // Not a unix socket. continue } @@ -281,12 +278,160 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } fmt.Fprintf(&buf, "\n") - sfile.DecRef() + s.DecRef() + } + + data := []seqfile.SeqData{ + { + Buf: []byte("Num RefCount Protocol Flags Type St Inode Path\n"), + Handle: n, + }, + { + Buf: buf.Bytes(), + Handle: n, + }, + } + return data, 0 +} + +// netTCP implements seqfile.SeqSource for /proc/net/tcp. +// +// +stateify savable +type netTCP struct { + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*netTCP) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + t := kernel.TaskFromContext(ctx) + + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(&buf, "%4d: ", se.ID) + + portBuf := make([]byte, 2) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = local.(linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, localAddr.Port) + fmt.Fprintf(&buf, "%08X:%04X ", + binary.LittleEndian.Uint32(localAddr.Addr[:]), + portBuf) + + // Field: rem_address. + var remoteAddr linux.SockAddrInet + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = remote.(linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) + fmt.Fprintf(&buf, "%08X:%04X ", + binary.LittleEndian.Uint32(remoteAddr.Addr[:]), + portBuf) + + // Field: state; socket state. + fmt.Fprintf(&buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(&buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(&buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(&buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(&buf, "%5d ", 0) + } else { + fmt.Fprintf(&buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. + fmt.Fprintf(&buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(&buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(&buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(&buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(&buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(&buf, "%d", -1) + + fmt.Fprintf(&buf, "\n") + + s.DecRef() } - data := []seqfile.SeqData{{ - Buf: buf.Bytes(), - Handle: (*netUnix)(nil), - }} + data := []seqfile.SeqData{ + { + Buf: []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n"), + Handle: n, + }, + { + Buf: buf.Bytes(), + Handle: n, + }, + } return data, 0 } diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go index 9aed5fdca..f18681405 100644 --- a/pkg/sentry/fs/proc/net_test.go +++ b/pkg/sentry/fs/proc/net_test.go @@ -18,8 +18,8 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/inet" ) func newIPv6TestStack() *inet.TestStack { diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 0e15894b4..0ef13f2f5 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -20,15 +20,15 @@ import ( "sort" "strconv" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.dev/gvisor/pkg/syserror" ) // proc is a root proc node. @@ -68,7 +68,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string "filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc), "loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc), "meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc), - "mounts": newProcInode(ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil), + "mounts": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil), "self": newSelf(ctx, pidns, msrc), "stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc), "thread-self": newThreadSelf(ctx, pidns, msrc), @@ -94,7 +94,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) } - return newProcInode(p, msrc, fs.SpecialDirectory, nil), nil + return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil } // self is a magical link. @@ -112,7 +112,7 @@ func newSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.MountSour Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""), pidns: pidns, } - return newProcInode(s, msrc, fs.Symlink, nil) + return newProcInode(ctx, s, msrc, fs.Symlink, nil) } // newThreadSelf returns a new "threadSelf" node. @@ -121,7 +121,7 @@ func newThreadSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.Mou Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""), pidns: pidns, } - return newProcInode(s, msrc, fs.Symlink, nil) + return newProcInode(ctx, s, msrc, fs.Symlink, nil) } // Readlink implements fs.InodeOperations.Readlink. @@ -185,7 +185,7 @@ func (p *proc) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dire // Wrap it in a taskDir. td := p.newTaskDir(otherTask, dir.MountSource, true) - return fs.NewDirent(td, name), nil + return fs.NewDirent(ctx, td, name), nil } // GetFile implements fs.InodeOperations. diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go index e36c0bfa6..01ac97530 100644 --- a/pkg/sentry/fs/proc/rpcinet_proc.go +++ b/pkg/sentry/fs/proc/rpcinet_proc.go @@ -17,19 +17,19 @@ package proc import ( "io" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) -// rpcInetInode implments fs.InodeOperations. +// rpcInetInode implements fs.InodeOperations. type rpcInetInode struct { fsutil.SimpleFileInode @@ -45,7 +45,7 @@ func newRPCInetInode(ctx context.Context, msrc *fs.MountSource, filepath string, filepath: filepath, k: kernel.KernelFromContext(ctx), } - return newProcInode(f, msrc, fs.SpecialFile, nil) + return newProcInode(ctx, f, msrc, fs.SpecialFile, nil) } // GetFile implements fs.InodeOperations.GetFile. @@ -141,7 +141,7 @@ func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } // newRPCInetProcSysNet will build an inode for /proc/sys/net. @@ -152,7 +152,7 @@ func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } // newRPCInetSysNetCore builds the /proc/sys/net/core directory. @@ -170,7 +170,7 @@ func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } // newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory. @@ -213,5 +213,5 @@ func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inod } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 6b44c0075..20c3eefc8 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") go_library( name = "seqfile", srcs = ["seqfile.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index 8364d86ed..5fe823000 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -12,21 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Package seqfile provides dynamic ordered files. package seqfile import ( "io" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // SeqHandle is a helper handle to seek in the file. @@ -133,7 +134,7 @@ func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource BlockSize: usermem.PageSize, Type: fs.SpecialFile, } - return fs.NewInode(iops, msrc, sattr) + return fs.NewInode(ctx, iops, msrc, sattr) } // UnstableAttr returns unstable attributes of the SeqFile. diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go index c4de565eb..ebfeee835 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile_test.go @@ -20,11 +20,11 @@ import ( "io" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) type seqTest struct { @@ -120,15 +120,15 @@ func TestSeqFile(t *testing.T) { testSource.Init() // Create a file that can be R/W. - m := fs.NewPseudoMountSource() ctx := contexttest.Context(t) + m := fs.NewPseudoMountSource(ctx) contents := map[string]*fs.Inode{ "foo": NewSeqFileInode(ctx, testSource, m), } root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) // How about opening it? - inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory}) + inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) dirent2, err := root.Lookup(ctx, inode, "foo") if err != nil { t.Fatalf("failed to walk to foo for n2: %v", err) @@ -196,15 +196,15 @@ func TestSeqFileFileUpdated(t *testing.T) { testSource.update = true // Create a file that can be R/W. - m := fs.NewPseudoMountSource() ctx := contexttest.Context(t) + m := fs.NewPseudoMountSource(ctx) contents := map[string]*fs.Inode{ "foo": NewSeqFileInode(ctx, testSource, m), } root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) // How about opening it? - inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory}) + inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) dirent2, err := root.Lookup(ctx, inode, "foo") if err != nil { t.Fatalf("failed to walk to foo for dirent2: %v", err) diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go index 397f9ec6b..b641effbb 100644 --- a/pkg/sentry/fs/proc/stat.go +++ b/pkg/sentry/fs/proc/stat.go @@ -18,10 +18,10 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // statData backs /proc/stat. diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index 59846af4f..cd37776c8 100644 --- a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -19,16 +19,16 @@ import ( "io" "strconv" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // mmapMinAddrData backs /proc/sys/vm/mmap_min_addr. @@ -82,14 +82,14 @@ func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode } children := map[string]*fs.Inode{ - "hostname": newProcInode(&h, msrc, fs.SpecialFile, nil), + "hostname": newProcInode(ctx, &h, msrc, fs.SpecialFile, nil), "shmall": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMALL, 10))), "shmmax": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMAX, 10))), "shmmni": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMNI, 10))), } d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { @@ -98,7 +98,7 @@ func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "overcommit_memory": seqfile.NewSeqFileInode(ctx, &overcommitMemory{}, msrc), } d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { @@ -115,7 +115,7 @@ func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } // hostname is the inode for a file containing the system hostname. diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index dbf1a987c..f3b63dfc2 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -19,15 +19,15 @@ import ( "io" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) type tcpMemDir int @@ -74,7 +74,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir BlockSize: usermem.PageSize, Type: fs.SpecialFile, } - return fs.NewInode(tm, msrc, sattr) + return fs.NewInode(ctx, tm, msrc, sattr) } // GetFile implements fs.InodeOperations.GetFile. @@ -184,7 +184,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f BlockSize: usermem.PageSize, Type: fs.SpecialFile, } - return fs.NewInode(ts, msrc, sattr) + return fs.NewInode(ctx, ts, msrc, sattr) } // GetFile implements fs.InodeOperations.GetFile. @@ -277,7 +277,7 @@ func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.S } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { @@ -339,7 +339,7 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { @@ -351,5 +351,5 @@ func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode } } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(d, msrc, fs.SpecialDirectory, nil) + return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go index 78135ba13..6abae7a60 100644 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ b/pkg/sentry/fs/proc/sys_net_test.go @@ -17,9 +17,9 @@ package proc import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func TestQuerySendBufferSize(t *testing.T) { diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 21a965f90..ef0ca3301 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -21,20 +21,20 @@ import ( "sort" "strconv" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's @@ -101,7 +101,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks boo Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)), t: t, } - return newProcInode(d, msrc, fs.SpecialDirectory, t) + return newProcInode(t, d, msrc, fs.SpecialDirectory, t) } // subtasks represents a /proc/TID/task directory. @@ -122,7 +122,7 @@ func (p *proc) newSubtasks(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { t: t, p: p, } - return newProcInode(s, msrc, fs.SpecialDirectory, t) + return newProcInode(t, s, msrc, fs.SpecialDirectory, t) } // UnstableAttr returns unstable attributes of the subtasks. @@ -223,7 +223,7 @@ func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dir } td := s.p.newTaskDir(task, dir.MountSource, false) - return fs.NewDirent(td, p), nil + return fs.NewDirent(ctx, td, p), nil } // exe is an fs.InodeOperations symlink for the /proc/PID/exe file. @@ -240,7 +240,7 @@ func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), t: t, } - return newProcInode(exeSymlink, msrc, fs.Symlink, t) + return newProcInode(t, exeSymlink, msrc, fs.Symlink, t) } func (e *exe) executable() (d *fs.Dirent, err error) { @@ -308,7 +308,7 @@ func newNamespaceSymlink(t *kernel.Task, msrc *fs.MountSource, name string) *fs. Symlink: *ramfs.NewSymlink(t, fs.RootOwner, target), t: t, } - return newProcInode(n, msrc, fs.Symlink, t) + return newProcInode(t, n, msrc, fs.Symlink, t) } // Getlink implements fs.InodeOperations.Getlink. @@ -319,7 +319,7 @@ func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Di // Create a new regular file to fake the namespace file. iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC) - return fs.NewDirent(newProcInode(iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil + return fs.NewDirent(ctx, newProcInode(ctx, iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil } func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { @@ -329,7 +329,7 @@ func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { "user": newNamespaceSymlink(t, msrc, "user"), } d := ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0511)) - return newProcInode(d, msrc, fs.SpecialDirectory, t) + return newProcInode(t, d, msrc, fs.SpecialDirectory, t) } // mapsData implements seqfile.SeqSource for /proc/[pid]/maps. @@ -340,7 +340,7 @@ type mapsData struct { } func newMaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &mapsData{t}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &mapsData{t}), msrc, fs.SpecialFile, t) } func (md *mapsData) mm() *mm.MemoryManager { @@ -380,7 +380,7 @@ type smapsData struct { } func newSmaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &smapsData{t}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &smapsData{t}), msrc, fs.SpecialFile, t) } func (sd *smapsData) mm() *mm.MemoryManager { @@ -426,7 +426,7 @@ type taskStatData struct { } func newTaskStat(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t) } // NeedsUpdate returns whether the generation is old or not. @@ -511,7 +511,7 @@ type statmData struct { } func newStatm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &statmData{t}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &statmData{t}), msrc, fs.SpecialFile, t) } // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. @@ -548,7 +548,7 @@ type statusData struct { } func newStatus(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &statusData{t, pidns}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &statusData{t, pidns}), msrc, fs.SpecialFile, t) } // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. @@ -580,8 +580,8 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ( var fds int var vss, rss, data uint64 s.t.WithMuLocked(func(t *kernel.Task) { - if fdm := t.FDMap(); fdm != nil { - fds = fdm.Size() + if fdTable := t.FDTable(); fdTable != nil { + fds = fdTable.Size() } if mm := t.MemoryManager(); mm != nil { vss = mm.VirtualMemorySize() @@ -615,7 +615,7 @@ type ioData struct { } func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) + return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) } // NeedsUpdate returns whether the generation is old or not. @@ -664,7 +664,7 @@ func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), t: t, } - return newProcInode(c, msrc, fs.SpecialFile, t) + return newProcInode(t, c, msrc, fs.SpecialFile, t) } // Check implements fs.InodeOperations.Check. @@ -736,7 +736,7 @@ func newAuxvec(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), t: t, } - return newProcInode(a, msrc, fs.SpecialFile, t) + return newProcInode(t, a, msrc, fs.SpecialFile, t) } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index a14b1b45f..eea37d15c 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -19,15 +19,15 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // idMapInodeOperations implements fs.InodeOperations for @@ -66,7 +66,7 @@ func newGIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { } func newIDMap(t *kernel.Task, msrc *fs.MountSource, gids bool) *fs.Inode { - return newProcInode(&idMapInodeOperations{ + return newProcInode(t, &idMapInodeOperations{ InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(t, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), t: t, gids: gids, diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go index 35c3851e1..4e903917a 100644 --- a/pkg/sentry/fs/proc/uptime.go +++ b/pkg/sentry/fs/proc/uptime.go @@ -18,14 +18,14 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // uptime is a file containing the system uptime. @@ -44,7 +44,7 @@ func newUptime(ctx context.Context, msrc *fs.MountSource) *fs.Inode { SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), startTime: ktime.NowFromContext(ctx), } - return newProcInode(u, msrc, fs.SpecialFile, nil) + return newProcInode(ctx, u, msrc, fs.SpecialFile, nil) } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go index a5479990c..a6d2c3cd3 100644 --- a/pkg/sentry/fs/proc/version.go +++ b/pkg/sentry/fs/proc/version.go @@ -17,9 +17,9 @@ package proc import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // versionData backs /proc/version. diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index f36e4a5e8..516efcc4c 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -10,7 +10,7 @@ go_library( "symlink.go", "tree.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ramfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index cd6e03d66..f3e984c24 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Package ramfs provides the fundamentals for a simple in-memory filesystem. package ramfs import ( @@ -19,12 +20,12 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" ) // CreateOps represents operations to create different file types. @@ -269,7 +270,7 @@ func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, er // Take a reference on the inode before returning it. This reference // is owned by the dirent we are about to create. inode.IncRef() - return fs.NewDirent(inode, p), nil + return fs.NewDirent(ctx, inode, p), nil } // walkLocked must be called with d.mu held. @@ -321,7 +322,7 @@ func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.F inode.IncRef() // Create the Dirent and corresponding file. - created := fs.NewDirent(inode, name) + created := fs.NewDirent(ctx, inode, name) defer created.DecRef() return created.Inode.GetFile(ctx, created, flags) } @@ -382,7 +383,7 @@ func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport } // Take another ref on inode which will be donated to the new dirent. inode.IncRef() - return fs.NewDirent(inode, name), nil + return fs.NewDirent(ctx, inode, name), nil } // CreateFifo implements fs.InodeOperations.CreateFifo. @@ -430,7 +431,7 @@ func (dfo *dirFileOperations) Seek(ctx context.Context, file *fs.File, whence fs } // IterateDir implements DirIterator.IterateDir. -func (dfo *dirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) { +func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) { dfo.dir.mu.Lock() defer dfo.dir.mu.Unlock() diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go index 7d8bca70e..a24fe2ea2 100644 --- a/pkg/sentry/fs/ramfs/socket.go +++ b/pkg/sentry/fs/ramfs/socket.go @@ -15,12 +15,12 @@ package ramfs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/waiter" ) // Socket represents a socket. diff --git a/pkg/sentry/fs/ramfs/symlink.go b/pkg/sentry/fs/ramfs/symlink.go index 21c246169..fcfaa29aa 100644 --- a/pkg/sentry/fs/ramfs/symlink.go +++ b/pkg/sentry/fs/ramfs/symlink.go @@ -15,11 +15,11 @@ package ramfs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/waiter" ) // Symlink represents a symlink. diff --git a/pkg/sentry/fs/ramfs/tree.go b/pkg/sentry/fs/ramfs/tree.go index 8c6b31f70..702cc4a1e 100644 --- a/pkg/sentry/fs/ramfs/tree.go +++ b/pkg/sentry/fs/ramfs/tree.go @@ -19,10 +19,10 @@ import ( "path" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // MakeDirectoryTree constructs a ramfs tree of all directories containing @@ -68,7 +68,7 @@ func makeSubdir(ctx context.Context, msrc *fs.MountSource, root *Dir, subdir str // emptyDir returns an empty *ramfs.Dir with all permissions granted. func emptyDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { dir := NewDir(ctx, make(map[string]*fs.Inode), fs.RootOwner, fs.FilePermsFromMode(0777)) - return fs.NewInode(dir, msrc, fs.StableAttr{ + return fs.NewInode(ctx, dir, msrc, fs.StableAttr{ DeviceID: anon.PseudoDevice.DeviceID(), InodeID: anon.PseudoDevice.NextIno(), BlockSize: usermem.PageSize, diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go index 27abeb6ba..61a7e2900 100644 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ b/pkg/sentry/fs/ramfs/tree_test.go @@ -17,12 +17,11 @@ package ramfs import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" ) func TestMakeDirectoryTree(t *testing.T) { - mount := fs.NewPseudoMountSource() for _, test := range []struct { name string @@ -54,6 +53,7 @@ func TestMakeDirectoryTree(t *testing.T) { }, } { ctx := contexttest.Context(t) + mount := fs.NewPseudoMountSource(ctx) tree, err := MakeDirectoryTree(ctx, mount, test.subdirs) if err != nil { t.Errorf("%s: failed to make ramfs tree, got error %v, want nil", test.name, err) diff --git a/pkg/sentry/fs/save.go b/pkg/sentry/fs/save.go index 2eaf6ab69..fe5c76b44 100644 --- a/pkg/sentry/fs/save.go +++ b/pkg/sentry/fs/save.go @@ -18,7 +18,7 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // SaveInodeMappings saves a mapping of path -> inode ID for every diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 65937f44d..eed1c2854 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -18,9 +18,9 @@ import ( "io" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/secio" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/secio" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserror" ) // Splice moves data to this file, directly from another. @@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, // Check append-only mode and the limit. if !dstPipe { + unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append) + defer unlock() if dst.Flags().Append { if opts.DstOffset { // We need to acquire the lock. diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD index 42e98230e..70fa3af89 100644 --- a/pkg/sentry/fs/sys/BUILD +++ b/pkg/sentry/fs/sys/BUILD @@ -10,7 +10,7 @@ go_library( "fs.go", "sys.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/sys", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/sys/device.go b/pkg/sentry/fs/sys/device.go index 128d3a9d9..4e79dbb71 100644 --- a/pkg/sentry/fs/sys/device.go +++ b/pkg/sentry/fs/sys/device.go @@ -14,7 +14,7 @@ package sys -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // sysfsDevice is the sysfs virtual device. var sysfsDevice = device.NewAnonDevice() diff --git a/pkg/sentry/fs/sys/devices.go b/pkg/sentry/fs/sys/devices.go index 54f35c6a0..4f78ca8d2 100644 --- a/pkg/sentry/fs/sys/devices.go +++ b/pkg/sentry/fs/sys/devices.go @@ -17,11 +17,11 @@ package sys import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // +stateify savable @@ -58,7 +58,7 @@ func newPossible(ctx context.Context, msrc *fs.MountSource) *fs.Inode { Contents: contents, }, } - return newFile(c, msrc) + return newFile(ctx, c, msrc) } func newCPU(ctx context.Context, msrc *fs.MountSource) *fs.Inode { diff --git a/pkg/sentry/fs/sys/fs.go b/pkg/sentry/fs/sys/fs.go index f0c2322e0..e60b63e75 100644 --- a/pkg/sentry/fs/sys/fs.go +++ b/pkg/sentry/fs/sys/fs.go @@ -15,8 +15,8 @@ package sys import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // filesystem is a sysfs. @@ -30,7 +30,7 @@ func init() { fs.RegisterFilesystem(&filesystem{}) } -// FilesystemName is the name underwhich the filesystem is registered. +// FilesystemName is the name under which the filesystem is registered. // Name matches fs/sysfs/mount.c:sysfs_fs_type.name. const FilesystemName = "sysfs" @@ -61,5 +61,5 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou // device is always ignored. // sysfs ignores data, see fs/sysfs/mount.c:sysfs_mount. - return New(ctx, fs.NewNonCachingMountSource(f, flags)), nil + return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags)), nil } diff --git a/pkg/sentry/fs/sys/sys.go b/pkg/sentry/fs/sys/sys.go index d20ef91fa..b14bf3f55 100644 --- a/pkg/sentry/fs/sys/sys.go +++ b/pkg/sentry/fs/sys/sys.go @@ -16,25 +16,25 @@ package sys import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) -func newFile(node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { +func newFile(ctx context.Context, node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { sattr := fs.StableAttr{ DeviceID: sysfsDevice.DeviceID(), InodeID: sysfsDevice.NextIno(), BlockSize: usermem.PageSize, Type: fs.SpecialFile, } - return fs.NewInode(node, msrc, sattr) + return fs.NewInode(ctx, node, msrc, sattr) } func newDir(ctx context.Context, msrc *fs.MountSource, contents map[string]*fs.Inode) *fs.Inode { d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return fs.NewInode(d, msrc, fs.StableAttr{ + return fs.NewInode(ctx, d, msrc, fs.StableAttr{ DeviceID: sysfsDevice.DeviceID(), InodeID: sysfsDevice.NextIno(), BlockSize: usermem.PageSize, diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index 0e06a5028..1d80daeaf 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "timerfd", srcs = ["timerfd.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/timerfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index c1721f434..59403d9db 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -19,14 +19,14 @@ package timerfd import ( "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // TimerOperations implements fs.FileOperations for timerfds. @@ -53,7 +53,7 @@ type TimerOperations struct { // NewFile returns a timerfd File that receives time from c. func NewFile(ctx context.Context, c ktime.Clock) *fs.File { - dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[timerfd]") + dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[timerfd]") // Release the initial dirent reference after NewFile takes a reference. defer dirent.DecRef() tops := &TimerOperations{} diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 9570c71e5..8f7eb5757 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -11,7 +11,7 @@ go_library( "inode_file.go", "tmpfs.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/tmpfs/device.go b/pkg/sentry/fs/tmpfs/device.go index 179c3a46f..ae7c55ee1 100644 --- a/pkg/sentry/fs/tmpfs/device.go +++ b/pkg/sentry/fs/tmpfs/device.go @@ -14,7 +14,7 @@ package tmpfs -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // tmpfsDevice is the kernel tmpfs device. var tmpfsDevice = device.NewAnonDevice() diff --git a/pkg/sentry/fs/tmpfs/file_regular.go b/pkg/sentry/fs/tmpfs/file_regular.go index d1c163879..9a6943fe4 100644 --- a/pkg/sentry/fs/tmpfs/file_regular.go +++ b/pkg/sentry/fs/tmpfs/file_regular.go @@ -15,12 +15,12 @@ package tmpfs import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // regularFileOperations implements fs.FileOperations for a regular diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go index b44c06556..0075ef023 100644 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ b/pkg/sentry/fs/tmpfs/file_test.go @@ -18,17 +18,17 @@ import ( "bytes" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func newFileInode(ctx context.Context) *fs.Inode { - m := fs.NewCachingMountSource(&Filesystem{}, fs.MountSourceFlags{}) + m := fs.NewCachingMountSource(ctx, &Filesystem{}, fs.MountSourceFlags{}) iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.WithCurrentTime(ctx, fs.UnstableAttr{})) - return fs.NewInode(iops, m, fs.StableAttr{ + return fs.NewInode(ctx, iops, m, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, @@ -38,7 +38,7 @@ func newFileInode(ctx context.Context) *fs.Inode { func newFile(ctx context.Context) *fs.File { inode := newFileInode(ctx) - f, _ := inode.GetFile(ctx, fs.NewDirent(inode, "stub"), fs.FileFlags{Read: true, Write: true}) + f, _ := inode.GetFile(ctx, fs.NewDirent(ctx, inode, "stub"), fs.FileFlags{Read: true, Write: true}) return f } diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index 83e1bf247..be98ad751 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -18,10 +18,10 @@ import ( "fmt" "strconv" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) const ( @@ -65,7 +65,7 @@ func init() { fs.RegisterFilesystem(&Filesystem{}) } -// FilesystemName is the name underwhich the filesystem is registered. +// FilesystemName is the name under which the filesystem is registered. // Name matches mm/shmem.c:shmem_fs_type.name. const FilesystemName = "tmpfs" @@ -133,12 +133,15 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou } // Construct a mount which will follow the cache options provided. + // + // TODO(gvisor.dev/issue/179): There should be no reason to disable + // caching once bind mounts are properly supported. var msrc *fs.MountSource switch options[cacheKey] { case "", cacheAll: - msrc = fs.NewCachingMountSource(f, flags) + msrc = fs.NewCachingMountSource(ctx, f, flags) case cacheRevalidate: - msrc = fs.NewRevalidatingMountSource(f, flags) + msrc = fs.NewRevalidatingMountSource(ctx, f, flags) default: return nil, fmt.Errorf("invalid cache policy option %q", options[cacheKey]) } diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index 3fe659543..f86dfaa36 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -20,18 +20,18 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) var ( @@ -128,7 +128,7 @@ func NewMemfdInode(ctx context.Context, allowSeals bool) *fs.Inode { if allowSeals { iops.seals = 0 } - return fs.NewInode(iops, fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{}), fs.StableAttr{ + return fs.NewInode(ctx, iops, fs.NewNonCachingMountSource(ctx, nil, fs.MountSourceFlags{}), fs.StableAttr{ Type: fs.RegularFile, DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 263d10cfe..0f4497cd6 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -16,17 +16,17 @@ package tmpfs import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) var fsInfo = fs.Info{ @@ -90,7 +90,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn // Manually set the CreateOps. d.ramfsDir.CreateOps = d.newCreateOps() - return fs.NewInode(d, msrc, fs.StableAttr{ + return fs.NewInode(ctx, d, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, @@ -218,7 +218,7 @@ func (d *Dir) newCreateOps() *ramfs.CreateOps { Links: 0, }) iops := NewInMemoryFile(ctx, usage.Tmpfs, uattr) - return fs.NewInode(iops, dir.MountSource, fs.StableAttr{ + return fs.NewInode(ctx, iops, dir.MountSource, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, @@ -262,7 +262,7 @@ type Symlink struct { // NewSymlink returns a new symlink with the provided permissions. func NewSymlink(ctx context.Context, target string, owner fs.FileOwner, msrc *fs.MountSource) *fs.Inode { s := &Symlink{Symlink: *ramfs.NewSymlink(ctx, owner, target)} - return fs.NewInode(s, msrc, fs.StableAttr{ + return fs.NewInode(ctx, s, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, @@ -292,7 +292,7 @@ type Socket struct { // NewSocket returns a new socket with the provided permissions. func NewSocket(ctx context.Context, socket transport.BoundEndpoint, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource) *fs.Inode { s := &Socket{Socket: *ramfs.NewSocket(ctx, socket, owner, perms)} - return fs.NewInode(s, msrc, fs.StableAttr{ + return fs.NewInode(ctx, s, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, @@ -329,7 +329,7 @@ func NewFifo(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, fifoIops := &Fifo{iops} // Build a new Inode. - return fs.NewInode(fifoIops, msrc, fs.StableAttr{ + return fs.NewInode(ctx, fifoIops, msrc, fs.StableAttr{ DeviceID: tmpfsDevice.DeviceID(), InodeID: tmpfsDevice.NextIno(), BlockSize: usermem.PageSize, diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 908d9de09..5e9327aec 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -13,7 +13,7 @@ go_library( "slave.go", "terminal.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty", + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tty", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 2603354c4..1d128532b 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -21,15 +21,15 @@ import ( "strconv" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // dirInodeOperations is the root of a devpts mount. @@ -114,7 +114,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode { InodeID: d.master.StableAttr.InodeID, }) - return fs.NewInode(d, m, fs.StableAttr{ + return fs.NewInode(ctx, d, m, fs.StableAttr{ DeviceID: ptsDevice.DeviceID(), // N.B. Linux always uses inode id 1 for the directory. See // fs/devpts/inode.c:devpts_fill_super. @@ -143,7 +143,7 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str // Master? if name == "ptmx" { d.master.IncRef() - return fs.NewDirent(d.master, name), nil + return fs.NewDirent(ctx, d.master, name), nil } // Slave number? @@ -159,7 +159,7 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str } s.IncRef() - return fs.NewDirent(s, name), nil + return fs.NewDirent(ctx, s, name), nil } // Create implements fs.InodeOperations.Create. @@ -307,7 +307,7 @@ type dirFileOperations struct { var _ fs.FileOperations = (*dirFileOperations)(nil) // IterateDir implements DirIterator.IterateDir. -func (df *dirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) { +func (df *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) { df.di.mu.Lock() defer df.di.mu.Unlock() diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index 701b2f7d9..edee56c12 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -15,10 +15,10 @@ package tty import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/syserror" ) // ptsDevice is the pseudo-filesystem device. @@ -67,7 +67,7 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou return nil, syserror.EINVAL } - return newDir(ctx, fs.NewMountSource(&superOperations{}, f, flags)), nil + return newDir(ctx, fs.NewMountSource(ctx, &superOperations{}, f, flags)), nil } // superOperations implements fs.MountSourceOperations, preventing caching. @@ -94,6 +94,13 @@ func (superOperations) Keep(*fs.Dirent) bool { return false } +// CacheReaddir implements fs.DirentOperations.CacheReaddir. +// +// CacheReaddir returns false because entries change on master operations. +func (superOperations) CacheReaddir() bool { + return false +} + // ResetInodeMappings implements MountSourceOperations.ResetInodeMappings. func (superOperations) ResetInodeMappings() {} diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 20d29d130..7cc0eb409 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -19,12 +19,12 @@ import ( "sync" "unicode/utf8" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index afdf44cd1..92ec1ca18 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -15,15 +15,15 @@ package tty import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // masterInodeOperations are the fs.InodeOperations for the master end of the @@ -46,7 +46,7 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn d: d, } - return fs.NewInode(iops, d.msrc, fs.StableAttr{ + return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{ DeviceID: ptsDevice.DeviceID(), // N.B. Linux always uses inode id 2 for ptmx. See // fs/devpts/inode.c:mknod_ptmx. @@ -144,7 +144,7 @@ func (mf *masterFileOperations) Write(ctx context.Context, _ *fs.File, src userm } // Ioctl implements fs.FileOperations.Ioctl. -func (mf *masterFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the output queue read buffer. diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 11fb92be3..231e4e6eb 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -17,13 +17,13 @@ package tty import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // waitBufMaxBytes is the maximum size of a wait buffer. It is based on diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 2abf32e57..e30266404 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -15,14 +15,14 @@ package tty import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // slaveInodeOperations are the fs.InodeOperations for the slave end of the @@ -51,7 +51,7 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne t: t, } - return fs.NewInode(iops, d.msrc, fs.StableAttr{ + return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{ DeviceID: ptsDevice.DeviceID(), // N.B. Linux always uses inode id = tty index + 3. See // fs/devpts/inode.c:devpts_pty_new. @@ -128,7 +128,7 @@ func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src userme } // Ioctl implements fs.FileOperations.Ioctl. -func (sf *slaveFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the input queue read buffer. diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index 2b4160ba5..b7cecb2ed 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -15,9 +15,9 @@ package tty import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" ) // Terminal is a pseudoterminal. @@ -38,9 +38,11 @@ type Terminal struct { func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal { termios := linux.DefaultSlaveTermios - return &Terminal{ + t := Terminal{ d: d, n: n, ld: newLineDiscipline(termios), } + t.EnableLeakCheck("tty.Terminal") + return &t } diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go index d2e75a511..59f07ff8e 100644 --- a/pkg/sentry/fs/tty/tty_test.go +++ b/pkg/sentry/fs/tty/tty_test.go @@ -17,9 +17,9 @@ package tty import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func TestSimpleMasterToSlave(t *testing.T) { diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD index b5067ae6d..f989f2f8b 100644 --- a/pkg/sentry/hostcpu/BUILD +++ b/pkg/sentry/hostcpu/BUILD @@ -8,7 +8,7 @@ go_library( "getcpu_amd64.s", "hostcpu.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu", + importpath = "gvisor.dev/gvisor/pkg/sentry/hostcpu", visibility = ["//:sandbox"], ) diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD index 1a4632a54..67831d5a1 100644 --- a/pkg/sentry/hostmm/BUILD +++ b/pkg/sentry/hostmm/BUILD @@ -8,7 +8,7 @@ go_library( "cgroup.go", "hostmm.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/hostmm", + importpath = "gvisor.dev/gvisor/pkg/sentry/hostmm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/fd", diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go index 5432cada9..19335ca73 100644 --- a/pkg/sentry/hostmm/hostmm.go +++ b/pkg/sentry/hostmm/hostmm.go @@ -22,9 +22,9 @@ import ( "path" "syscall" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // NotifyCurrentMemcgPressureCallback requests that f is called whenever the diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index e288d34e9..184b566d9 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -12,6 +12,6 @@ go_library( "inet.go", "test_stack.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/inet", + importpath = "gvisor.dev/gvisor/pkg/sentry/inet", deps = ["//pkg/sentry/context"], ) diff --git a/pkg/sentry/inet/context.go b/pkg/sentry/inet/context.go index 8550c4793..4eda7dd1f 100644 --- a/pkg/sentry/inet/context.go +++ b/pkg/sentry/inet/context.go @@ -15,7 +15,7 @@ package inet import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the inet package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 04e375910..7b92f1b8d 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -85,7 +85,7 @@ proto_library( go_proto_library( name = "uncaught_signal_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto", proto = ":uncaught_signal_proto", visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_go_proto"], @@ -96,7 +96,8 @@ go_library( srcs = [ "abstract_socket_namespace.go", "context.go", - "fd_map.go", + "fd_table.go", + "fd_table_unsafe.go", "fs_context.go", "ipc_namespace.go", "kernel.go", @@ -147,11 +148,11 @@ go_library( "vdso.go", "version.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel", imports = [ - "gvisor.googlesource.com/gvisor/pkg/bpf", - "gvisor.googlesource.com/gvisor/pkg/sentry/device", - "gvisor.googlesource.com/gvisor/pkg/tcpip", + "gvisor.dev/gvisor/pkg/bpf", + "gvisor.dev/gvisor/pkg/sentry/device", + "gvisor.dev/gvisor/pkg/tcpip", ], visibility = ["//:sandbox"], deps = [ @@ -179,7 +180,6 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/futex", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/semaphore", "//pkg/sentry/kernel/shm", @@ -214,7 +214,7 @@ go_test( name = "kernel_test", size = "small", srcs = [ - "fd_map_test.go", + "fd_table_test.go", "table_test.go", "task_test.go", "timekeeper_test.go", @@ -223,9 +223,10 @@ go_test( deps = [ "//pkg/abi", "//pkg/sentry/arch", + "//pkg/sentry/context", "//pkg/sentry/context/contexttest", + "//pkg/sentry/fs", "//pkg/sentry/fs/filetest", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/sched", "//pkg/sentry/limits", "//pkg/sentry/pgalloc", diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go index 5ce52e66c..244655b5c 100644 --- a/pkg/sentry/kernel/abstract_socket_namespace.go +++ b/pkg/sentry/kernel/abstract_socket_namespace.go @@ -18,8 +18,8 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) // +stateify savable diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index abd4f2dae..42779baa9 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -4,6 +4,17 @@ load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") go_template_instance( + name = "atomicptr_credentials", + out = "atomicptr_credentials.go", + package = "auth", + suffix = "Credentials", + template = "//third_party/gvsync:generic_atomicptr", + types = { + "Value": "Credentials", + }, +) + +go_template_instance( name = "id_map_range", out = "id_map_range.go", package = "auth", @@ -34,6 +45,7 @@ go_template_instance( go_library( name = "auth", srcs = [ + "atomicptr_credentials.go", "auth.go", "capability_set.go", "context.go", @@ -45,7 +57,7 @@ go_library( "id_map_set.go", "user_namespace.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/auth/capability_set.go b/pkg/sentry/kernel/auth/capability_set.go index 7a0c967cd..fc8c6745c 100644 --- a/pkg/sentry/kernel/auth/capability_set.go +++ b/pkg/sentry/kernel/auth/capability_set.go @@ -15,8 +15,8 @@ package auth import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" ) // A CapabilitySet is a set of capabilities implemented as a bitset. The zero @@ -24,7 +24,7 @@ import ( type CapabilitySet uint64 // AllCapabilities is a CapabilitySet containing all valid capabilities. -var AllCapabilities = CapabilitySetOf(linux.MaxCapability+1) - 1 +var AllCapabilities = CapabilitySetOf(linux.CAP_LAST_CAP+1) - 1 // CapabilitySetOf returns a CapabilitySet containing only the given // capability. diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go index 16d110610..5c0e7d6b6 100644 --- a/pkg/sentry/kernel/auth/context.go +++ b/pkg/sentry/kernel/auth/context.go @@ -15,7 +15,7 @@ package auth import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the auth package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go index 1511a0324..e057d2c6d 100644 --- a/pkg/sentry/kernel/auth/credentials.go +++ b/pkg/sentry/kernel/auth/credentials.go @@ -15,8 +15,8 @@ package auth import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" ) // Credentials contains information required to authorize privileged operations diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go index e5d6028d6..3d74bc610 100644 --- a/pkg/sentry/kernel/auth/id_map.go +++ b/pkg/sentry/kernel/auth/id_map.go @@ -15,9 +15,9 @@ package auth import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserror" ) // MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns. diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index a40dd668f..af28ccc65 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -18,7 +18,7 @@ import ( "math" "sync" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserror" ) // A UserNamespace represents a user namespace. See user_namespaces(7) for diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go index a1a084eab..e3f5b0d83 100644 --- a/pkg/sentry/kernel/context.go +++ b/pkg/sentry/kernel/context.go @@ -15,8 +15,8 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the kernel package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD index bfb2a0b73..bec13a3d9 100644 --- a/pkg/sentry/kernel/contexttest/BUILD +++ b/pkg/sentry/kernel/contexttest/BUILD @@ -6,7 +6,7 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go index ae67e2a25..82f9d8922 100644 --- a/pkg/sentry/kernel/contexttest/contexttest.go +++ b/pkg/sentry/kernel/contexttest/contexttest.go @@ -19,11 +19,11 @@ package contexttest import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" ) // Context returns a Context that may be used in tests. Uses ptrace as the diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD index 3ac59e13e..f46c43128 100644 --- a/pkg/sentry/kernel/epoll/BUILD +++ b/pkg/sentry/kernel/epoll/BUILD @@ -22,7 +22,7 @@ go_library( "epoll_list.go", "epoll_state.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/refs", @@ -30,7 +30,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go index 43ae22a5d..9c0a4e1b4 100644 --- a/pkg/sentry/kernel/epoll/epoll.go +++ b/pkg/sentry/kernel/epoll/epoll.go @@ -21,14 +21,13 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // Event describes the event mask that was observed and the user data to be @@ -61,7 +60,7 @@ const ( // +stateify savable type FileIdentifier struct { File *fs.File `state:"wait"` - Fd kdefs.FD + Fd int32 } // pollEntry holds all the state associated with an event poll entry, that is, @@ -155,7 +154,7 @@ var cycleMu sync.Mutex // NewEventPoll allocates and initializes a new event poll object. func NewEventPoll(ctx context.Context) *fs.File { // name matches fs/eventpoll.c:epoll_create1. - dirent := fs.NewDirent(anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]")) + dirent := fs.NewDirent(ctx, anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]")) // Release the initial dirent reference after NewFile takes a reference. defer dirent.DecRef() return fs.NewFile(ctx, dirent, fs.FileFlags{}, &EventPoll{ diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go index 4c3c38f9e..a0d35d350 100644 --- a/pkg/sentry/kernel/epoll/epoll_state.go +++ b/pkg/sentry/kernel/epoll/epoll_state.go @@ -15,8 +15,8 @@ package epoll import ( - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/waiter" ) // afterLoad is invoked by stateify. diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go index 49b781b69..4a20d4c82 100644 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ b/pkg/sentry/kernel/epoll/epoll_test.go @@ -17,9 +17,9 @@ package epoll import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs/filetest" + "gvisor.dev/gvisor/pkg/waiter" ) func TestFileDestroyed(t *testing.T) { diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index f2f1a1223..1c5f979d4 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") go_library( name = "eventfd", srcs = ["eventfd.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index fe474cbf0..12f0d429b 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -21,15 +21,15 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // EventOperations represents an event with the semantics of Linux's file-based event @@ -68,7 +68,7 @@ type EventOperations struct { // New creates a new event object with the supplied initial value and mode. func New(ctx context.Context, initVal uint64, semMode bool) *fs.File { // name matches fs/eventfd.c:eventfd_file_create. - dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[eventfd]") + dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]") // Release the initial dirent reference after NewFile takes a reference. defer dirent.DecRef() return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{ diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go index 1159638e5..018c7f3ef 100644 --- a/pkg/sentry/kernel/eventfd/eventfd_test.go +++ b/pkg/sentry/kernel/eventfd/eventfd_test.go @@ -17,9 +17,9 @@ package eventfd import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) func TestEventfd(t *testing.T) { diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 59b4a49e1..5eddca115 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "fasync", srcs = ["fasync.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 84cd08501..6b0bb0324 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -18,11 +18,11 @@ package fasync import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/waiter" ) // New creates a new FileAsync. @@ -34,9 +34,23 @@ func New() fs.FileAsync { // // +stateify savable type FileAsync struct { - mu sync.Mutex `state:"nosave"` - e waiter.Entry - requester *auth.Credentials + // e is immutable after first use (which is protected by mu below). + e waiter.Entry + + // regMu protects registeration and unregistration actions on e. + // + // regMu must be held while registration decisions are being made + // through the registration action itself. + // + // Lock ordering: regMu, mu. + regMu sync.Mutex `state:"nosave"` + + // mu protects all following fields. + // + // Lock ordering: e.mu, mu. + mu sync.Mutex `state:"nosave"` + requester *auth.Credentials + registered bool // Only one of the following is allowed to be non-nil. recipientPG *kernel.ProcessGroup @@ -47,7 +61,7 @@ type FileAsync struct { // Callback sends a signal. func (a *FileAsync) Callback(e *waiter.Entry) { a.mu.Lock() - if a.e.Callback == nil { + if !a.registered { a.mu.Unlock() return } @@ -80,14 +94,21 @@ func (a *FileAsync) Callback(e *waiter.Entry) { // // The file must not be currently registered. func (a *FileAsync) Register(w waiter.Waitable) { + a.regMu.Lock() + defer a.regMu.Unlock() a.mu.Lock() - defer a.mu.Unlock() - if a.e.Callback != nil { + if a.registered { + a.mu.Unlock() panic("registering already registered file") } - a.e.Callback = a + if a.e.Callback == nil { + a.e.Callback = a + } + a.registered = true + + a.mu.Unlock() w.EventRegister(&a.e, waiter.EventIn|waiter.EventOut|waiter.EventErr|waiter.EventHUp) } @@ -95,15 +116,19 @@ func (a *FileAsync) Register(w waiter.Waitable) { // // The file must be currently registered. func (a *FileAsync) Unregister(w waiter.Waitable) { + a.regMu.Lock() + defer a.regMu.Unlock() a.mu.Lock() - defer a.mu.Unlock() - if a.e.Callback == nil { + if !a.registered { + a.mu.Unlock() panic("unregistering unregistered file") } + a.registered = false + + a.mu.Unlock() w.EventUnregister(&a.e) - a.e.Callback = nil } // Owner returns who is currently getting signals. All return values will be diff --git a/pkg/sentry/kernel/fd_map.go b/pkg/sentry/kernel/fd_map.go deleted file mode 100644 index c5636d233..000000000 --- a/pkg/sentry/kernel/fd_map.go +++ /dev/null @@ -1,364 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "bytes" - "fmt" - "sort" - "sync" - "sync/atomic" - "syscall" - - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" -) - -// FDs is an ordering of FD's that can be made stable. -type FDs []kdefs.FD - -func (f FDs) Len() int { - return len(f) -} - -func (f FDs) Swap(i, j int) { - f[i], f[j] = f[j], f[i] -} - -func (f FDs) Less(i, j int) bool { - return f[i] < f[j] -} - -// FDFlags define flags for an individual descriptor. -// -// +stateify savable -type FDFlags struct { - // CloseOnExec indicates the descriptor should be closed on exec. - CloseOnExec bool -} - -// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags -// representation. -func (f FDFlags) ToLinuxFileFlags() (mask uint) { - if f.CloseOnExec { - mask |= linux.O_CLOEXEC - } - return -} - -// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags -// representation. -func (f FDFlags) ToLinuxFDFlags() (mask uint) { - if f.CloseOnExec { - mask |= linux.FD_CLOEXEC - } - return -} - -// descriptor holds the details about a file descriptor, namely a pointer the -// file itself and the descriptor flags. -// -// +stateify savable -type descriptor struct { - file *fs.File - flags FDFlags -} - -// FDMap is used to manage File references and flags. -// -// +stateify savable -type FDMap struct { - refs.AtomicRefCount - k *Kernel - files map[kdefs.FD]descriptor - mu sync.RWMutex `state:"nosave"` - uid uint64 -} - -// ID returns a unique identifier for this FDMap. -func (f *FDMap) ID() uint64 { - return f.uid -} - -// NewFDMap allocates a new FDMap that may be used by tasks in k. -func (k *Kernel) NewFDMap() *FDMap { - return &FDMap{ - k: k, - files: make(map[kdefs.FD]descriptor), - uid: atomic.AddUint64(&k.fdMapUids, 1), - } -} - -// destroy removes all of the file descriptors from the map. -func (f *FDMap) destroy() { - f.RemoveIf(func(*fs.File, FDFlags) bool { - return true - }) -} - -// DecRef implements RefCounter.DecRef with destructor f.destroy. -func (f *FDMap) DecRef() { - f.DecRefWithDestructor(f.destroy) -} - -// Size returns the number of file descriptor slots currently allocated. -func (f *FDMap) Size() int { - f.mu.RLock() - defer f.mu.RUnlock() - - return len(f.files) -} - -// String is a stringer for FDMap. -func (f *FDMap) String() string { - f.mu.RLock() - defer f.mu.RUnlock() - - var b bytes.Buffer - for k, v := range f.files { - n, _ := v.file.Dirent.FullName(nil /* root */) - b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", k, n)) - } - return b.String() -} - -// NewFDFrom allocates a new FD guaranteed to be the lowest number available -// greater than or equal to from. This property is important as Unix programs -// tend to count on this allocation order. -func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) { - if fd < 0 { - // Don't accept negative FDs. - return 0, syscall.EINVAL - } - - f.mu.Lock() - defer f.mu.Unlock() - - // Finds the lowest fd not in the handles map. - lim := limitSet.Get(limits.NumberOfFiles) - for i := fd; lim.Cur == limits.Infinity || i < kdefs.FD(lim.Cur); i++ { - if _, ok := f.files[i]; !ok { - file.IncRef() - f.files[i] = descriptor{file, flags} - return i, nil - } - } - - return -1, syscall.EMFILE -} - -// NewFDAt sets the file reference for the given FD. If there is an -// active reference for that FD, the ref count for that existing reference -// is decremented. -func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error { - if fd < 0 { - // Don't accept negative FDs. - return syscall.EBADF - } - - // In this one case we do not do a defer of the Unlock. The - // reason is that we must have done all the work needed for - // discarding any old open file before we return to the - // caller. In other words, the DecRef(), below, must have - // completed by the time we return to the caller to ensure - // side effects are, in fact, effected. A classic example is - // dup2(fd1, fd2); if fd2 was already open, it must be closed, - // and we don't want to resume the caller until it is; we have - // to block on the DecRef(). Hence we can not just do a 'go - // oldfile.DecRef()', since there would be no guarantee that - // it would be done before we the caller resumed. Since we - // must wait for the DecRef() to finish, and that could take - // time, it's best to first call f.muUnlock beore so we are - // not blocking other uses of this FDMap on the DecRef() call. - f.mu.Lock() - oldDesc, oldExists := f.files[fd] - lim := limitSet.Get(limits.NumberOfFiles).Cur - // if we're closing one then the effective limit is one - // more than the actual limit. - if oldExists && lim != limits.Infinity { - lim++ - } - if lim != limits.Infinity && fd >= kdefs.FD(lim) { - f.mu.Unlock() - return syscall.EMFILE - } - - file.IncRef() - f.files[fd] = descriptor{file, flags} - f.mu.Unlock() - - if oldExists { - oldDesc.file.DecRef() - } - return nil -} - -// SetFlags sets the flags for the given file descriptor, if it is valid. -func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) { - f.mu.Lock() - defer f.mu.Unlock() - - desc, ok := f.files[fd] - if !ok { - return - } - - f.files[fd] = descriptor{desc.file, flags} -} - -// GetDescriptor returns a reference to the file and the flags for the FD. It -// bumps its reference count as well. It returns nil if there is no File -// for the FD, i.e. if the FD is invalid. The caller must use DecRef -// when they are done. -func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) { - f.mu.RLock() - defer f.mu.RUnlock() - - if desc, ok := f.files[fd]; ok { - desc.file.IncRef() - return desc.file, desc.flags - } - return nil, FDFlags{} -} - -// GetFile returns a reference to the File for the FD and bumps -// its reference count as well. It returns nil if there is no File -// for the FD, i.e. if the FD is invalid. The caller must use DecRef -// when they are done. -func (f *FDMap) GetFile(fd kdefs.FD) *fs.File { - f.mu.RLock() - if desc, ok := f.files[fd]; ok { - desc.file.IncRef() - f.mu.RUnlock() - return desc.file - } - f.mu.RUnlock() - return nil -} - -// fds returns an ordering of FDs. -func (f *FDMap) fds() FDs { - fds := make(FDs, 0, len(f.files)) - for fd := range f.files { - fds = append(fds, fd) - } - sort.Sort(fds) - return fds -} - -// GetFDs returns a list of valid fds. -func (f *FDMap) GetFDs() FDs { - f.mu.RLock() - defer f.mu.RUnlock() - return f.fds() -} - -// GetRefs returns a stable slice of references to all files and bumps the -// reference count on each. The caller must use DecRef on each reference when -// they're done using the slice. -func (f *FDMap) GetRefs() []*fs.File { - f.mu.RLock() - defer f.mu.RUnlock() - - fds := f.fds() - fs := make([]*fs.File, 0, len(fds)) - for _, fd := range fds { - desc := f.files[fd] - desc.file.IncRef() - fs = append(fs, desc.file) - } - return fs -} - -// Fork returns an independent FDMap pointing to the same descriptors. -func (f *FDMap) Fork() *FDMap { - f.mu.RLock() - defer f.mu.RUnlock() - - clone := f.k.NewFDMap() - - // Grab a extra reference for every file. - for fd, desc := range f.files { - desc.file.IncRef() - clone.files[fd] = desc - } - - // That's it! - return clone -} - -// unlock releases all file locks held by this FDMap's uid. Must only be -// called on a non-nil *fs.File. -func (f *FDMap) unlock(file *fs.File) { - id := lock.UniqueID(f.ID()) - file.Dirent.Inode.LockCtx.Posix.UnlockRegion(id, lock.LockRange{0, lock.LockEOF}) -} - -// inotifyFileClose generates the appropriate inotify events for f being closed. -func inotifyFileClose(f *fs.File) { - var ev uint32 - d := f.Dirent - - if fs.IsDir(d.Inode.StableAttr) { - ev |= linux.IN_ISDIR - } - - if f.Flags().Write { - ev |= linux.IN_CLOSE_WRITE - } else { - ev |= linux.IN_CLOSE_NOWRITE - } - - d.InotifyEvent(ev, 0) -} - -// Remove removes an FD from the FDMap, and returns (File, true) if a File -// one was found. Callers are expected to decrement the reference count on -// the File. Otherwise returns (nil, false). -func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) { - f.mu.Lock() - desc := f.files[fd] - delete(f.files, fd) - f.mu.Unlock() - if desc.file != nil { - f.unlock(desc.file) - inotifyFileClose(desc.file) - return desc.file, true - } - return nil, false -} - -// RemoveIf removes all FDs where cond is true. -func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) { - var removed []*fs.File - f.mu.Lock() - for fd, desc := range f.files { - if desc.file != nil && cond(desc.file, desc.flags) { - delete(f.files, fd) - removed = append(removed, desc.file) - } - } - f.mu.Unlock() - - for _, file := range removed { - f.unlock(file) - inotifyFileClose(file) - file.DecRef() - } -} diff --git a/pkg/sentry/kernel/fd_map_test.go b/pkg/sentry/kernel/fd_map_test.go deleted file mode 100644 index 22db4c7cf..000000000 --- a/pkg/sentry/kernel/fd_map_test.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernel - -import ( - "testing" - - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" -) - -const ( - // maxFD is the maximum FD to try to create in the map. - // This number of open files has been seen in the wild. - maxFD = 2 * 1024 -) - -func newTestFDMap() *FDMap { - return &FDMap{ - files: make(map[kdefs.FD]descriptor), - } -} - -// TestFDMapMany allocates maxFD FDs, i.e. maxes out the FDMap, -// until there is no room, then makes sure that NewFDAt works -// and also that if we remove one and add one that works too. -func TestFDMapMany(t *testing.T) { - file := filetest.NewTestFile(t) - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */) - - f := newTestFDMap() - for i := 0; i < maxFD; i++ { - if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil { - t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD) - } - } - - if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil { - t.Fatalf("f.NewFDFrom(0, r) in full map: got nil, wanted error") - } - - if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil { - t.Fatalf("f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } -} - -// TestFDMap does a set of simple tests to make sure simple adds, -// removes, GetRefs, and DecRefs work. The ordering is just weird -// enough that a table-driven approach seemed clumsy. -func TestFDMap(t *testing.T) { - file := filetest.NewTestFile(t) - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true /* privileged */) - - f := newTestFDMap() - if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil { - t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err) - } - - if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil { - t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error") - } - - largeLimit := limits.Limit{maxFD, maxFD} - limitSet.Set(limits.NumberOfFiles, largeLimit, true /* privileged */) - - if fd, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil { - t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) - } else if fd != kdefs.FD(1) { - t.Fatalf("Added an FD to a resized map: got %v, want 1", fd) - } - - if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil { - t.Fatalf("Replacing FD 1 via f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) - } - - if err := f.NewFDAt(maxFD+1, file, FDFlags{}, limitSet); err == nil { - t.Fatalf("Using an FD that was too large via f.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1) - } - - if ref := f.GetFile(1); ref == nil { - t.Fatalf("f.GetFile(1): got nil, wanted %v", file) - } - - if ref := f.GetFile(2); ref != nil { - t.Fatalf("f.GetFile(2): got a %v, wanted nil", ref) - } - - ref, ok := f.Remove(1) - if !ok { - t.Fatalf("f.Remove(1) for an existing FD: failed, want success") - } - ref.DecRef() - - if ref, ok := f.Remove(1); ok { - ref.DecRef() - t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") - } - -} - -func TestDescriptorFlags(t *testing.T) { - file := filetest.NewTestFile(t) - f := newTestFDMap() - limitSet := limits.NewLimitSet() - limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */) - - origFlags := FDFlags{CloseOnExec: true} - - if err := f.NewFDAt(2, file, origFlags, limitSet); err != nil { - t.Fatalf("f.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err) - } - - newFile, newFlags := f.GetDescriptor(2) - if newFile == nil { - t.Fatalf("f.GetFile(2): got a %v, wanted nil", newFile) - } - - if newFlags != origFlags { - t.Fatalf("new File flags %+v don't match original %+v", newFlags, origFlags) - } -} diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go new file mode 100644 index 000000000..1f3a57dc1 --- /dev/null +++ b/pkg/sentry/kernel/fd_table.go @@ -0,0 +1,380 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "bytes" + "fmt" + "math" + "sync" + "sync/atomic" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/limits" +) + +// FDFlags define flags for an individual descriptor. +// +// +stateify savable +type FDFlags struct { + // CloseOnExec indicates the descriptor should be closed on exec. + CloseOnExec bool +} + +// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags +// representation. +func (f FDFlags) ToLinuxFileFlags() (mask uint) { + if f.CloseOnExec { + mask |= linux.O_CLOEXEC + } + return +} + +// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags +// representation. +func (f FDFlags) ToLinuxFDFlags() (mask uint) { + if f.CloseOnExec { + mask |= linux.FD_CLOEXEC + } + return +} + +// descriptor holds the details about a file descriptor, namely a pointer to +// the file itself and the descriptor flags. +// +// Note that this is immutable and can only be changed via operations on the +// descriptorTable. +// +// +stateify savable +type descriptor struct { + file *fs.File + flags FDFlags +} + +// FDTable is used to manage File references and flags. +// +// +stateify savable +type FDTable struct { + refs.AtomicRefCount + k *Kernel + + // uid is a unique identifier. + uid uint64 + + // mu protects below. + mu sync.Mutex `state:"nosave"` + + // used contains the number of non-nil entries. + used int32 + + // descriptorTable holds descriptors. + descriptorTable `state:".(map[int32]descriptor)"` +} + +func (f *FDTable) saveDescriptorTable() map[int32]descriptor { + m := make(map[int32]descriptor) + f.forEach(func(fd int32, file *fs.File, flags FDFlags) { + m[fd] = descriptor{ + file: file, + flags: flags, + } + }) + return m +} + +func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { + f.init() // Initialize table. + for fd, d := range m { + f.set(fd, d.file, d.flags) + + // Note that we do _not_ need to acquire a extra table + // reference here. The table reference will already be + // accounted for in the file, so we drop the reference taken by + // set above. + d.file.DecRef() + } +} + +// drop drops the table reference. +func (f *FDTable) drop(file *fs.File) { + // Release locks. + file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lock.UniqueID(f.uid), lock.LockRange{0, lock.LockEOF}) + + // Send inotify events. + d := file.Dirent + var ev uint32 + if fs.IsDir(d.Inode.StableAttr) { + ev |= linux.IN_ISDIR + } + if file.Flags().Write { + ev |= linux.IN_CLOSE_WRITE + } else { + ev |= linux.IN_CLOSE_NOWRITE + } + d.InotifyEvent(ev, 0) + + // Drop the table reference. + file.DecRef() +} + +// ID returns a unique identifier for this FDTable. +func (f *FDTable) ID() uint64 { + return f.uid +} + +// NewFDTable allocates a new FDTable that may be used by tasks in k. +func (k *Kernel) NewFDTable() *FDTable { + f := &FDTable{ + k: k, + uid: atomic.AddUint64(&k.fdMapUids, 1), + } + f.init() + return f +} + +// destroy removes all of the file descriptors from the map. +func (f *FDTable) destroy() { + f.RemoveIf(func(*fs.File, FDFlags) bool { + return true + }) +} + +// DecRef implements RefCounter.DecRef with destructor f.destroy. +func (f *FDTable) DecRef() { + f.DecRefWithDestructor(f.destroy) +} + +// Size returns the number of file descriptor slots currently allocated. +func (f *FDTable) Size() int { + size := atomic.LoadInt32(&f.used) + return int(size) +} + +// forEach iterates over all non-nil files. +// +// It is the caller's responsibility to acquire an appropriate lock. +func (f *FDTable) forEach(fn func(fd int32, file *fs.File, flags FDFlags)) { + fd := int32(0) + for { + file, flags, ok := f.get(fd) + if !ok { + break + } + if file != nil { + if !file.TryIncRef() { + continue // Race caught. + } + fn(int32(fd), file, flags) + file.DecRef() + } + fd++ + } +} + +// String is a stringer for FDTable. +func (f *FDTable) String() string { + var b bytes.Buffer + f.forEach(func(fd int32, file *fs.File, flags FDFlags) { + n, _ := file.Dirent.FullName(nil /* root */) + b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", fd, n)) + }) + return b.String() +} + +// NewFDs allocates new FDs guaranteed to be the lowest number available +// greater than or equal to the fd parameter. All files will share the set +// flags. Success is guaranteed to be all or none. +func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) { + if fd < 0 { + // Don't accept negative FDs. + return nil, syscall.EINVAL + } + + // Default limit. + end := int32(math.MaxInt32) + + // Ensure we don't get past the provided limit. + if limitSet := limits.FromContext(ctx); limitSet != nil { + lim := limitSet.Get(limits.NumberOfFiles) + if lim.Cur != limits.Infinity { + end = int32(lim.Cur) + } + if fd >= end { + return nil, syscall.EMFILE + } + } + + f.mu.Lock() + defer f.mu.Unlock() + + // Install all entries. + for i := fd; i < end && len(fds) < len(files); i++ { + if d, _, _ := f.get(i); d == nil { + f.set(i, files[len(fds)], flags) // Set the descriptor. + fds = append(fds, i) // Record the file descriptor. + } + } + + // Failure? Unwind existing FDs. + if len(fds) < len(files) { + for _, i := range fds { + f.set(i, nil, FDFlags{}) // Zap entry. + } + return nil, syscall.EMFILE + } + + return fds, nil +} + +// NewFDAt sets the file reference for the given FD. If there is an active +// reference for that FD, the ref count for that existing reference is +// decremented. +func (f *FDTable) NewFDAt(ctx context.Context, fd int32, file *fs.File, flags FDFlags) error { + if fd < 0 { + // Don't accept negative FDs. + return syscall.EBADF + } + + f.mu.Lock() + defer f.mu.Unlock() + + // Check the limit for the provided file. + if limitSet := limits.FromContext(ctx); limitSet != nil { + if lim := limitSet.Get(limits.NumberOfFiles); lim.Cur != limits.Infinity && uint64(fd) >= lim.Cur { + return syscall.EMFILE + } + } + + // Install the entry. + f.set(fd, file, flags) + return nil +} + +// SetFlags sets the flags for the given file descriptor. +// +// True is returned iff flags were changed. +func (f *FDTable) SetFlags(fd int32, flags FDFlags) error { + if fd < 0 { + // Don't accept negative FDs. + return syscall.EBADF + } + + f.mu.Lock() + defer f.mu.Unlock() + + file, _, _ := f.get(fd) + if file == nil { + // No file found. + return syscall.EBADF + } + + // Update the flags. + f.set(fd, file, flags) + return nil +} + +// Get returns a reference to the file and the flags for the FD or nil if no +// file is defined for the given fd. +// +// N.B. Callers are required to use DecRef when they are done. +// +//go:nosplit +func (f *FDTable) Get(fd int32) (*fs.File, FDFlags) { + if fd < 0 { + return nil, FDFlags{} + } + + for { + file, flags, _ := f.get(fd) + if file != nil { + if !file.TryIncRef() { + continue // Race caught. + } + // Reference acquired. + return file, flags + } + // No file available. + return nil, FDFlags{} + } +} + +// GetFDs returns a list of valid fds. +func (f *FDTable) GetFDs() []int32 { + fds := make([]int32, 0, f.used) + f.forEach(func(fd int32, file *fs.File, flags FDFlags) { + fds = append(fds, fd) + }) + return fds +} + +// GetRefs returns a stable slice of references to all files and bumps the +// reference count on each. The caller must use DecRef on each reference when +// they're done using the slice. +func (f *FDTable) GetRefs() []*fs.File { + files := make([]*fs.File, 0, f.Size()) + f.forEach(func(_ int32, file *fs.File, flags FDFlags) { + file.IncRef() // Acquire a reference for caller. + files = append(files, file) + }) + return files +} + +// Fork returns an independent FDTable. +func (f *FDTable) Fork() *FDTable { + clone := f.k.NewFDTable() + + f.forEach(func(fd int32, file *fs.File, flags FDFlags) { + // The set function here will acquire an appropriate table + // reference for the clone. We don't need anything else. + clone.set(fd, file, flags) + }) + return clone +} + +// Remove removes an FD from and returns a non-file iff successful. +// +// N.B. Callers are required to use DecRef when they are done. +func (f *FDTable) Remove(fd int32) *fs.File { + if fd < 0 { + return nil + } + + f.mu.Lock() + defer f.mu.Unlock() + + orig, _, _ := f.get(fd) + if orig != nil { + orig.IncRef() // Reference for caller. + f.set(fd, nil, FDFlags{}) // Zap entry. + } + return orig +} + +// RemoveIf removes all FDs where cond is true. +func (f *FDTable) RemoveIf(cond func(*fs.File, FDFlags) bool) { + f.mu.Lock() + defer f.mu.Unlock() + + f.forEach(func(fd int32, file *fs.File, flags FDFlags) { + if cond(file, flags) { + f.set(fd, nil, FDFlags{}) // Clear from table. + } + }) +} diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go new file mode 100644 index 000000000..2413788e7 --- /dev/null +++ b/pkg/sentry/kernel/fd_table_test.go @@ -0,0 +1,192 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "runtime" + "sync" + "testing" + + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/filetest" + "gvisor.dev/gvisor/pkg/sentry/limits" +) + +const ( + // maxFD is the maximum FD to try to create in the map. + // + // This number of open files has been seen in the wild. + maxFD = 2 * 1024 +) + +func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) { + t.Helper() // Don't show in stacks. + + // Create the limits and context. + limitSet := limits.NewLimitSet() + limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) + ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet) + + // Create a test file.; + file := filetest.NewTestFile(t) + + // Create the table. + fdTable := new(FDTable) + fdTable.init() + + // Run the test. + fn(ctx, fdTable, file, limitSet) +} + +// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, until there +// is no room, then makes sure that NewFDAt works and also that if we remove +// one and add one that works too. +func TestFDTableMany(t *testing.T) { + runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { + for i := 0; i < maxFD; i++ { + if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { + t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD) + } + } + + if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { + t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error") + } + + if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { + t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) + } + }) +} + +// TestFDTable does a set of simple tests to make sure simple adds, removes, +// GetRefs, and DecRefs work. The ordering is just weird enough that a +// table-driven approach seemed clumsy. +func TestFDTable(t *testing.T) { + runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) { + // Cap the limit at one. + limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true) + + if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { + t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err) + } + + if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil { + t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error") + } + + // Remove the previous limit. + limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true) + + if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil { + t.Fatalf("Adding an FD to a resized map: got %v, want nil", err) + } else if len(fds) != 1 || fds[0] != 1 { + t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds) + } + + if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil { + t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err) + } + + if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil { + t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1) + } + + if ref, _ := fdTable.Get(1); ref == nil { + t.Fatalf("fdTable.Get(1): got nil, wanted %v", file) + } + + if ref, _ := fdTable.Get(2); ref != nil { + t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref) + } + + ref := fdTable.Remove(1) + if ref == nil { + t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success") + } + ref.DecRef() + + if ref := fdTable.Remove(1); ref != nil { + t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") + } + }) +} + +func TestDescriptorFlags(t *testing.T) { + runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { + if err := fdTable.NewFDAt(ctx, 2, file, FDFlags{CloseOnExec: true}); err != nil { + t.Fatalf("fdTable.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err) + } + + newFile, flags := fdTable.Get(2) + if newFile == nil { + t.Fatalf("fdTable.Get(2): got a %v, wanted nil", newFile) + } + + if !flags.CloseOnExec { + t.Fatalf("new File flags %v don't match original %d\n", flags, 0) + } + }) +} + +func BenchmarkFDLookupAndDecRef(b *testing.B) { + b.StopTimer() // Setup. + + runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { + fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) + if err != nil { + b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) + } + + b.StartTimer() // Benchmark. + for i := 0; i < b.N; i++ { + tf, _ := fdTable.Get(fds[i%len(fds)]) + tf.DecRef() + } + }) +} + +func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) { + b.StopTimer() // Setup. + + runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) { + fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{}) + if err != nil { + b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err) + } + + concurrency := runtime.GOMAXPROCS(0) + if concurrency < 4 { + concurrency = 4 + } + each := b.N / concurrency + + b.StartTimer() // Benchmark. + var wg sync.WaitGroup + for i := 0; i < concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < each; i++ { + tf, _ := fdTable.Get(fds[i%len(fds)]) + tf.DecRef() + } + }() + } + wg.Wait() + }) +} diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go new file mode 100644 index 000000000..e009df974 --- /dev/null +++ b/pkg/sentry/kernel/fd_table_unsafe.go @@ -0,0 +1,103 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "sync/atomic" + "unsafe" + + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +type descriptorTable struct { + // slice is a *[]unsafe.Pointer, where each element is actually + // *descriptor object, updated atomically. + // + // Changes to the slice itself requiring holding FDTable.mu. + slice unsafe.Pointer `state:".(map[int32]*descriptor)"` +} + +// init initializes the table. +func (f *FDTable) init() { + var slice []unsafe.Pointer // Empty slice. + atomic.StorePointer(&f.slice, unsafe.Pointer(&slice)) +} + +// get gets a file entry. +// +// The boolean indicates whether this was in range. +// +//go:nosplit +func (f *FDTable) get(fd int32) (*fs.File, FDFlags, bool) { + slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice)) + if fd >= int32(len(slice)) { + return nil, FDFlags{}, false + } + d := (*descriptor)(atomic.LoadPointer(&slice[fd])) + if d == nil { + return nil, FDFlags{}, true + } + return d.file, d.flags, true +} + +// set sets an entry. +// +// This handles accounting changes, as well as acquiring and releasing the +// reference needed by the table iff the file is different. +// +// Precondition: mu must be held. +func (f *FDTable) set(fd int32, file *fs.File, flags FDFlags) { + slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice)) + + // Grow the table as required. + if last := int32(len(slice)); fd >= last { + end := fd + 1 + if end < 2*last { + end = 2 * last + } + slice = append(slice, make([]unsafe.Pointer, end-last)...) + atomic.StorePointer(&f.slice, unsafe.Pointer(&slice)) + } + + // Create the new element. + var d *descriptor + if file != nil { + d = &descriptor{ + file: file, + flags: flags, + } + } + + // Update the single element. + orig := (*descriptor)(atomic.SwapPointer(&slice[fd], unsafe.Pointer(d))) + + // Acquire a table reference. + if file != nil && (orig == nil || file != orig.file) { + file.IncRef() + } + + // Drop the table reference. + if orig != nil && file != orig.file { + f.drop(orig.file) + } + + // Adjust used. + switch { + case orig == nil && file != nil: + atomic.AddInt32(&f.used, 1) + case orig != nil && file == nil: + atomic.AddInt32(&f.used, -1) + } +} diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go index d8115f59a..ded27d668 100644 --- a/pkg/sentry/kernel/fs_context.go +++ b/pkg/sentry/kernel/fs_context.go @@ -18,8 +18,8 @@ import ( "fmt" "sync" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // FSContext contains filesystem context. @@ -51,18 +51,20 @@ type FSContext struct { func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext { root.IncRef() cwd.IncRef() - return &FSContext{ + f := FSContext{ root: root, cwd: cwd, umask: umask, } + f.EnableLeakCheck("kernel.FSContext") + return &f } // destroy is the destructor for an FSContext. // // This will call DecRef on both root and cwd Dirents. If either call to -// DecRef returns an error, then it will be propigated. If both calls to -// DecRef return an error, then the one from root.DecRef will be propigated. +// DecRef returns an error, then it will be propagated. If both calls to +// DecRef return an error, then the one from root.DecRef will be propagated. // // Note that there may still be calls to WorkingDirectory() or RootDirectory() // (that return nil). This is because valid references may still be held via diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index b6af5b20b..a5cf1f627 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -33,7 +33,7 @@ go_library( "futex.go", "waiter_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index bb38eb81e..278cc8143 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -20,10 +20,10 @@ package futex import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // KeyKind indicates the type of a Key. @@ -729,14 +729,14 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool } b := m.lockBucket(&k) - err = m.unlockPILocked(t, addr, tid, b) + err = m.unlockPILocked(t, addr, tid, b, &k) k.release() b.mu.Unlock() return err } -func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket) error { +func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket, key *Key) error { cur, err := t.LoadUint32(addr) if err != nil { return err @@ -746,7 +746,22 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc return syserror.EPERM } - if b.waiters.Empty() { + var next *Waiter // Who's the next owner? + var next2 *Waiter // Who's the one after that? + for w := b.waiters.Front(); w != nil; w = w.Next() { + if !w.key.matches(key) { + continue + } + + if next == nil { + next = w + } else { + next2 = w + break + } + } + + if next == nil { // It's safe to set 0 because there are no waiters, no new owner, and the // executing task is the current owner (no owner died bit). prev, err := t.CompareAndSwapUint32(addr, cur, 0) @@ -761,12 +776,10 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc return nil } - next := b.waiters.Front() - // Set next owner's TID, waiters if there are any. Resets owner died bit, if // set, because the executing task takes over as the owner. val := next.tid - if next.Next() != nil { + if next2 != nil { val |= linux.FUTEX_WAITERS } diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index 2de5239bf..65e5d1428 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -23,7 +23,7 @@ import ( "testing" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // testData implements the Target interface, and allows us to diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go index ebe12812c..80a070d7e 100644 --- a/pkg/sentry/kernel/ipc_namespace.go +++ b/pkg/sentry/kernel/ipc_namespace.go @@ -15,9 +15,9 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore" + "gvisor.dev/gvisor/pkg/sentry/kernel/shm" ) // IPCNamespace represents an IPC namespace. @@ -40,7 +40,7 @@ func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace { } } -// SemaphoreRegistry returns the semanphore set registry for this namespace. +// SemaphoreRegistry returns the semaphore set registry for this namespace. func (i *IPCNamespace) SemaphoreRegistry() *semaphore.Registry { return i.semaphores } diff --git a/pkg/sentry/kernel/kdefs/BUILD b/pkg/sentry/kernel/kdefs/BUILD deleted file mode 100644 index 38aaca134..000000000 --- a/pkg/sentry/kernel/kdefs/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "kdefs", - srcs = ["kdefs.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs", - visibility = ["//:sandbox"], -) diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index f253a81d9..38b49cba2 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -39,34 +39,34 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port" - sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - uspb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/state" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" + "gvisor.dev/gvisor/pkg/sentry/hostcpu" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" + sentrytime "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/tcpip" ) // Kernel represents an emulated Linux kernel. It must be initialized by calling @@ -155,7 +155,7 @@ type Kernel struct { // cpuClockTicker increments cpuClock. cpuClockTicker *ktime.Timer `state:"nosave"` - // fdMapUids is an ever-increasing counter for generating FDMap uids. + // fdMapUids is an ever-increasing counter for generating FDTable uids. // // fdMapUids is mutable, and is accessed using atomic memory operations. fdMapUids uint64 @@ -381,13 +381,27 @@ func (k *Kernel) SaveTo(w io.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. func (k *Kernel) flushMountSourceRefs() error { - // Flush all mount sources for currently mounted filesystems. + // Flush all mount sources for currently mounted filesystems in the + // root mount namespace. k.mounts.FlushMountSourceRefs() + // Some tasks may have other mount namespaces; flush those as well. + flushed := make(map[*fs.MountNamespace]struct{}) + k.tasks.mu.RLock() + k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) { + if _, ok := flushed[tg.mounts]; ok { + // Already flushed. + return + } + tg.mounts.FlushMountSourceRefs() + flushed[tg.mounts] = struct{}{} + }) + k.tasks.mu.RUnlock() + // There may be some open FDs whose filesystems have been unmounted. We // must flush those as well. - return k.tasks.forEachFDPaused(func(desc descriptor) error { - desc.file.Dirent.Inode.MountSource.FlushDirentRefs() + return k.tasks.forEachFDPaused(func(file *fs.File) error { + file.Dirent.Inode.MountSource.FlushDirentRefs() return nil }) } @@ -396,35 +410,35 @@ func (k *Kernel) flushMountSourceRefs() error { // task. // // Precondition: Must be called with the kernel paused. -func (ts *TaskSet) forEachFDPaused(f func(descriptor) error) error { +func (ts *TaskSet) forEachFDPaused(f func(*fs.File) error) (err error) { ts.mu.RLock() defer ts.mu.RUnlock() for t := range ts.Root.tids { // We can skip locking Task.mu here since the kernel is paused. - if t.fds == nil { + if t.fdTable == nil { continue } - for _, desc := range t.fds.files { - if err := f(desc); err != nil { - return err + t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) { + if lastErr := f(file); lastErr != nil && err == nil { + err = lastErr } - } + }) } - return nil + return err } func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error { - return ts.forEachFDPaused(func(desc descriptor) error { - if flags := desc.file.Flags(); !flags.Write { + return ts.forEachFDPaused(func(file *fs.File) error { + if flags := file.Flags(); !flags.Write { return nil } - if sattr := desc.file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) { + if sattr := file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) { return nil } // Here we need all metadata synced. - syncErr := desc.file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll) + syncErr := file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll) if err := fs.SaveFileFsyncError(syncErr); err != nil { - name, _ := desc.file.Dirent.FullName(nil /* root */) + name, _ := file.Dirent.FullName(nil /* root */) // Wrap this error in ErrSaveRejection // so that it will trigger a save // error, rather than a panic. This @@ -469,14 +483,12 @@ func (ts *TaskSet) unregisterEpollWaiters() { defer ts.mu.RUnlock() for t := range ts.Root.tids { // We can skip locking Task.mu here since the kernel is paused. - if fdmap := t.fds; fdmap != nil { - for _, desc := range fdmap.files { - if desc.file != nil { - if e, ok := desc.file.FileOperations.(*epoll.EventPoll); ok { - e.UnregisterEpollWaiters() - } + if t.fdTable != nil { + t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) { + if e, ok := file.FileOperations.(*epoll.EventPoll); ok { + e.UnregisterEpollWaiters() } - } + }) } } } @@ -524,6 +536,8 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error { } log.Infof("Memory load took [%s].", time.Since(memoryStart)) + log.Infof("Overall load took [%s]", time.Since(loadStart)) + // Ensure that all pending asynchronous work is complete: // - namedpipe opening // - inode file opening @@ -588,9 +602,9 @@ type CreateProcessArgs struct { // Credentials is the initial credentials. Credentials *auth.Credentials - // FDMap is the initial set of file descriptors. If CreateProcess succeeds, - // it takes a reference on FDMap. - FDMap *FDMap + // FDTable is the initial set of file descriptors. If CreateProcess succeeds, + // it takes a reference on FDTable. + FDTable *FDTable // Umask is the initial umask. Umask uint @@ -611,12 +625,18 @@ type CreateProcessArgs struct { // AbstractSocketNamespace is the initial Abstract Socket namespace. AbstractSocketNamespace *AbstractSocketNamespace + // MountNamespace optionally contains the mount namespace for this + // process. If nil, the kernel's mount namespace is used. + // + // Anyone setting MountNamespace must donate a reference (i.e. + // increment it). + MountNamespace *fs.MountNamespace + // Root optionally contains the dirent that serves as the root for the // process. If nil, the mount namespace's root is used as the process' // root. // - // Anyone setting Root must donate a reference (i.e. increment it) to - // keep it alive until it is decremented by CreateProcess. + // Anyone setting Root must donate a reference (i.e. increment it). Root *fs.Dirent // ContainerID is the container that the process belongs to. @@ -659,7 +679,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.args.Credentials case fs.CtxRoot: if ctx.args.Root != nil { - // Take a refernce on the root dirent that will be + // Take a reference on the root dirent that will be // given to the caller. ctx.args.Root.IncRef() return ctx.args.Root @@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, return nil, 0, fmt.Errorf("no kernel MountNamespace") } - tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) + // Grab the mount namespace. + mounts := args.MountNamespace + if mounts == nil { + // If no MountNamespace was configured, then use the kernel's + // root mount namespace, with an extra reference that will be + // donated to the task. + mounts = k.mounts + mounts.IncRef() + } + + tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) // Grab the root directory. root := args.Root if root == nil { - root = fs.RootFromContext(ctx) - // Is the root STILL nil? - if root == nil { - return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context") - } + // If no Root was configured, then get it from the + // MountNamespace. + root = mounts.Root() } + // The call to newFSContext below will take a reference on root, so we + // don't need to hold this one. defer root.DecRef() - args.Root = nil // Grab the working directory. remainingTraversals := uint(args.MaxSymlinkTraversals) @@ -760,9 +789,9 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, return nil, 0, errors.New(se.String()) } - // Take a reference on the FDMap, which will be transferred to + // Take a reference on the FDTable, which will be transferred to // TaskSet.NewTask(). - args.FDMap.IncRef() + args.FDTable.IncRef() // Create the task. config := &TaskConfig{ @@ -770,7 +799,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, ThreadGroup: tg, TaskContext: tc, FSContext: newFSContext(root, wd, args.Umask), - FDMap: args.FDMap, + FDTable: args.FDTable, Credentials: args.Credentials, AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores), UTSNamespace: args.UTSNamespace, @@ -842,7 +871,7 @@ func (k *Kernel) pauseTimeLocked() { } // By precondition, nothing else can be interacting with PIDNamespace.tids - // or FDMap.files, so we can iterate them without synchronization. (We + // or FDTable.files, so we can iterate them without synchronization. (We // can't hold the TaskSet mutex when pausing thread group timers because // thread group timers call ThreadGroup.SendSignal, which takes the TaskSet // mutex, while holding the Timer mutex.) @@ -853,14 +882,14 @@ func (k *Kernel) pauseTimeLocked() { it.PauseTimer() } } - // This means we'll iterate FDMaps shared by multiple tasks repeatedly, + // This means we'll iterate FDTables shared by multiple tasks repeatedly, // but ktime.Timer.Pause is idempotent so this is harmless. - if fdm := t.fds; fdm != nil { - for _, desc := range fdm.files { - if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok { + if t.fdTable != nil { + t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { tfd.PauseTimer() } - } + }) } } k.timekeeper.PauseUpdates() @@ -885,12 +914,12 @@ func (k *Kernel) resumeTimeLocked() { it.ResumeTimer() } } - if fdm := t.fds; fdm != nil { - for _, desc := range fdm.files { - if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok { + if t.fdTable != nil { + t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { tfd.ResumeTimer() } - } + }) } } } diff --git a/pkg/sentry/kernel/kernel_state.go b/pkg/sentry/kernel/kernel_state.go index 48c3ff5a9..909219086 100644 --- a/pkg/sentry/kernel/kernel_state.go +++ b/pkg/sentry/kernel/kernel_state.go @@ -15,8 +15,8 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/tcpip" ) // saveDanglingEndpoints is invoked by stateify. diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD index 347a69062..ebcfaa619 100644 --- a/pkg/sentry/kernel/memevent/BUILD +++ b/pkg/sentry/kernel/memevent/BUILD @@ -6,7 +6,7 @@ package(licenses = ["notice"]) go_library( name = "memevent", srcs = ["memory_events.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent", visibility = ["//:sandbox"], deps = [ ":memory_events_go_proto", @@ -26,7 +26,7 @@ proto_library( go_proto_library( name = "memory_events_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto", proto = ":memory_events_proto", visibility = ["//visibility:public"], ) diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go index 0e2cee807..b0d98e7f0 100644 --- a/pkg/sentry/kernel/memevent/memory_events.go +++ b/pkg/sentry/kernel/memevent/memory_events.go @@ -20,12 +20,12 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/kernel" + pb "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto" + "gvisor.dev/gvisor/pkg/sentry/usage" ) var totalTicks = metric.MustCreateNewUint64Metric("/memory_events/ticks", false /*sync*/, "Total number of memory event periods that have elapsed since startup.") diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go index c93f6598a..77a35b788 100644 --- a/pkg/sentry/kernel/pending_signals.go +++ b/pkg/sentry/kernel/pending_signals.go @@ -15,9 +15,9 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bits" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/sentry/arch" ) const ( diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go index 2c902c7e3..ca8b4e164 100644 --- a/pkg/sentry/kernel/pending_signals_state.go +++ b/pkg/sentry/kernel/pending_signals_state.go @@ -15,7 +15,7 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch" ) // +stateify savable diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index b07d15a2a..4d15cca85 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -27,7 +27,7 @@ go_library( "reader_writer.go", "writer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/pipe/buffer.go b/pkg/sentry/kernel/pipe/buffer.go index 4360dc44f..69ef2a720 100644 --- a/pkg/sentry/kernel/pipe/buffer.go +++ b/pkg/sentry/kernel/pipe/buffer.go @@ -17,7 +17,7 @@ package pipe import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/safemem" ) // buffer encapsulates a queueable byte buffer. diff --git a/pkg/sentry/kernel/pipe/buffer_test.go b/pkg/sentry/kernel/pipe/buffer_test.go index 4b7dbc43f..ee1b90115 100644 --- a/pkg/sentry/kernel/pipe/buffer_test.go +++ b/pkg/sentry/kernel/pipe/buffer_test.go @@ -18,7 +18,7 @@ import ( "testing" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func TestBufferSize(t *testing.T) { diff --git a/pkg/sentry/kernel/pipe/device.go b/pkg/sentry/kernel/pipe/device.go index eb59e15a1..89f5d9342 100644 --- a/pkg/sentry/kernel/pipe/device.go +++ b/pkg/sentry/kernel/pipe/device.go @@ -14,7 +14,7 @@ package pipe -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // pipeDevice is used for all pipe files. var pipeDevice = device.NewAnonDevice() diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index dc7da529e..a2dc72204 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -17,12 +17,12 @@ package pipe import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/amutex" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/syserror" ) // inodeOperations implements fs.InodeOperations for pipes. diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index 9a946b380..adbad7764 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -18,11 +18,11 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) type sleeper struct { @@ -63,7 +63,7 @@ var perms fs.FilePermissions = fs.FilePermissions{ func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) { inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(inode, "pipe") + d := fs.NewDirent(ctx, inode, "pipe") file, err := n.GetFile(ctx, d, flags) if err != nil { t.Fatalf("open with flags %+v failed: %v", flags, err) @@ -76,7 +76,7 @@ func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flag func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) { inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - d := fs.NewDirent(inode, "pipe") + d := fs.NewDirent(ctx, inode, "pipe") file, err := n.GetFile(ctx, d, flags) if resChan != nil { resChan <- openResult{file, err} diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 73438dc62..247e2928e 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -21,11 +21,11 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -39,19 +39,6 @@ const ( MaximumPipeSize = 8 << 20 ) -// Sizer is an interface for setting and getting the size of a pipe. -// -// It is implemented by Pipe and, through embedding, all other types. -type Sizer interface { - // PipeSize returns the pipe capacity in bytes. - PipeSize() int64 - - // SetPipeSize sets the new pipe capacity in bytes. - // - // The new size is returned (which may be capped). - SetPipeSize(int64) (int64, error) -} - // Pipe is an encapsulation of a platform-independent pipe. // It manages a buffered byte queue shared between a reader/writer // pair. @@ -150,8 +137,8 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs. InodeID: ino, BlockSize: int64(atomicIOBytes), } - ms := fs.NewPseudoMountSource() - d := fs.NewDirent(fs.NewInode(iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino)) + ms := fs.NewPseudoMountSource(ctx) + d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino)) // The p.Open calls below will each take a reference on the Dirent. We // must drop the one we already have. defer d.DecRef() @@ -162,6 +149,7 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs. // // Precondition: at least one of flags.Read or flags.Write must be set. func (p *Pipe) Open(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) *fs.File { + flags.NonSeekable = true switch { case flags.Read && flags.Write: p.rOpen() @@ -398,15 +386,15 @@ func (p *Pipe) queued() int64 { return p.size } -// PipeSize implements PipeSizer.PipeSize. -func (p *Pipe) PipeSize() int64 { +// FifoSize implements fs.FifoSizer.FifoSize. +func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) { p.mu.Lock() defer p.mu.Unlock() - return p.max + return p.max, nil } -// SetPipeSize implements PipeSize.SetPipeSize. -func (p *Pipe) SetPipeSize(size int64) (int64, error) { +// SetFifoSize implements fs.FifoSizer.SetFifoSize. +func (p *Pipe) SetFifoSize(size int64) (int64, error) { if size < 0 { return 0, syserror.EINVAL } diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go index 298c6587b..e3a14b665 100644 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ b/pkg/sentry/kernel/pipe/pipe_test.go @@ -18,10 +18,10 @@ import ( "bytes" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) func TestPipeRW(t *testing.T) { diff --git a/pkg/sentry/kernel/pipe/reader.go b/pkg/sentry/kernel/pipe/reader.go index 656be824d..7724b4452 100644 --- a/pkg/sentry/kernel/pipe/reader.go +++ b/pkg/sentry/kernel/pipe/reader.go @@ -15,7 +15,7 @@ package pipe import ( - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/waiter" ) // Reader satisfies the fs.FileOperations interface for read-only pipes. diff --git a/pkg/sentry/kernel/pipe/reader_writer.go b/pkg/sentry/kernel/pipe/reader_writer.go index e560b9be9..f69dbf27b 100644 --- a/pkg/sentry/kernel/pipe/reader_writer.go +++ b/pkg/sentry/kernel/pipe/reader_writer.go @@ -18,13 +18,13 @@ import ( "math" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) // ReaderWriter satisfies the FileOperations interface and services both @@ -77,7 +77,7 @@ func (rw *ReaderWriter) Readiness(mask waiter.EventMask) waiter.EventMask { } // Ioctl implements fs.FileOperations.Ioctl. -func (rw *ReaderWriter) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (rw *ReaderWriter) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { // Switch on ioctl request. switch int(args[1].Int()) { case linux.FIONREAD: diff --git a/pkg/sentry/kernel/pipe/writer.go b/pkg/sentry/kernel/pipe/writer.go index 8d5b68541..5bc6aa931 100644 --- a/pkg/sentry/kernel/pipe/writer.go +++ b/pkg/sentry/kernel/pipe/writer.go @@ -15,7 +15,7 @@ package pipe import ( - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/waiter" ) // Writer satisfies the fs.FileOperations interface for write-only pipes. diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go index a016b4087..c5d095af7 100644 --- a/pkg/sentry/kernel/posixtimer.go +++ b/pkg/sentry/kernel/posixtimer.go @@ -17,10 +17,10 @@ package kernel import ( "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) // IntervalTimer represents a POSIX interval timer as described by diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 193447b17..3be171cdc 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -17,11 +17,11 @@ package kernel import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // ptraceOptions are the subset of options controlling a task's ptrace behavior diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 048eeaa3f..5514cf432 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -17,9 +17,9 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // ptraceArch implements arch-specific ptrace commands. diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 4899c813f..0acdf769d 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -17,9 +17,9 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // ptraceArch implements arch-specific ptrace commands. diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go index c4fb2c56c..24ea002ba 100644 --- a/pkg/sentry/kernel/rseq.go +++ b/pkg/sentry/kernel/rseq.go @@ -15,9 +15,9 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/hostcpu" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Restartable sequences, as described in https://lwn.net/Articles/650333/. diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD index 184e8a35b..1725b8562 100644 --- a/pkg/sentry/kernel/sched/BUILD +++ b/pkg/sentry/kernel/sched/BUILD @@ -8,7 +8,7 @@ go_library( "cpuset.go", "sched.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched", visibility = ["//pkg/sentry:internal"], ) diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index cc75eb08a..2347dcf36 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -17,12 +17,12 @@ package kernel import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const maxSyscallFilterInstructions = 1 << 15 diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 840943ca8..36edf10f3 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -21,7 +21,7 @@ go_library( "semaphore.go", "waiter_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 9d0620e02..93fe68a3e 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -19,13 +19,13 @@ import ( "fmt" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -86,7 +86,7 @@ type Set struct { dead bool } -// sem represents a single semanphore from a set. +// sem represents a single semaphore from a set. // // +stateify savable type sem struct { diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index abfcd0fb4..c235f6ca4 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -17,11 +17,11 @@ package semaphore import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} { diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index 610e199da..81fcd8258 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -15,10 +15,10 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/syserror" ) // SessionID is the public identifier. @@ -294,6 +294,7 @@ func (tg *ThreadGroup) createSession() error { id: SessionID(id), leader: tg, } + s.refs.EnableLeakCheck("kernel.Session") // Create a new ProcessGroup, belonging to that Session. // This also has a single reference (assigned below). @@ -307,6 +308,7 @@ func (tg *ThreadGroup) createSession() error { session: s, ancestors: 0, } + pg.refs.EnableLeakCheck("kernel.ProcessGroup") // Tie them and return the result. s.processGroups.PushBack(pg) @@ -378,11 +380,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error { // We manually adjust the ancestors if the parent is in the same // session. tg.processGroup.session.incRef() - pg := &ProcessGroup{ + pg := ProcessGroup{ id: ProcessGroupID(id), originator: tg, session: tg.processGroup.session, } + pg.refs.EnableLeakCheck("kernel.ProcessGroup") + if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session { pg.ancestors++ } @@ -390,20 +394,20 @@ func (tg *ThreadGroup) CreateProcessGroup() error { // Assign the new process group; adjust children. oldParentPG := tg.parentPG() tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) { - childTG.processGroup.incRefWithParent(pg) + childTG.processGroup.incRefWithParent(&pg) childTG.processGroup.decRefWithParent(oldParentPG) }) tg.processGroup.decRefWithParent(oldParentPG) - tg.processGroup = pg + tg.processGroup = &pg // Add the new process group to the session. - pg.session.processGroups.PushBack(pg) + pg.session.processGroups.PushBack(&pg) // Ensure this translation is added to all namespaces. for ns := tg.pidns; ns != nil; ns = ns.parent { local := ns.tgids[tg] - ns.pgids[pg] = ProcessGroupID(local) - ns.processGroups[ProcessGroupID(local)] = pg + ns.pgids[&pg] = ProcessGroupID(local) + ns.processGroups[ProcessGroupID(local)] = &pg } return nil diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index bc2089872..aa7471eb6 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -8,7 +8,7 @@ go_library( "device.go", "shm.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/shm/device.go b/pkg/sentry/kernel/shm/device.go index 3cb759072..6b0d5818b 100644 --- a/pkg/sentry/kernel/shm/device.go +++ b/pkg/sentry/kernel/shm/device.go @@ -14,7 +14,7 @@ package shm -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // shmDevice is the kernel shm device. var shmDevice = device.NewAnonDevice() diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 00393b5f0..5bd610f68 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -37,19 +37,19 @@ import ( "fmt" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Key represents a shm segment key. Analogous to a file name. @@ -224,6 +224,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi creatorPID: pid, changeTime: ktime.NowFromContext(ctx), } + shm.EnableLeakCheck("kernel.Shm") // Find the next available ID. for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ { diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go index b528ec0dc..02eede93d 100644 --- a/pkg/sentry/kernel/signal.go +++ b/pkg/sentry/kernel/signal.go @@ -17,10 +17,10 @@ package kernel import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" ) // SignalPanic is used to panic the running threads. It is a signal which diff --git a/pkg/sentry/kernel/signal_handlers.go b/pkg/sentry/kernel/signal_handlers.go index ce8bcb5e5..a16f3d57f 100644 --- a/pkg/sentry/kernel/signal_handlers.go +++ b/pkg/sentry/kernel/signal_handlers.go @@ -17,8 +17,8 @@ package kernel import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" ) // SignalHandlers holds information about signal actions. diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 27cd3728b..220fa73a2 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -19,10 +19,10 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/bits" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // maxSyscallNum is the highest supported syscall number. diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go index 175d1b247..8227ecf1d 100644 --- a/pkg/sentry/kernel/syslog.go +++ b/pkg/sentry/kernel/syslog.go @@ -67,6 +67,7 @@ func (s *syslog) Log() []byte { "Creating process schedule...", "Generating random numbers by fair dice roll...", "Rewriting operating system in Javascript...", + "Reticulating splines...", "Consulting tar man page...", "Forking spaghetti code...", "Checking naughty and nice process list...", diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go index 3f2b042c8..32cf47e05 100644 --- a/pkg/sentry/kernel/table_test.go +++ b/pkg/sentry/kernel/table_test.go @@ -17,8 +17,8 @@ package kernel import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/sentry/arch" ) const ( diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 4d889422f..e91f82bb3 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -18,24 +18,24 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/third_party/gvsync" ) // Task represents a thread of execution in the untrusted app. It @@ -236,15 +236,15 @@ type Task struct { // tc is protected by mu, and is owned by the task goroutine. tc TaskContext - // fsc is the task's filesystem context. + // fsContext is the task's filesystem context. // - // fsc is protected by mu, and is owned by the task goroutine. - fsc *FSContext + // fsContext is protected by mu, and is owned by the task goroutine. + fsContext *FSContext - // fds is the task's file descriptor table. + // fdTable is the task's file descriptor table. // - // fds is protected by mu, and is owned by the task goroutine. - fds *FDMap + // fdTable is protected by mu, and is owned by the task goroutine. + fdTable *FDTable // If vforkParent is not nil, it is the task that created this task with // vfork() or clone(CLONE_VFORK), and should have its vforkStop ended when @@ -386,10 +386,11 @@ type Task struct { // creds is the task's credentials. // - // creds is protected by mu, however the value itself is immutable and can - // only be changed by a copy. After reading the pointer, access will - // proceed outside the scope of mu. creds is owned by the task goroutine. - creds *auth.Credentials + // creds.Load() may be called without synchronization. creds.Store() is + // serialized by mu. creds is owned by the task goroutine. All + // auth.Credentials objects that creds may point to, or have pointed to + // in the past, must be treated as immutable. + creds auth.AtomicPtrCredentials // utsns is the task's UTS namespace. // @@ -597,11 +598,11 @@ func (t *Task) Value(key interface{}) interface{} { case CtxTask: return t case auth.CtxCredentials: - return t.creds + return t.Credentials() case context.CtxThreadGroupID: return int32(t.ThreadGroup().ID()) case fs.CtxRoot: - return t.fsc.RootDirectory() + return t.fsContext.RootDirectory() case fs.CtxDirentCacheLimiter: return t.k.DirentCacheLimiter case inet.CtxStack: @@ -665,9 +666,9 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock { // Preconditions: The caller must be running on the task goroutine, or t.mu // must be locked. func (t *Task) IsChrooted() bool { - realRoot := t.k.mounts.Root() + realRoot := t.tg.mounts.Root() defer realRoot.DecRef() - root := t.fsc.RootDirectory() + root := t.fsContext.RootDirectory() if root != nil { defer root.DecRef() } @@ -688,29 +689,68 @@ func (t *Task) TaskContext() *TaskContext { // Precondition: The caller must be running on the task goroutine, or t.mu must // be locked. func (t *Task) FSContext() *FSContext { - return t.fsc + return t.fsContext } -// FDMap returns t's FDMap. FDMap does not take an additional reference on the -// returned FDMap. +// FDTable returns t's FDTable. FDMTable does not take an additional reference +// on the returned FDMap. // // Precondition: The caller must be running on the task goroutine, or t.mu must // be locked. -func (t *Task) FDMap() *FDMap { - return t.fds +func (t *Task) FDTable() *FDTable { + return t.fdTable +} + +// GetFile is a convenience wrapper t.FDTable().GetFile. +// +// Precondition: same as FDTable. +func (t *Task) GetFile(fd int32) *fs.File { + f, _ := t.fdTable.Get(fd) + return f +} + +// NewFDs is a convenience wrapper for t.FDTable().NewFDs. +// +// This automatically passes the task as the context. +// +// Precondition: same as FDTable. +func (t *Task) NewFDs(fd int32, files []*fs.File, flags FDFlags) ([]int32, error) { + return t.fdTable.NewFDs(t, fd, files, flags) +} + +// NewFDFrom is a convenience wrapper for t.FDTable().NewFDs with a single file. +// +// This automatically passes the task as the context. +// +// Precondition: same as FDTable. +func (t *Task) NewFDFrom(fd int32, file *fs.File, flags FDFlags) (int32, error) { + fds, err := t.fdTable.NewFDs(t, fd, []*fs.File{file}, flags) + if err != nil { + return 0, err + } + return fds[0], nil +} + +// NewFDAt is a convenience wrapper for t.FDTable().NewFDAt. +// +// This automatically passes the task as the context. +// +// Precondition: same as FDTable. +func (t *Task) NewFDAt(fd int32, file *fs.File, flags FDFlags) error { + return t.fdTable.NewFDAt(t, fd, file, flags) } // WithMuLocked executes f with t.mu locked. func (t *Task) WithMuLocked(f func(*Task)) { t.mu.Lock() - defer t.mu.Unlock() f(t) + t.mu.Unlock() } // MountNamespace returns t's MountNamespace. MountNamespace does not take an // additional reference on the returned MountNamespace. func (t *Task) MountNamespace() *fs.MountNamespace { - return t.k.mounts + return t.tg.mounts } // AbstractSockets returns t's AbstractSocketNamespace. diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go index 1ca2a82eb..5f3e60fe8 100644 --- a/pkg/sentry/kernel/task_acct.go +++ b/pkg/sentry/kernel/task_acct.go @@ -17,11 +17,11 @@ package kernel // Accounting, limits, timers. import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" ) // Getitimer implements getitimer(2). diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go index 1c76c4d84..2a2e6f662 100644 --- a/pkg/sentry/kernel/task_block.go +++ b/pkg/sentry/kernel/task_block.go @@ -17,8 +17,8 @@ package kernel import ( "time" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) // BlockWithTimeout blocks t until an event is received from C, the application diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index bba8ddd39..0916fd658 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -15,10 +15,10 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // SharingOptions controls what resources are shared by a new task created by @@ -214,20 +214,20 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } } - var fsc *FSContext + var fsContext *FSContext if opts.NewFSContext { - fsc = t.fsc.Fork() + fsContext = t.fsContext.Fork() } else { - fsc = t.fsc - fsc.IncRef() + fsContext = t.fsContext + fsContext.IncRef() } - var fds *FDMap + var fdTable *FDTable if opts.NewFiles { - fds = t.fds.Fork() + fdTable = t.fdTable.Fork() } else { - fds = t.fds - fds.IncRef() + fdTable = t.fdTable + fdTable.IncRef() } pidns := t.tg.pidns @@ -238,11 +238,12 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } tg := t.tg if opts.NewThreadGroup { + tg.mounts.IncRef() sh := t.tg.signalHandlers if opts.NewSignalHandlers { sh = sh.Fork() } - tg = t.k.newThreadGroup(pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock) + tg = t.k.newThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock) } cfg := &TaskConfig{ @@ -250,8 +251,8 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { ThreadGroup: tg, SignalMask: t.SignalMask(), TaskContext: tc, - FSContext: fsc, - FDMap: fds, + FSContext: fsContext, + FDTable: fdTable, Credentials: creds, Niceness: t.Niceness(), NetworkNamespaced: t.netns, @@ -424,6 +425,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { if opts.NewAddressSpace || opts.NewSignalHandlers { return syserror.EINVAL } + creds := t.Credentials() if opts.NewThreadGroup { t.tg.signalHandlers.mu.Lock() if t.tg.tasksCount != 1 { @@ -438,8 +440,6 @@ func (t *Task) Unshare(opts *SharingOptions) error { if t.IsChrooted() { return syserror.EPERM } - // This temporary is needed because Go. - creds := t.Credentials() newUserNS, err := creds.NewChildUserNamespace() if err != nil { return err @@ -448,6 +448,8 @@ func (t *Task) Unshare(opts *SharingOptions) error { if err != nil { return err } + // Need to reload creds, becaue t.SetUserNamespace() changed task credentials. + creds = t.Credentials() } haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN) if opts.NewPIDNamespace { @@ -472,7 +474,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { } // Note that this must happen after NewUserNamespace, so the // new user namespace is used if there is one. - t.utsns = t.utsns.Clone(t.creds.UserNamespace) + t.utsns = t.utsns.Clone(creds.UserNamespace) } if opts.NewIPCNamespace { if !haveCapSysAdmin { @@ -481,24 +483,24 @@ func (t *Task) Unshare(opts *SharingOptions) error { } // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC // namespace" - t.ipcns = NewIPCNamespace(t.creds.UserNamespace) + t.ipcns = NewIPCNamespace(creds.UserNamespace) } - var oldfds *FDMap + var oldFDTable *FDTable if opts.NewFiles { - oldfds = t.fds - t.fds = oldfds.Fork() + oldFDTable = t.fdTable + t.fdTable = oldFDTable.Fork() } - var oldfsc *FSContext + var oldFSContext *FSContext if opts.NewFSContext { - oldfsc = t.fsc - t.fsc = oldfsc.Fork() + oldFSContext = t.fsContext + t.fsContext = oldFSContext.Fork() } t.mu.Unlock() - if oldfds != nil { - oldfds.DecRef() + if oldFDTable != nil { + oldFDTable.DecRef() } - if oldfsc != nil { - oldfsc.DecRef() + if oldFSContext != nil { + oldFSContext.DecRef() } return nil } diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index bbd294141..54b1676b0 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -17,16 +17,16 @@ package kernel import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" ) var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC) diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 35d5cb90c..17a089b90 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -65,11 +65,11 @@ package kernel // """ import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/syserror" ) // execStop is a TaskStop that a task sets on itself when it wants to execve @@ -195,7 +195,7 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { t.tg.pidns.owner.mu.Unlock() // Remove FDs with the CloseOnExec flag set. - t.fds.RemoveIf(func(file *fs.File, flags FDFlags) bool { + t.fdTable.RemoveIf(func(file *fs.File, flags FDFlags) bool { return flags.CloseOnExec }) diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index 158e665d3..535f03e50 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -29,11 +29,11 @@ import ( "fmt" "strconv" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // An ExitStatus is a value communicated from an exiting task or thread group @@ -265,8 +265,8 @@ func (*runExitMain) execute(t *Task) taskRunState { // Releasing the MM unblocks a blocked CLONE_VFORK parent. t.unstopVforkParent() - t.fsc.DecRef() - t.fds.DecRef() + t.fsContext.DecRef() + t.fdTable.DecRef() // If this is the last task to exit from the thread group, release the // thread group's resources. diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go index f98097c2c..c211b5b74 100644 --- a/pkg/sentry/kernel/task_futex.go +++ b/pkg/sentry/kernel/task_futex.go @@ -15,8 +15,8 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Futex returns t's futex manager. @@ -34,14 +34,14 @@ func (t *Task) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { }) } -// CompareAndSwapUint32 implemets futex.Target.CompareAndSwapUint32. +// CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32. func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{ AddressSpaceActive: true, }) } -// LoadUint32 implemets futex.Target.LoadUint32. +// LoadUint32 implements futex.Target.LoadUint32. func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) { return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{ AddressSpaceActive: true, diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index ec95f78d0..78ff14b20 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -15,40 +15,32 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/syserror" ) // Credentials returns t's credentials. // // This value must be considered immutable. func (t *Task) Credentials() *auth.Credentials { - t.mu.Lock() - defer t.mu.Unlock() - return t.creds + return t.creds.Load() } // UserNamespace returns the user namespace associated with the task. func (t *Task) UserNamespace() *auth.UserNamespace { - t.mu.Lock() - defer t.mu.Unlock() - return t.creds.UserNamespace + return t.Credentials().UserNamespace } // HasCapabilityIn checks if the task has capability cp in user namespace ns. func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool { - t.mu.Lock() - defer t.mu.Unlock() - return t.creds.HasCapabilityIn(cp, ns) + return t.Credentials().HasCapabilityIn(cp, ns) } // HasCapability checks if the task has capability cp in its user namespace. func (t *Task) HasCapability(cp linux.Capability) bool { - t.mu.Lock() - defer t.mu.Unlock() - return t.creds.HasCapability(cp) + return t.Credentials().HasCapability(cp) } // SetUID implements the semantics of setuid(2). @@ -57,9 +49,12 @@ func (t *Task) SetUID(uid auth.UID) error { if !uid.Ok() { return syserror.EINVAL } + t.mu.Lock() defer t.mu.Unlock() - kuid := t.creds.UserNamespace.MapToKUID(uid) + + creds := t.Credentials() + kuid := creds.UserNamespace.MapToKUID(uid) if !kuid.Ok() { return syserror.EINVAL } @@ -67,17 +62,17 @@ func (t *Task) SetUID(uid auth.UID) error { // effective UID of the caller is root (more precisely: if the caller has // the CAP_SETUID capability), the real UID and saved set-user-ID are also // set." - setuid(2) - if t.creds.HasCapability(linux.CAP_SETUID) { + if creds.HasCapability(linux.CAP_SETUID) { t.setKUIDsUncheckedLocked(kuid, kuid, kuid) return nil } // "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID // capability) and uid does not match the real UID or saved set-user-ID of // the calling process." - if kuid != t.creds.RealKUID && kuid != t.creds.SavedKUID { + if kuid != creds.RealKUID && kuid != creds.SavedKUID { return syserror.EPERM } - t.setKUIDsUncheckedLocked(t.creds.RealKUID, kuid, t.creds.SavedKUID) + t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID) return nil } @@ -87,37 +82,38 @@ func (t *Task) SetREUID(r, e auth.UID) error { defer t.mu.Unlock() // "Supplying a value of -1 for either the real or effective user ID forces // the system to leave that ID unchanged." - setreuid(2) - newR := t.creds.RealKUID + creds := t.Credentials() + newR := creds.RealKUID if r.Ok() { - newR = t.creds.UserNamespace.MapToKUID(r) + newR = creds.UserNamespace.MapToKUID(r) if !newR.Ok() { return syserror.EINVAL } } - newE := t.creds.EffectiveKUID + newE := creds.EffectiveKUID if e.Ok() { - newE = t.creds.UserNamespace.MapToKUID(e) + newE = creds.UserNamespace.MapToKUID(e) if !newE.Ok() { return syserror.EINVAL } } - if !t.creds.HasCapability(linux.CAP_SETUID) { + if !creds.HasCapability(linux.CAP_SETUID) { // "Unprivileged processes may only set the effective user ID to the // real user ID, the effective user ID, or the saved set-user-ID." - if newE != t.creds.RealKUID && newE != t.creds.EffectiveKUID && newE != t.creds.SavedKUID { + if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID { return syserror.EPERM } // "Unprivileged users may only set the real user ID to the real user // ID or the effective user ID." - if newR != t.creds.RealKUID && newR != t.creds.EffectiveKUID { + if newR != creds.RealKUID && newR != creds.EffectiveKUID { return syserror.EPERM } } // "If the real user ID is set (i.e., ruid is not -1) or the effective user // ID is set to a value not equal to the previous real user ID, the saved // set-user-ID will be set to the new effective user ID." - newS := t.creds.SavedKUID - if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKUID) { + newS := creds.SavedKUID + if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) { newS = newE } t.setKUIDsUncheckedLocked(newR, newE, newS) @@ -136,23 +132,24 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error { // arguments equals -1, the corresponding value is not changed." - // setresuid(2) var err error - newR := t.creds.RealKUID + creds := t.Credentials() + newR := creds.RealKUID if r.Ok() { - newR, err = t.creds.UseUID(r) + newR, err = creds.UseUID(r) if err != nil { return err } } - newE := t.creds.EffectiveKUID + newE := creds.EffectiveKUID if e.Ok() { - newE, err = t.creds.UseUID(e) + newE, err = creds.UseUID(e) if err != nil { return err } } - newS := t.creds.SavedKUID + newS := creds.SavedKUID if s.Ok() { - newS, err = t.creds.UseUID(s) + newS, err = creds.UseUID(s) if err != nil { return err } @@ -163,10 +160,10 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error { // Preconditions: t.mu must be locked. func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { - root := t.creds.UserNamespace.MapToKUID(auth.RootUID) - oldR, oldE, oldS := t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID - t.creds = t.creds.Fork() // See doc for creds. - t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID = newR, newE, newS + creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. + root := creds.UserNamespace.MapToKUID(auth.RootUID) + oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID + creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS // "1. If one or more of the real, effective or saved set user IDs was // previously 0, and as a result of the UID changes all of these IDs have a @@ -184,9 +181,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // being cleared." (A thread's effective capability set is always // cleared when such a credential change is made, // regardless of the setting of the "keep capabilities" flag.) - if !t.creds.KeepCaps { - t.creds.PermittedCaps = 0 - t.creds.EffectiveCaps = 0 + if !creds.KeepCaps { + creds.PermittedCaps = 0 + creds.EffectiveCaps = 0 } } // """ @@ -197,9 +194,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // permitted set is copied to the effective set. // """ if oldE == root && newE != root { - t.creds.EffectiveCaps = 0 + creds.EffectiveCaps = 0 } else if oldE != root && newE == root { - t.creds.EffectiveCaps = t.creds.PermittedCaps + creds.EffectiveCaps = creds.PermittedCaps } // "4. If the filesystem user ID is changed from 0 to nonzero (see // setfsuid(2)), then the following capabilities are cleared from the @@ -220,6 +217,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // Not documented, but compare Linux's kernel/cred.c:commit_creds(). t.parentDeathSignal = 0 } + t.creds.Store(creds) } // SetGID implements the semantics of setgid(2). @@ -227,20 +225,23 @@ func (t *Task) SetGID(gid auth.GID) error { if !gid.Ok() { return syserror.EINVAL } + t.mu.Lock() defer t.mu.Unlock() - kgid := t.creds.UserNamespace.MapToKGID(gid) + + creds := t.Credentials() + kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { return syserror.EINVAL } - if t.creds.HasCapability(linux.CAP_SETGID) { + if creds.HasCapability(linux.CAP_SETGID) { t.setKGIDsUncheckedLocked(kgid, kgid, kgid) return nil } - if kgid != t.creds.RealKGID && kgid != t.creds.SavedKGID { + if kgid != creds.RealKGID && kgid != creds.SavedKGID { return syserror.EPERM } - t.setKGIDsUncheckedLocked(t.creds.RealKGID, kgid, t.creds.SavedKGID) + t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID) return nil } @@ -248,30 +249,32 @@ func (t *Task) SetGID(gid auth.GID) error { func (t *Task) SetREGID(r, e auth.GID) error { t.mu.Lock() defer t.mu.Unlock() - newR := t.creds.RealKGID + + creds := t.Credentials() + newR := creds.RealKGID if r.Ok() { - newR = t.creds.UserNamespace.MapToKGID(r) + newR = creds.UserNamespace.MapToKGID(r) if !newR.Ok() { return syserror.EINVAL } } - newE := t.creds.EffectiveKGID + newE := creds.EffectiveKGID if e.Ok() { - newE = t.creds.UserNamespace.MapToKGID(e) + newE = creds.UserNamespace.MapToKGID(e) if !newE.Ok() { return syserror.EINVAL } } - if !t.creds.HasCapability(linux.CAP_SETGID) { - if newE != t.creds.RealKGID && newE != t.creds.EffectiveKGID && newE != t.creds.SavedKGID { + if !creds.HasCapability(linux.CAP_SETGID) { + if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID { return syserror.EPERM } - if newR != t.creds.RealKGID && newR != t.creds.EffectiveKGID { + if newR != creds.RealKGID && newR != creds.EffectiveKGID { return syserror.EPERM } } - newS := t.creds.SavedKGID - if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKGID) { + newS := creds.SavedKGID + if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) { newS = newE } t.setKGIDsUncheckedLocked(newR, newE, newS) @@ -280,26 +283,29 @@ func (t *Task) SetREGID(r, e auth.GID) error { // SetRESGID implements the semantics of the setresgid(2) syscall. func (t *Task) SetRESGID(r, e, s auth.GID) error { + var err error + t.mu.Lock() defer t.mu.Unlock() - var err error - newR := t.creds.RealKGID + + creds := t.Credentials() + newR := creds.RealKGID if r.Ok() { - newR, err = t.creds.UseGID(r) + newR, err = creds.UseGID(r) if err != nil { return err } } - newE := t.creds.EffectiveKGID + newE := creds.EffectiveKGID if e.Ok() { - newE, err = t.creds.UseGID(e) + newE, err = creds.UseGID(e) if err != nil { return err } } - newS := t.creds.SavedKGID + newS := creds.SavedKGID if s.Ok() { - newS, err = t.creds.UseGID(s) + newS, err = creds.UseGID(s) if err != nil { return err } @@ -309,9 +315,9 @@ func (t *Task) SetRESGID(r, e, s auth.GID) error { } func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) { - oldE := t.creds.EffectiveKGID - t.creds = t.creds.Fork() // See doc for creds. - t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS + creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. + oldE := creds.EffectiveKGID + creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS if oldE != newE { // "[dumpability] is reset to the current value contained in @@ -327,6 +333,7 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) { // kernel/cred.c:commit_creds(). t.parentDeathSignal = 0 } + t.creds.Store(creds) } // SetExtraGIDs attempts to change t's supplemental groups. All IDs are @@ -334,19 +341,21 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) { func (t *Task) SetExtraGIDs(gids []auth.GID) error { t.mu.Lock() defer t.mu.Unlock() - if !t.creds.HasCapability(linux.CAP_SETGID) { + creds := t.Credentials() + if !creds.HasCapability(linux.CAP_SETGID) { return syserror.EPERM } kgids := make([]auth.KGID, len(gids)) for i, gid := range gids { - kgid := t.creds.UserNamespace.MapToKGID(gid) + kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { return syserror.EINVAL } kgids[i] = kgid } - t.creds = t.creds.Fork() // See doc for creds. - t.creds.ExtraKGIDs = kgids + creds = creds.Fork() // The credentials object is immutable. See doc for creds. + creds.ExtraKGIDs = kgids + t.creds.Store(creds) return nil } @@ -360,27 +369,29 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili if effective & ^permitted != 0 { return syserror.EPERM } + creds := t.Credentials() // "It is also a limiting superset for the capabilities that may be added // to the inheritable set by a thread that does not have the CAP_SETPCAP // capability in its effective set." - if !t.creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(t.creds.InheritableCaps|t.creds.PermittedCaps) != 0) { + if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) { return syserror.EPERM } // "If a thread drops a capability from its permitted set, it can never // reacquire that capability (unless it execve(2)s ..." - if permitted & ^t.creds.PermittedCaps != 0 { + if permitted & ^creds.PermittedCaps != 0 { return syserror.EPERM } // "... if a capability is not in the bounding set, then a thread can't add // this capability to its inheritable set, even if it was in its permitted // capabilities ..." - if inheritable & ^(t.creds.InheritableCaps|t.creds.BoundingCaps) != 0 { + if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 { return syserror.EPERM } - t.creds = t.creds.Fork() // See doc for creds. - t.creds.PermittedCaps = permitted - t.creds.InheritableCaps = inheritable - t.creds.EffectiveCaps = effective + creds = creds.Fork() // The credentials object is immutable. See doc for creds. + creds.PermittedCaps = permitted + creds.InheritableCaps = inheritable + creds.EffectiveCaps = effective + t.creds.Store(creds) return nil } @@ -389,11 +400,13 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili func (t *Task) DropBoundingCapability(cp linux.Capability) error { t.mu.Lock() defer t.mu.Unlock() - if !t.creds.HasCapability(linux.CAP_SETPCAP) { + creds := t.Credentials() + if !creds.HasCapability(linux.CAP_SETPCAP) { return syserror.EPERM } - t.creds = t.creds.Fork() // See doc for creds. - t.creds.BoundingCaps &^= auth.CapabilitySetOf(cp) + creds = creds.Fork() // The credentials object is immutable. See doc for creds. + creds.BoundingCaps &^= auth.CapabilitySetOf(cp) + t.creds.Store(creds) return nil } @@ -402,31 +415,33 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error { t.mu.Lock() defer t.mu.Unlock() + creds := t.Credentials() // "A process reassociating itself with a user namespace must have the // CAP_SYS_ADMIN capability in the target user namespace." - setns(2) // // If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN // in ns (by rule 3 in auth.Credentials.HasCapability). - if !t.creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { + if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { return syserror.EPERM } - t.creds = t.creds.Fork() // See doc for creds. - t.creds.UserNamespace = ns + creds = creds.Fork() // The credentials object is immutable. See doc for creds. + creds.UserNamespace = ns // "The child process created by clone(2) with the CLONE_NEWUSER flag // starts out with a complete set of capabilities in the new user // namespace. Likewise, a process that creates a new user namespace using // unshare(2) or joins an existing user namespace using setns(2) gains a // full set of capabilities in that namespace." - t.creds.PermittedCaps = auth.AllCapabilities - t.creds.InheritableCaps = 0 - t.creds.EffectiveCaps = auth.AllCapabilities - t.creds.BoundingCaps = auth.AllCapabilities + creds.PermittedCaps = auth.AllCapabilities + creds.InheritableCaps = 0 + creds.EffectiveCaps = auth.AllCapabilities + creds.BoundingCaps = auth.AllCapabilities // "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER // flag sets the "securebits" flags (see capabilities(7)) to their default // values (all flags disabled) in the child (for clone(2)) or caller (for // unshare(2), or setns(2)." - user_namespaces(7) - t.creds.KeepCaps = false + creds.KeepCaps = false + t.creds.Store(creds) return nil } @@ -435,8 +450,9 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error { func (t *Task) SetKeepCaps(k bool) { t.mu.Lock() defer t.mu.Unlock() - t.creds = t.creds.Fork() // See doc for creds. - t.creds.KeepCaps = k + creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. + creds.KeepCaps = k + t.creds.Store(creds) } // updateCredsForExec updates t.creds to reflect an execve(). @@ -512,15 +528,16 @@ func (t *Task) updateCredsForExecLocked() { // the effective user ID. var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0 fileEffective := false - root := t.creds.UserNamespace.MapToKUID(auth.RootUID) - if t.creds.EffectiveKUID == root || t.creds.RealKUID == root { - newPermitted = t.creds.InheritableCaps | t.creds.BoundingCaps - if t.creds.EffectiveKUID == root { + creds := t.Credentials() + root := creds.UserNamespace.MapToKUID(auth.RootUID) + if creds.EffectiveKUID == root || creds.RealKUID == root { + newPermitted = creds.InheritableCaps | creds.BoundingCaps + if creds.EffectiveKUID == root { fileEffective = true } } - t.creds = t.creds.Fork() // See doc for creds. + creds = creds.Fork() // The credentials object is immutable. See doc for creds. // Now we enter poorly-documented, somewhat confusing territory. (The // accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds @@ -562,27 +579,28 @@ func (t *Task) updateCredsForExecLocked() { // But since no_new_privs is always set (A3 is always true), this becomes // much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1 // is a no-op. So we can just do C1 and C2 unconditionally. - if t.creds.EffectiveKUID != t.creds.RealKUID || t.creds.EffectiveKGID != t.creds.RealKGID { - t.creds.EffectiveKUID = t.creds.RealKUID - t.creds.EffectiveKGID = t.creds.RealKGID + if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID { + creds.EffectiveKUID = creds.RealKUID + creds.EffectiveKGID = creds.RealKGID t.parentDeathSignal = 0 } // (Saved set-user-ID is always set to the new effective user ID, and saved // set-group-ID is always set to the new effective group ID, regardless of // the above.) - t.creds.SavedKUID = t.creds.RealKUID - t.creds.SavedKGID = t.creds.RealKGID - t.creds.PermittedCaps &= newPermitted + creds.SavedKUID = creds.RealKUID + creds.SavedKGID = creds.RealKGID + creds.PermittedCaps &= newPermitted if fileEffective { - t.creds.EffectiveCaps = t.creds.PermittedCaps + creds.EffectiveCaps = creds.PermittedCaps } else { - t.creds.EffectiveCaps = 0 + creds.EffectiveCaps = 0 } // prctl(2): The "keep capabilities" value will be reset to 0 on subsequent // calls to execve(2). - t.creds.KeepCaps = false + creds.KeepCaps = false // "The bounding set is inherited at fork(2) from the thread's parent, and // is preserved across an execve(2)". So we're done. + t.creds.Store(creds) } diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index e0e57e8bd..a29e9b9eb 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -18,8 +18,8 @@ import ( "fmt" "sort" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( @@ -63,7 +63,7 @@ func (t *Task) DebugDumpState() { if mm := t.MemoryManager(); mm != nil { t.Debugf("Mappings:\n%s", mm) } - t.Debugf("FDMap:\n%s", t.fds) + t.Debugf("FDTable:\n%s", t.fdTable) } // debugDumpRegisters logs register state at log level debug. diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go index 04c684c1a..172a31e1d 100644 --- a/pkg/sentry/kernel/task_net.go +++ b/pkg/sentry/kernel/task_net.go @@ -15,7 +15,7 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/inet" ) // IsNetworkNamespaced returns true if t is in a non-root network namespace. diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index a79101a18..c92266c59 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -19,13 +19,13 @@ import ( "runtime" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/hostcpu" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // A taskRunState is a reified state in the task state machine. See README.md diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go index 1c94ab11b..e76c069b0 100644 --- a/pkg/sentry/kernel/task_sched.go +++ b/pkg/sentry/kernel/task_sched.go @@ -22,13 +22,13 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/hostcpu" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" ) // TaskGoroutineState is a coarse representation of the current execution diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 654cf7525..266959a07 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -21,13 +21,13 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // SignalAction is an internal signal action. diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index b42531e57..a88bf3951 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -15,13 +15,13 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" ) // TaskConfig defines the configuration of a new Task (see below). @@ -52,9 +52,10 @@ type TaskConfig struct { // succeeds. FSContext *FSContext - // FDMap is the FDMap of the new task. A reference must be held on FDMap, - // which is transferred to TaskSet.NewTask whether or not it succeeds. - FDMap *FDMap + // FDTable is the FDTableof the new task. A reference must be held on + // FDMap, which is transferred to TaskSet.NewTask whether or not it + // succeeds. + FDTable *FDTable // Credentials is the Credentials of the new task. Credentials *auth.Credentials @@ -90,7 +91,7 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) { if err != nil { cfg.TaskContext.release() cfg.FSContext.DecRef() - cfg.FDMap.DecRef() + cfg.FDTable.DecRef() return nil, err } return t, nil @@ -112,14 +113,13 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { signalMask: cfg.SignalMask, signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable}, tc: *tc, - fsc: cfg.FSContext, - fds: cfg.FDMap, + fsContext: cfg.FSContext, + fdTable: cfg.FDTable, p: cfg.Kernel.Platform.NewContext(), k: cfg.Kernel, ptraceTracees: make(map[*Task]struct{}), allowedCPUMask: cfg.AllowedCPUMask.Copy(), ioUsage: &usage.IO{}, - creds: cfg.Credentials, niceness: cfg.Niceness, netns: cfg.NetworkNamespaced, utsns: cfg.UTSNamespace, @@ -129,6 +129,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { futexWaiter: futex.NewWaiter(), containerID: cfg.ContainerID, } + t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu t.ptraceTracer.Store((*Task)(nil)) // We don't construct t.blockingTimer until Task.run(); see that function diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go index e735a5dd0..10c6e455c 100644 --- a/pkg/sentry/kernel/task_stop.go +++ b/pkg/sentry/kernel/task_stop.go @@ -172,7 +172,7 @@ func (t *Task) beginStopLocked() { } } -// endStopLocked decerements t.stopCount to indicate that an existing internal +// endStopLocked decrements t.stopCount to indicate that an existing internal // or external stop no longer applies to t. // // Preconditions: The signal mutex must be locked. diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index a9283d0df..b543d536a 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -19,13 +19,13 @@ import ( "os" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bits" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go index b895361d0..cfcde9a7a 100644 --- a/pkg/sentry/kernel/task_test.go +++ b/pkg/sentry/kernel/task_test.go @@ -17,7 +17,7 @@ package kernel import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" ) func TestTaskCPU(t *testing.T) { diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index 461bd7316..518bfe1bd 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -17,9 +17,9 @@ package kernel import ( "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // MAX_RW_COUNT is the maximum size in bytes of a single read or write. diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 8bd53928e..2a97e3e8e 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -18,10 +18,11 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // A ThreadGroup is a logical grouping of tasks that has widespread @@ -236,13 +237,21 @@ type ThreadGroup struct { // rscr is the thread group's RSEQ critical region. rscr atomic.Value `state:".(*RSEQCriticalRegion)"` + + // mounts is the thread group's mount namespace. This does not really + // correspond to a "mount namespace" in Linux, but is more like a + // complete VFS that need not be shared between processes. See the + // comment in mounts.go for more information. + // + // mounts is immutable. + mounts *fs.MountNamespace } // newThreadGroup returns a new, empty thread group in PID namespace ns. The // thread group leader will send its parent terminationSignal when it exits. // The new thread group isn't visible to the system until a task has been // created inside of it by a successful call to TaskSet.NewTask. -func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup { +func (k *Kernel) newThreadGroup(mounts *fs.MountNamespace, ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup { tg := &ThreadGroup{ threadGroupNode: threadGroupNode{ pidns: ns, @@ -251,6 +260,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio terminationSignal: terminationSignal, ioUsage: &usage.IO{}, limits: limits, + mounts: mounts, } tg.itimerRealTimer = ktime.NewTimer(k.monotonicClock, &itimerRealListener{tg: tg}) tg.timers = make(map[linux.TimerID]*IntervalTimer) @@ -258,7 +268,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio return tg } -// saveRscr is invopked by stateify. +// saveRscr is invoked by stateify. func (tg *ThreadGroup) saveRscr() *RSEQCriticalRegion { return tg.rscr.Load().(*RSEQCriticalRegion) } @@ -298,6 +308,7 @@ func (tg *ThreadGroup) release() { for _, it := range its { it.DestroyTimer() } + tg.mounts.DecRef() } // forEachChildThreadGroupLocked indicates over all child ThreadGroups. diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index 656bbd46c..b21b182fc 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -18,8 +18,8 @@ import ( "fmt" "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/waiter" ) // TasksLimit is the maximum number of threads for untrusted application. diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 584f7c7cc..9beae4b31 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -8,7 +8,7 @@ go_library( "context.go", "time.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time", + importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/time/context.go b/pkg/sentry/kernel/time/context.go index c0660d362..8ef483dd3 100644 --- a/pkg/sentry/kernel/time/context.go +++ b/pkg/sentry/kernel/time/context.go @@ -15,7 +15,7 @@ package time import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the time package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 3846cf1ea..aa6c75d25 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -22,9 +22,9 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // Events that may be generated by a Clock. @@ -142,6 +142,11 @@ func (t Time) Timeval() linux.Timeval { return linux.NsecToTimeval(t.Nanoseconds()) } +// StatxTimestamp converts Time to a Linux statx_timestamp. +func (t Time) StatxTimestamp() linux.StatxTimestamp { + return linux.NsecToStatxTimestamp(t.Nanoseconds()) +} + // Add adds the duration of d to t. func (t Time) Add(d time.Duration) Time { if t.ns > 0 && d.Nanoseconds() > math.MaxInt64-int64(t.ns) { diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go index 505a4fa4f..76417342a 100644 --- a/pkg/sentry/kernel/timekeeper.go +++ b/pkg/sentry/kernel/timekeeper.go @@ -19,11 +19,11 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/log" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + sentrytime "gvisor.dev/gvisor/pkg/sentry/time" ) // Timekeeper manages all of the kernel clocks. @@ -122,7 +122,7 @@ func (t *Timekeeper) SetClocks(c sentrytime.Clocks) { // // In a restored sentry, monotonic time jumps forward by approximately // the same amount as real time. There are no guarantees here, we are - // just making a best-effort attempt to to make it appear that the app + // just making a best-effort attempt to make it appear that the app // was simply not scheduled for a long period, rather than that the // real time clock was changed. // diff --git a/pkg/sentry/kernel/timekeeper_state.go b/pkg/sentry/kernel/timekeeper_state.go index 6ce358a05..8e961c832 100644 --- a/pkg/sentry/kernel/timekeeper_state.go +++ b/pkg/sentry/kernel/timekeeper_state.go @@ -15,7 +15,7 @@ package kernel import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/time" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go index a92ad689e..849c5b646 100644 --- a/pkg/sentry/kernel/timekeeper_test.go +++ b/pkg/sentry/kernel/timekeeper_test.go @@ -17,12 +17,12 @@ package kernel import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + sentrytime "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // mockClocks is a sentrytime.Clocks that simply returns the times in the diff --git a/pkg/sentry/kernel/uts_namespace.go b/pkg/sentry/kernel/uts_namespace.go index 96fe3cbb9..0a563e715 100644 --- a/pkg/sentry/kernel/uts_namespace.go +++ b/pkg/sentry/kernel/uts_namespace.go @@ -17,7 +17,7 @@ package kernel import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) // UTSNamespace represents a UTS namespace, a holder of two system identifiers: diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go index d40ad74f4..fdd10c56c 100644 --- a/pkg/sentry/kernel/vdso.go +++ b/pkg/sentry/kernel/vdso.go @@ -17,11 +17,11 @@ package kernel import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // vdsoParams are the parameters exposed to the VDSO. diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD index 800166675..40025d62d 100644 --- a/pkg/sentry/limits/BUILD +++ b/pkg/sentry/limits/BUILD @@ -9,7 +9,7 @@ go_library( "limits.go", "linux.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/limits", + importpath = "gvisor.dev/gvisor/pkg/sentry/limits", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/limits/context.go b/pkg/sentry/limits/context.go index 9200edb52..6972749ed 100644 --- a/pkg/sentry/limits/context.go +++ b/pkg/sentry/limits/context.go @@ -15,7 +15,7 @@ package limits import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the limit package's type for context.Context.Value keys. diff --git a/pkg/sentry/limits/linux.go b/pkg/sentry/limits/linux.go index a2b401e3d..3f71abecc 100644 --- a/pkg/sentry/limits/linux.go +++ b/pkg/sentry/limits/linux.go @@ -17,7 +17,7 @@ package limits import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // FromLinuxResource maps linux resources to sentry LimitTypes. diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 66300f25a..3b322f5f3 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -21,7 +21,7 @@ go_library( "vdso_state.go", ":vdso_bin", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/loader", + importpath = "gvisor.dev/gvisor/pkg/sentry/loader", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi", diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 900236531..fba2f27fe 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -20,19 +20,19 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go index b88062ae5..ccf909cac 100644 --- a/pkg/sentry/loader/interpreter.go +++ b/pkg/sentry/loader/interpreter.go @@ -18,10 +18,10 @@ import ( "bytes" "io" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index dc1a52398..baa12d9a0 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -21,18 +21,18 @@ import ( "io" "path" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" ) // readFull behaves like io.ReadFull for an *fs.File. @@ -54,7 +54,7 @@ func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset in // openPath opens name for loading. // // openPath returns the fs.Dirent and an *fs.File for name, which is not -// installed in the Task FDMap. The caller takes ownership of both. +// installed in the Task FDTable. The caller takes ownership of both. // // name must be a readable, executable, regular file. func openPath(ctx context.Context, mm *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, name string) (*fs.Dirent, *fs.File, error) { diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index 4e73527cf..ada28aea3 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -19,22 +19,22 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) type fileContext struct { @@ -69,11 +69,12 @@ type byteReader struct { var _ fs.FileOperations = (*byteReader)(nil) // newByteReaderFile creates a fake file to read data from. -func newByteReaderFile(data []byte) *fs.File { +func newByteReaderFile(ctx context.Context, data []byte) *fs.File { // Create a fake inode. inode := fs.NewInode( + ctx, &fsutil.SimpleFileInode{}, - fs.NewPseudoMountSource(), + fs.NewPseudoMountSource(ctx), fs.StableAttr{ Type: fs.Anonymous, DeviceID: anon.PseudoDevice.DeviceID(), @@ -219,8 +220,8 @@ type VDSO struct { // PrepareVDSO validates the system VDSO and returns a VDSO, containing the // param page for updating by the kernel. -func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { - vdsoFile := newByteReaderFile(vdsoBin) +func PrepareVDSO(ctx context.Context, mfp pgalloc.MemoryFileProvider) (*VDSO, error) { + vdsoFile := newByteReaderFile(ctx, vdsoBin) // First make sure the VDSO is valid. vdsoFile does not use ctx, so a // nil context can be passed. diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 9c2cbd18b..29c14ec56 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -36,7 +36,7 @@ go_library( "mapping_set_impl.go", "memmap.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/memmap", + importpath = "gvisor.dev/gvisor/pkg/sentry/memmap", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go index 3cf2b338f..0a5b7ce45 100644 --- a/pkg/sentry/memmap/mapping_set.go +++ b/pkg/sentry/memmap/mapping_set.go @@ -18,7 +18,7 @@ import ( "fmt" "math" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // MappingSet maps offsets into a Mappable to mappings of those offsets. It is @@ -85,7 +85,7 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp // Each MappingOfRange in val1 must have a matching region in val2, forming // one contiguous region. for k1 := range val1 { - // We expect val2 to to contain a key that forms a contiguous + // We expect val2 to contain a key that forms a contiguous // region with k1. k2 := MappingOfRange{ MappingSpace: k1.MappingSpace, diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go index c702555ce..f9b11a59c 100644 --- a/pkg/sentry/memmap/mapping_set_test.go +++ b/pkg/sentry/memmap/mapping_set_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) type testMappingSpace struct { diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index 0106c857d..03b99aaea 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -18,10 +18,10 @@ package memmap import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Mappable represents a memory-mappable object, a mutable mapping from uint64 diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index c78cb4280..072745a08 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -7,7 +7,7 @@ go_template_instance( name = "file_refcount_set", out = "file_refcount_set.go", imports = { - "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "mm", prefix = "fileRefcount", @@ -27,7 +27,7 @@ go_template_instance( "minDegree": "8", }, imports = { - "usermem": "gvisor.googlesource.com/gvisor/pkg/sentry/usermem", + "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", }, package = "mm", prefix = "vma", @@ -47,7 +47,7 @@ go_template_instance( "minDegree": "8", }, imports = { - "usermem": "gvisor.googlesource.com/gvisor/pkg/sentry/usermem", + "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", }, package = "mm", prefix = "pma", @@ -95,7 +95,7 @@ go_library( "vma.go", "vma_set.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/mm", + importpath = "gvisor.dev/gvisor/pkg/sentry/mm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md index e6efbf565..e1322e373 100644 --- a/pkg/sentry/mm/README.md +++ b/pkg/sentry/mm/README.md @@ -274,7 +274,7 @@ In the sentry: methods [`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform]. -[memmap]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/memmap/memmap.go -[mm]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/mm/mm.go -[pgalloc]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/pgalloc/pgalloc.go -[platform]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/platform/platform.go +[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go +[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go +[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go +[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go index 06f587fde..cfebcfd42 100644 --- a/pkg/sentry/mm/address_space.go +++ b/pkg/sentry/mm/address_space.go @@ -18,9 +18,9 @@ import ( "fmt" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/atomicbitops" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // AddressSpace returns the platform.AddressSpace bound to mm. diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 5c61acf36..1b746d030 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -17,15 +17,15 @@ package mm import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // aioManager creates and manages asynchronous I/O contexts. @@ -213,7 +213,9 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) { if err != nil { return nil, err } - return &aioMappable{mfp: mfp, fr: fr}, nil + m := aioMappable{mfp: mfp, fr: fr} + m.EnableLeakCheck("mm.aioMappable") + return &m, nil } // DecRef implements refs.RefCounter.DecRef. diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index fe58cfc4c..df9adf708 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) const ( diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index e4c057d28..b03e7d020 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -15,11 +15,11 @@ package mm import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // There are two supported ways to copy data to/from application virtual diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index 7646d5ab2..4e9ca1de6 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -18,14 +18,14 @@ import ( "fmt" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/atomicbitops" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // NewMemoryManager returns a new MemoryManager with no mappings and 1 user. @@ -86,10 +86,22 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { } // Copy vmas. + dontforks := false dstvgap := mm2.vmas.FirstGap() for srcvseg := mm.vmas.FirstSegment(); srcvseg.Ok(); srcvseg = srcvseg.NextSegment() { vma := srcvseg.Value() // makes a copy of the vma vmaAR := srcvseg.Range() + + if vma.dontfork { + length := uint64(vmaAR.Length()) + mm2.usageAS -= length + if vma.isPrivateDataLocked() { + mm2.dataAS -= length + } + dontforks = true + continue + } + // Inform the Mappable, if any, of the new mapping. if vma.mappable != nil { if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil { @@ -118,6 +130,10 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { defer mm2.activeMu.Unlock() mm.activeMu.Lock() defer mm.activeMu.Unlock() + if dontforks { + defer mm.pmas.MergeRange(mm.applicationAddrRange()) + } + srcvseg := mm.vmas.FirstSegment() dstpgap := mm2.pmas.FirstGap() var unmapAR usermem.AddrRange for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() { @@ -125,6 +141,27 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { if !pma.private { continue } + + if dontforks { + // Find the 'vma' that contains the starting address + // associated with the 'pma' (there must be one). + srcvseg = srcvseg.seekNextLowerBound(srcpseg.Start()) + if checkInvariants { + if !srcvseg.Ok() { + panic(fmt.Sprintf("no vma covers pma range %v", srcpseg.Range())) + } + if srcpseg.Start() < srcvseg.Start() { + panic(fmt.Sprintf("vma %v ran ahead of pma %v", srcvseg.Range(), srcpseg.Range())) + } + } + + srcpseg = mm.pmas.Isolate(srcpseg, srcvseg.Range()) + if srcvseg.ValuePtr().dontfork { + continue + } + pma = srcpseg.ValuePtr() + } + if !pma.needCOW { pma.needCOW = true if pma.effectivePerms.Write { diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index c218006ee..d2a01d48a 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -15,9 +15,9 @@ package mm import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Dumpability describes if and how core dumps should be created. diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 604866d04..f350e0109 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -37,14 +37,14 @@ package mm import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/third_party/gvsync" ) // MemoryManager implements a virtual address space. @@ -74,7 +74,7 @@ type MemoryManager struct { // privateRefs is immutable. privateRefs *privateRefs - // users is the number of dependences on the mappings in the MemoryManager. + // users is the number of dependencies on the mappings in the MemoryManager. // When the number of references in users reaches zero, all mappings are // unmapped. // @@ -274,6 +274,9 @@ type vma struct { // metag, none of which we currently support. growsDown bool `state:"manual"` + // dontfork is the MADV_DONTFORK setting for this vma configured by madvise(). + dontfork bool + mlockMode memmap.MLockMode // numaPolicy is the NUMA policy for this vma set by mbind(). diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index 7209c73ce..4d2bfaaed 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -17,15 +17,15 @@ package mm import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) func testMemoryManager(ctx context.Context) *MemoryManager { diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index ece561ff0..c976c6f45 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -17,14 +17,14 @@ package mm import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // existingPMAsLocked checks that pmas exist for all addresses in ar, and diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go index c8302a553..a8819aa84 100644 --- a/pkg/sentry/mm/procfs.go +++ b/pkg/sentry/mm/procfs.go @@ -19,10 +19,10 @@ import ( "fmt" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( diff --git a/pkg/sentry/mm/save_restore.go b/pkg/sentry/mm/save_restore.go index 0385957bd..93259c5a3 100644 --- a/pkg/sentry/mm/save_restore.go +++ b/pkg/sentry/mm/save_restore.go @@ -17,7 +17,7 @@ package mm import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // InvalidateUnsavable invokes memmap.Mappable.InvalidateUnsavable on all diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go index 12913007b..b9f2d23e5 100644 --- a/pkg/sentry/mm/shm.go +++ b/pkg/sentry/mm/shm.go @@ -15,10 +15,10 @@ package mm import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/shm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // DetachShm unmaps a sysv shared memory segment. diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index 687959005..ea2d7af74 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -15,14 +15,14 @@ package mm import ( - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with @@ -45,7 +45,9 @@ type SpecialMappable struct { // // Preconditions: fr.Length() != 0. func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *SpecialMappable { - return &SpecialMappable{mfp: mfp, fr: fr, name: name} + m := SpecialMappable{mfp: mfp, fr: fr, name: name} + m.EnableLeakCheck("mm.SpecialMappable") + return &m } // DecRef implements refs.RefCounter.DecRef. diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 9cf136532..c2466c988 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -18,15 +18,15 @@ import ( "fmt" mrand "math/rand" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // HandleUserFault handles an application page fault. sp is the faulting @@ -1026,6 +1026,32 @@ func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy } } +// SetDontFork implements the semantics of madvise MADV_DONTFORK. +func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork bool) error { + ar, ok := addr.ToRange(length) + if !ok { + return syserror.EINVAL + } + + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + defer func() { + mm.vmas.MergeRange(ar) + mm.vmas.MergeAdjacent(ar) + }() + + for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { + vseg = mm.vmas.Isolate(vseg, ar) + vma := vseg.ValuePtr() + vma.dontfork = dontfork + } + + if mm.vmas.SpanRange(ar) != ar.Length() { + return syserror.ENOMEM + } + return nil +} + // Decommit implements the semantics of Linux's madvise(MADV_DONTNEED). func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error { ar, ok := addr.ToRange(length) diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index 0af8de5b0..f2fd70799 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -17,14 +17,14 @@ package mm import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Preconditions: mm.mappingMu must be locked for writing. opts must be valid @@ -34,7 +34,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms)) } - // Find a useable range. + // Find a usable range. addr, err := mm.findAvailableLocked(opts.Length, findAvailableOpts{ Addr: opts.Addr, Fixed: opts.Fixed, @@ -439,6 +439,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa vma1.mlockMode != vma2.mlockMode || vma1.numaPolicy != vma2.numaPolicy || vma1.numaNodemask != vma2.numaNodemask || + vma1.dontfork != vma2.dontfork || vma1.id != vma2.id || vma1.hint != vma2.hint { return vma{}, false diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index ca2d5ba6f..858f895f2 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -35,7 +35,7 @@ go_template_instance( "minDegree": "10", }, imports = { - "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "pgalloc", prefix = "usage", @@ -59,7 +59,7 @@ go_library( "save_restore.go", "usage_set.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc", + importpath = "gvisor.dev/gvisor/pkg/sentry/pgalloc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", diff --git a/pkg/sentry/pgalloc/context.go b/pkg/sentry/pgalloc/context.go index cb9809b1f..11ccf897b 100644 --- a/pkg/sentry/pgalloc/context.go +++ b/pkg/sentry/pgalloc/context.go @@ -15,7 +15,7 @@ package pgalloc import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is this package's type for context.Context.Value keys. diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index 6d91f1a7b..8bd3e885d 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -30,14 +30,14 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/hostmm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/hostmm" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // MemoryFile is a platform.File whose pages may be allocated to arbitrary diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go index 14a39bb9e..428e6a859 100644 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ b/pkg/sentry/pgalloc/pgalloc_test.go @@ -17,7 +17,7 @@ package pgalloc import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go index d4ba384b1..1effc7735 100644 --- a/pkg/sentry/pgalloc/save_restore.go +++ b/pkg/sentry/pgalloc/save_restore.go @@ -22,10 +22,10 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/state" ) // SaveTo writes f's state to the given stream. diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index ac8a6cb7f..9aa6ec507 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -22,12 +22,13 @@ go_library( "mmap_min_addr.go", "platform.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/atomicbitops", "//pkg/log", + "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/context", "//pkg/sentry/platform/safecopy", diff --git a/pkg/sentry/platform/context.go b/pkg/sentry/platform/context.go index 793f57fd7..e29bc4485 100644 --- a/pkg/sentry/platform/context.go +++ b/pkg/sentry/platform/context.go @@ -15,7 +15,7 @@ package platform import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the auth package's type for context.Context.Value keys. diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD index eeccd4d0e..eeb634644 100644 --- a/pkg/sentry/platform/interrupt/BUILD +++ b/pkg/sentry/platform/interrupt/BUILD @@ -7,7 +7,7 @@ go_library( srcs = [ "interrupt.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt", visibility = ["//pkg/sentry:internal"], ) diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 2931d6ddc..ad8b95744 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -14,6 +14,7 @@ go_library( "bluepill_fault.go", "bluepill_unsafe.go", "context.go", + "filters.go", "kvm.go", "kvm_amd64.go", "kvm_amd64_unsafe.go", @@ -25,7 +26,7 @@ go_library( "physical_map.go", "virtual_map.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -33,6 +34,7 @@ go_library( "//pkg/cpuid", "//pkg/log", "//pkg/procid", + "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 689122175..acd41f73d 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -18,10 +18,10 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/atomicbitops" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // dirtySet tracks vCPUs for invalidation. diff --git a/pkg/sentry/platform/kvm/allocator.go b/pkg/sentry/platform/kvm/allocator.go index 42bcc9733..80942e9c9 100644 --- a/pkg/sentry/platform/kvm/allocator.go +++ b/pkg/sentry/platform/kvm/allocator.go @@ -17,7 +17,7 @@ package kvm import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ) type allocator struct { diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index a926e6f8b..043de51b3 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -19,8 +19,8 @@ import ( "reflect" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" ) // bluepill enters guest mode. diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index c258408f9..421c88220 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -19,8 +19,8 @@ package kvm import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) var ( diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index 92fde7ee0..9d8af143e 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -19,8 +19,8 @@ package kvm import ( "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) // bluepillArchContext returns the arch-specific context. @@ -30,7 +30,7 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 { return &((*arch.UContext64)(context).MContext) } -// dieArchSetup initialies the state for dieTrampoline. +// dieArchSetup initializes the state for dieTrampoline. // // The amd64 dieTrampoline requires the vCPU to be set in BX, and the last RIP // to be in AX. The trampoline then simulates a call to dieHandler from the diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go index 3c452f5ba..b97476053 100644 --- a/pkg/sentry/platform/kvm/bluepill_fault.go +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index 0eb0020f7..99450d22d 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -15,11 +15,11 @@ package kvm import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // context is an implementation of the platform context. diff --git a/pkg/sentry/platform/kvm/filters.go b/pkg/sentry/platform/kvm/filters.go new file mode 100644 index 000000000..7d949f1dd --- /dev/null +++ b/pkg/sentry/platform/kvm/filters.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kvm + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/seccomp" +) + +// SyscallFilters returns syscalls made exclusively by the KVM platform. +func (*KVM) SyscallFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_IOCTL: {}, + syscall.SYS_MMAP: {}, + syscall.SYS_RT_SIGSUSPEND: {}, + syscall.SYS_RT_SIGTIMEDWAIT: {}, + 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. + } +} diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index ed0521c3f..ee4cd2f4d 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -21,11 +21,11 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // KVM represents a lightweight VM context. @@ -141,3 +141,17 @@ func (k *KVM) NewContext() platform.Context { machine: k.machine, } } + +type constructor struct{} + +func (*constructor) New(f *os.File) (platform.Platform, error) { + return New(f) +} + +func (*constructor) OpenDevice() (*os.File, error) { + return OpenDevice() +} + +func init() { + platform.Register("kvm", &constructor{}) +} diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go index 61493ccaf..5d8ef4761 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64.go +++ b/pkg/sentry/platform/kvm/kvm_amd64.go @@ -17,7 +17,7 @@ package kvm import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) // userMemoryRegion is a region of physical memory. diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index e83db71e9..30df725d4 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -22,12 +22,12 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm/testutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) var dummyFPState = (*byte)(arch.NewFloatingPointData()) diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index f8ccd86af..679087e25 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -21,12 +21,12 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/atomicbitops" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/procid" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/procid" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // machine contains state associated with the VM as a whole. @@ -135,7 +135,7 @@ type dieState struct { // newVCPU creates a returns a new vCPU. // -// Precondtion: mu must be held. +// Precondition: mu must be held. func (m *machine) newVCPU() *vCPU { id := len(m.vCPUs) @@ -426,7 +426,12 @@ func (c *vCPU) unlock() { // Normal state. case vCPUUser | vCPUGuest | vCPUWaiter: // Force a transition: this must trigger a notification when we - // return from guest mode. + // return from guest mode. We must clear vCPUWaiter here + // anyways, because BounceToKernel will force a transition only + // from ring3 to ring0, which will not clear this bit. Halt may + // workaround the issue, but if there is no exception or + // syscall in this period, BounceToKernel will hang. + atomicbitops.AndUint32(&c.state, ^vCPUWaiter) c.notify() case vCPUUser | vCPUWaiter: // Waiting for the lock to be released; the responsibility is diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index b6821122a..c1cbe33be 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -22,11 +22,11 @@ import ( "runtime/debug" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // initArchState initializes architecture-specific state. diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 06a2e3b0c..506ec9af1 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -22,8 +22,8 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/time" ) // setMemoryRegion initializes a region. @@ -87,7 +87,7 @@ func (c *vCPU) setCPUID() error { // setSystemTime sets the TSC for the vCPU. // -// This has to make the call many times in order to minimize the intrinstic +// This has to make the call many times in order to minimize the intrinsic // error in the offset. Unfortunately KVM does not expose a relative offset via // the API, so this is an approximation. We do this via an iterative algorithm. // This has the advantage that it can generally deal with highly variable diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 1d3c6d2d6..8d76e106e 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -25,7 +25,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) //go:linkname entersyscall runtime.entersyscall diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go index 450eb8201..586e91bb2 100644 --- a/pkg/sentry/platform/kvm/physical_map.go +++ b/pkg/sentry/platform/kvm/physical_map.go @@ -19,9 +19,9 @@ import ( "sort" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const ( diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD index e10087e8e..77a449a8b 100644 --- a/pkg/sentry/platform/kvm/testutil/BUILD +++ b/pkg/sentry/platform/kvm/testutil/BUILD @@ -10,6 +10,6 @@ go_library( "testutil_amd64.go", "testutil_amd64.s", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm/testutil", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil", visibility = ["//pkg/sentry/platform/kvm:__pkg__"], ) diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go index 28a1b4414..2d68855ef 100644 --- a/pkg/sentry/platform/kvm/virtual_map.go +++ b/pkg/sentry/platform/kvm/virtual_map.go @@ -22,7 +22,7 @@ import ( "regexp" "strconv" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) type virtualRegion struct { diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go index d03ec654a..6a2f145be 100644 --- a/pkg/sentry/platform/kvm/virtual_map_test.go +++ b/pkg/sentry/platform/kvm/virtual_map_test.go @@ -18,7 +18,7 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) type checker struct { diff --git a/pkg/sentry/platform/mmap_min_addr.go b/pkg/sentry/platform/mmap_min_addr.go index 90976735b..999787462 100644 --- a/pkg/sentry/platform/mmap_min_addr.go +++ b/pkg/sentry/platform/mmap_min_addr.go @@ -20,7 +20,7 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // systemMMapMinAddrSource is the source file. diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index ae37276ad..ec22dbf87 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -19,11 +19,13 @@ package platform import ( "fmt" + "os" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // Platform provides abstractions for execution contexts (Context, @@ -93,6 +95,9 @@ type Platform interface { // Platforms for which this does not hold may panic if PreemptAllCPUs is // called. PreemptAllCPUs() error + + // SyscallFilters returns syscalls made exclusively by this platform. + SyscallFilters() seccomp.SyscallRules } // NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and @@ -256,7 +261,7 @@ type AddressSpaceIO interface { LoadUint32(addr usermem.Addr) (uint32, error) } -// NoAddressSpaceIO implements AddressSpaceIO methods by panicing. +// NoAddressSpaceIO implements AddressSpaceIO methods by panicking. type NoAddressSpaceIO struct{} // CopyOut implements AddressSpaceIO.CopyOut. @@ -347,3 +352,26 @@ type File interface { func (fr FileRange) String() string { return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End) } + +// Constructor represents a platform type. +type Constructor interface { + New(deviceFile *os.File) (Platform, error) + OpenDevice() (*os.File, error) +} + +// platforms contains all available platform types. +var platforms = map[string]Constructor{} + +// Register registers a new platform type. +func Register(name string, platform Constructor) { + platforms[name] = platform +} + +// Lookup looks up the platform constructor by name. +func Lookup(name string) (Constructor, error) { + p, ok := platforms[name] + if !ok { + return nil, fmt.Errorf("unknown platform: %v", name) + } + return p, nil +} diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 434d003a3..1b6c54e96 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) go_library( name = "ptrace", srcs = [ + "filters.go", "ptrace.go", "ptrace_unsafe.go", "stub_amd64.s", @@ -15,7 +16,7 @@ go_library( "subprocess_linux_amd64_unsafe.go", "subprocess_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go new file mode 100644 index 000000000..1e07cfd0d --- /dev/null +++ b/pkg/sentry/platform/ptrace/filters.go @@ -0,0 +1,33 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ptrace + +import ( + "syscall" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/seccomp" +) + +// SyscallFilters returns syscalls made exclusively by the ptrace platform. +func (*PTrace) SyscallFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + unix.SYS_GETCPU: {}, + unix.SYS_SCHED_SETAFFINITY: {}, + syscall.SYS_PTRACE: {}, + syscall.SYS_TGKILL: {}, + syscall.SYS_WAIT4: {}, + } +} diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index 6a890dd81..6fd30ed25 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -45,13 +45,14 @@ package ptrace import ( + "os" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) var ( @@ -236,3 +237,17 @@ func (p *PTrace) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan s func (*PTrace) NewContext() platform.Context { return &context{} } + +type constructor struct{} + +func (*constructor) New(*os.File) (platform.Platform, error) { + return New() +} + +func (*constructor) OpenDevice() (*os.File, error) { + return nil, nil +} + +func init() { + platform.Register("ptrace", &constructor{}) +} diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go index 585f6c1fb..2706039a5 100644 --- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go @@ -18,8 +18,8 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // GETREGSET/SETREGSET register set types. diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go index 54d5021a9..aa1b87237 100644 --- a/pkg/sentry/platform/ptrace/stub_unsafe.go +++ b/pkg/sentry/platform/ptrace/stub_unsafe.go @@ -19,8 +19,8 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // stub is defined in arch-specific assembly. diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index d7800a55e..15e84735e 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -21,11 +21,11 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/procid" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/procid" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // globalPool exists to solve two distinct problems: @@ -123,7 +123,7 @@ type subprocess struct { contexts map[*context]struct{} } -// newSubprocess returns a useable subprocess. +// newSubprocess returns a usable subprocess. // // This will either be a newly created subprocess, or one from the global pool. // The create function will be called in the latter case, which is guaranteed @@ -155,6 +155,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) { errChan <- err return } + firstThread.grabInitRegs() // Ready to handle requests. errChan <- nil @@ -179,6 +180,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) { // Detach the thread. t.detach() + t.initRegs = firstThread.initRegs // Return the thread. r <- t @@ -253,7 +255,7 @@ func (s *subprocess) newThread() *thread { return t } -// attach attachs to the thread. +// attach attaches to the thread. func (t *thread) attach() { if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0); errno != 0 { panic(fmt.Sprintf("unable to attach: %v", errno)) @@ -269,7 +271,9 @@ func (t *thread) attach() { // Initialize options. t.init() +} +func (t *thread) grabInitRegs() { // Grab registers. // // Note that we adjust the current register RIP value to be just before @@ -281,9 +285,9 @@ func (t *thread) attach() { t.initRegs.Rip -= initRegsRipAdjustment } -// detach detachs from the thread. +// detach detaches from the thread. // -// Because the SIGSTOP is not supressed, the thread will enter group-stop. +// Because the SIGSTOP is not suppressed, the thread will enter group-stop. func (t *thread) detach() { if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(syscall.SIGSTOP), 0, 0); errno != 0 { panic(fmt.Sprintf("can't detach new clone: %v", errno)) @@ -370,13 +374,16 @@ func (t *thread) destroy() { // init initializes trace options. func (t *thread) init() { - // Set our TRACESYSGOOD option to differeniate real SIGTRAP. + // Set the TRACESYSGOOD option to differentiate real SIGTRAP. + // set PTRACE_O_EXITKILL to ensure that the unexpected exit of the + // sentry will immediately kill the associated stubs. + const PTRACE_O_EXITKILL = 0x100000 _, _, errno := syscall.RawSyscall6( syscall.SYS_PTRACE, syscall.PTRACE_SETOPTIONS, uintptr(t.tid), 0, - syscall.PTRACE_O_TRACESYSGOOD, + syscall.PTRACE_O_TRACESYSGOOD|syscall.PTRACE_O_TRACEEXIT|PTRACE_O_EXITKILL, 0, 0) if errno != 0 { panic(fmt.Sprintf("ptrace set options failed: %v", errno)) @@ -419,7 +426,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) { // between syscall-enter-stop and syscall-exit-stop; it happens *after* // syscall-exit-stop.)" - ptrace(2), "Syscall-stops" if sig := t.wait(stopped); sig != (syscallEvent | syscall.SIGTRAP) { - panic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig)) + t.dumpAndPanic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig)) } // Grab registers. @@ -551,7 +558,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool { if c.signalInfo.Code > 0 { // The signal was generated by the kernel. We inspect // the signal information, and may patch it in order to - // faciliate vsyscall emulation. See patchSignalInfo. + // facilitate vsyscall emulation. See patchSignalInfo. patchSignalInfo(regs, &c.signalInfo) return false } else if c.signalInfo.Code <= 0 && c.signalInfo.Pid() == int32(os.Getpid()) { diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go index fdd21c8f8..a70512913 100644 --- a/pkg/sentry/platform/ptrace/subprocess_amd64.go +++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go @@ -21,7 +21,7 @@ import ( "strings" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/arch" ) const ( diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 914be7486..87ded0bbd 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -20,11 +20,11 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/procid" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/procid" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/arch" ) const syscallEvent syscall.Signal = 0x80 @@ -235,6 +235,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro return nil, fmt.Errorf("wait failed: expected SIGSTOP, got %v", sig) } t.attach() + t.grabInitRegs() return t, nil } @@ -305,7 +306,7 @@ func (s *subprocess) createStub() (*thread, error) { arch.SyscallArgument{Value: 0}, arch.SyscallArgument{Value: 0}) if err != nil { - return nil, err + return nil, fmt.Errorf("creating stub process: %v", err) } // Wait for child to enter group-stop, so we don't stop its @@ -324,7 +325,7 @@ func (s *subprocess) createStub() (*thread, error) { arch.SyscallArgument{Value: 0}, arch.SyscallArgument{Value: 0}) if err != nil { - return nil, err + return nil, fmt.Errorf("waiting on stub process: %v", err) } childT := &thread{ diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go index 1bf7eab28..e977992f9 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go @@ -23,7 +23,7 @@ import ( "unsafe" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // maskPool contains reusable CPU masks for setting affinity. Unfortunately, diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index ecb3e9a9c..8ed6c7652 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -43,7 +43,7 @@ go_library( "lib_amd64.s", "ring0.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go index 5bbd4612d..076063f85 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/sentry/platform/ring0/defs.go @@ -17,7 +17,7 @@ package ring0 import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) var ( diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go index 413c3dbc4..7206322b1 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/sentry/platform/ring0/defs_amd64.go @@ -17,7 +17,7 @@ package ring0 import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ) // Segment indices and Selectors. diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s index 8cb8c4996..02df38331 100644 --- a/pkg/sentry/platform/ring0/entry_amd64.s +++ b/pkg/sentry/platform/ring0/entry_amd64.s @@ -15,7 +15,7 @@ #include "funcdata.h" #include "textflag.h" -// NB: Offsets are programatically generated (see BUILD). +// NB: Offsets are programmatically generated (see BUILD). // // This file is concatenated with the definitions. diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/sentry/platform/ring0/lib_amd64.go index 9c5f26962..ca968a036 100644 --- a/pkg/sentry/platform/ring0/lib_amd64.go +++ b/pkg/sentry/platform/ring0/lib_amd64.go @@ -17,7 +17,7 @@ package ring0 import ( - "gvisor.googlesource.com/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/cpuid" ) // LoadFloatingPoint loads floating point state by the most efficient mechanism diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index fe93d3030..3b95af617 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -84,7 +84,7 @@ go_library( "walker_map.go", "walker_unmap.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables", visibility = [ "//pkg/sentry/platform/kvm:__subpackages__", "//pkg/sentry/platform/ring0:__subpackages__", diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go index 1b996b4e2..a90394a33 100644 --- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go +++ b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go @@ -17,7 +17,7 @@ package pagetables import ( "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // newAlignedPTEs returns a set of aligned PTEs. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index e5dcaada7..904f1a6de 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -21,7 +21,7 @@ package pagetables import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // PageTables is a set of page tables. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go index a1ec4b109..35e917526 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go @@ -19,7 +19,7 @@ package pagetables import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) func Test2MAnd4K(t *testing.T) { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index 36e424495..6e95ad2b9 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -17,7 +17,7 @@ package pagetables import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) type mapping struct { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index ff427fbe9..3e2383c5e 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -19,7 +19,7 @@ package pagetables import ( "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // archPageTables is architecture-specific data. diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go index 7e5ceafdb..5f80d64e8 100644 --- a/pkg/sentry/platform/ring0/x86.go +++ b/pkg/sentry/platform/ring0/x86.go @@ -17,7 +17,7 @@ package ring0 import ( - "gvisor.googlesource.com/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/cpuid" ) // Useful bits. diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD index d97a40297..924d8a6d6 100644 --- a/pkg/sentry/platform/safecopy/BUILD +++ b/pkg/sentry/platform/safecopy/BUILD @@ -16,7 +16,7 @@ go_library( "sighandler_amd64.s", "sighandler_arm64.s", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy", + importpath = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/syserror"], ) diff --git a/pkg/sentry/platform/safecopy/safecopy.go b/pkg/sentry/platform/safecopy/safecopy.go index 5126871eb..2fb7e5809 100644 --- a/pkg/sentry/platform/safecopy/safecopy.go +++ b/pkg/sentry/platform/safecopy/safecopy.go @@ -22,7 +22,7 @@ import ( "runtime" "syscall" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserror" ) // SegvError is returned when a safecopy function receives SIGSEGV. diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD index 3ab453718..fd6dc8e6e 100644 --- a/pkg/sentry/safemem/BUILD +++ b/pkg/sentry/safemem/BUILD @@ -10,7 +10,7 @@ go_library( "safemem.go", "seq_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/safemem", + importpath = "gvisor.dev/gvisor/pkg/sentry/safemem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/platform/safecopy", diff --git a/pkg/sentry/safemem/block_unsafe.go b/pkg/sentry/safemem/block_unsafe.go index 1f72deb61..6f03c94bf 100644 --- a/pkg/sentry/safemem/block_unsafe.go +++ b/pkg/sentry/safemem/block_unsafe.go @@ -19,7 +19,7 @@ import ( "reflect" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy" + "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" ) // A Block is a range of contiguous bytes, similar to []byte but with the diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD index cec3af92e..f561670c7 100644 --- a/pkg/sentry/sighandling/BUILD +++ b/pkg/sentry/sighandling/BUILD @@ -8,7 +8,7 @@ go_library( "sighandling.go", "sighandling_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling", + importpath = "gvisor.dev/gvisor/pkg/sentry/sighandling", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/abi/linux"], ) diff --git a/pkg/sentry/sighandling/sighandling.go b/pkg/sentry/sighandling/sighandling.go index 659b43363..2f65db70b 100644 --- a/pkg/sentry/sighandling/sighandling.go +++ b/pkg/sentry/sighandling/sighandling.go @@ -22,7 +22,7 @@ import ( "reflect" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // numSignals is the number of normal (non-realtime) signals on Linux. diff --git a/pkg/sentry/sighandling/sighandling_unsafe.go b/pkg/sentry/sighandling/sighandling_unsafe.go index aca77888a..eace3766d 100644 --- a/pkg/sentry/sighandling/sighandling_unsafe.go +++ b/pkg/sentry/sighandling/sighandling_unsafe.go @@ -20,7 +20,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // TODO(b/34161764): Move to pkg/abi/linux along with definitions in diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 076f953e7..2b03ea87c 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "socket", srcs = ["socket.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -14,7 +14,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 9f4763906..81dbd7309 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -5,9 +5,9 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "control", srcs = ["control.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/control", imports = [ - "gvisor.googlesource.com/gvisor/pkg/sentry/fs", + "gvisor.dev/gvisor/pkg/sentry/fs", ], visibility = ["//pkg/sentry:internal"], deps = [ @@ -17,7 +17,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", "//pkg/syserror", diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 434d7ca2e..4f4a20dfe 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -17,16 +17,15 @@ package control import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const maxInt = int(^uint(0) >> 1) @@ -63,7 +62,7 @@ type RightsFiles []*fs.File func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) { files := make(RightsFiles, 0, len(fds)) for _, fd := range fds { - file, _ := t.FDMap().GetDescriptor(kdefs.FD(fd)) + file := t.GetFile(fd) if file == nil { files.Release() return nil, syserror.EBADF @@ -109,7 +108,9 @@ func rightsFDs(t *kernel.Task, rights SCMRights, cloexec bool, max int) ([]int32 files, trunc := rights.Files(t, max) fds := make([]int32, 0, len(files)) for i := 0; i < max && len(files) > 0; i++ { - fd, err := t.FDMap().NewFDFrom(0, files[0], kernel.FDFlags{cloexec}, t.ThreadGroup().Limits()) + fd, err := t.NewFDFrom(0, files[0], kernel.FDFlags{ + CloseOnExec: cloexec, + }) files[0].DecRef() files = files[1:] if err != nil { @@ -315,8 +316,7 @@ func PackTimestamp(t *kernel.Task, timestamp int64, buf []byte) []byte { // Parse parses a raw socket control message into portable objects. func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (transport.ControlMessages, error) { var ( - fds linux.ControlMessageRights - + fds linux.ControlMessageRights haveCreds bool creds linux.ControlMessageCredentials ) diff --git a/pkg/sentry/socket/epsocket/BUILD b/pkg/sentry/socket/epsocket/BUILD index 7e2679ea0..1f014f399 100644 --- a/pkg/sentry/socket/epsocket/BUILD +++ b/pkg/sentry/socket/epsocket/BUILD @@ -11,7 +11,7 @@ go_library( "save_restore.go", "stack.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/epsocket", visibility = [ "//pkg/sentry:internal", ], @@ -28,7 +28,6 @@ go_library( "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/safemem", "//pkg/sentry/socket", diff --git a/pkg/sentry/socket/epsocket/device.go b/pkg/sentry/socket/epsocket/device.go index ab4083efe..85484d5b1 100644 --- a/pkg/sentry/socket/epsocket/device.go +++ b/pkg/sentry/socket/epsocket/device.go @@ -14,7 +14,7 @@ package epsocket -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // epsocketDevice is the endpoint socket virtual device. var epsocketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index a50798cb3..9d1bcfd41 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -31,28 +31,27 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) func mustCreateMetric(name, description string) *tcpip.StatCounter { @@ -262,7 +261,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue dirent := socket.NewDirent(t, epsocketDevice) defer dirent.DecRef() - return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true}, &SocketOperations{ + return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, &SocketOperations{ Queue: queue, family: family, Endpoint: endpoint, @@ -286,14 +285,14 @@ func bytesToIPAddress(addr []byte) tcpip.Address { // GetAddress reads an sockaddr struct from the given address and converts it // to the FullAddress format. It supports AF_UNIX, AF_INET and AF_INET6 // addresses. -func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) { +func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syserr.Error) { // Make sure we have at least 2 bytes for the address family. if len(addr) < 2 { return tcpip.FullAddress{}, syserr.ErrInvalidArgument } family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) { + if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) { return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported } @@ -318,7 +317,7 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) { case linux.AF_INET: var a linux.SockAddrInet if len(addr) < sockAddrInetSize { - return tcpip.FullAddress{}, syserr.ErrBadAddress + return tcpip.FullAddress{}, syserr.ErrInvalidArgument } binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a) @@ -331,7 +330,7 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) { case linux.AF_INET6: var a linux.SockAddrInet6 if len(addr) < sockAddrInet6Size { - return tcpip.FullAddress{}, syserr.ErrBadAddress + return tcpip.FullAddress{}, syserr.ErrInvalidArgument } binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a) @@ -344,6 +343,9 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) { } return out, nil + case linux.AF_UNSPEC: + return tcpip.FullAddress{}, nil + default: return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported } @@ -466,7 +468,7 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { // Connect implements the linux syscall connect(2) for sockets backed by // tpcip.Endpoint. func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - addr, err := GetAddress(s.family, sockaddr) + addr, err := GetAddress(s.family, sockaddr, false /* strict */) if err != nil { return err } @@ -499,7 +501,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - addr, err := GetAddress(s.family, sockaddr) + addr, err := GetAddress(s.family, sockaddr, true /* strict */) if err != nil { return err } @@ -537,7 +539,7 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *wait // Accept implements the linux syscall accept(2) for sockets backed by // tcpip.Endpoint. -func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) { +func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) { // Issue the accept request to get the new endpoint. ep, wq, terr := s.Endpoint.Accept() if terr != nil { @@ -575,10 +577,9 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, } } - fdFlags := kernel.FDFlags{ + fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{ CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, - } - fd, e := t.FDMap().NewFDFrom(0, ns, fdFlags, t.ThreadGroup().Limits()) + }) t.Kernel().RecordSocket(ns) @@ -668,12 +669,6 @@ func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) { // TODO(b/124056281): Stop rejecting short optLen values in getsockopt. switch name { - case linux.SO_TYPE: - if outLen < sizeOfInt32 { - return nil, syserr.ErrInvalidArgument - } - return int32(skType), nil - case linux.SO_ERROR: if outLen < sizeOfInt32 { return nil, syserr.ErrInvalidArgument @@ -1725,6 +1720,7 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq // If we managed to copy something, we must deliver it. if copied > 0 { + s.Endpoint.ModerateRecvBuf(copied) return copied, nil } @@ -1929,7 +1925,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] var addr *tcpip.FullAddress if len(to) > 0 { - addrBuf, err := GetAddress(s.family, to) + addrBuf, err := GetAddress(s.family, to, true /* strict */) if err != nil { return 0, err } @@ -1998,7 +1994,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] } // Ioctl implements fs.FileOperations.Ioctl. -func (s *SocketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { // SIOCGSTAMP is implemented by epsocket rather than all commonEndpoint // sockets. // TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP. diff --git a/pkg/sentry/socket/epsocket/provider.go b/pkg/sentry/socket/epsocket/provider.go index 516582828..6d2b5d038 100644 --- a/pkg/sentry/socket/epsocket/provider.go +++ b/pkg/sentry/socket/epsocket/provider.go @@ -17,20 +17,20 @@ package epsocket import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) // provider is an inet socket provider. @@ -111,7 +111,7 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (* return nil, syserr.TranslateNetstackError(e) } - return New(t, p.family, stype, protocol, wq, ep) + return New(t, p.family, stype, int(transProto), wq, ep) } // Pair just returns nil sockets (not supported). diff --git a/pkg/sentry/socket/epsocket/save_restore.go b/pkg/sentry/socket/epsocket/save_restore.go index feaafb7cc..f7b8c10cc 100644 --- a/pkg/sentry/socket/epsocket/save_restore.go +++ b/pkg/sentry/socket/epsocket/save_restore.go @@ -15,7 +15,7 @@ package epsocket import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // afterLoad is invoked by stateify. diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/epsocket/stack.go index edefa225b..1627a4f68 100644 --- a/pkg/sentry/socket/epsocket/stack.go +++ b/pkg/sentry/socket/epsocket/stack.go @@ -15,14 +15,14 @@ package epsocket import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) // Stack implements inet.Stack for netstack/tcpip/stack.Stack. diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 975f47bc3..a951f1bb0 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -12,7 +12,7 @@ go_library( "socket_unsafe.go", "stack.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/hostinet", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -26,7 +26,6 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/inet", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/safemem", "//pkg/sentry/socket", diff --git a/pkg/sentry/socket/hostinet/device.go b/pkg/sentry/socket/hostinet/device.go index 4267e3691..27049d65f 100644 --- a/pkg/sentry/socket/hostinet/device.go +++ b/pkg/sentry/socket/hostinet/device.go @@ -14,6 +14,6 @@ package hostinet -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" var socketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index c62c8d8f1..7f69406b7 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -18,22 +18,21 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/fdnotifier" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -77,7 +76,7 @@ func newSocketFile(ctx context.Context, family int, stype linux.SockType, protoc } dirent := socket.NewDirent(ctx, socketDevice) defer dirent.DecRef() - return fs.NewFile(ctx, dirent, fs.FileFlags{NonBlocking: nonblock, Read: true, Write: true}, s), nil + return fs.NewFile(ctx, dirent, fs.FileFlags{NonBlocking: nonblock, Read: true, Write: true, NonSeekable: true}, s), nil } // Release implements fs.FileOperations.Release. @@ -190,7 +189,7 @@ func (s *socketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo } // Accept implements socket.Socket.Accept. -func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) { +func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) { var peerAddr []byte var peerAddrlen uint32 var peerAddrPtr *byte @@ -236,11 +235,11 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, } defer f.DecRef() - fdFlags := kernel.FDFlags{ + kfd, kerr := t.NewFDFrom(0, f, kernel.FDFlags{ CloseOnExec: flags&syscall.SOCK_CLOEXEC != 0, - } - kfd, kerr := t.FDMap().NewFDFrom(0, f, fdFlags, t.ThreadGroup().Limits()) + }) t.Kernel().RecordSocket(f) + return kfd, peerAddr, peerAddrlen, syserr.FromError(kerr) } @@ -288,7 +287,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outLe } case syscall.SOL_SOCKET: switch name { - case syscall.SO_ERROR, syscall.SO_KEEPALIVE, syscall.SO_SNDBUF, syscall.SO_RCVBUF, syscall.SO_REUSEADDR, syscall.SO_TYPE: + case syscall.SO_ERROR, syscall.SO_KEEPALIVE, syscall.SO_SNDBUF, syscall.SO_RCVBUF, syscall.SO_REUSEADDR: optlen = sizeofInt32 case syscall.SO_LINGER: optlen = syscall.SizeofLinger diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index eed0c7837..6c69ba9c7 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -18,12 +18,13 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" ) func firstBytePtr(bs []byte) unsafe.Pointer { @@ -52,7 +53,7 @@ func writev(fd int, srcs []syscall.Iovec) (uint64, error) { } // Ioctl implements fs.FileOperations.Ioctl. -func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { switch cmd := uintptr(args[1].Int()); cmd { case syscall.TIOCINQ, syscall.TIOCOUTQ: var val int32 diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 9c45991ba..11f94281c 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -21,12 +21,12 @@ import ( "strings" "syscall" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) var defaultRecvBufSize = inet.TCPBufferSize{ diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 148306329..45ebb2a0e 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -9,7 +9,7 @@ go_library( "provider.go", "socket.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -20,7 +20,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/socket", "//pkg/sentry/socket/netlink/port", diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go index 5bd3b49ce..ce0a1afd0 100644 --- a/pkg/sentry/socket/netlink/message.go +++ b/pkg/sentry/socket/netlink/message.go @@ -18,9 +18,9 @@ import ( "fmt" "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // alignUp rounds a length up to an alignment. diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD index a7370a4ec..9e2e12799 100644 --- a/pkg/sentry/socket/netlink/port/BUILD +++ b/pkg/sentry/socket/netlink/port/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") go_library( name = "port", srcs = ["port.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", visibility = ["//pkg/sentry:internal"], ) diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index 5dc103877..689cad997 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -17,12 +17,12 @@ package netlink import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/syserr" ) // Protocol is the implementation of a netlink socket protocol. @@ -89,7 +89,7 @@ func (*socketProvider) Socket(t *kernel.Task, stype linux.SockType, protocol int d := socket.NewDirent(t, netlinkSocketDevice) defer d.DecRef() - return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true}, s), nil + return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, s), nil } // Pair implements socket.Provider.Pair by returning an error. diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index be0419679..5dc8533ec 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "route", srcs = ["protocol.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 9f0a81403..fb1ff329c 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -18,13 +18,13 @@ package route import ( "bytes" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + "gvisor.dev/gvisor/pkg/syserr" ) // commandKind describes the operational class of a message type. diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 62659784a..f3d6c1e9b 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -19,25 +19,24 @@ import ( "math" "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/waiter" ) const sizeOfInt32 int = 4 @@ -111,7 +110,7 @@ var _ socket.Socket = (*Socket)(nil) // NewSocket creates a new Socket. func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socket, *syserr.Error) { // Datagram endpoint used to buffer kernel -> user messages. - ep := transport.NewConnectionless() + ep := transport.NewConnectionless(t) // Bind the endpoint for good measure so we can connect to it. The // bound address will never be exposed. @@ -121,7 +120,7 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke } // Create a connection from which the kernel can write messages. - connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect() + connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t) if err != nil { ep.Close() return nil, err @@ -173,7 +172,7 @@ func (s *Socket) EventUnregister(e *waiter.Entry) { } // Ioctl implements fs.FileOperations.Ioctl. -func (s *Socket) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) { // TODO(b/68878065): no ioctls supported. return 0, syserror.ENOTTY } @@ -272,7 +271,7 @@ func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr } // Accept implements socket.Socket.Accept. -func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) { +func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) { // Netlink sockets never support accept. return 0, nil, 0, syserr.ErrNotSupported } diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD index 33ba20de7..5061dcbde 100644 --- a/pkg/sentry/socket/rpcinet/BUILD +++ b/pkg/sentry/socket/rpcinet/BUILD @@ -12,7 +12,7 @@ go_library( "stack.go", "stack_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet", visibility = ["//pkg/sentry:internal"], deps = [ ":syscall_rpc_go_proto", @@ -25,7 +25,6 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/inet", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/socket", "//pkg/sentry/socket/hostinet", @@ -52,7 +51,7 @@ proto_library( go_proto_library( name = "syscall_rpc_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto", proto = ":syscall_rpc_proto", visibility = [ "//visibility:public", diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD index 4336ae9b4..23eadcb1b 100644 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ b/pkg/sentry/socket/rpcinet/conn/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "conn", srcs = ["conn.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/binary", diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go index f537c7f63..356adad99 100644 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ b/pkg/sentry/socket/rpcinet/conn/conn.go @@ -22,11 +22,11 @@ import ( "syscall" "github.com/golang/protobuf/proto" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/unet" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" ) type request struct { diff --git a/pkg/sentry/socket/rpcinet/device.go b/pkg/sentry/socket/rpcinet/device.go index 44c0a39b7..8cfd5f6e5 100644 --- a/pkg/sentry/socket/rpcinet/device.go +++ b/pkg/sentry/socket/rpcinet/device.go @@ -14,6 +14,6 @@ package rpcinet -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" var socketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD index b0b107ddb..a536f2e44 100644 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ b/pkg/sentry/socket/rpcinet/notifier/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "notifier", srcs = ["notifier.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go index 601e05994..aa157dd51 100644 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go @@ -20,9 +20,9 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" + pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + "gvisor.dev/gvisor/pkg/waiter" ) type fdInfo struct { diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go index c22ff1ff0..ccaaddbfc 100644 --- a/pkg/sentry/socket/rpcinet/socket.go +++ b/pkg/sentry/socket/rpcinet/socket.go @@ -19,26 +19,25 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" + pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // socketOperations implements fs.FileOperations and socket.Socket for a socket @@ -286,7 +285,7 @@ func rpcAccept(t *kernel.Task, fd uint32, peer bool) (*pb.AcceptResponse_ResultP } // Accept implements socket.Socket.Accept. -func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) { +func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) { payload, se := rpcAccept(t, s.fd, peerRequested) // Check if we need to block. @@ -322,7 +321,13 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, dirent := socket.NewDirent(t, socketDevice) defer dirent.DecRef() - file := fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonBlocking: flags&linux.SOCK_NONBLOCK != 0}, &socketOperations{ + fileFlags := fs.FileFlags{ + Read: true, + Write: true, + NonSeekable: true, + NonBlocking: flags&linux.SOCK_NONBLOCK != 0, + } + file := fs.NewFile(t, dirent, fileFlags, &socketOperations{ wq: &wq, fd: payload.Fd, rpcConn: s.rpcConn, @@ -330,10 +335,9 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, }) defer file.DecRef() - fdFlags := kernel.FDFlags{ + fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, - } - fd, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, 0, syserr.FromError(err) } @@ -558,7 +562,7 @@ func ifconfIoctlFromStack(ctx context.Context, io usermem.IO, ifc *linux.IFConf) } // Ioctl implements fs.FileOperations.Ioctl. -func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { t := ctx.(*kernel.Task) cmd := uint32(args[1].Int()) diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go index a1be711df..3038f25a7 100644 --- a/pkg/sentry/socket/rpcinet/stack.go +++ b/pkg/sentry/socket/rpcinet/stack.go @@ -18,12 +18,12 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" + "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/unet" ) // Stack implements inet.Stack for RPC backed sockets. diff --git a/pkg/sentry/socket/rpcinet/stack_unsafe.go b/pkg/sentry/socket/rpcinet/stack_unsafe.go index e53f578ba..a94bdad83 100644 --- a/pkg/sentry/socket/rpcinet/stack_unsafe.go +++ b/pkg/sentry/socket/rpcinet/stack_unsafe.go @@ -18,11 +18,11 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" ) // NewNetlinkRouteRequest builds a netlink message for getting the RIB, diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index d60944b6b..0efa58a58 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -21,18 +21,17 @@ import ( "fmt" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/device" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/device" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" ) // ControlMessages represents the union of unix control messages and tcpip @@ -53,7 +52,7 @@ type Socket interface { // Accept implements the accept4(2) linux syscall. // Returns fd, real peer address length and error. Real peer address // length is only set if len(peer) > 0. - Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) + Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) // Bind implements the bind(2) linux syscall. Bind(t *kernel.Task, sockaddr []byte) *syserr.Error @@ -199,7 +198,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent { User: fs.PermMask{Read: true, Write: true}, }, linux.SOCKFS_MAGIC), } - inode := fs.NewInode(iops, fs.NewPseudoMountSource(), fs.StableAttr{ + inode := fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{ Type: fs.Socket, DeviceID: d.DeviceID(), InodeID: ino, @@ -207,7 +206,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent { }) // Dirent name matches net/socket.c:sockfs_dname. - return fs.NewDirent(inode, fmt.Sprintf("socket:[%d]", ino)) + return fs.NewDirent(ctx, inode, fmt.Sprintf("socket:[%d]", ino)) } // SendReceiveTimeout stores timeouts for send and receive calls. diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index fe6871cc6..da9977fde 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -9,7 +9,7 @@ go_library( "io.go", "unix.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -20,7 +20,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/safemem", "//pkg/sentry/socket", diff --git a/pkg/sentry/socket/unix/device.go b/pkg/sentry/socket/unix/device.go index 734d39ee6..db01ac4c9 100644 --- a/pkg/sentry/socket/unix/device.go +++ b/pkg/sentry/socket/unix/device.go @@ -14,7 +14,7 @@ package unix -import "gvisor.googlesource.com/gvisor/pkg/sentry/device" +import "gvisor.dev/gvisor/pkg/sentry/device" // unixSocketDevice is the unix socket virtual device. var unixSocketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go index 5a1475ec2..760c7beab 100644 --- a/pkg/sentry/socket/unix/io.go +++ b/pkg/sentry/socket/unix/io.go @@ -15,15 +15,18 @@ package unix import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/tcpip" ) // EndpointWriter implements safemem.Writer that writes to a transport.Endpoint. // // EndpointWriter is not thread-safe. type EndpointWriter struct { + Ctx context.Context + // Endpoint is the transport.Endpoint to write to. Endpoint transport.Endpoint @@ -37,7 +40,7 @@ type EndpointWriter struct { // WriteFromBlocks implements safemem.Writer.WriteFromBlocks. func (w *EndpointWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { return safemem.FromVecWriterFunc{func(bufs [][]byte) (int64, error) { - n, err := w.Endpoint.SendMsg(bufs, w.Control, w.To) + n, err := w.Endpoint.SendMsg(w.Ctx, bufs, w.Control, w.To) if err != nil { return int64(n), err.ToError() } @@ -50,6 +53,8 @@ func (w *EndpointWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) // // EndpointReader is not thread-safe. type EndpointReader struct { + Ctx context.Context + // Endpoint is the transport.Endpoint to read from. Endpoint transport.Endpoint @@ -81,7 +86,7 @@ type EndpointReader struct { // ReadToBlocks implements safemem.Reader.ReadToBlocks. func (r *EndpointReader) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { return safemem.FromVecReaderFunc{func(bufs [][]byte) (int64, error) { - n, ms, c, ct, err := r.Endpoint.RecvMsg(bufs, r.Creds, r.NumRights, r.Peek, r.From) + n, ms, c, ct, err := r.Endpoint.RecvMsg(r.Ctx, bufs, r.Creds, r.NumRights, r.Peek, r.From) r.Control = c r.ControlTrunc = ct r.MsgSize = ms diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index 52f324eed..0b0240336 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -25,12 +25,13 @@ go_library( "transport_message_list.go", "unix.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport", + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/ilist", "//pkg/refs", + "//pkg/sentry/context", "//pkg/syserr", "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index db79ac904..73d2df15d 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -17,10 +17,11 @@ package transport import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/waiter" ) // UniqueIDProvider generates a sequence of unique identifiers useful for, @@ -111,8 +112,13 @@ type connectionedEndpoint struct { acceptedChan chan *connectionedEndpoint `state:".([]*connectionedEndpoint)"` } +var ( + _ = BoundEndpoint((*connectionedEndpoint)(nil)) + _ = Endpoint((*connectionedEndpoint)(nil)) +) + // NewConnectioned creates a new unbound connectionedEndpoint. -func NewConnectioned(stype linux.SockType, uid UniqueIDProvider) Endpoint { +func NewConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) Endpoint { return &connectionedEndpoint{ baseEndpoint: baseEndpoint{Queue: &waiter.Queue{}}, id: uid.UniqueID(), @@ -122,7 +128,7 @@ func NewConnectioned(stype linux.SockType, uid UniqueIDProvider) Endpoint { } // NewPair allocates a new pair of connected unix-domain connectionedEndpoints. -func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) { +func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) { a := &connectionedEndpoint{ baseEndpoint: baseEndpoint{Queue: &waiter.Queue{}}, id: uid.UniqueID(), @@ -137,7 +143,9 @@ func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) { } q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit} + q1.EnableLeakCheck("transport.queue") q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit} + q2.EnableLeakCheck("transport.queue") if stype == linux.SOCK_STREAM { a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}} @@ -163,7 +171,7 @@ func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) { // NewExternal creates a new externally backed Endpoint. It behaves like a // socketpair. -func NewExternal(stype linux.SockType, uid UniqueIDProvider, queue *waiter.Queue, receiver Receiver, connected ConnectedEndpoint) Endpoint { +func NewExternal(ctx context.Context, stype linux.SockType, uid UniqueIDProvider, queue *waiter.Queue, receiver Receiver, connected ConnectedEndpoint) Endpoint { return &connectionedEndpoint{ baseEndpoint: baseEndpoint{Queue: queue, receiver: receiver, connected: connected}, id: uid.UniqueID(), @@ -238,7 +246,7 @@ func (e *connectionedEndpoint) Close() { } // BidirectionalConnect implements BoundEndpoint.BidirectionalConnect. -func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { +func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { if ce.Type() != e.stype { return syserr.ErrConnectionRefused } @@ -288,12 +296,14 @@ func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, retur } readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit} + readQueue.EnableLeakCheck("transport.queue") ne.connected = &connectedEndpoint{ endpoint: ce, writeQueue: readQueue, } writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit} + writeQueue.EnableLeakCheck("transport.queue") if e.stype == linux.SOCK_STREAM { ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}} } else { @@ -334,19 +344,19 @@ func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, retur } // UnidirectionalConnect implements BoundEndpoint.UnidirectionalConnect. -func (e *connectionedEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error) { +func (e *connectionedEndpoint) UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) { return nil, syserr.ErrConnectionRefused } // Connect attempts to directly connect to another Endpoint. // Implements Endpoint.Connect. -func (e *connectionedEndpoint) Connect(server BoundEndpoint) *syserr.Error { +func (e *connectionedEndpoint) Connect(ctx context.Context, server BoundEndpoint) *syserr.Error { returnConnect := func(r Receiver, ce ConnectedEndpoint) { e.receiver = r e.connected = ce } - return server.BidirectionalConnect(e, returnConnect) + return server.BidirectionalConnect(ctx, e, returnConnect) } // Listen starts listening on the connection. @@ -426,13 +436,13 @@ func (e *connectionedEndpoint) Bind(addr tcpip.FullAddress, commit func() *syser // SendMsg writes data and a control message to the endpoint's peer. // This method does not block if the data cannot be written. -func (e *connectionedEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { +func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { // Stream sockets do not support specifying the endpoint. Seqpacket // sockets ignore the passed endpoint. if e.stype == linux.SOCK_STREAM && to != nil { return 0, syserr.ErrNotSupported } - return e.baseEndpoint.SendMsg(data, c, to) + return e.baseEndpoint.SendMsg(ctx, data, c, to) } // Readiness returns the current readiness of the connectionedEndpoint. For diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go index 81ebfba10..c7f7c5b16 100644 --- a/pkg/sentry/socket/unix/transport/connectionless.go +++ b/pkg/sentry/socket/unix/transport/connectionless.go @@ -15,14 +15,15 @@ package transport import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/waiter" ) // connectionlessEndpoint is a unix endpoint for unix sockets that support operating in -// a conectionless fashon. +// a connectionless fashon. // // Specifically, this means datagram unix sockets not created with // socketpair(2). @@ -32,10 +33,17 @@ type connectionlessEndpoint struct { baseEndpoint } +var ( + _ = BoundEndpoint((*connectionlessEndpoint)(nil)) + _ = Endpoint((*connectionlessEndpoint)(nil)) +) + // NewConnectionless creates a new unbound dgram endpoint. -func NewConnectionless() Endpoint { +func NewConnectionless(ctx context.Context) Endpoint { ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}} - ep.receiver = &queueReceiver{readQueue: &queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}} + q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit} + q.EnableLeakCheck("transport.queue") + ep.receiver = &queueReceiver{readQueue: &q} return ep } @@ -46,38 +54,33 @@ func (e *connectionlessEndpoint) isBound() bool { // Close puts the endpoint in a closed state and frees all resources associated // with it. -// -// The socket will be a fresh state after a call to close and may be reused. -// That is, close may be used to "unbind" or "disconnect" the socket in error -// paths. func (e *connectionlessEndpoint) Close() { e.Lock() - var r Receiver - if e.Connected() { - e.receiver.CloseRecv() - r = e.receiver - e.receiver = nil - + if e.connected != nil { e.connected.Release() e.connected = nil } + if e.isBound() { e.path = "" } + + e.receiver.CloseRecv() + r := e.receiver + e.receiver = nil e.Unlock() - if r != nil { - r.CloseNotify() - r.Release() - } + + r.CloseNotify() + r.Release() } // BidirectionalConnect implements BoundEndpoint.BidirectionalConnect. -func (e *connectionlessEndpoint) BidirectionalConnect(ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { +func (e *connectionlessEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { return syserr.ErrConnectionRefused } // UnidirectionalConnect implements BoundEndpoint.UnidirectionalConnect. -func (e *connectionlessEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error) { +func (e *connectionlessEndpoint) UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) { e.Lock() r := e.receiver e.Unlock() @@ -96,12 +99,12 @@ func (e *connectionlessEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *sy // SendMsg writes data and a control message to the specified endpoint. // This method does not block if the data cannot be written. -func (e *connectionlessEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { +func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { if to == nil { - return e.baseEndpoint.SendMsg(data, c, nil) + return e.baseEndpoint.SendMsg(ctx, data, c, nil) } - connected, err := to.UnidirectionalConnect() + connected, err := to.UnidirectionalConnect(ctx) if err != nil { return 0, syserr.ErrInvalidEndpointState } @@ -124,13 +127,16 @@ func (e *connectionlessEndpoint) Type() linux.SockType { } // Connect attempts to connect directly to server. -func (e *connectionlessEndpoint) Connect(server BoundEndpoint) *syserr.Error { - connected, err := server.UnidirectionalConnect() +func (e *connectionlessEndpoint) Connect(ctx context.Context, server BoundEndpoint) *syserr.Error { + connected, err := server.UnidirectionalConnect(ctx) if err != nil { return err } e.Lock() + if e.connected != nil { + e.connected.Release() + } e.connected = connected e.Unlock() diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go index b650caae7..0415fae9a 100644 --- a/pkg/sentry/socket/unix/transport/queue.go +++ b/pkg/sentry/socket/unix/transport/queue.go @@ -17,9 +17,9 @@ package transport import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/waiter" ) // queue is a buffer queue. @@ -100,7 +100,7 @@ func (q *queue) IsWritable() bool { // Enqueue adds an entry to the data queue if room is available. // -// If truncate is true, Enqueue may truncate the message beforing enqueuing it. +// If truncate is true, Enqueue may truncate the message before enqueuing it. // Otherwise, the entire message must fit. If n < e.Length(), err indicates why. // // If notify is true, ReaderQueue.Notify must be called: diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 5c55c529e..b0765ba55 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -19,11 +19,12 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // initialLimit is the starting limit for the socket buffers. @@ -120,13 +121,13 @@ type Endpoint interface { // CMTruncated indicates that the numRights hint was used to receive fewer // than the total available SCM_RIGHTS FDs. Additional truncation may be // required by the caller. - RecvMsg(data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error) + RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error) // SendMsg writes data and a control message to the endpoint's peer. // This method does not block if the data cannot be written. // // SendMsg does not take ownership of any of its arguments on error. - SendMsg([][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error) + SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error) // Connect connects this endpoint directly to another. // @@ -134,7 +135,7 @@ type Endpoint interface { // endpoint passed in as a parameter. // // The error codes are the same as Connect. - Connect(server BoundEndpoint) *syserr.Error + Connect(ctx context.Context, server BoundEndpoint) *syserr.Error // Shutdown closes the read and/or write end of the endpoint connection // to its peer. @@ -215,7 +216,7 @@ type BoundEndpoint interface { // // This method will return syserr.ErrConnectionRefused on endpoints with a // type that isn't SockStream or SockSeqpacket. - BidirectionalConnect(ep ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error + BidirectionalConnect(ctx context.Context, ep ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error // UnidirectionalConnect establishes a write-only connection to a unix // endpoint. @@ -225,7 +226,7 @@ type BoundEndpoint interface { // // This method will return syserr.ErrConnectionRefused on a non-SockDgram // endpoint. - UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error) + UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) // Passcred returns whether or not the SO_PASSCRED socket option is // enabled on this end. @@ -776,7 +777,7 @@ func (e *baseEndpoint) Connected() bool { } // RecvMsg reads data and a control message from the endpoint. -func (e *baseEndpoint) RecvMsg(data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) { +func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) { e.Lock() if e.receiver == nil { @@ -802,7 +803,7 @@ func (e *baseEndpoint) RecvMsg(data [][]byte, creds bool, numRights uintptr, pee // SendMsg writes data and a control message to the endpoint's peer. // This method does not block if the data cannot be written. -func (e *baseEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { +func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) { e.Lock() if !e.Connected() { e.Unlock() diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index b07e8d67b..637168714 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -21,24 +21,23 @@ import ( "strings" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserr" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/waiter" ) // SocketOperations is a Unix socket. It is similar to an epsocket, except it @@ -64,15 +63,18 @@ type SocketOperations struct { func New(ctx context.Context, endpoint transport.Endpoint, stype linux.SockType) *fs.File { dirent := socket.NewDirent(ctx, unixSocketDevice) defer dirent.DecRef() - return NewWithDirent(ctx, dirent, endpoint, stype, fs.FileFlags{Read: true, Write: true}) + return NewWithDirent(ctx, dirent, endpoint, stype, fs.FileFlags{Read: true, Write: true, NonSeekable: true}) } // NewWithDirent creates a new unix socket using an existing dirent. func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, stype linux.SockType, flags fs.FileFlags) *fs.File { - return fs.NewFile(ctx, d, flags, &SocketOperations{ + s := SocketOperations{ ep: ep, stype: stype, - }) + } + s.EnableLeakCheck("unix.SocketOperations") + + return fs.NewFile(ctx, d, flags, &s) } // DecRef implements RefCounter.DecRef. @@ -108,7 +110,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint { // extractPath extracts and validates the address. func extractPath(sockaddr []byte) (string, *syserr.Error) { - addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr) + addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr, true /* strict */) if err != nil { return "", err } @@ -152,7 +154,7 @@ func (s *SocketOperations) GetSockName(t *kernel.Task) (interface{}, uint32, *sy } // Ioctl implements fs.FileOperations.Ioctl. -func (s *SocketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { return epsocket.Ioctl(ctx, s.ep, io, args) } @@ -191,7 +193,7 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (transport.Endpoint, * // Accept implements the linux syscall accept(2) for sockets backed by // a transport.Endpoint. -func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) { +func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) { // Issue the accept request to get the new endpoint. ep, err := s.ep.Accept() if err != nil { @@ -226,10 +228,9 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, } } - fdFlags := kernel.FDFlags{ + fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{ CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, - } - fd, e := t.FDMap().NewFDFrom(0, ns, fdFlags, t.ThreadGroup().Limits()) + }) if e != nil { return 0, nil, 0, syserr.FromError(e) } @@ -363,7 +364,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo defer ep.Release() // Connect the server endpoint. - return s.ep.Connect(ep) + return s.ep.Connect(t, ep) } // Writev implements fs.FileOperations.Write. @@ -372,11 +373,12 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO ctrl := control.New(t, s.ep, nil) if src.NumBytes() == 0 { - nInt, err := s.ep.SendMsg([][]byte{}, ctrl, nil) + nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil) return int64(nInt), err.ToError() } return src.CopyInTo(ctx, &EndpointWriter{ + Ctx: ctx, Endpoint: s.ep, Control: ctrl, To: nil, @@ -387,6 +389,7 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO // a transport.Endpoint. func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { w := EndpointWriter{ + Ctx: t, Endpoint: s.ep, Control: controlMessages.Unix, To: nil, @@ -486,6 +489,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS return 0, nil } return dst.CopyOutFrom(ctx, &EndpointReader{ + Ctx: ctx, Endpoint: s.ep, NumRights: 0, Peek: false, @@ -522,6 +526,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags } r := EndpointReader{ + Ctx: t, Endpoint: s.ep, Creds: wantCreds, NumRights: uintptr(numRights), @@ -635,9 +640,9 @@ func (*provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs var ep transport.Endpoint switch stype { case linux.SOCK_DGRAM: - ep = transport.NewConnectionless() + ep = transport.NewConnectionless(t) case linux.SOCK_SEQPACKET, linux.SOCK_STREAM: - ep = transport.NewConnectioned(stype, t.Kernel()) + ep = transport.NewConnectioned(t, stype, t.Kernel()) default: return nil, syserr.ErrInvalidArgument } @@ -660,7 +665,7 @@ func (*provider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.F } // Create the endpoints and sockets. - ep1, ep2 := transport.NewPair(stype, t.Kernel()) + ep1, ep2 := transport.NewPair(t, stype, t.Kernel()) s1 := New(t, ep1, stype) s2 := New(t, ep2, stype) diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD index cee18f681..f297ef3b7 100644 --- a/pkg/sentry/state/BUILD +++ b/pkg/sentry/state/BUILD @@ -9,7 +9,7 @@ go_library( "state_metadata.go", "state_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/state", + importpath = "gvisor.dev/gvisor/pkg/sentry/state", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go index 27fde505b..026549756 100644 --- a/pkg/sentry/state/state.go +++ b/pkg/sentry/state/state.go @@ -19,12 +19,12 @@ import ( "fmt" "io" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/state/statefile" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/state/statefile" + "gvisor.dev/gvisor/pkg/syserror" ) var previousMetadata map[string]string diff --git a/pkg/sentry/state/state_metadata.go b/pkg/sentry/state/state_metadata.go index b8e128c40..cefd20b9b 100644 --- a/pkg/sentry/state/state_metadata.go +++ b/pkg/sentry/state/state_metadata.go @@ -18,7 +18,7 @@ import ( "fmt" "time" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // The save metadata keys for timestamp. diff --git a/pkg/sentry/state/state_unsafe.go b/pkg/sentry/state/state_unsafe.go index 7745b6ac6..d271c6fc9 100644 --- a/pkg/sentry/state/state_unsafe.go +++ b/pkg/sentry/state/state_unsafe.go @@ -20,7 +20,7 @@ import ( "time" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // CPUTime returns the CPU time usage by Sentry and app. diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD index eaaa4d118..445d25010 100644 --- a/pkg/sentry/strace/BUILD +++ b/pkg/sentry/strace/BUILD @@ -18,7 +18,7 @@ go_library( "strace.go", "syscalls.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/strace", + importpath = "gvisor.dev/gvisor/pkg/sentry/strace", visibility = ["//:sandbox"], deps = [ ":strace_go_proto", @@ -30,7 +30,6 @@ go_library( "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/socket/control", "//pkg/sentry/socket/epsocket", "//pkg/sentry/socket/netlink", @@ -47,7 +46,7 @@ proto_library( go_proto_library( name = "strace_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/strace/strace_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto", proto = ":strace_proto", visibility = ["//visibility:public"], ) diff --git a/pkg/sentry/strace/capability.go b/pkg/sentry/strace/capability.go index f85d6636e..3255dc18d 100644 --- a/pkg/sentry/strace/capability.go +++ b/pkg/sentry/strace/capability.go @@ -15,11 +15,11 @@ package strace import ( - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" ) -// CapabilityBitset is the set of capabilties in a bitset. +// CapabilityBitset is the set of capabilities in a bitset. var CapabilityBitset = abi.FlagSet{ { Flag: 1 << uint32(linux.CAP_CHOWN), diff --git a/pkg/sentry/strace/clone.go b/pkg/sentry/strace/clone.go index ff6a432c6..e99158712 100644 --- a/pkg/sentry/strace/clone.go +++ b/pkg/sentry/strace/clone.go @@ -17,7 +17,7 @@ package strace import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi" ) // CloneFlagSet is the set of clone(2) flags. diff --git a/pkg/sentry/strace/futex.go b/pkg/sentry/strace/futex.go index 24301bda6..d55c4080e 100644 --- a/pkg/sentry/strace/futex.go +++ b/pkg/sentry/strace/futex.go @@ -15,8 +15,8 @@ package strace import ( - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" ) // FutexCmd are the possible futex(2) commands. diff --git a/pkg/sentry/strace/open.go b/pkg/sentry/strace/open.go index 140727b02..e40bcb53b 100644 --- a/pkg/sentry/strace/open.go +++ b/pkg/sentry/strace/open.go @@ -17,7 +17,7 @@ package strace import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi" ) // OpenMode represents the mode to open(2) a file. diff --git a/pkg/sentry/strace/poll.go b/pkg/sentry/strace/poll.go index 15605187d..5187594a7 100644 --- a/pkg/sentry/strace/poll.go +++ b/pkg/sentry/strace/poll.go @@ -18,12 +18,11 @@ import ( "fmt" "strings" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // PollEventSet is the set of poll(2) event flags. @@ -50,7 +49,7 @@ func pollFD(t *kernel.Task, pfd *linux.PollFD, post bool) string { if post { revents = PollEventSet.Parse(uint64(pfd.REvents)) } - return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, kdefs.FD(pfd.FD)), PollEventSet.Parse(uint64(pfd.Events)), revents) + return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, pfd.FD), PollEventSet.Parse(uint64(pfd.Events)), revents) } func pollFDs(t *kernel.Task, addr usermem.Addr, nfds uint, post bool) string { diff --git a/pkg/sentry/strace/ptrace.go b/pkg/sentry/strace/ptrace.go index 485aacb8a..338bafc6c 100644 --- a/pkg/sentry/strace/ptrace.go +++ b/pkg/sentry/strace/ptrace.go @@ -15,8 +15,8 @@ package strace import ( - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" ) // PtraceRequestSet are the possible ptrace(2) requests. diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go index f82460e1c..5656d53eb 100644 --- a/pkg/sentry/strace/signal.go +++ b/pkg/sentry/strace/signal.go @@ -18,10 +18,10 @@ import ( "fmt" "strings" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // signalNames contains the names of all named signals. diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index 0b5ef84c4..386b40af7 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -18,15 +18,15 @@ import ( "fmt" "strings" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // SocketFamily are the possible socket(2) families. @@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string { switch family { case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX: - fa, err := epsocket.GetAddress(int(family), b) + fa, err := epsocket.GetAddress(int(family), b, true /* strict */) if err != nil { return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err) } diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index f4c1be4ce..311389547 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -24,17 +24,16 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bits" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - pb "gvisor.googlesource.com/gvisor/pkg/sentry/strace/strace_go_proto" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + pb "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // DefaultLogMaximumSize is the default LogMaximumSize. @@ -133,7 +132,7 @@ func path(t *kernel.Task, addr usermem.Addr) string { return fmt.Sprintf("%#x %s", addr, path) } -func fd(t *kernel.Task, fd kdefs.FD) string { +func fd(t *kernel.Task, fd int32) string { root := t.FSContext().RootDirectory() if root != nil { defer root.DecRef() @@ -151,7 +150,7 @@ func fd(t *kernel.Task, fd kdefs.FD) string { return fmt.Sprintf("AT_FDCWD %s", name) } - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { // Cast FD to uint64 to avoid printing negative hex. return fmt.Sprintf("%#x (bad FD)", uint64(fd)) @@ -375,7 +374,7 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo } switch i.format[arg] { case FD: - output = append(output, fd(t, kdefs.FD(args[arg].Int()))) + output = append(output, fd(t, args[arg].Int())) case WriteBuffer: output = append(output, dump(t, args[arg].Pointer(), args[arg+1].SizeT(), maximumBlobSize)) case WriteIOVec: diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go index eae2d6c12..3c389d375 100644 --- a/pkg/sentry/strace/syscalls.go +++ b/pkg/sentry/strace/syscalls.go @@ -15,9 +15,9 @@ package strace import ( - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // FormatSpecifier values describe how an individual syscall argument should be diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 877318fa9..79d972202 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -8,14 +8,13 @@ go_library( "epoll.go", "syscalls.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls", + importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/sentry/arch", "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/syserror", "//pkg/waiter", diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go index ec1eab331..11850dd0f 100644 --- a/pkg/sentry/syscalls/epoll.go +++ b/pkg/sentry/syscalls/epoll.go @@ -18,22 +18,20 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/waiter" ) // CreateEpoll implements the epoll_create(2) linux syscall. -func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) { +func CreateEpoll(t *kernel.Task, closeOnExec bool) (int32, error) { file := epoll.NewEventPoll(t) defer file.DecRef() - flags := kernel.FDFlags{ + fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ CloseOnExec: closeOnExec, - } - fd, err := t.FDMap().NewFDFrom(0, file, flags, t.ThreadGroup().Limits()) + }) if err != nil { return 0, err } @@ -42,16 +40,16 @@ func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) { } // AddEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_ADD. -func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { +func AddEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -68,16 +66,16 @@ func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags } // UpdateEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_MOD. -func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { +func UpdateEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -94,16 +92,16 @@ func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFl } // RemoveEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_DEL. -func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error { +func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -120,9 +118,9 @@ func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error { } // WaitEpoll implements the epoll_wait(2) linux syscall. -func WaitEpoll(t *kernel.Task, fd kdefs.FD, max int, timeout int) ([]epoll.Event, error) { +func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]epoll.Event, error) { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(fd) + epollfile := t.GetFile(fd) if epollfile == nil { return nil, syscall.EBADF } diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 1c057526b..33a40b9c6 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -49,7 +49,7 @@ go_library( "sys_write.go", "timespec.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux", + importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", visibility = ["//:sandbox"], deps = [ "//pkg/abi", @@ -71,7 +71,6 @@ go_library( "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", "//pkg/sentry/kernel/fasync", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/shm", @@ -86,6 +85,7 @@ go_library( "//pkg/sentry/syscalls", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/syserr", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 72146ea63..ac3905c5c 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -19,12 +19,12 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) var ( diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go index d83e12971..0c1b5ec27 100644 --- a/pkg/sentry/syscalls/linux/flags.go +++ b/pkg/sentry/syscalls/linux/flags.go @@ -15,8 +15,8 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // flagsToPermissions returns a Permissions object from Linux flags. diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 5251c2463..7bbbb5008 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -18,13 +18,13 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // AUDIT_ARCH_X86_64 identifies the Linux syscall API on AMD64, and is taken @@ -50,244 +50,244 @@ var AMD64 = &kernel.SyscallTable{ Table: map[uintptr]kernel.Syscall{ 0: syscalls.Supported("read", Read), 1: syscalls.Supported("write", Write), - 2: syscalls.Supported("open", Open), + 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil), 3: syscalls.Supported("close", Close), - 4: syscalls.Undocumented("stat", Stat), - 5: syscalls.Undocumented("fstat", Fstat), - 6: syscalls.Undocumented("lstat", Lstat), - 7: syscalls.Undocumented("poll", Poll), - 8: syscalls.Undocumented("lseek", Lseek), - 9: syscalls.Undocumented("mmap", Mmap), - 10: syscalls.Undocumented("mprotect", Mprotect), - 11: syscalls.Undocumented("munmap", Munmap), - 12: syscalls.Undocumented("brk", Brk), - 13: syscalls.Undocumented("rt_sigaction", RtSigaction), - 14: syscalls.Undocumented("rt_sigprocmask", RtSigprocmask), - 15: syscalls.Undocumented("rt_sigreturn", RtSigreturn), - 16: syscalls.Undocumented("ioctl", Ioctl), - 17: syscalls.Undocumented("pread64", Pread64), - 18: syscalls.Undocumented("pwrite64", Pwrite64), - 19: syscalls.Undocumented("readv", Readv), - 20: syscalls.Undocumented("writev", Writev), - 21: syscalls.Undocumented("access", Access), - 22: syscalls.Undocumented("pipe", Pipe), - 23: syscalls.Undocumented("select", Select), - 24: syscalls.Undocumented("sched_yield", SchedYield), - 25: syscalls.Undocumented("mremap", Mremap), - 26: syscalls.Undocumented("msync", Msync), - 27: syscalls.Undocumented("mincore", Mincore), - 28: syscalls.Undocumented("madvise", Madvise), - 29: syscalls.Undocumented("shmget", Shmget), - 30: syscalls.Undocumented("shmat", Shmat), - 31: syscalls.Undocumented("shmctl", Shmctl), - 32: syscalls.Undocumented("dup", Dup), - 33: syscalls.Undocumented("dup2", Dup2), - 34: syscalls.Undocumented("pause", Pause), - 35: syscalls.Undocumented("nanosleep", Nanosleep), - 36: syscalls.Undocumented("getitimer", Getitimer), - 37: syscalls.Undocumented("alarm", Alarm), - 38: syscalls.Undocumented("setitimer", Setitimer), - 39: syscalls.Undocumented("getpid", Getpid), - 40: syscalls.Undocumented("sendfile", Sendfile), - 41: syscalls.Undocumented("socket", Socket), - 42: syscalls.Undocumented("connect", Connect), - 43: syscalls.Undocumented("accept", Accept), - 44: syscalls.Undocumented("sendto", SendTo), - 45: syscalls.Undocumented("recvfrom", RecvFrom), - 46: syscalls.Undocumented("sendmsg", SendMsg), - 47: syscalls.Undocumented("recvmsg", RecvMsg), - 48: syscalls.Undocumented("shutdown", Shutdown), - 49: syscalls.Undocumented("bind", Bind), - 50: syscalls.Undocumented("listen", Listen), - 51: syscalls.Undocumented("getsockname", GetSockName), - 52: syscalls.Undocumented("getpeername", GetPeerName), - 53: syscalls.Undocumented("socketpair", SocketPair), - 54: syscalls.Undocumented("setsockopt", SetSockOpt), - 55: syscalls.Undocumented("getsockopt", GetSockOpt), - 56: syscalls.Undocumented("clone", Clone), - 57: syscalls.Undocumented("fork", Fork), - 58: syscalls.Undocumented("vfork", Vfork), - 59: syscalls.Undocumented("execve", Execve), - 60: syscalls.Undocumented("exit", Exit), - 61: syscalls.Undocumented("wait4", Wait4), - 62: syscalls.Undocumented("kill", Kill), - 63: syscalls.Undocumented("uname", Uname), - 64: syscalls.Undocumented("semget", Semget), - 65: syscalls.Undocumented("semop", Semop), - 66: syscalls.Undocumented("semctl", Semctl), - 67: syscalls.Undocumented("shmdt", Shmdt), + 4: syscalls.Supported("stat", Stat), + 5: syscalls.Supported("fstat", Fstat), + 6: syscalls.Supported("lstat", Lstat), + 7: syscalls.Supported("poll", Poll), + 8: syscalls.Supported("lseek", Lseek), + 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), + 10: syscalls.Supported("mprotect", Mprotect), + 11: syscalls.Supported("munmap", Munmap), + 12: syscalls.Supported("brk", Brk), + 13: syscalls.Supported("rt_sigaction", RtSigaction), + 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask), + 15: syscalls.Supported("rt_sigreturn", RtSigreturn), + 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), + 17: syscalls.Supported("pread64", Pread64), + 18: syscalls.Supported("pwrite64", Pwrite64), + 19: syscalls.Supported("readv", Readv), + 20: syscalls.Supported("writev", Writev), + 21: syscalls.Supported("access", Access), + 22: syscalls.Supported("pipe", Pipe), + 23: syscalls.Supported("select", Select), + 24: syscalls.Supported("sched_yield", SchedYield), + 25: syscalls.Supported("mremap", Mremap), + 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), + 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil), + 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), + 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), + 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), + 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), + 32: syscalls.Supported("dup", Dup), + 33: syscalls.Supported("dup2", Dup2), + 34: syscalls.Supported("pause", Pause), + 35: syscalls.Supported("nanosleep", Nanosleep), + 36: syscalls.Supported("getitimer", Getitimer), + 37: syscalls.Supported("alarm", Alarm), + 38: syscalls.Supported("setitimer", Setitimer), + 39: syscalls.Supported("getpid", Getpid), + 40: syscalls.Supported("sendfile", Sendfile), + 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil), + 42: syscalls.Supported("connect", Connect), + 43: syscalls.Supported("accept", Accept), + 44: syscalls.Supported("sendto", SendTo), + 45: syscalls.Supported("recvfrom", RecvFrom), + 46: syscalls.Supported("sendmsg", SendMsg), + 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), + 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), + 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), + 50: syscalls.Supported("listen", Listen), + 51: syscalls.Supported("getsockname", GetSockName), + 52: syscalls.Supported("getpeername", GetPeerName), + 53: syscalls.Supported("socketpair", SocketPair), + 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), + 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil), + 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), + 57: syscalls.Supported("fork", Fork), + 58: syscalls.Supported("vfork", Vfork), + 59: syscalls.Supported("execve", Execve), + 60: syscalls.Supported("exit", Exit), + 61: syscalls.Supported("wait4", Wait4), + 62: syscalls.Supported("kill", Kill), + 63: syscalls.Supported("uname", Uname), + 64: syscalls.Supported("semget", Semget), + 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), + 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), + 67: syscalls.Supported("shmdt", Shmdt), 68: syscalls.ErrorWithEvent("msgget", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 69: syscalls.ErrorWithEvent("msgsnd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 70: syscalls.ErrorWithEvent("msgrcv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 71: syscalls.ErrorWithEvent("msgctl", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 72: syscalls.Undocumented("fcntl", Fcntl), - 73: syscalls.Undocumented("flock", Flock), - 74: syscalls.Undocumented("fsync", Fsync), - 75: syscalls.Undocumented("fdatasync", Fdatasync), - 76: syscalls.Undocumented("truncate", Truncate), - 77: syscalls.Undocumented("ftruncate", Ftruncate), - 78: syscalls.Undocumented("getdents", Getdents), - 79: syscalls.Undocumented("getcwd", Getcwd), - 80: syscalls.Undocumented("chdir", Chdir), - 81: syscalls.Undocumented("fchdir", Fchdir), - 82: syscalls.Undocumented("rename", Rename), - 83: syscalls.Undocumented("mkdir", Mkdir), - 84: syscalls.Undocumented("rmdir", Rmdir), - 85: syscalls.Undocumented("creat", Creat), - 86: syscalls.Undocumented("link", Link), - 87: syscalls.Undocumented("link", Unlink), - 88: syscalls.Undocumented("symlink", Symlink), - 89: syscalls.Undocumented("readlink", Readlink), - 90: syscalls.Undocumented("chmod", Chmod), - 91: syscalls.Undocumented("fchmod", Fchmod), - 92: syscalls.Undocumented("chown", Chown), - 93: syscalls.Undocumented("fchown", Fchown), - 94: syscalls.Undocumented("lchown", Lchown), - 95: syscalls.Undocumented("umask", Umask), - 96: syscalls.Undocumented("gettimeofday", Gettimeofday), - 97: syscalls.Undocumented("getrlimit", Getrlimit), - 98: syscalls.Undocumented("getrusage", Getrusage), - 99: syscalls.Undocumented("sysinfo", Sysinfo), - 100: syscalls.Undocumented("times", Times), - 101: syscalls.Undocumented("ptrace", Ptrace), - 102: syscalls.Undocumented("getuid", Getuid), - 103: syscalls.Undocumented("syslog", Syslog), - 104: syscalls.Undocumented("getgid", Getgid), - 105: syscalls.Undocumented("setuid", Setuid), - 106: syscalls.Undocumented("setgid", Setgid), - 107: syscalls.Undocumented("geteuid", Geteuid), - 108: syscalls.Undocumented("getegid", Getegid), - 109: syscalls.Undocumented("setpgid", Setpgid), - 110: syscalls.Undocumented("getppid", Getppid), - 111: syscalls.Undocumented("getpgrp", Getpgrp), - 112: syscalls.Undocumented("setsid", Setsid), - 113: syscalls.Undocumented("setreuid", Setreuid), - 114: syscalls.Undocumented("setregid", Setregid), - 115: syscalls.Undocumented("getgroups", Getgroups), - 116: syscalls.Undocumented("setgroups", Setgroups), - 117: syscalls.Undocumented("setresuid", Setresuid), - 118: syscalls.Undocumented("getresuid", Getresuid), - 119: syscalls.Undocumented("setresgid", Setresgid), - 120: syscalls.Undocumented("setresgid", Getresgid), - 121: syscalls.Undocumented("getpgid", Getpgid), + 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), + 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), + 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), + 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), + 76: syscalls.Supported("truncate", Truncate), + 77: syscalls.Supported("ftruncate", Ftruncate), + 78: syscalls.Supported("getdents", Getdents), + 79: syscalls.Supported("getcwd", Getcwd), + 80: syscalls.Supported("chdir", Chdir), + 81: syscalls.Supported("fchdir", Fchdir), + 82: syscalls.Supported("rename", Rename), + 83: syscalls.Supported("mkdir", Mkdir), + 84: syscalls.Supported("rmdir", Rmdir), + 85: syscalls.Supported("creat", Creat), + 86: syscalls.Supported("link", Link), + 87: syscalls.Supported("unlink", Unlink), + 88: syscalls.Supported("symlink", Symlink), + 89: syscalls.Supported("readlink", Readlink), + 90: syscalls.Supported("chmod", Chmod), + 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), + 92: syscalls.Supported("chown", Chown), + 93: syscalls.Supported("fchown", Fchown), + 94: syscalls.Supported("lchown", Lchown), + 95: syscalls.Supported("umask", Umask), + 96: syscalls.Supported("gettimeofday", Gettimeofday), + 97: syscalls.Supported("getrlimit", Getrlimit), + 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil), + 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), + 100: syscalls.Supported("times", Times), + 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), + 102: syscalls.Supported("getuid", Getuid), + 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), + 104: syscalls.Supported("getgid", Getgid), + 105: syscalls.Supported("setuid", Setuid), + 106: syscalls.Supported("setgid", Setgid), + 107: syscalls.Supported("geteuid", Geteuid), + 108: syscalls.Supported("getegid", Getegid), + 109: syscalls.Supported("setpgid", Setpgid), + 110: syscalls.Supported("getppid", Getppid), + 111: syscalls.Supported("getpgrp", Getpgrp), + 112: syscalls.Supported("setsid", Setsid), + 113: syscalls.Supported("setreuid", Setreuid), + 114: syscalls.Supported("setregid", Setregid), + 115: syscalls.Supported("getgroups", Getgroups), + 116: syscalls.Supported("setgroups", Setgroups), + 117: syscalls.Supported("setresuid", Setresuid), + 118: syscalls.Supported("getresuid", Getresuid), + 119: syscalls.Supported("setresgid", Setresgid), + 120: syscalls.Supported("getresgid", Getresgid), + 121: syscalls.Supported("getpgid", Getpgid), 122: syscalls.ErrorWithEvent("setfsuid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) 123: syscalls.ErrorWithEvent("setfsgid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 124: syscalls.Undocumented("getsid", Getsid), - 125: syscalls.Undocumented("capget", Capget), - 126: syscalls.Undocumented("capset", Capset), - 127: syscalls.Undocumented("rt_sigpending", RtSigpending), - 128: syscalls.Undocumented("rt_sigtimedwait", RtSigtimedwait), - 129: syscalls.Undocumented("rt_sigqueueinfo", RtSigqueueinfo), - 130: syscalls.Undocumented("rt_sigsuspend", RtSigsuspend), - 131: syscalls.Undocumented("sigaltstack", Sigaltstack), - 132: syscalls.Undocumented("utime", Utime), - 133: syscalls.Undocumented("mknod", Mknod), + 124: syscalls.Supported("getsid", Getsid), + 125: syscalls.Supported("capget", Capget), + 126: syscalls.Supported("capset", Capset), + 127: syscalls.Supported("rt_sigpending", RtSigpending), + 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), + 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), + 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend), + 131: syscalls.Supported("sigaltstack", Sigaltstack), + 132: syscalls.Supported("utime", Utime), + 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil), 134: syscalls.Error("uselib", syscall.ENOSYS, "Obsolete", nil), 135: syscalls.ErrorWithEvent("personality", syscall.EINVAL, "Unable to change personality.", nil), 136: syscalls.ErrorWithEvent("ustat", syscall.ENOSYS, "Needs filesystem support.", nil), - 137: syscalls.Undocumented("statfs", Statfs), - 138: syscalls.Undocumented("fstatfs", Fstatfs), + 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), + 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), 139: syscalls.ErrorWithEvent("sysfs", syscall.ENOSYS, "", []string{"gvisor.dev/issue/165"}), - 140: syscalls.Undocumented("getpriority", Getpriority), - 141: syscalls.Undocumented("setpriority", Setpriority), + 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), + 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), - 143: syscalls.Undocumented("sched_getparam", SchedGetparam), - 144: syscalls.Undocumented("sched_setscheduler", SchedSetscheduler), - 145: syscalls.Undocumented("sched_getscheduler", SchedGetscheduler), - 146: syscalls.Undocumented("sched_get_priority_max", SchedGetPriorityMax), - 147: syscalls.Undocumented("sched_get_priority_min", SchedGetPriorityMin), + 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), + 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), + 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), + 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), + 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syscall.EPERM, "", nil), - 149: syscalls.Undocumented("mlock", Mlock), - 150: syscalls.Undocumented("munlock", Munlock), - 151: syscalls.Undocumented("mlockall", Mlockall), - 152: syscalls.Undocumented("munlockall", Munlockall), + 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), 154: syscalls.Error("modify_ldt", syscall.EPERM, "", nil), 155: syscalls.Error("pivot_root", syscall.EPERM, "", nil), - 156: syscalls.Error("sysctl", syscall.EPERM, `syscall is "worthless"`, nil), - 157: syscalls.Undocumented("prctl", Prctl), - 158: syscalls.Undocumented("arch_prctl", ArchPrctl), + 156: syscalls.Error("sysctl", syscall.EPERM, "Deprecated. Use /proc/sys instead.", nil), + 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), + 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil), 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), - 160: syscalls.Undocumented("setrlimit", Setrlimit), - 161: syscalls.Undocumented("chroot", Chroot), - 162: syscalls.Undocumented("sync", Sync), + 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), + 161: syscalls.Supported("chroot", Chroot), + 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), - 165: syscalls.Undocumented("mount", Mount), - 166: syscalls.Undocumented("umount2", Umount2), + 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), + 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), - 170: syscalls.Undocumented("sethostname", Sethostname), - 171: syscalls.Undocumented("setdomainname", Setdomainname), + 170: syscalls.Supported("sethostname", Sethostname), + 171: syscalls.Supported("setdomainname", Setdomainname), 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil), 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil), 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil), 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), - 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in > 2.6", nil), - 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in > 2.6", nil), + 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in Linux > 2.6.", nil), + 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in Linux > 2.6.", nil), 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations - 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Does not exist > 3.1", nil), - 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux", nil), - 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux", nil), - 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux", nil), - 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux", nil), - 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux", nil), - 186: syscalls.Undocumented("gettid", Gettid), + 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Removed after Linux 3.1.", nil), + 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux.", nil), + 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux.", nil), + 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux.", nil), + 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux.", nil), + 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux.", nil), + 186: syscalls.Supported("gettid", Gettid), 187: syscalls.ErrorWithEvent("readahead", syscall.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341) - 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 200: syscalls.Undocumented("tkill", Tkill), - 201: syscalls.Undocumented("time", Time), - 202: syscalls.Undocumented("futex", Futex), - 203: syscalls.Undocumented("sched_setaffinity", SchedSetaffinity), - 204: syscalls.Undocumented("sched_getaffinity", SchedGetaffinity), + 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 200: syscalls.Supported("tkill", Tkill), + 201: syscalls.Supported("time", Time), + 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), + 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), + 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), 205: syscalls.Error("set_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), - 206: syscalls.Undocumented("io_setup", IoSetup), - 207: syscalls.Undocumented("io_destroy", IoDestroy), - 208: syscalls.Undocumented("io_getevents", IoGetevents), - 209: syscalls.Undocumented("io_submit", IoSubmit), - 210: syscalls.Undocumented("io_cancel", IoCancel), + 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 211: syscalls.Error("get_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), - 213: syscalls.Undocumented("epoll_create", EpollCreate), - 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated", nil), - 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated", nil), - 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since 3.16", nil), - 217: syscalls.Undocumented("getdents64", Getdents64), - 218: syscalls.Undocumented("set_tid_address", SetTidAddress), - 219: syscalls.Undocumented("restart_syscall", RestartSyscall), + 213: syscalls.Supported("epoll_create", EpollCreate), + 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated.", nil), + 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated.", nil), + 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since Linux 3.16.", nil), + 217: syscalls.Supported("getdents64", Getdents64), + 218: syscalls.Supported("set_tid_address", SetTidAddress), + 219: syscalls.Supported("restart_syscall", RestartSyscall), 220: syscalls.ErrorWithEvent("semtimedop", syscall.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920) - 221: syscalls.Undocumented("fadvise64", Fadvise64), - 222: syscalls.Undocumented("timer_create", TimerCreate), - 223: syscalls.Undocumented("timer_settime", TimerSettime), - 224: syscalls.Undocumented("timer_gettime", TimerGettime), - 225: syscalls.Undocumented("timer_getoverrun", TimerGetoverrun), - 226: syscalls.Undocumented("timer_delete", TimerDelete), - 227: syscalls.Undocumented("clock_settime", ClockSettime), - 228: syscalls.Undocumented("clock_gettime", ClockGettime), - 229: syscalls.Undocumented("clock_getres", ClockGetres), - 230: syscalls.Undocumented("clock_nanosleep", ClockNanosleep), - 231: syscalls.Undocumented("exit_group", ExitGroup), - 232: syscalls.Undocumented("epoll_wait", EpollWait), - 233: syscalls.Undocumented("epoll_ctl", EpollCtl), - 234: syscalls.Undocumented("tgkill", Tgkill), - 235: syscalls.Undocumented("utimes", Utimes), + 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), + 222: syscalls.Supported("timer_create", TimerCreate), + 223: syscalls.Supported("timer_settime", TimerSettime), + 224: syscalls.Supported("timer_gettime", TimerGettime), + 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun), + 226: syscalls.Supported("timer_delete", TimerDelete), + 227: syscalls.Supported("clock_settime", ClockSettime), + 228: syscalls.Supported("clock_gettime", ClockGettime), + 229: syscalls.Supported("clock_getres", ClockGetres), + 230: syscalls.Supported("clock_nanosleep", ClockNanosleep), + 231: syscalls.Supported("exit_group", ExitGroup), + 232: syscalls.Supported("epoll_wait", EpollWait), + 233: syscalls.Supported("epoll_ctl", EpollCtl), + 234: syscalls.Supported("tgkill", Tgkill), + 235: syscalls.Supported("utimes", Utimes), 236: syscalls.Error("vserver", syscall.ENOSYS, "Not implemented by Linux", nil), 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), - 238: syscalls.Undocumented("set_mempolicy", SetMempolicy), - 239: syscalls.Undocumented("get_mempolicy", GetMempolicy), + 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), + 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), 240: syscalls.ErrorWithEvent("mq_open", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 241: syscalls.ErrorWithEvent("mq_unlink", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 242: syscalls.ErrorWithEvent("mq_timedsend", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) @@ -295,69 +295,69 @@ var AMD64 = &kernel.SyscallTable{ 244: syscalls.ErrorWithEvent("mq_notify", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 245: syscalls.ErrorWithEvent("mq_getsetattr", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), - 247: syscalls.Undocumented("waitid", Waitid), - 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user", nil), - 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user", nil), - 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user", nil), + 247: syscalls.Supported("waitid", Waitid), + 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user.", nil), + 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user.", nil), + 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user.", nil), 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 253: syscalls.Undocumented("inotify_init", InotifyInit), - 254: syscalls.Undocumented("inotify_add_watch", InotifyAddWatch), - 255: syscalls.Undocumented("inotify_rm_watch", InotifyRmWatch), + 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil), + 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), + 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), - 257: syscalls.Undocumented("openat", Openat), - 258: syscalls.Undocumented("mkdirat", Mkdirat), - 259: syscalls.Undocumented("mknodat", Mknodat), - 260: syscalls.Undocumented("fchownat", Fchownat), - 261: syscalls.Undocumented("futimesat", Futimesat), - 262: syscalls.Undocumented("fstatat", Fstatat), - 263: syscalls.Undocumented("unlinkat", Unlinkat), - 264: syscalls.Undocumented("renameat", Renameat), - 265: syscalls.Undocumented("linkat", Linkat), - 266: syscalls.Undocumented("symlinkat", Symlinkat), - 267: syscalls.Undocumented("readlinkat", Readlinkat), - 268: syscalls.Undocumented("fchmodat", Fchmodat), - 269: syscalls.Undocumented("faccessat", Faccessat), - 270: syscalls.Undocumented("pselect", Pselect), - 271: syscalls.Undocumented("ppoll", Ppoll), - 272: syscalls.Undocumented("unshare", Unshare), - 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete", nil), - 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete", nil), - 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 277: syscalls.Undocumented("sync_file_range", SyncFileRange), + 257: syscalls.Supported("openat", Openat), + 258: syscalls.Supported("mkdirat", Mkdirat), + 259: syscalls.Supported("mknodat", Mknodat), + 260: syscalls.Supported("fchownat", Fchownat), + 261: syscalls.Supported("futimesat", Futimesat), + 262: syscalls.Supported("fstatat", Fstatat), + 263: syscalls.Supported("unlinkat", Unlinkat), + 264: syscalls.Supported("renameat", Renameat), + 265: syscalls.Supported("linkat", Linkat), + 266: syscalls.Supported("symlinkat", Symlinkat), + 267: syscalls.Supported("readlinkat", Readlinkat), + 268: syscalls.Supported("fchmodat", Fchmodat), + 269: syscalls.Supported("faccessat", Faccessat), + 270: syscalls.Supported("pselect", Pselect), + 271: syscalls.Supported("ppoll", Ppoll), + 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil), + 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete.", nil), + 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete.", nil), + 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), 278: syscalls.ErrorWithEvent("vmsplice", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) - 280: syscalls.Undocumented("utimensat", Utimensat), - 281: syscalls.Undocumented("epoll_pwait", EpollPwait), + 280: syscalls.Supported("utimensat", Utimensat), + 281: syscalls.Supported("epoll_pwait", EpollPwait), 282: syscalls.ErrorWithEvent("signalfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426) - 283: syscalls.Undocumented("timerfd_create", TimerfdCreate), - 284: syscalls.Undocumented("eventfd", Eventfd), - 285: syscalls.Undocumented("fallocate", Fallocate), - 286: syscalls.Undocumented("timerfd_settime", TimerfdSettime), - 287: syscalls.Undocumented("timerfd_gettime", TimerfdGettime), - 288: syscalls.Undocumented("accept4", Accept4), + 283: syscalls.Supported("timerfd_create", TimerfdCreate), + 284: syscalls.Supported("eventfd", Eventfd), + 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), + 286: syscalls.Supported("timerfd_settime", TimerfdSettime), + 287: syscalls.Supported("timerfd_gettime", TimerfdGettime), + 288: syscalls.Supported("accept4", Accept4), 289: syscalls.ErrorWithEvent("signalfd4", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426) - 290: syscalls.Undocumented("eventfd2", Eventfd2), - 291: syscalls.Undocumented("epoll_create1", EpollCreate1), - 292: syscalls.Undocumented("dup3", Dup3), - 293: syscalls.Undocumented("pipe2", Pipe2), - 294: syscalls.Undocumented("inotify_init1", InotifyInit1), - 295: syscalls.Undocumented("preadv", Preadv), - 296: syscalls.Undocumented("pwritev", Pwritev), - 297: syscalls.Undocumented("rt_tgsigqueueinfo", RtTgsigqueueinfo), + 290: syscalls.Supported("eventfd2", Eventfd2), + 291: syscalls.Supported("epoll_create1", EpollCreate1), + 292: syscalls.Supported("dup3", Dup3), + 293: syscalls.Supported("pipe2", Pipe2), + 294: syscalls.Supported("inotify_init1", InotifyInit1), + 295: syscalls.Supported("preadv", Preadv), + 296: syscalls.Supported("pwritev", Pwritev), + 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), 298: syscalls.ErrorWithEvent("perf_event_open", syscall.ENODEV, "No support for perf counters", nil), - 299: syscalls.Undocumented("recvmmsg", RecvMMsg), + 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), 300: syscalls.ErrorWithEvent("fanotify_init", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 301: syscalls.ErrorWithEvent("fanotify_mark", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 302: syscalls.Undocumented("prlimit64", Prlimit64), - 303: syscalls.ErrorWithEvent("name_to_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil), - 304: syscalls.ErrorWithEvent("open_by_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil), + 302: syscalls.Supported("prlimit64", Prlimit64), + 303: syscalls.Error("name_to_handle_at", syscall.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 304: syscalls.Error("open_by_handle_at", syscall.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), - 306: syscalls.Undocumented("syncfs", Syncfs), - 307: syscalls.Undocumented("sendmmsg", SendMMsg), + 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), + 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), 308: syscalls.ErrorWithEvent("setns", syscall.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) - 309: syscalls.Undocumented("getcpu", Getcpu), + 309: syscalls.Supported("getcpu", Getcpu), 310: syscalls.ErrorWithEvent("process_vm_readv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 311: syscalls.ErrorWithEvent("process_vm_writev", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), @@ -365,20 +365,21 @@ var AMD64 = &kernel.SyscallTable{ 314: syscalls.ErrorWithEvent("sched_setattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) 315: syscalls.ErrorWithEvent("sched_getattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) 316: syscalls.ErrorWithEvent("renameat2", syscall.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) - 317: syscalls.Undocumented("seccomp", Seccomp), - 318: syscalls.Undocumented("getrandom", GetRandom), - 319: syscalls.Undocumented("memfd_create", MemfdCreate), + 317: syscalls.Supported("seccomp", Seccomp), + 318: syscalls.Supported("getrandom", GetRandom), + 319: syscalls.Supported("memfd_create", MemfdCreate), 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil), 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 322: syscalls.ErrorWithEvent("execveat", syscall.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836) 323: syscalls.ErrorWithEvent("userfaultfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) 324: syscalls.ErrorWithEvent("membarrier", syscall.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897) - 325: syscalls.Undocumented("mlock2", Mlock2), + 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls after 325 are "backports" from versions of Linux after 4.4. 326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil), - 327: syscalls.Undocumented("preadv2", Preadv2), - 328: syscalls.Undocumented("pwritev2", Pwritev2), + 327: syscalls.Supported("preadv2", Preadv2), + 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), + 332: syscalls.Supported("statx", Statx), }, Emulate: map[usermem.Addr]uintptr{ diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index 5438b664b..00b7e7cf2 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -17,10 +17,10 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // copyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index 1b27b2415..f56411bfe 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -17,15 +17,14 @@ package linux import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // I/O commands. @@ -55,7 +54,7 @@ type ioCallback struct { OpCode uint16 ReqPrio int16 - FD uint32 + FD int32 Buf uint64 Bytes uint64 @@ -65,7 +64,7 @@ type ioCallback struct { Flags uint32 // eventfd to signal if IOCB_FLAG_RESFD is set in flags. - ResFD uint32 + ResFD int32 } // ioEvent describes an I/O result. @@ -292,7 +291,7 @@ func performCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *ioC // submitCallback processes a single callback. func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Addr) error { - file := t.FDMap().GetFile(kdefs.FD(cb.FD)) + file := t.GetFile(cb.FD) if file == nil { // File not found. return syserror.EBADF @@ -302,7 +301,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Ad // Was there an eventFD? Extract it. var eventFile *fs.File if cb.Flags&_IOCB_FLAG_RESFD != 0 { - eventFile = t.FDMap().GetFile(kdefs.FD(cb.ResFD)) + eventFile = t.GetFile(cb.ResFD) if eventFile == nil { // Bad FD. return syserror.EBADF diff --git a/pkg/sentry/syscalls/linux/sys_capability.go b/pkg/sentry/syscalls/linux/sys_capability.go index 622cb8d0d..adf5ea5f2 100644 --- a/pkg/sentry/syscalls/linux/sys_capability.go +++ b/pkg/sentry/syscalls/linux/sys_capability.go @@ -15,11 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) func lookupCaps(t *kernel.Task, tid kernel.ThreadID) (permitted, inheritable, effective auth.CapabilitySet, err error) { diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 1467feb4e..a0dd4d4e3 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -17,14 +17,13 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // EpollCreate1 implements the epoll_create1(2) linux syscall. @@ -61,9 +60,9 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // EpollCtl implements the epoll_ctl(2) linux syscall. func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - epfd := kdefs.FD(args[0].Int()) + epfd := args[0].Int() op := args[1].Int() - fd := kdefs.FD(args[2].Int()) + fd := args[2].Int() eventAddr := args[3].Pointer() // Capture the event state if needed. @@ -132,7 +131,7 @@ func copyOutEvents(t *kernel.Task, addr usermem.Addr, e []epoll.Event) error { // EpollWait implements the epoll_wait(2) linux syscall. func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - epfd := kdefs.FD(args[0].Int()) + epfd := args[0].Int() eventsAddr := args[1].Pointer() maxEvents := int(args[2].Int()) timeout := int(args[3].Int()) diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go index ca4ead488..5cfc2c3c8 100644 --- a/pkg/sentry/syscalls/linux/sys_eventfd.go +++ b/pkg/sentry/syscalls/linux/sys_eventfd.go @@ -17,10 +17,10 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" ) const ( @@ -47,10 +47,9 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc }) defer event.DecRef() - fd, err := t.FDMap().NewFDFrom(0, event, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, event, kernel.FDFlags{ CloseOnExec: flags&EFD_CLOEXEC != 0, - }, - t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 19f579930..eb6f5648f 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -17,31 +17,29 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/fasync" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // fileOpAt performs an operation on the second last component in the path. -func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string) error) error { +func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string, remainingTraversals uint) error) error { // Extract the last component. dir, name := fs.SplitLast(path) if dir == "/" { // Common case: we are accessing a file in the root. root := t.FSContext().RootDirectory() - err := fn(root, root, name) + err := fn(root, root, name, linux.MaxSymlinkTraversals) root.DecRef() return err } else if dir == "." && dirFD == linux.AT_FDCWD { @@ -49,19 +47,19 @@ func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dire // working directory; skip the look-up. wd := t.FSContext().WorkingDirectory() root := t.FSContext().RootDirectory() - err := fn(root, wd, name) + err := fn(root, wd, name, linux.MaxSymlinkTraversals) wd.DecRef() root.DecRef() return err } - return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - return fn(root, d, name) + return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error { + return fn(root, d, name, remainingTraversals) }) } // fileOpOn performs an operation on the last entry of the path. -func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { +func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error) error { var ( d *fs.Dirent // The file. wd *fs.Dirent // The working directory (if required.) @@ -79,7 +77,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func rel = wd } else { // Need to extract the given FD. - f = t.FDMap().GetFile(dirFD) + f = t.GetFile(dirFD) if f == nil { return syserror.EBADF } @@ -110,7 +108,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func return err } - err = fn(root, d) + err = fn(root, d, remainingTraversals) d.DecRef() return err } @@ -132,14 +130,14 @@ func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string return path, dirPath, nil } -func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd uintptr, err error) { +func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err } resolve := flags&linux.O_NOFOLLOW == 0 - err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // First check a few things about the filesystem before trying to get the file // reference. // @@ -185,8 +183,9 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u defer file.DecRef() // Success. - fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0} - newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return err } @@ -202,7 +201,7 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u return fd, err // Use result in frame. } -func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -211,7 +210,7 @@ func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileM return syserror.ENOENT } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -286,7 +285,7 @@ func Mknod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Mknodat implements the linux syscall mknodat(2). func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() path := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) // We don't need this argument until we support creation of device nodes. @@ -295,7 +294,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mknodAt(t, dirFD, path, mode) } -func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { +func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -304,45 +303,105 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod return 0, syserror.ENOENT } - err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { - if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - - fileFlags := linuxToFlags(flags) - // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. - fileFlags.LargeFile = true + fileFlags := linuxToFlags(flags) + // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. + fileFlags.LargeFile = true + + err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, parent *fs.Dirent, name string, remainingTraversals uint) error { + // Resolve the name to see if it exists, and follow any + // symlinks along the way. We must do the symlink resolution + // manually because if the symlink target does not exist, we + // must create the target (and not the symlink itself). + var ( + found *fs.Dirent + err error + ) + for { + if !fs.IsDir(parent.Inode.StableAttr) { + return syserror.ENOTDIR + } - // Does this file exist already? - remainingTraversals := uint(linux.MaxSymlinkTraversals) - targetDirent, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals) - var newFile *fs.File - switch err { - case nil: - // The file existed. - defer targetDirent.DecRef() + // Start by looking up the dirent at 'name'. + found, err = t.MountNamespace().FindLink(t, root, parent, name, &remainingTraversals) + if err != nil { + break + } + defer found.DecRef() - // Check if we wanted to create. + // We found something (possibly a symlink). If the + // O_EXCL flag was passed, then we can immediately + // return EEXIST. if flags&linux.O_EXCL != 0 { return syserror.EEXIST } + // If we have a non-symlink, then we can proceed. + if !fs.IsSymlink(found.Inode.StableAttr) { + break + } + + // If O_NOFOLLOW was passed, then don't try to resolve + // anything. + if flags&linux.O_NOFOLLOW != 0 { + return syserror.ELOOP + } + + // Try to resolve the symlink directly to a Dirent. + var resolved *fs.Dirent + resolved, err = found.Inode.Getlink(t) + if err == nil { + // No more resolution necessary. + defer resolved.DecRef() + break + } else if err != fs.ErrResolveViaReadlink { + return err + } + + // Are we able to resolve further? + if remainingTraversals == 0 { + return syscall.ELOOP + } + + // Resolve the symlink to a path via Readlink. + path, err := found.Inode.Readlink(t) + if err != nil { + break + } + remainingTraversals-- + + // Get the new parent from the target path. + newParentPath, newName := fs.SplitLast(path) + newParent, err := t.MountNamespace().FindInode(t, root, parent, newParentPath, &remainingTraversals) + if err != nil { + break + } + defer newParent.DecRef() + + // Repeat the process with the parent and name of the + // symlink target. + parent = newParent + name = newName + } + + var newFile *fs.File + switch err { + case nil: // Like sys_open, check for a few things about the // filesystem before trying to get a reference to the // fs.File. The same constraints on Check apply. - if err := targetDirent.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { + if err := found.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { return err } // Should we truncate the file? if flags&linux.O_TRUNC != 0 { - if err := targetDirent.Inode.Truncate(t, targetDirent, 0); err != nil { + if err := found.Inode.Truncate(t, found, 0); err != nil { return err } } // Create a new fs.File. - newFile, err = targetDirent.Inode.GetFile(t, targetDirent, fileFlags) + newFile, err = found.Inode.GetFile(t, found, fileFlags) if err != nil { return syserror.ConvertIntr(err, kernel.ERESTARTSYS) } @@ -351,26 +410,27 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod // File does not exist. Proceed with creation. // Do we have write permissions on the parent? - if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { + if err := parent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { return err } // Attempt a creation. perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask())) - newFile, err = d.Create(t, root, name, fileFlags, perms) + newFile, err = parent.Create(t, root, name, fileFlags, perms) if err != nil { // No luck, bail. return err } defer newFile.DecRef() - targetDirent = newFile.Dirent + found = newFile.Dirent default: return err } // Success. - fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0} - newFD, err := t.FDMap().NewFDFrom(0, newFile, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, newFile, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return err } @@ -379,10 +439,10 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod fd = uintptr(newFD) // Queue the open inotify event. The creation event is - // automatically queued when the dirent is targetDirent. The - // open events are implemented at the syscall layer so we need - // to manually queue one here. - targetDirent.InotifyEvent(linux.IN_OPEN, 0) + // automatically queued when the dirent is found. The open + // events are implemented at the syscall layer so we need to + // manually queue one here. + found.InotifyEvent(linux.IN_OPEN, 0) return nil }) @@ -404,7 +464,7 @@ func Open(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Openat implements linux syscall openat(2). func Openat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() flags := uint(args[2].Uint()) if flags&linux.O_CREAT != 0 { @@ -443,7 +503,7 @@ func (ac accessContext) Value(key interface{}) interface{} { } } -func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, mode uint) error { +func accessAt(t *kernel.Task, dirFD int32, addr usermem.Addr, resolve bool, mode uint) error { const rOK = 4 const wOK = 2 const xOK = 1 @@ -458,7 +518,7 @@ func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, m return syserror.EINVAL } - return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // access(2) and faccessat(2) check permissions using real // UID/GID, not effective UID/GID. // @@ -498,7 +558,7 @@ func Access(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Faccessat implements linux syscall faccessat(2). func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() mode := args[2].ModeT() flags := args[3].Int() @@ -508,10 +568,10 @@ func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Ioctl implements linux syscall ioctl(2). func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() request := int(args[1].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -520,12 +580,12 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Shared flags between file and socket. switch request { case linux.FIONCLEX: - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: false, }) return 0, nil, nil case linux.FIOCLEX: - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: true, }) return 0, nil, nil @@ -572,7 +632,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err default: - ret, err := file.FileOperations.Ioctl(t, t.MemoryManager(), args) + ret, err := file.FileOperations.Ioctl(t, file, t.MemoryManager(), args) if err != nil { return 0, nil, err } @@ -626,7 +686,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR @@ -651,7 +711,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR @@ -669,9 +729,9 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Fchdir implements the linux syscall fchdir(2). func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -693,10 +753,13 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Close implements linux syscall close(2). func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file, ok := t.FDMap().Remove(fd) - if !ok { + // Note that Remove provides a reference on the file that we may use to + // flush. It is still active until we drop the final reference below + // (and other reference-holding operations complete). + file := t.FDTable().Remove(fd) + if file == nil { return 0, nil, syserror.EBADF } defer file.DecRef() @@ -707,30 +770,30 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Dup implements linux syscall dup(2). func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } defer file.DecRef() - newfd, err := t.FDMap().NewFDFrom(0, file, kernel.FDFlags{}, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{}) if err != nil { return 0, nil, syserror.EMFILE } - return uintptr(newfd), nil, nil + return uintptr(newFD), nil, nil } // Dup2 implements linux syscall dup2(2). func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldfd := kdefs.FD(args[0].Int()) - newfd := kdefs.FD(args[1].Int()) + oldfd := args[0].Int() + newfd := args[1].Int() // If oldfd is a valid file descriptor, and newfd has the same value as oldfd, // then dup2() does nothing, and returns newfd. if oldfd == newfd { - oldFile := t.FDMap().GetFile(oldfd) + oldFile := t.GetFile(oldfd) if oldFile == nil { return 0, nil, syserror.EBADF } @@ -746,21 +809,21 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Dup3 implements linux syscall dup3(2). func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldfd := kdefs.FD(args[0].Int()) - newfd := kdefs.FD(args[1].Int()) + oldfd := args[0].Int() + newfd := args[1].Int() flags := args[2].Uint() if oldfd == newfd { return 0, nil, syserror.EINVAL } - oldFile := t.FDMap().GetFile(oldfd) + oldFile := t.GetFile(oldfd) if oldFile == nil { return 0, nil, syserror.EBADF } defer oldFile.DecRef() - err := t.FDMap().NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}, t.ThreadGroup().Limits()) + err := t.NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}) if err != nil { return 0, nil, err } @@ -803,10 +866,10 @@ func fSetOwn(t *kernel.Task, file *fs.File, who int32) { // Fcntl implements linux syscall fcntl(2). func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() cmd := args[1].Int() - file, flags := t.FDMap().GetDescriptor(fd) + file, flags := t.FDTable().Get(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -814,9 +877,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: - from := kdefs.FD(args[2].Int()) - fdFlags := kernel.FDFlags{CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC} - fd, err := t.FDMap().NewFDFrom(from, file, fdFlags, t.ThreadGroup().Limits()) + from := args[2].Int() + fd, err := t.NewFDFrom(from, file, kernel.FDFlags{ + CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC, + }) if err != nil { return 0, nil, err } @@ -825,7 +889,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(flags.ToLinuxFDFlags()), nil, nil case linux.F_SETFD: flags := args[2].Uint() - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: flags&linux.FD_CLOEXEC != 0, }) case linux.F_GETFL: @@ -886,8 +950,8 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - // The lock uid is that of the Task's FDMap. - lockUniqueID := lock.UniqueID(t.FDMap().ID()) + // The lock uid is that of the Task's FDTable. + lockUniqueID := lock.UniqueID(t.FDTable().ID()) // These locks don't block; execute the non-blocking operation using the inode's lock // context directly. @@ -945,17 +1009,18 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall err := tmpfs.AddSeals(file.Dirent.Inode, args[2].Uint()) return 0, nil, err case linux.F_GETPIPE_SZ: - sz, ok := file.FileOperations.(pipe.Sizer) + sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { return 0, nil, syserror.EINVAL } - return uintptr(sz.PipeSize()), nil, nil + size, err := sz.FifoSize(t, file) + return uintptr(size), nil, err case linux.F_SETPIPE_SZ: - sz, ok := file.FileOperations.(pipe.Sizer) + sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { return 0, nil, syserror.EINVAL } - n, err := sz.SetPipeSize(int64(args[2].Int())) + n, err := sz.SetFifoSize(int64(args[2].Int())) return uintptr(n), nil, err default: // Everything else is not yet supported. @@ -976,7 +1041,7 @@ const ( // Fadvise64 implements linux syscall fadvise64(2). // This implementation currently ignores the provided advice. func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() length := args[2].Int64() advice := args[3].Int() @@ -985,7 +1050,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EINVAL } - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1011,13 +1076,13 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, nil } -func mkdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1056,14 +1121,14 @@ func Mkdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Mkdirat implements linux syscall mkdirat(2). func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) return 0, nil, mkdirAt(t, dirFD, addr, mode) } -func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { +func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1074,7 +1139,7 @@ func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { return syserror.EBUSY } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1103,7 +1168,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr) } -func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr usermem.Addr) error { +func symlinkAt(t *kernel.Task, dirFD int32, newAddr usermem.Addr, oldAddr usermem.Addr) error { newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err @@ -1122,7 +1187,7 @@ func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr use return syserror.ENOENT } - return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1146,7 +1211,7 @@ func Symlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Symlinkat implements linux syscall symlinkat(2). func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { oldAddr := args[0].Pointer() - dirFD := kdefs.FD(args[1].Int()) + dirFD := args[1].Int() newAddr := args[2].Pointer() return 0, nil, symlinkAt(t, dirFD, newAddr, oldAddr) @@ -1188,7 +1253,7 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error { // linkAt creates a hard link to the target specified by oldDirFD and oldAddr, // specified by newDirFD and newAddr. If resolve is true, then the symlinks // will be followed when evaluating the target. -func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr, resolve, allowEmpty bool) error { +func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr, resolve, allowEmpty bool) error { oldPath, _, err := copyInPath(t, oldAddr, allowEmpty) if err != nil { return err @@ -1202,7 +1267,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd } if allowEmpty && oldPath == "" { - target := t.FDMap().GetFile(oldDirFD) + target := t.GetFile(oldDirFD) if target == nil { return syserror.EBADF } @@ -1212,7 +1277,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd } // Resolve the target directory. - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1227,13 +1292,13 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd // Resolve oldDirFD and oldAddr to a dirent. The "resolve" argument // only applies to this name. - return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent) error { + return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent, _ uint) error { if err := mayLinkAt(t, target.Inode); err != nil { return err } // Next resolve newDirFD and newAddr to the parent dirent and name. - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1264,9 +1329,9 @@ func Link(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Linkat implements linux syscall linkat(2). func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldDirFD := kdefs.FD(args[0].Int()) + oldDirFD := args[0].Int() oldAddr := args[1].Pointer() - newDirFD := kdefs.FD(args[2].Int()) + newDirFD := args[2].Int() newAddr := args[3].Pointer() // man linkat(2): @@ -1291,7 +1356,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty) } -func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) { +func readlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -1300,7 +1365,7 @@ func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr userm return 0, syserror.ENOENT } - err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Check for Read permission. if err := d.Inode.CheckPermission(t, fs.PermMask{Read: true}); err != nil { return err @@ -1341,7 +1406,7 @@ func Readlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Readlinkat implements linux syscall readlinkat(2). func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() bufAddr := args[2].Pointer() size := args[3].SizeT() @@ -1350,7 +1415,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return n, nil, err } -func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { +func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1359,7 +1424,7 @@ func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { return syserror.ENOENT } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1380,7 +1445,7 @@ func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Unlinkat implements linux syscall unlinkat(2). func Unlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() flags := args[2].Uint() if flags&linux.AT_REMOVEDIR != 0 { @@ -1414,7 +1479,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, syserror.EFBIG } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { if fs.IsDir(d.Inode.StableAttr) { return syserror.EISDIR } @@ -1441,10 +1506,10 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Ftruncate implements linux syscall ftruncate(2). func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() length := args[1].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1559,7 +1624,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { return nil } -func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { +func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { path, _, err := copyInPath(t, addr, allowEmpty) if err != nil { return err @@ -1567,7 +1632,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty if path == "" { // Annoying. What's wrong with fchown? - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syserror.EBADF } @@ -1576,7 +1641,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty return chown(t, file.Dirent, uid, gid) } - return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return chown(t, d, uid, gid) }) } @@ -1601,11 +1666,11 @@ func Lchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchown implements linux syscall fchown(2). func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() uid := auth.UID(args[1].Uint()) gid := auth.GID(args[2].Uint()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1616,7 +1681,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchownat implements Linux syscall fchownat(2). func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() uid := auth.UID(args[2].Uint()) gid := auth.GID(args[3].Uint()) @@ -1646,13 +1711,13 @@ func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error { return nil } -func chmodAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func chmodAt(t *kernel.Task, fd int32, addr usermem.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err } - return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return chmod(t, d, mode) }) } @@ -1667,10 +1732,10 @@ func Chmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Fchmod implements linux syscall fchmod(2). func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() mode := linux.FileMode(args[1].ModeT()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1681,7 +1746,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchmodat implements linux syscall fchmodat(2). func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) @@ -1697,8 +1762,8 @@ func defaultSetToSystemTimeSpec() fs.TimeSpec { } } -func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error { - setTimestamp := func(root *fs.Dirent, d *fs.Dirent) error { +func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error { + setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Does the task own the file? if !d.Inode.CheckOwnership(t) { // Trying to set a specific time? Must be owner. @@ -1730,7 +1795,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r // Linux returns EINVAL in this case. See utimes.c. return syserror.EINVAL } - f := t.FDMap().GetFile(dirFD) + f := t.GetFile(dirFD) if f == nil { return syserror.EBADF } @@ -1739,7 +1804,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r root := t.FSContext().RootDirectory() defer root.DecRef() - return setTimestamp(root, f.Dirent) + return setTimestamp(root, f.Dirent, linux.MaxSymlinkTraversals) } path, _, err := copyInPath(t, addr, false /* allowEmpty */) @@ -1798,7 +1863,7 @@ func timespecIsValid(ts linux.Timespec) bool { // Utimensat implements linux syscall utimensat(2). func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() pathnameAddr := args[1].Pointer() timesAddr := args[2].Pointer() flags := args[3].Int() @@ -1833,7 +1898,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Futimesat implements linux syscall futimesat(2). func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() pathnameAddr := args[1].Pointer() timesAddr := args[2].Pointer() @@ -1857,7 +1922,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, utimes(t, dirFD, pathnameAddr, ts, true) } -func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr) error { +func renameAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr) error { newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err @@ -1867,7 +1932,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD return err } - return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string) error { + return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string, _ uint) error { if !fs.IsDir(oldParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1879,7 +1944,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD return syserror.EBUSY } - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1905,21 +1970,21 @@ func Rename(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Renameat implements linux syscall renameat(2). func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldDirFD := kdefs.FD(args[0].Int()) + oldDirFD := args[0].Int() oldPathAddr := args[1].Pointer() - newDirFD := kdefs.FD(args[2].Int()) + newDirFD := args[2].Int() newPathAddr := args[3].Pointer() return 0, nil, renameAt(t, oldDirFD, oldPathAddr, newDirFD, newPathAddr) } // Fallocate implements linux system call fallocate(2). func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() mode := args[1].Int64() offset := args[2].Int64() length := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1968,10 +2033,10 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Flock implements linux syscall flock(2). func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() operation := args[1].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { // flock(2): EBADF fd is not an open file descriptor. return 0, nil, syserror.EBADF @@ -2067,7 +2132,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S name = memfdPrefix + name inode := tmpfs.NewMemfdInode(t, allowSeals) - dirent := fs.NewDirent(inode, name) + dirent := fs.NewDirent(t, inode, name) // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with // FMODE_READ | FMODE_WRITE. file, err := inode.GetFile(t, dirent, fs.FileFlags{Read: true, Write: true}) @@ -2078,8 +2143,9 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S defer dirent.DecRef() defer file.DecRef() - fdFlags := kernel.FDFlags{CloseOnExec: cloExec} - newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ + CloseOnExec: cloExec, + }) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index 7cef4b50c..b9bd25464 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -17,12 +17,12 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // futexWaitRestartBlock encapsulates the state required to restart futex(2) diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 1b597d5bc..7a2beda23 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -19,18 +19,17 @@ import ( "io" "syscall" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Getdents implements linux syscall getdents(2) for 64bit systems. func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := int(args[2].Uint()) @@ -46,7 +45,7 @@ func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Getdents64 implements linux syscall getdents64(2). func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := int(args[2].Uint()) @@ -62,8 +61,8 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // getdents implements the core of getdents(2)/getdents64(2). // f is the syscall implementation dirent serialization function. -func getdents(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { - dir := t.FDMap().GetFile(fd) +func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { + dir := t.GetFile(fd) if dir == nil { return 0, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_identity.go b/pkg/sentry/syscalls/linux/sys_identity.go index 27e765a2d..715ac45e6 100644 --- a/pkg/sentry/syscalls/linux/sys_identity.go +++ b/pkg/sentry/syscalls/linux/sys_identity.go @@ -15,10 +15,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_inotify.go b/pkg/sentry/syscalls/linux/sys_inotify.go index 20269a769..a4f36e01f 100644 --- a/pkg/sentry/syscalls/linux/sys_inotify.go +++ b/pkg/sentry/syscalls/linux/sys_inotify.go @@ -17,12 +17,11 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) const allFlags = int(linux.IN_NONBLOCK | linux.IN_CLOEXEC) @@ -35,7 +34,7 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. return 0, nil, syscall.EINVAL } - dirent := fs.NewDirent(anon.NewInode(t), "inotify") + dirent := fs.NewDirent(t, anon.NewInode(t), "inotify") fileFlags := fs.FileFlags{ Read: true, Write: true, @@ -44,9 +43,9 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. n := fs.NewFile(t, dirent, fileFlags, fs.NewInotify(t)) defer n.DecRef() - fd, err := t.FDMap().NewFDFrom(0, n, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, n, kernel.FDFlags{ CloseOnExec: flags&linux.IN_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err @@ -63,8 +62,8 @@ func InotifyInit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // fdToInotify resolves an fd to an inotify object. If successful, the file will // have an extra ref and the caller is responsible for releasing the ref. -func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) { - file := t.FDMap().GetFile(fd) +func fdToInotify(t *kernel.Task, fd int32) (*fs.Inotify, *fs.File, error) { + file := t.GetFile(fd) if file == nil { // Invalid fd. return nil, nil, syscall.EBADF @@ -82,7 +81,7 @@ func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) { // InotifyAddWatch implements the inotify_add_watch() syscall. func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() mask := args[2].Uint() @@ -107,14 +106,14 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern return 0, nil, err } - err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent) error { + err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent, _ uint) error { // "IN_ONLYDIR: Only watch pathname if it is a directory." -- inotify(7) if onlyDir := mask&linux.IN_ONLYDIR != 0; onlyDir && !fs.IsDir(dirent.Inode.StableAttr) { return syscall.ENOTDIR } // Copy out to the return frame. - fd = kdefs.FD(ino.AddWatch(dirent, mask)) + fd = ino.AddWatch(dirent, mask) return nil }) @@ -123,7 +122,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern // InotifyRmWatch implements the inotify_rm_watch() syscall. func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() wd := args[1].Int() ino, file, err := fdToInotify(t, fd) diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go index 8aadc6d8c..297e920c4 100644 --- a/pkg/sentry/syscalls/linux/sys_lseek.go +++ b/pkg/sentry/syscalls/linux/sys_lseek.go @@ -15,20 +15,19 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Lseek implements linux syscall lseek(2). func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() offset := args[1].Int64() whence := args[2].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index 652b2c206..f5a519d8a 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -17,11 +17,11 @@ package linux import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // We unconditionally report a single NUMA node. This also means that our diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 9926f0ac5..58a05b5bb 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -17,14 +17,13 @@ package linux import ( "bytes" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Brk implements linux syscall brk(2). @@ -40,7 +39,7 @@ func Brk(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { prot := args[2].Int() flags := args[3].Int() - fd := kdefs.FD(args[4].Int()) + fd := args[4].Int() fixed := flags&linux.MAP_FIXED != 0 private := flags&linux.MAP_PRIVATE != 0 shared := flags&linux.MAP_SHARED != 0 @@ -80,7 +79,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if !anon { // Convert the passed FD to a file reference. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -180,6 +179,10 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca switch adv { case linux.MADV_DONTNEED: return 0, nil, t.MemoryManager().Decommit(addr, length) + case linux.MADV_DOFORK: + return 0, nil, t.MemoryManager().SetDontFork(addr, length, false) + case linux.MADV_DONTFORK: + return 0, nil, t.MemoryManager().SetDontFork(addr, length, true) case linux.MADV_HUGEPAGE, linux.MADV_NOHUGEPAGE: fallthrough case linux.MADV_MERGEABLE, linux.MADV_UNMERGEABLE: @@ -191,7 +194,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca case linux.MADV_NORMAL, linux.MADV_RANDOM, linux.MADV_SEQUENTIAL, linux.MADV_WILLNEED: // Do nothing, we totally ignore the suggestions above. return 0, nil, nil - case linux.MADV_REMOVE, linux.MADV_DOFORK, linux.MADV_DONTFORK: + case linux.MADV_REMOVE: // These "suggestions" have application-visible side effects, so we // have to indicate that we don't support them. return 0, nil, syserror.ENOSYS diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index cf613bad0..9080a10c3 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Mount implements Linux syscall mount(2). @@ -109,7 +109,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, syserror.EINVAL } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return t.MountNamespace().Mount(t, d, rootInode) }) } @@ -140,7 +140,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca resolve := flags&linux.UMOUNT_NOFOLLOW != linux.UMOUNT_NOFOLLOW detachOnly := flags&linux.MNT_DETACH == linux.MNT_DETACH - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return t.MountNamespace().Unmount(t, d, detachOnly) }) } diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index 036845c13..576022dd5 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -17,12 +17,12 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // pipe2 implements the actual system call with flags. @@ -38,24 +38,17 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { w.SetFlags(linuxToFlags(flags).Settable()) defer w.DecRef() - rfd, err := t.FDMap().NewFDFrom(0, r, kernel.FDFlags{ - CloseOnExec: flags&linux.O_CLOEXEC != 0}, - t.ThreadGroup().Limits()) + fds, err := t.NewFDs(0, []*fs.File{r, w}, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return 0, err } - wfd, err := t.FDMap().NewFDFrom(0, w, kernel.FDFlags{ - CloseOnExec: flags&linux.O_CLOEXEC != 0}, - t.ThreadGroup().Limits()) - if err != nil { - t.FDMap().Remove(rfd) - return 0, err - } - - if _, err := t.CopyOut(addr, []kdefs.FD{rfd, wfd}); err != nil { - t.FDMap().Remove(rfd) - t.FDMap().Remove(wfd) + if _, err := t.CopyOut(addr, fds); err != nil { + // The files are not closed in this case, the exact semantics + // of this error case are not well defined, but they could have + // already been observed by user space. return 0, syscall.EFAULT } return 0, nil diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index e32099dd4..7a13beac2 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // fileCap is the maximum allowable files for poll & select. @@ -64,7 +63,7 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan return } - file := t.FDMap().GetFile(kdefs.FD(pfd.FD)) + file := t.GetFile(pfd.FD) if file == nil { pfd.REvents = linux.POLLNVAL return @@ -265,7 +264,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add // immediately to ensure we don't leak. Note, another thread // might be about to close fd. This is racy, but that's // OK. Linux is racy in the same way. - file := t.FDMap().GetFile(kdefs.FD(fd)) + file := t.GetFile(fd) if file == nil { return 0, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 1b7e5616b..5329023e6 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -18,13 +18,12 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" ) // Prctl implements linux syscall prctl(2). @@ -122,9 +121,9 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch args[1].Int() { case linux.PR_SET_MM_EXE_FILE: - fd := kdefs.FD(args[2].Int()) + fd := args[2].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index fc3959a7e..bc4c588bf 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -18,12 +18,12 @@ import ( "io" "math" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index 48b0fd49d..b2474e60d 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -39,11 +38,11 @@ const ( // they can do large reads all at once. Bug for bug. Same for other read // calls below. func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -75,12 +74,12 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Pread64 implements linux syscall pread64(2). func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -122,11 +121,11 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Readv implements linux syscall readv(2). func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -152,12 +151,12 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Preadv implements linux syscall preadv(2). func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -201,13 +200,13 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // splits the offset argument into a high/low value for compatibility with // 32-bit architectures. The flags argument is the 5th argument. - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() flags := int(args[5].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index 8b0379779..51e3f836b 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // rlimit describes an implementation of 'struct rlimit', which may vary from diff --git a/pkg/sentry/syscalls/linux/sys_rusage.go b/pkg/sentry/syscalls/linux/sys_rusage.go index 003d718da..1674c7445 100644 --- a/pkg/sentry/syscalls/linux/sys_rusage.go +++ b/pkg/sentry/syscalls/linux/sys_rusage.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" ) func getrusage(t *kernel.Task, which int32) linux.Rusage { diff --git a/pkg/sentry/syscalls/linux/sys_sched.go b/pkg/sentry/syscalls/linux/sys_sched.go index 8aea03abe..434bbb322 100644 --- a/pkg/sentry/syscalls/linux/sys_sched.go +++ b/pkg/sentry/syscalls/linux/sys_sched.go @@ -17,9 +17,9 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go index b4262162a..4885b5e40 100644 --- a/pkg/sentry/syscalls/linux/sys_seccomp.go +++ b/pkg/sentry/syscalls/linux/sys_seccomp.go @@ -17,11 +17,11 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // userSockFprog is equivalent to Linux's struct sock_fprog on amd64. diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index 5bd61ab87..cde3b54e7 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -17,13 +17,13 @@ package linux import ( "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const opsMax = 500 // SEMOPM diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index d0eceac7c..d57ffb3a1 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -15,11 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/shm" + "gvisor.dev/gvisor/pkg/syserror" ) // Shmget implements shmget(2). diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index 7fbeb4fcd..0104a94c0 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -18,10 +18,10 @@ import ( "math" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // "For a process to have permission to send a signal it must diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 31295a6a9..d1165a57a 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -18,18 +18,18 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" ) // minListenBacklog is the minimum reasonable backlog for listening sockets. @@ -61,6 +61,8 @@ const controlLenOffset = 40 // to the Flags field. const flagsOffset = 48 +const sizeOfInt32 = 4 + // messageHeader64Len is the length of a MessageHeader64 struct. var messageHeader64Len = uint64(binary.Size(MessageHeader64{})) @@ -197,9 +199,9 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal }) defer s.DecRef() - fd, err := t.FDMap().NewFDFrom(0, s, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, s, kernel.FDFlags{ CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } @@ -222,9 +224,6 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy fileFlags := fs.SettableFileFlags{ NonBlocking: stype&linux.SOCK_NONBLOCK != 0, } - fdFlags := kernel.FDFlags{ - CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, - } // Create the socket pair. s1, s2, e := socket.Pair(t, domain, linux.SockType(stype&0xf), protocol) @@ -237,20 +236,16 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy defer s2.DecRef() // Create the FDs for the sockets. - fd1, err := t.FDMap().NewFDFrom(0, s1, fdFlags, t.ThreadGroup().Limits()) - if err != nil { - return 0, nil, err - } - fd2, err := t.FDMap().NewFDFrom(0, s2, fdFlags, t.ThreadGroup().Limits()) + fds, err := t.NewFDs(0, []*fs.File{s1, s2}, kernel.FDFlags{ + CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, + }) if err != nil { - t.FDMap().Remove(fd1) return 0, nil, err } // Copy the file descriptors out. - if _, err := t.CopyOut(socks, []int32{int32(fd1), int32(fd2)}); err != nil { - t.FDMap().Remove(fd1) - t.FDMap().Remove(fd2) + if _, err := t.CopyOut(socks, fds); err != nil { + // Note that we don't close files here; see pipe(2) also. return 0, nil, err } @@ -259,12 +254,12 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Connect implements the linux syscall connect(2). func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Uint() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -288,14 +283,14 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // accept is the implementation of the accept syscall. It is called by accept // and accept4 syscall handlers. -func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { +func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { return 0, syscall.EINVAL } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -328,7 +323,7 @@ func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr // Accept4 implements the linux syscall accept4(2). func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() flags := int(args[3].Int()) @@ -339,7 +334,7 @@ func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Accept implements the linux syscall accept(2). func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() @@ -349,12 +344,12 @@ func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Bind implements the linux syscall bind(2). func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Uint() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -377,11 +372,11 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Listen implements the linux syscall listen(2). func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() backlog := args[1].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -406,11 +401,11 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Shutdown implements the linux syscall shutdown(2). func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() how := args[1].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -434,14 +429,14 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // GetSockOpt implements the linux syscall getsockopt(2). func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() level := args[1].Int() name := args[2].Int() optValAddr := args[3].Pointer() optLenAddr := args[4].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -466,7 +461,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } // Call syscall implementation then copy both value and value len out. - v, e := s.GetSockOpt(t, int(level), int(name), int(optLen)) + v, e := getSockOpt(t, s, int(level), int(name), int(optLen)) if e != nil { return 0, nil, e.ToError() } @@ -487,18 +482,45 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, nil } +// getSockOpt tries to handle common socket options, or dispatches to a specific +// socket implementation. +func getSockOpt(t *kernel.Task, s socket.Socket, level, name, len int) (interface{}, *syserr.Error) { + if level == linux.SOL_SOCKET { + switch name { + case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: + if len < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + } + + switch name { + case linux.SO_TYPE: + _, skType, _ := s.Type() + return int32(skType), nil + case linux.SO_DOMAIN: + family, _, _ := s.Type() + return int32(family), nil + case linux.SO_PROTOCOL: + _, _, protocol := s.Type() + return int32(protocol), nil + } + } + + return s.GetSockOpt(t, level, name, len) +} + // SetSockOpt implements the linux syscall setsockopt(2). // // Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket. func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() level := args[1].Int() name := args[2].Int() optValAddr := args[3].Pointer() optLen := args[4].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -531,12 +553,12 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // GetSockName implements the linux syscall getsockname(2). func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -559,12 +581,12 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // GetPeerName implements the linux syscall getpeername(2). func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -587,7 +609,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // RecvMsg implements the linux syscall recvmsg(2). func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() flags := args[2].Int() @@ -597,7 +619,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -633,7 +655,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // RecvMMsg implements the linux syscall recvmmsg(2). func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() vlen := args[2].Uint() flags := args[3].Int() @@ -650,7 +672,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -812,7 +834,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i // recvFrom is the implementation of the recvfrom syscall. It is called by // recvfrom and recv syscall handlers. -func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { +func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { if int(bufLen) < 0 { return 0, syscall.EINVAL } @@ -823,7 +845,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -873,7 +895,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f // RecvFrom implements the linux syscall recvfrom(2). func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() bufPtr := args[1].Pointer() bufLen := args[2].Uint64() flags := args[3].Int() @@ -886,7 +908,7 @@ func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // SendMsg implements the linux syscall sendmsg(2). func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() flags := args[2].Int() @@ -896,7 +918,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -923,7 +945,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // SendMMsg implements the linux syscall sendmmsg(2). func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() vlen := args[2].Uint() flags := args[3].Int() @@ -934,7 +956,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -1049,14 +1071,14 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme // sendTo is the implementation of the sendto syscall. It is called by sendto // and send syscall handlers. -func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { +func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { return 0, syscall.EINVAL } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -1105,7 +1127,7 @@ func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, fla // SendTo implements the linux syscall sendto(2). func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() bufPtr := args[1].Pointer() bufLen := args[2].Uint64() flags := args[3].Int() diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 37303606f..a7c98efcb 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -15,13 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // doSplice implements a blocking splice operation. @@ -48,12 +47,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB if ch == nil { ch = make(chan struct{}, 1) } - if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking { + if !inW && !inFile.Flags().NonBlocking { w, _ := waiter.NewChannelEntry(ch) inFile.EventRegister(&w, EventMaskRead) defer inFile.EventUnregister(&w) inW = true // Registered. - } else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking { + } else if !outW && !outFile.Flags().NonBlocking { w, _ := waiter.NewChannelEntry(ch) outFile.EventRegister(&w, EventMaskWrite) defer outFile.EventUnregister(&w) @@ -65,6 +64,11 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB break } + if (!inW || inFile.Readiness(EventMaskRead) != 0) && (!outW || outFile.Readiness(EventMaskWrite) != 0) { + // Something became ready, try again without blocking. + continue + } + // Block until there's data. if err = t.Block(ch); err != nil { break @@ -76,8 +80,8 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB // Sendfile implements linux system call sendfile(2). func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - outFD := kdefs.FD(args[0].Int()) - inFD := kdefs.FD(args[1].Int()) + outFD := args[0].Int() + inFD := args[1].Int() offsetAddr := args[2].Pointer() count := int64(args[3].SizeT()) @@ -87,13 +91,13 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } @@ -158,9 +162,9 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Splice implements splice(2). func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - inFD := kdefs.FD(args[0].Int()) + inFD := args[0].Int() inOffset := args[1].Pointer() - outFD := kdefs.FD(args[2].Int()) + outFD := args[2].Int() outOffset := args[3].Pointer() count := int64(args[4].SizeT()) flags := args[5].Int() @@ -177,13 +181,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } @@ -246,8 +250,8 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Tee imlements tee(2). func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - inFD := kdefs.FD(args[0].Int()) - outFD := kdefs.FD(args[1].Int()) + inFD := args[0].Int() + outFD := args[1].Int() count := int64(args[2].SizeT()) flags := args[3].Int() @@ -260,13 +264,13 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 10fc201ef..5556bc276 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -15,14 +15,13 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Stat implements linux syscall stat(2). @@ -35,14 +34,14 @@ func Stat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } // Fstatat implements linux syscall newfstatat, i.e. fstatat(2). func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() statAddr := args[2].Pointer() flags := args[3].Int() @@ -54,7 +53,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if path == "" { // Annoying. What's wrong with fstat? - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -67,7 +66,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // then we must resolve the final component. resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0 - return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } @@ -86,17 +85,17 @@ func Lstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // want to resolve the final component. resolve := dirPath - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } // Fstat implements linux syscall fstat(2). func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() statAddr := args[1].Pointer() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -132,24 +131,6 @@ func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error { // t.CopyObjectOut has noticeable performance impact due to its many slice // allocations and use of reflection. func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs.UnstableAttr) error { - var mode uint32 - switch sattr.Type { - case fs.RegularFile, fs.SpecialFile: - mode |= linux.ModeRegular - case fs.Symlink: - mode |= linux.ModeSymlink - case fs.Directory, fs.SpecialDirectory: - mode |= linux.ModeDirectory - case fs.Pipe: - mode |= linux.ModeNamedPipe - case fs.CharacterDevice: - mode |= linux.ModeCharacterDevice - case fs.BlockDevice: - mode |= linux.ModeBlockDevice - case fs.Socket: - mode |= linux.ModeSocket - } - b := t.CopyScratchBuffer(int(linux.SizeOfStat))[:0] // Dev (uint64) @@ -159,7 +140,7 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs // Nlink (uint64) b = binary.AppendUint64(b, usermem.ByteOrder, uattr.Links) // Mode (uint32) - b = binary.AppendUint32(b, usermem.ByteOrder, mode|uint32(uattr.Perms.LinuxMode())) + b = binary.AppendUint32(b, usermem.ByteOrder, sattr.Type.LinuxType()|uint32(uattr.Perms.LinuxMode())) // UID (uint32) b = binary.AppendUint32(b, usermem.ByteOrder, uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) // GID (uint32) @@ -194,6 +175,98 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs return err } +// Statx implements linux syscall statx(2). +func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + pathAddr := args[1].Pointer() + flags := args[2].Int() + mask := args[3].Uint() + statxAddr := args[4].Pointer() + + if mask&linux.STATX__RESERVED > 0 { + return 0, nil, syserror.EINVAL + } + if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { + return 0, nil, syserror.EINVAL + } + + path, dirPath, err := copyInPath(t, pathAddr, flags&linux.AT_EMPTY_PATH != 0) + if err != nil { + return 0, nil, err + } + + if path == "" { + file := t.GetFile(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + uattr, err := file.UnstableAttr(t) + if err != nil { + return 0, nil, err + } + return 0, nil, statx(t, file.Dirent.Inode.StableAttr, uattr, statxAddr) + } + + resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0 + + return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + uattr, err := d.Inode.UnstableAttr(t) + if err != nil { + return err + } + return statx(t, d.Inode.StableAttr, uattr, statxAddr) + }) +} + +func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error { + // "[T]he kernel may return fields that weren't requested and may fail to + // return fields that were requested, depending on what the backing + // filesystem supports. + // [...] + // A filesystem may also fill in fields that the caller didn't ask for + // if it has values for them available and the information is available + // at no extra cost. If this happens, the corresponding bits will be + // set in stx_mask." -- statx(2) + // + // We fill in all the values we have (which currently does not include + // btime, see b/135608823), regardless of what the user asked for. The + // STATX_BASIC_STATS mask indicates that all fields are present except + // for btime. + + devMajor, devMinor := linux.DecodeDeviceID(uint32(sattr.DeviceID)) + s := linux.Statx{ + // TODO(b/135608823): Support btime, and then change this to + // STATX_ALL to indicate presence of btime. + Mask: linux.STATX_BASIC_STATS, + + // No attributes, and none supported. + Attributes: 0, + AttributesMask: 0, + + Blksize: uint32(sattr.BlockSize), + Nlink: uint32(uattr.Links), + UID: uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()), + GID: uint32(uattr.Owner.GID.In(t.UserNamespace()).OrOverflow()), + Mode: uint16(sattr.Type.LinuxType()) | uint16(uattr.Perms.LinuxMode()), + Ino: sattr.InodeID, + Size: uint64(uattr.Size), + Blocks: uint64(uattr.Usage) / 512, + Atime: uattr.AccessTime.StatxTimestamp(), + Ctime: uattr.StatusChangeTime.StatxTimestamp(), + Mtime: uattr.ModificationTime.StatxTimestamp(), + RdevMajor: uint32(sattr.DeviceFileMajor), + RdevMinor: sattr.DeviceFileMinor, + DevMajor: uint32(devMajor), + DevMinor: devMinor, + } + _, err := t.CopyOut(statxAddr, &s) + return err +} + // Statfs implements linux syscall statfs(2). func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { addr := args[0].Pointer() @@ -204,17 +277,17 @@ func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return statfsImpl(t, d, statfsAddr) }) } // Fstatfs implements linux syscall fstatfs(2). func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() statfsAddr := args[1].Pointer() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -252,8 +325,6 @@ func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error { FragmentSize: d.Inode.StableAttr.BlockSize, // Leave other fields 0 like simple_statfs does. } - if _, err := t.CopyOut(addr, &statfs); err != nil { - return err - } - return nil + _, err = t.CopyOut(addr, &statfs) + return err } diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go index 4352482fb..3e55235bd 100644 --- a/pkg/sentry/syscalls/linux/sys_sync.go +++ b/pkg/sentry/syscalls/linux/sys_sync.go @@ -15,12 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Sync implements linux system call sync(2). @@ -32,9 +31,9 @@ func Sync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Syncfs implements linux system call syncfs(2). func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -47,9 +46,9 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fsync implements linux syscall fsync(2). func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -63,9 +62,9 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // // At the moment, it just calls Fsync, which is a big hammer, but correct. func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -79,6 +78,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { var err error + fd := args[0].Int() offset := args[1].Int64() nbytes := args[2].Int64() uflags := args[3].Uint() @@ -97,8 +97,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel nbytes = fs.FileMaxOffset } - fd := kdefs.FD(args[0].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go index ecf88edc1..a65b560c8 100644 --- a/pkg/sentry/syscalls/linux/sys_sysinfo.go +++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go @@ -15,10 +15,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // Sysinfo implements the sysinfo syscall as described in man 2 sysinfo. diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go index 9efc58d34..40c8bb061 100644 --- a/pkg/sentry/syscalls/linux/sys_syslog.go +++ b/pkg/sentry/syscalls/linux/sys_syslog.go @@ -15,9 +15,9 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 26f7e8ead..9e037bd7b 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -17,12 +17,12 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index b4f2609c0..fe8725191 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ -17,12 +17,12 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // The most significant 29 bits hold either a pid or a file descriptor. diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index 04ea7a4e9..ca5ccb7c3 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -18,10 +18,10 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const nsecPerSec = int64(time.Second) diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go index ec0155cbb..1ce5ce4c3 100644 --- a/pkg/sentry/syscalls/linux/sys_timerfd.go +++ b/pkg/sentry/syscalls/linux/sys_timerfd.go @@ -15,14 +15,13 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) // TimerfdCreate implements Linux syscall timerfd_create(2). @@ -49,9 +48,9 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel NonBlocking: flags&linux.TFD_NONBLOCK != 0, }) - fd, err := t.FDMap().NewFDFrom(0, f, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, f, kernel.FDFlags{ CloseOnExec: flags&linux.TFD_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } @@ -61,7 +60,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel // TimerfdSettime implements Linux syscall timerfd_settime(2). func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() flags := args[1].Int() newValAddr := args[2].Pointer() oldValAddr := args[3].Pointer() @@ -70,7 +69,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne return 0, nil, syserror.EINVAL } - f := t.FDMap().GetFile(fd) + f := t.GetFile(fd) if f == nil { return 0, nil, syserror.EBADF } @@ -101,10 +100,10 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne // TimerfdGettime implements Linux syscall timerfd_gettime(2). func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() curValAddr := args[1].Pointer() - f := t.FDMap().GetFile(fd) + f := t.GetFile(fd) if f == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_tls.go b/pkg/sentry/syscalls/linux/sys_tls.go index 1e8312e00..e3d1c6201 100644 --- a/pkg/sentry/syscalls/linux/sys_tls.go +++ b/pkg/sentry/syscalls/linux/sys_tls.go @@ -19,9 +19,9 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // ArchPrctl implements linux syscall arch_prctl(2). diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go index fa81fe10e..271ace08e 100644 --- a/pkg/sentry/syscalls/linux/sys_utsname.go +++ b/pkg/sentry/syscalls/linux/sys_utsname.go @@ -17,10 +17,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Uname implements linux syscall uname. diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 1da72d606..5278c96a6 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -39,11 +38,11 @@ const ( // Write implements linux syscall write(2). func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -75,12 +74,12 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Pwrite64 implements linux syscall pwrite64(2). func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -122,11 +121,11 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Writev implements linux syscall writev(2). func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -152,12 +151,12 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Pwritev implements linux syscall pwritev(2). func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -202,7 +201,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // splits the offset argument into a high/low value for compatibility with // 32-bit architectures. The flags argument is the 5th argument. - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() @@ -212,7 +211,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, syserror.EACCES } - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index fa6fcdc0b..9ba0eba7a 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -18,10 +18,10 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // copyTimespecIn copies a Timespec from the untrusted app range to the kernel. diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go index 48c114232..94d52d5d5 100644 --- a/pkg/sentry/syscalls/syscalls.go +++ b/pkg/sentry/syscalls/syscalls.go @@ -28,10 +28,10 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Supported returns a syscall that is fully supported. @@ -40,16 +40,7 @@ func Supported(name string, fn kernel.SyscallFn) kernel.Syscall { Name: name, Fn: fn, SupportLevel: kernel.SupportFull, - Note: "Full Support", - } -} - -// Undocumented returns a syscall that is undocumented. -func Undocumented(name string, fn kernel.SyscallFn) kernel.Syscall { - return kernel.Syscall{ - Name: name, - Fn: fn, - SupportLevel: kernel.SupportUndocumented, + Note: "Fully Supported.", } } @@ -75,7 +66,7 @@ func Error(name string, err syscall.Errno, note string, urls []string) kernel.Sy return 0, nil, err }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q", note, err.Error()), + Note: fmt.Sprintf("%sReturns %q.", note, err.Error()), URLs: urls, } } @@ -93,7 +84,7 @@ func ErrorWithEvent(name string, err syscall.Errno, note string, urls []string) return 0, nil, err }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q", note, err.Error()), + Note: fmt.Sprintf("%sReturns %q.", note, err.Error()), URLs: urls, } } @@ -115,7 +106,7 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne return 0, nil, syserror.ENOSYS }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise", note, syserror.EPERM, c.String(), syserror.ENOSYS), + Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, syserror.EPERM, c.String(), syserror.ENOSYS), URLs: urls, } } diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index b50579a92..d2ede0353 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -31,7 +31,7 @@ go_library( "tsc_amd64.s", "tsc_arm64.s", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/time", + importpath = "gvisor.dev/gvisor/pkg/sentry/time", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go index c27e391c9..318503277 100644 --- a/pkg/sentry/time/calibrated_clock.go +++ b/pkg/sentry/time/calibrated_clock.go @@ -20,9 +20,9 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/syserror" ) // fallbackMetric tracks failed updates. It is not sync, as it is not critical diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go index 63cf7c4a3..65868cb26 100644 --- a/pkg/sentry/time/parameters.go +++ b/pkg/sentry/time/parameters.go @@ -18,7 +18,7 @@ import ( "fmt" "time" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) const ( diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go index 2140a99b7..4ac9c4474 100644 --- a/pkg/sentry/time/sampler.go +++ b/pkg/sentry/time/sampler.go @@ -17,7 +17,7 @@ package time import ( "errors" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) const ( diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD index b608867a9..b69603da3 100644 --- a/pkg/sentry/unimpl/BUILD +++ b/pkg/sentry/unimpl/BUILD @@ -12,7 +12,7 @@ proto_library( go_proto_library( name = "unimplemented_syscall_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto", + importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto", proto = ":unimplemented_syscall_proto", visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_go_proto"], @@ -21,7 +21,7 @@ go_proto_library( go_library( name = "unimpl", srcs = ["events.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl", + importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/sentry/unimpl/events.go b/pkg/sentry/unimpl/events.go index d92766e2d..79b5de9e4 100644 --- a/pkg/sentry/unimpl/events.go +++ b/pkg/sentry/unimpl/events.go @@ -17,8 +17,8 @@ package unimpl import ( - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the events package's type for context.Context.Value keys. diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD index ccc5a28d3..86a87edd4 100644 --- a/pkg/sentry/uniqueid/BUILD +++ b/pkg/sentry/uniqueid/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "uniqueid", srcs = ["context.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid", + importpath = "gvisor.dev/gvisor/pkg/sentry/uniqueid", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/uniqueid/context.go b/pkg/sentry/uniqueid/context.go index e55b89689..4e466d66d 100644 --- a/pkg/sentry/uniqueid/context.go +++ b/pkg/sentry/uniqueid/context.go @@ -17,8 +17,8 @@ package uniqueid import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) // contextID is the kernel package's type for context.Context.Value keys. diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD index 860733061..a34c39540 100644 --- a/pkg/sentry/usage/BUILD +++ b/pkg/sentry/usage/BUILD @@ -11,7 +11,7 @@ go_library( "memory_unsafe.go", "usage.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/usage", + importpath = "gvisor.dev/gvisor/pkg/sentry/usage", visibility = [ "//pkg/sentry:internal", ], diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index 9ed974ccb..f4326706a 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -21,8 +21,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/bits" - "gvisor.googlesource.com/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/memutil" ) // MemoryKind represents a type of memory used by the application. diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD index e38b31b08..a5b4206bb 100644 --- a/pkg/sentry/usermem/BUILD +++ b/pkg/sentry/usermem/BUILD @@ -28,7 +28,7 @@ go_library( "usermem_unsafe.go", "usermem_x86.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/usermem", + importpath = "gvisor.dev/gvisor/pkg/sentry/usermem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/atomicbitops", diff --git a/pkg/sentry/usermem/bytes_io.go b/pkg/sentry/usermem/bytes_io.go index f98d82168..8d88396ba 100644 --- a/pkg/sentry/usermem/bytes_io.go +++ b/pkg/sentry/usermem/bytes_io.go @@ -15,9 +15,9 @@ package usermem import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/syserror" ) const maxInt = int(^uint(0) >> 1) diff --git a/pkg/sentry/usermem/bytes_io_unsafe.go b/pkg/sentry/usermem/bytes_io_unsafe.go index bb49d2ff3..fca5952f4 100644 --- a/pkg/sentry/usermem/bytes_io_unsafe.go +++ b/pkg/sentry/usermem/bytes_io_unsafe.go @@ -18,8 +18,8 @@ import ( "sync/atomic" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/atomicbitops" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/sentry/context" ) // SwapUint32 implements IO.SwapUint32. diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go index 9dde327a2..6eced660a 100644 --- a/pkg/sentry/usermem/usermem.go +++ b/pkg/sentry/usermem/usermem.go @@ -20,10 +20,10 @@ import ( "io" "strconv" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/syserror" ) // IO provides access to the contents of a virtual memory space. diff --git a/pkg/sentry/usermem/usermem_test.go b/pkg/sentry/usermem/usermem_test.go index 575e5039d..299f64754 100644 --- a/pkg/sentry/usermem/usermem_test.go +++ b/pkg/sentry/usermem/usermem_test.go @@ -22,9 +22,9 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/syserror" ) // newContext returns a context.Context that we can use in these tests (we diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD index 0bbf3705c..4d8435265 100644 --- a/pkg/sentry/watchdog/BUILD +++ b/pkg/sentry/watchdog/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "watchdog", srcs = ["watchdog.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog", + importpath = "gvisor.dev/gvisor/pkg/sentry/watchdog", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index 2fc4472dd..145102c0d 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -35,11 +35,11 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) // DefaultTimeout is a resonable timeout value for most applications. @@ -271,23 +271,23 @@ func (w *Watchdog) reportStuckWatchdog() { w.onStuckTask(true, &buf) } -func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) { +func (w *Watchdog) onStuckTask(newTaskFound bool, msg *bytes.Buffer) { switch w.timeoutAction { case LogWarning: // Dump stack only if a new task is detected or if it sometime has passed since // the last time a stack dump was generated. if !newTaskFound && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod { - buf.WriteString("\n...[stack dump skipped]...") - log.Warningf(buf.String()) + msg.WriteString("\n...[stack dump skipped]...") + log.Warningf(msg.String()) } else { - log.TracebackAll(buf.String()) + log.TracebackAll(msg.String()) w.lastStackDump = time.Now() } case Panic: // Panic will skip over running tasks, which is likely the culprit here. So manually // dump all stacks before panic'ing. - log.TracebackAll(buf.String()) + log.TracebackAll(msg.String()) // Attempt to flush metrics, timeout and move on in case metrics are stuck as well. metricsEmitted := make(chan struct{}, 1) @@ -300,6 +300,6 @@ func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) { case <-metricsEmitted: case <-time.After(1 * time.Second): } - panic("Sentry detected stuck task(s). See stack trace and message above for more details") + panic(fmt.Sprintf("Stack for running G's are skipped while panicking.\n%s", msg.String())) } } diff --git a/pkg/sleep/BUILD b/pkg/sleep/BUILD index 2b005bf66..00665c939 100644 --- a/pkg/sleep/BUILD +++ b/pkg/sleep/BUILD @@ -10,7 +10,7 @@ go_library( "commit_noasm.go", "sleep_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sleep", + importpath = "gvisor.dev/gvisor/pkg/sleep", visibility = ["//:sandbox"], ) diff --git a/pkg/state/BUILD b/pkg/state/BUILD index 0a975e162..c0f3c658d 100644 --- a/pkg/state/BUILD +++ b/pkg/state/BUILD @@ -49,7 +49,7 @@ go_library( "state.go", "stats.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/state", + importpath = "gvisor.dev/gvisor/pkg/state", visibility = ["//:sandbox"], deps = [ ":object_go_proto", @@ -65,7 +65,7 @@ proto_library( go_proto_library( name = "object_go_proto", - importpath = "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto", + importpath = "gvisor.dev/gvisor/pkg/state/object_go_proto", proto = ":object_proto", visibility = ["//:sandbox"], ) diff --git a/pkg/state/decode.go b/pkg/state/decode.go index 73a59f871..47e6b878a 100644 --- a/pkg/state/decode.go +++ b/pkg/state/decode.go @@ -24,7 +24,7 @@ import ( "sort" "github.com/golang/protobuf/proto" - pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto" + pb "gvisor.dev/gvisor/pkg/state/object_go_proto" ) // objectState represents an object that may be in the process of being diff --git a/pkg/state/encode.go b/pkg/state/encode.go index b0714170b..5d9409a45 100644 --- a/pkg/state/encode.go +++ b/pkg/state/encode.go @@ -23,7 +23,7 @@ import ( "sort" "github.com/golang/protobuf/proto" - pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto" + pb "gvisor.dev/gvisor/pkg/state/object_go_proto" ) // queuedObject is an object queued for encoding. @@ -84,7 +84,7 @@ type encodeState struct { // register looks up an ID, registering if necessary. // -// If the object was not previosly registered, it is enqueued to be serialized. +// If the object was not previously registered, it is enqueued to be serialized. // See the documentation for idsByObject for more information. func (es *encodeState) register(obj reflect.Value) uint64 { // It is not legal to call register for any non-pointer objects (see diff --git a/pkg/state/map.go b/pkg/state/map.go index 1fb9b47b8..7e6fefed4 100644 --- a/pkg/state/map.go +++ b/pkg/state/map.go @@ -20,7 +20,7 @@ import ( "sort" "sync" - pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto" + pb "gvisor.dev/gvisor/pkg/state/object_go_proto" ) // entry is a single map entry. diff --git a/pkg/state/printer.go b/pkg/state/printer.go index 5174c3ba3..3ce18242f 100644 --- a/pkg/state/printer.go +++ b/pkg/state/printer.go @@ -22,7 +22,7 @@ import ( "strings" "github.com/golang/protobuf/proto" - pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto" + pb "gvisor.dev/gvisor/pkg/state/object_go_proto" ) // format formats a single object, for pretty-printing. It also returns whether diff --git a/pkg/state/state.go b/pkg/state/state.go index cf7df803a..d408ff84a 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -55,7 +55,7 @@ import ( "reflect" "runtime" - pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto" + pb "gvisor.dev/gvisor/pkg/state/object_go_proto" ) // ErrState is returned when an error is encountered during encode/decode. diff --git a/pkg/state/statefile/BUILD b/pkg/state/statefile/BUILD index 5967781e8..e70f4a79f 100644 --- a/pkg/state/statefile/BUILD +++ b/pkg/state/statefile/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "statefile", srcs = ["statefile.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/state/statefile", + importpath = "gvisor.dev/gvisor/pkg/state/statefile", visibility = ["//:sandbox"], deps = [ "//pkg/binary", diff --git a/pkg/state/statefile/statefile.go b/pkg/state/statefile/statefile.go index ad4e3b43e..c0f4c4954 100644 --- a/pkg/state/statefile/statefile.go +++ b/pkg/state/statefile/statefile.go @@ -55,8 +55,8 @@ import ( "strings" "time" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/compressio" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/compressio" ) // keySize is the AES-256 key length. diff --git a/pkg/state/statefile/statefile_test.go b/pkg/state/statefile/statefile_test.go index 60b769895..0b470fdec 100644 --- a/pkg/state/statefile/statefile_test.go +++ b/pkg/state/statefile/statefile_test.go @@ -24,7 +24,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/compressio" + "gvisor.dev/gvisor/pkg/compressio" ) func randomKey() ([]byte, error) { diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD index 0d65115ef..5665ad4ee 100644 --- a/pkg/syserr/BUILD +++ b/pkg/syserr/BUILD @@ -9,7 +9,7 @@ go_library( "netstack.go", "syserr.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/syserr", + importpath = "gvisor.dev/gvisor/pkg/syserr", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", diff --git a/pkg/syserr/netstack.go b/pkg/syserr/netstack.go index bd489b424..8ff922c69 100644 --- a/pkg/syserr/netstack.go +++ b/pkg/syserr/netstack.go @@ -15,8 +15,8 @@ package syserr import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/tcpip" ) // Mapping for tcpip.Error types. @@ -49,43 +49,44 @@ var ( ) var netstackErrorTranslations = map[*tcpip.Error]*Error{ - tcpip.ErrUnknownProtocol: ErrUnknownProtocol, - tcpip.ErrUnknownNICID: ErrUnknownNICID, - tcpip.ErrUnknownDevice: ErrUnknownDevice, - tcpip.ErrUnknownProtocolOption: ErrUnknownProtocolOption, - tcpip.ErrDuplicateNICID: ErrDuplicateNICID, - tcpip.ErrDuplicateAddress: ErrDuplicateAddress, - tcpip.ErrNoRoute: ErrNoRoute, - tcpip.ErrBadLinkEndpoint: ErrBadLinkEndpoint, - tcpip.ErrAlreadyBound: ErrAlreadyBound, - tcpip.ErrInvalidEndpointState: ErrInvalidEndpointState, - tcpip.ErrAlreadyConnecting: ErrAlreadyConnecting, - tcpip.ErrAlreadyConnected: ErrAlreadyConnected, - tcpip.ErrNoPortAvailable: ErrNoPortAvailable, - tcpip.ErrPortInUse: ErrPortInUse, - tcpip.ErrBadLocalAddress: ErrBadLocalAddress, - tcpip.ErrClosedForSend: ErrClosedForSend, - tcpip.ErrClosedForReceive: ErrClosedForReceive, - tcpip.ErrWouldBlock: ErrWouldBlock, - tcpip.ErrConnectionRefused: ErrConnectionRefused, - tcpip.ErrTimeout: ErrTimeout, - tcpip.ErrAborted: ErrAborted, - tcpip.ErrConnectStarted: ErrConnectStarted, - tcpip.ErrDestinationRequired: ErrDestinationRequired, - tcpip.ErrNotSupported: ErrNotSupported, - tcpip.ErrQueueSizeNotSupported: ErrQueueSizeNotSupported, - tcpip.ErrNotConnected: ErrNotConnected, - tcpip.ErrConnectionReset: ErrConnectionReset, - tcpip.ErrConnectionAborted: ErrConnectionAborted, - tcpip.ErrNoSuchFile: ErrNoSuchFile, - tcpip.ErrInvalidOptionValue: ErrInvalidOptionValue, - tcpip.ErrNoLinkAddress: ErrHostDown, - tcpip.ErrBadAddress: ErrBadAddress, - tcpip.ErrNetworkUnreachable: ErrNetworkUnreachable, - tcpip.ErrMessageTooLong: ErrMessageTooLong, - tcpip.ErrNoBufferSpace: ErrNoBufferSpace, - tcpip.ErrBroadcastDisabled: ErrBroadcastDisabled, - tcpip.ErrNotPermitted: ErrNotPermittedNet, + tcpip.ErrUnknownProtocol: ErrUnknownProtocol, + tcpip.ErrUnknownNICID: ErrUnknownNICID, + tcpip.ErrUnknownDevice: ErrUnknownDevice, + tcpip.ErrUnknownProtocolOption: ErrUnknownProtocolOption, + tcpip.ErrDuplicateNICID: ErrDuplicateNICID, + tcpip.ErrDuplicateAddress: ErrDuplicateAddress, + tcpip.ErrNoRoute: ErrNoRoute, + tcpip.ErrBadLinkEndpoint: ErrBadLinkEndpoint, + tcpip.ErrAlreadyBound: ErrAlreadyBound, + tcpip.ErrInvalidEndpointState: ErrInvalidEndpointState, + tcpip.ErrAlreadyConnecting: ErrAlreadyConnecting, + tcpip.ErrAlreadyConnected: ErrAlreadyConnected, + tcpip.ErrNoPortAvailable: ErrNoPortAvailable, + tcpip.ErrPortInUse: ErrPortInUse, + tcpip.ErrBadLocalAddress: ErrBadLocalAddress, + tcpip.ErrClosedForSend: ErrClosedForSend, + tcpip.ErrClosedForReceive: ErrClosedForReceive, + tcpip.ErrWouldBlock: ErrWouldBlock, + tcpip.ErrConnectionRefused: ErrConnectionRefused, + tcpip.ErrTimeout: ErrTimeout, + tcpip.ErrAborted: ErrAborted, + tcpip.ErrConnectStarted: ErrConnectStarted, + tcpip.ErrDestinationRequired: ErrDestinationRequired, + tcpip.ErrNotSupported: ErrNotSupported, + tcpip.ErrQueueSizeNotSupported: ErrQueueSizeNotSupported, + tcpip.ErrNotConnected: ErrNotConnected, + tcpip.ErrConnectionReset: ErrConnectionReset, + tcpip.ErrConnectionAborted: ErrConnectionAborted, + tcpip.ErrNoSuchFile: ErrNoSuchFile, + tcpip.ErrInvalidOptionValue: ErrInvalidOptionValue, + tcpip.ErrNoLinkAddress: ErrHostDown, + tcpip.ErrBadAddress: ErrBadAddress, + tcpip.ErrNetworkUnreachable: ErrNetworkUnreachable, + tcpip.ErrMessageTooLong: ErrMessageTooLong, + tcpip.ErrNoBufferSpace: ErrNoBufferSpace, + tcpip.ErrBroadcastDisabled: ErrBroadcastDisabled, + tcpip.ErrNotPermitted: ErrNotPermittedNet, + tcpip.ErrAddressFamilyNotSupported: ErrAddressFamilyNotSupported, } // TranslateNetstackError converts an error from the tcpip package to a sentry diff --git a/pkg/syserr/syserr.go b/pkg/syserr/syserr.go index 4ddbd3322..ac4b799c3 100644 --- a/pkg/syserr/syserr.go +++ b/pkg/syserr/syserr.go @@ -21,8 +21,8 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" ) // Error represents an internal error. diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD index ac478d0ff..b149f9e02 100644 --- a/pkg/syserror/BUILD +++ b/pkg/syserror/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "syserror", srcs = ["syserror.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/syserror", + importpath = "gvisor.dev/gvisor/pkg/syserror", visibility = ["//visibility:public"], ) diff --git a/pkg/syserror/syserror_test.go b/pkg/syserror/syserror_test.go index f2a10ee7b..29719752e 100644 --- a/pkg/syserror/syserror_test.go +++ b/pkg/syserror/syserror_test.go @@ -19,7 +19,7 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserror" ) var globalError error diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 83524cc8a..047f8329a 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -8,7 +8,7 @@ go_library( "tcpip.go", "time_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip", + importpath = "gvisor.dev/gvisor/pkg/tcpip", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD index ee2417238..c40924852 100644 --- a/pkg/tcpip/adapters/gonet/BUILD +++ b/pkg/tcpip/adapters/gonet/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "gonet", srcs = ["gonet.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/adapters/gonet", + importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index 2153222cf..308f620e5 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -23,12 +23,12 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) var ( diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go index 2552004a9..39efe44c7 100644 --- a/pkg/tcpip/adapters/gonet/gonet_test.go +++ b/pkg/tcpip/adapters/gonet/gonet_test.go @@ -25,14 +25,14 @@ import ( "time" "golang.org/x/net/nettest" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index 648d12cdf..3301967fb 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -8,7 +8,7 @@ go_library( "prependable.go", "view.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer", + importpath = "gvisor.dev/gvisor/pkg/tcpip/buffer", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go index 1a9d40778..150310c11 100644 --- a/pkg/tcpip/buffer/view.go +++ b/pkg/tcpip/buffer/view.go @@ -50,7 +50,7 @@ func (v View) ToVectorisedView() VectorisedView { return NewVectorisedView(len(v), []View{v}) } -// VectorisedView is a vectorised version of View using non contigous memory. +// VectorisedView is a vectorised version of View using non contiguous memory. // It supports all the convenience methods supported by View. // // +stateify savable diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD index f597d0b24..4cecfb989 100644 --- a/pkg/tcpip/checker/BUILD +++ b/pkg/tcpip/checker/BUILD @@ -6,7 +6,7 @@ go_library( name = "checker", testonly = 1, srcs = ["checker.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/checker", + importpath = "gvisor.dev/gvisor/pkg/tcpip/checker", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 6e7edf3ab..afcabd51d 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // NetworkChecker is a function to check a property of a network packet. diff --git a/pkg/tcpip/hash/jenkins/BUILD b/pkg/tcpip/hash/jenkins/BUILD index ce2194a4d..29b30be9c 100644 --- a/pkg/tcpip/hash/jenkins/BUILD +++ b/pkg/tcpip/hash/jenkins/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "jenkins", srcs = ["jenkins.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins", + importpath = "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index a5c7290ee..76ef02f13 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -18,7 +18,7 @@ go_library( "tcp.go", "udp.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/header", + importpath = "gvisor.dev/gvisor/pkg/tcpip/header", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/header/arp.go b/pkg/tcpip/header/arp.go index 55fe7292c..718a4720a 100644 --- a/pkg/tcpip/header/arp.go +++ b/pkg/tcpip/header/arp.go @@ -14,7 +14,7 @@ package header -import "gvisor.googlesource.com/gvisor/pkg/tcpip" +import "gvisor.dev/gvisor/pkg/tcpip" const ( // ARPProtocolNumber is the ARP network protocol number. diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index 2eaa7938a..39a4d69be 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -19,8 +19,8 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) func calculateChecksum(buf []byte, initial uint32) uint16 { diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go index 76143f454..4c3d3311f 100644 --- a/pkg/tcpip/header/eth.go +++ b/pkg/tcpip/header/eth.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 782e1053c..c081de61f 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // ICMPv4 represents an ICMPv4 header stored in a byte array. diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go index d0b10d849..3cc57e234 100644 --- a/pkg/tcpip/header/icmpv6.go +++ b/pkg/tcpip/header/icmpv6.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // ICMPv6 represents an ICMPv6 header stored in a byte array. diff --git a/pkg/tcpip/header/interfaces.go b/pkg/tcpip/header/interfaces.go index fb250ea30..861cbbb70 100644 --- a/pkg/tcpip/header/interfaces.go +++ b/pkg/tcpip/header/interfaces.go @@ -15,7 +15,7 @@ package header import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 96e461491..7da4c4845 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index 66820a466..7163eaa36 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -18,7 +18,7 @@ import ( "encoding/binary" "strings" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipv6_fragment.go b/pkg/tcpip/header/ipv6_fragment.go index 6d896355a..018555a26 100644 --- a/pkg/tcpip/header/ipv6_fragment.go +++ b/pkg/tcpip/header/ipv6_fragment.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipversion_test.go b/pkg/tcpip/header/ipversion_test.go index 0c830180e..b5540bf66 100644 --- a/pkg/tcpip/header/ipversion_test.go +++ b/pkg/tcpip/header/ipversion_test.go @@ -17,7 +17,7 @@ package header_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/header" ) func TestIPv4(t *testing.T) { diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 0cd89b992..1141443bb 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -18,8 +18,8 @@ import ( "encoding/binary" "github.com/google/btree" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // These constants are the offsets of the respective fields in the TCP header. diff --git a/pkg/tcpip/header/tcp_test.go b/pkg/tcpip/header/tcp_test.go index 9a2b99489..72563837b 100644 --- a/pkg/tcpip/header/tcp_test.go +++ b/pkg/tcpip/header/tcp_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/header" ) func TestEncodeSACKBlocks(t *testing.T) { diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go index 2205fec18..c1f454805 100644 --- a/pkg/tcpip/header/udp.go +++ b/pkg/tcpip/header/udp.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD new file mode 100644 index 000000000..fc9abbb55 --- /dev/null +++ b/pkg/tcpip/iptables/BUILD @@ -0,0 +1,18 @@ +package(licenses = ["notice"]) + +load("//tools/go_stateify:defs.bzl", "go_library") + +go_library( + name = "iptables", + srcs = [ + "iptables.go", + "targets.go", + "types.go", + ], + importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", + visibility = ["//visibility:public"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/buffer", + ], +) diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go new file mode 100644 index 000000000..f1e1d1fad --- /dev/null +++ b/pkg/tcpip/iptables/iptables.go @@ -0,0 +1,81 @@ +// Copyright 2019 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package iptables supports packet filtering and manipulation via the iptables +// tool. +package iptables + +const ( + tablenameNat = "nat" + tablenameMangle = "mangle" +) + +// Chain names as defined by net/ipv4/netfilter/ip_tables.c. +const ( + chainNamePrerouting = "PREROUTING" + chainNameInput = "INPUT" + chainNameForward = "FORWARD" + chainNameOutput = "OUTPUT" + chainNamePostrouting = "POSTROUTING" +) + +// DefaultTables returns a default set of tables. Each chain is set to accept +// all packets. +func DefaultTables() *IPTables { + return &IPTables{ + Tables: map[string]Table{ + tablenameNat: Table{ + BuiltinChains: map[Hook]Chain{ + Prerouting: unconditionalAcceptChain(chainNamePrerouting), + Input: unconditionalAcceptChain(chainNameInput), + Output: unconditionalAcceptChain(chainNameOutput), + Postrouting: unconditionalAcceptChain(chainNamePostrouting), + }, + DefaultTargets: map[Hook]Target{ + Prerouting: UnconditionalAcceptTarget{}, + Input: UnconditionalAcceptTarget{}, + Output: UnconditionalAcceptTarget{}, + Postrouting: UnconditionalAcceptTarget{}, + }, + UserChains: map[string]Chain{}, + }, + tablenameMangle: Table{ + BuiltinChains: map[Hook]Chain{ + Prerouting: unconditionalAcceptChain(chainNamePrerouting), + Output: unconditionalAcceptChain(chainNameOutput), + }, + DefaultTargets: map[Hook]Target{ + Prerouting: UnconditionalAcceptTarget{}, + Output: UnconditionalAcceptTarget{}, + }, + UserChains: map[string]Chain{}, + }, + }, + Priorities: map[Hook][]string{ + Prerouting: []string{tablenameMangle, tablenameNat}, + Output: []string{tablenameMangle, tablenameNat}, + }, + } +} + +func unconditionalAcceptChain(name string) Chain { + return Chain{ + Name: name, + Rules: []Rule{ + Rule{ + Target: UnconditionalAcceptTarget{}, + }, + }, + } +} diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go new file mode 100644 index 000000000..19a7f77e3 --- /dev/null +++ b/pkg/tcpip/iptables/targets.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains various Targets. + +package iptables + +import "gvisor.dev/gvisor/pkg/tcpip/buffer" + +// UnconditionalAcceptTarget accepts all packets. +type UnconditionalAcceptTarget struct{} + +// Action implements Target.Action. +func (UnconditionalAcceptTarget) Action(packet buffer.VectorisedView) (Verdict, string) { + return Accept, "" +} + +// UnconditionalDropTarget denies all packets. +type UnconditionalDropTarget struct{} + +// Action implements Target.Action. +func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) { + return Drop, "" +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go new file mode 100644 index 000000000..600bd9a10 --- /dev/null +++ b/pkg/tcpip/iptables/types.go @@ -0,0 +1,183 @@ +// Copyright 2019 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" +) + +// A Hook specifies one of the hooks built into the network stack. +// +// Userspace app Userspace app +// ^ | +// | v +// [Input] [Output] +// ^ | +// | v +// | routing +// | | +// | v +// ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]-----> +type Hook uint + +// These values correspond to values in include/uapi/linux/netfilter.h. +const ( + // Prerouting happens before a packet is routed to applications or to + // be forwarded. + Prerouting Hook = iota + + // Input happens before a packet reaches an application. + Input + + // Forward happens once it's decided that a packet should be forwarded + // to another host. + Forward + + // Output happens after a packet is written by an application to be + // sent out. + Output + + // Postrouting happens just before a packet goes out on the wire. + Postrouting + + // The total number of hooks. + NumHooks +) + +// A Verdict is returned by a rule's target to indicate how traversal of rules +// should (or should not) continue. +type Verdict int + +const ( + // Accept indicates the packet should continue traversing netstack as + // normal. + Accept Verdict = iota + + // Drop inicates the packet should be dropped, stopping traversing + // netstack. + Drop + + // Stolen indicates the packet was co-opted by the target and should + // stop traversing netstack. + Stolen + + // Queue indicates the packet should be queued for userspace processing. + Queue + + // Repeat indicates the packet should re-traverse the chains for the + // current hook. + Repeat + + // None indicates no verdict was reached. + None + + // Jump indicates a jump to another chain. + Jump + + // Continue indicates that traversal should continue at the next rule. + Continue + + // Return indicates that traversal should return to the calling chain. + Return +) + +// IPTables holds all the tables for a netstack. +type IPTables struct { + // Tables maps table names to tables. User tables have arbitrary names. + Tables map[string]Table + + // Priorities maps each hook to a list of table names. The order of the + // list is the order in which each table should be visited for that + // hook. + Priorities map[Hook][]string +} + +// A Table defines a set of chains and hooks into the network stack. The +// currently supported tables are: +// * nat +// * mangle +type Table struct { + // BuiltinChains holds the un-deletable chains built into netstack. If + // a hook isn't present in the map, this table doesn't utilize that + // hook. + BuiltinChains map[Hook]Chain + + // DefaultTargets holds a target for each hook that will be executed if + // chain traversal doesn't yield a verdict. + DefaultTargets map[Hook]Target + + // UserChains holds user-defined chains for the keyed by name. Users + // can give their chains arbitrary names. + UserChains map[string]Chain + + // Chains maps names to chains for both builtin and user-defined chains. + // Its entries point to Chains already either in BuiltinChains or + // UserChains, and its purpose is to make looking up tables by name + // fast. + Chains map[string]*Chain +} + +// ValidHooks returns a bitmap of the builtin hooks for the given table. +func (table *Table) ValidHooks() (uint32, *tcpip.Error) { + hooks := uint32(0) + for hook, _ := range table.BuiltinChains { + hooks |= 1 << hook + } + return hooks, nil +} + +// A Chain defines a list of rules for packet processing. When a packet +// traverses a chain, it is checked against each rule until either a rule +// returns a verdict or the chain ends. +// +// By convention, builtin chains end with a rule that matches everything and +// returns either Accept or Drop. User-defined chains end with Return. These +// aren't strictly necessary here, but the iptables tool writes tables this way. +type Chain struct { + // Name is the chain name. + Name string + + // Rules is the list of rules to traverse. + Rules []Rule +} + +// A Rule is a packet processing rule. It consists of two pieces. First it +// contains zero or more matchers, each of which is a specification of which +// packets this rule applies to. If there are no matchers in the rule, it +// applies to any packet. +type Rule struct { + // Matchers is the list of matchers for this rule. + Matchers []Matcher + + // Target is the action to invoke if all the matchers match the packet. + Target Target +} + +// A Matcher is the interface for matching packets. +type Matcher interface { + // Match returns whether the packet matches and whether the packet + // should be "hotdropped", i.e. dropped immediately. This is usually + // used for suspicious packets. + Match(hook Hook, packet buffer.VectorisedView, interfaceName string) (matches bool, hotdrop bool) +} + +// A Target is the interface for taking an action for a packet. +type Target interface { + // Action takes an action on the packet and returns a verdict on how + // traversal should (or should not) continue. If the return value is + // Jump, it also returns the name of the chain to jump to. + Action(packet buffer.VectorisedView) (Verdict, string) +} diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index ae285e495..97a794986 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "channel", srcs = ["channel.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index ee9dd8700..c40744b8e 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -18,9 +18,9 @@ package channel import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // PacketInfo holds all the information about an outbound packet. diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index cef98c353..d786d8fdf 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -12,7 +12,7 @@ go_library( "mmap_amd64_unsafe.go", "packet_dispatchers.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index b88e2e7bf..77f988b9f 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -44,11 +44,11 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // linkDispatcher reads packets from the link FD and dispatches them to the diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index ba3e09192..e305252d6 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -26,11 +26,11 @@ import ( "time" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 6b7f2a185..fe19c2bc2 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -16,7 +16,7 @@ package fdbased -import "gvisor.googlesource.com/gvisor/pkg/tcpip" +import "gvisor.dev/gvisor/pkg/tcpip" // Stubbed out version for non-linux/non-amd64 platforms. diff --git a/pkg/tcpip/link/fdbased/mmap_amd64.go b/pkg/tcpip/link/fdbased/mmap_amd64.go index 1c2d8c468..8bbb4f9ab 100644 --- a/pkg/tcpip/link/fdbased/mmap_amd64.go +++ b/pkg/tcpip/link/fdbased/mmap_amd64.go @@ -21,10 +21,10 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" ) const ( diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 1ae0e3359..7ca217e5b 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -19,11 +19,11 @@ package fdbased import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // BufConfig defines the shape of the vectorised view used to read packets from the NIC. diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD index 710a05ede..47a54845c 100644 --- a/pkg/tcpip/link/loopback/BUILD +++ b/pkg/tcpip/link/loopback/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "loopback", srcs = ["loopback.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index 2c1148123..ab6a53988 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -21,9 +21,9 @@ package loopback import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) type endpoint struct { diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD index 00f007888..ea12ef1ac 100644 --- a/pkg/tcpip/link/muxed/BUILD +++ b/pkg/tcpip/link/muxed/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "muxed", srcs = ["injectable.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/muxed", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index be07b7c29..a577a3d52 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -16,9 +16,9 @@ package muxed import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // InjectableEndpoint is an injectable multi endpoint. The endpoint has diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go index 5d40dfacc..174b9330f 100644 --- a/pkg/tcpip/link/muxed/injectable_test.go +++ b/pkg/tcpip/link/muxed/injectable_test.go @@ -21,11 +21,11 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) func TestInjectableEndpointRawDispatch(t *testing.T) { diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD index f01bb2c07..6e3a7a9d7 100644 --- a/pkg/tcpip/link/rawfile/BUILD +++ b/pkg/tcpip/link/rawfile/BUILD @@ -11,7 +11,7 @@ go_library( "errors.go", "rawfile_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 8bde41637..a0a873c84 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -20,7 +20,7 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const maxErrno = 134 @@ -30,7 +30,7 @@ var translations [maxErrno]*tcpip.Error // TranslateErrno translate an errno from the syscall package into a // *tcpip.Error. // -// Valid, but unreconigized errnos will be translated to +// Valid, but unrecognized errnos will be translated to // tcpip.ErrInvalidEndpointState (EINVAL). Panics on invalid errnos. func TranslateErrno(e syscall.Errno) *tcpip.Error { if err := translations[e]; err != nil { diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 86db7a487..e3fbb15c2 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -22,7 +22,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // GetMTU determines the MTU of a network interface device. @@ -110,7 +110,7 @@ type PollEvent struct { // BlockingRead reads from a file descriptor that is set up as non-blocking. If // no data is available, it will block in a poll() syscall until the file -// descirptor becomes readable. +// descriptor becomes readable. func BlockingRead(fd int, b []byte) (int, *tcpip.Error) { for { n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b))) diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD index dc8f1543e..f2998aa98 100644 --- a/pkg/tcpip/link/sharedmem/BUILD +++ b/pkg/tcpip/link/sharedmem/BUILD @@ -10,7 +10,7 @@ go_library( "sharedmem_unsafe.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem", visibility = [ "//:sandbox", ], diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD index 85deafa38..94725cb11 100644 --- a/pkg/tcpip/link/sharedmem/pipe/BUILD +++ b/pkg/tcpip/link/sharedmem/pipe/BUILD @@ -10,7 +10,7 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe", visibility = ["//:sandbox"], ) diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD index d7dc631eb..160a8f864 100644 --- a/pkg/tcpip/link/sharedmem/queue/BUILD +++ b/pkg/tcpip/link/sharedmem/queue/BUILD @@ -8,7 +8,7 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/sharedmem/queue/queue_test.go b/pkg/tcpip/link/sharedmem/queue/queue_test.go index d3f8f4b8b..9a0aad5d7 100644 --- a/pkg/tcpip/link/sharedmem/queue/queue_test.go +++ b/pkg/tcpip/link/sharedmem/queue/queue_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) func TestBasicTxQueue(t *testing.T) { diff --git a/pkg/tcpip/link/sharedmem/queue/rx.go b/pkg/tcpip/link/sharedmem/queue/rx.go index d9aecf2d9..696e6c9e5 100644 --- a/pkg/tcpip/link/sharedmem/queue/rx.go +++ b/pkg/tcpip/link/sharedmem/queue/rx.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) const ( diff --git a/pkg/tcpip/link/sharedmem/queue/tx.go b/pkg/tcpip/link/sharedmem/queue/tx.go index a24dccd11..beffe807b 100644 --- a/pkg/tcpip/link/sharedmem/queue/tx.go +++ b/pkg/tcpip/link/sharedmem/queue/tx.go @@ -17,8 +17,8 @@ package queue import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) const ( diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go index 215cb607f..eec11e4cb 100644 --- a/pkg/tcpip/link/sharedmem/rx.go +++ b/pkg/tcpip/link/sharedmem/rx.go @@ -20,8 +20,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" ) // rx holds all state associated with an rx queue. diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index e34b780f8..834ea5c40 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -27,12 +27,12 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // QueueConfig holds all the file descriptors needed to describe a tx or rx diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 65b9d7085..98036f367 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -27,12 +27,12 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -636,7 +636,7 @@ func TestSimpleReceive(t *testing.T) { syscall.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0}) // Wait for packet to be received, then check it. - c.waitForPackets(1, time.After(time.Second), "Error waiting for packet") + c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet") c.mu.Lock() rcvd := []byte(c.packets[0].vv.First()) c.packets = c.packets[:0] diff --git a/pkg/tcpip/link/sharedmem/tx.go b/pkg/tcpip/link/sharedmem/tx.go index ac3577aa6..6b8d7859d 100644 --- a/pkg/tcpip/link/sharedmem/tx.go +++ b/pkg/tcpip/link/sharedmem/tx.go @@ -18,7 +18,7 @@ import ( "math" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" ) const ( diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD index 7d0d1781e..1756114e6 100644 --- a/pkg/tcpip/link/sniffer/BUILD +++ b/pkg/tcpip/link/sniffer/BUILD @@ -8,7 +8,7 @@ go_library( "pcap.go", "sniffer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 98581e50e..fc584c6a4 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -29,11 +29,11 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // LogPackets is a flag used to enable or disable packet logging via the log diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index e54852d3f..92dce8fac 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "tun", srcs = ["tun_unsafe.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD index 89a9eee23..2597d4b3e 100644 --- a/pkg/tcpip/link/waitable/BUILD +++ b/pkg/tcpip/link/waitable/BUILD @@ -7,7 +7,7 @@ go_library( srcs = [ "waitable.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/waitable", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index 21690a226..3b6ac2ff7 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -22,10 +22,10 @@ package waitable import ( - "gvisor.googlesource.com/gvisor/pkg/gate" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/gate" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // Endpoint is a waitable link-layer endpoint. diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index 62054fb7f..56e18ecb0 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -17,9 +17,9 @@ package waitable import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) type countedEndpoint struct { diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index 2a355e689..d95d44f56 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "arp", srcs = ["arp.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/arp", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index a3f2bce3e..ca3d6c0bf 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -26,10 +26,10 @@ package arp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 1b971b1a3..66c55821b 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -18,15 +18,15 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" ) const ( diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index bf0a7b99c..118bfc763 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -23,7 +23,7 @@ go_library( "reassembler.go", "reassembler_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/network/fragmentation/frag_heap.go b/pkg/tcpip/network/fragmentation/frag_heap.go index 9ad3e5a8a..0b570d25a 100644 --- a/pkg/tcpip/network/fragmentation/frag_heap.go +++ b/pkg/tcpip/network/fragmentation/frag_heap.go @@ -18,7 +18,7 @@ import ( "container/heap" "fmt" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) type fragment struct { diff --git a/pkg/tcpip/network/fragmentation/frag_heap_test.go b/pkg/tcpip/network/fragmentation/frag_heap_test.go index 3a2486ba8..9ececcb9f 100644 --- a/pkg/tcpip/network/fragmentation/frag_heap_test.go +++ b/pkg/tcpip/network/fragmentation/frag_heap_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) var reassambleTestCases = []struct { diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index e90edb375..1628a82be 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -21,7 +21,7 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time. @@ -60,7 +60,7 @@ type Fragmentation struct { // lowMemoryLimit specifies the limit on which we will reach by dropping // fragments after reaching highMemoryLimit. // -// reassemblingTimeout specifes the maximum time allowed to reassemble a packet. +// reassemblingTimeout specifies the maximum time allowed to reassemble a packet. // Fragments are lazily evicted only when a new a packet with an // already existing fragmentation-id arrives after the timeout. func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation { @@ -80,7 +80,7 @@ func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout t } } -// Process processes an incoming fragment beloning to an ID +// Process processes an incoming fragment belonging to an ID // and returns a complete packet when all the packets belonging to that ID have been received. func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool) { f.mu.Lock() diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go index 99ded68a3..799798544 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation_test.go +++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // vv is a helper to build VectorisedView from different strings. diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go index 04f9ab964..8037f734b 100644 --- a/pkg/tcpip/network/fragmentation/reassembler.go +++ b/pkg/tcpip/network/fragmentation/reassembler.go @@ -21,7 +21,7 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) type hole struct { diff --git a/pkg/tcpip/network/hash/BUILD b/pkg/tcpip/network/hash/BUILD index ea520c6ed..e6db5c0b0 100644 --- a/pkg/tcpip/network/hash/BUILD +++ b/pkg/tcpip/network/hash/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "hash", srcs = ["hash.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/hash", visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/network/hash/hash.go b/pkg/tcpip/network/hash/hash.go index 0c91905dc..6a215938b 100644 --- a/pkg/tcpip/network/hash/hash.go +++ b/pkg/tcpip/network/hash/hash.go @@ -18,8 +18,8 @@ package hash import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/tcpip/header" ) var hashIV = RandN32(1)[0] diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 4b822e2c6..db65ee7cc 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -17,15 +17,15 @@ package ip_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" ) const ( diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 1b4f29e0c..be84fa63d 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -8,7 +8,7 @@ go_library( "icmp.go", "ipv4.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv4", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index 770f56c3d..bc7f1c42a 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -17,9 +17,9 @@ package ipv4 import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // handleControl handles the case when an ICMP packet contains the headers of diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 44b1d5b9b..1e3a7425a 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -23,12 +23,12 @@ package ipv4 import ( "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation" + "gvisor.dev/gvisor/pkg/tcpip/network/hash" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 10a939287..3207a3d46 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -20,16 +20,16 @@ import ( "math/rand" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) func TestExcludeBroadcast(t *testing.T) { diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD index 247e14e37..fae7f4507 100644 --- a/pkg/tcpip/network/ipv6/BUILD +++ b/pkg/tcpip/network/ipv6/BUILD @@ -8,7 +8,7 @@ go_library( "icmp.go", "ipv6.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv6", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 9c011e107..5e6a59e91 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -17,10 +17,10 @@ package ipv6 import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // handleControl handles the case when an ICMP packet contains the headers of diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index d8737a616..d46d68e73 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -20,14 +20,14 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index bcae98e1f..27367d6c5 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -21,10 +21,10 @@ package ipv6 import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index 3ee80c62b..989058413 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "ports", srcs = ["ports.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/ports", + importpath = "gvisor.dev/gvisor/pkg/tcpip/ports", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index a1712b590..315780c0c 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -20,7 +20,7 @@ import ( "math/rand" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index 8466c661b..689401661 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -17,7 +17,7 @@ package ports import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index 1fa899e7e..3ac381631 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -50,16 +50,16 @@ import ( "strconv" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) // writer reads from standard input and writes to the endpoint until standard diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index d47085581..da425394a 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -29,16 +29,16 @@ import ( "strings" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) var tap = flag.Bool("tap", false, "use tap istead of tun") diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD index a63665efc..76b5f4ffa 100644 --- a/pkg/tcpip/seqnum/BUILD +++ b/pkg/tcpip/seqnum/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "seqnum", srcs = ["seqnum.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum", + importpath = "gvisor.dev/gvisor/pkg/tcpip/seqnum", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 551c3c73e..28d11c797 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -13,7 +13,7 @@ go_library( "stack_global_state.go", "transport_demuxer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/stack", + importpath = "gvisor.dev/gvisor/pkg/tcpip/stack", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go index b952ad20f..77bb0ccb9 100644 --- a/pkg/tcpip/stack/linkaddrcache.go +++ b/pkg/tcpip/stack/linkaddrcache.go @@ -19,8 +19,8 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" ) const linkAddrCacheSize = 512 // max cache entries diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go index 91b2ffea8..924f4d240 100644 --- a/pkg/tcpip/stack/linkaddrcache_test.go +++ b/pkg/tcpip/stack/linkaddrcache_test.go @@ -20,8 +20,8 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" ) type testaddr struct { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 50d35de88..30c0dee42 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -19,10 +19,10 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/ilist" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/ilist" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // NIC represents a "network interface card" to which the networking stack is diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index c70533a35..0ecaa0833 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -17,10 +17,10 @@ package stack import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // NetworkEndpointID is the identifier of a network layer protocol endpoint. diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index 55ed02479..36d7b6ac7 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -15,10 +15,10 @@ package stack import ( - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // Route represents a route through the networking stack to a given destination. diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 9d8e8cda5..2d7f56ca9 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -28,13 +28,13 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/ports" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/ports" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -225,6 +225,45 @@ type TCPSACKInfo struct { MaxSACKED seqnum.Value } +// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning. +type RcvBufAutoTuneParams struct { + // MeasureTime is the time at which the current measurement + // was started. + MeasureTime time.Time + + // CopiedBytes is the number of bytes copied to user space since + // this measure began. + CopiedBytes int + + // PrevCopiedBytes is the number of bytes copied to user space in + // the previous RTT period. + PrevCopiedBytes int + + // RcvBufSize is the auto tuned receive buffer size. + RcvBufSize int + + // RTT is the smoothed RTT as measured by observing the time between + // when a byte is first acknowledged and the receipt of data that is at + // least one window beyond the sequence number that was acknowledged. + RTT time.Duration + + // RTTVar is the "round-trip time variation" as defined in section 2 + // of RFC6298. + RTTVar time.Duration + + // RTTMeasureSeqNumber is the highest acceptable sequence number at the + // time this RTT measurement period began. + RTTMeasureSeqNumber seqnum.Value + + // RTTMeasureTime is the absolute time at which the current RTT + // measurement period began. + RTTMeasureTime time.Time + + // Disabled is true if an explicit receive buffer is set for the + // endpoint. + Disabled bool +} + // TCPEndpointState is a copy of the internal state of a TCP endpoint. type TCPEndpointState struct { // ID is a copy of the TransportEndpointID for the endpoint. @@ -240,6 +279,10 @@ type TCPEndpointState struct { // buffer for the endpoint. RcvBufUsed int + // RcvBufAutoTuneParams is used to hold state variables to compute + // the auto tuned receive buffer size. + RcvAutoParams RcvBufAutoTuneParams + // RcvClosed if true, indicates the endpoint has been closed for reading. RcvClosed bool diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 351f63221..69884af03 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -24,11 +24,11 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 605bfadeb..cf8a6d129 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -19,10 +19,10 @@ import ( "math/rand" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" + "gvisor.dev/gvisor/pkg/tcpip/header" ) type protocolIDs struct { diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index e8a9392b5..788ffcc8c 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -17,12 +17,12 @@ package stack_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -192,6 +192,9 @@ func (f *fakeTransportEndpoint) State() uint32 { return 0 } +func (f *fakeTransportEndpoint) ModerateRecvBuf(copied int) { +} + type fakeTransportGoodOption bool type fakeTransportBadOption bool diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 04c776205..c61f96fb0 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -38,8 +38,8 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // Error represents an error in the netstack error space. Using a special type @@ -66,43 +66,44 @@ func (e *Error) IgnoreStats() bool { // Errors that can be returned by the network stack. var ( - ErrUnknownProtocol = &Error{msg: "unknown protocol"} - ErrUnknownNICID = &Error{msg: "unknown nic id"} - ErrUnknownDevice = &Error{msg: "unknown device"} - ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} - ErrDuplicateNICID = &Error{msg: "duplicate nic id"} - ErrDuplicateAddress = &Error{msg: "duplicate address"} - ErrNoRoute = &Error{msg: "no route"} - ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} - ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} - ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} - ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} - ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} - ErrNoPortAvailable = &Error{msg: "no ports are available"} - ErrPortInUse = &Error{msg: "port is in use"} - ErrBadLocalAddress = &Error{msg: "bad local address"} - ErrClosedForSend = &Error{msg: "endpoint is closed for send"} - ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} - ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} - ErrConnectionRefused = &Error{msg: "connection was refused"} - ErrTimeout = &Error{msg: "operation timed out"} - ErrAborted = &Error{msg: "operation aborted"} - ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} - ErrDestinationRequired = &Error{msg: "destination address is required"} - ErrNotSupported = &Error{msg: "operation not supported"} - ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} - ErrNotConnected = &Error{msg: "endpoint not connected"} - ErrConnectionReset = &Error{msg: "connection reset by peer"} - ErrConnectionAborted = &Error{msg: "connection aborted"} - ErrNoSuchFile = &Error{msg: "no such file"} - ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} - ErrNoLinkAddress = &Error{msg: "no remote link address"} - ErrBadAddress = &Error{msg: "bad address"} - ErrNetworkUnreachable = &Error{msg: "network is unreachable"} - ErrMessageTooLong = &Error{msg: "message too long"} - ErrNoBufferSpace = &Error{msg: "no buffer space available"} - ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} - ErrNotPermitted = &Error{msg: "operation not permitted"} + ErrUnknownProtocol = &Error{msg: "unknown protocol"} + ErrUnknownNICID = &Error{msg: "unknown nic id"} + ErrUnknownDevice = &Error{msg: "unknown device"} + ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} + ErrDuplicateNICID = &Error{msg: "duplicate nic id"} + ErrDuplicateAddress = &Error{msg: "duplicate address"} + ErrNoRoute = &Error{msg: "no route"} + ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} + ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} + ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} + ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} + ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} + ErrNoPortAvailable = &Error{msg: "no ports are available"} + ErrPortInUse = &Error{msg: "port is in use"} + ErrBadLocalAddress = &Error{msg: "bad local address"} + ErrClosedForSend = &Error{msg: "endpoint is closed for send"} + ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} + ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} + ErrConnectionRefused = &Error{msg: "connection was refused"} + ErrTimeout = &Error{msg: "operation timed out"} + ErrAborted = &Error{msg: "operation aborted"} + ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} + ErrDestinationRequired = &Error{msg: "destination address is required"} + ErrNotSupported = &Error{msg: "operation not supported"} + ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} + ErrNotConnected = &Error{msg: "endpoint not connected"} + ErrConnectionReset = &Error{msg: "connection reset by peer"} + ErrConnectionAborted = &Error{msg: "connection aborted"} + ErrNoSuchFile = &Error{msg: "no such file"} + ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} + ErrNoLinkAddress = &Error{msg: "no remote link address"} + ErrBadAddress = &Error{msg: "bad address"} + ErrNetworkUnreachable = &Error{msg: "network is unreachable"} + ErrMessageTooLong = &Error{msg: "message too long"} + ErrNoBufferSpace = &Error{msg: "no buffer space available"} + ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} + ErrNotPermitted = &Error{msg: "operation not permitted"} + ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"} ) // Errors related to Subnet @@ -339,6 +340,10 @@ type Endpoint interface { // get the actual result. The first call to Connect after the socket has // connected returns nil. Calling connect again results in ErrAlreadyConnected. // Anything else -- the attempt to connect failed. + // + // If address.Addr is empty, this means that Enpoint has to be + // disconnected if this is supported, otherwise + // ErrAddressFamilyNotSupported must be returned. Connect(address FullAddress) *Error // Shutdown closes the read and/or write end of the endpoint connection @@ -381,6 +386,13 @@ type Endpoint interface { // State returns a socket's lifecycle state. The returned value is // protocol-specific and is primarily used for diagnostics. State() uint32 + + // ModerateRecvBuf should be called everytime data is copied to the user + // space. This allows for dynamic tuning of recv buffer space for a + // given socket. + // + // NOTE: This method is a no-op for sockets other than TCP. + ModerateRecvBuf(copied int) } // WriteOptions contains options for Endpoint.Write. @@ -480,6 +492,10 @@ type CongestionControlOption string // control algorithms. type AvailableCongestionControlOption string +// ModerateReceiveBufferOption allows the caller to enable/disable TCP receive +// buffer moderation. +type ModerateReceiveBufferOption bool + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 @@ -523,7 +539,7 @@ type BroadcastOption int // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the -// masked target address matches the destination adddress in the row. +// masked target address matches the destination address in the row. type Route struct { // Destination is the address that must be matched against the masked // target address to check if this row is viable. diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index 84a2b53b7..62182a3e6 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -23,8 +23,8 @@ go_library( "icmp_packet_list.go", "protocol.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/icmp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/sleep", diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index b8005093a..ab9e80747 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -18,11 +18,11 @@ import ( "encoding/binary" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -127,6 +127,9 @@ func (e *endpoint) Close() { e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (e *endpoint) ModerateRecvBuf(copied int) {} + // Read reads data from the endpoint. This method does not block if // there is no data pending. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { @@ -419,6 +422,11 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { e.mu.Lock() defer e.mu.Unlock() + if addr.Addr == "" { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + nicid := addr.NIC localPort := uint16(0) switch e.state { diff --git a/pkg/tcpip/transport/icmp/endpoint_state.go b/pkg/tcpip/transport/icmp/endpoint_state.go index 332b3cd33..99b8c4093 100644 --- a/pkg/tcpip/transport/icmp/endpoint_state.go +++ b/pkg/tcpip/transport/icmp/endpoint_state.go @@ -15,9 +15,9 @@ package icmp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves icmpPacket.data field. diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go index 954fde9d8..c89538131 100644 --- a/pkg/tcpip/transport/icmp/protocol.go +++ b/pkg/tcpip/transport/icmp/protocol.go @@ -26,12 +26,12 @@ import ( "encoding/binary" "fmt" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD index 448fe2900..34a14bf7f 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -22,8 +22,8 @@ go_library( "endpoint_state.go", "packet_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index e4ff50c91..42aded77f 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -16,7 +16,7 @@ // sockets allow applications to: // // * manually write and inspect transport layer headers and payloads -// * receive all traffic of a given transport protcol (e.g. ICMP or UDP) +// * receive all traffic of a given transport protocol (e.g. ICMP or UDP) // * optionally write and inspect network layer and link layer headers for // packets // @@ -29,11 +29,11 @@ package raw import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -147,6 +147,9 @@ func (ep *endpoint) Close() { ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (ep *endpoint) ModerateRecvBuf(copied int) {} + // Read implements tcpip.Endpoint.Read. func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { ep.rcvMu.Lock() @@ -295,6 +298,11 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { ep.mu.Lock() defer ep.mu.Unlock() + if addr.Addr == "" { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + if ep.closed { return tcpip.ErrInvalidEndpointState } diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go index e8907ebb1..cb5534d90 100644 --- a/pkg/tcpip/transport/raw/endpoint_state.go +++ b/pkg/tcpip/transport/raw/endpoint_state.go @@ -15,9 +15,9 @@ package raw import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves packet.data field. diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index a9dbfb930..4cd25e8e2 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -39,8 +39,8 @@ go_library( "tcp_segment_list.go", "timer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index d05259c0a..52fd1bfa3 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -22,13 +22,13 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -213,6 +213,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i n.route = s.route.Clone() n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.route.NetProto} n.rcvBufSize = int(l.rcvWnd) + n.amss = mssForRoute(&n.route) n.maybeEnableTimestamp(rcvdSynOpts) n.maybeEnableSACKPermitted(rcvdSynOpts) @@ -232,7 +233,11 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // The receiver at least temporarily has a zero receive window scale, // but the caller may change it (before starting the protocol loop). n.snd = newSender(n, iss, irs, s.window, rcvdSynOpts.MSS, rcvdSynOpts.WS) - n.rcv = newReceiver(n, irs, l.rcvWnd, 0) + n.rcv = newReceiver(n, irs, seqnum.Size(n.initialReceiveWindow()), 0, seqnum.Size(n.receiveBufferSize())) + // Bootstrap the auto tuning algorithm. Starting at zero will result in + // a large step function on the first window adjustment causing the + // window to grow to a really large value. + n.rcvAutoParams.prevCopied = n.initialReceiveWindow() return n, nil } @@ -249,7 +254,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head } // Perform the 3-way handshake. - h := newHandshake(ep, l.rcvWnd) + h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow())) h.resetToSynRcvd(cookie, irs, opts) if err := h.execute(); err != nil { @@ -359,16 +364,19 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { return } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) - // Send SYN with window scaling because we currently + + // Send SYN without window scaling because we currently // dont't encode this information in the cookie. // // Enable Timestamp option if the original syn did have // the timestamp option specified. + mss := mssForRoute(&s.route) synOpts := header.TCPSynOptions{ WS: -1, TS: opts.TS, TSVal: tcpTimeStamp(timeStampOffset()), TSEcr: opts.TSVal, + MSS: uint16(mss), } sendSynTCP(&s.route, s.id, header.TCPFlagSyn|header.TCPFlagAck, cookie, s.sequenceNumber+1, ctx.rcvWnd, synOpts) e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index dd671f7ce..00d2ae524 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -18,14 +18,14 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // maxSegmentsPerWake is the maximum number of segments to process in the main @@ -78,6 +78,9 @@ type handshake struct { // mss is the maximum segment size received from the peer. mss uint16 + // amss is the maximum segment size advertised by us to the peer. + amss uint16 + // sndWndScale is the send window scale, as defined in RFC 1323. A // negative value means no scaling is supported by the peer. sndWndScale int @@ -87,11 +90,24 @@ type handshake struct { } func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake { + rcvWndScale := ep.rcvWndScaleForHandshake() + + // Round-down the rcvWnd to a multiple of wndScale. This ensures that the + // window offered in SYN won't be reduced due to the loss of precision if + // window scaling is enabled after the handshake. + rcvWnd = (rcvWnd >> uint8(rcvWndScale)) << uint8(rcvWndScale) + + // Ensure we can always accept at least 1 byte if the scale specified + // was too high for the provided rcvWnd. + if rcvWnd == 0 { + rcvWnd = 1 + } + h := handshake{ ep: ep, active: true, rcvWnd: rcvWnd, - rcvWndScale: FindWndScale(rcvWnd), + rcvWndScale: int(rcvWndScale), } h.resetState() return h @@ -224,7 +240,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { h.ep.state = StateSynRecv h.ep.mu.Unlock() synOpts := header.TCPSynOptions{ - WS: h.rcvWndScale, + WS: int(h.effectiveRcvWndScale()), TS: rcvSynOpts.TS, TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, @@ -233,6 +249,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // permits SACK. This is not explicitly defined in the RFC but // this is the behaviour implemented by Linux. SACKPermitted: rcvSynOpts.SACKPermitted, + MSS: h.ep.amss, } sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) @@ -277,6 +294,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, SACKPermitted: h.ep.sackPermitted, + MSS: h.ep.amss, } sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) return nil @@ -419,12 +437,15 @@ func (h *handshake) execute() *tcpip.Error { // Send the initial SYN segment and loop until the handshake is // completed. + h.ep.amss = mssForRoute(&h.ep.route) + synOpts := header.TCPSynOptions{ WS: h.rcvWndScale, TS: true, TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, SACKPermitted: bool(sackEnabled), + MSS: h.ep.amss, } // Execute is also called in a listen context so we want to make sure we @@ -433,6 +454,11 @@ func (h *handshake) execute() *tcpip.Error { if h.state == handshakeSynRcvd { synOpts.TS = h.ep.sendTSOk synOpts.SACKPermitted = h.ep.sackPermitted && bool(sackEnabled) + if h.sndWndScale < 0 { + // Disable window scaling if the peer did not send us + // the window scaling option. + synOpts.WS = -1 + } } sendSynTCP(&h.ep.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) for h.state != handshakeCompleted { @@ -554,13 +580,6 @@ func makeSynOptions(opts header.TCPSynOptions) []byte { } func sendSynTCP(r *stack.Route, id stack.TransportEndpointID, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size, opts header.TCPSynOptions) *tcpip.Error { - // The MSS in opts is automatically calculated as this function is - // called from many places and we don't want every call point being - // embedded with the MSS calculation. - if opts.MSS == 0 { - opts.MSS = uint16(r.MTU() - header.TCPMinimumSize) - } - options := makeSynOptions(opts) err := sendTCP(r, id, buffer.VectorisedView{}, r.DefaultTTL(), flags, seq, ack, rcvWnd, options, nil) putOptions(options) @@ -861,7 +880,8 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // This is an active connection, so we must initiate the 3-way // handshake, and then inform potential waiters about its // completion. - h := newHandshake(e, seqnum.Size(e.receiveBufferAvailable())) + initialRcvWnd := e.initialReceiveWindow() + h := newHandshake(e, seqnum.Size(initialRcvWnd)) e.mu.Lock() h.ep.state = StateSynSent e.mu.Unlock() @@ -886,8 +906,14 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // (indicated by a negative send window scale). e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) + rcvBufSize := seqnum.Size(e.receiveBufferSize()) e.rcvListMu.Lock() - e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale()) + e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize) + // boot strap the auto tuning algorithm. Starting at zero will + // result in a large step function on the first proper causing + // the window to just go to a really large value after the first + // RTT itself. + e.rcvAutoParams.prevCopied = initialRcvWnd e.rcvListMu.Unlock() } diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go index 43bcfa070..d9f79e8c5 100644 --- a/pkg/tcpip/transport/tcp/dual_stack_test.go +++ b/pkg/tcpip/transport/tcp/dual_stack_test.go @@ -18,15 +18,15 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) func TestV4MappedConnectOnV6Only(t *testing.T) { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 1efe9d3fb..cb40fea94 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -22,15 +22,15 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tmutex" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tmutex" + "gvisor.dev/gvisor/pkg/waiter" ) // EndpointState represents the state of a TCP endpoint. @@ -132,6 +132,42 @@ type SACKInfo struct { NumBlocks int } +// rcvBufAutoTuneParams are used to hold state variables to compute +// the auto tuned recv buffer size. +// +// +stateify savable +type rcvBufAutoTuneParams struct { + // measureTime is the time at which the current measurement + // was started. + measureTime time.Time `state:".(unixTime)"` + + // copied is the number of bytes copied out of the receive + // buffers since this measure began. + copied int + + // prevCopied is the number of bytes copied out of the receive + // buffers in the previous RTT period. + prevCopied int + + // rtt is the non-smoothed minimum RTT as measured by observing the time + // between when a byte is first acknowledged and the receipt of data + // that is at least one window beyond the sequence number that was + // acknowledged. + rtt time.Duration + + // rttMeasureSeqNumber is the highest acceptable sequence number at the + // time this RTT measurement period began. + rttMeasureSeqNumber seqnum.Value + + // rttMeasureTime is the absolute time at which the current rtt + // measurement period began. + rttMeasureTime time.Time `state:".(unixTime)"` + + // disabled is true if an explicit receive buffer is set for the + // endpoint. + disabled bool +} + // endpoint represents a TCP endpoint. This struct serves as the interface // between users of the endpoint and the protocol implementation; it is legal to // have concurrent goroutines make calls into the endpoint, they are properly @@ -165,11 +201,18 @@ type endpoint struct { // to indicate to users that no more data is coming. // // rcvListMu can be taken after the endpoint mu below. - rcvListMu sync.Mutex `state:"nosave"` - rcvList segmentList `state:"wait"` - rcvClosed bool - rcvBufSize int - rcvBufUsed int + rcvListMu sync.Mutex `state:"nosave"` + rcvList segmentList `state:"wait"` + rcvClosed bool + rcvBufSize int + rcvBufUsed int + rcvAutoParams rcvBufAutoTuneParams + // zeroWindow indicates that the window was closed due to receive buffer + // space being filled up. This is set by the worker goroutine before + // moving a segment to the rcvList. This setting is cleared by the + // endpoint when a Read() call reads enough data for the new window to + // be non-zero. + zeroWindow bool // The following fields are protected by the mutex. mu sync.RWMutex `state:"nosave"` @@ -339,6 +382,9 @@ type endpoint struct { bindAddress tcpip.Address connectingAddress tcpip.Address + // amss is the advertised MSS to the peer by this endpoint. + amss uint16 + gso *stack.GSO } @@ -373,8 +419,8 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite netProto: netProto, waiterQueue: waiterQueue, state: StateInitial, - rcvBufSize: DefaultBufferSize, - sndBufSize: DefaultBufferSize, + rcvBufSize: DefaultReceiveBufferSize, + sndBufSize: DefaultSendBufferSize, sndMTU: int(math.MaxInt32), reuseAddr: true, keepalive: keepalive{ @@ -400,6 +446,11 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite e.cc = cs } + var mrb tcpip.ModerateReceiveBufferOption + if err := stack.TransportProtocolOption(ProtocolNumber, &mrb); err == nil { + e.rcvAutoParams.disabled = !bool(mrb) + } + if p := stack.GetTCPProbe(); p != nil { e.probe = p } @@ -408,6 +459,7 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite e.workMu.Init() e.workMu.Lock() e.tsOffset = timeStampOffset() + return e } @@ -551,6 +603,83 @@ func (e *endpoint) cleanupLocked() { tcpip.DeleteDanglingEndpoint(e) } +// initialReceiveWindow returns the initial receive window to advertise in the +// SYN/SYN-ACK. +func (e *endpoint) initialReceiveWindow() int { + rcvWnd := e.receiveBufferAvailable() + if rcvWnd > math.MaxUint16 { + rcvWnd = math.MaxUint16 + } + routeWnd := InitialCwnd * int(mssForRoute(&e.route)) * 2 + if rcvWnd > routeWnd { + rcvWnd = routeWnd + } + return rcvWnd +} + +// ModerateRecvBuf adjusts the receive buffer and the advertised window +// based on the number of bytes copied to user space. +func (e *endpoint) ModerateRecvBuf(copied int) { + e.rcvListMu.Lock() + if e.rcvAutoParams.disabled { + e.rcvListMu.Unlock() + return + } + now := time.Now() + if rtt := e.rcvAutoParams.rtt; rtt == 0 || now.Sub(e.rcvAutoParams.measureTime) < rtt { + e.rcvAutoParams.copied += copied + e.rcvListMu.Unlock() + return + } + prevRTTCopied := e.rcvAutoParams.copied + copied + prevCopied := e.rcvAutoParams.prevCopied + rcvWnd := 0 + if prevRTTCopied > prevCopied { + // The minimal receive window based on what was copied by the app + // in the immediate preceding RTT and some extra buffer for 16 + // segments to account for variations. + // We multiply by 2 to account for packet losses. + rcvWnd = prevRTTCopied*2 + 16*int(e.amss) + + // Scale for slow start based on bytes copied in this RTT vs previous. + grow := (rcvWnd * (prevRTTCopied - prevCopied)) / prevCopied + + // Multiply growth factor by 2 again to account for sender being + // in slow-start where the sender grows it's congestion window + // by 100% per RTT. + rcvWnd += grow * 2 + + // Make sure auto tuned buffer size can always receive upto 2x + // the initial window of 10 segments. + if minRcvWnd := int(e.amss) * InitialCwnd * 2; rcvWnd < minRcvWnd { + rcvWnd = minRcvWnd + } + + // Cap the auto tuned buffer size by the maximum permissible + // receive buffer size. + if max := e.maxReceiveBufferSize(); rcvWnd > max { + rcvWnd = max + } + + // We do not adjust downwards as that can cause the receiver to + // reject valid data that might already be in flight as the + // acceptable window will shrink. + if rcvWnd > e.rcvBufSize { + e.rcvBufSize = rcvWnd + e.notifyProtocolGoroutine(notifyReceiveWindowChanged) + } + + // We only update prevCopied when we grow the buffer because in cases + // where prevCopied > prevRTTCopied the existing buffer is already big + // enough to handle the current rate and we don't need to do any + // adjustments. + e.rcvAutoParams.prevCopied = prevRTTCopied + } + e.rcvAutoParams.measureTime = now + e.rcvAutoParams.copied = 0 + e.rcvListMu.Unlock() +} + // Read reads data from the endpoint. func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.mu.RLock() @@ -596,10 +725,12 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { s.decRef() } - scale := e.rcv.rcvWndScale - wasZero := e.zeroReceiveWindow(scale) e.rcvBufUsed -= len(v) - if wasZero && !e.zeroReceiveWindow(scale) { + // If the window was zero before this read and if the read freed up + // enough buffer space for the scaled window to be non-zero then notify + // the protocol goroutine to send a window update. + if e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale) { + e.zeroWindow = false e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow) } @@ -819,9 +950,10 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { size = math.MaxInt32 / 2 } - wasZero := e.zeroReceiveWindow(scale) e.rcvBufSize = size - if wasZero && !e.zeroReceiveWindow(scale) { + e.rcvAutoParams.disabled = true + if e.zeroWindow && !e.zeroReceiveWindow(scale) { + e.zeroWindow = false mask |= notifyNonZeroReceiveWindow } e.rcvListMu.Unlock() @@ -1139,6 +1271,11 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol // Connect connects the endpoint to its peer. func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { + if addr.Addr == "" && addr.Port == 0 { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + return e.connect(addr, true, true) } @@ -1623,6 +1760,13 @@ func (e *endpoint) readyToRead(s *segment) { if s != nil { s.incRef() e.rcvBufUsed += s.data.Size() + // Check if the receive window is now closed. If so make sure + // we set the zero window before we deliver the segment to ensure + // that a subsequent read of the segment will correctly trigger + // a non-zero notification. + if avail := e.receiveBufferAvailableLocked(); avail>>e.rcv.rcvWndScale == 0 { + e.zeroWindow = true + } e.rcvList.PushBack(s) } else { e.rcvClosed = true @@ -1632,21 +1776,26 @@ func (e *endpoint) readyToRead(s *segment) { e.waiterQueue.Notify(waiter.EventIn) } -// receiveBufferAvailable calculates how many bytes are still available in the -// receive buffer. -func (e *endpoint) receiveBufferAvailable() int { - e.rcvListMu.Lock() - size := e.rcvBufSize - used := e.rcvBufUsed - e.rcvListMu.Unlock() - +// receiveBufferAvailableLocked calculates how many bytes are still available +// in the receive buffer. +// rcvListMu must be held when this function is called. +func (e *endpoint) receiveBufferAvailableLocked() int { // We may use more bytes than the buffer size when the receive buffer // shrinks. - if used >= size { + if e.rcvBufUsed >= e.rcvBufSize { return 0 } - return size - used + return e.rcvBufSize - e.rcvBufUsed +} + +// receiveBufferAvailable calculates how many bytes are still available in the +// receive buffer. +func (e *endpoint) receiveBufferAvailable() int { + e.rcvListMu.Lock() + available := e.receiveBufferAvailableLocked() + e.rcvListMu.Unlock() + return available } func (e *endpoint) receiveBufferSize() int { @@ -1657,6 +1806,33 @@ func (e *endpoint) receiveBufferSize() int { return size } +func (e *endpoint) maxReceiveBufferSize() int { + var rs ReceiveBufferSizeOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil { + // As a fallback return the hardcoded max buffer size. + return MaxBufferSize + } + return rs.Max +} + +// rcvWndScaleForHandshake computes the receive window scale to offer to the +// peer when window scaling is enabled (true by default). If auto-tuning is +// disabled then the window scaling factor is based on the size of the +// receiveBuffer otherwise we use the max permissible receive buffer size to +// compute the scale. +func (e *endpoint) rcvWndScaleForHandshake() int { + bufSizeForScale := e.receiveBufferSize() + + e.rcvListMu.Lock() + autoTuningDisabled := e.rcvAutoParams.disabled + e.rcvListMu.Unlock() + if autoTuningDisabled { + return FindWndScale(seqnum.Size(bufSizeForScale)) + } + + return FindWndScale(seqnum.Size(e.maxReceiveBufferSize())) +} + // updateRecentTimestamp updates the recent timestamp using the algorithm // described in https://tools.ietf.org/html/rfc7323#section-4.3 func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) { @@ -1749,6 +1925,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.RcvBufSize = e.rcvBufSize s.RcvBufUsed = e.rcvBufUsed s.RcvClosed = e.rcvClosed + s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime + s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied + s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied + s.RcvAutoParams.RTT = e.rcvAutoParams.rtt + s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber + s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime + s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled e.rcvListMu.Unlock() // Endpoint TCP Option state. @@ -1802,13 +1985,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState { RTTMeasureTime: e.snd.rttMeasureTime, Closed: e.snd.closed, RTO: e.snd.rto, - SRTTInited: e.snd.srttInited, MaxPayloadSize: e.snd.maxPayloadSize, SndWndScale: e.snd.sndWndScale, MaxSentAck: e.snd.maxSentAck, } e.snd.rtt.Lock() s.Sender.SRTT = e.snd.rtt.srtt + s.Sender.SRTTInited = e.snd.rtt.srttInited e.snd.rtt.Unlock() if cubic, ok := e.snd.cc.(*cubicState); ok { @@ -1856,3 +2039,7 @@ func (e *endpoint) State() uint32 { defer e.mu.Unlock() return uint32(e.state) } + +func mssForRoute(r *stack.Route) uint16 { + return uint16(r.MTU() - header.TCPMinimumSize) +} diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 5f30c2374..b93959034 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -19,9 +19,9 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) func (e *endpoint) drainSegmentLocked() { @@ -342,6 +342,7 @@ func loadError(s string) *tcpip.Error { tcpip.ErrNoBufferSpace, tcpip.ErrBroadcastDisabled, tcpip.ErrNotPermitted, + tcpip.ErrAddressFamilyNotSupported, } messageToError = make(map[string]*tcpip.Error) @@ -360,3 +361,23 @@ func loadError(s string) *tcpip.Error { return e } + +// saveMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime { + return unixTime{r.measureTime.Unix(), r.measureTime.UnixNano()} +} + +// loadMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) loadMeasureTime(unix unixTime) { + r.measureTime = time.Unix(unix.second, unix.nano) +} + +// saveRttMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) saveRttMeasureTime() unixTime { + return unixTime{r.rttMeasureTime.Unix(), r.rttMeasureTime.UnixNano()} +} + +// loadRttMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) loadRttMeasureTime(unix unixTime) { + r.rttMeasureTime = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index c30b45c2c..63666f0b3 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -17,12 +17,12 @@ package tcp import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // Forwarder is a connection request forwarder, which allows clients to decide @@ -47,7 +47,7 @@ type Forwarder struct { // If rcvWnd is set to zero, the default buffer size is used instead. func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*ForwarderRequest)) *Forwarder { if rcvWnd == 0 { - rcvWnd = DefaultBufferSize + rcvWnd = DefaultReceiveBufferSize } return &Forwarder{ maxInFlight: maxInFlight, diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 59f4009a1..ee04dcfcc 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -24,13 +24,13 @@ import ( "strings" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -41,13 +41,18 @@ const ( ProtocolNumber = header.TCPProtocolNumber // MinBufferSize is the smallest size of a receive or send buffer. - minBufferSize = 4 << 10 // 4096 bytes. + MinBufferSize = 4 << 10 // 4096 bytes. - // DefaultBufferSize is the default size of the receive and send buffers. - DefaultBufferSize = 1 << 20 // 1MB + // DefaultSendBufferSize is the default size of the send buffer for + // an endpoint. + DefaultSendBufferSize = 1 << 20 // 1MB - // MaxBufferSize is the largest size a receive and send buffer can grow to. - maxBufferSize = 4 << 20 // 4MB + // DefaultReceiveBufferSize is the default size of the receive buffer + // for an endpoint. + DefaultReceiveBufferSize = 1 << 20 // 1MB + + // MaxBufferSize is the largest size a receive/send buffer can grow to. + MaxBufferSize = 4 << 20 // 4MB // MaxUnprocessedSegments is the maximum number of unprocessed segments // that can be queued for a given endpoint. @@ -86,6 +91,7 @@ type protocol struct { recvBufferSize ReceiveBufferSizeOption congestionControl string availableCongestionControl []string + moderateReceiveBuffer bool } // Number returns the tcp protocol number. @@ -192,6 +198,13 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { // linux returns ENOENT when an invalid congestion control // is specified. return tcpip.ErrNoSuchFile + + case tcpip.ModerateReceiveBufferOption: + p.mu.Lock() + p.moderateReceiveBuffer = bool(v) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -217,16 +230,25 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { *v = p.recvBufferSize p.mu.Unlock() return nil + case *tcpip.CongestionControlOption: p.mu.Lock() *v = tcpip.CongestionControlOption(p.congestionControl) p.mu.Unlock() return nil + case *tcpip.AvailableCongestionControlOption: p.mu.Lock() *v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " ")) p.mu.Unlock() return nil + + case *tcpip.ModerateReceiveBufferOption: + p.mu.Lock() + *v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -235,8 +257,8 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { func init() { stack.RegisterTransportProtocolFactory(ProtocolName, func() stack.TransportProtocol { return &protocol{ - sendBufferSize: SendBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize}, - recvBufferSize: ReceiveBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize}, + sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize}, + recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize}, congestionControl: ccReno, availableCongestionControl: []string{ccReno, ccCubic}, } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index f02fa6105..e90f9a7d9 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -16,9 +16,10 @@ package tcp import ( "container/heap" + "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // receiver holds the state necessary to receive TCP segments and turn them @@ -38,6 +39,9 @@ type receiver struct { // shrinking it. rcvAcc seqnum.Value + // rcvWnd is the non-scaled receive window last advertised to the peer. + rcvWnd seqnum.Size + rcvWndScale uint8 closed bool @@ -47,13 +51,14 @@ type receiver struct { pendingBufSize seqnum.Size } -func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver { +func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver { return &receiver{ ep: ep, rcvNxt: irs + 1, rcvAcc: irs.Add(rcvWnd + 1), + rcvWnd: rcvWnd, rcvWndScale: rcvWndScale, - pendingBufSize: rcvWnd, + pendingBufSize: pendingBufSize, } } @@ -72,14 +77,16 @@ func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { // getSendParams returns the parameters needed by the sender when building // segments to send. func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) { - // Calculate the window size based on the current buffer size. - n := r.ep.receiveBufferAvailable() - acc := r.rcvNxt.Add(seqnum.Size(n)) + // Calculate the window size based on the available buffer space. + receiveBufferAvailable := r.ep.receiveBufferAvailable() + acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable)) if r.rcvAcc.LessThan(acc) { r.rcvAcc = acc } - - return r.rcvNxt, r.rcvNxt.Size(r.rcvAcc) >> r.rcvWndScale + // Stash away the non-scaled receive window as we use it for measuring + // receiver's estimated RTT. + r.rcvWnd = r.rcvNxt.Size(r.rcvAcc) + return r.rcvNxt, r.rcvWnd >> r.rcvWndScale } // nonZeroWindow is called when the receive window grows from zero to nonzero; @@ -130,6 +137,21 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum // Update the segment that we're expecting to consume. r.rcvNxt = segSeq.Add(segLen) + // In cases of a misbehaving sender which could send more than the + // advertised window, we could end up in a situation where we get a + // segment that exceeds the window advertised. Instead of partially + // accepting the segment and discarding bytes beyond the advertised + // window, we accept the whole segment and make sure r.rcvAcc is moved + // forward to match r.rcvNxt to indicate that the window is now closed. + // + // In absence of this check the r.acceptable() check fails and accepts + // segments that should be dropped because rcvWnd is calculated as + // the size of the interval (rcvNxt, rcvAcc] which becomes extremely + // large if rcvAcc is ever less than rcvNxt. + if r.rcvAcc.LessThan(r.rcvNxt) { + r.rcvAcc = r.rcvNxt + } + // Trim SACK Blocks to remove any SACK information that covers // sequence numbers that have been consumed. TrimSACKBlockList(&r.ep.sack, r.rcvNxt) @@ -198,6 +220,39 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum return true } +// updateRTT updates the receiver RTT measurement based on the sequence number +// of the received segment. +func (r *receiver) updateRTT() { + // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf + // + // A system that is only transmitting acknowledgements can still + // estimate the round-trip time by observing the time between when a byte + // is first acknowledged and the receipt of data that is at least one + // window beyond the sequence number that was acknowledged. + r.ep.rcvListMu.Lock() + if r.ep.rcvAutoParams.rttMeasureTime.IsZero() { + // New measurement. + r.ep.rcvAutoParams.rttMeasureTime = time.Now() + r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) + r.ep.rcvListMu.Unlock() + return + } + if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) { + r.ep.rcvListMu.Unlock() + return + } + rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime) + // We only store the minimum observed RTT here as this is only used in + // absence of a SRTT available from either timestamps or a sender + // measurement of RTT. + if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt { + r.ep.rcvAutoParams.rtt = rtt + } + r.ep.rcvAutoParams.rttMeasureTime = time.Now() + r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) + r.ep.rcvListMu.Unlock() +} + // handleRcvdSegment handles TCP segments directed at the connection managed by // r as they arrive. It is called by the protocol main loop. func (r *receiver) handleRcvdSegment(s *segment) { @@ -226,10 +281,9 @@ func (r *receiver) handleRcvdSegment(s *segment) { r.pendingBufUsed += s.logicalLen() s.incRef() heap.Push(&r.pendingRcvdSegments, s) + UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt) } - UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt) - // Immediately send an ack so that the peer knows it may // have to retransmit. r.ep.snd.sendAck() @@ -237,6 +291,12 @@ func (r *receiver) handleRcvdSegment(s *segment) { return } + // Since we consumed a segment update the receiver's RTT estimate + // if required. + if segLen > 0 { + r.updateRTT() + } + // By consuming the current segment, we may have filled a gap in the // sequence number domain that allows pending segments to be consumed // now. So try to do it. diff --git a/pkg/tcpip/transport/tcp/sack.go b/pkg/tcpip/transport/tcp/sack.go index 6a013d99b..7be86d68e 100644 --- a/pkg/tcpip/transport/tcp/sack.go +++ b/pkg/tcpip/transport/tcp/sack.go @@ -15,8 +15,8 @@ package tcp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( @@ -31,6 +31,13 @@ const ( // segment identified by segStart->segEnd. func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, rcvNxt seqnum.Value) { newSB := header.SACKBlock{Start: segStart, End: segEnd} + + // Ignore any invalid SACK blocks or blocks that are before rcvNxt as + // those bytes have already been acked. + if newSB.End.LessThanEq(newSB.Start) || newSB.End.LessThan(rcvNxt) { + return + } + if sack.NumBlocks == 0 { sack.Blocks[0] = newSB sack.NumBlocks = 1 @@ -39,9 +46,8 @@ func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value var n = 0 for i := 0; i < sack.NumBlocks; i++ { start, end := sack.Blocks[i].Start, sack.Blocks[i].End - if end.LessThanEq(start) || start.LessThanEq(rcvNxt) { - // Discard any invalid blocks where end is before start - // and discard any sack blocks that are before rcvNxt as + if end.LessThanEq(rcvNxt) { + // Discard any sack blocks that are before rcvNxt as // those have already been acked. continue } diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go index 1c5766a42..7ef2df377 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -19,8 +19,8 @@ import ( "strings" "github.com/google/btree" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go index b59eedc9d..b4e5ba0df 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go @@ -17,9 +17,9 @@ package tcp_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) const smss = 1500 diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go index 450d9fbc1..ea725d513 100644 --- a/pkg/tcpip/transport/tcp/segment.go +++ b/pkg/tcpip/transport/tcp/segment.go @@ -18,10 +18,10 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // segment represents a TCP segment. It holds the payload and parsed TCP segment diff --git a/pkg/tcpip/transport/tcp/segment_state.go b/pkg/tcpip/transport/tcp/segment_state.go index dd7e14aa6..7dc2741a6 100644 --- a/pkg/tcpip/transport/tcp/segment_state.go +++ b/pkg/tcpip/transport/tcp/segment_state.go @@ -17,7 +17,7 @@ package tcp import ( "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // saveData is invoked by stateify. diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index daa3e8341..0fee7ab72 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -20,11 +20,11 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( @@ -121,9 +121,8 @@ type sender struct { // rtt.srtt, rtt.rttvar, and rto are the "smoothed round-trip time", // "round-trip time variation" and "retransmit timeout", as defined in // section 2 of RFC 6298. - rtt rtt - rto time.Duration - srttInited bool + rtt rtt + rto time.Duration // maxPayloadSize is the maximum size of the payload of a given segment. // It is initialized on demand. @@ -150,8 +149,9 @@ type sender struct { type rtt struct { sync.Mutex `state:"nosave"` - srtt time.Duration - rttvar time.Duration + srtt time.Duration + rttvar time.Duration + srttInited bool } // fastRecovery holds information related to fast recovery from a packet loss. @@ -323,10 +323,10 @@ func (s *sender) sendAck() { // available. This is done in accordance with section 2 of RFC 6298. func (s *sender) updateRTO(rtt time.Duration) { s.rtt.Lock() - if !s.srttInited { + if !s.rtt.srttInited { s.rtt.rttvar = rtt / 2 s.rtt.srtt = rtt - s.srttInited = true + s.rtt.srttInited = true } else { diff := s.rtt.srtt - rtt if diff < 0 { diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index 4d1519860..272bbcdbd 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" ) func TestFastRecovery(t *testing.T) { diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index 025d133be..4e7f1a740 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -21,13 +21,13 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" ) // createConnectedWithSACKPermittedOption creates and connects c.ep with the diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 7d8987219..915a98047 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -21,20 +21,20 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/ports" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/ports" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -1110,8 +1110,9 @@ func TestNonScaledWindowAccept(t *testing.T) { t.Fatalf("Listen failed: %v", err) } - // Do 3-way handshake. - c.PassiveConnect(100, 2, header.TCPSynOptions{MSS: defaultIPv4MSS}) + // Do 3-way handshake w/ window scaling disabled. The SYN-ACK to the SYN + // should not carry the window scaling option. + c.PassiveConnect(100, -1, header.TCPSynOptions{MSS: defaultIPv4MSS}) // Try to accept the connection. we, ch := waiter.NewChannelEntry(nil) @@ -1600,7 +1601,6 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { // Set the buffer size to a deterministic size so that we can check the // window scaling option. const rcvBufferSize = 0x20000 - const wndScale = 2 if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(rcvBufferSize)); err != nil { t.Fatalf("SetSockOpt failed failed: %v", err) } @@ -1614,7 +1614,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { } // Do 3-way handshake. - c.PassiveConnect(maxPayload, wndScale, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) + c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) // Try to accept the connection. we, ch := waiter.NewChannelEntry(nil) @@ -1713,7 +1713,7 @@ func TestForwarderSendMSSLessThanMTU(t *testing.T) { s.SetTransportProtocolHandler(tcp.ProtocolNumber, f.HandlePacket) // Do 3-way handshake. - c.PassiveConnect(maxPayload, 1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) + c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) // Wait for connection to be available. select { @@ -2767,11 +2767,11 @@ func TestDefaultBufferSizes(t *testing.T) { } }() - checkSendBufferSize(t, ep, tcp.DefaultBufferSize) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change the default send buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2781,11 +2781,11 @@ func TestDefaultBufferSizes(t *testing.T) { t.Fatalf("NewEndpoint failed; %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change the default receive buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2795,8 +2795,8 @@ func TestDefaultBufferSizes(t *testing.T) { t.Fatalf("NewEndpoint failed; %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*3) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3) } func TestMinMaxBufferSizes(t *testing.T) { @@ -2810,11 +2810,11 @@ func TestMinMaxBufferSizes(t *testing.T) { defer ep.Close() // Change the min/max values for send/receive - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2832,17 +2832,17 @@ func TestMinMaxBufferSizes(t *testing.T) { checkSendBufferSize(t, ep, 300) // Set values above the max. - if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultBufferSize*20)); err != nil { + if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultReceiveBufferSize*20)); err != nil { t.Fatalf("GetSockOpt failed: %v", err) } - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*20) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20) - if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultBufferSize*30)); err != nil { + if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultSendBufferSize*30)); err != nil { t.Fatalf("GetSockOpt failed: %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*30) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30) } func makeStack() (*stack.Stack, *tcpip.Error) { @@ -3976,3 +3976,273 @@ func TestEndpointBindListenAcceptState(t *testing.T) { } } + +// This test verifies that the auto tuning does not grow the receive buffer if +// the application is not reading the data actively. +func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { + const mtu = 1500 + const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize + + c := context.New(t, mtu) + defer c.Cleanup() + + stk := c.Stack() + // Set lower limits for auto-tuning tests. This is required because the + // test stops the worker which can cause packets to be dropped because + // the segment queue holding unprocessed packets is limited to 500. + const receiveBufferSize = 80 << 10 // 80KB. + const maxReceiveBufferSize = receiveBufferSize * 10 + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + + // Enable auto-tuning. + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + // Change the expected window scale to match the value needed for the + // maximum buffer size defined above. + c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize)) + + rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4}) + + // NOTE: The timestamp values in the sent packets are meaningless to the + // peer so we just increment the timestamp value by 1 every batch as we + // are not really using them for anything. Send a single byte to verify + // the advertised window. + tsVal := rawEP.TSVal + 1 + + // Introduce a 25ms latency by delaying the first byte. + latency := 25 * time.Millisecond + time.Sleep(latency) + rawEP.SendPacketWithTS([]byte{1}, tsVal) + + // Verify that the ACK has the expected window. + wantRcvWnd := receiveBufferSize + wantRcvWnd = (wantRcvWnd >> uint32(c.WindowScale)) + rawEP.VerifyACKRcvWnd(uint16(wantRcvWnd - 1)) + time.Sleep(25 * time.Millisecond) + + // Allocate a large enough payload for the test. + b := make([]byte, int(receiveBufferSize)*2) + offset := 0 + payloadSize := receiveBufferSize - 1 + worker := (c.EP).(interface { + StopWork() + ResumeWork() + }) + tsVal++ + + // Stop the worker goroutine. + worker.StopWork() + start := offset + end := offset + payloadSize + packetsSent := 0 + for ; start < end; start += mss { + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + packetsSent++ + } + // Resume the worker so that it only sees the packets once all of them + // are waiting to be read. + worker.ResumeWork() + + // Since we read no bytes the window should goto zero till the + // application reads some of the data. + // Discard all intermediate acks except the last one. + if packetsSent > 100 { + for i := 0; i < (packetsSent / 100); i++ { + _ = c.GetPacket() + } + } + rawEP.VerifyACKRcvWnd(0) + + time.Sleep(25 * time.Millisecond) + // Verify that sending more data when window is closed is dropped and + // not acked. + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + + // Verify that the stack sends us back an ACK with the sequence number + // of the last packet sent indicating it was dropped. + p := c.GetPacket() + checker.IPv4(t, p, checker.TCP( + checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)), + checker.Window(0), + )) + + // Now read all the data from the endpoint and verify that advertised + // window increases to the full available buffer size. + for { + _, _, err := c.EP.Read(nil) + if err == tcpip.ErrWouldBlock { + break + } + } + + // Verify that we receive a non-zero window update ACK. When running + // under thread santizer this test can end up sending more than 1 + // ack, 1 for the non-zero window + p = c.GetPacket() + checker.IPv4(t, p, checker.TCP( + checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)), + func(t *testing.T, h header.Transport) { + tcp, ok := h.(header.TCP) + if !ok { + return + } + if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) { + t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd) + } + }, + )) +} + +// This test verifies that the auto tuning does not grow the receive buffer if +// the application is not reading the data actively. +func TestReceiveBufferAutoTuning(t *testing.T) { + const mtu = 1500 + const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize + + c := context.New(t, mtu) + defer c.Cleanup() + + // Enable Auto-tuning. + stk := c.Stack() + // Set lower limits for auto-tuning tests. This is required because the + // test stops the worker which can cause packets to be dropped because + // the segment queue holding unprocessed packets is limited to 500. + const receiveBufferSize = 80 << 10 // 80KB. + const maxReceiveBufferSize = receiveBufferSize * 10 + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + + // Enable auto-tuning. + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + // Change the expected window scale to match the value needed for the + // maximum buffer size used by stack. + c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize)) + + rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4}) + + wantRcvWnd := receiveBufferSize + scaleRcvWnd := func(rcvWnd int) uint16 { + return uint16(rcvWnd >> uint16(c.WindowScale)) + } + // Allocate a large array to send to the endpoint. + b := make([]byte, receiveBufferSize*48) + + // In every iteration we will send double the number of bytes sent in + // the previous iteration and read the same from the app. The received + // window should grow by at least 2x of bytes read by the app in every + // RTT. + offset := 0 + payloadSize := receiveBufferSize / 8 + worker := (c.EP).(interface { + StopWork() + ResumeWork() + }) + tsVal := rawEP.TSVal + // We are going to do our own computation of what the moderated receive + // buffer should be based on sent/copied data per RTT and verify that + // the advertised window by the stack matches our calculations. + prevCopied := 0 + done := false + latency := 1 * time.Millisecond + for i := 0; !done; i++ { + tsVal++ + + // Stop the worker goroutine. + worker.StopWork() + start := offset + end := offset + payloadSize + totalSent := 0 + packetsSent := 0 + for ; start < end; start += mss { + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + totalSent += mss + packetsSent++ + } + // Resume it so that it only sees the packets once all of them + // are waiting to be read. + worker.ResumeWork() + + // Give 1ms for the worker to process the packets. + time.Sleep(1 * time.Millisecond) + + // Verify that the advertised window on the ACK is reduced by + // the total bytes sent. + expectedWnd := wantRcvWnd - totalSent + if packetsSent > 100 { + for i := 0; i < (packetsSent / 100); i++ { + _ = c.GetPacket() + } + } + rawEP.VerifyACKRcvWnd(scaleRcvWnd(expectedWnd)) + + // Now read all the data from the endpoint and invoke the + // moderation API to allow for receive buffer auto-tuning + // to happen before we measure the new window. + totalCopied := 0 + for { + b, _, err := c.EP.Read(nil) + if err == tcpip.ErrWouldBlock { + break + } + totalCopied += len(b) + } + + // Invoke the moderation API. This is required for auto-tuning + // to happen. This method is normally expected to be invoked + // from a higher layer than tcpip.Endpoint. So we simulate + // copying to user-space by invoking it explicitly here. + c.EP.ModerateRecvBuf(totalCopied) + + // Now send a keep-alive packet to trigger an ACK so that we can + // measure the new window. + rawEP.NextSeqNum-- + rawEP.SendPacketWithTS(nil, tsVal) + rawEP.NextSeqNum++ + + if i == 0 { + // In the first iteration the receiver based RTT is not + // yet known as a result the moderation code should not + // increase the advertised window. + rawEP.VerifyACKRcvWnd(scaleRcvWnd(wantRcvWnd)) + prevCopied = totalCopied + } else { + rttCopied := totalCopied + if i == 1 { + // The moderation code accumulates copied bytes till + // RTT is established. So add in the bytes sent in + // the first iteration to the total bytes for this + // RTT. + rttCopied += prevCopied + // Now reset it to the initial value used by the + // auto tuning logic. + prevCopied = tcp.InitialCwnd * mss * 2 + } + newWnd := rttCopied<<1 + 16*mss + grow := (newWnd * (rttCopied - prevCopied)) / prevCopied + newWnd += (grow << 1) + if newWnd > maxReceiveBufferSize { + newWnd = maxReceiveBufferSize + done = true + } + rawEP.VerifyACKRcvWnd(scaleRcvWnd(newWnd)) + wantRcvWnd = newWnd + prevCopied = rttCopied + // Increase the latency after first two iterations to + // establish a low RTT value in the receiver since it + // only tracks the lowest value. This ensures that when + // ModerateRcvBuf is called the elapsed time is always > + // rtt. Without this the test is flaky due to delays due + // to scheduling/wakeup etc. + latency += 50 * time.Millisecond + } + time.Sleep(latency) + offset += payloadSize + payloadSize *= 2 + } +} diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go index 039bbcfba..a641e953d 100644 --- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go @@ -20,13 +20,13 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) // createConnectedWithTimestampOption creates and connects c.ep with the @@ -182,7 +182,7 @@ func TestTimeStampEnabledAccept(t *testing.T) { wndSize uint16 }{ {true, -1, 0xffff}, // When cookie is used window scaling is disabled. - {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default). + {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5. } for _, tc := range testCases { timeStampEnabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize) @@ -239,7 +239,7 @@ func TestTimeStampDisabledAccept(t *testing.T) { wndSize uint16 }{ {true, -1, 0xffff}, // When cookie is used window scaling is disabled. - {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default). + {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5. } for _, tc := range testCases { timeStampDisabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize) diff --git a/pkg/tcpip/transport/tcp/testing/context/BUILD b/pkg/tcpip/transport/tcp/testing/context/BUILD index 1584e4095..19b0d31c5 100644 --- a/pkg/tcpip/transport/tcp/testing/context/BUILD +++ b/pkg/tcpip/transport/tcp/testing/context/BUILD @@ -6,7 +6,7 @@ go_library( name = "context", testonly = 1, srcs = ["context.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context", + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context", visibility = [ "//:sandbox", ], diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index a4d89e24d..630dd7925 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -21,18 +21,18 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -72,12 +72,6 @@ const ( testInitialSequenceNumber = 789 ) -// defaultWindowScale value specified here depends on the tcp.DefaultBufferSize -// constant defined in the tcp/endpoint.go because the tcp.DefaultBufferSize is -// used in tcp.newHandshake to determine the window scale to use when sending a -// SYN/SYN-ACK. -var defaultWindowScale = tcp.FindWndScale(tcp.DefaultBufferSize) - // Headers is used to represent the TCP header fields when building a // new packet. type Headers struct { @@ -134,6 +128,10 @@ type Context struct { // TimeStampEnabled is true if ep is connected with the timestamp option // enabled. TimeStampEnabled bool + + // WindowScale is the expected window scale in SYN packets sent by + // the stack. + WindowScale uint8 } // New allocates and initializes a test context containing a new @@ -142,11 +140,11 @@ func New(t *testing.T, mtu uint32) *Context { s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{}) // Allow minimum send/receive buffer sizes to be 1 during tests. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -184,9 +182,10 @@ func New(t *testing.T, mtu uint32) *Context { }) return &Context{ - t: t, - s: s, - linkEP: linkEP, + t: t, + s: s, + linkEP: linkEP, + WindowScale: uint8(tcp.FindWndScale(tcp.DefaultReceiveBufferSize)), } } @@ -672,6 +671,21 @@ func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) { r.RecentTS = opts.TSVal } +// VerifyACKRcvWnd verifies that the window advertised by the incoming ACK +// matches the provided rcvWnd. +func (r *RawEndpoint) VerifyACKRcvWnd(rcvWnd uint16) { + ackPacket := r.C.GetPacket() + checker.IPv4(r.C.t, ackPacket, + checker.TCP( + checker.DstPort(r.SrcPort), + checker.TCPFlags(header.TCPFlagAck), + checker.SeqNum(uint32(r.AckNum)), + checker.AckNum(uint32(r.NextSeqNum)), + checker.Window(rcvWnd), + ), + ) +} + // VerifyACKNoSACK verifies that the ACK does not contain a SACK block. func (r *RawEndpoint) VerifyACKNoSACK() { r.VerifyACKHasSACK(nil) @@ -732,7 +746,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) * checker.TCPSynOptions(header.TCPSynOptions{ MSS: mss, TS: true, - WS: defaultWindowScale, + WS: int(c.WindowScale), SACKPermitted: c.SACKEnabled(), }), ), @@ -747,6 +761,9 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) * // Build options w/ tsVal to be sent in the SYN-ACK. synAckOptions := make([]byte, header.TCPOptionsMaximumSize) offset := 0 + if wantOptions.WS != -1 { + offset += header.EncodeWSOption(wantOptions.WS, synAckOptions[offset:]) + } if wantOptions.TS { offset += header.EncodeTSOption(wantOptions.TSVal, synOptions.TSVal, synAckOptions[offset:]) } diff --git a/pkg/tcpip/transport/tcp/timer.go b/pkg/tcpip/transport/tcp/timer.go index fc1c7cbd2..c70525f27 100644 --- a/pkg/tcpip/transport/tcp/timer.go +++ b/pkg/tcpip/transport/tcp/timer.go @@ -17,7 +17,7 @@ package tcp import ( "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sleep" ) type timerState int diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 31a845dee..4bec48c0f 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "tcpconntrack", srcs = ["tcp_conntrack.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack", + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go index f1dcd36d5..93712cd45 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go @@ -18,8 +18,8 @@ package tcpconntrack import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // Result is returned when the state of a TCB is updated in response to an diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go index 435e136de..5e271b7ca 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go @@ -17,8 +17,8 @@ package tcpconntrack_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack" ) // connected creates a connection tracker TCB and sets it to a connected state diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index b9520d6e0..6dac66b50 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -24,8 +24,8 @@ go_library( "protocol.go", "udp_packet_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/udp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/sleep", diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index fa7278286..cb0ea83a6 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -18,11 +18,11 @@ import ( "math" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -169,6 +169,9 @@ func (e *endpoint) Close() { e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (e *endpoint) ModerateRecvBuf(copied int) {} + // Read reads data from the endpoint. This method does not block if // there is no data pending. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { @@ -695,8 +698,44 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t return netProto, nil } +func (e *endpoint) disconnect() *tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + if e.state != stateConnected { + return nil + } + id := stack.TransportEndpointID{} + // Exclude ephemerally bound endpoints. + if e.bindNICID != 0 || e.id.LocalAddress == "" { + var err *tcpip.Error + id = stack.TransportEndpointID{ + LocalPort: e.id.LocalPort, + LocalAddress: e.id.LocalAddress, + } + id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id) + if err != nil { + return err + } + e.state = stateBound + } else { + e.state = stateInitial + } + + e.stack.UnregisterTransportEndpoint(e.regNICID, e.effectiveNetProtos, ProtocolNumber, e.id, e) + e.id = id + e.route.Release() + e.route = stack.Route{} + e.dstPort = 0 + + return nil +} + // Connect connects the endpoint to its peer. Specifying a NIC is optional. func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { + if addr.Addr == "" { + return e.disconnect() + } if addr.Port == 0 { // We don't support connecting to port zero. return tcpip.ErrInvalidEndpointState @@ -731,12 +770,16 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { defer r.Release() id := stack.TransportEndpointID{ - LocalAddress: r.LocalAddress, + LocalAddress: e.id.LocalAddress, LocalPort: localPort, RemotePort: addr.Port, RemoteAddress: r.RemoteAddress, } + if e.state == stateInitial { + id.LocalAddress = r.LocalAddress + } + // Even if we're connected, this endpoint can still be used to send // packets on a different network protocol, so we register both even if // v6only is set to false and this is an ipv6 endpoint. diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index 74e8e9fd5..18e786397 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -15,10 +15,10 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves udpPacket.data field. @@ -92,8 +92,6 @@ func (e *endpoint) afterLoad() { if err != nil { panic(*err) } - - e.id.LocalAddress = e.route.LocalAddress } else if len(e.id.LocalAddress) != 0 { // stateBound if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 { panic(tcpip.ErrBadLocalAddress) diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go index 25bdd2929..a874fc9fd 100644 --- a/pkg/tcpip/transport/udp/forwarder.go +++ b/pkg/tcpip/transport/udp/forwarder.go @@ -15,10 +15,10 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // Forwarder is a session request forwarder, which allows clients to decide diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 3d31dfbf1..f76e7fbe1 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -21,12 +21,12 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 86a8fa19b..75129a2ff 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -21,17 +21,17 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD index 69035044d..98d51cc69 100644 --- a/pkg/tmutex/BUILD +++ b/pkg/tmutex/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "tmutex", srcs = ["tmutex.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tmutex", + importpath = "gvisor.dev/gvisor/pkg/tmutex", visibility = ["//:sandbox"], ) diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD index 5e177e78e..769509e80 100644 --- a/pkg/unet/BUILD +++ b/pkg/unet/BUILD @@ -8,7 +8,7 @@ go_library( "unet.go", "unet_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/unet", + importpath = "gvisor.dev/gvisor/pkg/unet", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", diff --git a/pkg/unet/unet.go b/pkg/unet/unet.go index 2aa1af4ff..d843f19cf 100644 --- a/pkg/unet/unet.go +++ b/pkg/unet/unet.go @@ -23,7 +23,7 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/gate" + "gvisor.dev/gvisor/pkg/gate" ) // backlog is used for the listen request. diff --git a/pkg/unet/unet_test.go b/pkg/unet/unet_test.go index 763b23c7c..a3cc6f5d3 100644 --- a/pkg/unet/unet_test.go +++ b/pkg/unet/unet_test.go @@ -691,3 +691,45 @@ func TestControlMessage(t *testing.T) { } } } + +func benchmarkSendRecv(b *testing.B, packet bool) { + server, client, err := SocketPair(packet) + if err != nil { + b.Fatalf("SocketPair: got %v, wanted nil", err) + } + defer server.Close() + defer client.Close() + go func() { + buf := make([]byte, 1) + for i := 0; i < b.N; i++ { + n, err := server.Read(buf) + if n != 1 || err != nil { + b.Fatalf("server.Read: got (%d, %v), wanted (1, nil)", n, err) + } + n, err = server.Write(buf) + if n != 1 || err != nil { + b.Fatalf("server.Write: got (%d, %v), wanted (1, nil)", n, err) + } + } + }() + buf := make([]byte, 1) + b.ResetTimer() + for i := 0; i < b.N; i++ { + n, err := client.Write(buf) + if n != 1 || err != nil { + b.Fatalf("client.Write: got (%d, %v), wanted (1, nil)", n, err) + } + n, err = client.Read(buf) + if n != 1 || err != nil { + b.Fatalf("client.Read: got (%d, %v), wanted (1, nil)", n, err) + } + } +} + +func BenchmarkSendRecvStream(b *testing.B) { + benchmarkSendRecv(b, false) +} + +func BenchmarkSendRecvPacket(b *testing.B) { + benchmarkSendRecv(b, true) +} diff --git a/pkg/unet/unet_unsafe.go b/pkg/unet/unet_unsafe.go index fa0916439..f8a42c914 100644 --- a/pkg/unet/unet_unsafe.go +++ b/pkg/unet/unet_unsafe.go @@ -21,7 +21,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/abi/linux" ) // wait blocks until the socket FD is ready for reading or writing, depending diff --git a/pkg/urpc/BUILD b/pkg/urpc/BUILD index 0192fb35b..b7f505a84 100644 --- a/pkg/urpc/BUILD +++ b/pkg/urpc/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "urpc", srcs = ["urpc.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/urpc", + importpath = "gvisor.dev/gvisor/pkg/urpc", visibility = ["//:sandbox"], deps = [ "//pkg/fd", diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go index 4ea684659..df59ffab1 100644 --- a/pkg/urpc/urpc.go +++ b/pkg/urpc/urpc.go @@ -29,9 +29,9 @@ import ( "runtime" "sync" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" ) // maxFiles determines the maximum file payload. @@ -72,7 +72,7 @@ type FilePayload struct { Files []*os.File `json:"-"` } -// ReleaseFD releases the indexth FD. +// ReleaseFD releases the FD at the specified index. func (f *FilePayload) ReleaseFD(index int) (*fd.FD, error) { return fd.NewFromFile(f.Files[index]) } diff --git a/pkg/urpc/urpc_test.go b/pkg/urpc/urpc_test.go index 5bf2c5ed2..c6c7ce9d4 100644 --- a/pkg/urpc/urpc_test.go +++ b/pkg/urpc/urpc_test.go @@ -19,7 +19,7 @@ import ( "os" "testing" - "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/unet" ) type test struct { diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD index 48ce063d7..9173dfd0f 100644 --- a/pkg/waiter/BUILD +++ b/pkg/waiter/BUILD @@ -21,7 +21,7 @@ go_library( "waiter.go", "waiter_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/waiter", + importpath = "gvisor.dev/gvisor/pkg/waiter", visibility = ["//visibility:public"], ) diff --git a/runsc/BUILD b/runsc/BUILD index 8a57c597b..6b8c92706 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -16,6 +16,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", @@ -47,6 +48,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", @@ -96,6 +98,11 @@ pkg_deb( maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", package = "runsc", postinst = "debian/postinst.sh", + tags = [ + # TODO(b/135475885): pkg_deb requires python2: + # https://github.com/bazelbuild/bazel/issues/8443 + "manual", + ], version_file = ":version.txt", visibility = [ "//visibility:public", diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 744f852a1..5025401dd 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -18,8 +18,9 @@ go_library( "network.go", "pprof.go", "strace.go", + "user.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot", + importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -33,6 +34,7 @@ go_library( "//pkg/log", "//pkg/memutil", "//pkg/rand", + "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/context", @@ -50,13 +52,10 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/limits", "//pkg/sentry/loader", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/sentry/sighandling", "//pkg/sentry/socket/epsocket", "//pkg/sentry/socket/hostinet", @@ -69,6 +68,7 @@ go_library( "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", + "//pkg/sentry/usermem", "//pkg/sentry/watchdog", "//pkg/syserror", "//pkg/tcpip", @@ -84,6 +84,7 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/urpc", "//runsc/boot/filter", + "//runsc/boot/platforms", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", @@ -97,6 +98,7 @@ go_test( "compat_test.go", "fs_test.go", "loader_test.go", + "user_test.go", ], embed = [":boot"], deps = [ diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index c369e4d64..07e35ab10 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -21,14 +21,14 @@ import ( "syscall" "github.com/golang/protobuf/proto" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" - ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" - spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" + ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" + "gvisor.dev/gvisor/pkg/sentry/strace" + spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" ) func initCompatLogs(fd int) error { diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 99df5e614..43cd0db94 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -17,7 +17,7 @@ package boot import ( "fmt" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) // reportLimit is the max number of events that should be reported per tracker. diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go index ccec3d20c..388298d8d 100644 --- a/runsc/boot/compat_test.go +++ b/runsc/boot/compat_test.go @@ -17,7 +17,7 @@ package boot import ( "testing" - rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto" + rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" ) func TestOnceTracker(t *testing.T) { diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6112b6c0a..6f1eb9a41 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -19,43 +19,9 @@ import ( "strconv" "strings" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/sentry/watchdog" ) -// PlatformType tells which platform to use. -type PlatformType int - -const ( - // PlatformPtrace runs the sandbox with the ptrace platform. - PlatformPtrace PlatformType = iota - - // PlatformKVM runs the sandbox with the KVM platform. - PlatformKVM -) - -// MakePlatformType converts type from string. -func MakePlatformType(s string) (PlatformType, error) { - switch s { - case "ptrace": - return PlatformPtrace, nil - case "kvm": - return PlatformKVM, nil - default: - return 0, fmt.Errorf("invalid platform type %q", s) - } -} - -func (p PlatformType) String() string { - switch p { - case PlatformPtrace: - return "ptrace" - case PlatformKVM: - return "kvm" - default: - return fmt.Sprintf("unknown(%d)", p) - } -} - // FileAccessType tells how the filesystem is accessed. type FileAccessType int @@ -187,7 +153,7 @@ type Config struct { LogPackets bool // Platform is the platform to run on. - Platform PlatformType + Platform string // Strace indicates that strace should be enabled. Strace bool @@ -247,7 +213,7 @@ func (c *Config) ToFlags() []string { "--overlay=" + strconv.FormatBool(c.Overlay), "--network=" + c.Network.String(), "--log-packets=" + strconv.FormatBool(c.LogPackets), - "--platform=" + c.Platform.String(), + "--platform=" + c.Platform, "--strace=" + strconv.FormatBool(c.Strace), "--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","), "--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)), diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 26765cc46..d79aaff60 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -22,17 +22,17 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/state" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/state" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) const ( @@ -96,8 +96,10 @@ const ( // SandboxStacks collects sandbox stacks for debugging. SandboxStacks = "debug.Stacks" +) - // Profiling related commands (see pprof.go for more details). +// Profiling related commands (see pprof.go for more details). +const ( StartCPUProfile = "Profile.StartCPUProfile" StopCPUProfile = "Profile.StopCPUProfile" HeapProfile = "Profile.HeapProfile" @@ -105,6 +107,11 @@ const ( StopTrace = "Profile.StopTrace" ) +// Logging related commands (see logging.go for more details). +const ( + ChangeLogging = "Logging.Change" +) + // ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) @@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(&debug{}) + srv.Register(&control.Logging{}) if l.conf.ProfileEnable { srv.Register(&control.Profile{}) } diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go index 79f7387ac..1fb32c527 100644 --- a/runsc/boot/debug.go +++ b/runsc/boot/debug.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) type debug struct { diff --git a/runsc/boot/events.go b/runsc/boot/events.go index ffd99f5e9..422f4da00 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -15,8 +15,8 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // Event struct for encoding the event data to JSON. Corresponds to runc's diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 0811e10f4..e5de1f3d7 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -17,36 +17,27 @@ package boot import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) -// createFDMap creates an FD map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host FD. +// createFDTable creates an FD table that contains stdin, stdout, and stderr. +// If console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. -func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { if len(stdioFDs) != 3 { return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) - fdm := k.NewFDMap() - defer fdm.DecRef() + fdTable := k.NewFDTable() + defer fdTable.DecRef() mounter := fs.FileOwnerFromContext(ctx) - // Maps sandbox FD to host FD. - fdMap := map[int]int{ - 0: stdioFDs[0], - 1: stdioFDs[1], - 2: stdioFDs[2], - } - var ttyFile *fs.File - for appFD, hostFD := range fdMap { + for appFD, hostFD := range stdioFDs { var appFile *fs.File if console && appFD < 3 { @@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs } // Add the file to the FD map. - if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { return nil, err } } - fdm.IncRef() - return fdm, nil + fdTable.IncRef() + return fdTable, nil } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 3b6020cf3..f5509b6b7 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -11,7 +11,7 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter", + importpath = "gvisor.dev/gvisor/runsc/boot/filter", visibility = [ "//runsc/boot:__subpackages__", ], @@ -20,8 +20,6 @@ go_library( "//pkg/log", "//pkg/seccomp", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/tcpip/link/fdbased", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index ef2dbfad2..0ee5b8bbd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -19,9 +19,9 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" ) // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. @@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules { } } -// ptraceFilters returns syscalls made exclusively by the ptrace platform. -func ptraceFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - unix.SYS_GETCPU: {}, - unix.SYS_SCHED_SETAFFINITY: {}, - syscall.SYS_PTRACE: {}, - syscall.SYS_TGKILL: {}, - syscall.SYS_WAIT4: {}, - } -} - -// kvmFilters returns syscalls made exclusively by the KVM platform. -func kvmFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_ARCH_PRCTL: {}, - syscall.SYS_IOCTL: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_RT_SIGSUSPEND: {}, - syscall.SYS_RT_SIGTIMEDWAIT: {}, - 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. - } -} - func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index 5c5ec4e06..e28d4b8d6 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -17,11 +17,11 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index ac5a0f1aa..5e5a3c998 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index ba3c1ce87..9ff80276a 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -19,7 +19,7 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. @@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules { syscall.SYS_MUNLOCK: {}, syscall.SYS_NANOSLEEP: {}, syscall.SYS_OPEN: {}, + syscall.SYS_OPENAT: {}, syscall.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. syscall.SYS_TIME: {}, diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 17479e0dd..e80c171b3 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -18,13 +18,9 @@ package filter import ( - "fmt" - - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/sentry/platform" ) // Options are seccomp filter related options. @@ -53,14 +49,7 @@ func Install(opt Options) error { s.Merge(profileFilters()) } - switch p := opt.Platform.(type) { - case *ptrace.PTrace: - s.Merge(ptraceFilters()) - case *kvm.KVM: - s.Merge(kvmFilters()) - default: - return fmt.Errorf("unknown platform type %T", p) - } + s.Merge(opt.Platform.SyscallFilters()) return seccomp.Install(s) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 2fa0725d1..f9a6f2d3c 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -24,24 +24,24 @@ import ( "syscall" // Include filesystem types that OCI spec might mount. - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty" + _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" + "gvisor.dev/gvisor/pkg/sentry/fs/gofer" + _ "gvisor.dev/gvisor/pkg/sentry/fs/host" + _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -76,7 +76,7 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, tmpFS := mustFindFilesystem("tmpfs") if !fs.IsDir(lower.StableAttr) { // Create overlay on top of mount file, e.g. /etc/hostname. - msrc := fs.NewCachingMountSource(tmpFS, upperFlags) + msrc := fs.NewCachingMountSource(ctx, tmpFS, upperFlags) return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags) } @@ -226,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem { // addSubmountOverlay overlays the inode over a ramfs tree containing the given // paths. func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { - msrc := fs.NewPseudoMountSource() + // Construct a ramfs tree of mount points. The contents never + // change, so this can be fully caching. There's no real + // filesystem backing this tree, so we set the filesystem to + // nil. + msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{}) mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts) if err != nil { return nil, fmt.Errorf("creating mount tree: %v", err) diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index 3364aa5e6..d1c0bb9b5 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -20,8 +20,8 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/limits" ) // Mapping from linux resource names to limits.LimitType. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index b2a91bec9..8e8c6105b 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,51 +20,52 @@ import ( mrand "math/rand" "os" "runtime" + "strings" "sync" "sync/atomic" "syscall" gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/memutil" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" - "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling" - slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/runsc/boot/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/sighandling" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/runsc/boot/filter" + _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route" - _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/epsocket" + "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.dev/gvisor/pkg/sentry/socket/unix" ) // Loader keeps state needed to start the kernel and run the container.. @@ -205,7 +206,9 @@ func New(args Args) (*Loader, error) { // Create VDSO. // // Pass k as the platform since it is savable, unlike the actual platform. - vdso, err := loader.PrepareVDSO(k) + // + // FIXME(b/109889800): Use non-nil context. + vdso, err := loader.PrepareVDSO(nil, k) if err != nil { return nil, fmt.Errorf("creating vdso: %v", err) } @@ -259,7 +262,7 @@ func New(args Args) (*Loader, error) { // Adjust the total memory returned by the Sentry so that applications that // use /proc/meminfo can make allocations based on this limit. usage.MinimumTotalMemoryBytes = args.TotalMem - log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1 << 30)) + log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } // Initiate the Kernel object, which is required by the Context passed @@ -412,19 +415,12 @@ func (l *Loader) Destroy() { } func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) { - switch conf.Platform { - case PlatformPtrace: - log.Infof("Platform: ptrace") - return ptrace.New() - case PlatformKVM: - log.Infof("Platform: kvm") - if deviceFile == nil { - return nil, fmt.Errorf("kvm device file must be provided") - } - return kvm.New(deviceFile) - default: - return nil, fmt.Errorf("invalid platform %v", conf.Platform) + p, err := platform.Lookup(conf.Platform) + if err != nil { + panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err)) } + log.Infof("Platform: %s", conf.Platform) + return p.New(deviceFile) } func createMemoryFile() (*pgalloc.MemoryFile, error) { @@ -513,13 +509,13 @@ func (l *Loader) run() error { // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs) + fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } // CreateProcess takes a reference on FDMap if successful. We won't need // ours either way. - l.rootProcArgs.FDMap = fdm + l.rootProcArgs.FDTable = fdTable // cid for root container can be empty. Only subcontainers need it to set // the mount location. @@ -534,19 +530,37 @@ func (l *Loader) run() error { return err } + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + hasHomeEnvv := false + for _, envv := range l.rootProcArgs.Envv { + if strings.HasPrefix(envv, "HOME=") { + hasHomeEnvv = true + } + } + if !hasHomeEnvv { + homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID)) + if err != nil { + return fmt.Errorf("error reading exec user: %v", err) + } + + l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir) + } + // Create the root container init task. It will begin running // when the kernel is started. if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { return fmt.Errorf("creating init process: %v", err) } - // CreateProcess takes a reference on FDMap if successful. - l.rootProcArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + l.rootProcArgs.FDTable.DecRef() } ep.tg = l.k.GlobalInit() if l.console { - ttyFile := l.rootProcArgs.FDMap.GetFile(0) + ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) @@ -626,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs) + fdTable, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need ours - // either way. - procArgs.FDMap = fdm + // CreateProcess takes a reference on fdTable if successful. We won't + // need ours either way. + procArgs.FDTable = fdTable // Can't take ownership away from os.File. dup them to get a new FDs. var goferFDs []int @@ -661,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file } l.k.StartProcess(tg) - // CreateProcess takes a reference on FDMap if successful. - procArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + procArgs.FDTable.DecRef() l.processes[eid].tg = tg return nil @@ -826,9 +840,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { // privileges. Raw: true, })} + + // Enable SACK Recovery. if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { return nil, fmt.Errorf("failed to enable SACK: %v", err) } + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + return &s, nil default: @@ -977,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host } return ep.tg, ep.tty, nil } + +func init() { + // TODO(gvisor.dev/issue/365): Make this configurable. + refs.SetLeakMode(refs.NoLeakChecking) +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 2f2499811..ff713660d 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -25,18 +25,21 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/fsgofer" ) func init() { log.SetLevel(log.Debug) rand.Seed(time.Now().UnixNano()) + if err := fsgofer.OpenProcSelfFD(); err != nil { + panic(err) + } } func testConfig() *Config { @@ -44,6 +47,7 @@ func testConfig() *Config { RootDir: "unused_root_dir", Network: NetworkNone, DisableSeccomp: true, + Platform: "ptrace", } } diff --git a/runsc/boot/network.go b/runsc/boot/network.go index d86803252..d3d98243d 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -19,16 +19,16 @@ import ( "net" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/urpc" ) // Network exposes methods that can be used to configure a network stack. diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD new file mode 100644 index 000000000..03391cdca --- /dev/null +++ b/runsc/boot/platforms/BUILD @@ -0,0 +1,16 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "platforms", + srcs = ["platforms.go"], + importpath = "gvisor.dev/gvisor/runsc/boot/platforms", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform/ptrace", + ], +) diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go new file mode 100644 index 000000000..056b46ad5 --- /dev/null +++ b/runsc/boot/platforms/platforms.go @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package platforms imports all available platform packages. +package platforms + +import ( + // Import platforms that runsc might use. + _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" +) + +const ( + // Ptrace runs the sandbox with the ptrace platform. + Ptrace = "ptrace" + + // KVM runs the sandbox with the KVM platform. + KVM = "kvm" +) diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go index 19c7f8fbd..fbfd3b07c 100644 --- a/runsc/boot/strace.go +++ b/runsc/boot/strace.go @@ -15,7 +15,7 @@ package boot import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/strace" + "gvisor.dev/gvisor/pkg/sentry/strace" ) func enableStrace(conf *Config) error { diff --git a/runsc/boot/user.go b/runsc/boot/user.go new file mode 100644 index 000000000..d1d423a5c --- /dev/null +++ b/runsc/boot/user.go @@ -0,0 +1,146 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "bufio" + "io" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +type fileReader struct { + // Ctx is the context for the file reader. + Ctx context.Context + + // File is the file to read from. + File *fs.File +} + +// Read implements io.Reader.Read. +func (r *fileReader) Read(buf []byte) (int, error) { + n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) + return int(n), err +} + +// getExecUserHome returns the home directory of the executing user read from +// /etc/passwd as read from the container filesystem. +func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) { + // The default user home directory to return if no user matching the user + // if found in the /etc/passwd found in the image. + const defaultHome = "/" + + // Open the /etc/passwd file from the dirent via the root mount namespace. + mnsRoot := rootMns.Root() + maxTraversals := uint(linux.MaxSymlinkTraversals) + dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) + if err != nil { + // NOTE: Ignore errors opening the passwd file. If the passwd file + // doesn't exist we will return the default home directory. + return defaultHome, nil + } + defer dirent.DecRef() + + // Check read permissions on the file. + if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { + // NOTE: Ignore permissions errors here and return default root dir. + return defaultHome, nil + } + + // Only open regular files. We don't open other files like named pipes as + // they may block and might present some attack surface to the container. + // Note that runc does not seem to do this kind of checking. + if !fs.IsRegular(dirent.Inode.StableAttr) { + return defaultHome, nil + } + + f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) + if err != nil { + return "", err + } + defer f.DecRef() + + r := &fileReader{ + Ctx: ctx, + File: f, + } + + homeDir, err := findHomeInPasswd(uid, r, defaultHome) + if err != nil { + return "", err + } + + return homeDir, nil +} + +// findHomeInPasswd parses a passwd file and returns the given user's home +// directory. This function does it's best to replicate the runc's behavior. +func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { + s := bufio.NewScanner(passwd) + + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // Pull out part of passwd entry. Loosely parse the passwd entry as some + // passwd files could be poorly written and for compatibility with runc. + // + // Per 'man 5 passwd' + // /etc/passwd contains one line for each user account, with seven + // fields delimited by colons (“:”). These fields are: + // + // - login name + // - optional encrypted password + // - numerical user ID + // - numerical group ID + // - user name or comment field + // - user home directory + // - optional user command interpreter + parts := strings.Split(line, ":") + + found := false + homeDir := "" + for i, p := range parts { + switch i { + case 2: + parsedUID, err := strconv.ParseUint(p, 10, 32) + if err == nil && parsedUID == uint64(uid) { + found = true + } + case 5: + homeDir = p + } + } + if found { + // NOTE: If the uid is present but the home directory is not + // present in the /etc/passwd entry we return an empty string. This + // is, for better or worse, what runc does. + return homeDir, nil + } + } + + return defaultHome, nil +} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go new file mode 100644 index 000000000..834003430 --- /dev/null +++ b/runsc/boot/user_test.go @@ -0,0 +1,253 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +func setupTempDir() (string, error) { + tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") + if err != nil { + return "", err + } + return tmpDir, nil +} + +func setupPasswd(contents string, perms os.FileMode) func() (string, error) { + return func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) + if err != nil { + return "", err + } + defer f.Close() + + _, err = f.WriteString(contents) + if err != nil { + return "", err + } + + err = f.Chmod(perms) + if err != nil { + return "", err + } + return tmpDir, nil + } +} + +// TestGetExecUserHome tests the getExecUserHome function. +func TestGetExecUserHome(t *testing.T) { + tests := map[string]struct { + uid uint32 + createRoot func() (string, error) + expected string + }{ + "success": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), + expected: "/home/adin", + }, + "no_passwd": { + uid: 1000, + createRoot: setupTempDir, + expected: "/", + }, + "no_perms": { + uid: 1000, + createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), + expected: "/", + }, + "directory": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + // Currently we don't allow named pipes. + "named_pipe": { + uid: 1000, + createRoot: func() (string, error) { + tmpDir, err := setupTempDir() + if err != nil { + return "", err + } + + if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { + return "", err + } + + if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { + return "", err + } + + return tmpDir, nil + }, + expected: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + tmpDir, err := tc.createRoot() + if err != nil { + t.Fatalf("failed to create root dir: %v", err) + } + + sandEnd, cleanup, err := startGofer(tmpDir) + if err != nil { + t.Fatalf("failed to create gofer: %v", err) + } + defer cleanup() + + ctx := contexttest.Context(t) + conf := &Config{ + RootDir: "unused_root_dir", + Network: NetworkNone, + DisableSeccomp: true, + } + + spec := &specs.Spec{ + Root: &specs.Root{ + Path: tmpDir, + Readonly: true, + }, + // Add /proc mount as tmpfs to avoid needing a kernel. + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + }, + } + + var mns *fs.MountNamespace + setMountNS := func(m *fs.MountNamespace) { + mns = m + ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) + } + mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{}) + if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { + t.Fatalf("failed to create mount namespace: %v", err) + } + + got, err := getExecUserHome(ctx, mns, tc.uid) + if err != nil { + t.Fatalf("failed to get user home: %v", err) + } + + if got != tc.expected { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} + +// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. +func TestFindHomeInPasswd(t *testing.T) { + tests := map[string]struct { + uid uint32 + passwd string + expected string + def string + }{ + "empty": { + uid: 1000, + passwd: "", + expected: "/", + def: "/", + }, + "whitespace": { + uid: 1000, + passwd: " ", + expected: "/", + def: "/", + }, + "full": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh", + expected: "/home/adin", + def: "/", + }, + // For better or worse, this is how runc works. + "partial": { + uid: 1000, + passwd: "adin::1000:1111:", + expected: "", + def: "/", + }, + "multiple": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + "duplicate": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", + expected: "/home/adin", + def: "/", + }, + "empty_lines": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) + if err != nil { + t.Fatalf("error parsing passwd: %v", err) + } + if tc.expected != got { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index 620d33a19..ab2387614 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "cgroup", srcs = ["cgroup.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup", + importpath = "gvisor.dev/gvisor/runsc/cgroup", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 7431b17d6..ab3a25b9b 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -30,8 +30,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) const ( diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index df6af0ced..5223b9972 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -33,7 +33,7 @@ go_library( "syscalls.go", "wait.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/cmd", + importpath = "gvisor.dev/gvisor/runsc/cmd", visibility = [ "//runsc:__subpackages__", ], @@ -46,6 +46,7 @@ go_library( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/console", "//runsc/container", "//runsc/fsgofer", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index e0a950e9c..b40fded5b 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -24,9 +24,10 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/specutils" ) // Boot implements subcommands.Command for the "boot" command which starts a @@ -172,7 +173,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if caps == nil { caps = &specs.LinuxCapabilities{} } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // Ptrace platform requires extra capabilities. const c = "CAP_SYS_PTRACE" caps.Bounding = append(caps.Bounding, c) diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go index 312e5b471..abfbb7cfc 100644 --- a/runsc/cmd/capability.go +++ b/runsc/cmd/capability.go @@ -19,7 +19,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) var allCapTypes = []capability.CapType{ diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index 2825dfaa5..3ae25a257 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -21,11 +21,11 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) func init() { @@ -97,7 +97,12 @@ func TestCapabilities(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := container.Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := container.New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index 96d3c3378..d8b3a8573 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -22,10 +22,10 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // File containing the container's saved image/state within the given image-path's directory. @@ -133,7 +133,12 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa Fatalf("destroying container: %v", err) } - cont, err = container.Create(id, spec, conf, bundleDir, "", "", "") + contArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + } + cont, err = container.New(conf, contArgs) if err != nil { Fatalf("restoring container: %v", err) } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index 1a774db04..b5a0ce17d 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -20,8 +20,8 @@ import ( "path/filepath" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) // mountInChroot creates the destination mount point in the given chroot and diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go index 5b4cc4a39..f1a4887ef 100644 --- a/runsc/cmd/cmd.go +++ b/runsc/cmd/cmd.go @@ -22,8 +22,8 @@ import ( "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" ) // intFlags can be used with int flags that appear multiple times. diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go index e82e8c667..a4e3071b3 100644 --- a/runsc/cmd/create.go +++ b/runsc/cmd/create.go @@ -18,9 +18,9 @@ import ( "context" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Create implements subcommands.Command for the "create" command. @@ -99,7 +99,15 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} // Create the container. A new sandbox will be created for the // container unless the metadata specifies that it should be run in an // existing container. - if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil { + contArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: c.consoleSocket, + PIDFile: c.pidFile, + UserLog: c.userLog, + } + if _, err := container.New(conf, contArgs); err != nil { return Errorf("creating container: %v", err) } return subcommands.ExitSuccess diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 27eb51172..7313e473f 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -17,14 +17,17 @@ package cmd import ( "context" "os" + "strconv" + "strings" "syscall" "time" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Debug implements subcommands.Command for the "debug" command. @@ -36,6 +39,9 @@ type Debug struct { profileCPU string profileDelay int trace string + strace string + logLevel string + logPackets string } // Name implements subcommands.Command. @@ -62,6 +68,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile") f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.") f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") + f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`) + f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).") + f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.") } // Execute implements subcommands.Command.Execute. @@ -78,7 +87,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) var err error c, err = container.Load(conf.RootDir, f.Arg(0)) if err != nil { - Fatalf("loading container %q: %v", f.Arg(0), err) + return Errorf("loading container %q: %v", f.Arg(0), err) } } else { if f.NArg() != 0 { @@ -88,12 +97,12 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Go over all sandboxes and find the one that matches PID. ids, err := container.List(conf.RootDir) if err != nil { - Fatalf("listing containers: %v", err) + return Errorf("listing containers: %v", err) } for _, id := range ids { candidate, err := container.Load(conf.RootDir, id) if err != nil { - Fatalf("loading container %q: %v", id, err) + return Errorf("loading container %q: %v", id, err) } if candidate.SandboxPid() == d.pid { c = candidate @@ -101,38 +110,38 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } } if c == nil { - Fatalf("container with PID %d not found", d.pid) + return Errorf("container with PID %d not found", d.pid) } } if c.Sandbox == nil || !c.Sandbox.IsRunning() { - Fatalf("container sandbox is not running") + return Errorf("container sandbox is not running") } log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid) if d.signal > 0 { log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid) if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil { - Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) + return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) } } if d.stacks { log.Infof("Retrieving sandbox stacks") stacks, err := c.Sandbox.Stacks() if err != nil { - Fatalf("retrieving stacks: %v", err) + return Errorf("retrieving stacks: %v", err) } log.Infof(" *** Stack dump ***\n%s", stacks) } if d.profileHeap != "" { f, err := os.Create(d.profileHeap) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer f.Close() if err := c.Sandbox.HeapProfile(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("Heap profile written to %q", d.profileHeap) } @@ -142,7 +151,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) delay = true f, err := os.Create(d.profileCPU) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer func() { f.Close() @@ -152,7 +161,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) log.Infof("CPU profile written to %q", d.profileCPU) }() if err := c.Sandbox.StartCPUProfile(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU) } @@ -160,7 +169,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) delay = true f, err := os.Create(d.trace) if err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } defer func() { f.Close() @@ -170,15 +179,71 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) log.Infof("Trace written to %q", d.trace) }() if err := c.Sandbox.StartTrace(f); err != nil { - Fatalf(err.Error()) + return Errorf(err.Error()) } log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace) } + if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 { + args := control.LoggingArgs{} + switch strings.ToLower(d.strace) { + case "": + // strace not set, nothing to do here. + + case "off": + log.Infof("Disabling strace") + args.SetStrace = true + + case "all": + log.Infof("Enabling all straces") + args.SetStrace = true + args.EnableStrace = true + + default: + log.Infof("Enabling strace for syscalls: %s", d.strace) + args.SetStrace = true + args.EnableStrace = true + args.StraceWhitelist = strings.Split(d.strace, ",") + } + + if len(d.logLevel) != 0 { + args.SetLevel = true + switch strings.ToLower(d.logLevel) { + case "warning", "0": + args.Level = log.Warning + case "info", "1": + args.Level = log.Info + case "debug", "2": + args.Level = log.Debug + default: + return Errorf("invalid log level %q", d.logLevel) + } + log.Infof("Setting log level %v", args.Level) + } + + if len(d.logPackets) != 0 { + args.SetLogPackets = true + lp, err := strconv.ParseBool(d.logPackets) + if err != nil { + return Errorf("invalid value for log_packets %q", d.logPackets) + } + args.LogPackets = lp + if args.LogPackets { + log.Infof("Enabling packet logging") + } else { + log.Infof("Disabling packet logging") + } + } + + if err := c.Sandbox.ChangeLogging(args); err != nil { + return Errorf(err.Error()) + } + log.Infof("Logging options changed") + } + if delay { time.Sleep(time.Duration(d.profileDelay) * time.Second) - } return subcommands.ExitSuccess diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go index 9039723e9..30d8164b1 100644 --- a/runsc/cmd/delete.go +++ b/runsc/cmd/delete.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Delete implements subcommands.Command for the "delete" command. diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go index 45fc91016..cb59516a3 100644 --- a/runsc/cmd/delete_test.go +++ b/runsc/cmd/delete_test.go @@ -18,7 +18,7 @@ import ( "io/ioutil" "testing" - "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot" ) func TestNotFound(t *testing.T) { diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 3f6e46fce..9a8a49054 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -30,18 +30,19 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Do implements subcommands.Command for the "do" command. It sets up a simple // sandbox and executes the command inside it. See Usage() for more details. type Do struct { - root string - cwd string - ip string + root string + cwd string + ip string + quiet bool } // Name implements subcommands.Command.Name. @@ -71,6 +72,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) { f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`) f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory") f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox") + f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr") } // Execute implements subcommands.Command.Execute. @@ -134,7 +136,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su } else if conf.Rootless { if conf.Network == boot.NetworkSandbox { - fmt.Println("*** Rootless requires changing network type to host ***") + c.notifyUser("*** Warning: using host network due to --rootless ***") conf.Network = boot.NetworkHost } @@ -164,7 +166,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return Errorf("Error write spec: %v", err) } - ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false) + runArgs := container.Args{ + ID: cid, + Spec: spec, + BundleDir: tmpDir, + Attached: true, + } + ws, err := container.Run(conf, runArgs) if err != nil { return Errorf("running container: %v", err) } @@ -173,6 +181,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return subcommands.ExitSuccess } +func (c *Do) notifyUser(format string, v ...interface{}) { + if !c.quiet { + fmt.Printf(format+"\n", v...) + } + log.Warningf(format, v...) +} + func resolvePath(path string) (string, error) { var err error path, err = filepath.Abs(path) diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go index 700b19f14..3585b5448 100644 --- a/runsc/cmd/error.go +++ b/runsc/cmd/error.go @@ -22,7 +22,7 @@ import ( "time" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // ErrorLogger is where error messages should be written to. These messages are diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index c6bc8fc3a..3972e9224 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -22,9 +22,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Events implements subcommands.Command for the "events" command. diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 0eeaaadba..e817eff77 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -30,14 +30,14 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/console" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/console" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Exec implements subcommands.Command for the "exec" command. @@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi cmd.SysProcAttr = &syscall.SysProcAttr{ Setsid: true, Setctty: true, - Ctty: int(tty.Fd()), + // The Ctty FD must be the FD in the child process's FD + // table. Since we set cmd.Stdin/Stdout/Stderr to the + // tty FD, we can use any of 0, 1, or 2 here. + // See https://github.com/golang/go/issues/29458. + Ctty: 0, } } diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go index 6f0f258c0..eb38a431f 100644 --- a/runsc/cmd/exec_test.go +++ b/runsc/cmd/exec_test.go @@ -21,10 +21,10 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/urpc" ) func TestUser(t *testing.T) { diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index bccb29397..9faabf494 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -27,13 +27,13 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" - "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/fsgofer" + "gvisor.dev/gvisor/runsc/fsgofer/filter" + "gvisor.dev/gvisor/runsc/specutils" ) var caps = []string{ @@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) + if err := fsgofer.OpenProcSelfFD(); err != nil { + Fatalf("failed to open /proc/self/fd: %v", err) + } + if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index aed5f3291..6c1f197a6 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go @@ -24,8 +24,8 @@ import ( "flag" "github.com/google/subcommands" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Kill implements subcommands.Command for the "kill" command. diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index 1f5ca2473..dd2d99a6b 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go @@ -25,8 +25,8 @@ import ( "flag" "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // List implements subcommands.Command for the "list" command for the "list" command. diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index 11b36aa10..9c0e92001 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go @@ -19,8 +19,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Pause implements subcommands.Command for the "pause" command. diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go index 3a3e6f17a..45c644f3f 100644 --- a/runsc/cmd/ps.go +++ b/runsc/cmd/ps.go @@ -20,9 +20,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // PS implements subcommands.Command for the "ps" command. diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go index a78a0dce6..7be60cd7d 100644 --- a/runsc/cmd/restore.go +++ b/runsc/cmd/restore.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Restore implements subcommands.Command for the "restore" command. @@ -100,7 +100,16 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{ conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName) - ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + runArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: r.consoleSocket, + PIDFile: r.pidFile, + UserLog: r.userLog, + Attached: !r.detach, + } + ws, err := container.Run(conf, runArgs) if err != nil { return Errorf("running container: %v", err) } diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index 9a2ade41e..b2df5c640 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go @@ -19,8 +19,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Resume implements subcommands.Command for the "resume" command. diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index abf602239..33f4bc12b 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go @@ -20,9 +20,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/specutils" ) // Run implements subcommands.Command for the "run" command. @@ -81,7 +81,16 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s } specutils.LogSpec(spec) - ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + runArgs := container.Args{ + ID: id, + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: r.consoleSocket, + PIDFile: r.pidFile, + UserLog: r.userLog, + Attached: !r.detach, + } + ws, err := container.Run(conf, runArgs) if err != nil { return Errorf("running container: %v", err) } diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 31e8f42bb..de2115dff 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -18,8 +18,8 @@ import ( "context" "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // Start implements subcommands.Command for the "start" command. diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index f0d449b19..e9f41cbd8 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go @@ -21,9 +21,9 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) // State implements subcommands.Command for the "state" command. diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index 9c8a66490..fb6c1ab29 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -27,7 +27,7 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // Syscalls implements subcommands.Command for the "syscalls" command. @@ -41,7 +41,7 @@ type Syscalls struct { // Maps operating system to architecture to ArchInfo. type CompatibilityInfo map[string]map[string]ArchInfo -// ArchInfo is compatbility doc for an architecture. +// ArchInfo is compatibility doc for an architecture. type ArchInfo struct { // Syscalls maps syscall number for the architecture to the doc. Syscalls map[uintptr]SyscallDoc `json:"syscalls"` diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index 58fd01974..046489687 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -22,8 +22,8 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/container" ) const ( diff --git a/runsc/console/BUILD b/runsc/console/BUILD index 3ff9eba27..e623c1a0f 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD @@ -4,8 +4,10 @@ package(licenses = ["notice"]) go_library( name = "console", - srcs = ["console.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/console", + srcs = [ + "console.go", + ], + importpath = "gvisor.dev/gvisor/runsc/console", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 13709a0ae..e246c38ae 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -9,7 +9,7 @@ go_library( "hook.go", "status.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/container", + importpath = "gvisor.dev/gvisor/runsc/container", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -53,6 +53,7 @@ go_test( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/specutils", "//runsc/test/testutil", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index d016533e6..e9372989f 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -27,10 +27,10 @@ import ( "github.com/kr/pty" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/unet" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/test/testutil" ) // socketPath creates a path inside bundleDir and ensures that the returned @@ -138,8 +138,13 @@ func TestConsoleSocket(t *testing.T) { defer cleanup() // Create the container and pass the socket name. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, sock, "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -167,7 +172,12 @@ func TestJobControlSignalExec(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -186,7 +196,7 @@ func TestJobControlSignalExec(t *testing.T) { defer ptySlave.Close() // Exec bash and attach a terminal. - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/bash", // Don't let bash execute from profile or rc files, otherwise // our PID counts get messed up. @@ -198,7 +208,7 @@ func TestJobControlSignalExec(t *testing.T) { StdioIsPty: true, } - pid, err := c.Execute(args) + pid, err := c.Execute(execArgs) if err != nil { t.Fatalf("error executing: %v", err) } @@ -296,8 +306,13 @@ func TestJobControlSignalRootContainer(t *testing.T) { defer cleanup() // Create the container and pass the socket name. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, sock, "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 04b611b56..8320bb2ca 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -33,12 +33,12 @@ import ( "github.com/cenkalti/backoff" "github.com/gofrs/flock" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/sandbox" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/sandbox" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -206,7 +206,7 @@ func findContainerRoot(rootDir, partialID string) (string, error) { } // Now see whether id could be an abbreviation of exactly 1 of the - // container ids. If id is ambigious (it could match more than 1 + // container ids. If id is ambiguous (it could match more than 1 // container), it is an error. cRoot = "" ids, err := List(rootDir) @@ -242,16 +242,47 @@ func List(rootDir string) ([]string, error) { return out, nil } -// Create creates the container in a new Sandbox process, unless the metadata +// Args is used to configure a new container. +type Args struct { + // ID is the container unique identifier. + ID string + + // Spec is the OCI spec that describes the container. + Spec *specs.Spec + + // BundleDir is the directory containing the container bundle. + BundleDir string + + // ConsoleSocket is the path to a unix domain socket that will receive + // the console FD. It may be empty. + ConsoleSocket string + + // PIDFile is the filename where the container's root process PID will be + // written to. It may be empty. + PIDFile string + + // UserLog is the filename to send user-visible logs to. It may be empty. + // + // It only applies for the init container. + UserLog string + + // Attached indicates that the sandbox lifecycle is attached with the caller. + // If the caller exits, the sandbox should exit too. + // + // It only applies for the init container. + Attached bool +} + +// New creates the container in a new Sandbox process, unless the metadata // indicates that an existing Sandbox should be used. The caller must call // Destroy() on the container. -func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string) (*Container, error) { - log.Debugf("Create container %q in root dir: %s", id, conf.RootDir) - if err := validateID(id); err != nil { +func New(conf *boot.Config, args Args) (*Container, error) { + log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir) + if err := validateID(args.ID); err != nil { return nil, err } - unlockRoot, err := maybeLockRootContainer(spec, conf.RootDir) + unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir) if err != nil { return nil, err } @@ -259,7 +290,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Lock the container metadata file to prevent concurrent creations of // containers with the same id. - containerRoot := filepath.Join(conf.RootDir, id) + containerRoot := filepath.Join(conf.RootDir, args.ID) unlock, err := lockContainerMetadata(containerRoot) if err != nil { return nil, err @@ -269,16 +300,16 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Check if the container already exists by looking for the metadata // file. if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil { - return nil, fmt.Errorf("container with id %q already exists", id) + return nil, fmt.Errorf("container with id %q already exists", args.ID) } else if !os.IsNotExist(err) { return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err) } c := &Container{ - ID: id, - Spec: spec, - ConsoleSocket: consoleSocket, - BundleDir: bundleDir, + ID: args.ID, + Spec: args.Spec, + ConsoleSocket: args.ConsoleSocket, + BundleDir: args.BundleDir, Root: containerRoot, Status: Creating, CreatedAt: time.Now(), @@ -294,31 +325,47 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // started in an existing sandbox, we must do so. The metadata will // indicate the ID of the sandbox, which is the same as the ID of the // init container in the sandbox. - if isRoot(spec) { - log.Debugf("Creating new sandbox for container %q", id) + if isRoot(args.Spec) { + log.Debugf("Creating new sandbox for container %q", args.ID) // Create and join cgroup before processes are created to ensure they are - // part of the cgroup from the start (and all tneir children processes). - cg, err := cgroup.New(spec) + // part of the cgroup from the start (and all their children processes). + cg, err := cgroup.New(args.Spec) if err != nil { return nil, err } if cg != nil { // If there is cgroup config, install it before creating sandbox process. - if err := cg.Install(spec.Linux.Resources); err != nil { + if err := cg.Install(args.Spec.Linux.Resources); err != nil { return nil, fmt.Errorf("configuring cgroup: %v", err) } } if err := runInCgroup(cg, func() error { - ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir) + ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir) if err != nil { return err } // Start a new sandbox for this container. Any errors after this point // must destroy the container. - c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg) - return err + sandArgs := &sandbox.Args{ + ID: args.ID, + Spec: args.Spec, + BundleDir: args.BundleDir, + ConsoleSocket: args.ConsoleSocket, + UserLog: args.UserLog, + IOFiles: ioFiles, + MountsFile: specFile, + Cgroup: cg, + Attached: args.Attached, + } + sand, err := sandbox.New(conf, sandArgs) + if err != nil { + return err + } + c.Sandbox = sand + return nil + }); err != nil { return nil, err } @@ -331,7 +378,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // * A container struct whose sandbox ID is equal to the above // container/sandbox ID, but that has a different container // ID. This is the child container. - sbid, ok := specutils.SandboxID(spec) + sbid, ok := specutils.SandboxID(args.Spec) if !ok { return nil, fmt.Errorf("no sandbox ID found when creating container") } @@ -356,8 +403,8 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // Write the PID file. Containerd considers the create complete after // this file is created, so it must be the last thing we do. - if pidFile != "" { - if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil { + if args.PIDFile != "" { + if err := ioutil.WriteFile(args.PIDFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil { return nil, fmt.Errorf("error writing PID file: %v", err) } } @@ -399,7 +446,7 @@ func (c *Container) Start(conf *boot.Config) error { } } else { // Join cgroup to strt gofer process to ensure it's part of the cgroup from - // the start (and all tneir children processes). + // the start (and all their children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) @@ -461,13 +508,13 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str } // Run is a helper that calls Create + Start + Wait. -func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string, detach bool) (syscall.WaitStatus, error) { - log.Debugf("Run container %q in root dir: %s", id, conf.RootDir) - c, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, userLog) +func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { + log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir) + c, err := New(conf, args) if err != nil { return 0, fmt.Errorf("creating container: %v", err) } - // Clean up partially created container if an error ocurrs. + // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. cu := specutils.MakeCleanup(func() { c.Destroy() @@ -476,7 +523,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke if conf.RestoreFile != "" { log.Debugf("Restore: %v", conf.RestoreFile) - if err := c.Restore(spec, conf, conf.RestoreFile); err != nil { + if err := c.Restore(args.Spec, conf, conf.RestoreFile); err != nil { return 0, fmt.Errorf("starting container: %v", err) } } else { @@ -484,11 +531,11 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke return 0, fmt.Errorf("starting container: %v", err) } } - if detach { - cu.Release() - return 0, nil + if args.Attached { + return c.Wait() } - return c.Wait() + cu.Release() + return 0, nil } // Execute runs the specified command in the container. It returns the PID of diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 867bf8187..c1d6ca7b8 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -17,6 +17,7 @@ package container import ( "bytes" "fmt" + "io" "io/ioutil" "os" "path" @@ -31,13 +32,14 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) // waitForProcessList waits for the given process list to show up in the container. @@ -211,7 +213,13 @@ func run(spec *specs.Spec, conf *boot.Config) error { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { return fmt.Errorf("running container: %v", err) } @@ -250,7 +258,7 @@ func configs(opts ...configOption) []*boot.Config { if testutil.RaceEnabled { continue } - c.Platform = boot.PlatformKVM + c.Platform = platforms.KVM case nonExclusiveFS: c.FileAccess = boot.FileAccessShared default: @@ -295,15 +303,19 @@ func TestLifecycle(t *testing.T) { }, } // Create the container. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } defer c.Destroy() // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -316,7 +328,7 @@ func TestLifecycle(t *testing.T) { if err != nil { t.Fatalf("error listing containers: %v", err) } - if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) { + if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { t.Errorf("container list got %v, want %v", got, want) } @@ -326,7 +338,7 @@ func TestLifecycle(t *testing.T) { } // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -367,7 +379,7 @@ func TestLifecycle(t *testing.T) { wg.Wait() // Load the container from disk and check the status. - c, err = Load(rootDir, id) + c, err = Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -390,7 +402,7 @@ func TestLifecycle(t *testing.T) { } // Loading the container by id should fail. - if _, err = Load(rootDir, id); err == nil { + if _, err = Load(rootDir, args.ID); err == nil { t.Errorf("expected loading destroyed container to fail, but it did not") } } @@ -398,6 +410,46 @@ func TestLifecycle(t *testing.T) { // Test the we can execute the application with different path formats. func TestExePath(t *testing.T) { + // Create two directories that will be prepended to PATH. + firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") + if err != nil { + t.Fatal(err) + } + secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") + if err != nil { + t.Fatal(err) + } + + // Create two minimal executables in the second path, two of which + // will be masked by files in first path. + for _, p := range []string{"unmasked", "masked1", "masked2"} { + path := filepath.Join(secondPath, p) + f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { + t.Fatal(err) + } + } + + // Create a non-executable file in the first path which masks a healthy + // executable in the second. + nonExecutable := filepath.Join(firstPath, "masked1") + f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) + if err != nil { + t.Fatal(err) + } + f2.Close() + + // Create a non-regular file in the first path which masks a healthy + // executable in the second. + nonRegular := filepath.Join(firstPath, "masked2") + if err := os.Mkdir(nonRegular, 0777); err != nil { + t.Fatal(err) + } + for _, conf := range configs(overlay) { t.Logf("Running test with conf: %+v", conf) for _, test := range []struct { @@ -410,14 +462,36 @@ func TestExePath(t *testing.T) { {path: "thisfiledoesntexit", success: false}, {path: "bin/thisfiledoesntexit", success: false}, {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, } { spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("exec: %s, error setting up container: %v", test.path, err) } - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) os.RemoveAll(rootDir) os.RemoveAll(bundleDir) @@ -450,7 +524,13 @@ func TestAppExitStatus(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - ws, err := Run(testutil.UniqueContainerID(), succSpec, conf, bundleDir, "", "", "", false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: succSpec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { t.Fatalf("error running container: %v", err) } @@ -469,7 +549,13 @@ func TestAppExitStatus(t *testing.T) { defer os.RemoveAll(rootDir2) defer os.RemoveAll(bundleDir2) - ws, err = Run(testutil.UniqueContainerID(), errSpec, conf, bundleDir2, "", "", "", false) + args2 := Args{ + ID: testutil.UniqueContainerID(), + Spec: errSpec, + BundleDir: bundleDir2, + Attached: true, + } + ws, err = Run(conf, args2) if err != nil { t.Fatalf("error running container: %v", err) } @@ -494,7 +580,12 @@ func TestExec(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -526,7 +617,7 @@ func TestExec(t *testing.T) { t.Error(err) } - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/sleep", Argv: []string{"/bin/sleep", "5"}, WorkingDirectory: "/", @@ -537,7 +628,7 @@ func TestExec(t *testing.T) { // First, start running exec (whick blocks). status := make(chan error, 1) go func() { - exitStatus, err := cont.executeSync(args) + exitStatus, err := cont.executeSync(execArgs) if err != nil { log.Debugf("error executing: %v", err) status <- err @@ -585,7 +676,12 @@ func TestKillPid(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -656,7 +752,12 @@ func TestCheckpointRestore(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -702,7 +803,12 @@ func TestCheckpointRestore(t *testing.T) { defer outputFile2.Close() // Restore into a new container. - cont2, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args2 := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont2, err := New(conf, args2) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -741,7 +847,12 @@ func TestCheckpointRestore(t *testing.T) { defer outputFile3.Close() // Restore into a new container. - cont3, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args3 := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont3, err := New(conf, args3) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -778,7 +889,7 @@ func TestUnixDomainSockets(t *testing.T) { t.Logf("Running test with conf: %+v", conf) // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to to ensure the size limit is + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is // not exceeded. Assumes '/tmp' exists in the system. dir, err := ioutil.TempDir("/tmp", "uds-test") if err != nil { @@ -820,7 +931,12 @@ func TestUnixDomainSockets(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -867,7 +983,12 @@ func TestUnixDomainSockets(t *testing.T) { defer outputFile2.Close() // Restore into a new container. - contRestore, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + argsRestore := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + contRestore, err := New(conf, argsRestore) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -921,7 +1042,12 @@ func TestPauseResume(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -949,7 +1075,7 @@ func TestPauseResume(t *testing.T) { } script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name()) - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/bin/bash", Argv: []string{"bash", "-c", script}, WorkingDirectory: "/", @@ -957,7 +1083,7 @@ func TestPauseResume(t *testing.T) { } // First, start running exec. - _, err = cont.Execute(args) + _, err = cont.Execute(execArgs) if err != nil { t.Fatalf("error executing: %v", err) } @@ -1026,7 +1152,12 @@ func TestPauseResumeStatus(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1090,7 +1221,12 @@ func TestCapabilities(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1132,7 +1268,7 @@ func TestCapabilities(t *testing.T) { // Need to traverse the intermediate directory. os.Chmod(rootDir, 0755) - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: exePath, Argv: []string{exePath}, WorkingDirectory: "/", @@ -1142,16 +1278,16 @@ func TestCapabilities(t *testing.T) { } // "exe" should fail because we don't have the necessary permissions. - if _, err := cont.executeSync(args); err == nil { + if _, err := cont.executeSync(execArgs); err == nil { t.Fatalf("container executed without error, but an error was expected") } // Now we run with the capability enabled and should succeed. - args.Capabilities = &auth.TaskCapabilities{ + execArgs.Capabilities = &auth.TaskCapabilities{ EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), } // "exe" should not fail this time. - if _, err := cont.executeSync(args); err != nil { + if _, err := cont.executeSync(execArgs); err != nil { t.Fatalf("container failed to exec %v: %v", args, err) } } @@ -1232,7 +1368,12 @@ func TestReadonlyRoot(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1300,7 +1441,12 @@ func TestUIDMap(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1352,7 +1498,12 @@ func TestReadonlyMount(t *testing.T) { defer os.RemoveAll(bundleDir) // Create, start and wait for the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1396,7 +1547,12 @@ func TestAbbreviatedIDs(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - cont, err := Create(cid, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: cid, + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1441,7 +1597,12 @@ func TestGoferExits(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1520,7 +1681,14 @@ func TestUserLog(t *testing.T) { userLog := filepath.Join(dir, "user.log") // Create, start and wait for the container. - ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", userLog, false) + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + UserLog: userLog, + Attached: true, + } + ws, err := Run(conf, args) if err != nil { t.Fatalf("error running container: %v", err) } @@ -1554,7 +1722,12 @@ func TestWaitOnExitedSandbox(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and Start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1597,7 +1770,12 @@ func TestDestroyNotStarted(t *testing.T) { defer os.RemoveAll(bundleDir) // Create the container and check that it can be destroyed. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1619,15 +1797,19 @@ func TestDestroyStarting(t *testing.T) { defer os.RemoveAll(bundleDir) // Create the container and check that it can be destroyed. - id := testutil.UniqueContainerID() - c, err := Create(id, spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } // Container is not thread safe, so load another instance to run in // concurrently. - startCont, err := Load(rootDir, id) + startCont, err := Load(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -1732,7 +1914,12 @@ func TestMountPropagation(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("creating container: %v", err) } @@ -1750,21 +1937,21 @@ func TestMountPropagation(t *testing.T) { // Check that mount didn't propagate to private mount. privFile := filepath.Join(priv, "mnt", "file") - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "!", "-f", privFile}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err) } // Check that mount propagated to slave mount. slaveFile := filepath.Join(slave, "mnt", "file") - args = &control.ExecArgs{ + execArgs = &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "-f", slaveFile}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err) } } @@ -1813,7 +2000,12 @@ func TestMountSymlink(t *testing.T) { defer os.RemoveAll(rootDir) defer os.RemoveAll(bundleDir) - cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("creating container: %v", err) } @@ -1826,11 +2018,11 @@ func TestMountSymlink(t *testing.T) { // Check that symlink was resolved and mount was created where the symlink // is pointing to. file := path.Join(target, "file") - args := &control.ExecArgs{ + execArgs := &control.ExecArgs{ Filename: "/usr/bin/test", Argv: []string{"test", "-f", file}, } - if ws, err := cont.executeSync(args); err != nil || ws != 0 { + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) } } diff --git a/runsc/container/hook.go b/runsc/container/hook.go index acae6781e..901607aee 100644 --- a/runsc/container/hook.go +++ b/runsc/container/hook.go @@ -24,7 +24,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // This file implements hooks as defined in OCI spec: diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index d57a73d46..c0f9b372c 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -28,10 +28,10 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { @@ -84,7 +84,12 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } bundles = append(bundles, bundleDir) - cont, err := Create(ids[i], spec, conf, bundleDir, "", "", "") + args := Args{ + ID: ids[i], + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) @@ -661,7 +666,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } defer os.RemoveAll(rootBundleDir) - root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "") + rootArgs := Args{ + ID: ids[0], + Spec: specs[0], + BundleDir: rootBundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -677,7 +687,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } defer os.RemoveAll(bundleDir) - cont, err := Create(ids[1], specs[1], conf, bundleDir, "", "", "") + args := Args{ + ID: ids[1], + Spec: specs[1], + BundleDir: bundleDir, + } + cont, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -712,7 +727,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } defer os.RemoveAll(rootBundleDir) - root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "") + rootArgs := Args{ + ID: ids[0], + Spec: specs[0], + BundleDir: rootBundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -733,7 +753,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } defer os.RemoveAll(bundleDir) - cont, err := Create(ids[i], specs[i], conf, bundleDir, "", "", "") + rootArgs := Args{ + ID: ids[i], + Spec: specs[i], + BundleDir: rootBundleDir, + } + cont, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -807,7 +832,12 @@ func TestMultiContainerGoferStop(t *testing.T) { // Start root container. conf := testutil.TestConfigWithRoot(rootDir) - root, err := Create(rootID, rootSpec, conf, bundleDir, "", "", "") + rootArgs := Args{ + ID: rootID, + Spec: rootSpec, + BundleDir: bundleDir, + } + root, err := New(conf, rootArgs) if err != nil { t.Fatalf("error creating root container: %v", err) } @@ -831,7 +861,12 @@ func TestMultiContainerGoferStop(t *testing.T) { } defer os.RemoveAll(bundleDir) - child, err := Create(ids[j], spec, conf, bundleDir, "", "", "") + args := Args{ + ID: ids[j], + Spec: spec, + BundleDir: bundleDir, + } + child, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -1087,7 +1122,12 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { } defer os.RemoveAll(bundleDir) - containers[1], err = Create(ids[1], podSpec[1], conf, bundleDir, "", "", "") + args := Args{ + ID: ids[1], + Spec: podSpec[1], + BundleDir: bundleDir, + } + containers[1], err = New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index 9d5a592a5..1f90d2462 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -22,10 +22,10 @@ import ( "path/filepath" "testing" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/test/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated @@ -52,7 +52,12 @@ func TestSharedVolume(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } @@ -206,7 +211,12 @@ func TestSharedVolumeFile(t *testing.T) { defer os.RemoveAll(bundleDir) // Create and start the container. - c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "") + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) if err != nil { t.Fatalf("error creating container: %v", err) } diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 62923f1ef..b7fc6498f 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -29,7 +29,7 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) func main() { diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 4adc9c1bc..80a4aa2fe 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -8,7 +8,7 @@ go_library( "fsgofer.go", "fsgofer_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer", + importpath = "gvisor.dev/gvisor/runsc/fsgofer", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD index 78c5b526c..e2318a978 100644 --- a/runsc/fsgofer/filter/BUILD +++ b/runsc/fsgofer/filter/BUILD @@ -11,7 +11,7 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter", + importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 4faab2946..2d50774d4 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -19,8 +19,8 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" ) // allowedSyscalls is the set of syscalls executed by the gofer. diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go index 5c5ec4e06..e28d4b8d6 100644 --- a/runsc/fsgofer/filter/extra_filters.go +++ b/runsc/fsgofer/filter/extra_filters.go @@ -17,11 +17,11 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go index 553060bc3..8c6179c8f 100644 --- a/runsc/fsgofer/filter/extra_filters_msan.go +++ b/runsc/fsgofer/filter/extra_filters_msan.go @@ -19,8 +19,8 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by MSAN. diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go index 28555f898..885c92f7a 100644 --- a/runsc/fsgofer/filter/extra_filters_race.go +++ b/runsc/fsgofer/filter/extra_filters_race.go @@ -19,8 +19,8 @@ package filter import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" ) // instrumentationFilters returns additional filters for syscalls used by TSAN. diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go index ff8154369..65053415f 100644 --- a/runsc/fsgofer/filter/filter.go +++ b/runsc/fsgofer/filter/filter.go @@ -18,7 +18,7 @@ package filter import ( - "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.dev/gvisor/pkg/seccomp" ) // Install installs seccomp filters. diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 2cf50290a..fe450c64f 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -28,15 +28,16 @@ import ( "path" "path/filepath" "runtime" + "strconv" "sync" "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/fd" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -223,6 +224,28 @@ type localFile struct { lastDirentOffset uint64 } +var procSelfFD *fd.FD + +// OpenProcSelfFD opens the /proc/self/fd directory, which will be used to +// reopen file descriptors. +func OpenProcSelfFD() error { + d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0) + if err != nil { + return fmt.Errorf("error opening /proc/self/fd: %v", err) + } + procSelfFD = fd.New(d) + return nil +} + +func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) { + d, err := syscall.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0) + if err != nil { + return nil, err + } + + return fd.New(d), nil +} + func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) { path := path.Join(parent.hostPath, name) f, err := openAnyFile(path, func(mode int) (*fd.FD, error) { @@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { // name_to_handle_at and open_by_handle_at aren't supported by overlay2. log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath) var err error - newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0) + newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags()) if err != nil { return nil, p9.QID{}, 0, extractErrno(err) } @@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { // Duplicate current file if 'names' is empty. if len(names) == 0 { newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) { - return fd.Open(l.hostPath, openFlags|mode, 0) + return reopenProcFd(l.file, openFlags|mode) }) if err != nil { return nil, nil, extractErrno(err) @@ -596,7 +619,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) } // SetAttr implements p9.File. Due to mismatch in file API, options -// cannot be changed atomicaly and user may see partial changes when +// cannot be changed atomically and user may see partial changes when // an error happens. func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { conf := l.attachPoint.conf @@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { f := l.file if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite { var err error - f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0) + f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY) if err != nil { return extractErrno(err) } diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 695836927..0a162bb8a 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -22,8 +22,8 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" ) func init() { @@ -31,6 +31,10 @@ func init() { allConfs = append(allConfs, rwConfs...) allConfs = append(allConfs, roConfs...) + + if err := OpenProcSelfFD(); err != nil { + panic(err) + } } func assertPanic(t *testing.T, f func()) { diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go index 58af5e44d..ff2556aee 100644 --- a/runsc/fsgofer/fsgofer_unsafe.go +++ b/runsc/fsgofer/fsgofer_unsafe.go @@ -18,8 +18,8 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserr" ) func statAt(dirFd int, name string) (syscall.Stat_t, error) { diff --git a/runsc/main.go b/runsc/main.go index cfe3a78d0..bc83c57a2 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -29,10 +29,11 @@ import ( "flag" "github.com/google/subcommands" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cmd" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/cmd" + "gvisor.dev/gvisor/runsc/specutils" ) var ( @@ -61,7 +62,7 @@ var ( straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs") // Flags that control sandbox runtime behavior. - platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") + platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") gso = flag.Bool("gso", true, "enable generic segmenation offload") fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") @@ -139,8 +140,8 @@ func main() { } cmd.ErrorLogger = errorLogger - platformType, err := boot.MakePlatformType(*platform) - if err != nil { + platformType := *platformName + if _, err := platform.Lookup(platformType); err != nil { cmd.Fatalf("%v", err) } diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c0de9a28f..7fdceaab6 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -9,7 +9,7 @@ go_library( "network_unsafe.go", "sandbox.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox", + importpath = "gvisor.dev/gvisor/runsc/sandbox", visibility = [ "//runsc:__subpackages__", ], @@ -18,9 +18,10 @@ go_library( "//pkg/control/server", "//pkg/log", "//pkg/sentry/control", - "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/cgroup", "//runsc/console", "//runsc/specutils", diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index e9e24fc58..a965a9dcb 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -27,10 +27,10 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" ) const ( diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 5ff6f879c..4a11f617d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -28,16 +28,17 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/pkg/control/client" - "gvisor.googlesource.com/gvisor/pkg/control/server" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" - "gvisor.googlesource.com/gvisor/pkg/urpc" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/console" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/pkg/control/client" + "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/console" + "gvisor.dev/gvisor/runsc/specutils" ) // Sandbox wraps a sandbox process. @@ -73,10 +74,46 @@ type Sandbox struct { statusMu sync.Mutex } +// Args is used to configure a new sandbox. +type Args struct { + // ID is the sandbox unique identifier. + ID string + + // Spec is the OCI spec that describes the container. + Spec *specs.Spec + + // BundleDir is the directory containing the container bundle. + BundleDir string + + // ConsoleSocket is the path to a unix domain socket that will receive + // the console FD. It may be empty. + ConsoleSocket string + + // UserLog is the filename to send user-visible logs to. It may be empty. + UserLog string + + // IOFiles is the list of files that connect to a 9P endpoint for the mounts + // points using Gofers. They must be in the same order as mounts appear in + // the spec. + IOFiles []*os.File + + // MountsFile is a file container mount information from the spec. It's + // equivalent to the mounts from the spec, except that all paths have been + // resolved to their final absolute location. + MountsFile *os.File + + // Gcgroup is the cgroup that the sandbox is part of. + Cgroup *cgroup.Cgroup + + // Attached indicates that the sandbox lifecycle is attached with the caller. + // If the caller exits, the sandbox should exit too. + Attached bool +} + // New creates the sandbox process. The caller must call Destroy() on the // sandbox. -func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) { - s := &Sandbox{ID: id, Cgroup: cg} +func New(conf *boot.Config, args *Args) (*Sandbox, error) { + s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. c := specutils.MakeCleanup(func() { @@ -93,7 +130,7 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke defer clientSyncFile.Close() // Create the sandbox process. - err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile) + err = s.createSandboxProcess(conf, args, sandboxSyncFile) // sandboxSyncFile has to be closed to be able to detect when the sandbox // process exits unexpectedly. sandboxSyncFile.Close() @@ -291,7 +328,7 @@ func (s *Sandbox) connError(err error) error { // createSandboxProcess starts the sandbox as a subprocess by running the "boot" // command, passing in the bundle dir. -func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error { +func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error { // nextFD is used to get unused FDs that we can pass to the sandbox. It // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. nextFD := 3 @@ -327,7 +364,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Add the "boot" command to the args. // // All flags after this must be for the boot command - cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir) + cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir) // Create a socket for the control server and donate it to the sandbox. addr := boot.ControlSocketAddr(s.ID) @@ -342,12 +379,12 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD)) nextFD++ - defer mountsFile.Close() - cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile) + defer args.MountsFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile) cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD)) nextFD++ - specFile, err := specutils.OpenSpec(bundleDir) + specFile, err := specutils.OpenSpec(args.BundleDir) if err != nil { return err } @@ -361,7 +398,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund nextFD++ // If there is a gofer, sends all socket ends to the sandbox. - for _, f := range ioFiles { + for _, f := range args.IOFiles { defer f.Close() cmd.ExtraFiles = append(cmd.ExtraFiles, f) cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD)) @@ -389,23 +426,22 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // If the console control socket file is provided, then create a new // pty master/slave pair and set the TTY on the sandbox process. - if consoleSocket != "" { + if args.ConsoleSocket != "" { cmd.Args = append(cmd.Args, "--console=true") // console.NewWithSocket will send the master on the given // socket, and return the slave. - tty, err := console.NewWithSocket(consoleSocket) + tty, err := console.NewWithSocket(args.ConsoleSocket) if err != nil { - return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err) + return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err) } defer tty.Close() // Set the TTY as a controlling TTY on the sandbox process. - // Note that the Ctty field must be the FD of the TTY in the - // *new* process, not this process. Since we are about to - // assign the TTY to nextFD, we can use that value here. - // stdin, we can use FD 0 here. cmd.SysProcAttr.Setctty = true + // The Ctty FD must be the FD in the child process's FD table, + // which will be nextFD in this case. + // See https://github.com/golang/go/issues/29458. cmd.SysProcAttr.Ctty = nextFD // Pass the tty as all stdio fds to sandbox. @@ -456,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund {Type: specs.UTSNamespace}, } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // TODO(b/75837838): Also set a new PID namespace so that we limit // access to other host processes. log.Infof("Sandbox will be started in the current PID namespace") @@ -469,7 +505,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Joins the network namespace if network is enabled. the sandbox talks // directly to the host network, which may have been configured in the // namespace. - if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone { + if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone { log.Infof("Sandbox will be started in the container's network namespace: %+v", ns) nss = append(nss, ns) } else if conf.Network == boot.NetworkHost { @@ -483,10 +519,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // inside the user namespace specified in the spec or the current namespace // if none is configured. if conf.Network == boot.NetworkHost { - if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok { + if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok { log.Infof("Sandbox will be started in container's user namespace: %+v", userns) nss = append(nss, userns) - specutils.SetUIDGIDMappings(cmd, spec) + specutils.SetUIDGIDMappings(cmd, args.Spec) } else { log.Infof("Sandbox will be started in the current user namespace") } @@ -598,8 +634,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund } } - if userLog != "" { - f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) + if args.UserLog != "" { + f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) if err != nil { return fmt.Errorf("opening compat log file: %v", err) } @@ -618,6 +654,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund log.Debugf("Donating FD %d: %q", i+3, f.Name()) } + if args.Attached { + // Kill sandbox if parent process exits in attached mode. + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + } + log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { @@ -920,7 +961,7 @@ func (s *Sandbox) StartTrace(f *os.File) error { return nil } -// StopTrace stops a previously started trace.. +// StopTrace stops a previously started trace. func (s *Sandbox) StopTrace() error { log.Debugf("Trace stop %q", s.ID) conn, err := s.sandboxConnect() @@ -935,6 +976,21 @@ func (s *Sandbox) StopTrace() error { return nil } +// ChangeLogging changes logging options. +func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error { + log.Debugf("Change logging start %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil { + return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err) + } + return nil +} + // DestroyContainer destroys the given container. If it is the root container, // then the entire sandbox is destroyed. func (s *Sandbox) DestroyContainer(cid string) error { @@ -991,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error { // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. -func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { - var ( - f *os.File - err error - ) - switch p { - case boot.PlatformKVM: - f, err = kvm.OpenDevice() - default: - return nil, nil +func deviceFileForPlatform(name string) (*os.File, error) { + p, err := platform.Lookup(name) + if err != nil { + return nil, err } + + f, err := p.OpenDevice() if err != nil { return nil, fmt.Errorf("opening device file for platform %q: %v", p, err) } - return f, err + return f, nil } diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 0456e4c4f..fbfb8e2f8 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -9,7 +9,7 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/specutils", + importpath = "gvisor.dev/gvisor/runsc/specutils", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 6e6902e9f..138aa4dd1 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -87,7 +87,7 @@ func OptionsToFlags(opts []string) uint32 { // PropOptionsToFlags converts propagation mount options to syscall flags. // Propagation options cannot be set other with other options and must be -// handled separatedly. +// handled separately. func PropOptionsToFlags(opts []string) uint32 { return optionsToFlags(opts, propOptionsMap) } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 06c13d1ab..d441419cb 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -25,7 +25,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/log" ) // nsCloneFlag returns the clone flag that can be used to set a namespace of @@ -204,7 +204,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { } } -// HasCapabilities returns true if the user has all capabilties in 'cs'. +// HasCapabilities returns true if the user has all capabilities in 'cs'. func HasCapabilities(cs ...capability.Cap) bool { caps, err := capability.NewPid2(os.Getpid()) if err != nil { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 2888f55db..0b40e38a3 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -29,9 +29,9 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) // ExePath must point to runsc binary, which is normally the same binary. It's @@ -394,7 +394,7 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er // DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern' // ends with '/', it's used as a directory with default file name. -// 'logPattern' can contain variables that are substitued: +// 'logPattern' can contain variables that are substituted: // - %TIMESTAMP%: is replaced with a timestamp using the following format: // <yyyymmdd-hhmmss.uuuuuu> // - %COMMAND%: is replaced with 'command' diff --git a/runsc/test/BUILD b/runsc/test/BUILD new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/runsc/test/BUILD diff --git a/runsc/test/build_defs.bzl b/runsc/test/build_defs.bzl new file mode 100644 index 000000000..ac28cc037 --- /dev/null +++ b/runsc/test/build_defs.bzl @@ -0,0 +1,19 @@ +"""Defines a rule for runsc test targets.""" + +load("@io_bazel_rules_go//go:def.bzl", _go_test = "go_test") + +# runtime_test is a macro that will create targets to run the given test target +# with different runtime options. +def runtime_test(**kwargs): + """Runs the given test target with different runtime options.""" + name = kwargs["name"] + _go_test(**kwargs) + kwargs["name"] = name + "_hostnet" + kwargs["args"] = ["--runtime-type=hostnet"] + _go_test(**kwargs) + kwargs["name"] = name + "_kvm" + kwargs["args"] = ["--runtime-type=kvm"] + _go_test(**kwargs) + kwargs["name"] = name + "_overlay" + kwargs["args"] = ["--runtime-type=overlay"] + _go_test(**kwargs) diff --git a/runsc/test/image/BUILD b/runsc/test/image/BUILD index e8b629c6a..58758fde5 100644 --- a/runsc/test/image/BUILD +++ b/runsc/test/image/BUILD @@ -1,8 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//runsc/test:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_test( +runtime_test( name = "image_test", size = "large", srcs = [ @@ -26,5 +27,5 @@ go_test( go_library( name = "image", srcs = ["image.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/image", + importpath = "gvisor.dev/gvisor/runsc/test/image", ) diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go index b969731b0..ddaa2c13b 100644 --- a/runsc/test/image/image_test.go +++ b/runsc/test/image/image_test.go @@ -32,7 +32,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) func TestHelloWorld(t *testing.T) { @@ -209,11 +209,14 @@ func TestMysql(t *testing.T) { } func TestPythonHello(t *testing.T) { - if err := testutil.Pull("google/python-hello"); err != nil { + // TODO(b/136503277): Once we have more complete python runtime tests, + // we can drop this one. + const img = "gcr.io/gvisor-presubmit/python-hello" + if err := testutil.Pull(img); err != nil { t.Fatalf("docker pull failed: %v", err) } d := testutil.MakeDocker("python-hello-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD index 04ed885c6..12065617c 100644 --- a/runsc/test/integration/BUILD +++ b/runsc/test/integration/BUILD @@ -1,8 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//runsc/test:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_test( +runtime_test( name = "integration_test", size = "large", srcs = [ @@ -25,5 +26,5 @@ go_test( go_library( name = "integration", srcs = ["integration.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/integration", + importpath = "gvisor.dev/gvisor/runsc/test/integration", ) diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go index 7c0e61ac3..993136f96 100644 --- a/runsc/test/integration/exec_test.go +++ b/runsc/test/integration/exec_test.go @@ -34,8 +34,8 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/runsc/test/testutil" ) func TestExecCapabilities(t *testing.T) { diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go index c51cab3ae..7cef4b9dd 100644 --- a/runsc/test/integration/integration_test.go +++ b/runsc/test/integration/integration_test.go @@ -32,7 +32,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) // httpRequestSucceeds sends a request to a given url and checks that the status is OK. @@ -86,16 +86,17 @@ func TestLifeCycle(t *testing.T) { } func TestPauseResume(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("pause-resume-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() @@ -149,15 +150,16 @@ func TestPauseResume(t *testing.T) { } func TestCheckpointRestore(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("save-restore-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go index 80bae9970..39b30e757 100644 --- a/runsc/test/integration/regression_test.go +++ b/runsc/test/integration/regression_test.go @@ -18,7 +18,7 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/test/testutil" ) // Test that UDS can be created using overlay when parent directory is in lower diff --git a/runsc/test/root/BUILD b/runsc/test/root/BUILD index 7ded78baa..500ef7b8e 100644 --- a/runsc/test/root/BUILD +++ b/runsc/test/root/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "root", srcs = ["root.go"], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/root", + importpath = "gvisor.dev/gvisor/runsc/test/root", ) go_test( diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go index edb6dee1d..5392dc6e0 100644 --- a/runsc/test/root/cgroup_test.go +++ b/runsc/test/root/cgroup_test.go @@ -25,8 +25,8 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/runsc/cgroup" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/cgroup" + "gvisor.dev/gvisor/runsc/test/testutil" ) func verifyPid(pid int, path string) error { diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go index da2f473b9..d0f236580 100644 --- a/runsc/test/root/chroot_test.go +++ b/runsc/test/root/chroot_test.go @@ -31,8 +31,8 @@ import ( "testing" "github.com/syndtr/gocapability/capability" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" ) // TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned diff --git a/runsc/test/root/crictl_test.go b/runsc/test/root/crictl_test.go index 3cc176104..515ae2df1 100644 --- a/runsc/test/root/crictl_test.go +++ b/runsc/test/root/crictl_test.go @@ -29,9 +29,9 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/root/testdata" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/root/testdata" + "gvisor.dev/gvisor/runsc/test/testutil" ) // Tests for crictl have to be run as root (rather than in a user namespace) diff --git a/runsc/test/root/testdata/BUILD b/runsc/test/root/testdata/BUILD index 7f272dcd3..80dc5f214 100644 --- a/runsc/test/root/testdata/BUILD +++ b/runsc/test/root/testdata/BUILD @@ -11,7 +11,7 @@ go_library( "httpd_mount_paths.go", "sandbox.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/root/testdata", + importpath = "gvisor.dev/gvisor/runsc/test/root/testdata", visibility = [ "//visibility:public", ], diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD index eedf962a4..327e7ca4d 100644 --- a/runsc/test/testutil/BUILD +++ b/runsc/test/testutil/BUILD @@ -10,7 +10,7 @@ go_library( "testutil.go", "testutil_race.go", ], - importpath = "gvisor.googlesource.com/gvisor/runsc/test/testutil", + importpath = "gvisor.dev/gvisor/runsc/test/testutil", visibility = ["//:sandbox"], deps = [ "//runsc/boot", diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go index 81f5a9ef0..3f3e191b0 100644 --- a/runsc/test/testutil/docker.go +++ b/runsc/test/testutil/docker.go @@ -15,6 +15,7 @@ package testutil import ( + "flag" "fmt" "io/ioutil" "log" @@ -30,10 +31,15 @@ import ( "github.com/kr/pty" ) +var runtimeType = flag.String("runtime-type", "", "specify which runtime to use: kvm, hostnet, overlay") + func getRuntime() string { r, ok := os.LookupEnv("RUNSC_RUNTIME") if !ok { - return "runsc-test" + r = "runsc-test" + } + if *runtimeType != "" { + r += "-" + *runtimeType } return r } @@ -197,7 +203,7 @@ func (d *Docker) Stop() error { } // Run calls 'docker run' with the arguments provided. The container starts -// running in the backgroud and the call returns immediately. +// running in the background and the call returns immediately. func (d *Docker) Run(args ...string) error { a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"} a = append(a, args...) diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 1bd5adc54..a98675bfc 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -38,8 +38,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" ) func init() { @@ -132,6 +132,7 @@ func TestConfig() *boot.Config { LogPackets: true, Network: boot.NetworkNone, Strace: true, + Platform: "ptrace", FileAccess: boot.FileAccessExclusive, TestOnlyAllowRunAsCurrentUserWithoutChroot: true, NumNetworkChannels: 1, diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 731e2aa85..4ac511740 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -255,7 +255,7 @@ syscall_test(test = "//test/syscalls/linux:pause_test") syscall_test( size = "large", - add_overlay = False, # TODO(gvisor.dev/issue/318): enable when fixed. + add_overlay = True, shard_count = 5, test = "//test/syscalls/linux:pipe_test", ) diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD index 22e061652..9293f25cb 100644 --- a/test/syscalls/gtest/BUILD +++ b/test/syscalls/gtest/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "gtest", srcs = ["gtest.go"], - importpath = "gvisor.googlesource.com/gvisor/test/syscalls/gtest", + importpath = "gvisor.dev/gvisor/test/syscalls/gtest", visibility = [ "//test:__subpackages__", ], diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 9bafc6e4f..4735284ba 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -42,6 +42,7 @@ cc_binary( name = "exec_state_workload", testonly = 1, srcs = ["exec_state_workload.cc"], + deps = ["@com_google_absl//absl/strings"], ) sh_binary( @@ -984,6 +985,7 @@ cc_binary( "//test/util:file_descriptor", "//test/util:logging", "//test/util:memory_util", + "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", @@ -1175,6 +1177,7 @@ cc_binary( "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", + "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", ], @@ -1909,6 +1912,7 @@ cc_library( ":unix_domain_socket_test_util", "//test/util:test_util", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_googletest//:gtest", ], alwayslink = 1, @@ -2427,6 +2431,7 @@ cc_binary( "//test/util:file_descriptor", "//test/util:test_main", "//test/util:test_util", + "@com_google_absl//absl/strings:str_format", "@com_google_googletest//:gtest", ], ) @@ -2936,6 +2941,8 @@ cc_binary( testonly = 1, srcs = ["tcp_socket.cc"], linkstatic = 1, + # FIXME(b/135470853) + tags = ["flaky"], deps = [ ":socket_test_util", "//test/util:file_descriptor", @@ -2994,6 +3001,8 @@ cc_binary( testonly = 1, srcs = ["timers.cc"], linkstatic = 1, + # FIXME(b/136599201) + tags = ["flaky"], deps = [ "//test/util:cleanup", "//test/util:logging", @@ -3336,3 +3345,18 @@ cc_binary( "@com_google_googletest//:gtest", ], ) + +cc_binary( + name = "proc_net_tcp_test", + testonly = 1, + srcs = ["proc_net_tcp.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc index 79e98597f..7e918b9b2 100644 --- a/test/syscalls/linux/chmod.cc +++ b/test/syscalls/linux/chmod.cc @@ -140,7 +140,8 @@ TEST(ChmodTest, FchmodatFile) { SyscallSucceeds()); ASSERT_THAT( - fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, 0), + fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, + 0), SyscallSucceeds()); EXPECT_THAT(close(parent_fd), SyscallSucceeds()); @@ -165,8 +166,9 @@ TEST(ChmodTest, FchmodatDir) { SyscallSucceeds()); EXPECT_THAT(close(fd), SyscallSucceeds()); - ASSERT_THAT(fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0), - SyscallSucceeds()); + ASSERT_THAT( + fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0), + SyscallSucceeds()); EXPECT_THAT(close(parent_fd), SyscallSucceeds()); EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc index eb1762ddf..2e82f0b3a 100644 --- a/test/syscalls/linux/chown.cc +++ b/test/syscalls/linux/chown.cc @@ -186,10 +186,10 @@ INSTANTIATE_TEST_SUITE_P( return errorFromReturn("fchownat-fd", rc); }, [](const std::string& path, uid_t owner, gid_t group) -> PosixError { - ASSIGN_OR_RETURN_ERRNO( - auto dirfd, Open(std::string(Dirname(path)), O_DIRECTORY | O_RDONLY)); - int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), owner, - group, 0); + ASSIGN_OR_RETURN_ERRNO(auto dirfd, Open(std::string(Dirname(path)), + O_DIRECTORY | O_RDONLY)); + int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), + owner, group, 0); MaybeSave(); return errorFromReturn("fchownat-dirfd", rc); })); diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index a4354ff62..498c45f16 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -273,7 +273,8 @@ TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) { ASSERT_GT(bytes_read, 0); // Finally we want to make sure the maps don't contain the chroot path - ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), std::string::npos); + ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), + std::string::npos); } // Test that mounts outside the chroot will not appear in /proc/self/mounts or diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index 5e5c39d44..367682c3d 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -53,9 +53,9 @@ TEST(EventfdTest, Nonblock) { void* read_three_times(void* arg) { int efd = *reinterpret_cast<int*>(arg); uint64_t l; - read(efd, &l, sizeof(l)); - read(efd, &l, sizeof(l)); - read(efd, &l, sizeof(l)); + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); return nullptr; } @@ -160,7 +160,8 @@ TEST(EventfdTest, NotifyNonZero_NoRandomSave) { ScopedThread t([&efd] { sleep(5); uint64_t val = 1; - write(efd.get(), &val, sizeof(val)); + EXPECT_THAT(write(efd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(sizeof(val))); }); // epoll_wait should return once the thread writes. diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 06c322a99..4c7c95321 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -255,7 +255,8 @@ TEST(ExecDeathTest, InterpreterScriptArgNUL) { TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), - absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), 0755)); + absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), + 0755)); CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n")); diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc index b3fbd5042..b790fe5be 100644 --- a/test/syscalls/linux/exec_proc_exe_workload.cc +++ b/test/syscalls/linux/exec_proc_exe_workload.cc @@ -21,7 +21,8 @@ #include "test/util/posix_error.h" int main(int argc, char** argv, char** envp) { - std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); + std::string exe = + gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); if (exe[0] != '/') { std::cerr << "relative path: " << exe << std::endl; exit(1); diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc index 725c2977f..028902b14 100644 --- a/test/syscalls/linux/exec_state_workload.cc +++ b/test/syscalls/linux/exec_state_workload.cc @@ -19,10 +19,13 @@ #include <sys/auxv.h> #include <sys/prctl.h> #include <sys/time.h> + #include <iostream> #include <ostream> #include <string> +#include "absl/strings/numbers.h" + // Pretty-print a sigset_t. std::ostream& operator<<(std::ostream& out, const sigset_t& s) { out << "{ "; @@ -138,15 +141,14 @@ int main(int argc, char** argv) { return 1; } - char* end; - uint32_t signo = strtoul(argv[2], &end, 10); - if (end == argv[2]) { + uint32_t signo; + if (!absl::SimpleAtoi(argv[2], &signo)) { std::cerr << "invalid signo: " << argv[2] << std::endl; return 1; } - uintptr_t handler = strtoull(argv[3], &end, 16); - if (end == argv[3]) { + uintptr_t handler; + if (!absl::numbers_internal::safe_strtoi_base(argv[3], &handler, 16)) { std::cerr << "invalid handler: " << std::hex << argv[3] << std::endl; return 1; } @@ -160,9 +162,8 @@ int main(int argc, char** argv) { return 1; } - char* end; - uint32_t signo = strtoul(argv[2], &end, 10); - if (end == argv[2]) { + uint32_t signo; + if (!absl::SimpleAtoi(argv[2], &signo)) { std::cerr << "invalid signo: " << argv[2] << std::endl; return 1; } @@ -176,9 +177,8 @@ int main(int argc, char** argv) { return 1; } - char* end; - uint32_t timer = strtoul(argv[2], &end, 10); - if (end == argv[2]) { + uint32_t timer; + if (!absl::SimpleAtoi(argv[2], &timer)) { std::cerr << "invalid signo: " << argv[2] << std::endl; return 1; } diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index b5b972c07..36efabcae 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -160,7 +160,7 @@ class SocketTest : public ::testing::Test { // MatchesStringLength checks that a tuple argument of (struct iovec *, int) // corresponding to an iovec array and its length, contains data that matches -// the std::string length strlen. +// the string length strlen. MATCHER_P(MatchesStringLength, strlen, "") { struct iovec* iovs = arg.first; int niov = arg.second; @@ -177,7 +177,7 @@ MATCHER_P(MatchesStringLength, strlen, "") { // MatchesStringValue checks that a tuple argument of (struct iovec *, int) // corresponding to an iovec array and its length, contains data that matches -// the std::string value str. +// the string value str. MATCHER_P(MatchesStringValue, str, "") { struct iovec* iovs = arg.first; int len = strlen(str); diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index d89cfcbd7..b4a91455d 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -428,7 +428,7 @@ TEST_F(FlockTest, TestDupFdFollowedByLock) { ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); } -// NOTE: These blocking tests are not perfect. Unfortunantely it's very hard to +// NOTE: These blocking tests are not perfect. Unfortunately it's very hard to // determine if a thread was actually blocked in the kernel so we're forced // to use timing. TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks_NoRandomSave) { diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index d146c8db7..8e4efa8d6 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -364,7 +364,7 @@ TYPED_TEST(GetdentsTest, PartialBuffer) { } // Open many file descriptors, then scan through /proc/self/fd to find and close -// them all. (The latter is commonly used to handle races betweek fork/execve +// them all. (The latter is commonly used to handle races between fork/execve // and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that // getdents iterates correctly despite mutation of /proc/self/fd. TYPED_TEST(GetdentsTest, ProcSelfFd) { @@ -437,6 +437,19 @@ TYPED_TEST(GetdentsTest, SeekResetsCursor) { EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); } +// Test that getdents() after SEEK_END succeeds. +// This is a regression test for #128. +TYPED_TEST(GetdentsTest, Issue128ProcSeekEnd) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY | O_DIRECTORY)); + typename TestFixture::DirentBufferType dirents(256); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(), + dirents.Size()), + SyscallSucceeds()); +} + // Some tests using the glibc readdir interface. TEST(ReaddirTest, OpenDir) { DIR* dev; diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 6a3539e22..7384c27dc 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -122,8 +122,8 @@ std::string DumpEvents(const std::vector<Event>& events, int indent_level) { (events.size() > 1) ? "s" : ""); int i = 0; for (const Event& ev : events) { - ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), i++, - DumpEvent(ev)); + ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), + i++, DumpEvent(ev)); } return ss.str(); } @@ -295,10 +295,10 @@ PosixErrorOr<std::vector<Event>> DrainEvents(int fd) { if (event.len > 0) { TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <= readlen); - ev.name = - std::string(cursor + offsetof(struct inotify_event, name)); // NOLINT + ev.name = std::string(cursor + + offsetof(struct inotify_event, name)); // NOLINT // Name field should always be smaller than event.len, otherwise we have - // a buffer overflow. The two sizes aren't equal because the std::string + // a buffer overflow. The two sizes aren't equal because the string // constructor will stop at the first null byte, while event.name may be // padded up to event.len using multiple null bytes. TEST_CHECK(ev.name.size() <= event.len); @@ -319,7 +319,8 @@ PosixErrorOr<FileDescriptor> InotifyInit1(int flags) { return FileDescriptor(fd); } -PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, uint32_t mask) { +PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, + uint32_t mask) { int wd; EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask), SyscallSucceeds()); @@ -980,8 +981,8 @@ TEST(Inotify, WatchOnRelativePath) { EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds()); // Add a watch on file1 with a relative path. - const int wd = ASSERT_NO_ERRNO_AND_VALUE( - InotifyAddWatch(fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS)); // Perform a read on file1, this should generate an IN_ACCESS event. char c; diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc index c525d41d2..4948a76f0 100644 --- a/test/syscalls/linux/ioctl.cc +++ b/test/syscalls/linux/ioctl.cc @@ -229,6 +229,37 @@ TEST_F(IoctlTest, FIOASYNCSelfTarget2) { EXPECT_EQ(io_received, 1); } +// Check that closing an FD does not result in an event. +TEST_F(IoctlTest, FIOASYNCSelfTargetClose) { + // Count SIGIOs received. + struct sigaction sa; + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + for (int i = 0; i < 2; i++) { + auto pair = ASSERT_NO_ERRNO_AND_VALUE( + UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + } + + // FIXME(b/120624367): gVisor erroneously sends SIGIO on close. + SKIP_IF(IsRunningOnGvisor()); + + EXPECT_EQ(io_received, 0); +} + TEST_F(IoctlTest, FIOASYNCInvalidPID) { auto pair = ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 7612919d4..5fc4e9115 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -45,70 +45,79 @@ SocketPairKind IPv6TCPAcceptBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket"); return SocketPairKind{ - description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, - 0, /* dual_stack = */ false)}; + description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; } SocketPairKind IPv4TCPAcceptBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket"); return SocketPairKind{ - description, TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, - 0, /* dual_stack = */ false)}; + description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; } SocketPairKind DualStackTCPAcceptBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket"); return SocketPairKind{ - description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, - 0, /* dual_stack = */ true)}; + description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ true)}; } SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket"); - return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( - AF_INET6, type | SOCK_DGRAM, 0, - /* dual_stack = */ false)}; + return SocketPairKind{ + description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; } SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv4 UDP socket"); - return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( - AF_INET, type | SOCK_DGRAM, 0, - /* dual_stack = */ false)}; + return SocketPairKind{ + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; } SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected dual stack UDP socket"); - return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( - AF_INET6, type | SOCK_DGRAM, 0, - /* dual_stack = */ true)}; + return SocketPairKind{ + description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ true)}; } SocketPairKind IPv4UDPUnboundSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket"); return SocketPairKind{ - description, UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0, - /* dual_stack = */ false)}; + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; } SocketKind IPv4UDPUnboundSocket(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket"); - return SocketKind{description, UnboundSocketCreator( - AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)}; + return SocketKind{ + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UnboundSocketCreator(AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)}; } SocketKind IPv4TCPUnboundSocket(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket"); - return SocketKind{description, UnboundSocketCreator( - AF_INET, type | SOCK_STREAM, IPPROTO_TCP)}; + return SocketKind{ + description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + UnboundSocketCreator(AF_INET, type | SOCK_STREAM, IPPROTO_TCP)}; } } // namespace testing diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index 57ffd1595..51ce323b9 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -197,12 +197,17 @@ int TestSIGALRMToMainThread() { // (but don't guarantee it), so we expect to see most samples on the main // thread. // + // The number of SIGALRMs delivered to a worker should not exceed 20% + // of the number of total signals expected (this is somewhat arbitrary). + const int worker_threshold = result.expected_total / 5; + + // // Linux only guarantees timers will never expire before the requested time. // Thus, we only check the upper bound and also it at least have one sample. TEST_CHECK(result.main_thread_samples <= result.expected_total); TEST_CHECK(result.main_thread_samples > 0); for (int num : result.worker_samples) { - TEST_CHECK_MSG(num <= 50, "worker received too many samples"); + TEST_CHECK_MSG(num <= worker_threshold, "worker received too many samples"); } return 0; diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc index f6ad4d18b..08ff4052c 100644 --- a/test/syscalls/linux/madvise.cc +++ b/test/syscalls/linux/madvise.cc @@ -29,6 +29,7 @@ #include "test/util/file_descriptor.h" #include "test/util/logging.h" #include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -136,6 +137,115 @@ TEST(MadviseDontneedTest, IgnoresPermissions) { EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); } +TEST(MadviseDontforkTest, AddressLength) { + auto m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + char *addr = static_cast<char *>(m.ptr()); + + // Address must be page aligned. + EXPECT_THAT(madvise(addr + 1, kPageSize, MADV_DONTFORK), + SyscallFailsWithErrno(EINVAL)); + + // Zero length madvise always succeeds. + EXPECT_THAT(madvise(addr, 0, MADV_DONTFORK), SyscallSucceeds()); + + // Length must not roll over after rounding up. + size_t badlen = std::numeric_limits<std::size_t>::max() - (kPageSize / 2); + EXPECT_THAT(madvise(0, badlen, MADV_DONTFORK), SyscallFailsWithErrno(EINVAL)); + + // Length need not be page aligned - it is implicitly rounded up. + EXPECT_THAT(madvise(addr, 1, MADV_DONTFORK), SyscallSucceeds()); + EXPECT_THAT(madvise(addr, kPageSize, MADV_DONTFORK), SyscallSucceeds()); +} + +TEST(MadviseDontforkTest, DontforkShared) { + // Mmap two shared file-backed pages and MADV_DONTFORK the second page. + TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize * 2, 2), + TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + + const Mapping ms1 = Mapping(reinterpret_cast<void *>(m.addr()), kPageSize); + const Mapping ms2 = + Mapping(reinterpret_cast<void *>(m.addr() + kPageSize), kPageSize); + m.release(); + + ASSERT_THAT(madvise(ms2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); + + const auto rest = [&] { + // First page is mapped in child and modifications are visible to parent + // via the shared mapping. + TEST_CHECK(IsMapped(ms1.addr())); + ExpectAllMappingBytes(ms1, 2); + memset(ms1.ptr(), 1, kPageSize); + ExpectAllMappingBytes(ms1, 1); + + // Second page must not be mapped in child. + TEST_CHECK(!IsMapped(ms2.addr())); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); + + ExpectAllMappingBytes(ms1, 1); // page contents modified by child. + ExpectAllMappingBytes(ms2, 2); // page contents unchanged. +} + +TEST(MadviseDontforkTest, DontforkAnonPrivate) { + // Mmap three anonymous pages and MADV_DONTFORK the middle page. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + const Mapping mp1 = Mapping(reinterpret_cast<void *>(m.addr()), kPageSize); + const Mapping mp2 = + Mapping(reinterpret_cast<void *>(m.addr() + kPageSize), kPageSize); + const Mapping mp3 = + Mapping(reinterpret_cast<void *>(m.addr() + 2 * kPageSize), kPageSize); + m.release(); + + ASSERT_THAT(madvise(mp2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); + + // Verify that all pages are zeroed and memset the first, second and third + // pages to 1, 2, and 3 respectively. + ExpectAllMappingBytes(mp1, 0); + memset(mp1.ptr(), 1, kPageSize); + + ExpectAllMappingBytes(mp2, 0); + memset(mp2.ptr(), 2, kPageSize); + + ExpectAllMappingBytes(mp3, 0); + memset(mp3.ptr(), 3, kPageSize); + + const auto rest = [&] { + // Verify first page is mapped, verify its contents and then modify the + // page. The mapping is private so the modifications are not visible to + // the parent. + TEST_CHECK(IsMapped(mp1.addr())); + ExpectAllMappingBytes(mp1, 1); + memset(mp1.ptr(), 11, kPageSize); + ExpectAllMappingBytes(mp1, 11); + + // Verify second page is not mapped. + TEST_CHECK(!IsMapped(mp2.addr())); + + // Verify third page is mapped, verify its contents and then modify the + // page. The mapping is private so the modifications are not visible to + // the parent. + TEST_CHECK(IsMapped(mp3.addr())); + ExpectAllMappingBytes(mp3, 3); + memset(mp3.ptr(), 13, kPageSize); + ExpectAllMappingBytes(mp3, 13); + }; + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); + + // The fork and COW by child should not affect the parent mappings. + ExpectAllMappingBytes(mp1, 1); + ExpectAllMappingBytes(mp2, 2); + ExpectAllMappingBytes(mp3, 3); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index 3494bcd13..e57b49a4a 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -60,7 +60,8 @@ int memfd_create(const std::string& name, unsigned int flags) { return syscall(__NR_memfd_create, name.c_str(), flags); } -PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name, uint32_t flags) { +PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name, + uint32_t flags) { int fd = memfd_create(name, flags); if (fd < 0) { return PosixError( diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc index febf2eb14..4c45766c7 100644 --- a/test/syscalls/linux/mknod.cc +++ b/test/syscalls/linux/mknod.cc @@ -90,7 +90,7 @@ TEST(MknodTest, Fifo) { ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); EXPECT_TRUE(S_ISFIFO(st.st_mode)); - std::string msg = "some string"; + std::string msg = "some std::string"; std::vector<char> buf(512); // Read-end of the pipe. @@ -116,7 +116,7 @@ TEST(MknodTest, FifoOtrunc) { ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); EXPECT_TRUE(S_ISFIFO(st.st_mode)); - std::string msg = "some string"; + std::string msg = "some std::string"; std::vector<char> buf(512); // Read-end of the pipe. ScopedThread t([&fifo, &buf, &msg]() { @@ -144,7 +144,7 @@ TEST(MknodTest, FifoTruncNoOp) { ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); EXPECT_TRUE(S_ISFIFO(st.st_mode)); - std::string msg = "some string"; + std::string msg = "some std::string"; std::vector<char> buf(512); // Read-end of the pipe. ScopedThread t([&fifo, &buf, &msg]() { diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 5b5b4c2e8..a112316e9 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -113,7 +113,7 @@ class MMapTest : public ::testing::Test { size_t length_ = 0; }; -// Matches if arg contains the same contents as std::string str. +// Matches if arg contains the same contents as string str. MATCHER_P(EqualsMemory, str, "") { if (0 == memcmp(arg, str.c_str(), str.size())) { return true; @@ -1086,8 +1086,8 @@ TEST_F(MMapFileTest, WriteShared) { ASSERT_THAT(Read(buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a - // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C - // std::string, possibly overruning the buffer. + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(std::string(kFileContents))); } @@ -1122,6 +1122,7 @@ TEST_F(MMapFileTest, WriteSharedBeyondEnd) { ASSERT_THAT(Read(buf.data(), buf.size()), SyscallSucceedsWithValue(first.size())); // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C // std::string, possibly overruning the buffer. EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); @@ -1159,8 +1160,8 @@ TEST_F(MMapFileTest, WriteSharedTruncateUp) { ASSERT_THAT(Read(buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a - // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C - // std::string, possibly overruning the buffer. + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2), EqualsMemory(second)); @@ -1234,8 +1235,8 @@ TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { ASSERT_THAT(Read(buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a - // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C - // std::string, possibly overruning the buffer. + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed)); } @@ -1363,8 +1364,8 @@ TEST_F(MMapFileTest, WritePrivate) { ASSERT_THAT(Read(buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a - // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C - // std::string, possibly overruning the buffer. + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(std::string(len, '\0'))); } diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index 3a17672aa..e35be3cab 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -190,6 +190,14 @@ TEST(MountTest, MountTmpfs) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // NOTE(b/129868551): Inode IDs are only stable across S/R if we have an open + // FD for that inode. Since we are going to compare inode IDs below, get a + // FileDescriptor for this directory here, which will be closed automatically + // at the end of the test. + auto const fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY, O_RDONLY)); + const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); { diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc index 7298d4ca8..64e435cb7 100644 --- a/test/syscalls/linux/mremap.cc +++ b/test/syscalls/linux/mremap.cc @@ -46,17 +46,6 @@ PosixErrorOr<void*> Mremap(void* old_address, size_t old_size, size_t new_size, return rv; } -// Returns true if the page containing addr is mapped. -bool IsMapped(uintptr_t addr) { - int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)), - kPageSize, MS_ASYNC); - if (rv == 0) { - return true; - } - TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno"); - return false; -} - // Fixture for mremap tests parameterized by mmap flags. using MremapParamTest = ::testing::TestWithParam<int>; diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 42646bb02..e0525f386 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -28,6 +28,7 @@ #include "test/util/fs_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" namespace gvisor { namespace testing { @@ -214,6 +215,42 @@ TEST_F(OpenTest, AppendOnly) { SyscallSucceedsWithValue(kBufSize * 3)); } +TEST_F(OpenTest, AppendConcurrentWrite) { + constexpr int kThreadCount = 5; + constexpr int kBytesPerThread = 10000; + std::unique_ptr<ScopedThread> threads[kThreadCount]; + + // In case of the uncached policy, we expect that a file system can be changed + // externally, so we create a new inode each time when we open a file and we + // can't guarantee that writes to files with O_APPEND will work correctly. + SKIP_IF(getenv("GVISOR_GOFER_UNCACHED")); + + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + + std::string filename = test_file_name_; + DisableSave ds; // Too many syscalls. + // Start kThreadCount threads which will write concurrently into the same + // file. + for (int i = 0; i < kThreadCount; i++) { + threads[i] = absl::make_unique<ScopedThread>([filename]() { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND)); + + for (int j = 0; j < kBytesPerThread; j++) { + EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1)); + } + }); + } + for (int i = 0; i < kThreadCount; i++) { + threads[i]->Join(); + } + + // Check that the size of the file is correct. + struct stat st; + EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds()); + EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread); +} + TEST_F(OpenTest, Truncate) { { // First write some data to the new file and close it. diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index 67b93ecf5..65afb90f3 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -50,32 +50,28 @@ struct PipeCreator { }; class PipeTest : public ::testing::TestWithParam<PipeCreator> { - protected: - FileDescriptor rfd; - FileDescriptor wfd; - public: static void SetUpTestSuite() { // Tests intentionally generate SIGPIPE. TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR); } - // Initializes rfd and wfd as a blocking pipe. + // Initializes rfd_ and wfd_ as a blocking pipe. // // The return value indicates success: the test should be skipped otherwise. bool CreateBlocking() { return create(true); } - // Initializes rfd and wfd as a non-blocking pipe. + // Initializes rfd_ and wfd_ as a non-blocking pipe. // // The return value is per CreateBlocking. bool CreateNonBlocking() { return create(false); } // Returns true iff the pipe represents a named pipe. - bool IsNamedPipe() { return namedpipe_; } + bool IsNamedPipe() const { return named_pipe_; } - int Size() { - int s1 = fcntl(rfd.get(), F_GETPIPE_SZ); - int s2 = fcntl(wfd.get(), F_GETPIPE_SZ); + int Size() const { + int s1 = fcntl(rfd_.get(), F_GETPIPE_SZ); + int s2 = fcntl(wfd_.get(), F_GETPIPE_SZ); EXPECT_GT(s1, 0); EXPECT_GT(s2, 0); EXPECT_EQ(s1, s2); @@ -87,20 +83,18 @@ class PipeTest : public ::testing::TestWithParam<PipeCreator> { } private: - bool namedpipe_ = false; - bool create(bool wants_blocking) { // Generate the pipe. int fds[2] = {-1, -1}; bool is_blocking = false; - GetParam().create_(fds, &is_blocking, &namedpipe_); + GetParam().create_(fds, &is_blocking, &named_pipe_); if (fds[0] < 0 || fds[1] < 0) { return false; } // Save descriptors. - rfd.reset(fds[0]); - wfd.reset(fds[1]); + rfd_.reset(fds[0]); + wfd_.reset(fds[1]); // Adjust blocking, if needed. if (!is_blocking && wants_blocking) { @@ -115,6 +109,13 @@ class PipeTest : public ::testing::TestWithParam<PipeCreator> { return true; } + + protected: + FileDescriptor rfd_; + FileDescriptor wfd_; + + private: + bool named_pipe_ = false; }; TEST_P(PipeTest, Inode) { @@ -122,9 +123,9 @@ TEST_P(PipeTest, Inode) { // Ensure that the inode number is the same for each end. struct stat rst; - ASSERT_THAT(fstat(rfd.get(), &rst), SyscallSucceeds()); + ASSERT_THAT(fstat(rfd_.get(), &rst), SyscallSucceeds()); struct stat wst; - ASSERT_THAT(fstat(wfd.get(), &wst), SyscallSucceeds()); + ASSERT_THAT(fstat(wfd_.get(), &wst), SyscallSucceeds()); EXPECT_EQ(rst.st_ino, wst.st_ino); } @@ -133,9 +134,10 @@ TEST_P(PipeTest, Permissions) { // Attempt bad operations. int buf = kTestValue; - ASSERT_THAT(write(rfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(write(rfd_.get(), &buf, sizeof(buf)), + SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(read(wfd_.get(), &buf, sizeof(buf)), SyscallFailsWithErrno(EBADF)); - EXPECT_THAT(read(wfd.get(), &buf, sizeof(buf)), SyscallFailsWithErrno(EBADF)); } TEST_P(PipeTest, Flags) { @@ -144,13 +146,13 @@ TEST_P(PipeTest, Flags) { if (IsNamedPipe()) { // May be stubbed to zero; define locally. constexpr int kLargefile = 0100000; - EXPECT_THAT(fcntl(rfd.get(), F_GETFL), + EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(kLargefile | O_RDONLY)); - EXPECT_THAT(fcntl(wfd.get(), F_GETFL), + EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(kLargefile | O_WRONLY)); } else { - EXPECT_THAT(fcntl(rfd.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY)); - EXPECT_THAT(fcntl(wfd.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); + EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY)); + EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); } } @@ -159,9 +161,9 @@ TEST_P(PipeTest, Write) { int wbuf = kTestValue; int rbuf = ~kTestValue; - ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)), + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), SyscallSucceedsWithValue(sizeof(wbuf))); - ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); EXPECT_EQ(wbuf, rbuf); } @@ -171,15 +173,15 @@ TEST_P(PipeTest, NonBlocking) { int wbuf = kTestValue; int rbuf = ~kTestValue; - EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallFailsWithErrno(EWOULDBLOCK)); - ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)), + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), SyscallSucceedsWithValue(sizeof(wbuf))); - ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); EXPECT_EQ(wbuf, rbuf); - EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallFailsWithErrno(EWOULDBLOCK)); } @@ -202,26 +204,26 @@ TEST_P(PipeTest, Seek) { for (int i = 0; i < 4; i++) { // Attempt absolute seeks. - EXPECT_THAT(lseek(rfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(rfd.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); // Attempt relative seeks. - EXPECT_THAT(lseek(rfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(rfd.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); // Attempt end-of-file seeks. - EXPECT_THAT(lseek(rfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(rfd.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(lseek(wfd.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); // Add some more data to the pipe. int buf = kTestValue; - ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); } } @@ -230,14 +232,14 @@ TEST_P(PipeTest, OffsetCalls) { SKIP_IF(!CreateBlocking()); int buf; - EXPECT_THAT(pread(wfd.get(), &buf, sizeof(buf), 0), + EXPECT_THAT(pread(wfd_.get(), &buf, sizeof(buf), 0), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(pwrite(rfd.get(), &buf, sizeof(buf), 0), + EXPECT_THAT(pwrite(rfd_.get(), &buf, sizeof(buf), 0), SyscallFailsWithErrno(ESPIPE)); struct iovec iov; - EXPECT_THAT(preadv(wfd.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(pwritev(rfd.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(preadv(wfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pwritev(rfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); } TEST_P(PipeTest, WriterSideCloses) { @@ -245,13 +247,13 @@ TEST_P(PipeTest, WriterSideCloses) { ScopedThread t([this]() { int buf = ~kTestValue; - ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_EQ(buf, kTestValue); // This will return when the close() completes. - ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)), SyscallSucceeds()); + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceeds()); // This will return straight away. - ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(0)); }); @@ -260,14 +262,14 @@ TEST_P(PipeTest, WriterSideCloses) { // Write to unblock. int buf = kTestValue; - ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); // Sleep a bit so the thread can block again. absl::SleepFor(syncDelay); // Allow the thread to complete. - ASSERT_THAT(close(wfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); t.Join(); } @@ -275,36 +277,36 @@ TEST_P(PipeTest, WriterSideClosesReadDataFirst) { SKIP_IF(!CreateBlocking()); int wbuf = kTestValue; - ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)), + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), SyscallSucceedsWithValue(sizeof(wbuf))); - ASSERT_THAT(close(wfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); int rbuf; - ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); EXPECT_EQ(wbuf, rbuf); - EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(0)); } TEST_P(PipeTest, ReaderSideCloses) { SKIP_IF(!CreateBlocking()); - ASSERT_THAT(close(rfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); int buf = kTestValue; - EXPECT_THAT(write(wfd.get(), &buf, sizeof(buf)), + EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)), SyscallFailsWithErrno(EPIPE)); } TEST_P(PipeTest, CloseTwice) { SKIP_IF(!CreateBlocking()); - int _rfd = rfd.release(); - int _wfd = wfd.release(); - ASSERT_THAT(close(_rfd), SyscallSucceeds()); - ASSERT_THAT(close(_wfd), SyscallSucceeds()); - EXPECT_THAT(close(_rfd), SyscallFailsWithErrno(EBADF)); - EXPECT_THAT(close(_wfd), SyscallFailsWithErrno(EBADF)); + int reader = rfd_.release(); + int writer = wfd_.release(); + ASSERT_THAT(close(reader), SyscallSucceeds()); + ASSERT_THAT(close(writer), SyscallSucceeds()); + EXPECT_THAT(close(reader), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(close(writer), SyscallFailsWithErrno(EBADF)); } // Blocking write returns EPIPE when read end is closed if nothing has been @@ -316,18 +318,18 @@ TEST_P(PipeTest, BlockWriteClosed) { ScopedThread t([this, ¬ify]() { std::vector<char> buf(Size()); // Exactly fill the pipe buffer. - ASSERT_THAT(WriteFd(wfd.get(), buf.data(), buf.size()), + ASSERT_THAT(WriteFd(wfd_.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); notify.Notify(); // Attempt to write one more byte. Blocks. // N.B. Don't use WriteFd, we don't want a retry. - EXPECT_THAT(write(wfd.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE)); + EXPECT_THAT(write(wfd_.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE)); }); notify.WaitForNotification(); - ASSERT_THAT(close(rfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); t.Join(); } @@ -337,12 +339,14 @@ TEST_P(PipeTest, BlockPartialWriteClosed) { SKIP_IF(!CreateBlocking()); ScopedThread t([this]() { - std::vector<char> buf(2 * Size()); + const int pipe_size = Size(); + std::vector<char> buf(2 * pipe_size); + // Write more than fits in the buffer. Blocks then returns partial write // when the other end is closed. The next call returns EPIPE. - ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), - SyscallSucceedsWithValue(Size())); - EXPECT_THAT(write(wfd.get(), buf.data(), buf.size()), + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(pipe_size)); + EXPECT_THAT(write(wfd_.get(), buf.data(), buf.size()), SyscallFailsWithErrno(EPIPE)); }); @@ -350,7 +354,7 @@ TEST_P(PipeTest, BlockPartialWriteClosed) { absl::SleepFor(syncDelay); // Unblock the above. - ASSERT_THAT(close(rfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); t.Join(); } @@ -361,7 +365,7 @@ TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) { ScopedThread t([this, ¬ify]() { notify.Notify(); int buf; - ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); ASSERT_EQ(kTestValue, buf); }); @@ -375,9 +379,9 @@ TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) { // is ongoing read() above. We will not be able to restart the read() // successfully in restore run since the read fd is closed. const DisableSave ds; - ASSERT_THAT(close(rfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); int buf = kTestValue; - ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)), + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); t.Join(); } @@ -387,18 +391,18 @@ TEST_P(PipeTest, FionRead) { SKIP_IF(!CreateBlocking()); int n; - ASSERT_THAT(ioctl(rfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + ASSERT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - ASSERT_THAT(ioctl(wfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + ASSERT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); std::vector<char> buf(Size()); - ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); - EXPECT_THAT(ioctl(rfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, buf.size()); - EXPECT_THAT(ioctl(wfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, buf.size()); } @@ -409,11 +413,11 @@ TEST_P(PipeTest, OpenViaProcSelfFD) { SKIP_IF(IsNamedPipe()); // Close the write end of the pipe. - ASSERT_THAT(close(wfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); // Open other side via /proc/self/fd. It should not block. FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( - Open(absl::StrCat("/proc/self/fd/", rfd.get()), O_RDONLY)); + Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY)); } // Test that opening and reading from an anonymous pipe (with existing writes) @@ -424,13 +428,13 @@ TEST_P(PipeTest, OpenViaProcSelfFDWithWrites) { // Write to the pipe and then close the write fd. int wbuf = kTestValue; - ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)), + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), SyscallSucceedsWithValue(sizeof(wbuf))); - ASSERT_THAT(close(wfd.release()), SyscallSucceeds()); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); // Open read side via /proc/self/fd, and read from it. FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( - Open(absl::StrCat("/proc/self/fd/", rfd.get()), O_RDONLY)); + Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY)); int rbuf; ASSERT_THAT(read(proc_self_fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); @@ -443,13 +447,13 @@ TEST_P(PipeTest, ProcFDReleasesFile) { // Stat the pipe FD, which shouldn't alter the refcount. struct stat wst; - ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd.get()).c_str(), &wst), + ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd_.get()).c_str(), &wst), SyscallSucceeds()); // Close the write end and ensure that read indicates EOF. - wfd.reset(); + wfd_.reset(); char buf; - ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0)); } // Same for /proc/<PID>/fdinfo. @@ -459,30 +463,30 @@ TEST_P(PipeTest, ProcFDInfoReleasesFile) { // Stat the pipe FD, which shouldn't alter the refcount. struct stat wst; ASSERT_THAT( - lstat(absl::StrCat("/proc/self/fdinfo/", wfd.get()).c_str(), &wst), + lstat(absl::StrCat("/proc/self/fdinfo/", wfd_.get()).c_str(), &wst), SyscallSucceeds()); // Close the write end and ensure that read indicates EOF. - wfd.reset(); + wfd_.reset(); char buf; - ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0)); } TEST_P(PipeTest, SizeChange) { SKIP_IF(!CreateBlocking()); // Set the minimum possible size. - ASSERT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, 0), SyscallSucceeds()); + ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0), SyscallSucceeds()); int min = Size(); EXPECT_GT(min, 0); // Should be rounded up. // Set from the read end. - ASSERT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds()); + ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds()); int med = Size(); EXPECT_GT(med, min); // Should have grown, may be rounded. // Set from the write end. - ASSERT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds()); + ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds()); int max = Size(); EXPECT_GT(max, med); // Ditto. } @@ -491,9 +495,9 @@ TEST_P(PipeTest, SizeChangeMax) { SKIP_IF(!CreateBlocking()); // Assert there's some maximum. - EXPECT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), + EXPECT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), SyscallFailsWithErrno(EINVAL)); - EXPECT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), + EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), SyscallFailsWithErrno(EINVAL)); } @@ -505,14 +509,14 @@ TEST_P(PipeTest, SizeChangeFull) { // adjust the size and the call below will return success. It was found via // experimentation that this granularity avoids the rounding for Linux. constexpr int kDelta = 64 * 1024; - ASSERT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, Size() + kDelta), + ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() + kDelta), SyscallSucceeds()); // Fill the buffer and try to change down. std::vector<char> buf(Size()); - ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); - EXPECT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, Size() - kDelta), + EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() - kDelta), SyscallFailsWithErrno(EBUSY)); } @@ -522,23 +526,32 @@ TEST_P(PipeTest, Streaming) { // We make too many calls to go through full save cycles. DisableSave ds; + // Size() requires 2 syscalls, call it once and remember the value. + const int pipe_size = Size(); + absl::Notification notify; - ScopedThread t([this, ¬ify]() { + ScopedThread t([this, ¬ify, pipe_size]() { // Don't start until it's full. notify.WaitForNotification(); - for (int i = 0; i < 2 * Size(); i++) { + for (int i = 0; i < pipe_size; i++) { int rbuf; - ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)), + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); EXPECT_EQ(rbuf, i); } }); - for (int i = 0; i < 2 * Size(); i++) { - int wbuf = i; - ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)), - SyscallSucceedsWithValue(sizeof(wbuf))); - // Did that write just fill up the buffer? Wake up the reader. Once only. - if ((i * sizeof(wbuf)) < Size() && ((i + 1) * sizeof(wbuf)) >= Size()) { + + // Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader + // to start. Then we write pipe size worth 3 more times to ensure the reader + // can follow along. + ssize_t total = 0; + for (int i = 0; i < pipe_size; i++) { + ssize_t written = write(wfd_.get(), &i, sizeof(i)); + ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i))); + total += written; + + // Is the next write about to fill up the buffer? Wake up the reader once. + if (total < pipe_size && (total + written) >= pipe_size) { notify.Notify(); } } diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 924b98e3a..490bf4424 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -206,8 +206,8 @@ PosixError WithSubprocess(SubprocessCallback const& running, } // Access the file returned by name when a subprocess is running. -PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flags, - std::function<void(int fd)> access) { +PosixError AccessWhileRunning(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { @@ -221,8 +221,8 @@ PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flag } // Access the file returned by name when the a subprocess is zombied. -PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flags, - std::function<void(int fd)> access) { +PosixError AccessWhileZombied(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { @@ -239,8 +239,8 @@ PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flag } // Access the file returned by name when the a subprocess is exited. -PosixError AccessWhileExited(std::function<std::string(int pid)> name, int flags, - std::function<void(int fd)> access) { +PosixError AccessWhileExited(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { @@ -704,7 +704,8 @@ TEST(ProcSelfExe, Absolute) { // Sanity check for /proc/cpuinfo fields that must be present. TEST(ProcCpuinfo, RequiredFieldsArePresent) { - std::string proc_cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + std::string proc_cpuinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); ASSERT_FALSE(proc_cpuinfo.empty()); std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); @@ -743,7 +744,8 @@ TEST(ProcCpuinfo, DeniesWrite) { // Sanity checks that uptime is present. TEST(ProcUptime, IsPresent) { - std::string proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + std::string proc_uptime = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); ASSERT_FALSE(proc_uptime.empty()); std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); @@ -775,7 +777,8 @@ TEST(ProcUptime, IsPresent) { } TEST(ProcMeminfo, ContainsBasicFields) { - std::string proc_meminfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); + std::string proc_meminfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); } @@ -853,12 +856,14 @@ TEST(ProcStat, Fields) { } TEST(ProcLoadavg, EndsWithNewline) { - std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); EXPECT_EQ(proc_loadvg.back(), '\n'); } TEST(ProcLoadavg, Fields) { - std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); // Single line. @@ -1238,10 +1243,12 @@ TEST(ProcPidStatTest, VmStats) { EXPECT_NE('0', data_str[0]); } -// Parse an array of NUL-terminated char* arrays, returning a vector of strings. +// Parse an array of NUL-terminated char* arrays, returning a vector of +// strings. std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { EXPECT_EQ('\0', contents.back()); - // The split will leave an empty std::string if the NUL-byte remains, so pop it. + // The split will leave an empty string if the NUL-byte remains, so pop + // it. contents.pop_back(); return absl::StrSplit(contents, '\0'); @@ -1491,7 +1498,8 @@ TEST(ProcPidFile, SubprocessExited) { } PosixError DirContainsImpl(absl::string_view path, - const std::vector<std::string>& targets, bool strict) { + const std::vector<std::string>& targets, + bool strict) { ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); bool success = true; @@ -1530,8 +1538,8 @@ PosixError DirContainsExactly(absl::string_view path, return DirContainsImpl(path, targets, true); } -PosixError EventuallyDirContainsExactly(absl::string_view path, - const std::vector<std::string>& targets) { +PosixError EventuallyDirContainsExactly( + absl::string_view path, const std::vector<std::string>& targets) { constexpr int kRetryCount = 100; const absl::Duration kRetryDelay = absl::Milliseconds(100); @@ -1553,11 +1561,13 @@ TEST(ProcTask, Basic) { DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())})); } -std::vector<std::string> TaskFiles(const std::vector<std::string>& initial_contents, - const std::vector<pid_t>& pids) { +std::vector<std::string> TaskFiles( + const std::vector<std::string>& initial_contents, + const std::vector<pid_t>& pids) { return VecCat<std::string>( initial_contents, - ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, pids)); + ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, + pids)); } std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { @@ -1894,7 +1904,8 @@ void CheckDuplicatesRecursively(std::string path) { continue; } - ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) << dp->d_name; + ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) + << dp->d_name; children.insert(std::string(dp->d_name)); ASSERT_NE(dp->d_type, DT_UNKNOWN); diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc new file mode 100644 index 000000000..578b20680 --- /dev/null +++ b/test/syscalls/linux/proc_net_tcp.cc @@ -0,0 +1,281 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using absl::StrCat; +using absl::StrSplit; + +constexpr char kProcNetTCPHeader[] = + " sl local_address rem_address st tx_queue rx_queue tr tm->when " + "retrnsmt uid timeout inode " + " "; + +// Possible values of the "st" field in a /proc/net/tcp entry. Source: Linux +// kernel, include/net/tcp_states.h. +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, + TCP_NEW_SYN_RECV, + + TCP_MAX_STATES +}; + +// TCPEntry represents a single entry from /proc/net/tcp. +struct TCPEntry { + uint32_t local_addr; + uint16_t local_port; + + uint32_t remote_addr; + uint16_t remote_port; + + uint64_t state; + uint64_t uid; + uint64_t inode; +}; + +uint32_t IP(const struct sockaddr* addr) { + auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr); + return in_addr->sin_addr.s_addr; +} + +uint16_t Port(const struct sockaddr* addr) { + auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr); + return ntohs(in_addr->sin_port); +} + +// Finds the first entry in 'entries' for which 'predicate' returns true. +// Returns true on match, and sets 'match' to point to the matching entry. +bool FindBy(std::vector<TCPEntry> entries, TCPEntry* match, + std::function<bool(const TCPEntry&)> predicate) { + for (int i = 0; i < entries.size(); ++i) { + if (predicate(entries[i])) { + *match = entries[i]; + return true; + } + } + return false; +} + +bool FindByLocalAddr(std::vector<TCPEntry> entries, TCPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IP(addr); + uint16_t port = Port(addr); + return FindBy(entries, match, [host, port](const TCPEntry& e) { + return (e.local_addr == host && e.local_port == port); + }); +} + +bool FindByRemoteAddr(std::vector<TCPEntry> entries, TCPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IP(addr); + uint16_t port = Port(addr); + return FindBy(entries, match, [host, port](const TCPEntry& e) { + return (e.remote_addr == host && e.remote_port == port); + }); +} + +// Returns a parsed representation of /proc/net/tcp entries. +PosixErrorOr<std::vector<TCPEntry>> ProcNetTCPEntries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/net/tcp", &content)); + + bool found_header = false; + std::vector<TCPEntry> entries; + std::vector<std::string> lines = StrSplit(content, '\n'); + std::cerr << "<contents of /proc/net/tcp>" << std::endl; + for (std::string line : lines) { + std::cerr << line << std::endl; + + if (!found_header) { + EXPECT_EQ(line, kProcNetTCPHeader); + found_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/net/tcp. + // + // Example entries: + // + // clang-format off + // + // sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + // 0: 00000000:006F 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 1968 1 0000000000000000 100 0 0 10 0 + // 1: 0100007F:7533 00000000:0000 0A 00000000:00000000 00:00000000 00000000 120 0 10684 1 0000000000000000 100 0 0 10 0 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + // + // clang-format on + + TCPEntry entry; + std::vector<std::string> fields = + StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty()); + + ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13])); + + entries.push_back(entry); + } + std::cerr << "<end of /proc/net/tcp>" << std::endl; + + return entries; +} + +TEST(ProcNetTCP, Exists) { + const std::string content = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp")); + const std::string header_line = StrCat(kProcNetTCPHeader, "\n"); + if (IsRunningOnGvisor()) { + // Should be just the header since we don't have any tcp sockets yet. + EXPECT_EQ(content, header_line); + } else { + // On a general linux machine, we could have abitrary sockets on the system, + // so just check the header. + EXPECT_THAT(content, ::testing::StartsWith(header_line)); + } +} + +TEST(ProcNetTCP, EntryUID) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry e; + EXPECT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); + EXPECT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); +} + +TEST(ProcNetTCP, BindAcceptConnect) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + // We can only make assertions about the total number of entries if we control + // the entire "machine". + if (IsRunningOnGvisor()) { + EXPECT_EQ(entries.size(), 2); + } + + TCPEntry e; + EXPECT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr())); + EXPECT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr())); +} + +TEST(ProcNetTCP, InodeReasonable) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + + TCPEntry accepted_entry; + ASSERT_TRUE(FindByLocalAddr(entries, &accepted_entry, sockets->first_addr())); + EXPECT_NE(accepted_entry.inode, 0); + + TCPEntry client_entry; + ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, sockets->first_addr())); + EXPECT_NE(client_entry.inode, 0); + EXPECT_NE(accepted_entry.inode, client_entry.inode); +} + +TEST(ProcNetTCP, State) { + std::unique_ptr<FileDescriptor> server = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create()); + + auto test_addr = V4Loopback(); + ASSERT_THAT( + bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr), + test_addr.addr_len), + SyscallSucceeds()); + + struct sockaddr addr; + socklen_t addrlen = sizeof(struct sockaddr); + ASSERT_THAT(getsockname(server->get(), &addr, &addrlen), SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr)); + + ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry listen_entry; + ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr)); + EXPECT_EQ(listen_entry.state, TCP_LISTEN); + + std::unique_ptr<FileDescriptor> client = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create()); + ASSERT_THAT(connect(client->get(), &addr, addrlen), SyscallSucceeds()); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr)); + EXPECT_EQ(listen_entry.state, TCP_LISTEN); + TCPEntry client_entry; + ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, &addr)); + EXPECT_EQ(client_entry.state, TCP_ESTABLISHED); + + FileDescriptor accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr)); + + const uint32_t accepted_local_host = IP(&addr); + const uint16_t accepted_local_port = Port(&addr); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry accepted_entry; + ASSERT_TRUE(FindBy(entries, &accepted_entry, + [client_entry, accepted_local_host, + accepted_local_port](const TCPEntry& e) { + return e.local_addr == accepted_local_host && + e.local_port == accepted_local_port && + e.remote_addr == client_entry.local_addr && + e.remote_port == client_entry.local_port; + })); + EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc index 82d325c17..9b9be66ff 100644 --- a/test/syscalls/linux/proc_net_unix.cc +++ b/test/syscalls/linux/proc_net_unix.cc @@ -67,7 +67,7 @@ std::string ExtractPath(const struct sockaddr* addr) { // Abstract socket paths are null padded to the end of the struct // sockaddr. However, these null bytes may or may not show up in // /proc/net/unix depending on the kernel version. Truncate after the first - // null byte (by treating path as a c-std::string). + // null byte (by treating path as a c-string). return StrCat("@", &path[1]); } return std::string(path); @@ -80,7 +80,7 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() { bool skipped_header = false; std::vector<UnixEntry> entries; - std::vector<std::string> lines = absl::StrSplit(content, absl::ByAnyChar("\n")); + std::vector<std::string> lines = absl::StrSplit(content, '\n'); std::cerr << "<contents of /proc/net/unix>" << std::endl; for (std::string line : lines) { // Emit the proc entry to the test output to provide context for the test @@ -123,7 +123,8 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() { UnixEntry entry; // Process the first 6 fields, up to but not including "Inode". - std::vector<std::string> fields = absl::StrSplit(line, absl::MaxSplits(' ', 6)); + std::vector<std::string> fields = + absl::StrSplit(line, absl::MaxSplits(' ', 6)); if (fields.size() < 7) { return PosixError(EINVAL, StrFormat("Invalid entry: '%s'\n", line)); @@ -162,7 +163,7 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() { // Finds the first entry in 'entries' for which 'predicate' returns true. // Returns true on match, and sets 'match' to point to the matching entry. bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match, - std::function<bool(UnixEntry)> predicate) { + std::function<bool(const UnixEntry&)> predicate) { for (int i = 0; i < entries.size(); ++i) { if (predicate(entries[i])) { *match = entries[i]; @@ -174,7 +175,8 @@ bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match, bool FindByPath(std::vector<UnixEntry> entries, UnixEntry* match, const std::string& path) { - return FindBy(entries, match, [path](UnixEntry e) { return e.path == path; }); + return FindBy(entries, match, + [path](const UnixEntry& e) { return e.path == path; }); } TEST(ProcNetUnix, Exists) { diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index f926ac0f9..d1ab4703f 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -105,7 +105,7 @@ struct Field { uint64_t value; }; -// ParseFields returns a std::string representation of value, using the names in +// ParseFields returns a string representation of value, using the names in // fields. std::string ParseFields(const Field* fields, size_t len, uint64_t value) { bool first = true; diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc index 24d9dc79a..7fe7c03a5 100644 --- a/test/syscalls/linux/raw_socket_icmp.cc +++ b/test/syscalls/linux/raw_socket_icmp.cc @@ -234,7 +234,7 @@ TEST_F(RawSocketICMPTest, MultipleSocketReceive) { } // A raw ICMP socket and ping socket should both receive the ICMP packets -// indended for the ping socket. +// intended for the ping socket. TEST_F(RawSocketICMPTest, RawAndPingSockets) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h index b16179fca..2fa40c35f 100644 --- a/test/syscalls/linux/readv_common.h +++ b/test/syscalls/linux/readv_common.h @@ -20,7 +20,7 @@ namespace gvisor { namespace testing { -// A NUL-terminated std::string containing the data used by tests using the following +// A NUL-terminated string containing the data used by tests using the following // test helpers. extern const char kReadvTestData[]; diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index 2fbb3f4ef..e5d72e28a 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -135,7 +135,7 @@ TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) { TEST(SendFileTest, SendAndUpdateFileOffset) { // Create temp files. - // Test input std::string length must be > 2 AND even. + // Test input string length must be > 2 AND even. constexpr char kData[] = "The slings and arrows of outrageous fortune,"; constexpr int kDataSize = sizeof(kData) - 1; constexpr int kHalfDataSize = kDataSize / 2; @@ -180,7 +180,7 @@ TEST(SendFileTest, SendAndUpdateFileOffset) { TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { // Create temp files. - // Test input std::string length must be > 2 AND divisible by 4. + // Test input string length must be > 2 AND divisible by 4. constexpr char kData[] = "The slings and arrows of outrageous fortune,"; constexpr int kDataSize = sizeof(kData) - 1; constexpr int kHalfDataSize = kDataSize / 2; @@ -233,7 +233,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { TEST(SendFileTest, SendAndUpdateGivenOffset) { // Create temp files. - // Test input std::string length must be >= 4 AND divisible by 4. + // Test input string length must be >= 4 AND divisible by 4. constexpr char kData[] = "Or to take Arms against a Sea of troubles,"; constexpr int kDataSize = sizeof(kData) + 1; constexpr int kHalfDataSize = kDataSize / 2; diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc index f99f3fe62..51d614639 100644 --- a/test/syscalls/linux/socket_generic.cc +++ b/test/syscalls/linux/socket_generic.cc @@ -21,6 +21,7 @@ #include "gtest/gtest.h" #include "gtest/gtest.h" +#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" @@ -687,5 +688,54 @@ TEST_P(AllSocketPairTest, RecvTimeoutWaitAll) { EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); } +TEST_P(AllSocketPairTest, GetSockoptType) { + int type = GetParam().type; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_TYPE, &opt, &optlen), + SyscallSucceeds()); + + // Type may have SOCK_NONBLOCK and SOCK_CLOEXEC ORed into it. Remove these + // before comparison. + type &= ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + EXPECT_EQ(opt, type) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_TYPE, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + +TEST_P(AllSocketPairTest, GetSockoptDomain) { + const int domain = GetParam().domain; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &opt, &optlen), + SyscallSucceeds()); + EXPECT_EQ(opt, domain) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_DOMAIN, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + +TEST_P(AllSocketPairTest, GetSockoptProtocol) { + const int protocol = GetParam().protocol; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen), + SyscallSucceeds()); + EXPECT_EQ(opt, protocol) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index c8693225f..b5c38f27e 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -23,6 +23,7 @@ #include <vector> #include "gtest/gtest.h" +#include "absl/strings/str_format.h" #include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/cleanup.h" @@ -144,24 +145,56 @@ TEST(NetlinkRouteTest, GetPeerName) { EXPECT_EQ(addr.nl_pid, 0); } -using IntSockOptTest = ::testing::TestWithParam<int>; +// Parameters for GetSockOpt test. They are: +// 0: Socket option to query. +// 1: A predicate to run on the returned sockopt value. Should return true if +// the value is considered ok. +// 2: A description of what the sockopt value is expected to be. Should complete +// the sentence "<value> was unexpected, expected <description>" +using SockOptTest = ::testing::TestWithParam< + std::tuple<int, std::function<bool(int)>, std::string>>; + +TEST_P(SockOptTest, GetSockOpt) { + int sockopt = std::get<0>(GetParam()); + auto verifier = std::get<1>(GetParam()); + std::string verifier_description = std::get<2>(GetParam()); -TEST_P(IntSockOptTest, GetSockOpt) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); int res; socklen_t len = sizeof(res); - EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, GetParam(), &res, &len), + EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, sockopt, &res, &len), SyscallSucceeds()); EXPECT_EQ(len, sizeof(res)); - EXPECT_GT(res, 0); + EXPECT_TRUE(verifier(res)) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, %d, &res, &len) => res=%d was unexpected, " + "expected %s", + fd.get(), sockopt, res, verifier_description); +} + +std::function<bool(int)> IsPositive() { + return [](int val) { return val > 0; }; +} + +std::function<bool(int)> IsEqual(int target) { + return [target](int val) { return val == target; }; } -INSTANTIATE_TEST_SUITE_P(NetlinkRouteTest, IntSockOptTest, - ::testing::Values(SO_SNDBUF, SO_RCVBUF)); +INSTANTIATE_TEST_SUITE_P( + NetlinkRouteTest, SockOptTest, + ::testing::Values( + std::make_tuple(SO_SNDBUF, IsPositive(), "positive send buffer size"), + std::make_tuple(SO_RCVBUF, IsPositive(), + "positive receive buffer size"), + std::make_tuple(SO_TYPE, IsEqual(SOCK_RAW), + absl::StrFormat("SOCK_RAW (%d)", SOCK_RAW)), + std::make_tuple(SO_DOMAIN, IsEqual(AF_NETLINK), + absl::StrFormat("AF_NETLINK (%d)", AF_NETLINK)), + std::make_tuple(SO_PROTOCOL, IsEqual(NETLINK_ROUTE), + absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE)))); // Validates the reponses to RTM_GETLINK + NLM_F_DUMP. void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index da69de37c..4f65cf5ae 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -457,7 +457,8 @@ Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type, SocketPairKind Reversed(SocketPairKind const& base) { auto const& creator = base.creator; return SocketPairKind{ - absl::StrCat("reversed ", base.description), + absl::StrCat("reversed ", base.description), base.domain, base.type, + base.protocol, [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> { ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); return absl::make_unique<ReversedSocketPair>(std::move(creator_value)); @@ -542,8 +543,8 @@ struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) { SocketKind SimpleSocket(int fam, int type, int proto) { return SocketKind{ - absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), - SyscallSocketCreator(fam, type, proto)}; + absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam, + type, proto, SyscallSocketCreator(fam, type, proto)}; } ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h index 058313986..4fd59767a 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/syscalls/linux/socket_test_util.h @@ -287,6 +287,9 @@ Creator<FileDescriptor> UnboundSocketCreator(int domain, int type, // a function that creates such a socket pair. struct SocketPairKind { std::string description; + int domain; + int type; + int protocol; Creator<SocketPair> creator; // Create creates a socket pair of this kind. @@ -297,6 +300,9 @@ struct SocketPairKind { // a function that creates such a socket. struct SocketKind { std::string description; + int domain; + int type; + int protocol; Creator<FileDescriptor> creator; // Create creates a socket pair of this kind. @@ -353,6 +359,7 @@ Middleware SetSockOpt(int level, int optname, T* value) { return SocketPairKind{ absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ", base.description), + base.domain, base.type, base.protocol, [creator, level, optname, value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> { ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc index b0ab26847..1092e29b1 100644 --- a/test/syscalls/linux/socket_unix_cmsg.cc +++ b/test/syscalls/linux/socket_unix_cmsg.cc @@ -220,7 +220,7 @@ TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpaceMsgCtrunc) { // BasicFDPassNullControlMsgCtrunc sends an FD and sets contradictory values for // msg_controllen and msg_control. msg_controllen is set to the correct size to -// accomidate the FD, but msg_control is set to NULL. In this case, msg_control +// accommodate the FD, but msg_control is set to NULL. In this case, msg_control // should override msg_controllen. TEST_P(UnixSocketPairCmsgTest, BasicFDPassNullControlMsgCtrunc) { // FIXME(gvisor.dev/issue/207): Fix handling of NULL msg_control. @@ -531,7 +531,7 @@ TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed1) { } // FDPassInterspersed2 checks that sent control messages cannot be read after -// their assocated data has been read while ignoring the control message by +// their associated data has been read while ignoring the control message by // using read(2) instead of recvmsg(2). TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed2) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc index 091d546b3..e483d2777 100644 --- a/test/syscalls/linux/socket_unix_unbound_stream.cc +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -29,7 +29,7 @@ namespace { using UnixStreamSocketPairTest = SocketPairTest; // FDPassPartialRead checks that sent control messages cannot be read after -// any of their assocated data has been read while ignoring the control message +// any of their associated data has been read while ignoring the control message // by using read(2) instead of recvmsg(2). TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 80ba67496..510f7bee5 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -16,7 +16,9 @@ #include <fcntl.h> #include <sys/stat.h> #include <sys/statfs.h> +#include <sys/types.h> #include <unistd.h> + #include <string> #include <vector> @@ -554,6 +556,103 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { EXPECT_EQ(st2_after.st_ino, st2.st_ino); } +#ifndef SYS_statx +#if defined(__x86_64__) +#define SYS_statx 332 +#else +#error "Unknown architecture" +#endif +#endif // SYS_statx + +#ifndef STATX_ALL +#define STATX_ALL 0x00000fffU +#endif // STATX_ALL + +// struct kernel_statx_timestamp is a Linux statx_timestamp struct. +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +// struct kernel_statx is a Linux statx struct. Old versions of glibc do not +// expose it. See include/uapi/linux/stat.h +struct kernel_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct kernel_statx_timestamp stx_atime; + struct kernel_statx_timestamp stx_btime; + struct kernel_statx_timestamp stx_ctime; + struct kernel_statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare2[14]; +}; + +int statx(int dirfd, const char *pathname, int flags, unsigned int mask, + struct kernel_statx *statxbuf) { + return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf); +} + +TEST_F(StatTest, StatxAbsPath) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathDirFD) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + auto const dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + auto filename = std::string(Basename(test_file_name_)); + + EXPECT_THAT(statx(dirfd.get(), filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathCwd) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + errno == ENOSYS); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + auto filename = std::string(Basename(test_file_name_)); + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxEmptyPath) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + errno == ENOSYS); + + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + struct kernel_statx stx; + EXPECT_THAT(statx(fd.get(), "", AT_EMPTY_PATH, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc index 494072a9b..69650a1d3 100644 --- a/test/syscalls/linux/symlink.cc +++ b/test/syscalls/linux/symlink.cc @@ -272,6 +272,90 @@ TEST(SymlinkTest, ChmodSymlink) { EXPECT_EQ(FilePermission(newpath), 0777); } +class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {}; + +// Test that creating an existing symlink with creat will create the target. +TEST_P(ParamSymlinkTest, CreatLinkCreatesTarget) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + int fd; + EXPECT_THAT(fd = creat(linkpath.c_str(), 0666), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + struct stat st; + EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds()); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); + ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT will create the target. +TEST_P(ParamSymlinkTest, OpenLinkCreatesTarget) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + int fd; + EXPECT_THAT(fd = open(linkpath.c_str(), O_CREAT, 0666), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + struct stat st; + EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds()); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); + ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds()); +} + +// Test that opening a self-symlink with O_CREAT will fail with ELOOP. +TEST_P(ParamSymlinkTest, CreateExistingSelfLink) { + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + + const std::string linkpath = GetParam(); + ASSERT_THAT(symlink(linkpath.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666), + SyscallFailsWithErrno(ELOOP)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT|O_EXCL will fail with +// EEXIST. +TEST_P(ParamSymlinkTest, OpenLinkExclFails) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_EXCL, 0666), + SyscallFailsWithErrno(EEXIST)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT|O_NOFOLLOW will fail with +// ELOOP. +TEST_P(ParamSymlinkTest, OpenLinkNoFollowFails) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_NOFOLLOW, 0666), + SyscallFailsWithErrno(ELOOP)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +INSTANTIATE_TEST_SUITE_P(AbsAndRelTarget, ParamSymlinkTest, + ::testing::Values(NewTempAbsPath(), NewTempRelPath())); + } // namespace } // namespace testing diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index e95b644ac..4597e91e3 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -265,10 +265,16 @@ TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) { ScopedThread t([this, &read_bytes]() { // Avoid interrupting the blocking write in main thread. const DisableSave ds; + + // Take ownership of the FD so that we close it on failure. This will + // unblock the blocking write below. + FileDescriptor fd(t_); + t_ = -1; + char readbuf[2500] = {}; int n = -1; while (n != 0) { - EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)), + ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), SyscallSucceeds()); read_bytes += n; } @@ -342,10 +348,16 @@ TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) { ScopedThread t([this, &read_bytes]() { // Avoid interrupting the blocking write in main thread. const DisableSave ds; + + // Take ownership of the FD so that we close it on failure. This will + // unblock the blocking write below. + FileDescriptor fd(t_); + t_ = -1; + char readbuf[2500] = {}; int n = -1; while (n != 0) { - EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)), + ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), SyscallSucceeds()); read_bytes += n; } diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc index 31db8a2ad..1bb0307c4 100644 --- a/test/syscalls/linux/udp_socket.cc +++ b/test/syscalls/linux/udp_socket.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <arpa/inet.h> #include <fcntl.h> #include <linux/errqueue.h> #include <netinet/in.h> @@ -304,12 +305,50 @@ TEST_P(UdpSocketTest, ReceiveAfterConnect) { SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. - char received[512]; + char received[sizeof(buf)]; EXPECT_THAT(recv(s_, received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); } +TEST_P(UdpSocketTest, ReceiveAfterDisconnect) { + // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Get the address s_ was bound to during connect. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + for (int i = 0; i < 2; i++) { + // Send from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Disconnect s_. + struct sockaddr addr = {}; + addr.sa_family = AF_UNSPEC; + ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), SyscallSucceeds()); + // Connect s_ loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + } +} + TEST_P(UdpSocketTest, Connect) { ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); @@ -335,6 +374,112 @@ TEST_P(UdpSocketTest, Connect) { EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); } +TEST_P(UdpSocketTest, DisconnectAfterBind) { + ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + // Check that we're still bound. + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { + struct sockaddr_storage baddr = {}; + socklen_t addrlen; + auto port = *Port(reinterpret_cast<struct sockaddr_storage*>(addr_[1])); + if (addr_[0]->sa_family == AF_INET) { + auto addr_in = reinterpret_cast<struct sockaddr_in*>(&baddr); + addr_in->sin_family = AF_INET; + addr_in->sin_port = port; + inet_pton(AF_INET, "0.0.0.0", + reinterpret_cast<void*>(&addr_in->sin_addr.s_addr)); + } else { + auto addr_in = reinterpret_cast<struct sockaddr_in6*>(&baddr); + addr_in->sin6_family = AF_INET6; + addr_in->sin6_port = port; + inet_pton(AF_INET6, + "::", reinterpret_cast<void*>(&addr_in->sin6_addr.s6_addr)); + addr_in->sin6_scope_id = 0; + } + ASSERT_THAT(bind(s_, reinterpret_cast<sockaddr*>(&baddr), addrlen_), + SyscallSucceeds()); + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + // Check that we're still bound. + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, &baddr, addrlen), 0); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, Disconnect) { + for (int i = 0; i < 2; i++) { + // Try to connect again. + EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. + struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); + + // Try to disconnect. + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Check that we're still bound. + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(*Port(&addr), 0); + } +} + +TEST_P(UdpSocketTest, ConnectBadAddress) { + struct sockaddr addr = {}; + addr.sa_family = addr_[0]->sa_family; + ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), + SyscallFailsWithErrno(EINVAL)); +} + TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); @@ -397,7 +542,7 @@ TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. - char received[512]; + char received[sizeof(buf)]; EXPECT_THAT(recv(s_, received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); @@ -419,7 +564,7 @@ TEST_P(UdpSocketTest, SendAndReceiveConnected) { SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. - char received[512]; + char received[sizeof(buf)]; EXPECT_THAT(recv(s_, received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); @@ -462,7 +607,7 @@ TEST_P(UdpSocketTest, ReceiveBeforeConnect) { ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); // Receive the data. It works because it was sent before the connect. - char received[512]; + char received[sizeof(buf)]; EXPECT_THAT(recv(s_, received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); @@ -491,7 +636,7 @@ TEST_P(UdpSocketTest, ReceiveFrom) { SyscallSucceedsWithValue(sizeof(buf))); // Receive the data and sender address. - char received[512]; + char received[sizeof(buf)]; struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0, diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc index 6f49e3660..7fb9eed8d 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.cc +++ b/test/syscalls/linux/unix_domain_socket_test_util.cc @@ -47,38 +47,40 @@ std::string DescribeUnixDomainSocketType(int type) { } SocketPairKind UnixDomainSocketPair(int type) { - return SocketPairKind{DescribeUnixDomainSocketType(type), + return SocketPairKind{DescribeUnixDomainSocketType(type), AF_UNIX, type, 0, SyscallSocketPairCreator(AF_UNIX, type, 0)}; } SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) { std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), - " created with filesystem binding"); + " created with filesystem binding"); if ((type & SOCK_DGRAM) == SOCK_DGRAM) { return SocketPairKind{ - description, + description, AF_UNIX, type, 0, FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; } return SocketPairKind{ - description, FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; + description, AF_UNIX, type, 0, + FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; } SocketPairKind AbstractBoundUnixDomainSocketPair(int type) { - std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), - " created with abstract namespace binding"); + std::string description = + absl::StrCat(DescribeUnixDomainSocketType(type), + " created with abstract namespace binding"); if ((type & SOCK_DGRAM) == SOCK_DGRAM) { return SocketPairKind{ - description, + description, AF_UNIX, type, 0, AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; } - return SocketPairKind{description, + return SocketPairKind{description, AF_UNIX, type, 0, AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; } SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) { std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), - " created with the socketpair gofer"); - return SocketPairKind{description, + " created with the socketpair gofer"); + return SocketPairKind{description, AF_UNIX, type, 0, SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)}; } @@ -87,13 +89,15 @@ SocketPairKind SocketpairGoferFileSocketPair(int type) { absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "", ((type & O_CLOEXEC) != 0) ? "close-on-exec " : "", "file socket created with the socketpair gofer"); - return SocketPairKind{description, + // The socketpair gofer always creates SOCK_STREAM sockets on open(2). + return SocketPairKind{description, AF_UNIX, SOCK_STREAM, 0, SocketpairGoferFileSocketPairCreator(type)}; } SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) { return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type), " unbound with a filesystem address"), + AF_UNIX, type, 0, FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)}; } @@ -101,7 +105,7 @@ SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) { return SocketPairKind{ absl::StrCat(DescribeUnixDomainSocketType(type), " unbound with an abstract namespace address"), - AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)}; + AF_UNIX, type, 0, AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)}; } void SendSingleFD(int sock, int fd, char buf[], int buf_size) { diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h index aae990245..5eca0b7f0 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.h +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -21,7 +21,7 @@ namespace gvisor { namespace testing { -// DescribeUnixDomainSocketType returns a human-readable std::string explaining the +// DescribeUnixDomainSocketType returns a human-readable string explaining the // given Unix domain socket type. std::string DescribeUnixDomainSocketType(int type); @@ -40,7 +40,7 @@ SocketPairKind FilesystemBoundUnixDomainSocketPair(int type); SocketPairKind AbstractBoundUnixDomainSocketPair(int type); // SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created -// with two sockets conected to the socketpair gofer. +// with two sockets connected to the socketpair gofer. SocketPairKind SocketpairGoferUnixDomainSocketPair(int type); // SocketpairGoferFileSocketPair returns a SocketPairKind that was created with diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index 476248184..5936d66ff 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -31,10 +31,10 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" - "gvisor.googlesource.com/gvisor/runsc/test/testutil" - "gvisor.googlesource.com/gvisor/test/syscalls/gtest" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/test/syscalls/gtest" ) // Location of syscall tests, relative to the repo root. diff --git a/test/util/capability_util.h b/test/util/capability_util.h index e968a2583..bb9ea1fe5 100644 --- a/test/util/capability_util.h +++ b/test/util/capability_util.h @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Utilities for testing capabilties. +// Utilities for testing capabilities. #ifndef GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_ #define GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_ diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index bc90bd78e..ae49725a0 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -290,7 +290,7 @@ PosixError WalkTree( } PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath, - bool skipdots) { + bool skipdots) { std::vector<std::string> files; DIR* dir = opendir(std::string(abspath).c_str()); @@ -381,7 +381,7 @@ PosixError RecursivelyCreateDir(absl::string_view path) { // Makes a path absolute with respect to an optional base. If no base is // provided it will use the current working directory. PosixErrorOr<std::string> MakeAbsolute(absl::string_view filename, - absl::string_view base) { + absl::string_view base) { if (filename.empty()) { return PosixError(EINVAL, "filename cannot be empty."); } @@ -494,7 +494,7 @@ std::string CleanPath(const absl::string_view unclean_path) { } PosixErrorOr<std::string> GetRelativePath(absl::string_view source, - absl::string_view dest) { + absl::string_view dest) { if (!absl::StartsWith(source, "/") || !absl::StartsWith(dest, "/")) { // At least one of the inputs is not an absolute path. return PosixError( diff --git a/test/util/fs_util.h b/test/util/fs_util.h index eb7cdaa24..3969f8309 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -61,7 +61,7 @@ PosixError SetContents(absl::string_view path, absl::string_view contents); PosixError CreateWithContents(absl::string_view path, absl::string_view contents, int mode = 0666); -// Attempts to read the entire contents of the file into the provided std::string +// Attempts to read the entire contents of the file into the provided string // buffer or returns an error. PosixError GetContents(absl::string_view path, std::string* output); @@ -69,7 +69,7 @@ PosixError GetContents(absl::string_view path, std::string* output); PosixErrorOr<std::string> GetContents(absl::string_view path); // Attempts to read the entire contents of the provided fd into the provided -// std::string or returns an error. +// string or returns an error. PosixError GetContentsFD(int fd, std::string* output); // Attempts to read the entire contents of the provided fd or returns an error. @@ -94,7 +94,7 @@ PosixError WalkTree( // method does not walk the tree recursively it only returns the elements // in that directory. PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath, - bool skipdots); + bool skipdots); // Attempt to recursively delete a directory or file. Returns an error and // the number of undeleted directories and files. If either @@ -108,20 +108,20 @@ PosixError RecursivelyCreateDir(absl::string_view path); // Makes a path absolute with respect to an optional base. If no base is // provided it will use the current working directory. PosixErrorOr<std::string> MakeAbsolute(absl::string_view filename, - absl::string_view base); + absl::string_view base); // Generates a relative path from the source directory to the destination // (dest) file or directory. This uses ../ when necessary for destinations // which are not nested within the source. Both source and dest are required -// to be absolute paths, and an empty std::string will be returned if they are not. +// to be absolute paths, and an empty string will be returned if they are not. PosixErrorOr<std::string> GetRelativePath(absl::string_view source, - absl::string_view dest); + absl::string_view dest); // Returns the part of the path before the final "/", EXCEPT: // * If there is a single leading "/" in the path, the result will be the // leading "/". // * If there is no "/" in the path, the result is the empty prefix of the -// input std::string. +// input string. absl::string_view Dirname(absl::string_view path); // Return the parts of the path, split on the final "/". If there is no @@ -135,7 +135,7 @@ std::pair<absl::string_view, absl::string_view> SplitPath( // "/" in the path, the result is the same as the input. // Note that this function's behavior differs from the Unix basename // command if path ends with "/". For such paths, this function returns the -// empty std::string. +// empty string. absl::string_view Basename(absl::string_view path); // Collapse duplicate "/"s, resolve ".." and "." path elements, remove @@ -144,7 +144,7 @@ absl::string_view Basename(absl::string_view path); // NOTE: This respects relative vs. absolute paths, but does not // invoke any system calls (getcwd(2)) in order to resolve relative // paths wrt actual working directory. That is, this is purely a -// std::string manipulation, completely independent of process state. +// string manipulation, completely independent of process state. std::string CleanPath(absl::string_view path); // Returns the full path to the executable of the given pid or a PosixError. @@ -174,7 +174,7 @@ inline std::string JoinPath(absl::string_view path) { std::string JoinPath(absl::string_view path1, absl::string_view path2); template <typename... T> inline std::string JoinPath(absl::string_view path1, absl::string_view path2, - absl::string_view path3, const T&... args) { + absl::string_view path3, const T&... args) { return internal::JoinPathImpl({path1, path2, path3, args...}); } } // namespace testing diff --git a/test/util/fs_util_test.cc b/test/util/fs_util_test.cc index 4e12076a1..2a200320a 100644 --- a/test/util/fs_util_test.cc +++ b/test/util/fs_util_test.cc @@ -29,7 +29,8 @@ namespace { TEST(FsUtilTest, RecursivelyCreateDirManualDelete) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); + const std::string base_path = + JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false)); ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path)); @@ -48,7 +49,8 @@ TEST(FsUtilTest, RecursivelyCreateDirManualDelete) { TEST(FsUtilTest, RecursivelyCreateAndDeleteDir) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); + const std::string base_path = + JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false)); ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path)); @@ -60,7 +62,8 @@ TEST(FsUtilTest, RecursivelyCreateAndDeleteDir) { TEST(FsUtilTest, RecursivelyCreateAndDeletePartial) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); + const std::string base_path = + JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m"); ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false)); ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path)); diff --git a/test/util/logging.cc b/test/util/logging.cc index cc71d77b0..5d5e76c46 100644 --- a/test/util/logging.cc +++ b/test/util/logging.cc @@ -50,7 +50,7 @@ int WriteNumber(int fd, uint32_t val) { constexpr int kBufferSize = 11; char buf[kBufferSize]; - // Convert the number to std::string. + // Convert the number to string. char* s = buf + sizeof(buf) - 1; size_t size = 0; diff --git a/test/util/memory_util.h b/test/util/memory_util.h index 8c77778ea..190c469b5 100644 --- a/test/util/memory_util.h +++ b/test/util/memory_util.h @@ -118,6 +118,17 @@ inline PosixErrorOr<Mapping> MmapAnon(size_t length, int prot, int flags) { return Mmap(nullptr, length, prot, flags | MAP_ANONYMOUS, -1, 0); } +// Returns true if the page containing addr is mapped. +inline bool IsMapped(uintptr_t addr) { + int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)), + kPageSize, MS_ASYNC); + if (rv == 0) { + return true; + } + TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno"); + return false; +} + } // namespace testing } // namespace gvisor diff --git a/test/util/mount_util.h b/test/util/mount_util.h index 7782e6bf2..38ec6c8a1 100644 --- a/test/util/mount_util.h +++ b/test/util/mount_util.h @@ -30,9 +30,11 @@ namespace testing { // Mount mounts the filesystem, and unmounts when the returned reference is // destroyed. -inline PosixErrorOr<Cleanup> Mount(const std::string &source, const std::string &target, +inline PosixErrorOr<Cleanup> Mount(const std::string &source, + const std::string &target, const std::string &fstype, uint64_t mountflags, - const std::string &data, uint64_t umountflags) { + const std::string &data, + uint64_t umountflags) { if (mount(source.c_str(), target.c_str(), fstype.c_str(), mountflags, data.c_str()) == -1) { return PosixError(errno, "mount failed"); diff --git a/test/util/posix_error.h b/test/util/posix_error.h index b604f4f8f..ad666bce0 100644 --- a/test/util/posix_error.h +++ b/test/util/posix_error.h @@ -51,7 +51,7 @@ class ABSL_MUST_USE_RESULT PosixError { std::string error_message() const { return msg_; } - // ToString produces a full std::string representation of this posix error + // ToString produces a full string representation of this posix error // including the printable representation of the errno and the error message. std::string ToString() const; diff --git a/test/util/proc_util.cc b/test/util/proc_util.cc index 9d4db37c3..75b24da37 100644 --- a/test/util/proc_util.cc +++ b/test/util/proc_util.cc @@ -76,7 +76,8 @@ PosixErrorOr<ProcMapsEntry> ParseProcMapsLine(absl::string_view line) { if (parts.size() == 6) { // A filename is present. However, absl::StrSplit retained the whitespace // between the inode number and the filename. - map_entry.filename = std::string(absl::StripLeadingAsciiWhitespace(parts[5])); + map_entry.filename = + std::string(absl::StripLeadingAsciiWhitespace(parts[5])); } return map_entry; diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc index de7c04a6f..35aacb172 100644 --- a/test/util/temp_path.cc +++ b/test/util/temp_path.cc @@ -70,13 +70,16 @@ std::string NewTempAbsPathInDir(absl::string_view const dir) { return JoinPath(dir, NextTempBasename()); } -std::string NewTempAbsPath() { return NewTempAbsPathInDir(GetAbsoluteTestTmpdir()); } +std::string NewTempAbsPath() { + return NewTempAbsPathInDir(GetAbsoluteTestTmpdir()); +} std::string NewTempRelPath() { return NextTempBasename(); } std::string GetAbsoluteTestTmpdir() { char* env_tmpdir = getenv("TEST_TMPDIR"); - std::string tmp_dir = env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp"; + std::string tmp_dir = + env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp"; return MakeAbsolute(tmp_dir, "").ValueOrDie(); } diff --git a/test/util/temp_path.h b/test/util/temp_path.h index 89302e0fd..92d669503 100644 --- a/test/util/temp_path.h +++ b/test/util/temp_path.h @@ -30,7 +30,7 @@ namespace testing { // Distinct calls to NewTempAbsPathInDir from the same process, even from // multiple threads, are guaranteed to return different paths. Distinct calls to // NewTempAbsPathInDir from different processes are not synchronized. -std::string NewTempAbsPathInDir(absl::string_view base); +std::string NewTempAbsPathInDir(absl::string_view const dir); // Like NewTempAbsPathInDir, but the returned path is in the test's temporary // directory, as provided by the testing framework. @@ -105,7 +105,7 @@ class TempPath { // Changes the path this TempPath represents. If the TempPath already // represented a path, deletes and returns that path. Otherwise returns the - // empty std::string. + // empty string. std::string reset(std::string newpath); std::string reset() { return reset(""); } diff --git a/test/util/test_util.cc b/test/util/test_util.cc index bf0029951..4fe15afaa 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -73,7 +73,7 @@ Platform GvisorPlatform() { CPUVendor GetCPUVendor() { uint32_t eax, ebx, ecx, edx; std::string vendor_str; - // Get vendor std::string (issue CPUID with eax = 0) + // Get vendor string (issue CPUID with eax = 0) GETCPUID(eax, ebx, ecx, edx, 0, 0); vendor_str.append(reinterpret_cast<char*>(&ebx), 4); vendor_str.append(reinterpret_cast<char*>(&edx), 4); @@ -93,7 +93,8 @@ bool operator==(const KernelVersion& first, const KernelVersion& second) { PosixErrorOr<KernelVersion> ParseKernelVersion(absl::string_view vers_str) { KernelVersion version = {}; - std::vector<std::string> values = absl::StrSplit(vers_str, absl::ByAnyChar(".-")); + std::vector<std::string> values = + absl::StrSplit(vers_str, absl::ByAnyChar(".-")); if (values.size() == 2) { ASSIGN_OR_RETURN_ERRNO(version.major, Atoi<int>(values[0])); ASSIGN_OR_RETURN_ERRNO(version.minor, Atoi<int>(values[1])); @@ -230,6 +231,7 @@ void TestInit(int* argc, char*** argv) { sa.sa_handler = SIG_IGN; TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); } + gvisor:case-end } // namespace testing } // namespace gvisor diff --git a/test/util/timer_util.h b/test/util/timer_util.h index 2cebfa5d1..31aea4fc6 100644 --- a/test/util/timer_util.h +++ b/test/util/timer_util.h @@ -30,7 +30,7 @@ namespace gvisor { namespace testing { -// MonotonicTimer is a simple timer that uses a monotic clock. +// MonotonicTimer is a simple timer that uses a monotonic clock. class MonotonicTimer { public: MonotonicTimer() {} diff --git a/third_party/gvsync/BUILD b/third_party/gvsync/BUILD index 04a1fbeba..8dab51daa 100644 --- a/third_party/gvsync/BUILD +++ b/third_party/gvsync/BUILD @@ -40,7 +40,7 @@ go_library( "race_unsafe.go", "seqcount.go", ], - importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync", + importpath = "gvisor.dev/gvisor/third_party/gvsync", ) go_test( diff --git a/third_party/gvsync/atomicptr_unsafe.go b/third_party/gvsync/atomicptr_unsafe.go index 53a943282..525c4beed 100644 --- a/third_party/gvsync/atomicptr_unsafe.go +++ b/third_party/gvsync/atomicptr_unsafe.go @@ -21,8 +21,18 @@ type Value struct{} // Note that copying AtomicPtr by value performs a non-atomic read of the // stored pointer, which is unsafe if Store() can be called concurrently; in // this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable type AtomicPtr struct { - ptr unsafe.Pointer + ptr unsafe.Pointer `state:".(*Value)"` +} + +func (p *AtomicPtr) savePtr() *Value { + return p.Load() +} + +func (p *AtomicPtr) loadPtr(v *Value) { + p.Store(v) } // Load returns the value set by the most recent Store. It returns nil if there diff --git a/third_party/gvsync/atomicptrtest/BUILD b/third_party/gvsync/atomicptrtest/BUILD index 74c51fd18..631b0b64c 100644 --- a/third_party/gvsync/atomicptrtest/BUILD +++ b/third_party/gvsync/atomicptrtest/BUILD @@ -18,7 +18,7 @@ go_template_instance( go_library( name = "atomicptr", srcs = ["atomicptr_int.go"], - importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync/atomicptr", + importpath = "gvisor.dev/gvisor/third_party/gvsync/atomicptr", ) go_test( diff --git a/third_party/gvsync/seqatomic_unsafe.go b/third_party/gvsync/seqatomic_unsafe.go index c52d378f1..382eeed43 100644 --- a/third_party/gvsync/seqatomic_unsafe.go +++ b/third_party/gvsync/seqatomic_unsafe.go @@ -13,7 +13,7 @@ import ( "strings" "unsafe" - "gvisor.googlesource.com/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/third_party/gvsync" ) // Value is a required type parameter. diff --git a/third_party/gvsync/seqatomictest/BUILD b/third_party/gvsync/seqatomictest/BUILD index d83149e81..9dd600148 100644 --- a/third_party/gvsync/seqatomictest/BUILD +++ b/third_party/gvsync/seqatomictest/BUILD @@ -18,7 +18,7 @@ go_template_instance( go_library( name = "seqatomic", srcs = ["seqatomic_int.go"], - importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync/seqatomic", + importpath = "gvisor.dev/gvisor/third_party/gvsync/seqatomic", deps = [ "//third_party/gvsync", ], diff --git a/third_party/gvsync/seqatomictest/seqatomic_test.go b/third_party/gvsync/seqatomictest/seqatomic_test.go index 2da73cf96..a5447f589 100644 --- a/third_party/gvsync/seqatomictest/seqatomic_test.go +++ b/third_party/gvsync/seqatomictest/seqatomic_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/third_party/gvsync" ) func TestSeqAtomicLoadUncontended(t *testing.T) { diff --git a/tools/go_branch.sh b/tools/go_branch.sh index 8ea6a6d8d..d9e79401d 100755 --- a/tools/go_branch.sh +++ b/tools/go_branch.sh @@ -19,6 +19,7 @@ set -eo pipefail # Discovery the package name from the go.mod file. declare -r gomod="$(pwd)/go.mod" declare -r module=$(cat "${gomod}" | grep -E "^module" | cut -d' ' -f2) +declare -r gosum="$(pwd)/go.sum" # Check that gopath has been built. declare -r gopath_dir="$(pwd)/bazel-bin/gopath/src/${module}" @@ -63,6 +64,7 @@ git merge --allow-unrelated-histories --no-commit --strategy ours ${head} # Sync the entire gopath_dir and go.mod. rsync --recursive --verbose --delete --exclude .git --exclude README.md -L "${gopath_dir}/" . cp "${gomod}" . +cp "${gosum}" . # There are a few solitary files that can get left behind due to the way bazel # constructs the gopath target. Note that we don't find all Go files here diff --git a/tools/go_generics/generics.go b/tools/go_generics/generics.go index ca414d8cb..22c714c13 100644 --- a/tools/go_generics/generics.go +++ b/tools/go_generics/generics.go @@ -98,7 +98,7 @@ import ( "regexp" "strings" - "gvisor.googlesource.com/gvisor/tools/go_generics/globals" + "gvisor.dev/gvisor/tools/go_generics/globals" ) var ( @@ -222,7 +222,11 @@ func main() { // Modify the state tag appropriately. if m := stateTagRegexp.FindStringSubmatch(ident.Name); m != nil { if t := identifierRegexp.FindStringSubmatch(m[2]); t != nil { - ident.Name = m[1] + `state:".(` + t[1] + *prefix + t[2] + *suffix + t[3] + `)"` + m[3] + typeName := *prefix + t[2] + *suffix + if n, ok := types[t[2]]; ok { + typeName = n + } + ident.Name = m[1] + `state:".(` + t[1] + typeName + t[3] + `)"` + m[3] } } } diff --git a/tools/go_generics/globals/BUILD b/tools/go_generics/globals/BUILD index 6628132f5..74853c7d2 100644 --- a/tools/go_generics/globals/BUILD +++ b/tools/go_generics/globals/BUILD @@ -8,6 +8,6 @@ go_library( "globals_visitor.go", "scope.go", ], - importpath = "gvisor.googlesource.com/gvisor/tools/go_generics/globals", + importpath = "gvisor.dev/gvisor/tools/go_generics/globals", visibility = ["//tools/go_generics:__pkg__"], ) diff --git a/tools/go_generics/globals/globals_visitor.go b/tools/go_generics/globals/globals_visitor.go index 7ae48c662..3f948637b 100644 --- a/tools/go_generics/globals/globals_visitor.go +++ b/tools/go_generics/globals/globals_visitor.go @@ -132,7 +132,7 @@ func (v *globalsVisitor) visitFields(l *ast.FieldList, kind SymKind) { } } -// visitGenDecl is called when a generic declation is encountered, for example, +// visitGenDecl is called when a generic declaration is encountered, for example, // on variable, constant and type declarations. It adds all newly defined // symbols to the current scope and reports them if the current scope is the // global one. @@ -490,7 +490,7 @@ func (v *globalsVisitor) visitBlockStmt(s *ast.BlockStmt) { v.popScope() } -// visitFuncDecl is called when a function or method declation is encountered. +// visitFuncDecl is called when a function or method declaration is encountered. // it creates a new scope for the function [optional] receiver, parameters and // results, and visits all children nodes. func (v *globalsVisitor) visitFuncDecl(d *ast.FuncDecl) { diff --git a/tools/go_generics/imports.go b/tools/go_generics/imports.go index 3a7230c97..148dc7216 100644 --- a/tools/go_generics/imports.go +++ b/tools/go_generics/imports.go @@ -23,7 +23,7 @@ import ( "go/token" "strconv" - "gvisor.googlesource.com/gvisor/tools/go_generics/globals" + "gvisor.dev/gvisor/tools/go_generics/globals" ) type importedPackage struct { diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl index 70ce73d7b..aeba197e2 100644 --- a/tools/go_stateify/defs.bzl +++ b/tools/go_stateify/defs.bzl @@ -50,7 +50,7 @@ def _go_stateify_impl(ctx): args += ["-imports=%s" % ",".join(ctx.attr.imports)] args += ["--"] for src in ctx.attr.srcs: - args += [f.path for f in src.files] + args += [f.path for f in src.files.to_list()] ctx.actions.run( inputs = ctx.files.srcs, outputs = [output], @@ -76,7 +76,7 @@ go_stateify = rule( "package": attr.string(mandatory = True), "out": attr.output(mandatory = True), "_tool": attr.label(executable = True, cfg = "host", default = Label("//tools/go_stateify:stateify")), - "_statepkg": attr.string(default = "gvisor.googlesource.com/gvisor/pkg/state"), + "_statepkg": attr.string(default = "gvisor.dev/gvisor/pkg/state"), }, ) diff --git a/tools/run_tests.sh b/tools/run_tests.sh index 7a1f889dd..d1669b343 100755 --- a/tools/run_tests.sh +++ b/tools/run_tests.sh @@ -92,6 +92,14 @@ build_everything() { "${BUILD_PACKAGES[@]}" } +build_runsc_debian() { + cd ${WORKSPACE_DIR} + + # TODO(b/135475885): pkg_deb is incompatible with Python3. + # https://github.com/bazelbuild/bazel/issues/8443 + bazel build --host_force_python=py2 runsc:runsc-debian +} + # Run simple tests runs the tests that require no special setup or # configuration. run_simple_tests() { @@ -173,20 +181,24 @@ run_docker_tests() { # These names are used to exclude tests not supported in certain # configuration, e.g. save/restore not supported with hostnet. - declare -a variations=("" "-kvm" "-hostnet" "-overlay") - for v in "${variations[@]}"; do - # Change test names otherwise each run of tests will overwrite logs and - # results of the previous run. - sed -i "s/name = \"integration_test.*\"/name = \"integration_test${v}\"/" runsc/test/integration/BUILD - sed -i "s/name = \"image_test.*\"/name = \"image_test${v}\"/" runsc/test/image/BUILD - # Run runsc tests with docker that are tagged manual. - bazel test \ - "${BAZEL_BUILD_FLAGS[@]}" \ - --test_env=RUNSC_RUNTIME="${RUNTIME}${v}" \ - --test_output=all \ - //runsc/test/image:image_test${v} \ - //runsc/test/integration:integration_test${v} - done + # Run runsc tests with docker that are tagged manual. + # + # The --nocache_test_results option is used here to eliminate cached results + # from the previous run for the runc runtime. + bazel test \ + "${BAZEL_BUILD_FLAGS[@]}" \ + --test_env=RUNSC_RUNTIME="${RUNTIME}" \ + --test_output=all \ + --nocache_test_results \ + --test_output=streamed \ + //runsc/test/integration:integration_test \ + //runsc/test/integration:integration_test_hostnet \ + //runsc/test/integration:integration_test_overlay \ + //runsc/test/integration:integration_test_kvm \ + //runsc/test/image:image_test \ + //runsc/test/image:image_test_overlay \ + //runsc/test/image:image_test_hostnet \ + //runsc/test/image:image_test_kvm } # Run the tests that require root. @@ -277,6 +289,8 @@ main() { run_syscall_tests run_runsc_do_tests + build_runsc_debian + # Build other flavors too. build_everything dbg diff --git a/vdso/check_vdso.py b/vdso/check_vdso.py index e41b09709..b3ee574f3 100644 --- a/vdso/check_vdso.py +++ b/vdso/check_vdso.py @@ -30,7 +30,7 @@ def PageRoundDown(addr): addr: An address. Returns: - The address rounded down to thie nearest page. + The address rounded down to the nearest page. """ return addr & ~(PAGE_SIZE - 1) |