diff options
328 files changed, 4260 insertions, 2580 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c53df7d25..8790f4a2f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -65,10 +65,14 @@ Rules: * Itself. * Go standard library. - * Except (transitively) package "net" (this will result in a non-cgo - binary). Use `//pkg/unet` instead. + * Except (transitively) package "net", which would result in a cgo + binary. Use `//pkg/unet` instead. * `@org_golang_x_sys//unix:go_default_library` (Go import `golang.org/x/sys/unix`). + * `@org_golang_x_time//rate:go_default_library` (Go import + `golang.org/x/time/rate`). + * `@com_github_google_btree//:go_default_library` (Go import + `github.com/google/btree`). * Generated Go protobuf packages. * `@org_golang_google_protobuf//proto:go_default_library` (Go import `google.golang.org/protobuf`). @@ -444,7 +444,7 @@ $(RELEASE_ARTIFACTS)/%: @mkdir -p $@ @$(call copy,//runsc:runsc,$@) @$(call copy,//shim:containerd-shim-runsc-v1,$@) - @$(call copy,//debian:debian,$@) + @$(call deb_copy,//debian:debian,$@) release: $(RELEASE_KEY) $(RELEASE_ARTIFACTS)/$(ARCH) @mkdir -p $(RELEASE_ROOT) @@ -670,13 +670,6 @@ go_repository( ) go_repository( - name = "com_github_jonboulle_clockwork", - importpath = "github.com/jonboulle/clockwork", - sum = "h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo=", - version = "v0.1.0", -) - -go_repository( name = "com_github_jtolds_gls", importpath = "github.com/jtolds/gls", sum = "h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=", @@ -1245,8 +1238,8 @@ rbe_autoconfig(name = "rbe_default") http_archive( name = "rules_pkg", - sha256 = "6b5969a7acd7b60c02f816773b06fcf32fbe8ba0c7919ccdc2df4f8fb923804a", - url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.3.0/rules_pkg-0.3.0.tar.gz", + sha256 = "353b20e8b093d42dd16889c7f918750fb8701c485ac6cceb69a5236500507c27", + url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.5.0/rules_pkg-0.5.0.tar.gz", ) load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") @@ -1640,9 +1633,9 @@ 
go_repository( go_repository( name = "co_honnef_go_tools", importpath = "honnef.co/go/tools", - sum = "h1:EVDuO03OCZwpV2t/tLLxPmPiomagMoBOgfPt0FM+4IY=", - version = "v0.1.1", -) + sum = "h1:Tyybiul3hjaq0dkv+kcf5/MPTfo+ZBiEWrkhgxMPH54=", + version = "v0.3.0-0.dev.0.20210801021341-453cb28c0b15", + ) go_repository( name = "com_github_burntsushi_toml", diff --git a/debian/BUILD b/debian/BUILD index 32cc209bf..a62164cb0 100644 --- a/debian/BUILD +++ b/debian/BUILD @@ -28,12 +28,10 @@ pkg_deb( amd64 = "amd64", arm64 = "arm64", ), - changes = "runsc.changes", conffiles = [ "/etc/containerd/runsc.toml", ], data = ":debian-data", - deb = "runsc.deb", # Note that the description_file will be flatten (all newlines removed), # and therefore it is kept to a simple one-line description. The expected # format for debian packages is "short summary\nLonger explanation of @@ -42,6 +40,7 @@ pkg_deb( homepage = "https://gvisor.dev/", maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>", package = "runsc", + package_file_name = "runsc.deb", postinst = "postinst.sh", version_file = version, visibility = [ diff --git a/debian/postinst.sh b/debian/postinst.sh index 6a326f823..b387b9f22 100755 --- a/debian/postinst.sh +++ b/debian/postinst.sh @@ -22,7 +22,7 @@ fi if [ -f /etc/docker/daemon.json ]; then runsc install if systemctl is-active -q docker; then - systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2 + systemctl reload docker || echo "unable to reload docker; you must do so manually." 
>&2 fi fi diff --git a/debian/show_paths.bzl b/debian/show_paths.bzl new file mode 100644 index 000000000..366b9d2e8 --- /dev/null +++ b/debian/show_paths.bzl @@ -0,0 +1,9 @@ +"""Formatter to extract the output files from pkg_deb.""" + +def format(target): + provider_map = providers(target) + return "\n".join([ + provider_map["OutputGroupInfo"].out.to_list()[0].path, + provider_map["OutputGroupInfo"].deb.to_list()[0].path, + provider_map["OutputGroupInfo"].changes.to_list()[0].path, + ]) diff --git a/images/default/Dockerfile b/images/default/Dockerfile index 5f652f2c3..4384d6271 100644 --- a/images/default/Dockerfile +++ b/images/default/Dockerfile @@ -15,7 +15,7 @@ RUN add-apt-repository \ "deb https://download.docker.com/linux/ubuntu \ $(lsb_release -cs) \ stable" -RUN apt-get install docker-ce-cli +RUN apt-get -y install docker-ce-cli # Install gcloud. RUN curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-289.0.0-linux-x86_64.tar.gz | \ diff --git a/images/syzkaller/Dockerfile b/images/syzkaller/Dockerfile index 9a85ae345..9f739d972 100644 --- a/images/syzkaller/Dockerfile +++ b/images/syzkaller/Dockerfile @@ -2,7 +2,7 @@ FROM gcr.io/syzkaller/env # This image is mostly for investigating syzkaller crashes, so let's install # developer tools. -RUN apt update && apt install -y git vim strace gdb procps +RUN apt update --allow-releaseinfo-change && DEBIAN_FRONTEND=noninteractive apt install -y git vim strace gdb procps WORKDIR /syzkaller/gopath/src/github.com/google/syzkaller @@ -48,6 +48,8 @@ global: - "duplicate import" # These will never be annotated. 
- "unexpected call to atomic function" + # Generated proto code creates declarations like 'var start int = iNdEx' + - "should omit type .* from declaration; it will be inferred from the right-hand side" internal: suppress: # We use ALL_CAPS for system definitions, @@ -161,29 +163,35 @@ analyzers: # targets in the standard library, so we still need to run # checklinkname on stdlib generally. - "linkname to unknown symbol" - SA4016: + SA1019: # Use of deprecated identifier. + # disable for now due to misattribution from golang.org/issue/44195. + generated: + exclude: [".*"] internal: - exclude: - - pkg/gohacks/gohacks_unsafe.go # x ^ 0 always equals x. - SA2001: + exclude: [".*"] + SA2001: # Empty critical section. internal: exclude: - pkg/sentry/fs/fs.go # Intentional. - pkg/sentry/fs/gofer/inode.go # Intentional. - pkg/refs/refcounter_test.go # Intentional. - ST1019: + SA4016: # Useless bitwise operations. + internal: + exclude: + - pkg/gohacks/gohacks_unsafe.go # x ^ 0 always equals x. + SA5011: # Possible nil pointer dereference. + internal: + exclude: + # https://github.com/dominikh/go-tools/issues/924 + - pkg/sentry/fs/fdpipe/pipe_opener_test.go + - pkg/tcpip/tests/integration/link_resolution_test.go + ST1019: # Multiple imports of the same package. generated: exclude: # package ".../kubeapi/core/v1/v1" is being imported more than once - generated.gen.pb.go - ST1021: + ST1021: # Doc should start with type name. internal: suppress: - "comment on exported type Translation" # Intentional. - "comment on exported type PinnedRange" # Intentional. 
- SA5011: - internal: - exclude: - # https://github.com/dominikh/go-tools/issues/924 - - pkg/sentry/fs/fdpipe/pipe_opener_test.go - - pkg/tcpip/tests/integration/link_resolution_test.go diff --git a/pkg/abi/linux/errno/errno.go b/pkg/abi/linux/errno/errno.go index 5a09c6605..38ebbb1d7 100644 --- a/pkg/abi/linux/errno/errno.go +++ b/pkg/abi/linux/errno/errno.go @@ -157,9 +157,32 @@ const ( EHWPOISON ) -// errnos derived from other errnos +// errnos derived from other errnos. const ( EWOULDBLOCK = EAGAIN EDEADLOCK = EDEADLK ENONET = ENOENT ) + +// errnos for internal errors. +const ( + // ERESTARTSYS is returned by an interrupted syscall to indicate that it + // should be converted to EINTR if interrupted by a signal delivered to a + // user handler without SA_RESTART set, and restarted otherwise. + ERESTARTSYS = 512 + + // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it + // should always be restarted. + ERESTARTNOINTR = 513 + + // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it + // should be converted to EINTR if interrupted by a signal delivered to a + // user handler, and restarted otherwise. + ERESTARTNOHAND = 514 + + // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate + // that it should be restarted using a custom function. The interrupted + // syscall must register a custom restart function by calling + // Task.SetRestartSyscallFn. 
+ ERESTART_RESTARTBLOCK = 516 +) diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index bd3a5cce9..6d8b5f818 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -8,7 +8,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/context", - "//pkg/syserror", + "//pkg/errors/linuxerr", ], ) diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go index d7acc1d9f..985199cfa 100644 --- a/pkg/amutex/amutex.go +++ b/pkg/amutex/amutex.go @@ -20,7 +20,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // Sleeper must be implemented by users of the abortable mutex to allow for @@ -33,7 +33,7 @@ type NoopSleeper = context.Context // Block blocks until either receiving from ch succeeds (in which case it // returns nil) or sleeper is interrupted (in which case it returns -// syserror.ErrInterrupted). +// linuxerr.ErrInterrupted). func Block(sleeper Sleeper, ch <-chan struct{}) error { cancel := sleeper.SleepStart() select { @@ -42,7 +42,7 @@ func Block(sleeper Sleeper, ch <-chan struct{}) error { return nil case <-cancel: sleeper.SleepFinish(false) - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } } diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD index 11072d4de..02c0e52b9 100644 --- a/pkg/atomicbitops/BUILD +++ b/pkg/atomicbitops/BUILD @@ -18,7 +18,10 @@ go_library( go_test( name = "atomicbitops_test", size = "small", - srcs = ["atomicbitops_test.go"], + srcs = [ + "aligned_test.go", + "atomicbitops_test.go", + ], library = ":atomicbitops", deps = ["//pkg/sync"], ) diff --git a/pkg/atomicbitops/aligned_32bit_unsafe.go b/pkg/atomicbitops/aligned_32bit_unsafe.go index 383f81ff2..0e4765c48 100644 --- a/pkg/atomicbitops/aligned_32bit_unsafe.go +++ b/pkg/atomicbitops/aligned_32bit_unsafe.go @@ -34,14 +34,15 @@ import ( // // +stateify savable type AlignedAtomicInt64 struct { - value [15]byte + value int64 + value32 int32 } func (aa *AlignedAtomicInt64) 
ptr() *int64 { - // In the 15-byte aa.value, there are guaranteed to be 8 contiguous - // bytes with 64-bit alignment. We find an address in this range by - // adding 7, then clear the 3 least significant bits to get its start. - return (*int64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value[0])) + 7) &^ 7)) + // On 32-bit systems, aa.value is guaranteed to be 32-bit aligned. It means + // that in the 12 bytes spanned by aa.value and aa.value32, there are guaranteed to be 8 contiguous bytes + // with 64-bit alignment. + return (*int64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value)) + 4) &^ 7)) } // Load is analogous to atomic.LoadInt64. @@ -71,14 +72,15 @@ func (aa *AlignedAtomicInt64) Add(v int64) int64 { // // +stateify savable type AlignedAtomicUint64 struct { - value [15]byte + value uint64 + value32 uint32 } func (aa *AlignedAtomicUint64) ptr() *uint64 { - // In the 15-byte aa.value, there are guaranteed to be 8 contiguous - // bytes with 64-bit alignment. We find an address in this range by - // adding 7, then clear the 3 least significant bits to get its start. - return (*uint64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value[0])) + 7) &^ 7)) + // On 32-bit systems, aa.value is guaranteed to be 32-bit aligned. It means + // that in the 12 bytes spanned by aa.value and aa.value32, there are guaranteed to be 8 contiguous bytes + // with 64-bit alignment. + return (*uint64)(unsafe.Pointer((uintptr(unsafe.Pointer(&aa.value)) + 4) &^ 7)) } // Load is analogous to atomic.LoadUint64. diff --git a/pkg/atomicbitops/aligned_test.go b/pkg/atomicbitops/aligned_test.go new file mode 100644 index 000000000..e7123d2b8 --- /dev/null +++ b/pkg/atomicbitops/aligned_test.go @@ -0,0 +1,35 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package atomicbitops + +import ( + "testing" +) + +func TestAtomiciInt64(t *testing.T) { + v := struct { + v8 int8 + v64 AlignedAtomicInt64 + }{} + v.v64.Add(1) +} + +func TestAtomicUint64(t *testing.T) { + v := struct { + v8 uint8 + v64 AlignedAtomicUint64 + }{} + v.v64.Add(1) +} diff --git a/pkg/buffer/view.go b/pkg/buffer/view.go index 7bcfcd543..a4610f977 100644 --- a/pkg/buffer/view.go +++ b/pkg/buffer/view.go @@ -378,6 +378,20 @@ func (v *View) Copy() (other View) { return } +// Clone makes a more shallow copy compared to Copy. The underlying payload +// slice (buffer.data) is shared but the buffers themselves are copied. +func (v *View) Clone() *View { + other := &View{ + size: v.size, + } + for buf := v.data.Front(); buf != nil; buf = buf.Next() { + newBuf := other.pool.getNoInit() + *newBuf = *buf + other.data.PushBack(newBuf) + } + return other +} + // Apply applies the given function across all valid data. 
func (v *View) Apply(fn func([]byte)) { for buf := v.data.Front(); buf != nil; buf = buf.Next() { diff --git a/pkg/errors/linuxerr/BUILD b/pkg/errors/linuxerr/BUILD index 201727780..e73b0e28a 100644 --- a/pkg/errors/linuxerr/BUILD +++ b/pkg/errors/linuxerr/BUILD @@ -4,7 +4,10 @@ package(licenses = ["notice"]) go_library( name = "linuxerr", - srcs = ["linuxerr.go"], + srcs = [ + "internal.go", + "linuxerr.go", + ], visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux/errno", @@ -20,7 +23,6 @@ go_test( ":linuxerr", "//pkg/abi/linux/errno", "//pkg/errors", - "//pkg/syserror", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/errors/linuxerr/internal.go b/pkg/errors/linuxerr/internal.go new file mode 100644 index 000000000..127bba0df --- /dev/null +++ b/pkg/errors/linuxerr/internal.go @@ -0,0 +1,120 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linuxerr + +import ( + "gvisor.dev/gvisor/pkg/abi/linux/errno" + "gvisor.dev/gvisor/pkg/errors" +) + +var ( + // ErrWouldBlock is an internal error used to indicate that an operation + // cannot be satisfied immediately, and should be retried at a later + // time, possibly when the caller has received a notification that the + // operation may be able to complete. It is used by implementations of + // the kio.File interface. 
+ ErrWouldBlock = errors.New(errno.EWOULDBLOCK, "request would block") + + // ErrInterrupted is returned if a request is interrupted before it can + // complete. + ErrInterrupted = errors.New(errno.EINTR, "request was interrupted") + + // ErrExceedsFileSizeLimit is returned if a request would exceed the + // file's size limit. + ErrExceedsFileSizeLimit = errors.New(errno.E2BIG, "exceeds file size limit") +) + +var errorMap = map[error]*errors.Error{ + ErrWouldBlock: EWOULDBLOCK, + ErrInterrupted: EINTR, + ErrExceedsFileSizeLimit: EFBIG, +} + +// errorUnwrappers is an array of unwrap functions to extract typed errors. +var errorUnwrappers = []func(error) (*errors.Error, bool){} + +// AddErrorUnwrapper registers an unwrap method that can extract a concrete error +// from a typed, but not initialized, error. +func AddErrorUnwrapper(unwrap func(e error) (*errors.Error, bool)) { + errorUnwrappers = append(errorUnwrappers, unwrap) +} + +// TranslateError translates errors to errnos, it will return false if +// the error was not registered. +func TranslateError(from error) (*errors.Error, bool) { + if err, ok := errorMap[from]; ok { + return err, true + } + // Try to unwrap the error if we couldn't match an error + // exactly. This might mean that a package has its own + // error type. + for _, unwrap := range errorUnwrappers { + if err, ok := unwrap(from); ok { + return err, true + } + } + return nil, false +} + +// These errors are significant because ptrace syscall exit tracing can +// observe them. +// +// For all of the following errors, if the syscall is not interrupted by a +// signal delivered to a user handler, the syscall is restarted. +var ( + // ERESTARTSYS is returned by an interrupted syscall to indicate that it + // should be converted to EINTR if interrupted by a signal delivered to a + // user handler without SA_RESTART set, and restarted otherwise. 
+ ERESTARTSYS = errors.New(errno.ERESTARTSYS, "to be restarted if SA_RESTART is set") + + // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it + // should always be restarted. + ERESTARTNOINTR = errors.New(errno.ERESTARTNOINTR, "to be restarted") + + // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it + // should be converted to EINTR if interrupted by a signal delivered to a + // user handler, and restarted otherwise. + ERESTARTNOHAND = errors.New(errno.ERESTARTNOHAND, "to be restarted if no handler") + + // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate + // that it should be restarted using a custom function. The interrupted + // syscall must register a custom restart function by calling + // Task.SetRestartSyscallFn. + ERESTART_RESTARTBLOCK = errors.New(errno.ERESTART_RESTARTBLOCK, "interrupted by signal") +) + +var restartMap = map[int]*errors.Error{ + -int(errno.ERESTARTSYS): ERESTARTSYS, + -int(errno.ERESTARTNOINTR): ERESTARTNOINTR, + -int(errno.ERESTARTNOHAND): ERESTARTNOHAND, + -int(errno.ERESTART_RESTARTBLOCK): ERESTART_RESTARTBLOCK, +} + +// IsRestartError checks if a given error is a restart error. +func IsRestartError(err error) bool { + switch err { + case ERESTARTSYS, ERESTARTNOINTR, ERESTARTNOHAND, ERESTART_RESTARTBLOCK: + return true + default: + return false + } +} + +// SyscallRestartErrorFromReturn returns the SyscallRestartErrno represented by +// rv, the value in a syscall return register. +func SyscallRestartErrorFromReturn(rv uintptr) (*errors.Error, bool) { + err, ok := restartMap[int(rv)] + return err, ok +} diff --git a/pkg/errors/linuxerr/linuxerr.go b/pkg/errors/linuxerr/linuxerr.go index f9f8412e0..5905ef593 100644 --- a/pkg/errors/linuxerr/linuxerr.go +++ b/pkg/errors/linuxerr/linuxerr.go @@ -27,6 +27,12 @@ import ( const maxErrno uint32 = errno.EHWPOISON + 1 +// The following errors are semantically identical to Errno of type unix.Errno +// or sycall.Errno. 
However, since the types are distinct (these are +// *errors.Error), they are not directly comparable. However, the Errno method +// returns an Errno number such that the error can be compared to unix/syscall.Errno +// (e.g. unix.Errno(EPERM.Errno()) == unix.EPERM is true). Converting unix/syscall.Errno +// to the errors should be done via the lookup methods provided. var ( NOERROR = errors.New(errno.NOERRNO, "not an error") EPERM = errors.New(errno.EPERM, "operation not permitted") @@ -177,7 +183,7 @@ var ( var errNotValidError = errors.New(errno.Errno(maxErrno), "not a valid error") // The following errorSlice holds errors by errno for fast translation between -// errnos (especially uint32(sycall.Errno)) and *Error. +// errnos (especially uint32(syscall.Errno)) and *errors.Error. var errorSlice = []*errors.Error{ // Errno values from include/uapi/asm-generic/errno-base.h. errno.NOERRNO: NOERROR, diff --git a/pkg/errors/linuxerr/linuxerr_test.go b/pkg/errors/linuxerr/linuxerr_test.go index f09d61b02..df7cd1c5a 100644 --- a/pkg/errors/linuxerr/linuxerr_test.go +++ b/pkg/errors/linuxerr/linuxerr_test.go @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package syserror_test +package linuxerr_test import ( "errors" + "fmt" "io" "io/fs" "syscall" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux/errno" gErrors "gvisor.dev/gvisor/pkg/errors" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/syserror" ) var globalError error @@ -42,12 +42,6 @@ func BenchmarkAssignLinuxerr(b *testing.B) { } } -func BenchmarkAssignSyserror(b *testing.B) { - for i := b.N; i > 0; i-- { - globalError = linuxerr.ENOMSG - } -} - func BenchmarkCompareUnix(b *testing.B) { globalError = unix.EAGAIN j := 0 @@ -68,16 +62,6 @@ func BenchmarkCompareLinuxerr(b *testing.B) { } } -func BenchmarkCompareSyserror(b *testing.B) { - globalError = linuxerr.EAGAIN - j := 0 - for i := b.N; i > 0; i-- { - if globalError == linuxerr.EACCES { - j++ - } - } -} - func BenchmarkSwitchUnix(b *testing.B) { globalError = unix.EPERM j := 0 @@ -108,21 +92,6 @@ func BenchmarkSwitchLinuxerr(b *testing.B) { } } -func BenchmarkSwitchSyserror(b *testing.B) { - globalError = linuxerr.EPERM - j := 0 - for i := b.N; i > 0; i-- { - switch globalError { - case linuxerr.EACCES: - j++ - case syserror.EINTR: - j += 2 - case linuxerr.EAGAIN: - j += 3 - } - } -} - func BenchmarkReturnUnix(b *testing.B) { var localError error f := func() error { @@ -170,47 +139,40 @@ func BenchmarkConvertUnixLinuxerrZero(b *testing.B) { } type translationTestTable struct { - fn string errIn error - syscallErrorIn unix.Errno expectedBool bool - expectedTranslation unix.Errno + expectedTranslation *gErrors.Error } func TestErrorTranslation(t *testing.T) { - myError := errors.New("My test error") - myError2 := errors.New("Another test error") testTable := []translationTestTable{ - {"TranslateError", myError, 0, false, 0}, - {"TranslateError", myError2, 0, false, 0}, - {"AddErrorTranslation", myError, unix.EAGAIN, true, 0}, - {"AddErrorTranslation", myError, unix.EAGAIN, false, 0}, - {"AddErrorTranslation", myError, unix.EPERM, false, 0}, - {"TranslateError", myError, 0, true, 
unix.EAGAIN}, - {"TranslateError", myError2, 0, false, 0}, - {"AddErrorTranslation", myError2, unix.EPERM, true, 0}, - {"AddErrorTranslation", myError2, unix.EPERM, false, 0}, - {"AddErrorTranslation", myError2, unix.EAGAIN, false, 0}, - {"TranslateError", myError, 0, true, unix.EAGAIN}, - {"TranslateError", myError2, 0, true, unix.EPERM}, + { + errIn: linuxerr.ENOENT, + }, + { + errIn: unix.ENOENT, + }, + { + errIn: linuxerr.ErrInterrupted, + expectedBool: true, + expectedTranslation: linuxerr.EINTR, + }, + { + errIn: linuxerr.ERESTART_RESTARTBLOCK, + }, + { + errIn: errors.New("some new error"), + }, } for _, tt := range testTable { - switch tt.fn { - case "TranslateError": - err, ok := syserror.TranslateError(tt.errIn) - if ok != tt.expectedBool { - t.Fatalf("%v(%v) => %v expected %v", tt.fn, tt.errIn, ok, tt.expectedBool) + t.Run(fmt.Sprintf("err: %v %T", tt.errIn, tt.errIn), func(t *testing.T) { + err, ok := linuxerr.TranslateError(tt.errIn) + if (!tt.expectedBool && err != nil) || (tt.expectedBool != ok) { + t.Fatalf("%v => %v %v expected %v err: nil", tt.errIn, err, ok, tt.expectedBool) } else if err != tt.expectedTranslation { - t.Fatalf("%v(%v) (error) => %v expected %v", tt.fn, tt.errIn, err, tt.expectedTranslation) - } - case "AddErrorTranslation": - ok := syserror.AddErrorTranslation(tt.errIn, tt.syscallErrorIn) - if ok != tt.expectedBool { - t.Fatalf("%v(%v) => %v expected %v", tt.fn, tt.errIn, ok, tt.expectedBool) + t.Fatalf("%v => %v expected %v", tt.errIn, err, tt.expectedTranslation) } - default: - t.Fatalf("Unknown function %v", tt.fn) - } + }) } } diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index ad15d3672..56399232a 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "event.go", "event_any.go", + "processor.go", "rate.go", ], visibility = ["//:sandbox"], diff --git a/pkg/eventchannel/event_any.go b/pkg/eventchannel/event_any.go index 13f300061..b708937a4 100644 --- 
a/pkg/eventchannel/event_any.go +++ b/pkg/eventchannel/event_any.go @@ -26,3 +26,8 @@ import ( func newAny(m proto.Message) (*anypb.Any, error) { return anypb.New(m) } + +func emptyAny() *anypb.Any { + var any anypb.Any + return &any +} diff --git a/pkg/eventchannel/processor.go b/pkg/eventchannel/processor.go new file mode 100644 index 000000000..e765c10d1 --- /dev/null +++ b/pkg/eventchannel/processor.go @@ -0,0 +1,130 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package eventchannel + +import ( + "encoding/binary" + "fmt" + "io" + "os" + "time" + + "google.golang.org/protobuf/proto" + pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto" +) + +// eventProcessor carries display state across multiple events. +type eventProcessor struct { + filtering bool + // filtered is the number of events omitted since printing the last matching + // event. Only meaningful when filtering == true. + filtered uint64 + // allowlist is the set of event names to display. If empty, all events are + // displayed. + allowlist map[string]bool +} + +// newEventProcessor creates a new EventProcessor with filters. +func newEventProcessor(filters []string) *eventProcessor { + e := &eventProcessor{ + filtering: len(filters) > 0, + allowlist: make(map[string]bool), + } + for _, f := range filters { + e.allowlist[f] = true + } + return e +} + +// processOne reads, parses and displays a single event from the event channel. 
+// +// The event channel is a stream of (msglen, payload) packets; this function +// processes a single such packet. The msglen is a uvarint-encoded length for +// the associated payload. The payload is a binary-encoded 'Any' protobuf, which +// in turn encodes an arbitrary event protobuf. +func (e *eventProcessor) processOne(src io.Reader, out *os.File) error { + // Read and parse the msglen. + lenbuf := make([]byte, binary.MaxVarintLen64) + if _, err := io.ReadFull(src, lenbuf); err != nil { + return err + } + msglen, consumed := binary.Uvarint(lenbuf) + if consumed <= 0 { + return fmt.Errorf("couldn't parse the message length") + } + + // Read the payload. + buf := make([]byte, msglen) + // Copy any unused bytes from the len buffer into the payload buffer. These + // bytes are actually part of the payload. + extraBytes := copy(buf, lenbuf[consumed:]) + if _, err := io.ReadFull(src, buf[extraBytes:]); err != nil { + return err + } + + // Unmarshal the payload into an "Any" protobuf, which encodes the actual + // event. + encodedEv := emptyAny() + if err := proto.Unmarshal(buf, encodedEv); err != nil { + return fmt.Errorf("failed to unmarshal 'any' protobuf message: %v", err) + } + + var ev pb.DebugEvent + if err := (encodedEv).UnmarshalTo(&ev); err != nil { + return fmt.Errorf("failed to decode 'any' protobuf message: %v", err) + } + + if e.filtering && e.allowlist[ev.Name] { + e.filtered++ + return nil + } + + if e.filtering && e.filtered > 0 { + if e.filtered == 1 { + fmt.Fprintf(out, "... filtered %d event ...\n\n", e.filtered) + } else { + fmt.Fprintf(out, "... filtered %d events ...\n\n", e.filtered) + } + e.filtered = 0 + } + + // Extract the inner event and display it. 
Example: + // + // 2017-10-04 14:35:05.316180374 -0700 PDT m=+1.132485846 + // cloud_gvisor.MemoryUsage { + // total: 23822336 + // } + fmt.Fprintf(out, "%v\n%v {\n", time.Now(), ev.Name) + fmt.Fprintf(out, "%v", ev.Text) + fmt.Fprintf(out, "}\n\n") + + return nil +} + +// ProcessAll reads, parses and displays all events from src. The events are +// displayed to out. +func ProcessAll(src io.Reader, filters []string, out *os.File) error { + ep := newEventProcessor(filters) + for { + switch err := ep.processOne(src, out); err { + case nil: + continue + case io.EOF: + return nil + default: + return err + } + } +} diff --git a/pkg/gohacks/gohacks_unsafe.go b/pkg/gohacks/gohacks_unsafe.go index bd8ceba19..a055b3e8d 100644 --- a/pkg/gohacks/gohacks_unsafe.go +++ b/pkg/gohacks/gohacks_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.13 && !go1.18 -// +build go1.13,!go1.18 +//go:build go1.13 && !go1.19 +// +build go1.13,!go1.19 // //go:linkname directives type-checked by checklinkname. Any other // non-linkname assumptions outside the Go 1 compatibility guarantee should diff --git a/pkg/goid/goid.go b/pkg/goid/goid.go index 85fb2f6d4..0887f79ab 100644 --- a/pkg/goid/goid.go +++ b/pkg/goid/goid.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.12 && !go1.18 -// +build go1.12,!go1.18 +//go:build go1.12 && !go1.19 +// +build go1.12,!go1.19 // Check type signatures when updating Go version. diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index 161b451cc..a8f8a9d03 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -45,6 +45,8 @@ func ExtractErrno(err error) unix.Errno { // Attempt to unwrap. 
switch e := err.(type) { + case *errors.Error: + return unix.Errno(e.Errno()) case unix.Errno: return e case *os.PathError: diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s index 74a8de42c..7073a0810 100644 --- a/pkg/procid/procid_amd64.s +++ b/pkg/procid/procid_amd64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build amd64 && go1.8 && !go1.18 && go1.1 -// +build amd64,go1.8,!go1.18,go1.1 +//go:build amd64 && go1.8 && !go1.19 && go1.1 +// +build amd64,go1.8,!go1.19,go1.1 // //go:linkname directives type-checked by checklinkname. Any other // non-linkname assumptions outside the Go 1 compatibility guarantee should diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s index 48182c4a9..bdc3bdcb0 100644 --- a/pkg/procid/procid_arm64.s +++ b/pkg/procid/procid_arm64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build arm64 && go1.8 && !go1.18 && go1.1 -// +build arm64,go1.8,!go1.18,go1.1 +//go:build arm64 && go1.8 && !go1.19 && go1.1 +// +build arm64,go1.8,!go1.19,go1.1 // //go:linkname directives type-checked by checklinkname. 
Any other // non-linkname assumptions outside the Go 1 compatibility guarantee should diff --git a/pkg/safecopy/BUILD b/pkg/safecopy/BUILD index db5787302..0a045fc8e 100644 --- a/pkg/safecopy/BUILD +++ b/pkg/safecopy/BUILD @@ -19,7 +19,8 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", - "//pkg/syserror", + "//pkg/errors", + "//pkg/errors/linuxerr", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go index df63dd5f1..5e6f903ff 100644 --- a/pkg/safecopy/safecopy.go +++ b/pkg/safecopy/safecopy.go @@ -21,7 +21,8 @@ import ( "runtime" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // SegvError is returned when a safecopy function receives SIGSEGV. @@ -137,12 +138,12 @@ func init() { if err := ReplaceSignalHandler(unix.SIGBUS, addrOfSignalHandler(), &savedSigBusHandler); err != nil { panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err)) } - syserror.AddErrorUnwrapper(func(e error) (unix.Errno, bool) { + linuxerr.AddErrorUnwrapper(func(e error) (*errors.Error, bool) { switch e.(type) { case SegvError, BusError, AlignmentError: - return unix.EFAULT, true + return linuxerr.EFAULT, true default: - return 0, false + return nil, false } }) } diff --git a/pkg/safemem/io.go b/pkg/safemem/io.go index f039a5c34..9551ca853 100644 --- a/pkg/safemem/io.go +++ b/pkg/safemem/io.go @@ -207,58 +207,6 @@ func (r FromIOReader) readToBlock(dst Block, buf []byte) (int, []byte, error) { return wbn, buf, rerr } -// FromIOReaderAt implements Reader for an io.ReaderAt. Does not repeatedly -// invoke io.ReaderAt.ReadAt because ReadAt is more strict than Read. A partial -// read indicates an error. This is not thread-safe. -type FromIOReaderAt struct { - ReaderAt io.ReaderAt - Offset int64 -} - -// ReadToBlocks implements Reader.ReadToBlocks. 
-func (r FromIOReaderAt) ReadToBlocks(dsts BlockSeq) (uint64, error) { - var buf []byte - var done uint64 - for !dsts.IsEmpty() { - dst := dsts.Head() - var n int - var err error - n, buf, err = r.readToBlock(dst, buf) - done += uint64(n) - if n != dst.Len() { - return done, err - } - dsts = dsts.Tail() - if err != nil { - if dsts.IsEmpty() && err == io.EOF { - return done, nil - } - return done, err - } - } - return done, nil -} - -func (r FromIOReaderAt) readToBlock(dst Block, buf []byte) (int, []byte, error) { - // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require - // safecopy. - if !dst.NeedSafecopy() { - n, err := r.ReaderAt.ReadAt(dst.ToSlice(), r.Offset) - r.Offset += int64(n) - return n, buf, err - } - if len(buf) < dst.Len() { - buf = make([]byte, dst.Len()) - } - rn, rerr := r.ReaderAt.ReadAt(buf[:dst.Len()], r.Offset) - r.Offset += int64(rn) - wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) - if wberr != nil { - return wbn, buf, wberr - } - return wbn, buf, rerr -} - // FromIOWriter implements Writer for an io.Writer by repeatedly invoking // io.Writer.Write until it returns an error or partial write. 
// diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index deaf5fa23..a4934a565 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -6,16 +6,22 @@ go_library( name = "control", srcs = [ "control.go", + "events.go", + "fs.go", + "lifecycle.go", "logging.go", "pprof.go", "proc.go", "state.go", + "usage.go", ], visibility = [ "//:sandbox", ], deps = [ "//pkg/abi/linux", + "//pkg/context", + "//pkg/eventchannel", "//pkg/fd", "//pkg/log", "//pkg/sentry/fdimport", @@ -35,6 +41,8 @@ go_library( "//pkg/sync", "//pkg/tcpip/link/sniffer", "//pkg/urpc", + "//pkg/usermem", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/control/events.go b/pkg/sentry/control/events.go new file mode 100644 index 000000000..92e437ae7 --- /dev/null +++ b/pkg/sentry/control/events.go @@ -0,0 +1,65 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "errors" + "fmt" + + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/urpc" +) + +// EventsOpts are the arguments for eventchannel-related commands. +type EventsOpts struct { + urpc.FilePayload +} + +// Events is the control server state for eventchannel-related commands. +type Events struct { + emitter eventchannel.Emitter +} + +// AttachDebugEmitter receives a connected unix domain socket FD from the client +// and establishes it as a new emitter for the sentry eventchannel. 
Any existing +// emitters are replaced on a subsequent attach. +func (e *Events) AttachDebugEmitter(o *EventsOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errors.New("no output writer provided") + } + + sock, err := o.ReleaseFD(0) + if err != nil { + return err + } + sockFD := sock.Release() + + // SocketEmitter takes ownership of sockFD. + emitter, err := eventchannel.SocketEmitter(sockFD) + if err != nil { + return fmt.Errorf("failed to create SocketEmitter for FD %d: %v", sockFD, err) + } + + // If there is already a debug emitter, close the old one. + if e.emitter != nil { + e.emitter.Close() + } + + e.emitter = eventchannel.DebugEmitterFrom(emitter) + + // Register the new stream destination. + eventchannel.AddEmitter(e.emitter) + return nil +} diff --git a/pkg/sentry/control/fs.go b/pkg/sentry/control/fs.go new file mode 100644 index 000000000..d19b21f2d --- /dev/null +++ b/pkg/sentry/control/fs.go @@ -0,0 +1,93 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "fmt" + "io" + "os" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/pkg/usermem" +) + +// CatOpts contains options for the Cat RPC call. +type CatOpts struct { + // Files are the filesystem paths for the files to cat. 
+ Files []string `json:"files"` + + // FilePayload contains the destination for output. + urpc.FilePayload +} + +// Fs includes fs-related functions. +type Fs struct { + Kernel *kernel.Kernel +} + +// Cat is a RPC stub which prints out and returns the content of the files. +func (f *Fs) Cat(o *CatOpts, _ *struct{}) error { + // Create an output stream. + if len(o.FilePayload.Files) != 1 { + return ErrInvalidFiles + } + + output := o.FilePayload.Files[0] + for _, file := range o.Files { + if err := cat(f.Kernel, file, output); err != nil { + return fmt.Errorf("cannot read from file %s: %v", file, err) + } + } + + return nil +} + +// fileReader encapsulates a fs.File and provides an io.Reader interface. +type fileReader struct { + ctx context.Context + file *fs.File +} + +// Read implements io.Reader.Read. +func (f *fileReader) Read(p []byte) (int, error) { + n, err := f.file.Readv(f.ctx, usermem.BytesIOSequence(p)) + return int(n), err +} + +func cat(k *kernel.Kernel, path string, output *os.File) error { + ctx := k.SupervisorContext() + mns := k.GlobalInit().Leader().MountNamespace() + root := mns.Root() + defer root.DecRef(ctx) + + remainingTraversals := uint(fs.DefaultTraversalLimit) + d, err := mns.FindInode(ctx, root, nil, path, &remainingTraversals) + if err != nil { + return fmt.Errorf("cannot find file %s: %v", path, err) + } + defer d.DecRef(ctx) + + file, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) + if err != nil { + return fmt.Errorf("cannot get file for path %s: %v", path, err) + } + defer file.DecRef(ctx) + + _, err = io.Copy(output, &fileReader{ctx: ctx, file: file}) + return err +} diff --git a/pkg/sentry/control/lifecycle.go b/pkg/sentry/control/lifecycle.go new file mode 100644 index 000000000..67abf497d --- /dev/null +++ b/pkg/sentry/control/lifecycle.go @@ -0,0 +1,36 @@ +// Copyright 2021 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel" +) + +// Lifecycle provides functions related to starting and stopping tasks. +type Lifecycle struct { + Kernel *kernel.Kernel +} + +// Pause pauses all tasks, blocking until they are stopped. +func (l *Lifecycle) Pause(_, _ *struct{}) error { + l.Kernel.Pause() + return nil +} + +// Resume resumes all tasks. +func (l *Lifecycle) Resume(_, _ *struct{}) error { + l.Kernel.Unpause() + return nil +} diff --git a/pkg/sentry/control/usage.go b/pkg/sentry/control/usage.go new file mode 100644 index 000000000..cc78d3f45 --- /dev/null +++ b/pkg/sentry/control/usage.go @@ -0,0 +1,183 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package control + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/urpc" +) + +// Usage includes usage-related RPC stubs. +type Usage struct { + Kernel *kernel.Kernel +} + +// MemoryUsageOpts contains usage options. +type MemoryUsageOpts struct { + // Full indicates that a full accounting should be done. If Full is not + // specified, then a partial accounting will be done, and Unknown will + // contain a majority of memory. See Collect for more information. + Full bool `json:"Full"` +} + +// MemoryUsage is a memory usage structure. +type MemoryUsage struct { + Unknown uint64 `json:"Unknown"` + System uint64 `json:"System"` + Anonymous uint64 `json:"Anonymous"` + PageCache uint64 `json:"PageCache"` + Mapped uint64 `json:"Mapped"` + Tmpfs uint64 `json:"Tmpfs"` + Ramdiskfs uint64 `json:"Ramdiskfs"` + Total uint64 `json:"Total"` +} + +// MemoryUsageFileOpts contains usage file options. +type MemoryUsageFileOpts struct { + // Version is used to ensure both sides agree on the format of the + // shared memory buffer. + Version uint64 `json:"Version"` +} + +// MemoryUsageFile contains the file handle to the usage file. +type MemoryUsageFile struct { + urpc.FilePayload +} + +// UsageFD returns the file that tracks the memory usage of the application. +func (u *Usage) UsageFD(opts *MemoryUsageFileOpts, out *MemoryUsageFile) error { + // Only support version 1 for now. + if opts.Version != 1 { + return fmt.Errorf("unsupported version requested: %d", opts.Version) + } + + mf := u.Kernel.MemoryFile() + *out = MemoryUsageFile{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{ + usage.MemoryAccounting.File, + mf.File(), + }, + }, + } + + return nil +} + +// Collect returns memory used by the sandboxed application. +func (u *Usage) Collect(opts *MemoryUsageOpts, out *MemoryUsage) error { + if opts.Full { + // Ensure everything is up to date. 
+ if err := u.Kernel.MemoryFile().UpdateUsage(); err != nil { + return err + } + + // Copy out a snapshot. + snapshot, total := usage.MemoryAccounting.Copy() + *out = MemoryUsage{ + System: snapshot.System, + Anonymous: snapshot.Anonymous, + PageCache: snapshot.PageCache, + Mapped: snapshot.Mapped, + Tmpfs: snapshot.Tmpfs, + Ramdiskfs: snapshot.Ramdiskfs, + Total: total, + } + } else { + // Get total usage from the MemoryFile implementation. + total, err := u.Kernel.MemoryFile().TotalUsage() + if err != nil { + return err + } + + // The memory accounting is guaranteed to be accurate only when + // UpdateUsage is called. If UpdateUsage is not called, then only Mapped + // will be up-to-date. + snapshot, _ := usage.MemoryAccounting.Copy() + *out = MemoryUsage{ + Unknown: total, + Mapped: snapshot.Mapped, + Total: total + snapshot.Mapped, + } + + } + + return nil +} + +// UsageReduceOpts contains options to Usage.Reduce(). +type UsageReduceOpts struct { + // If Wait is true, Reduce blocks until all activity initiated by + // Usage.Reduce() has completed. + Wait bool `json:"wait"` +} + +// UsageReduceOutput contains output from Usage.Reduce(). +type UsageReduceOutput struct{} + +// Reduce requests that the sentry attempt to reduce its memory usage. +func (u *Usage) Reduce(opts *UsageReduceOpts, out *UsageReduceOutput) error { + mf := u.Kernel.MemoryFile() + mf.StartEvictions() + if opts.Wait { + mf.WaitForEvictions() + } + return nil +} + +// MemoryUsageRecord contains the mapping and platform memory file. +type MemoryUsageRecord struct { + mmap uintptr + stats *usage.RTMemoryStats + mf os.File +} + +// NewMemoryUsageRecord creates a new MemoryUsageRecord from usageFile and +// platformFile. 
+func NewMemoryUsageRecord(usageFile, platformFile os.File) (*MemoryUsageRecord, error) { + mmap, _, e := unix.RawSyscall6(unix.SYS_MMAP, 0, usage.RTMemoryStatsSize, unix.PROT_READ, unix.MAP_SHARED, usageFile.Fd(), 0) + if e != 0 { + return nil, fmt.Errorf("mmap returned %d, want 0", e) + } + + m := MemoryUsageRecord{ + mmap: mmap, + stats: usage.RTMemoryStatsPointer(mmap), + mf: platformFile, + } + + runtime.SetFinalizer(&m, finalizer) + return &m, nil +} + +func finalizer(m *MemoryUsageRecord) { + unix.RawSyscall(unix.SYS_MUNMAP, m.mmap, usage.RTMemoryStatsSize, 0) +} + +// Fetch fetches the usage info from a MemoryUsageRecord. +func (m *MemoryUsageRecord) Fetch() (mapped, unknown, total uint64, err error) { + var stat unix.Stat_t + if err := unix.Fstat(int(m.mf.Fd()), &stat); err != nil { + return 0, 0, 0, err + } + fmem := uint64(stat.Blocks) * 512 + return m.stats.RTMapped, fmem, m.stats.RTMapped + fmem, nil +} diff --git a/pkg/sentry/devices/memdev/BUILD b/pkg/sentry/devices/memdev/BUILD index 4c8604d58..66b9ed523 100644 --- a/pkg/sentry/devices/memdev/BUILD +++ b/pkg/sentry/devices/memdev/BUILD @@ -15,6 +15,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/rand", "//pkg/safemem", "//pkg/sentry/fsimpl/devtmpfs", @@ -23,7 +24,6 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go index fece3e762..fc702c9f6 100644 --- a/pkg/sentry/devices/memdev/full.go +++ b/pkg/sentry/devices/memdev/full.go @@ -16,8 +16,8 @@ package memdev import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -66,12 +66,12 @@ func (fd *fullFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.Rea // PWrite implements vfs.FileDescriptionImpl.PWrite. 
func (fd *fullFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ENOSPC + return 0, linuxerr.ENOSPC } // Write implements vfs.FileDescriptionImpl.Write. func (fd *fullFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ENOSPC + return 0, linuxerr.ENOSPC } // Seek implements vfs.FileDescriptionImpl.Seek. diff --git a/pkg/sentry/devices/quotedev/BUILD b/pkg/sentry/devices/quotedev/BUILD index d09214e3e..ee946610a 100644 --- a/pkg/sentry/devices/quotedev/BUILD +++ b/pkg/sentry/devices/quotedev/BUILD @@ -9,8 +9,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/devices/quotedev/quotedev.go b/pkg/sentry/devices/quotedev/quotedev.go index 6114cb724..140856a4a 100644 --- a/pkg/sentry/devices/quotedev/quotedev.go +++ b/pkg/sentry/devices/quotedev/quotedev.go @@ -18,9 +18,9 @@ package quotedev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -35,7 +35,7 @@ type quoteDevice struct{} // Open implements vfs.Device.Open. // TODO(b/157161182): Add support for attestation ioctls. func (quoteDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return nil, syserror.EIO + return nil, linuxerr.EIO } // Register registers all devices implemented by this package in vfsObj. 
diff --git a/pkg/sentry/devices/ttydev/BUILD b/pkg/sentry/devices/ttydev/BUILD index b4b6ca38a..ab4cd0b33 100644 --- a/pkg/sentry/devices/ttydev/BUILD +++ b/pkg/sentry/devices/ttydev/BUILD @@ -9,8 +9,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go index a287c65ca..29b79b5d6 100644 --- a/pkg/sentry/devices/ttydev/ttydev.go +++ b/pkg/sentry/devices/ttydev/ttydev.go @@ -18,9 +18,9 @@ package ttydev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -36,7 +36,7 @@ type ttyDevice struct{} // Open implements vfs.Device.Open. func (ttyDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return nil, syserror.EIO + return nil, linuxerr.EIO } // Register registers all devices implemented by this package in vfsObj. 
diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 58fe1e77c..4e573d249 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -68,7 +68,6 @@ go_library( "//pkg/sentry/usage", "//pkg/state", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index a8591052c..e48bd4dba 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -195,7 +194,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { attrs, err := next.Inode.overlay.lower.UnstableAttr(ctx) if err != nil { log.Warningf("copy up failed to get lower attributes: %v", err) - return syserror.EIO + return linuxerr.EIO } var childUpperInode *Inode @@ -211,7 +210,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { childFile, err := parentUpper.Create(ctx, root, next.name, FileFlags{Read: true, Write: true}, attrs.Perms) if err != nil { log.Warningf("copy up failed to create file: %v", err) - return syserror.EIO + return linuxerr.EIO } defer childFile.DecRef(ctx) childUpperInode = childFile.Dirent.Inode @@ -219,13 +218,13 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { case Directory: if err := parentUpper.CreateDirectory(ctx, root, next.name, attrs.Perms); err != nil { log.Warningf("copy up failed to create directory: %v", err) - return syserror.EIO + return linuxerr.EIO } childUpper, err := parentUpper.Lookup(ctx, next.name) if err != nil { werr := fmt.Errorf("copy up failed to lookup directory: %v", err) cleanupUpper(ctx, parentUpper, next.name, werr) - return syserror.EIO + return linuxerr.EIO } defer childUpper.DecRef(ctx) childUpperInode = childUpper.Inode @@ -235,17 +234,17 @@ func 
copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { link, err := childLower.Readlink(ctx) if err != nil { log.Warningf("copy up failed to read symlink value: %v", err) - return syserror.EIO + return linuxerr.EIO } if err := parentUpper.CreateLink(ctx, root, link, next.name); err != nil { log.Warningf("copy up failed to create symlink: %v", err) - return syserror.EIO + return linuxerr.EIO } childUpper, err := parentUpper.Lookup(ctx, next.name) if err != nil { werr := fmt.Errorf("copy up failed to lookup symlink: %v", err) cleanupUpper(ctx, parentUpper, next.name, werr) - return syserror.EIO + return linuxerr.EIO } defer childUpper.DecRef(ctx) childUpperInode = childUpper.Inode @@ -259,14 +258,14 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { if err := copyAttributesLocked(ctx, childUpperInode, next.Inode.overlay.lower); err != nil { werr := fmt.Errorf("copy up failed to copy up attributes: %v", err) cleanupUpper(ctx, parentUpper, next.name, werr) - return syserror.EIO + return linuxerr.EIO } // Copy the entire file. if err := copyContentsLocked(ctx, childUpperInode, next.Inode.overlay.lower, attrs.Size); err != nil { werr := fmt.Errorf("copy up failed to copy up contents: %v", err) cleanupUpper(ctx, parentUpper, next.name, werr) - return syserror.EIO + return linuxerr.EIO } lowerMappable := next.Inode.overlay.lower.Mappable() @@ -274,7 +273,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { if lowerMappable != nil && upperMappable == nil { werr := fmt.Errorf("copy up failed: cannot ensure memory mapping coherence") cleanupUpper(ctx, parentUpper, next.name, werr) - return syserror.EIO + return linuxerr.EIO } // Propagate memory mappings to the upper Inode. 
diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index e28a8961b..7baf26b24 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -34,7 +34,6 @@ go_library( "//pkg/sentry/mm", "//pkg/sentry/pgalloc", "//pkg/sentry/socket/netstack", - "//pkg/syserror", "//pkg/tcpip/link/tun", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go index deb9c6ad8..6f0c1fc68 100644 --- a/pkg/sentry/fs/dev/full.go +++ b/pkg/sentry/fs/dev/full.go @@ -17,9 +17,9 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -77,5 +77,5 @@ var _ fs.FileOperations = (*fullFileOperations)(nil) // Write implements FileOperations.Write. func (*fullFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.ENOSPC + return 0, linuxerr.ENOSPC } diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index ad8ff227e..d300a32e0 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) type globalDirentMap struct { @@ -963,7 +962,7 @@ func (d *Dirent) isMountPointLocked() bool { func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err error) { // Did we race with deletion? if atomic.LoadInt32(&d.deleted) != 0 { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } // Refuse to mount a symlink. @@ -998,7 +997,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error { // Did we race with deletion? 
if atomic.LoadInt32(&d.deleted) != 0 { - return syserror.ENOENT + return linuxerr.ENOENT } // Remount our former child in its place. diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index 5c889c861..9f1fe5160 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -22,7 +22,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", @@ -46,7 +45,6 @@ go_test( "//pkg/hostarch", "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/syserror", "//pkg/usermem", "@com_github_google_uuid//:go_default_library", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index f8a29816b..4370cce33 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -142,7 +141,7 @@ func (p *pipeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO n, err := dst.CopyOutFrom(ctx, safemem.FromIOReader{secio.FullReader{p.file}}) total := int64(bufN) + n if err != nil && isBlockError(err) { - return total, syserror.ErrWouldBlock + return total, linuxerr.ErrWouldBlock } return total, err } @@ -151,13 +150,13 @@ func (p *pipeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO func (p *pipeOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { n, err := src.CopyInTo(ctx, safemem.FromIOWriter{p.file}) if err != nil && isBlockError(err) { - return n, syserror.ErrWouldBlock + return n, linuxerr.ErrWouldBlock } return n, err } // isBlockError unwraps os errors and checks if they are caused by EAGAIN or -// EWOULDBLOCK. 
This is so they can be transformed into syserror.ErrWouldBlock. +// EWOULDBLOCK. This is so they can be transformed into linuxerr.ErrWouldBlock. func isBlockError(err error) bool { if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) { return true diff --git a/pkg/sentry/fs/fdpipe/pipe_opener.go b/pkg/sentry/fs/fdpipe/pipe_opener.go index adda19168..e91e1b5cb 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener.go @@ -21,9 +21,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" ) // NonBlockingOpener is a generic host file opener used to retry opening host @@ -40,7 +40,7 @@ func Open(ctx context.Context, opener NonBlockingOpener, flags fs.FileFlags) (fs p := &pipeOpenState{} canceled := false for { - if file, err := p.TryOpen(ctx, opener, flags); err != syserror.ErrWouldBlock { + if file, err := p.TryOpen(ctx, opener, flags); err != linuxerr.ErrWouldBlock { return file, err } @@ -51,7 +51,7 @@ func Open(ctx context.Context, opener NonBlockingOpener, flags fs.FileFlags) (fs if p.hostFile != nil { p.hostFile.Close() } - return nil, syserror.ErrInterrupted + return nil, linuxerr.ErrInterrupted } cancel := ctx.SleepStart() @@ -106,13 +106,13 @@ func (p *pipeOpenState) TryOpen(ctx context.Context, opener NonBlockingOpener, f } return newPipeOperations(ctx, opener, flags, f, nil) - // Handle opening O_WRONLY blocking: convert ENXIO to syserror.ErrWouldBlock. + // Handle opening O_WRONLY blocking: convert ENXIO to linuxerr.ErrWouldBlock. // See TryOpenWriteOnly for more details. case flags.Write: return p.TryOpenWriteOnly(ctx, opener) default: - // Handle opening O_RDONLY blocking: convert EOF from read to syserror.ErrWouldBlock. + // Handle opening O_RDONLY blocking: convert EOF from read to linuxerr.ErrWouldBlock. 
// See TryOpenReadOnly for more details. return p.TryOpenReadOnly(ctx, opener) } @@ -120,7 +120,7 @@ func (p *pipeOpenState) TryOpen(ctx context.Context, opener NonBlockingOpener, f // TryOpenReadOnly tries to open a host pipe read only but only returns a fs.File when // there is a coordinating writer. Call TryOpenReadOnly repeatedly on the same pipeOpenState -// until syserror.ErrWouldBlock is no longer returned. +// until linuxerr.ErrWouldBlock is no longer returned. // // How it works: // @@ -150,7 +150,7 @@ func (p *pipeOpenState) TryOpenReadOnly(ctx context.Context, opener NonBlockingO if n == 0 { // EOF means that we're not ready yet. if rerr == nil || rerr == io.EOF { - return nil, syserror.ErrWouldBlock + return nil, linuxerr.ErrWouldBlock } // Any error that is not EWOULDBLOCK also means we're not // ready yet, and probably never will be ready. In this @@ -175,16 +175,16 @@ func (p *pipeOpenState) TryOpenReadOnly(ctx context.Context, opener NonBlockingO // TryOpenWriteOnly tries to open a host pipe write only but only returns a fs.File when // there is a coordinating reader. Call TryOpenWriteOnly repeatedly on the same pipeOpenState -// until syserror.ErrWouldBlock is no longer returned. +// until linuxerr.ErrWouldBlock is no longer returned. // // How it works: // // Opening a pipe write only will return ENXIO until readers are available. Converts the ENXIO -// to an syserror.ErrWouldBlock, to tell callers to retry. +// to an linuxerr.ErrWouldBlock, to tell callers to retry. 
func (*pipeOpenState) TryOpenWriteOnly(ctx context.Context, opener NonBlockingOpener) (*pipeOperations, error) { hostFile, err := opener.NonBlockingOpen(ctx, fs.PermMask{Write: true}) if unwrapError(err) == unix.ENXIO { - return nil, syserror.ErrWouldBlock + return nil, linuxerr.ErrWouldBlock } if err != nil { return nil, err diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 89d8be741..e1587288e 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -30,7 +30,6 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -146,18 +145,18 @@ func TestTryOpen(t *testing.T) { err: unix.ENOENT, }, { - desc: "Blocking Write only returns with syserror.ErrWouldBlock", + desc: "Blocking Write only returns with linuxerr.ErrWouldBlock", makePipe: true, flags: fs.FileFlags{Write: true}, expectFile: false, - err: syserror.ErrWouldBlock, + err: linuxerr.ErrWouldBlock, }, { - desc: "Blocking Read only returns with syserror.ErrWouldBlock", + desc: "Blocking Read only returns with linuxerr.ErrWouldBlock", makePipe: true, flags: fs.FileFlags{Read: true}, expectFile: false, - err: syserror.ErrWouldBlock, + err: linuxerr.ErrWouldBlock, }, } { name := pipename() @@ -316,7 +315,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) { // another writer comes along. This means we can open the same pipe write only // with no problems + write to it, given that opener.Open already tried to open // the pipe RDONLY and succeeded, which we know happened if TryOpen returns - // syserror.ErrwouldBlock. + // linuxerr.ErrwouldBlock. // // This simulates the open(RDONLY) <-> open(WRONLY)+write race we care about, but // does not cause our test to be racy (which would be terrible). 
@@ -328,8 +327,8 @@ func TestCopiedReadAheadBuffer(t *testing.T) { pipeOps.Release(ctx) t.Fatalf("open(%s, %o) got file, want nil", name, unix.O_RDONLY) } - if err != syserror.ErrWouldBlock { - t.Fatalf("open(%s, %o) got error %v, want %v", name, unix.O_RDONLY, err, syserror.ErrWouldBlock) + if err != linuxerr.ErrWouldBlock { + t.Fatalf("open(%s, %o) got error %v, want %v", name, unix.O_RDONLY, err, linuxerr.ErrWouldBlock) } // Then open the same pipe write only and write some bytes to it. The next diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index 4c8905a7e..63900e766 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -238,7 +237,7 @@ func TestPipeRequest(t *testing.T) { context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, flags: fs.FileFlags{Read: true}, keepOpenPartner: true, - err: syserror.ErrWouldBlock, + err: linuxerr.ErrWouldBlock, }, { desc: "Writev on pipe from empty buffer returns nil", @@ -410,8 +409,8 @@ func TestPipeReadsAccumulate(t *testing.T) { n, err := p.Read(ctx, file, iov, 0) total := n iov = iov.DropFirst64(n) - if err != syserror.ErrWouldBlock { - t.Fatalf("Readv got error %v, want %v", err, syserror.ErrWouldBlock) + if err != linuxerr.ErrWouldBlock { + t.Fatalf("Readv got error %v, want %v", err, linuxerr.ErrWouldBlock) } // Write a few more bytes to allow us to read more/accumulate. 
@@ -479,8 +478,8 @@ func TestPipeWritesAccumulate(t *testing.T) { } iov := usermem.BytesIOSequence(writeBuffer) n, err := p.Write(ctx, file, iov, 0) - if err != syserror.ErrWouldBlock { - t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock) + if err != linuxerr.ErrWouldBlock { + t.Fatalf("Writev got error %v, want %v", err, linuxerr.ErrWouldBlock) } if n != int64(pipeSize) { t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 57f904801..df04f044d 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsmetric" @@ -27,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -195,10 +195,10 @@ func (f *File) EventUnregister(e *waiter.Entry) { // offset to the value returned by f.FileOperations.Seek if the operation // is successful. // -// Returns syserror.ErrInterrupted if seeking was interrupted. +// Returns linuxerr.ErrInterrupted if seeking was interrupted. func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64, error) { if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -217,10 +217,10 @@ func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64 // Readdir unconditionally updates the access time on the File's Inode, // see fs/readdir.c:iterate_dir. // -// Returns syserror.ErrInterrupted if reading was interrupted. +// Returns linuxerr.ErrInterrupted if reading was interrupted. 
func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error { if !f.mu.Lock(ctx) { - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -232,13 +232,13 @@ func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error { // Readv calls f.FileOperations.Read with f as the File, advancing the file // offset if f.FileOperations.Read returns bytes read > 0. // -// Returns syserror.ErrInterrupted if reading was interrupted. +// Returns linuxerr.ErrInterrupted if reading was interrupted. func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error) { start := fsmetric.StartReadWait() defer fsmetric.FinishReadWait(fsmetric.ReadWait, start) if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } fsmetric.Reads.Increment() @@ -260,7 +260,7 @@ func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64) defer fsmetric.FinishReadWait(fsmetric.ReadWait, start) if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } fsmetric.Reads.Increment() @@ -276,10 +276,10 @@ func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64) // unavoidably racy for network file systems. Writev also truncates src // to avoid overrunning the current file size limit if necessary. // -// Returns syserror.ErrInterrupted if writing was interrupted. +// Returns linuxerr.ErrInterrupted if writing was interrupted. func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error) { if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) // Handle append mode. 
@@ -297,7 +297,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error case ok && limit == 0: unlockAppendMu() f.mu.Unlock() - return 0, syserror.ErrExceedsFileSizeLimit + return 0, linuxerr.ErrExceedsFileSizeLimit case ok: src = src.TakeFirst64(limit) } @@ -335,7 +335,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 limit, ok := f.checkLimit(ctx, offset) switch { case ok && limit == 0: - return 0, syserror.ErrExceedsFileSizeLimit + return 0, linuxerr.ErrExceedsFileSizeLimit case ok: src = src.TakeFirst64(limit) } @@ -352,7 +352,7 @@ func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { if err != nil { // This is an odd error, we treat it as evidence that // something is terribly wrong with the filesystem. - return syserror.EIO + return linuxerr.EIO } // Update the offset. @@ -381,10 +381,10 @@ func (f *File) checkLimit(ctx context.Context, offset int64) (int64, bool) { // Fsync calls f.FileOperations.Fsync with f as the File. // -// Returns syserror.ErrInterrupted if syncing was interrupted. +// Returns linuxerr.ErrInterrupted if syncing was interrupted. func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncType) error { if !f.mu.Lock(ctx) { - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -393,10 +393,10 @@ func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncT // Flush calls f.FileOperations.Flush with f as the File. // -// Returns syserror.ErrInterrupted if syncing was interrupted. +// Returns linuxerr.ErrInterrupted if syncing was interrupted. func (f *File) Flush(ctx context.Context) error { if !f.mu.Lock(ctx) { - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -405,10 +405,10 @@ func (f *File) Flush(ctx context.Context) error { // ConfigureMMap calls f.FileOperations.ConfigureMMap with f as the File. 
// -// Returns syserror.ErrInterrupted if interrupted. +// Returns linuxerr.ErrInterrupted if interrupted. func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { if !f.mu.Lock(ctx) { - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -417,10 +417,10 @@ func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { // UnstableAttr calls f.FileOperations.UnstableAttr with f as the File. // -// Returns syserror.ErrInterrupted if interrupted. +// Returns linuxerr.ErrInterrupted if interrupted. func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) { if !f.mu.Lock(ctx) { - return UnstableAttr{}, syserror.ErrInterrupted + return UnstableAttr{}, linuxerr.ErrInterrupted } defer f.mu.Unlock() @@ -495,7 +495,7 @@ type lockedReader struct { // Read implements io.Reader.Read. func (r *lockedReader) Read(buf []byte) (int, error) { if r.Ctx.Interrupted() { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), r.Offset) r.Offset += n @@ -505,7 +505,7 @@ func (r *lockedReader) Read(buf []byte) (int, error) { // ReadAt implements io.Reader.ReadAt. func (r *lockedReader) ReadAt(buf []byte, offset int64) (int, error) { if r.Ctx.Interrupted() { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), offset) return int(n), err @@ -530,7 +530,7 @@ type lockedWriter struct { // Write implements io.Writer.Write. func (w *lockedWriter) Write(buf []byte) (int, error) { if w.Ctx.Interrupted() { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } n, err := w.WriteAt(buf, w.Offset) w.Offset += int64(n) @@ -549,7 +549,7 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { // contract. Enforce that here. 
for written < len(buf) { if w.Ctx.Interrupted() { - return written, syserror.ErrInterrupted + return written, linuxerr.ErrInterrupted } var n int64 n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written)) diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go index 6ec721022..ce47c3907 100644 --- a/pkg/sentry/fs/file_operations.go +++ b/pkg/sentry/fs/file_operations.go @@ -120,7 +120,7 @@ type FileOperations interface { // Files with !FileFlags.Pwrite. // // If only part of src could be written, Write must return an error - // indicating why (e.g. syserror.ErrWouldBlock). + // indicating why (e.g. linuxerr.ErrWouldBlock). // // Write does not check permissions nor flags. // diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 06c07c807..031cd33ce 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -246,7 +245,7 @@ func (f *overlayFileOperations) onTop(ctx context.Context, file *File, fn func(* // Something very wrong; return a generic filesystem // error to avoid propagating internals. f.upperMu.Unlock() - return syserror.EIO + return linuxerr.EIO } // Save upper file. 
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 6bf2d51cb..1a59800ea 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -90,7 +90,6 @@ go_library( "//pkg/sentry/usage", "//pkg/state", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go index 00b3bb29b..3ece73b81 100644 --- a/pkg/sentry/fs/fsutil/file.go +++ b/pkg/sentry/fs/fsutil/file.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -232,12 +231,12 @@ type FileNoSplice struct{} // WriteTo implements fs.FileOperations.WriteTo. func (FileNoSplice) WriteTo(context.Context, *fs.File, io.Writer, int64, bool) (int64, error) { - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // ReadFrom implements fs.FileOperations.ReadFrom. func (FileNoSplice) ReadFrom(context.Context, *fs.File, io.Reader, int64) (int64, error) { - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // DirFileOperations implements most of fs.FileOperations for directories, @@ -255,12 +254,12 @@ type DirFileOperations struct { // Read implements fs.FileOperations.Read func (*DirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Write implements fs.FileOperations.Write. 
func (*DirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // StaticDirFileOperations implements fs.FileOperations for directories with diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 7c2de04c1..06a994193 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -167,7 +166,7 @@ func (i *InodeSimpleAttributes) DropLink() { // StatFS implements fs.InodeOperations.StatFS. func (i *InodeSimpleAttributes) StatFS(context.Context) (fs.Info, error) { if i.fsType == 0 { - return fs.Info{}, syserror.ENOSYS + return fs.Info{}, linuxerr.ENOSYS } return fs.Info{Type: i.fsType}, nil } @@ -294,7 +293,7 @@ type InodeNoStatFS struct{} // StatFS implements fs.InodeOperations.StatFS. func (InodeNoStatFS) StatFS(context.Context) (fs.Info, error) { - return fs.Info{}, syserror.ENOSYS + return fs.Info{}, linuxerr.ENOSYS } // InodeStaticFileGetter implements GetFile for a file with static contents. @@ -401,7 +400,7 @@ type InodeIsDirTruncate struct{} // Truncate implements fs.InodeOperations.Truncate. func (InodeIsDirTruncate) Truncate(context.Context, *fs.Inode, int64) error { - return syserror.EISDIR + return linuxerr.EISDIR } // InodeNoopTruncate implements fs.InodeOperations.Truncate as a noop. @@ -425,7 +424,7 @@ type InodeNotOpenable struct{} // GetFile implements fs.InodeOperations.GetFile. func (InodeNotOpenable) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error) { - return nil, syserror.EIO + return nil, linuxerr.EIO } // InodeNotVirtual can be used by Inodes that are not virtual. @@ -529,5 +528,5 @@ type InodeIsDirAllocate struct{} // Allocate implements fs.InodeOperations.Allocate. 
func (InodeIsDirAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { - return syserror.EISDIR + return linuxerr.EISDIR } diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index c08301d19..ee2f287d9 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -26,6 +26,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors", "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/hostarch", @@ -48,7 +49,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/unet", "//pkg/usermem", "//pkg/waiter", @@ -63,10 +63,10 @@ go_test( library = ":gofer", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/p9", "//pkg/p9/p9test", "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go index 73d80d9b5..62a517cd7 100644 --- a/pkg/sentry/fs/gofer/file.go +++ b/pkg/sentry/fs/gofer/file.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/p9" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -226,7 +226,7 @@ func (f *fileOperations) maybeSync(ctx context.Context, file *fs.File, offset, n func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { if fs.IsDir(file.Dirent.Inode.StableAttr) { // Not all remote file systems enforce this so this client does. 
- return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } var ( @@ -294,7 +294,7 @@ func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO if fs.IsDir(file.Dirent.Inode.StableAttr) { // Not all remote file systems enforce this so this client does. f.incrementReadCounters(start) - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) { diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 546ee7d04..4924debeb 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -19,8 +19,8 @@ import ( "testing" "time" - "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/p9/p9test" "gvisor.dev/gvisor/pkg/sentry/contexttest" @@ -97,7 +97,7 @@ func TestLookup(t *testing.T) { }, { name: "mock Walk fails (function fails)", - want: unix.ENOENT, + want: linuxerr.ENOENT, }, } @@ -123,7 +123,7 @@ func TestLookup(t *testing.T) { var newInodeOperations fs.InodeOperations if dirent != nil { if dirent.IsNegative() { - err = unix.ENOENT + err = linuxerr.ENOENT } else { newInodeOperations = dirent.Inode.InodeOperations } @@ -131,9 +131,11 @@ func TestLookup(t *testing.T) { // Check return values. 
if err != test.want { + t.Logf("err: %v %T", err, err) t.Errorf("Lookup got err %v, want %v", err, test.want) } if err == nil && newInodeOperations == nil { + t.Logf("err: %v %T", err, err) t.Errorf("Lookup got non-nil err and non-nil node, wanted at least one non-nil") } }) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 9ff64a8b6..c3856094f 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + gErr "gvisor.dev/gvisor/pkg/errors" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" @@ -32,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // inodeOperations implements fs.InodeOperations. @@ -719,12 +719,12 @@ func (i *inodeOperations) configureMMap(file *fs.File, opts *memmap.MMapOpts) er } func init() { - syserror.AddErrorUnwrapper(func(err error) (unix.Errno, bool) { + linuxerr.AddErrorUnwrapper(func(err error) (*gErr.Error, bool) { if _, ok := err.(p9.ErrSocket); ok { // Treat as an I/O error. - return unix.EIO, true + return linuxerr.EIO, true } - return 0, false + return nil, false }) } diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 88d83060c..2f8769f1e 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" ) // maxFilenameLen is the maximum length of a filename. 
This is dictated by 9P's @@ -60,7 +59,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string if cp.cacheNegativeDirents() { return fs.NewNegativeDirent(name), nil } - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } i.readdirMu.Unlock() } @@ -74,7 +73,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string // is created over it. return fs.NewNegativeDirent(name), nil } - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return nil, err } @@ -169,7 +168,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string hostFile.Close() } unopened.close(ctx) - return nil, syserror.EIO + return nil, linuxerr.EIO } qid := qids[0] diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 24fc6305c..921612e9c 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -52,7 +52,6 @@ go_library( "//pkg/sentry/uniqueid", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/unet", "//pkg/usermem", diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index 77c08a7ce..1d0d95634 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -201,7 +200,7 @@ func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.I writer := fd.NewReadWriter(f.iops.fileState.FD()) n, err := src.CopyInTo(ctx, safemem.FromIOWriter{writer}) if isBlockError(err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } return n, err } @@ -232,7 +231,7 @@ func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IO if n != 0 { err = nil } else { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } } return n, err diff --git 
a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 5f6af2067..92d58e3e9 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -220,7 +219,7 @@ func (i *inodeOperations) Release(context.Context) { // Lookup implements fs.InodeOperations.Lookup. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } // Create implements fs.InodeOperations.Create. @@ -400,7 +399,7 @@ func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error // StatFS implements fs.InodeOperations.StatFS. func (i *inodeOperations) StatFS(context.Context) (fs.Info, error) { - return fs.Info{}, syserror.ENOSYS + return fs.Info{}, linuxerr.ENOSYS } // AddLink implements fs.InodeOperations.AddLink. diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 6f38b25c3..4e561c5ed 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -327,7 +326,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e // If the signal is SIGTTIN, then we are attempting to read // from the TTY. Don't send the signal and return EIO. if sig == linux.SIGTTIN { - return syserror.EIO + return linuxerr.EIO } // Otherwise, we are writing or changing terminal state. This is allowed. @@ -336,7 +335,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e // If the process group is an orphan, return EIO. 
if pg.IsOrphan() { - return syserror.EIO + return linuxerr.EIO } // Otherwise, send the signal to the process group and return ERESTARTSYS. @@ -349,7 +348,7 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e // // Linux ignores the result of kill_pgrp(). _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) - return syserror.ERESTARTSYS + return linuxerr.ERESTARTSYS } // LINT.ThenChange(../../fsimpl/host/tty.go) diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index e7db79189..f2a33cc14 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -96,7 +96,7 @@ type dirInfo struct { // LINT.IfChange // isBlockError unwraps os errors and checks if they are caused by EAGAIN or -// EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock. +// EWOULDBLOCK. This is so they can be transformed into linuxerr.ErrWouldBlock. func isBlockError(err error) bool { if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) { return true diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index ec204e5cf..2c6b9e9db 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Inode is a file system object that can be simultaneously referenced by different @@ -357,7 +356,7 @@ func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error // Truncate calls i.InodeOperations.Truncate with i as the Inode. 
func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { if IsDir(i.StableAttr) { - return syserror.EISDIR + return linuxerr.EISDIR } if i.overlay != nil { diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 98e9fb2b1..0f8022906 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -66,7 +66,7 @@ type InodeOperations interface { // // * A nil Dirent and a non-nil error. If the reason that Lookup failed // was because the name does not exist under Inode, then must return - // syserror.ENOENT. + // linuxerr.ENOENT. // // * If name does not exist under dir and the file system wishes this // fact to be cached, a non-nil Dirent containing a nil Inode and a diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index c47b9ce58..21ad7fa69 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" ) func overlayHasWhiteout(ctx context.Context, parent *Inode, name string) bool { @@ -103,7 +102,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name // Upper fs is not OK with a negative Dirent // being cached in the Dirent tree, so don't // return one. - return nil, false, syserror.ENOENT + return nil, false, linuxerr.ENOENT } entry, err := newOverlayEntry(ctx, upperInode, nil, false) if err != nil { @@ -165,7 +164,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name if negativeUpperChild { return NewNegativeDirent(name), false, nil } - return nil, false, syserror.ENOENT + return nil, false, linuxerr.ENOENT } // Did we find a lower Inode? 
Remember this because we may decide we don't diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index ee28b0f99..51cd6cd37 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -141,7 +140,7 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i if i.events.Empty() { // Nothing to read yet, tell caller to block. - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } var writeLen int64 @@ -179,7 +178,7 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i // WriteTo implements FileOperations.WriteTo. func (*Inotify) WriteTo(context.Context, *File, io.Writer, int64, bool) (int64, error) { - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // Fsync implements FileOperations.Fsync. @@ -189,7 +188,7 @@ func (*Inotify) Fsync(context.Context, *File, int64, int64, SyncType) error { // ReadFrom implements FileOperations.ReadFrom. func (*Inotify) ReadFrom(context.Context, *File, io.Reader, int64) (int64, error) { - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // Flush implements FileOperations.Flush. 
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index e6d74b949..bc75ae505 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -50,7 +50,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip/header", "//pkg/tcpip/network/ipv4", "//pkg/usermem", diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index e90da225a..e68bb46c0 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -20,12 +20,12 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -37,7 +37,7 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF n, err := strconv.ParseUint(p, 10, 64) if err != nil { // Not found. 
- return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } var file *fs.File @@ -48,7 +48,7 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF } }) if file == nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return toInode(file, fdFlags), nil } diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 546b57287..dc8211871 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -125,7 +124,7 @@ func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { if t := kernel.TaskFromContext(ctx); t != nil { tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) if tgid == 0 { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } return strconv.FormatUint(uint64(tgid), 10), nil } @@ -149,7 +148,7 @@ func (s *threadSelf) Readlink(ctx context.Context, inode *fs.Inode) (string, err tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) tid := s.pidns.IDOfTask(t) if tid == 0 || tgid == 0 { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } return fmt.Sprintf("%d/task/%d", tgid, tid), nil } diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index edd62b857..89a799b21 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -35,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -182,7 +181,7 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry tasks := f.t.ThreadGroup().MemberIDs(f.pidns) if len(tasks) == 0 { - return offset, syserror.ENOENT + return offset, linuxerr.ENOENT } if offset == 0 { @@ -234,15 +233,15 @@ var _ 
fs.FileOperations = (*subtasksFile)(nil) func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) { tid, err := strconv.ParseUint(p, 10, 32) if err != nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } task := s.p.pidns.TaskWithID(kernel.ThreadID(tid)) if task == nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } if task.ThreadGroup() != s.t.ThreadGroup() { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } td := s.p.newTaskDir(ctx, task, dir.MountSource, false) @@ -479,7 +478,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen return int64(n), nil } if readErr != nil { - return 0, syserror.EIO + return 0, linuxerr.EIO } return 0, nil } diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index b46567cf8..bfff010c5 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -21,7 +21,6 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/socket/unix/transport", "//pkg/sync", - "//pkg/syserror", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index 33023af77..b1fadee7a 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // CreateOps represents operations to create different file types. @@ -284,9 +283,9 @@ func (d *Dir) walkLocked(ctx context.Context, p string) (*fs.Inode, error) { return inode, nil } - // fs.InodeOperations.Lookup returns syserror.ENOENT if p + // fs.InodeOperations.Lookup returns linuxerr.ENOENT if p // does not exist. 
- return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } // createInodeOperationsCommon creates a new child node at this dir by calling diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index fff4befb2..266140f6f 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/syserror" ) // Splice moves data to this file, directly from another. @@ -55,26 +54,26 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, case dst.UniqueID < src.UniqueID: // Acquire dst first. if !dst.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } if !src.mu.Lock(ctx) { dst.mu.Unlock() - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } case dst.UniqueID > src.UniqueID: // Acquire src first. if !src.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } if !dst.mu.Lock(ctx) { src.mu.Unlock() - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } case dst.UniqueID == src.UniqueID: // Acquire only one lock; it's the same file. This is a // bit of a edge case, but presumably it's possible. if !dst.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } srcLock = false // Only need one unlock. } @@ -84,13 +83,13 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, case dstLock: // Acquire only dst. if !dst.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } opts.DstStart = dst.offset // Safe: locked. case srcLock: // Acquire only src. if !src.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted + return 0, linuxerr.ErrInterrupted } opts.SrcStart = src.offset // Safe: locked. 
} @@ -108,7 +107,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, limit, ok := dst.checkLimit(ctx, opts.DstStart) switch { case ok && limit == 0: - err = syserror.ErrExceedsFileSizeLimit + err = linuxerr.ErrExceedsFileSizeLimit case ok && limit < opts.Length: opts.Length = limit // Cap the write. } diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index 0148b33cf..e61115932 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -14,7 +14,6 @@ go_library( "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel/time", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index 093a14c1f..1c8518d71 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -134,7 +133,7 @@ func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.I } return sizeofUint64, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Write implements fs.FileOperations.Write. 
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 5933cb67b..9e9dc06f3 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -31,7 +31,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 3242dcb6a..5716e2ee9 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -155,12 +154,12 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str n, err := strconv.ParseUint(name, 10, 32) if err != nil { // Not found. - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } s, ok := d.replicas[uint32(n)] if !ok { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } s.IncRef() @@ -235,7 +234,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e n := d.next if n == math.MaxUint32 { - return nil, syserror.ENOMEM + return nil, linuxerr.ENOMEM } if _, ok := d.replicas[n]; ok { @@ -335,10 +334,10 @@ func (df *dirFileOperations) Readdir(ctx context.Context, file *fs.File, seriali // Read implements FileOperations.Read func (df *dirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Write implements FileOperations.Write. 
func (df *dirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 3ba02c218..f9fca6d8e 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -20,10 +20,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -193,7 +193,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque } return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { @@ -207,7 +207,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ l.replicaWaiter.Notify(waiter.ReadableEvents) return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error { @@ -228,7 +228,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ } return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { @@ -242,7 +242,7 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq l.masterWaiter.Notify(waiter.ReadableEvents) return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // transformer is a helper interface to make it easier to stateify queue. 
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 11d6c15d0..25d3c887e 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -17,12 +17,12 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -110,7 +110,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl defer q.mu.Unlock() if !q.readable { - return 0, false, syserror.ErrWouldBlock + return 0, false, linuxerr.ErrWouldBlock } if dst.NumBytes() > canonMaxBytes { @@ -155,7 +155,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip room := waitBufMaxBytes - q.waitBufLen // If out of room, return EAGAIN. if room == 0 && copyLen > 0 { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Cap the size of the wait buffer. 
if copyLen > room { diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD index 4acc73ee0..23b5508fd 100644 --- a/pkg/sentry/fs/user/BUILD +++ b/pkg/sentry/fs/user/BUILD @@ -19,7 +19,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go index f6eaab2bd..67a9adfd7 100644 --- a/pkg/sentry/fs/user/path.go +++ b/pkg/sentry/fs/user/path.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // ResolveExecutablePath resolves the given executable name given the working @@ -81,7 +80,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s root := fs.RootFromContext(ctx) if root == nil { // Caller has no root. Don't bother traversing anything. - return "", syserror.ENOENT + return "", linuxerr.ENOENT } defer root.DecRef(ctx) for _, p := range paths { @@ -117,7 +116,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s } // Couldn't find it. - return "", syserror.ENOENT + return "", linuxerr.ENOENT } func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) { @@ -156,7 +155,7 @@ func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNam } // Couldn't find it. 
- return "", syserror.ENOENT + return "", linuxerr.ENOENT } // getPath returns the PATH as a slice of strings given the environment diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD index 4c9c5b344..e5fdcc776 100644 --- a/pkg/sentry/fsimpl/cgroupfs/BUILD +++ b/pkg/sentry/fsimpl/cgroupfs/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/context", "//pkg/coverage", "//pkg/errors/linuxerr", + "//pkg/fspath", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", @@ -43,7 +44,6 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go index 4290ffe0d..71bb0a9c8 100644 --- a/pkg/sentry/fsimpl/cgroupfs/base.go +++ b/pkg/sentry/fsimpl/cgroupfs/base.go @@ -88,7 +88,6 @@ type controller interface { // +stateify savable type cgroupInode struct { dir - fs *filesystem // ts is the list of tasks in this cgroup. The kernel is responsible for // removing tasks from this list before they're destroyed, so any tasks on @@ -102,9 +101,10 @@ var _ kernel.CgroupImpl = (*cgroupInode)(nil) func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode { c := &cgroupInode{ - fs: fs, - ts: make(map[*kernel.Task]struct{}), + dir: dir{fs: fs}, + ts: make(map[*kernel.Task]struct{}), } + c.dir.cgi = c contents := make(map[string]kernfs.Inode) contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c}) @@ -115,8 +115,7 @@ func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credential } c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555)) - c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - c.dir.InitRefs() + c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents)) atomic.AddUint64(&fs.numCgroups, 1) diff --git 
a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go index 22c8b7fda..edc3b50b9 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -32,7 +32,8 @@ // controllers associated with them. // // Since cgroupfs doesn't allow hardlinks, there is a unique mapping between -// cgroupfs dentries and inodes. +// cgroupfs dentries and inodes. Thus, cgroupfs inodes don't need to be ref +// counted and exist until they're unlinked once or the FS is destroyed. // // # Synchronization // @@ -48,10 +49,11 @@ // Lock order: // // kernel.CgroupRegistry.mu -// cgroupfs.filesystem.mu -// kernel.TaskSet.mu -// kernel.Task.mu -// cgroupfs.filesystem.tasksMu. +// kernfs.filesystem.mu +// kernel.TaskSet.mu +// kernel.Task.mu +// cgroupfs.filesystem.tasksMu. +// cgroupfs.dir.OrderedChildren.mu package cgroupfs import ( @@ -63,6 +65,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -108,6 +111,7 @@ type FilesystemType struct{} // +stateify savable type InternalData struct { DefaultControlValues map[string]int64 + InitialCgroupPath string } // filesystem implements vfs.FilesystemImpl and kernel.cgroupFS. @@ -134,6 +138,11 @@ type filesystem struct { numCgroups uint64 // Protected by atomic ops. root *kernfs.Dentry + // effectiveRoot is the initial cgroup new tasks are created in. Unless + // overwritten by internal mount options, root == effectiveRoot. If + // effectiveRoot != root, an extra reference is held on effectiveRoot for + // the lifetime of the filesystem. + effectiveRoot *kernfs.Dentry // tasksMu serializes task membership changes across all cgroups within a // filesystem. 
@@ -229,6 +238,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fs := vfsfs.Impl().(*filesystem) ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID) fs.root.IncRef() + if fs.effectiveRoot != fs.root { + fs.effectiveRoot.IncRef() + } return vfsfs, fs.root.VFSDentry(), nil } @@ -245,8 +257,8 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt var defaults map[string]int64 if opts.InternalData != nil { - ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults) defaults = opts.InternalData.(*InternalData).DefaultControlValues + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults) } for _, ty := range wantControllers { @@ -286,6 +298,14 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt var rootD kernfs.Dentry rootD.InitRoot(&fs.Filesystem, root) fs.root = &rootD + fs.effectiveRoot = fs.root + + if err := fs.prepareInitialCgroup(ctx, vfsObj, opts); err != nil { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: failed to prepare initial cgroup: %v", err) + rootD.DecRef(ctx) + fs.VFSFilesystem().DecRef(ctx) + return nil, nil, err + } // Register controllers. The registry may be modified concurrently, so if we // get an error, we raced with someone else who registered the same @@ -303,10 +323,47 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt return fs.VFSFilesystem(), rootD.VFSDentry(), nil } +// prepareInitialCgroup creates the initial cgroup according to opts. An initial +// cgroup is optional, and if not specified, this function is a no-op. 
+func (fs *filesystem) prepareInitialCgroup(ctx context.Context, vfsObj *vfs.VirtualFilesystem, opts vfs.GetFilesystemOptions) error { + if opts.InternalData == nil { + return nil + } + initPathStr := opts.InternalData.(*InternalData).InitialCgroupPath + if initPathStr == "" { + return nil + } + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path: %v", initPathStr) + initPath := fspath.Parse(initPathStr) + if !initPath.Absolute || !initPath.HasComponents() { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path invalid: %+v", initPath) + return linuxerr.EINVAL + } + + // Have initial cgroup target, create the tree. + cgDir := fs.root.Inode().(*cgroupInode) + for pit := initPath.Begin; pit.Ok(); pit = pit.Next() { + cgDirI, err := cgDir.NewDir(ctx, pit.String(), vfs.MkdirOptions{}) + if err != nil { + return err + } + cgDir = cgDirI.(*cgroupInode) + } + + // Walk to target dentry. + initDentry, err := fs.root.WalkDentryTree(ctx, vfsObj, initPath) + if err != nil { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup dentry not found: %v", err) + return linuxerr.ENOENT + } + fs.effectiveRoot = initDentry // Reference from WalkDentryTree transferred here. 
+ return nil +} + func (fs *filesystem) rootCgroup() kernel.Cgroup { return kernel.Cgroup{ - Dentry: fs.root, - CgroupImpl: fs.root.Inode().(kernel.CgroupImpl), + Dentry: fs.effectiveRoot, + CgroupImpl: fs.effectiveRoot.Inode().(kernel.CgroupImpl), } } @@ -320,6 +377,10 @@ func (fs *filesystem) Release(ctx context.Context) { r.Unregister(fs.hierarchyID) } + if fs.root != fs.effectiveRoot { + fs.effectiveRoot.DecRef(ctx) + } + fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) fs.Filesystem.Release(ctx) } @@ -346,15 +407,18 @@ func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error // // +stateify savable type dir struct { - dirRefs + kernfs.InodeNoopRefCount kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir. + kernfs.InodeDirectoryNoNewChildren kernfs.OrderedChildren implStatFS locks vfs.FileLocks + + fs *filesystem // Immutable. + cgi *cgroupInode // Immutable. } // Keep implements kernfs.Inode.Keep. @@ -378,9 +442,100 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry return fd.VFSFileDescription(), nil } -// DecRef implements kernfs.Inode.DecRef. -func (d *dir) DecRef(ctx context.Context) { - d.dirRefs.DecRef(func() { d.Destroy(ctx) }) +// NewDir implements kernfs.Inode.NewDir. +func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) { + // "Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable." + // -- Linux, kernel/cgroup.c:cgroup_mkdir(). + if strings.Contains(name, "\n") { + return nil, linuxerr.EINVAL + } + return d.OrderedChildren.Inserter(name, func() kernfs.Inode { + d.IncLinks(1) + return d.fs.newCgroupInode(ctx, auth.CredentialsFromContext(ctx)) + }) +} + +// Rename implements kernfs.Inode.Rename. 
Cgroupfs only allows renaming of +// cgroup directories, and the rename may only change the name within the same +// parent. See linux, kernel/cgroup.c:cgroup_rename(). +func (d *dir) Rename(ctx context.Context, oldname, newname string, child, dst kernfs.Inode) error { + if _, ok := child.(*cgroupInode); !ok { + // Not a cgroup directory. Control files are backed by different types. + return linuxerr.ENOTDIR + } + + dstCGInode, ok := dst.(*cgroupInode) + if !ok { + // Not a cgroup inode, so definitely can't be *this* inode. + return linuxerr.EIO + } + // Note: We're intentionally comparing addresses, since two different dirs + // could plausibly be identical in memory, but would occupy different + // locations in memory. + if d != &dstCGInode.dir { + // Destination dir is a different cgroup inode. Cross directory renames + // aren't allowed. + return linuxerr.EIO + } + + // Rename moves oldname to newname within d. Proceed. + return d.OrderedChildren.Rename(ctx, oldname, newname, child, dst) +} + +// Unlink implements kernfs.Inode.Unlink. Cgroupfs disallows unlink, as the only +// files in the filesystem are control files, which can't be deleted. +func (d *dir) Unlink(ctx context.Context, name string, child kernfs.Inode) error { + return linuxerr.EPERM +} + +// hasChildrenLocked returns whether the cgroup dir contains any objects that +// prevent it from being deleted. +func (d *dir) hasChildrenLocked() bool { + // Subdirs take a link on the parent, so checks if there are any direct + // children cgroups. Exclude the dir's self link and the link from ".". + if d.InodeAttrs.Links()-2 > 0 { + return true + } + return len(d.cgi.ts) > 0 +} + +// HasChildren implements kernfs.Inode.HasChildren. +// +// The empty check for a cgroupfs directory is unlike a regular directory since +// a cgroupfs directory will always have control files. A cgroupfs directory can +// be deleted if cgroup contains no tasks and has no sub-cgroups. 
+func (d *dir) HasChildren() bool { + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + return d.hasChildrenLocked() +} + +// RmDir implements kernfs.Inode.RmDir. +func (d *dir) RmDir(ctx context.Context, name string, child kernfs.Inode) error { + // Unlike a normal directory, we need to recheck if d is empty again, since + // vfs/kernfs can't stop tasks from entering or leaving the cgroup. + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + + cgi, ok := child.(*cgroupInode) + if !ok { + return linuxerr.ENOTDIR + } + if cgi.dir.hasChildrenLocked() { + return linuxerr.ENOTEMPTY + } + + // Disallow deletion of the effective root cgroup. + if cgi == d.fs.effectiveRoot.Inode().(*cgroupInode) { + ctx.Warningf("Cannot delete initial cgroup for new tasks %q", d.fs.effectiveRoot.FSLocalPath()) + return linuxerr.EBUSY + } + + err := d.OrderedChildren.RmDir(ctx, name, child) + if err == nil { + d.InodeAttrs.DecLinks() + } + return err } // controllerFile represents a generic control file that appears within a cgroup diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index f981ff296..e0b879339 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -45,7 +45,6 @@ go_library( "//pkg/sentry/unimpl", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 7a488e9fd..e711debcb 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Name is the filesystem name. 
@@ -180,7 +179,7 @@ func (i *rootInode) allocateTerminal(ctx context.Context, creds *auth.Credential i.mu.Lock() defer i.mu.Unlock() if i.nextIdx == math.MaxUint32 { - return nil, syserror.ENOMEM + return nil, linuxerr.ENOMEM } idx := i.nextIdx i.nextIdx++ @@ -241,7 +240,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro // Not a static entry. idx, err := strconv.ParseUint(name, 10, 32) if err != nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } i.mu.Lock() defer i.mu.Unlock() @@ -250,7 +249,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro return ri, nil } - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } // IterDirents implements kernfs.Inode.IterDirents. diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go index 9cb21e83b..609623f9f 100644 --- a/pkg/sentry/fsimpl/devpts/line_discipline.go +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -20,10 +20,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -203,7 +203,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque } else if notifyEcho { l.masterWaiter.Notify(waiter.ReadableEvents) } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { @@ -220,7 +220,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ l.replicaWaiter.Notify(waiter.ReadableEvents) return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io 
usermem.IO, args arch.SyscallArguments) error { @@ -242,7 +242,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ } return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { @@ -257,7 +257,7 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq l.masterWaiter.Notify(waiter.ReadableEvents) return n, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // transformer is a helper interface to make it easier to stateify queue. diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index ff1d89955..85aeefa43 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -17,12 +17,12 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -110,7 +110,7 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl defer q.mu.Unlock() if !q.readable { - return 0, false, false, syserror.ErrWouldBlock + return 0, false, false, linuxerr.ErrWouldBlock } if dst.NumBytes() > canonMaxBytes { @@ -156,7 +156,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip room := waitBufMaxBytes - q.waitBufLen // If out of room, return EAGAIN. if room == 0 && copyLen > 0 { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Cap the size of the wait buffer. 
if copyLen > room { diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD index c09fdc7f9..1cb049a29 100644 --- a/pkg/sentry/fsimpl/eventfd/BUILD +++ b/pkg/sentry/fsimpl/eventfd/BUILD @@ -9,11 +9,11 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fdnotifier", "//pkg/hostarch", "//pkg/log", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go index 4f79cfcb7..af5ba5131 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd.go @@ -22,11 +22,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -149,7 +149,7 @@ func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem var buf [8]byte if _, err := unix.Read(efd.hostfd, buf[:]); err != nil { if err == unix.EWOULDBLOCK { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } return err } @@ -167,7 +167,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc // We can't complete the read if the value is currently zero. if efd.val == 0 { efd.mu.Unlock() - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } // Update the value based on the mode the event is operating in. 
@@ -200,7 +200,7 @@ func (efd *EventFileDescription) hostWriteLocked(val uint64) error { hostarch.ByteOrder.PutUint64(buf[:], val) _, err := unix.Write(efd.hostfd, buf[:]) if err == unix.EWOULDBLOCK { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } return err } @@ -232,7 +232,7 @@ func (efd *EventFileDescription) Signal(val uint64) error { // uint64 minus 1. if val > math.MaxUint64-1-efd.val { efd.mu.Unlock() - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } efd.val += val diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pkg/sentry/fsimpl/ext/BUILD diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 871df5984..05c4fbeb2 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -59,7 +59,6 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", @@ -84,7 +83,6 @@ go_test( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go index dab1e779d..0f855ac59 100644 --- a/pkg/sentry/fsimpl/fuse/dev.go +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -38,7 +37,7 @@ type fuseDevice struct{} // Open implements vfs.Device.Open. 
func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { if !kernel.FUSEEnabled { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } var fd DeviceFD @@ -126,7 +125,7 @@ func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset in return 0, linuxerr.EPERM } - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // Read implements vfs.FileDescriptionImpl.Read. @@ -192,7 +191,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts } if req == nil { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // We already checked the size: dst must be able to fit the whole request. @@ -205,7 +204,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts return 0, err } if n != len(req.data) { - return 0, syserror.EIO + return 0, linuxerr.EIO } if req.hdr.Opcode == linux.FUSE_WRITE { @@ -214,7 +213,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts return 0, err } if written != len(req.payload) { - return 0, syserror.EIO + return 0, linuxerr.EIO } n += int(written) } @@ -238,7 +237,7 @@ func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset i return 0, linuxerr.EPERM } - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // Write implements vfs.FileDescriptionImpl.Write. @@ -395,7 +394,7 @@ func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64 return 0, linuxerr.EPERM } - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } // sendResponse sends a response to the waiting task (if any). 
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go index 04250d796..8951b5ba8 100644 --- a/pkg/sentry/fsimpl/fuse/dev_test.go +++ b/pkg/sentry/fsimpl/fuse/dev_test.go @@ -20,11 +20,11 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -186,7 +186,7 @@ func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem. // "would block". n, err = dev.Read(serverTask, inIOseq, vfs.ReadOptions{}) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go index fcc5d9a2a..9611edd5a 100644 --- a/pkg/sentry/fsimpl/fuse/directory.go +++ b/pkg/sentry/fsimpl/fuse/directory.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -32,27 +32,27 @@ type directoryFD struct { // Allocate implements directoryFD.Allocate. func (*directoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.EISDIR + return linuxerr.EISDIR } // PRead implements vfs.FileDescriptionImpl.PRead. func (*directoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Read implements vfs.FileDescriptionImpl.Read. 
func (*directoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // PWrite implements vfs.FileDescriptionImpl.PWrite. func (*directoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Write implements vfs.FileDescriptionImpl.Write. func (*directoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // IterDirents implements vfs.FileDescriptionImpl.IterDirents. diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 172cbd88f..af16098d2 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -30,7 +30,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -612,7 +611,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo return nil, err } if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) { - return nil, syserror.EIO + return nil, linuxerr.EIO } child := i.fs.newInode(ctx, out.NodeID, out.Attr) return child, nil diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go index 35d0ab6f4..fe119aa43 100644 --- a/pkg/sentry/fsimpl/fuse/read_write.go +++ b/pkg/sentry/fsimpl/fuse/read_write.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) // ReadInPages sends FUSE_READ requests for the size after round it up to @@ -221,7 +220,7 @@ func (fs *filesystem) Write(ctx 
context.Context, fd *regularFileFD, off uint64, // Write more than requested? EIO. if out.Size > toWrite { - return 0, syserror.EIO + return 0, linuxerr.EIO } written += out.Size diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go index 6c4de3507..38cde8208 100644 --- a/pkg/sentry/fsimpl/fuse/regular_file.go +++ b/pkg/sentry/fsimpl/fuse/regular_file.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -108,7 +107,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, err } if int64(cp) != toCopy { - return 0, syserror.EIO + return 0, linuxerr.EIO } copied += toCopy } @@ -205,7 +204,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off return 0, offset, err } if int64(cp) != srclen { - return 0, offset, syserror.EIO + return 0, offset, linuxerr.EIO } n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data) @@ -216,7 +215,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off if n == 0 { // We have checked srclen != 0 previously. // If err == nil, then it's a short write and we return EIO. 
- return 0, offset, syserror.EIO + return 0, offset, linuxerr.EIO } written = int64(n) diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 752060044..4244f2cf5 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -79,7 +79,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/unet", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 05b776c2e..00228c469 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -33,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -235,7 +234,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s } if child, ok := parent.children[name]; ok || parent.isSynthetic() { if child == nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return child, nil } @@ -349,7 +348,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir return linuxerr.EEXIST } if parent.isDeleted() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil { return err @@ -395,7 +394,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir return err } if !dir && rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } if parent.isSynthetic() { if createInSyntheticDir == nil { @@ -463,7 +462,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } } else { if name == "." || name == ".." 
{ - return syserror.EISDIR + return linuxerr.EISDIR } } @@ -486,7 +485,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b child, ok = parent.children[name] if ok && child == nil { // Hit a negative cached entry, child doesn't exist. - return syserror.ENOENT + return linuxerr.ENOENT } } else { child, _, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) @@ -552,7 +551,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // child must be a non-directory file. if child != nil && child.isDir() { vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. - return syserror.EISDIR + return linuxerr.EISDIR } if rp.MustBeDir() { if child != nil { @@ -563,7 +562,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } if parent.isSynthetic() { if child == nil { - return syserror.ENOENT + return linuxerr.ENOENT } } else if child == nil || !child.isSynthetic() { err = parent.file.unlinkAt(ctx, name, flags) @@ -674,7 +673,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return err } if d.nlink == 0 { - return syserror.ENOENT + return linuxerr.ENOENT } if d.nlink == math.MaxUint32 { return linuxerr.EMLINK @@ -811,7 +810,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if rp.Done() { // Reject attempts to open mount root directory with O_CREAT. if mayCreate && rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if mustCreate { return nil, linuxerr.EEXIST @@ -841,7 +840,7 @@ afterTrailingSymlink: } // Reject attempts to open directories with O_CREAT. 
if mayCreate && rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil { return nil, err @@ -922,11 +921,11 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open case linux.S_IFDIR: // Can't open directories with O_CREAT. if opts.Flags&linux.O_CREAT != 0 { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } // Can't open directories writably. if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if opts.Flags&linux.O_DIRECT != 0 { return nil, linuxerr.EINVAL @@ -1054,7 +1053,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving return nil, err } if d.isDeleted() { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } mnt := rp.Mount() if err := mnt.CheckBeginWrite(); err != nil { @@ -1268,7 +1267,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa defer newParent.dirMu.Unlock() } if newParent.isDeleted() { - return syserror.ENOENT + return linuxerr.ENOENT } replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds) if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { @@ -1282,7 +1281,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa replacedVFSD = &replaced.vfsd if replaced.isDir() { if !renamed.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if genericIsAncestorDentry(replaced, renamed) { return linuxerr.ENOTEMPTY diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 25d2e39d6..bd6b30397 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -62,7 +62,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" ) @@ -865,11 +864,11 @@ func 
dentryAttrMask() p9.AttrMask { func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) { if !mask.Mode { ctx.Warningf("can't create gofer.dentry without file type") - return nil, syserror.EIO + return nil, linuxerr.EIO } if attr.Mode.FileType() == p9.ModeRegular && !mask.Size { ctx.Warningf("can't create regular file gofer.dentry without file size") - return nil, syserror.EIO + return nil, linuxerr.EIO } d := &dentry{ @@ -1112,7 +1111,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs case linux.S_IFREG: // ok case linux.S_IFDIR: - return syserror.EISDIR + return linuxerr.EISDIR default: return linuxerr.EINVAL } diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go index 398288ee3..505916a57 100644 --- a/pkg/sentry/fsimpl/gofer/host_named_pipe.go +++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go @@ -22,7 +22,6 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/syserror" ) // Global pipe used by blockUntilNonblockingPipeHasWriter since we can't create @@ -109,6 +108,6 @@ func sleepBetweenNamedPipeOpenChecks(ctx context.Context) error { return nil case <-cancel: ctx.SleepFinish(false) - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted } } diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go index b0a429d42..5a3ddfc9d 100644 --- a/pkg/sentry/fsimpl/gofer/p9file.go +++ b/pkg/sentry/fsimpl/gofer/p9file.go @@ -16,9 +16,9 @@ package gofer import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/syserror" ) // p9file is a wrapper around p9.File that provides methods that are @@ -59,7 +59,7 @@ func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file if newfile != nil { 
p9file{newfile}.close(ctx) } - return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO + return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, linuxerr.EIO } return qids[0], p9file{newfile}, attrMask, attr, nil } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 4b59c1c3c..144a1045e 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -238,7 +237,7 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs buf := make([]byte, dst.NumBytes()) n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) if linuxerr.Equals(linuxerr.EAGAIN, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } if n == 0 { return bufN, err @@ -326,7 +325,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off } n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset)) if linuxerr.Equals(linuxerr.EAGAIN, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } // Update offset if the offset is valid. 
if offset >= 0 { diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 476545d00..180a35583 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -70,7 +70,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/unet", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 89aa7b3d9..984c6e8ee 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -37,7 +37,6 @@ import ( unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -712,7 +711,7 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts if total != 0 { err = nil } else { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } } return total, err @@ -766,7 +765,7 @@ func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opt if !i.seekable { n, err := f.writeToHostFD(ctx, src, -1, opts.Flags) if isBlockError(err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } return n, err } diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 7f6ce4ee5..04ac73255 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -346,7 +345,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) // If the signal is SIGTTIN, then we are attempting to read // from the TTY. Don't send the signal and return EIO. if sig == linux.SIGTTIN { - return syserror.EIO + return linuxerr.EIO } // Otherwise, we are writing or changing terminal state. 
This is allowed. @@ -355,7 +354,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) // If the process group is an orphan, return EIO. if pg.IsOrphan() { - return syserror.EIO + return linuxerr.EIO } // Otherwise, send the signal to the process group and return ERESTARTSYS. @@ -368,5 +367,5 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) // // Linux ignores the result of kill_pgrp(). _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) - return syserror.ERESTARTSYS + return linuxerr.ERESTARTSYS } diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go index 95d7ebe2e..9850f3f41 100644 --- a/pkg/sentry/fsimpl/host/util.go +++ b/pkg/sentry/fsimpl/host/util.go @@ -42,7 +42,7 @@ func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp { } // isBlockError checks if an error is EAGAIN or EWOULDBLOCK. -// If so, they can be transformed into syserror.ErrWouldBlock. +// If so, they can be transformed into linuxerr.ErrWouldBlock. 
func isBlockError(err error) bool { return linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) } diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index d53937db6..4b577ea43 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -119,7 +119,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) @@ -137,6 +136,7 @@ go_test( "//pkg/abi/linux", "//pkg/context", "//pkg/errors/linuxerr", + "//pkg/fspath", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", @@ -144,7 +144,6 @@ go_test( "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", "@com_github_google_go_cmp//cmp:go_default_library", ], diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 8b008dc10..7db1473c4 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -99,7 +98,7 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *Dentry, children *OrderedChildren, l func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions, fdOpts GenericDirectoryFDOptions) error { if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 { // Can't open directories for writing. 
- return syserror.EISDIR + return linuxerr.EISDIR } fd.LockFD.Init(locks) fd.seekEnd = fdOpts.SeekEnd diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index a97473f7d..363ebc466 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // stepExistingLocked resolves rp.Component() in parent directory vfsd. @@ -224,7 +223,7 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string return linuxerr.EEXIST } if parent.VFSDentry().IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil { return err @@ -241,7 +240,7 @@ func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) er return linuxerr.EBUSY } if parent.vfsd.IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { return err @@ -362,7 +361,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
return err } if rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } if rp.Mount() != vd.Mount() { return linuxerr.EXDEV @@ -443,7 +442,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } if rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := rp.Mount().CheckBeginWrite(); err != nil { return err @@ -509,7 +508,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf defer unlock() if rp.Done() { if rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if mustCreate { return nil, linuxerr.EEXIST @@ -536,11 +535,11 @@ afterTrailingSymlink: } // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } pc := rp.Component() if pc == "." || pc == ".." { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if len(pc) > linux.NAME_MAX { return nil, linuxerr.ENAMETOOLONG @@ -861,7 +860,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return err } if rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := rp.Mount().CheckBeginWrite(); err != nil { return err @@ -895,7 +894,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } if d.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } virtfs := rp.VirtualFilesystem() parentDentry := d.parent diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index a42fc79b4..b96dc9ef7 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -26,7 +26,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // InodeNoopRefCount partially implements the Inode interface, specifically the @@ -234,6 
+233,11 @@ func (a *InodeAttrs) Mode() linux.FileMode { return linux.FileMode(atomic.LoadUint32(&a.mode)) } +// Links returns the link count. +func (a *InodeAttrs) Links() uint32 { + return atomic.LoadUint32(&a.nlink) +} + // TouchAtime updates a.atime to the current time. func (a *InodeAttrs) TouchAtime(ctx context.Context, mnt *vfs.Mount) { if mnt.Flags.NoATime || mnt.ReadOnly() { @@ -289,7 +293,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut return linuxerr.EPERM } if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil { return err @@ -475,7 +479,7 @@ func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error s, ok := o.set[name] if !ok { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } s.inode.IncRef() // This ref is passed to the dentry upon creation via Init. @@ -502,6 +506,30 @@ func (o *OrderedChildren) Insert(name string, child Inode) error { return o.insert(name, child, false) } +// Inserter is like Insert, but obtains the child to insert by calling +// makeChild. makeChild is only called if the insert will succeed. This allows +// the caller to atomically check and insert a child without having to +// clean up the child on failure. +func (o *OrderedChildren) Inserter(name string, makeChild func() Inode) (Inode, error) { + o.mu.Lock() + defer o.mu.Unlock() + if _, ok := o.set[name]; ok { + return nil, linuxerr.EEXIST + } + + // Note: We must not fail after we call makeChild(). + + child := makeChild() + s := &slot{ + name: name, + inode: child, + static: false, + } + o.order.PushBack(s) + o.set[name] = s + return child, nil +} + // insert inserts child into o. // // Precondition: Caller must be holding a ref on child if static is true. 
@@ -559,7 +587,7 @@ func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, n func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error { s, ok := o.set[name] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } if s.inode != child { panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child)) @@ -746,5 +774,5 @@ type InodeNoStatFS struct{} // StatFS implements Inode.StatFS. func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { - return linux.Statfs{}, syserror.ENOSYS + return linux.Statfs{}, linuxerr.ENOSYS } diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 0e2867d49..544698694 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -61,6 +61,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -542,6 +543,63 @@ func (d *Dentry) FSLocalPath() string { return b.String() } +// WalkDentryTree traverses p in the dentry tree for this filesystem. Note that +// this only traverses the dentry tree and is not a general path traversal. No +// symlinks and dynamic children are resolved, and no permission checks are +// performed. The caller is responsible for ensuring the returned Dentry exists +// for an appropriate lifetime. +// +// p is interpreted starting at d, and may be absolute or relative (absolute vs +// relative paths both refer to the same target here, since p is absolute from +// d). p may contain "." and "..", but will not allow traversal above d (similar +// to ".." at the root dentry). +// +// This is useful for filesystem internals, where the filesystem may not be +// mounted yet. For a mounted filesystem, use GetDentryAt. 
+func (d *Dentry) WalkDentryTree(ctx context.Context, vfsObj *vfs.VirtualFilesystem, p fspath.Path) (*Dentry, error) { + d.fs.mu.RLock() + defer d.fs.processDeferredDecRefs(ctx) + defer d.fs.mu.RUnlock() + + target := d + + for pit := p.Begin; pit.Ok(); pit = pit.Next() { + pc := pit.String() + + switch { + case target == nil: + return nil, linuxerr.ENOENT + case pc == ".": + // No-op, consume component and continue. + case pc == "..": + if target == d { + // Don't let .. traverse above the start point of the walk. + continue + } + target = target.parent + // Parent doesn't need revalidation since we revalidated it on the + // way to the child, and we're still holding fs.mu. + default: + var err error + + d.dirMu.Lock() + target, err = d.fs.revalidateChildLocked(ctx, vfsObj, target, pc, target.children[pc]) + d.dirMu.Unlock() + + if err != nil { + return nil, err + } + } + } + + if target == nil { + return nil, linuxerr.ENOENT + } + + target.IncRef() + return target, nil +} + // The Inode interface maps filesystem-level operations that operate on paths to // equivalent operations on specific filesystem nodes. // @@ -667,12 +725,15 @@ type inodeDirectory interface { // RmDir removes an empty child directory from this directory // inode. Implementations must update the parent directory's link count, // if required. Implementations are not responsible for checking that child - // is a directory, checking for an empty directory. + // is a directory, or checking for an empty directory. RmDir(ctx context.Context, name string, child Inode) error // Rename is called on the source directory containing an inode being - // renamed. child should point to the resolved child in the source - // directory. + // renamed. child points to the resolved child in the source directory. + // dstDir is guaranteed to be a directory inode. + // + // On a successful call to Rename, the caller updates the dentry tree to + // reflect the name change. 
// // Precondition: Caller must serialize concurrent calls to Rename. Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 609887943..a2aba9321 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" @@ -346,3 +347,63 @@ func TestDirFDIterDirents(t *testing.T) { "file1": linux.DT_REG, }) } + +func TestDirWalkDentryTree(t *testing.T) { + sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode { + return fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{ + "dir1": fs.newDir(ctx, creds, 0755, nil), + "dir2": fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{ + "file1": fs.newFile(ctx, creds, staticFileContent), + "dir3": fs.newDir(ctx, creds, 0755, nil), + }), + }) + }) + defer sys.Destroy() + + testWalk := func(from *kernfs.Dentry, getDentryPath, walkPath string, expectedErr error) { + var d *kernfs.Dentry + if getDentryPath != "" { + pop := sys.PathOpAtRoot(getDentryPath) + vd := sys.GetDentryOrDie(pop) + defer vd.DecRef(sys.Ctx) + d = vd.Dentry().Impl().(*kernfs.Dentry) + } + + match, err := from.WalkDentryTree(sys.Ctx, sys.VFS, fspath.Parse(walkPath)) + if err == nil { + defer match.DecRef(sys.Ctx) + } + + if err != expectedErr { + t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) unexpected error, want: %v, got: %v", from.FSLocalPath(), walkPath, expectedErr, expectedErr, err) + } + if expectedErr != nil { + return + } + + if d != match { + t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) found unexpected dentry; want: 
%v, got: %v", from.FSLocalPath(), walkPath, expectedErr, d, match) + } + } + + rootD := sys.Root.Dentry().Impl().(*kernfs.Dentry) + + testWalk(rootD, "dir1", "/dir1", nil) + testWalk(rootD, "", "/dir-non-existent", linuxerr.ENOENT) + testWalk(rootD, "", "/dir1/child-non-existent", linuxerr.ENOENT) + testWalk(rootD, "", "/dir2/inner-non-existent/dir3", linuxerr.ENOENT) + + testWalk(rootD, "dir2/dir3", "/dir2/../dir2/dir3", nil) + testWalk(rootD, "dir2/dir3", "/dir2/././dir3", nil) + testWalk(rootD, "dir2/dir3", "/dir2/././dir3/.././dir3", nil) + + pop := sys.PathOpAtRoot("dir2") + dir2VD := sys.GetDentryOrDie(pop) + defer dir2VD.DecRef(sys.Ctx) + dir2D := dir2VD.Dentry().Impl().(*kernfs.Dentry) + + testWalk(dir2D, "dir2/dir3", "/dir3", nil) + testWalk(dir2D, "dir2/dir3", "/../../../dir3", nil) + testWalk(dir2D, "dir2/file1", "/file1", nil) + testWalk(dir2D, "dir2/file1", "file1", nil) +} diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index ed730e215..d16dfef9b 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -42,7 +42,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index 1f85a1f0d..618092ef1 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) func (d *dentry) isCopiedUp() bool { @@ -72,7 +71,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } if d.vfsd.IsDead() { // Raced with deletion of d. - return syserror.ENOENT + return linuxerr.ENOENT } // Obtain settable timestamps from the lower layer. 
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 5e89928c5..c04c80590 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs @@ -314,7 +313,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str } if !topLookupLayer.existsInOverlay() { child.destroyLocked(ctx) - return nil, topLookupLayer, syserror.ENOENT + return nil, topLookupLayer, linuxerr.ENOENT } // Device and inode numbers were copied from the topmost layer above. Remap @@ -483,7 +482,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir return linuxerr.EEXIST } if parent.vfsd.IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { @@ -506,7 +505,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } if !dir && rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } mnt := rp.Mount() @@ -780,7 +779,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf start := rp.Start().Impl().(*dentry) if rp.Done() { if mayCreate && rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if mustCreate { return nil, linuxerr.EEXIST @@ -807,7 +806,7 @@ afterTrailingSymlink: } // Reject attempts to open directories with O_CREAT. if mayCreate && rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } // Determine whether or not we need to create a file. 
parent.dirMu.Lock() @@ -865,11 +864,11 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts * if ftype == linux.S_IFDIR { // Can't open directories with O_CREAT. if opts.Flags&linux.O_CREAT != 0 { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } // Can't open directories writably. if ats.MayWrite() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if opts.Flags&linux.O_DIRECT != 0 { return nil, linuxerr.EINVAL @@ -919,7 +918,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving return nil, err } if parent.vfsd.IsDead() { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } mnt := rp.Mount() if err := mnt.CheckBeginWrite(); err != nil { @@ -1086,7 +1085,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa defer newParent.dirMu.Unlock() } if newParent.vfsd.IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } var ( replaced *dentry @@ -1105,7 +1104,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa replacedVFSD = &replaced.vfsd if replaced.isDir() { if !renamed.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if genericIsAncestorDentry(replaced, renamed) { return linuxerr.ENOTEMPTY @@ -1533,7 +1532,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error defer rp.Mount().EndWrite() name := rp.Component() if name == "." || name == ".." 
{ - return syserror.EISDIR + return linuxerr.EISDIR } if rp.MustBeDir() { return linuxerr.ENOTDIR @@ -1557,7 +1556,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } if child.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if err := parent.mayDelete(rp.Credentials(), child); err != nil { return err diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 1d3d2d95f..95cfbdc42 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -102,7 +102,6 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip/header", "//pkg/tcpip/network/ipv4", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index d99f90b36..e04ae6660 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // subtasksInode represents the inode for /proc/[pid]/task/ directory. 
@@ -71,15 +70,15 @@ func (fs *filesystem) newSubtasks(ctx context.Context, task *kernel.Task, pidns func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { tid, err := strconv.ParseUint(name, 10, 32) if err != nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } subTask := i.pidns.TaskWithID(kernel.ThreadID(tid)) if subTask == nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } if subTask.ThreadGroup() != i.task.ThreadGroup() { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return i.fs.newTaskInode(ctx, subTask, i.pidns, false, i.cgroupControllers) } @@ -88,7 +87,7 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, func (i *subtasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { tasks := i.task.ThreadGroup().MemberIDs(i.pidns) if len(tasks) == 0 { - return offset, syserror.ENOENT + return offset, linuxerr.ENOENT } if relOffset >= int64(len(tasks)) { return offset, nil @@ -124,7 +123,7 @@ type subtasksFD struct { func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { if fd.task.ExitState() >= kernel.TaskExitZombie { - return syserror.ENOENT + return linuxerr.ENOENT } return fd.GenericDirectoryFD.IterDirents(ctx, cb) } @@ -132,7 +131,7 @@ func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallbac // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { if fd.task.ExitState() >= kernel.TaskExitZombie { - return 0, syserror.ENOENT + return 0, linuxerr.ENOENT } return fd.GenericDirectoryFD.Seek(ctx, offset, whence) } @@ -140,7 +139,7 @@ func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int // Stat implements vfs.FileDescriptionImpl.Stat. 
func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { if fd.task.ExitState() >= kernel.TaskExitZombie { - return linux.Statx{}, syserror.ENOENT + return linux.Statx{}, linuxerr.ENOENT } return fd.GenericDirectoryFD.Stat(ctx, opts) } @@ -148,7 +147,7 @@ func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Sta // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { if fd.task.ExitState() >= kernel.TaskExitZombie { - return syserror.ENOENT + return linuxerr.ENOENT } return fd.GenericDirectoryFD.SetStat(ctx, opts) } diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index dfc0a924e..5c6412fc0 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -22,11 +22,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) { @@ -142,11 +142,11 @@ func (i *fdDirInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.Ite func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { fdInt, err := strconv.ParseInt(name, 10, 32) if err != nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } fd := int32(fdInt) if !taskFDExists(ctx, i.fs, i.task, fd) { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return i.fs.newFDSymlink(ctx, i.task, fd, i.fs.NextIno()), nil } @@ -218,7 +218,7 @@ func (fs *filesystem) newFDSymlink(ctx context.Context, task *kernel.Task, fd in func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) { file, _ := getTaskFD(s.task, s.fd) 
if file == nil { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } defer s.fs.SafeDecRefFD(ctx, file) root := vfs.RootFromContext(ctx) @@ -231,7 +231,7 @@ func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) { file, _ := getTaskFD(s.task, s.fd) if file == nil { - return vfs.VirtualDentry{}, "", syserror.ENOENT + return vfs.VirtualDentry{}, "", linuxerr.ENOENT } defer s.fs.SafeDecRefFD(ctx, file) vd := file.VirtualDentry() @@ -278,11 +278,11 @@ func (fs *filesystem) newFDInfoDirInode(ctx context.Context, task *kernel.Task) func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { fdInt, err := strconv.ParseInt(name, 10, 32) if err != nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } fd := int32(fdInt) if !taskFDExists(ctx, i.fs, i.task, fd) { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } data := &fdInfoData{ fs: i.fs, @@ -330,7 +330,7 @@ var _ dynamicInode = (*fdInfoData)(nil) func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { file, descriptorFlags := getTaskFD(d.task, d.fd) if file == nil { - return syserror.ENOENT + return linuxerr.ENOENT } defer d.fs.SafeDecRefFD(ctx, file) // TODO(b/121266871): Include pos, locks, and other data. 
For now we only diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 0ce3ed797..34b0c4f63 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -33,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -491,7 +490,7 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64 return int64(n), nil } if readErr != nil { - return 0, syserror.EIO + return 0, linuxerr.EIO } return 0, nil } diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index cf905fae4..26d44744b 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -21,11 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -116,12 +116,12 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err case threadSelfName: return i.newThreadSelfSymlink(ctx, root), nil } - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } task := i.pidns.TaskWithID(kernel.ThreadID(tid)) if task == nil { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers) diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 03bed22a3..4d3a2f7e6 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // 
+stateify savable @@ -58,7 +57,7 @@ func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error } tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) if tgid == 0 { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } return strconv.FormatUint(uint64(tgid), 10), nil } @@ -100,7 +99,7 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) tid := s.pidns.IDOfTask(t) if tid == 0 || tgid == 0 { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } return fmt.Sprintf("%d/task/%d", tgid, tid), nil } diff --git a/pkg/sentry/fsimpl/signalfd/BUILD b/pkg/sentry/fsimpl/signalfd/BUILD index adb610213..403c6f254 100644 --- a/pkg/sentry/fsimpl/signalfd/BUILD +++ b/pkg/sentry/fsimpl/signalfd/BUILD @@ -9,10 +9,10 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/kernel", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go index a7f5928b7..bdb03ef96 100644 --- a/pkg/sentry/fsimpl/signalfd/signalfd.go +++ b/pkg/sentry/fsimpl/signalfd/signalfd.go @@ -18,10 +18,10 @@ package signalfd import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -91,7 +91,7 @@ func (sfd *SignalFileDescription) Read(ctx context.Context, dst usermem.IOSequen info, err := sfd.target.Sigtimedwait(sfd.Mask(), 0) if err != nil { // There must be no signal available. - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Copy out the signal info using the specified format. 
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 1af0a5cbc..ab21f028e 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -36,7 +36,6 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD index e6980a314..2b83d7d9a 100644 --- a/pkg/sentry/fsimpl/timerfd/BUILD +++ b/pkg/sentry/fsimpl/timerfd/BUILD @@ -12,7 +12,6 @@ go_library( "//pkg/hostarch", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index 655a1c76a..68b785791 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/hostarch" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -82,7 +81,7 @@ func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequenc } return sizeofUint64, nil } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Clock returns the timer fd's Clock. 
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index dc8b9bfeb..94486bb63 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -82,7 +82,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sentry/vfs/memxattr", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) @@ -125,7 +124,6 @@ go_test( "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 8b04df038..e067f136e 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -75,7 +74,7 @@ afterSymlink: } child, ok := dir.childMap[name] if !ok { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, err @@ -171,12 +170,12 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir return linuxerr.EEXIST } if !dir && rp.MustBeDir() { - return syserror.ENOENT + return linuxerr.ENOENT } // tmpfs never calls VFS.InvalidateDentry(), so parentDir.dentry can only // be dead if it was deleted. if parentDir.dentry.vfsd.IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } mnt := rp.Mount() if err := mnt.CheckBeginWrite(); err != nil { @@ -258,7 +257,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return err } if i.nlink == 0 { - return syserror.ENOENT + return linuxerr.ENOENT } if i.nlink == maxLinks { return linuxerr.EMLINK @@ -345,7 +344,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if rp.Done() { // Reject attempts to open mount root directory with O_CREAT. 
if rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if mustCreate { return nil, linuxerr.EEXIST @@ -366,11 +365,11 @@ afterTrailingSymlink: } // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } name := rp.Component() if name == "." || name == ".." { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } if len(name) > linux.NAME_MAX { return nil, linuxerr.ENAMETOOLONG @@ -457,7 +456,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open case *directory: // Can't open directories writably. if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR + return nil, linuxerr.EISDIR } var fd directoryFD fd.LockFD.Init(&d.inode.locks) @@ -532,7 +531,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } renamed, ok := oldParentDir.childMap[oldName] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } if err := oldParentDir.mayDelete(rp.Credentials(), renamed); err != nil { return err @@ -567,7 +566,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa replacedDir, ok := replaced.inode.impl.(*directory) if ok { if !renamed.inode.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if len(replacedDir.childMap) != 0 { return linuxerr.ENOTEMPTY @@ -588,7 +587,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // tmpfs never calls VFS.InvalidateDentry(), so newParentDir.dentry can // only be dead if it was deleted. 
if newParentDir.dentry.vfsd.IsDead() { - return syserror.ENOENT + return linuxerr.ENOENT } // Linux places this check before some of those above; we do it here for @@ -654,7 +653,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } child, ok := parentDir.childMap[name] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } if err := parentDir.mayDelete(rp.Credentials(), child); err != nil { return err @@ -754,17 +753,17 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } name := rp.Component() if name == "." || name == ".." { - return syserror.EISDIR + return linuxerr.EISDIR } child, ok := parentDir.childMap[name] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } if err := parentDir.mayDelete(rp.Credentials(), child); err != nil { return err } if child.inode.isDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if rp.MustBeDir() { return linuxerr.ENOTDIR diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go index 418c7994e..99afd9817 100644 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -202,7 +201,7 @@ func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { readData := make([]byte, 1) dst := usermem.BytesIOSequence(readData) bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err) } if bytesRead != 0 { diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 4393cc13b..cb7711b39 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ 
b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -21,10 +21,10 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -146,7 +146,7 @@ func TestLocks(t *testing.T) { if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, nil); err != nil { t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) } - if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want { + if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), linuxerr.ErrWouldBlock; got != want { t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want) } if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil { @@ -165,7 +165,7 @@ func TestLocks(t *testing.T) { if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.WriteLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil { t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) } - if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), syserror.ErrWouldBlock; got != want { + if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), linuxerr.ErrWouldBlock; got != want { t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want) } if err := fd.Impl().UnlockPOSIX(ctx, uid1, lock.LockRange{Start: 0, End: 1}); err != nil { diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index f2250c025..feafb06e4 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -44,7 +44,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" "gvisor.dev/gvisor/pkg/sync" - 
"gvisor.dev/gvisor/pkg/syserror" ) // Name is the default filesystem name. @@ -556,7 +555,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. needsCtimeBump = true } case *directory: - return syserror.EISDIR + return linuxerr.EISDIR default: return linuxerr.EINVAL } diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD index 1d855234c..5955948f0 100644 --- a/pkg/sentry/fsimpl/verity/BUILD +++ b/pkg/sentry/fsimpl/verity/BUILD @@ -28,7 +28,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index 930016a3e..e147d6b07 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -32,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -166,7 +165,7 @@ afterSymlink: // verifyChildLocked verifies the hash of child against the already verified // hash of the parent to ensure the child is expected. verifyChild triggers a // sentry panic if unexpected modifications to the file system are detected. In -// ErrorOnViolation mode it returns a syserror instead. +// ErrorOnViolation mode it returns a linuxerr instead. // // Preconditions: // * fs.renameMu must be locked. @@ -547,7 +546,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, if parent.verityEnabled() { if _, ok := parent.childrenNames[name]; !ok { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } } @@ -595,23 +594,6 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, } } - // Clear the Merkle tree file if they are to be generated at runtime. 
- // TODO(b/182315468): Optimize the Merkle tree generate process to - // allow only updating certain files/directories. - if fs.allowRuntimeEnable { - childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ - Root: childMerkleVD, - Start: childMerkleVD, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_TRUNC, - Mode: 0644, - }) - if err != nil { - return nil, err - } - childMerkleFD.DecRef(ctx) - } - // The dentry needs to be cleaned up if any error occurs. IncRef will be // called if a verity child dentry is successfully created. defer childMerkleVD.DecRef(ctx) diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index c5fa9855b..23841ecf7 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -60,7 +60,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -239,7 +238,7 @@ func (FilesystemType) Release(ctx context.Context) {} // mode, it returns EIO, otherwise it panic. func (fs *filesystem) alertIntegrityViolation(msg string) error { if fs.action == ErrorOnViolation { - return syserror.EIO + return linuxerr.EIO } panic(msg) } @@ -1091,6 +1090,21 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { return 0, fd.d.fs.alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds") } + // Populate children names here. We cannot rely on the children + // dentries to populate parent dentry's children names, because the + // parent dentry may be destroyed before users enable verity if its ref + // count drops to zero. + if fd.d.isDir() { + if err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { + if dirent.Name != "." && dirent.Name != ".." 
{ + fd.d.childrenNames[dirent.Name] = struct{}{} + } + return nil + })); err != nil { + return 0, err + } + } + hash, dataSize, err := fd.generateMerkleLocked(ctx) if err != nil { return 0, err @@ -1118,9 +1132,6 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { }); err != nil { return 0, err } - - // Add the current child's name to parent's childrenNames. - fd.d.parent.childrenNames[fd.d.name] = struct{}{} } // Record the size of the data being hashed for fd. @@ -1215,7 +1226,7 @@ func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch. case linux.FS_IOC_GETFLAGS: return fd.verityFlags(ctx, args[2].Pointer()) default: - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS } } diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index e4e0dc04f..816e60329 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -281,7 +281,6 @@ go_library( "//pkg/state/wire", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/stack", "//pkg/usermem", diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 7a1a36454..9aa03f506 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -66,6 +66,5 @@ go_library( "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sync", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go index c93ef6ac1..a0e291f58 100644 --- a/pkg/sentry/kernel/cgroup.go +++ b/pkg/sentry/kernel/cgroup.go @@ -196,6 +196,7 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files // uniqueness of controllers enforced by Register, drop the // dying hierarchy now. The eventual unregister by the FS // teardown will become a no-op. 
+ r.unregisterLocked(h.id) return nil } return h.fs diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index 564c3d42e..f240a68aa 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -9,13 +9,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fdnotifier", "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 4466fbc9d..5ea44a2c2 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -22,13 +22,13 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -145,7 +145,7 @@ func (e *EventOperations) hostRead(ctx context.Context, dst usermem.IOSequence) if _, err := unix.Read(e.hostfd, buf[:]); err != nil { if err == unix.EWOULDBLOCK { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } return err } @@ -165,7 +165,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro // We can't complete the read if the value is currently zero. if e.val == 0 { e.mu.Unlock() - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } // Update the value based on the mode the event is operating in. 
@@ -198,7 +198,7 @@ func (e *EventOperations) hostWrite(val uint64) error { hostarch.ByteOrder.PutUint64(buf[:], val) _, err := unix.Write(e.hostfd, buf[:]) if err == unix.EWOULDBLOCK { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } return err } @@ -230,7 +230,7 @@ func (e *EventOperations) Signal(val uint64) error { // uint64 minus 1. if val > math.MaxUint64-1-e.val { e.mu.Unlock() - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } e.val += val diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index cfdea5cf7..c897e3a5f 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -42,7 +42,6 @@ go_library( "//pkg/log", "//pkg/sentry/memmap", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index f5c364c96..2c9ea65aa 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // KeyKind indicates the type of a Key. 
@@ -166,7 +165,7 @@ func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) { case linux.FUTEX_OP_XOR: newVal = oldVal ^ opArg default: - return false, syserror.ENOSYS + return false, linuxerr.ENOSYS } prev, err := t.CompareAndSwapUint32(addr, oldVal, newVal) if err != nil { @@ -192,7 +191,7 @@ func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) { case linux.FUTEX_OP_CMP_GE: return oldVal >= cmpArg, nil default: - return false, syserror.ENOSYS + return false, linuxerr.ENOSYS } } diff --git a/pkg/sentry/kernel/msgqueue/msgqueue.go b/pkg/sentry/kernel/msgqueue/msgqueue.go index 950edfd8c..7c459d076 100644 --- a/pkg/sentry/kernel/msgqueue/msgqueue.go +++ b/pkg/sentry/kernel/msgqueue/msgqueue.go @@ -250,11 +250,13 @@ func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo { // Send appends a message to the message queue, and returns an error if sending // fails. See msgsnd(2). -func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) (err error) { +func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error { // Try to perform a non-blocking send using queue.append. If EWOULDBLOCK // is returned, start the blocking procedure. Otherwise, return normally. creds := auth.CredentialsFromContext(ctx) - if err := q.append(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { + + // Fast path: first attempt a non-blocking push. + if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { return err } @@ -262,25 +264,30 @@ func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid i return linuxerr.EAGAIN } + // Slow path: at this point, the queue was found to be full, and we were + // asked to block. + e, ch := waiter.NewChannelEntry(nil) q.senders.EventRegister(&e, waiter.EventOut) + defer q.senders.EventUnregister(&e) + // Note: we need to check again before blocking the first time since space + // may have become available. 
for { - if err = q.append(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { - break + if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { + return err + } + if err := b.Block(ch); err != nil { + return err } - b.Block(ch) } - - q.senders.EventUnregister(&e) - return err } -// append appends a message to the queue's message list and notifies waiting +// push appends a message to the queue's message list and notifies waiting // receivers that a message has been inserted. It returns an error if adding // the message would cause the queue to exceed its maximum capacity, which can // be used as a signal to block the task. Other errors should be returned as is. -func (q *Queue) append(ctx context.Context, m Message, creds *auth.Credentials, pid int32) error { +func (q *Queue) push(ctx context.Context, m Message, creds *auth.Credentials, pid int32) error { if m.Type <= 0 { return linuxerr.EINVAL } @@ -337,15 +344,14 @@ func (q *Queue) append(ctx context.Context, m Message, creds *auth.Credentials, } // Receive removes a message from the queue and returns it. See msgrcv(2). -func (q *Queue) Receive(ctx context.Context, b Blocker, mType int64, maxSize int64, wait, truncate, except bool, pid int32) (msg *Message, err error) { +func (q *Queue) Receive(ctx context.Context, b Blocker, mType int64, maxSize int64, wait, truncate, except bool, pid int32) (*Message, error) { if maxSize < 0 || maxSize > maxMessageBytes { return nil, linuxerr.EINVAL } max := uint64(maxSize) - - // Try to perform a non-blocking receive using queue.pop. If EWOULDBLOCK - // is returned, start the blocking procedure. Otherwise, return normally. creds := auth.CredentialsFromContext(ctx) + + // Fast path: first attempt a non-blocking pop. 
if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK { return msg, err } @@ -354,24 +360,30 @@ func (q *Queue) Receive(ctx context.Context, b Blocker, mType int64, maxSize int return nil, linuxerr.ENOMSG } + // Slow path: at this point, the queue was found to be empty, and we were + // asked to block. + e, ch := waiter.NewChannelEntry(nil) q.receivers.EventRegister(&e, waiter.EventIn) + defer q.receivers.EventUnregister(&e) + // Note: we need to check again before blocking the first time since a + // message may have become available. for { - if msg, err = q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK { - break + if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK { + return msg, err + } + if err := b.Block(ch); err != nil { + return nil, err } - b.Block(ch) } - q.receivers.EventUnregister(&e) - return msg, err } // pop pops the first message from the queue that matches the given type. It // returns an error for all the cases specified in msgrcv(2). If the queue is // empty or no message of the specified type is available, a EWOULDBLOCK error // is returned, which can then be used as a signal to block the process or fail. -func (q *Queue) pop(ctx context.Context, creds *auth.Credentials, mType int64, maxSize uint64, truncate, except bool, pid int32) (msg *Message, _ error) { +func (q *Queue) pop(ctx context.Context, creds *auth.Credentials, mType int64, maxSize uint64, truncate, except bool, pid int32) (*Message, error) { q.mu.Lock() defer q.mu.Unlock() @@ -392,6 +404,7 @@ func (q *Queue) pop(ctx context.Context, creds *auth.Credentials, mType int64, m } // Get a message from the queue. 
+ var msg *Message switch { case mType == 0: msg = q.messages.Front() diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 94ebac7c5..5b2bac783 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -31,7 +31,6 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", @@ -51,7 +50,6 @@ go_test( "//pkg/errors/linuxerr", "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 08786d704..615591507 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // inodeOperations implements fs.InodeOperations for pipes. 
@@ -95,7 +94,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi if i.p.isNamed && !flags.NonBlocking && !i.p.HasWriters() { if !waitFor(&i.mu, &i.wWakeup, ctx) { r.DecRef(ctx) - return nil, syserror.ErrInterrupted + return nil, linuxerr.ErrInterrupted } } @@ -118,7 +117,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi if !waitFor(&i.mu, &i.rWakeup, ctx) { w.DecRef(ctx) - return nil, syserror.ErrInterrupted + return nil, linuxerr.ErrInterrupted } } return w, nil diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index d25cf658e..31bd7910a 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" ) type sleeper struct { @@ -240,7 +239,7 @@ func TestBlockedOpenIsCancellable(t *testing.T) { // If the cancel on the sleeper didn't work, the open for read would never // return. 
res := <-done - if res.error != syserror.ErrInterrupted { + if res.error != linuxerr.ErrInterrupted { t.Fatalf("Cancellation didn't cause GetFile to return fs.ErrInterrupted, got %v.", res.error) } diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 85e3ce9f4..86beee6fe 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -201,7 +200,7 @@ func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) if !p.HasWriters() { return 0, io.EOF } - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } count = p.size } @@ -250,7 +249,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error) avail := p.max - p.size if avail == 0 { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } short := false if count > avail { @@ -258,7 +257,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error) // (PIPE_BUF) be atomic, but requires no atomicity for writes // larger than this. if count <= atomicIOBytes { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } count = avail short = true @@ -307,7 +306,7 @@ func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error) // If we shortened the write, adjust the returned error appropriately. 
if short { - return done, syserror.ErrWouldBlock + return done, linuxerr.ErrWouldBlock } return done, nil diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go index 867f4a76b..aa3ab305d 100644 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ b/pkg/sentry/kernel/pipe/pipe_test.go @@ -18,8 +18,8 @@ import ( "bytes" "testing" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -51,8 +51,8 @@ func TestPipeReadBlock(t *testing.T) { defer w.DecRef(ctx) n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1))) - if n != 0 || err != syserror.ErrWouldBlock { - t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, syserror.ErrWouldBlock) + if n != 0 || err != linuxerr.ErrWouldBlock { + t.Fatalf("Readv: got (%d, %v), wanted (0, %v)", n, err, linuxerr.ErrWouldBlock) } } @@ -67,7 +67,7 @@ func TestPipeWriteBlock(t *testing.T) { msg := make([]byte, capacity+1) n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) - if wantN, wantErr := int64(capacity), syserror.ErrWouldBlock; n != wantN || err != wantErr { + if wantN, wantErr := int64(capacity), linuxerr.ErrWouldBlock; n != wantN || err != wantErr { t.Fatalf("Writev: got (%d, %v), wanted (%d, %v)", n, err, wantN, wantErr) } } @@ -102,7 +102,7 @@ func TestPipeWriteUntilEnd(t *testing.T) { for { n, err := r.Readv(ctx, dst) dst = dst.DropFirst64(n) - if err == syserror.ErrWouldBlock { + if err == linuxerr.ErrWouldBlock { select { case <-ch: continue @@ -129,7 +129,7 @@ func TestPipeWriteUntilEnd(t *testing.T) { for src.NumBytes() != 0 { n, err := w.Writev(ctx, src) src = src.DropFirst64(n) - if err == syserror.ErrWouldBlock { + if err == linuxerr.ErrWouldBlock { <-ch continue } diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 077d5fd7f..a6f1989f5 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ 
-23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -121,7 +120,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // writer, we have to wait for a writer to open the other end. if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) { fd.DecRef(ctx) - return nil, syserror.EINTR + return nil, linuxerr.EINTR } case writable: @@ -137,7 +136,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // Wait for a reader to open the other end. if !waitFor(&vp.mu, &vp.rWakeup, ctx) { fd.DecRef(ctx) - return nil, syserror.EINTR + return nil, linuxerr.EINTR } } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 079294f81..717c9a6b3 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -465,7 +464,7 @@ func (t *Task) ptraceUnfreezeLocked() { // stop. func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error { if sig != 0 && !sig.IsValid() { - return syserror.EIO + return linuxerr.EIO } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() @@ -532,7 +531,7 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { } if seize { if err := target.ptraceSetOptionsLocked(opts); err != nil { - return syserror.EIO + return linuxerr.EIO } } target.ptraceTracer.Store(t) @@ -569,7 +568,7 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { // ptrace stop. 
func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error { if sig != 0 && !sig.IsValid() { - return syserror.EIO + return linuxerr.EIO } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() @@ -967,7 +966,7 @@ func (t *Task) ptraceInterrupt(target *Task) error { return linuxerr.ESRCH } if !target.ptraceSeized { - return syserror.EIO + return linuxerr.EIO } target.tg.signalHandlers.mu.Lock() defer target.tg.signalHandlers.mu.Unlock() @@ -1030,7 +1029,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE { seize := req == linux.PTRACE_SEIZE if seize && addr != 0 { - return syserror.EIO + return linuxerr.EIO } return t.ptraceAttach(target, seize, uintptr(data)) } @@ -1120,13 +1119,13 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() if !target.ptraceSeized { - return syserror.EIO + return linuxerr.EIO } if target.ptraceSiginfo == nil { - return syserror.EIO + return linuxerr.EIO } if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP { - return syserror.EIO + return linuxerr.EIO } target.tg.signalHandlers.mu.Lock() defer target.tg.signalHandlers.mu.Unlock() diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 63422e155..564add01b 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -19,8 +19,8 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -88,6 +88,6 @@ func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) err return err default: - return syserror.EIO + return linuxerr.EIO } } diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 27514d67b..7c2b94339 100644 --- 
a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -18,11 +18,11 @@ package kernel import ( + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/syserror" ) // ptraceArch implements arch-specific ptrace commands. func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error { - return syserror.EIO + return linuxerr.EIO } diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index 54ca43c2e..0d66648c3 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -18,9 +18,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/syserror" ) const maxSyscallFilterInstructions = 1 << 15 @@ -176,7 +176,7 @@ func (t *Task) AppendSyscallFilter(p bpf.Program, syncAll bool) error { } if totalLength > maxSyscallFilterInstructions { - return syserror.ENOMEM + return linuxerr.ENOMEM } newFilters = append(newFilters, p) diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 2ae08ed12..6aa74219e 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -31,7 +31,6 @@ go_library( "//pkg/sentry/kernel/ipc", "//pkg/sentry/kernel/time", "//pkg/sync", - "//pkg/syserror", ], ) @@ -43,9 +42,9 @@ go_test( deps = [ "//pkg/abi/linux", # keep "//pkg/context", # keep + "//pkg/errors/linuxerr", #keep "//pkg/sentry/contexttest", # keep "//pkg/sentry/kernel/auth", # keep "//pkg/sentry/kernel/ipc", # keep - "//pkg/syserror", # keep ], ) diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 52030df19..28e466948 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -26,7 +26,6 @@ import ( 
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -151,10 +150,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, m // Map reg.objects and map indexes in a registry are of the same size, // check map reg.objects only here for the system limit. if r.reg.ObjectCount() >= setsMax { - return nil, syserror.ENOSPC + return nil, linuxerr.ENOSPC } if r.totalSems() > int(semsTotalMax-nsems) { - return nil, syserror.ENOSPC + return nil, linuxerr.ENOSPC } // Finally create a new set. @@ -545,7 +544,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr // Did it race with a removal operation? if s.dead { - return nil, 0, syserror.EIDRM + return nil, 0, linuxerr.EIDRM } // Validate the operations. @@ -584,7 +583,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch if tmpVals[op.SemNum] != 0 { // Semaphore isn't 0, must wait. if op.SemFlg&linux.IPC_NOWAIT != 0 { - return nil, 0, syserror.ErrWouldBlock + return nil, 0, linuxerr.ErrWouldBlock } w := newWaiter(op.SemOp) @@ -600,7 +599,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch if -op.SemOp > tmpVals[op.SemNum] { // Not enough resources, must wait. 
if op.SemFlg&linux.IPC_NOWAIT != 0 { - return nil, 0, syserror.ErrWouldBlock + return nil, 0, linuxerr.ErrWouldBlock } w := newWaiter(op.SemOp) diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index 2e4ab8121..59ac92ef1 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/ipc" - "gvisor.dev/gvisor/pkg/syserror" ) func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} { @@ -124,14 +124,14 @@ func TestNoWait(t *testing.T) { ops[0].SemOp = -2 ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) + if _, _, err := set.executeOps(ctx, ops, 123); err != linuxerr.ErrWouldBlock { + t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, linuxerr.ErrWouldBlock) } ops[0].SemOp = 0 ops[0].SemFlg = linux.IPC_NOWAIT - if _, _, err := set.executeOps(ctx, ops, 123); err != syserror.ErrWouldBlock { - t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, syserror.ErrWouldBlock) + if _, _, err := set.executeOps(ctx, ops, 123); err != linuxerr.ErrWouldBlock { + t.Fatalf("ExecuteOps(ops) wrong result, got: %v, expected: %v", err, linuxerr.ErrWouldBlock) } } diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 4e8deac4c..2547957ba 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -42,7 +42,6 @@ go_library( "//pkg/sentry/pgalloc", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git 
a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index ba0fbcf90..ab938fa3c 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -49,7 +49,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Registry tracks all shared memory segments in an IPC namespace. The registry @@ -151,7 +150,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz if r.reg.ObjectCount() >= linux.SHMMNI { // "All possible shared memory IDs have been taken (SHMMNI) ..." // - man shmget(2) - return nil, syserror.ENOSPC + return nil, linuxerr.ENOSPC } if !private { @@ -184,7 +183,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, siz // "... allocating a segment of the requested size would cause the // system to exceed the system-wide limit on shared memory (SHMALL)." // - man shmget(2) - return nil, syserror.ENOSPC + return nil, linuxerr.ENOSPC } // Need to create a new segment. 
@@ -521,7 +520,7 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta s.mu.Lock() defer s.mu.Unlock() if s.pendingDestruction && s.ReadRefs() == 0 { - return memmap.MMapOpts{}, syserror.EIDRM + return memmap.MMapOpts{}, linuxerr.EIDRM } creds := auth.CredentialsFromContext(ctx) diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 1110ecca5..4180ca28e 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -15,7 +15,6 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index 47958e2d4..9c5e6698c 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -99,7 +98,7 @@ func (s *SignalOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS info, err := s.target.Sigtimedwait(s.Mask(), 0) if err != nil { // There must be no signal available. - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } // Copy out the signal info using the specified format. diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go index b2520eecf..9bfc155e4 100644 --- a/pkg/sentry/kernel/task_block.go +++ b/pkg/sentry/kernel/task_block.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // BlockWithTimeout blocks t until an event is received from C, the application @@ -33,7 +32,7 @@ import ( // and is unspecified if haveTimeout is false. 
// // - An error which is nil if an event is received from C, ETIMEDOUT if the timeout -// expired, and syserror.ErrInterrupted if t is interrupted. +// expired, and linuxerr.ErrInterrupted if t is interrupted. // // Preconditions: The caller must be running on the task goroutine. func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.Duration) (time.Duration, error) { @@ -67,7 +66,7 @@ func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time. // application monotonic clock indicates a time of deadline (only if // haveDeadline is true), or t is interrupted. It returns nil if an event is // received from C, ETIMEDOUT if the deadline expired, and -// syserror.ErrInterrupted if t is interrupted. +// linuxerr.ErrInterrupted if t is interrupted. // // Preconditions: The caller must be running on the task goroutine. func (t *Task) BlockWithDeadline(C <-chan struct{}, haveDeadline bool, deadline ktime.Time) error { @@ -95,7 +94,7 @@ func (t *Task) BlockWithDeadline(C <-chan struct{}, haveDeadline bool, deadline // BlockWithTimer blocks t until an event is received from C or tchan, or t is // interrupted. It returns nil if an event is received from C, ETIMEDOUT if an -// event is received from tchan, and syserror.ErrInterrupted if t is +// event is received from tchan, and linuxerr.ErrInterrupted if t is // interrupted. // // Most clients should use BlockWithDeadline or BlockWithTimeout instead. @@ -106,7 +105,7 @@ func (t *Task) BlockWithTimer(C <-chan struct{}, tchan <-chan struct{}) error { } // Block blocks t until an event is received from C or t is interrupted. It -// returns nil if an event is received from C and syserror.ErrInterrupted if t +// returns nil if an event is received from C and linuxerr.ErrInterrupted if t // is interrupted. // // Preconditions: The caller must be running on the task goroutine. 
@@ -157,7 +156,7 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error { region.End() t.SleepFinish(false) // Return the indicated error on interrupt. - return syserror.ErrInterrupted + return linuxerr.ErrInterrupted case <-timerChan: region.End() diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index cf8571262..9175b911c 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -66,10 +66,10 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // execStop is a TaskStop that a task sets on itself when it wants to execve @@ -97,7 +97,7 @@ func (t *Task) Execve(newImage *TaskImage) (*SyscallControl, error) { // We lost to a racing group-exit, kill, or exec from another thread // and should just exit. newImage.release() - return nil, syserror.EINTR + return nil, linuxerr.EINTR } // Cancel any racing group stops. 
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index fbfcc19e5..342e5debe 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -32,7 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" ) @@ -859,7 +859,7 @@ func (t *Task) Wait(opts *WaitOptions) (*WaitResult, error) { return wr, err } if err := t.Block(ch); err != nil { - return wr, syserror.ConvertIntr(err, opts.BlockInterruptErr) + return wr, syserr.ConvertIntr(err, opts.BlockInterruptErr) } } } diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 054ff212f..7b336a46b 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -22,6 +22,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/goid" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -29,7 +30,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/syserror" ) // A taskRunState is a reified state in the task state machine. See README.md @@ -197,8 +197,8 @@ func (app *runApp) execute(t *Task) taskRunState { // a pending signal, causing another interruption, but that signal should // not interact with the interrupted syscall.) 
if t.haveSyscallReturn { - if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok { - if sre == syserror.ERESTART_RESTARTBLOCK { + if sre, ok := linuxerr.SyscallRestartErrorFromReturn(t.Arch().Return()); ok { + if sre == linuxerr.ERESTART_RESTARTBLOCK { t.Debugf("Restarting syscall %d with restart block after errno %d: not interrupted by handled signal", t.Arch().SyscallNo(), sre) t.Arch().RestartSyscallWithRestartBlock() } else { diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 7065ac79c..eeb3c5e69 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -161,7 +160,7 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu sigact := computeAction(sig, act) if t.haveSyscallReturn { - if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok { + if sre, ok := linuxerr.SyscallRestartErrorFromReturn(t.Arch().Return()); ok { // Signals that are ignored, cause a thread group stop, or // terminate the thread group do not interact with interrupted // syscalls; in Linux terms, they are never returned to the signal @@ -170,13 +169,13 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu // signal that is actually handled (by userspace). 
if sigact == SignalActionHandler { switch { - case sre == syserror.ERESTARTNOHAND: + case sre == linuxerr.ERESTARTNOHAND: fallthrough - case sre == syserror.ERESTART_RESTARTBLOCK: + case sre == linuxerr.ERESTART_RESTARTBLOCK: fallthrough - case (sre == syserror.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0): + case (sre == linuxerr.ERESTARTSYS && act.Flags&linux.SA_RESTART == 0): t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo) - t.Arch().SetReturn(uintptr(-ExtractErrno(syserror.EINTR, -1))) + t.Arch().SetReturn(uintptr(-ExtractErrno(linuxerr.EINTR, -1))) default: t.Debugf("Restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo) t.Arch().RestartSyscall() diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 0565059c1..217c6f531 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // TaskConfig defines the configuration of a new Task (see below). @@ -170,7 +169,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { // doesn't matter too much since the caller will exit before it returns // to userspace. If the caller isn't in the same thread group, then // we're in uncharted territory and can return whatever we want. - return nil, syserror.EINTR + return nil, linuxerr.EINTR } if err := ts.assignTIDsLocked(t); err != nil { return nil, err @@ -268,7 +267,7 @@ func (ns *PIDNamespace) allocateTID() (ThreadID, error) { // fail with the error ENOMEM; it is not possible to create a new // processes [sic] in a PID namespace whose init process has // terminated." 
- pid_namespaces(7) - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } tid := ns.last for { diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 0586c9def..2b1d7e114 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" ) // SyscallRestartBlock represents the restart block for a syscall restartable @@ -383,8 +382,6 @@ func ExtractErrno(err error, sysno int) int { return int(err) case *errors.Error: return int(err.Errno()) - case syserror.SyscallRestartErrno: - return int(err) case *memmap.BusError: // Bus errors may generate SIGBUS, but for syscalls they still // return EFAULT. See case in task_run.go where the fault is @@ -397,8 +394,8 @@ func ExtractErrno(err error, sysno int) int { case *os.SyscallError: return ExtractErrno(err.Err, sysno) default: - if errno, ok := syserror.TranslateError(err); ok { - return int(errno) + if errno, ok := linuxerr.TranslateError(err); ok { + return int(errno.Errno()) } } panic(fmt.Sprintf("Unknown syscall %d error: %v", sysno, err)) diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index 8e2c36598..bff226a11 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -105,7 +104,7 @@ func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) ( // Each string has a zero terminating byte counted, so copying out a string // requires at least one byte of space. Also, see the calculation below. 
if maxTotalSize <= 0 { - return nil, syserror.ENOMEM + return nil, linuxerr.ENOMEM } thisMax := maxElemSize if maxTotalSize < thisMax { @@ -148,7 +147,7 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro } default: - return syserror.ENOSYS + return linuxerr.ENOSYS } return nil @@ -220,7 +219,7 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan } default: - return hostarch.AddrRangeSeq{}, syserror.ENOSYS + return hostarch.AddrRangeSeq{}, linuxerr.ENOSYS } // Truncate to MAX_RW_COUNT. diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 54bfed644..560a0f33c 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -37,7 +37,6 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/syserr", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 577374fa4..fb213d109 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -32,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -116,7 +115,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { log.Infof("Error reading ELF ident: %v", err) // The entire ident array always exists. if err == io.EOF || err == io.ErrUnexpectedEOF { - err = syserror.ENOEXEC + err = linuxerr.ENOEXEC } return elfInfo{}, err } @@ -124,22 +123,22 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { // Only some callers pre-check the ELF magic. 
if !bytes.Equal(ident[:len(elfMagic)], []byte(elfMagic)) { log.Infof("File is not an ELF") - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } // We only support 64-bit, little endian binaries if class := elf.Class(ident[elf.EI_CLASS]); class != elf.ELFCLASS64 { log.Infof("Unsupported ELF class: %v", class) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if endian := elf.Data(ident[elf.EI_DATA]); endian != elf.ELFDATA2LSB { log.Infof("Unsupported ELF endianness: %v", endian) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if version := elf.Version(ident[elf.EI_VERSION]); version != elf.EV_CURRENT { log.Infof("Unsupported ELF version: %v", version) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } // EI_OSABI is ignored by Linux, which is the only OS supported. os := abi.Linux @@ -151,7 +150,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { log.Infof("Error reading ELF header: %v", err) // The entire header always exists. 
if err == io.EOF || err == io.ErrUnexpectedEOF { - err = syserror.ENOEXEC + err = linuxerr.ENOEXEC } return elfInfo{}, err } @@ -166,7 +165,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { a = arch.ARM64 default: log.Infof("Unsupported ELF machine %d", machine) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } var sharedObject bool @@ -178,25 +177,25 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { sharedObject = true default: log.Infof("Unsupported ELF type %v", elfType) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if int(hdr.Phentsize) != prog64Size { log.Infof("Unsupported phdr size %d", hdr.Phentsize) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } totalPhdrSize := prog64Size * int(hdr.Phnum) if totalPhdrSize < prog64Size { log.Warningf("No phdrs or total phdr size overflows: prog64Size: %d phnum: %d", prog64Size, int(hdr.Phnum)) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if totalPhdrSize > maxTotalPhdrSize { log.Infof("Too many phdrs (%d): total size %d > %d", hdr.Phnum, totalPhdrSize, maxTotalPhdrSize) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if int64(hdr.Phoff) < 0 || int64(hdr.Phoff+uint64(totalPhdrSize)) < 0 { ctx.Infof("Unsupported phdr offset %d", hdr.Phoff) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } phdrBuf := make([]byte, totalPhdrSize) @@ -205,7 +204,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { log.Infof("Error reading ELF phdrs: %v", err) // If phdrs were specified, they should all exist. if err == io.EOF || err == io.ErrUnexpectedEOF { - err = syserror.ENOEXEC + err = linuxerr.ENOEXEC } return elfInfo{}, err } @@ -248,19 +247,19 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr if !ok { // If offset != 0 we should have ensured this would fit. 
ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset) - return syserror.ENOEXEC + return linuxerr.ENOEXEC } addr -= hostarch.Addr(adjust) fileSize := phdr.Filesz + adjust if fileSize < phdr.Filesz { ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust) - return syserror.ENOEXEC + return linuxerr.ENOEXEC } ms, ok := hostarch.Addr(fileSize).RoundUp() if !ok { ctx.Infof("fileSize %#x too large", fileSize) - return syserror.ENOEXEC + return linuxerr.ENOEXEC } mapSize := uint64(ms) @@ -321,7 +320,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr memSize := phdr.Memsz + adjust if memSize < phdr.Memsz { ctx.Infof("Computed segment mem size overflows: %#x + %#x", phdr.Memsz, adjust) - return syserror.ENOEXEC + return linuxerr.ENOEXEC } // Allocate more anonymous pages if necessary. @@ -333,7 +332,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr anonSize, ok := hostarch.Addr(memSize - mapSize).RoundUp() if !ok { ctx.Infof("extra anon pages too large: %#x", memSize-mapSize) - return syserror.ENOEXEC + return linuxerr.ENOEXEC } // N.B. Linux uses vm_brk_flags to map these pages, which only @@ -423,27 +422,27 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in // NOTE(b/37474556): Linux allows out-of-order // segments, in violation of the spec. ctx.Infof("PT_LOAD headers out-of-order. %#x < %#x", vaddr, end) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } var ok bool end, ok = vaddr.AddLength(phdr.Memsz) if !ok { ctx.Infof("PT_LOAD header size overflows. 
%#x + %#x", vaddr, phdr.Memsz) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } case elf.PT_INTERP: if phdr.Filesz < 2 { ctx.Infof("PT_INTERP path too small: %v", phdr.Filesz) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } if phdr.Filesz > linux.PATH_MAX { ctx.Infof("PT_INTERP path too big: %v", phdr.Filesz) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } if int64(phdr.Off) < 0 || int64(phdr.Off+phdr.Filesz) < 0 { ctx.Infof("Unsupported PT_INTERP offset %d", phdr.Off) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } path := make([]byte, phdr.Filesz) @@ -451,12 +450,12 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in if err != nil { // If an interpreter was specified, it should exist. ctx.Infof("Error reading PT_INTERP path: %v", err) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } if path[len(path)-1] != 0 { ctx.Infof("PT_INTERP path not NUL-terminated: %v", path) - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } // Strip NUL-terminator and everything beyond from @@ -498,7 +497,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in totalSize, ok := totalSize.RoundUp() if !ok { ctx.Infof("ELF PT_LOAD segments too big") - return loadedELF{}, syserror.ENOEXEC + return loadedELF{}, linuxerr.ENOEXEC } var err error @@ -592,7 +591,7 @@ func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureS // Check Image Compatibility. 
if arch.Host != info.arch { ctx.Warningf("Found mismatch for platform %s with ELF type %s", arch.Host.String(), info.arch.String()) - return loadedELF{}, nil, syserror.ENOEXEC + return loadedELF{}, nil, linuxerr.ENOEXEC } // Create the arch.Context now so we can prepare the mmap layout before @@ -681,7 +680,7 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error if interp.interpreter != "" { // No recursive interpreters! ctx.Infof("Interpreter requires an interpreter") - return loadedELF{}, nil, syserror.ENOEXEC + return loadedELF{}, nil, linuxerr.ENOEXEC } } diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go index 3e302d92c..1ec0d7019 100644 --- a/pkg/sentry/loader/interpreter.go +++ b/pkg/sentry/loader/interpreter.go @@ -19,8 +19,8 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsbridge" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -43,14 +43,14 @@ func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.Fil // Short read is OK. if err != nil && err != io.ErrUnexpectedEOF { if err == io.EOF { - err = syserror.ENOEXEC + err = linuxerr.ENOEXEC } return "", []string{}, err } line = line[:n] if !bytes.Equal(line[:2], []byte(interpreterScriptMagic)) { - return "", []string{}, syserror.ENOEXEC + return "", []string{}, linuxerr.ENOEXEC } // Ignore #!. 
line = line[2:] @@ -82,7 +82,7 @@ func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.Fil if string(interp) == "" { ctx.Infof("Interpreter script contains no interpreter: %v", line) - return "", []string{}, syserror.ENOEXEC + return "", []string{}, linuxerr.ENOEXEC } // Build the new argument list: diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 86d0c54cd..6a356779c 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -35,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -91,7 +90,7 @@ type LoadArgs struct { func openPath(ctx context.Context, args LoadArgs) (fsbridge.File, error) { if args.Filename == "" { ctx.Infof("cannot open empty name") - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } // TODO(gvisor.dev/issue/160): Linux requires only execute permission, @@ -172,7 +171,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context // (e.g., #!a). if err != nil && err != io.ErrUnexpectedEOF { if err == io.EOF { - err = syserror.ENOEXEC + err = linuxerr.ENOEXEC } return loadedELF{}, nil, nil, nil, err } @@ -190,7 +189,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): if args.CloseOnExec { - return loadedELF{}, nil, nil, nil, syserror.ENOENT + return loadedELF{}, nil, nil, nil, linuxerr.ENOENT } args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv) if err != nil { @@ -202,7 +201,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context default: ctx.Infof("Unknown magic: %v", hdr) - return loadedELF{}, nil, nil, nil, syserror.ENOEXEC + return loadedELF{}, nil, nil, nil, linuxerr.ENOEXEC } // Set to nil in case we loop on a Interpreter Script. 
args.File = nil diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index 054ef1723..3abd2ee7d 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -34,7 +34,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -102,14 +101,14 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro first = &info.phdrs[i] if phdr.Off != 0 { log.Warningf("First PT_LOAD segment has non-zero file offset") - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } } memoryOffset := phdr.Vaddr - first.Vaddr if memoryOffset != phdr.Off { log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } // memsz larger than filesz means that extra zeroed space should be @@ -118,24 +117,24 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro // zeroes. 
if phdr.Memsz != phdr.Filesz { log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } start := hostarch.Addr(memoryOffset) end, ok := start.AddLength(phdr.Memsz) if !ok { log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if uint64(end) > size { log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } if prev != nil { if start < prevEnd { log.Warningf("PT_LOAD segments out of order") - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } // We mprotect entire pages, so each segment must be in @@ -144,7 +143,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro startPage := start.RoundDown() if prevEndPage >= startPage { log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage) - return elfInfo{}, syserror.ENOEXEC + return elfInfo{}, linuxerr.ENOEXEC } } prev = &info.phdrs[i] @@ -271,11 +270,11 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) { if v.os != bin.os { ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } if v.arch != bin.arch { ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } // Reserve address space for the VDSO and its parameter page, which is @@ -348,35 +347,35 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) segAddr, ok := vdsoAddr.AddLength(memoryOffset) if !ok { ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset) - return 0, syserror.ENOEXEC + 
return 0, linuxerr.ENOEXEC } segPage := segAddr.RoundDown() segSize := hostarch.Addr(phdr.Memsz) segSize, ok = segSize.AddLength(segAddr.PageOffset()) if !ok { ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset()) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } segSize, ok = segSize.RoundUp() if !ok { ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset()) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } segEnd, ok := segPage.AddLength(uint64(segSize)) if !ok { ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } if segEnd > vdsoEnd { ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } perms := progFlagsAsPerms(phdr.Flags) if perms != hostarch.Read { if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil { ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err) - return 0, syserror.ENOEXEC + return 0, linuxerr.ENOEXEC } } } diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index c30e88725..a89bfa680 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -54,7 +54,6 @@ go_library( "//pkg/hostarch", "//pkg/log", "//pkg/safemem", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index 69aff21b6..b7d782b7f 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -144,7 +144,6 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip/buffer", "//pkg/usermem", ], diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 256eb4afb..9e00c2cec 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/futex" 
"gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" ) // HandleUserFault handles an application page fault. sp is the faulting @@ -79,7 +78,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar } length, ok := hostarch.Addr(opts.Length).RoundUp() if !ok { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } opts.Length = uint64(length) @@ -90,7 +89,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar } // Offset + length must not overflow. if end := opts.Offset + opts.Length; end < opts.Offset { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } } else { opts.Offset = 0 @@ -253,7 +252,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize) sz = maxStackSize } else if sz == 0 { - return hostarch.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, linuxerr.ENOMEM } szaddr := hostarch.Addr(sz) ctx.Debugf("Allocating stack with size of %v bytes", sz) @@ -262,7 +261,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro // randomization can't be disabled. stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown() if stackEnd < szaddr { - return hostarch.AddrRange{}, syserror.ENOMEM + return hostarch.AddrRange{}, linuxerr.ENOMEM } stackStart := stackEnd - szaddr mm.mappingMu.Lock() @@ -500,7 +499,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // Check against RLIMIT_AS. 
newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } if vma := vseg.ValuePtr(); vma.mappable != nil { @@ -599,11 +598,11 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h } rlength, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar, ok := addr.ToRange(uint64(rlength)) if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } effectivePerms := realPerms.Effective() @@ -616,19 +615,19 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h // the non-growsDown case. vseg := mm.vmas.LowerBoundSegment(ar.Start) if !vseg.Ok() { - return syserror.ENOMEM + return linuxerr.ENOMEM } if growsDown { if !vseg.ValuePtr().growsDown { return linuxerr.EINVAL } if ar.End <= vseg.Start() { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar.Start = vseg.Start() } else { if ar.Start < vseg.Start() { - return syserror.ENOMEM + return linuxerr.ENOMEM } } @@ -688,7 +687,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h } vseg, _ = vseg.NextNonEmpty() if !vseg.Ok() { - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -724,7 +723,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. 
if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.ENOMEM + return addr, linuxerr.ENOMEM } oldbrkpg, _ := mm.brk.End.RoundUp() @@ -798,7 +797,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u } if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -835,7 +834,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mm.vmas.MergeAdjacent(ar) if unmapped { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } if mode == memmap.MLockEager { @@ -850,7 +849,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u // case, which is converted to ENOMEM by mlock. mm.activeMu.Unlock() mm.mappingMu.RUnlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess) if err != nil { @@ -858,7 +857,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mm.mappingMu.RUnlock() // Linux: mm/mlock.c:__mlock_posix_error_return() if linuxerr.Equals(linuxerr.EFAULT, err) { - return syserror.ENOMEM + return linuxerr.ENOMEM } if linuxerr.Equals(linuxerr.ENOMEM, err) { return linuxerr.EAGAIN @@ -917,7 +916,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error } if uint64(mm.vmas.Span()) > mlockLimit { mm.mappingMu.Unlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } } } @@ -1040,7 +1039,7 @@ func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork } if mm.vmas.SpanRange(ar) != ar.Length() { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } @@ -1099,7 +1098,7 @@ func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { // to the rest (but 
returns ENOMEM from the system call, as it should)." - // madvise(2) if mm.vmas.SpanRange(ar) != ar.Length() { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } @@ -1123,11 +1122,11 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u } la, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.ENOMEM + return linuxerr.ENOMEM } mm.mappingMu.RLock() @@ -1135,7 +1134,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u vseg := mm.vmas.LowerBoundSegment(ar.Start) if !vseg.Ok() { mm.mappingMu.RUnlock() - return syserror.ENOMEM + return linuxerr.ENOMEM } var unmapped bool lastEnd := ar.Start @@ -1184,7 +1183,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u } if unmapped { - return syserror.ENOMEM + return linuxerr.ENOMEM } return nil } diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index 5f8ab7ca3..e34b7a2f7 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" ) // Preconditions: @@ -59,7 +58,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp newUsageAS -= uint64(mm.vmas.SpanRange(ar)) } if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { - return vmaIterator{}, hostarch.AddrRange{}, syserror.ENOMEM + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.ENOMEM } if opts.MLockMode != memmap.MLockNone { @@ -178,7 +177,7 @@ func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOp // Fixed mappings accept only the requested address. if opts.Fixed { - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Prefer hugepage alignment if a hugepage or more is requested. 
@@ -216,7 +215,7 @@ func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bou return gr.Start, nil } } - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Preconditions: mm.mappingMu must be locked. @@ -236,7 +235,7 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo return start, nil } } - return 0, syserror.ENOMEM + return 0, linuxerr.ENOMEM } // Preconditions: mm.mappingMu must be locked. diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index d351869ef..496a9fd97 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -97,7 +97,6 @@ go_library( "//pkg/state", "//pkg/state/wire", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index 0c8542485..68e17d343 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -39,7 +39,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // MemoryFile is a memmap.File whose pages may be allocated to arbitrary @@ -404,7 +403,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.File // Find a range in the underlying file. fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment) if !ok { - return memmap.FileRange{}, syserror.ENOMEM + return memmap.FileRange{}, linuxerr.ENOMEM } // Expand the file if needed. 
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index e7092a756..d67563958 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -519,15 +519,21 @@ func (c *vCPU) lock() { // //go:nosplit func (c *vCPU) unlock() { - if atomic.CompareAndSwapUint32(&c.state, vCPUUser|vCPUGuest, vCPUGuest) { + origState := atomicbitops.CompareAndSwapUint32(&c.state, vCPUUser|vCPUGuest, vCPUGuest) + if origState == vCPUUser|vCPUGuest { // Happy path: no exits are forced, and we can continue // executing on our merry way with a single atomic access. return } // Clear the lock. - origState := atomic.LoadUint32(&c.state) - atomicbitops.AndUint32(&c.state, ^vCPUUser) + for { + state := atomicbitops.CompareAndSwapUint32(&c.state, origState, origState&^vCPUUser) + if state == origState { + break + } + origState = state + } switch origState { case vCPUUser: // Normal state. diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 3950caa0f..4ea89f9d0 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -38,7 +38,6 @@ go_library( "//pkg/sentry/socket/control", "//pkg/sentry/vfs", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/stack", "//pkg/usermem", diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 38cb2c99c..1c1e501ba 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -35,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -288,7 +287,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int, fd, syscallErr := accept4(s.fd, peerAddrPtr, peerAddrlenPtr, unix.SOCK_NONBLOCK|unix.SOCK_CLOEXEC) if blocking { var ch chan struct{} - for syscallErr 
== syserror.ErrWouldBlock { + for syscallErr == linuxerr.ErrWouldBlock { if ch != nil { if syscallErr = t.Block(ch); syscallErr != nil { break @@ -535,7 +534,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags n, err := copyToDst() // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. if flags&(unix.MSG_DONTWAIT|unix.MSG_ERRQUEUE) == 0 { - for err == syserror.ErrWouldBlock { + for err == linuxerr.ErrWouldBlock { // We only expect blocking to come from the actual syscall, in which // case it can't have returned any data. if n != 0 { @@ -707,7 +706,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b var ch chan struct{} n, err := src.CopyInTo(t, sendmsgFromBlocks) if flags&unix.MSG_DONTWAIT == 0 { - for err == syserror.ErrWouldBlock { + for err == linuxerr.ErrWouldBlock { // We only expect blocking to come from the actual syscall, in which // case it can't have returned any data. if n != 0 { @@ -716,7 +715,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b if ch != nil { if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -735,7 +734,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b func translateIOSyscallError(err error) error { if err == unix.EAGAIN || err == unix.EWOULDBLOCK { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } return err } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index ed85404da..9710a15ee 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -36,7 +36,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 
5c3ae26f8..ed5fa9c38 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -39,7 +39,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -530,7 +529,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags } } - if n, err := doRead(); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { + if n, err := doRead(); err != linuxerr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { var mflags int if n < int64(r.MsgSize) { mflags |= linux.MSG_TRUNC @@ -548,7 +547,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags defer s.EventUnregister(&e) for { - if n, err := doRead(); err != syserror.ErrWouldBlock { + if n, err := doRead(); err != linuxerr.ErrWouldBlock { var mflags int if n < int64(r.MsgSize) { mflags |= linux.MSG_TRUNC diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index e828982eb..e347442e7 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -42,7 +42,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/header", "//pkg/tcpip/link/tun", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9b844b0c0..2f9462cee 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -56,7 +56,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -459,7 +458,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS } n, _, _, _, _, 
err := s.nonBlockingRead(ctx, dst, false, false, false) if err == syserr.ErrWouldBlock { - return int64(n), syserror.ErrWouldBlock + return int64(n), linuxerr.ErrWouldBlock } if err != nil { return 0, err.ToError() @@ -492,14 +491,14 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO r := src.Reader(ctx) n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) if _, ok := err.(*tcpip.ErrWouldBlock); ok { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } if err != nil { return 0, syserr.TranslateNetstackError(err).ToError() } if n < src.NumBytes() { - return n, syserror.ErrWouldBlock + return n, linuxerr.ErrWouldBlock } return n, nil @@ -2951,7 +2950,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy s.readMu.Lock() defer s.readMu.Unlock() if !s.timestampValid { - return 0, syserror.ENOENT + return 0, linuxerr.ENOENT } tv := linux.NsecToTimeval(s.timestampNS) diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index edc160b1b..3cdf29b80 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -113,7 +112,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. 
} n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false) if err == syserr.ErrWouldBlock { - return int64(n), syserror.ErrWouldBlock + return int64(n), linuxerr.ErrWouldBlock } if err != nil { return 0, err.ToError() @@ -132,14 +131,14 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs r := src.Reader(ctx) n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) if _, ok := err.(*tcpip.ErrWouldBlock); ok { - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } if err != nil { return 0, syserr.TranslateNetstackError(err).ToError() } if n < src.NumBytes() { - return n, syserror.ErrWouldBlock + return n, linuxerr.ErrWouldBlock } return n, nil diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index 5c3cdef6a..7b546c04d 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -62,7 +62,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/syserr", - "//pkg/syserror", "//pkg/tcpip", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index 33f9aeb06..9a398c3b5 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -129,9 +129,9 @@ func newConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProv stype: stype, } + ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */) ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */) - ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) return ep } @@ -517,3 +517,6 @@ func (e *connectionedEndpoint) OnSetSendBufferSize(v int64) (newSz int64) { } return v } + +// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters. 
+func (e *connectionedEndpoint) WakeupWriters() {} diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go index 61338728a..61311718e 100644 --- a/pkg/sentry/socket/unix/transport/connectionless.go +++ b/pkg/sentry/socket/unix/transport/connectionless.go @@ -44,9 +44,9 @@ func NewConnectionless(ctx context.Context) Endpoint { q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: defaultBufferSize} q.InitRefs() ep.receiver = &queueReceiver{readQueue: &q} + ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */) ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */) - ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) return ep } @@ -227,3 +227,6 @@ func (e *connectionlessEndpoint) OnSetSendBufferSize(v int64) (newSz int64) { } return v } + +// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters. 
+func (e *connectionlessEndpoint) WakeupWriters() {} diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 8ccdadae9..e9e482017 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -38,7 +38,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -494,7 +493,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b } n, err := src.CopyInTo(t, &w) - if err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { + if err != linuxerr.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { return int(n), syserr.FromError(err) } @@ -514,13 +513,13 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b n, err = src.CopyInTo(t, &w) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -648,7 +647,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags } var total int64 - if n, err := doRead(); err != syserror.ErrWouldBlock || dontWait { + if n, err := doRead(); err != linuxerr.ErrWouldBlock || dontWait { var from linux.SockAddr var fromLen uint32 if r.From != nil && len([]byte(r.From.Addr)) != 0 { @@ -683,7 +682,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags defer s.EventUnregister(&e) for { - if n, err := doRead(); err != syserror.ErrWouldBlock { + if n, err := doRead(); err != linuxerr.ErrWouldBlock { var from linux.SockAddr var fromLen uint32 if r.From != nil { diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index f2c55588f..7a7c80ac6 100644 
--- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -16,7 +16,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/time", - "//pkg/syserror", "//pkg/waiter", ], ) diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index b5a371d9a..394396cde 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -104,7 +104,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 76389fbe3..f4d549a3f 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) var ( @@ -90,9 +89,9 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er } // Translate error, if possible, to consolidate errors from other packages - // into a smaller set of errors from syserror package. + // into a smaller set of errors from linuxerr package. translatedErr := errOrig - if errno, ok := syserror.TranslateError(errOrig); ok { + if errno, ok := linuxerr.TranslateError(errOrig); ok { translatedErr = errno } switch { @@ -167,10 +166,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er // files. Since we have a partial read/write, we consume // ErrWouldBlock, returning the partial result. return true, nil - } - - switch errOrig.(type) { - case syserror.SyscallRestartErrno: + case linuxerr.IsRestartError(translatedErr): // Identical to the EINTR case. 
return true, nil } diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index a416dfb7d..2046a48b9 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -175,8 +174,8 @@ var AMD64 = &kernel.SyscallTable{ 119: syscalls.Supported("setresgid", Setresgid), 120: syscalls.Supported("getresgid", Getresgid), 121: syscalls.Supported("getpgid", Getpgid), - 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 122: syscalls.ErrorWithEvent("setfsuid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 123: syscalls.ErrorWithEvent("setfsgid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) 124: syscalls.Supported("getsid", Getsid), 125: syscalls.Supported("capget", Capget), 126: syscalls.Supported("capset", Capset), @@ -187,12 +186,12 @@ var AMD64 = &kernel.SyscallTable{ 131: syscalls.Supported("sigaltstack", Sigaltstack), 132: syscalls.Supported("utime", Utime), 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. 
Only regular file and FIFO creation are supported.", nil), - 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil), + 134: syscalls.Error("uselib", linuxerr.ENOSYS, "Obsolete", nil), 135: syscalls.ErrorWithEvent("personality", linuxerr.EINVAL, "Unable to change personality.", nil), - 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil), + 136: syscalls.ErrorWithEvent("ustat", linuxerr.ENOSYS, "Needs filesystem support.", nil), 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), - 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}), + 139: syscalls.ErrorWithEvent("sysfs", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/165"}), 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), @@ -230,15 +229,15 @@ var AMD64 = &kernel.SyscallTable{ 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil), 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), - 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), - 178: syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), + 177: syscalls.Error("get_kernel_syms", linuxerr.ENOSYS, "Not supported in Linux > 2.6.", nil), + 178: syscalls.Error("query_module", linuxerr.ENOSYS, "Not supported in Linux > 2.6.", nil), 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations - 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), - 
181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), - 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), - 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil), - 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil), - 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), + 180: syscalls.Error("nfsservctl", linuxerr.ENOSYS, "Removed after Linux 3.1.", nil), + 181: syscalls.Error("getpmsg", linuxerr.ENOSYS, "Not implemented in Linux.", nil), + 182: syscalls.Error("putpmsg", linuxerr.ENOSYS, "Not implemented in Linux.", nil), + 183: syscalls.Error("afs_syscall", linuxerr.ENOSYS, "Not implemented in Linux.", nil), + 184: syscalls.Error("tuxcall", linuxerr.ENOSYS, "Not implemented in Linux.", nil), + 185: syscalls.Error("security", linuxerr.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), @@ -258,18 +257,18 @@ var AMD64 = &kernel.SyscallTable{ 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), - 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), + 205: syscalls.Error("set_thread_area", linuxerr.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), + 211: syscalls.Error("get_thread_area", linuxerr.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), 213: syscalls.Supported("epoll_create", EpollCreate), - 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil), - 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil), - 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), + 214: syscalls.ErrorWithEvent("epoll_ctl_old", linuxerr.ENOSYS, "Deprecated.", nil), + 215: syscalls.ErrorWithEvent("epoll_wait_old", linuxerr.ENOSYS, "Deprecated.", nil), + 216: syscalls.ErrorWithEvent("remap_file_pages", linuxerr.ENOSYS, "Deprecated since Linux 3.16.", nil), 217: syscalls.Supported("getdents64", Getdents64), 218: syscalls.Supported("set_tid_address", SetTidAddress), 219: syscalls.Supported("restart_syscall", RestartSyscall), @@ -289,16 +288,16 @@ var AMD64 = &kernel.SyscallTable{ 233: syscalls.Supported("epoll_ctl", EpollCtl), 234: syscalls.Supported("tgkill", Tgkill), 235: syscalls.Supported("utimes", Utimes), - 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil), + 236: syscalls.Error("vserver", linuxerr.ENOSYS, 
"Not implemented by Linux", nil), 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), - 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 240: syscalls.ErrorWithEvent("mq_open", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 241: syscalls.ErrorWithEvent("mq_unlink", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 242: syscalls.ErrorWithEvent("mq_timedsend", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 243: syscalls.ErrorWithEvent("mq_timedreceive", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 244: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 245: syscalls.ErrorWithEvent("mq_getsetattr", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", 
nil), 247: syscalls.Supported("waitid", Waitid), 248: syscalls.Error("add_key", linuxerr.EACCES, "Not available to user.", nil), @@ -331,7 +330,7 @@ var AMD64 = &kernel.SyscallTable{ 275: syscalls.Supported("splice", Splice), 276: syscalls.Supported("tee", Tee), 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), - 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 278: syscalls.ErrorWithEvent("vmsplice", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) 280: syscalls.Supported("utimensat", Utimensat), 281: syscalls.Supported("epoll_pwait", EpollPwait), @@ -353,8 +352,8 @@ var AMD64 = &kernel.SyscallTable{ 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), 298: syscalls.ErrorWithEvent("perf_event_open", linuxerr.ENODEV, "No support for perf counters", nil), 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), - 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 300: syscalls.ErrorWithEvent("fanotify_init", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 301: syscalls.ErrorWithEvent("fanotify_mark", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 302: syscalls.Supported("prlimit64", Prlimit64), 303: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 304: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), @@ -363,48 +362,48 @@ var AMD64 = &kernel.SyscallTable{ 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), 308: 
syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) 309: syscalls.Supported("getcpu", Getcpu), - 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 310: syscalls.ErrorWithEvent("process_vm_readv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 311: syscalls.ErrorWithEvent("process_vm_writev", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), - 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) + 314: syscalls.ErrorWithEvent("sched_setattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 315: syscalls.ErrorWithEvent("sched_getattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 316: syscalls.ErrorWithEvent("renameat2", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) 317: syscalls.Supported("seccomp", Seccomp), 318: syscalls.Supported("getrandom", GetRandom), 319: syscalls.Supported("memfd_create", MemfdCreate), 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil), 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 322: syscalls.Supported("execveat", Execveat), - 323: 
syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) + 323: syscalls.ErrorWithEvent("userfaultfd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) 324: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil), 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls implemented after 325 are "backports" from versions // of Linux after 4.4. - 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), + 326: syscalls.ErrorWithEvent("copy_file_range", linuxerr.ENOSYS, "", nil), 327: syscalls.Supported("preadv2", Preadv2), 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), - 329: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), - 330: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), - 331: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), + 329: syscalls.ErrorWithEvent("pkey_mprotect", linuxerr.ENOSYS, "", nil), + 330: syscalls.ErrorWithEvent("pkey_alloc", linuxerr.ENOSYS, "", nil), + 331: syscalls.ErrorWithEvent("pkey_free", linuxerr.ENOSYS, "", nil), 332: syscalls.Supported("statx", Statx), - 333: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), + 333: syscalls.ErrorWithEvent("io_pgetevents", linuxerr.ENOSYS, "", nil), 334: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), // Linux skips ahead to syscall 424 to sync numbers between arches. 
- 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), - 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), - 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), - 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), - 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), - 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), - 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), - 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), - 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), - 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), - 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), - 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), + 424: syscalls.ErrorWithEvent("pidfd_send_signal", linuxerr.ENOSYS, "", nil), + 425: syscalls.ErrorWithEvent("io_uring_setup", linuxerr.ENOSYS, "", nil), + 426: syscalls.ErrorWithEvent("io_uring_enter", linuxerr.ENOSYS, "", nil), + 427: syscalls.ErrorWithEvent("io_uring_register", linuxerr.ENOSYS, "", nil), + 428: syscalls.ErrorWithEvent("open_tree", linuxerr.ENOSYS, "", nil), + 429: syscalls.ErrorWithEvent("move_mount", linuxerr.ENOSYS, "", nil), + 430: syscalls.ErrorWithEvent("fsopen", linuxerr.ENOSYS, "", nil), + 431: syscalls.ErrorWithEvent("fsconfig", linuxerr.ENOSYS, "", nil), + 432: syscalls.ErrorWithEvent("fsmount", linuxerr.ENOSYS, "", nil), + 433: syscalls.ErrorWithEvent("fspick", linuxerr.ENOSYS, "", nil), + 434: syscalls.ErrorWithEvent("pidfd_open", linuxerr.ENOSYS, "", nil), + 435: syscalls.ErrorWithEvent("clone3", linuxerr.ENOSYS, "", nil), 441: syscalls.Supported("epoll_pwait2", EpollPwait2), }, Emulate: map[hostarch.Addr]uintptr{ @@ -414,7 +413,7 @@ var AMD64 = &kernel.SyscallTable{ }, Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 
t.Kernel().EmitUnimplementedEvent(t) - return 0, syserror.ENOSYS + return 0, linuxerr.ENOSYS }, } @@ -472,7 +471,7 @@ var ARM64 = &kernel.SyscallTable{ 39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), 40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), 41: syscalls.Error("pivot_root", linuxerr.EPERM, "", nil), - 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), + 42: syscalls.Error("nfsservctl", linuxerr.ENOSYS, "Removed after Linux 3.1.", nil), 43: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), 44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), 45: syscalls.Supported("truncate", Truncate), @@ -505,7 +504,7 @@ var ARM64 = &kernel.SyscallTable{ 72: syscalls.Supported("pselect", Pselect), 73: syscalls.Supported("ppoll", Ppoll), 74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), - 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 75: syscalls.ErrorWithEvent("vmsplice", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) 76: syscalls.Supported("splice", Splice), 77: syscalls.Supported("tee", Tee), 78: syscalls.Supported("readlinkat", Readlinkat), @@ -581,8 +580,8 @@ var ARM64 = &kernel.SyscallTable{ 148: syscalls.Supported("getresuid", Getresuid), 149: syscalls.Supported("setresgid", Setresgid), 150: syscalls.Supported("getresgid", Getresgid), - 151: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 152: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 151: syscalls.ErrorWithEvent("setfsuid", linuxerr.ENOSYS, "", 
[]string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 152: syscalls.ErrorWithEvent("setfsgid", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) 153: syscalls.Supported("times", Times), 154: syscalls.Supported("setpgid", Setpgid), 155: syscalls.Supported("getpgid", Getpgid), @@ -610,12 +609,12 @@ var ARM64 = &kernel.SyscallTable{ 177: syscalls.Supported("getegid", Getegid), 178: syscalls.Supported("gettid", Gettid), 179: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), - 180: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 181: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 182: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 180: syscalls.ErrorWithEvent("mq_open", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 181: syscalls.ErrorWithEvent("mq_unlink", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 182: syscalls.ErrorWithEvent("mq_timedsend", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 183: syscalls.ErrorWithEvent("mq_timedreceive", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 184: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 185: syscalls.ErrorWithEvent("mq_getsetattr", 
linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 186: syscalls.Supported("msgget", Msgget), 187: syscalls.Supported("msgctl", Msgctl), 188: syscalls.Supported("msgrcv", Msgrcv), @@ -664,7 +663,7 @@ var ARM64 = &kernel.SyscallTable{ 231: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 232: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil), 233: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), - 234: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), + 234: syscalls.ErrorWithEvent("remap_file_pages", linuxerr.ENOSYS, "Deprecated since Linux 3.16.", nil), 235: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), 236: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), 237: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), @@ -676,60 +675,60 @@ var ARM64 = &kernel.SyscallTable{ 243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), 260: syscalls.Supported("wait4", Wait4), 261: syscalls.Supported("prlimit64", Prlimit64), - 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 262: syscalls.ErrorWithEvent("fanotify_init", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 263: syscalls.ErrorWithEvent("fanotify_mark", linuxerr.ENOSYS, "Needs CONFIG_FANOTIFY", 
nil), 264: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 265: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), 267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), 268: syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) 269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), - 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 270: syscalls.ErrorWithEvent("process_vm_readv", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 271: syscalls.ErrorWithEvent("process_vm_writev", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 272: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), 273: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), - 274: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 275: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 276: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) + 274: syscalls.ErrorWithEvent("sched_setattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 275: syscalls.ErrorWithEvent("sched_getattr", linuxerr.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 276: 
syscalls.ErrorWithEvent("renameat2", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) 277: syscalls.Supported("seccomp", Seccomp), 278: syscalls.Supported("getrandom", GetRandom), 279: syscalls.Supported("memfd_create", MemfdCreate), 280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 281: syscalls.Supported("execveat", Execveat), - 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) + 282: syscalls.ErrorWithEvent("userfaultfd", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) 283: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil), 284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls after 284 are "backports" from versions of Linux after 4.4. - 285: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), + 285: syscalls.ErrorWithEvent("copy_file_range", linuxerr.ENOSYS, "", nil), 286: syscalls.Supported("preadv2", Preadv2), 287: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), - 288: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), - 289: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), - 290: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), + 288: syscalls.ErrorWithEvent("pkey_mprotect", linuxerr.ENOSYS, "", nil), + 289: syscalls.ErrorWithEvent("pkey_alloc", linuxerr.ENOSYS, "", nil), + 290: syscalls.ErrorWithEvent("pkey_free", linuxerr.ENOSYS, "", nil), 291: syscalls.Supported("statx", Statx), - 292: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), + 292: syscalls.ErrorWithEvent("io_pgetevents", linuxerr.ENOSYS, "", nil), 293: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), // Linux skips ahead to syscall 424 to sync numbers between arches. 
- 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), - 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), - 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), - 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), - 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), - 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), - 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), - 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), - 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), - 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), - 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), - 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), + 424: syscalls.ErrorWithEvent("pidfd_send_signal", linuxerr.ENOSYS, "", nil), + 425: syscalls.ErrorWithEvent("io_uring_setup", linuxerr.ENOSYS, "", nil), + 426: syscalls.ErrorWithEvent("io_uring_enter", linuxerr.ENOSYS, "", nil), + 427: syscalls.ErrorWithEvent("io_uring_register", linuxerr.ENOSYS, "", nil), + 428: syscalls.ErrorWithEvent("open_tree", linuxerr.ENOSYS, "", nil), + 429: syscalls.ErrorWithEvent("move_mount", linuxerr.ENOSYS, "", nil), + 430: syscalls.ErrorWithEvent("fsopen", linuxerr.ENOSYS, "", nil), + 431: syscalls.ErrorWithEvent("fsconfig", linuxerr.ENOSYS, "", nil), + 432: syscalls.ErrorWithEvent("fsmount", linuxerr.ENOSYS, "", nil), + 433: syscalls.ErrorWithEvent("fspick", linuxerr.ENOSYS, "", nil), + 434: syscalls.ErrorWithEvent("pidfd_open", linuxerr.ENOSYS, "", nil), + 435: syscalls.ErrorWithEvent("clone3", linuxerr.ENOSYS, "", nil), 441: syscalls.Supported("epoll_pwait2", EpollPwait2), }, Emulate: map[hostarch.Addr]uintptr{}, Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { t.Kernel().EmitUnimplementedEvent(t) - return 0, syserror.ENOSYS + return 0, 
linuxerr.ENOSYS }, } diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index 9dea78085..373948991 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // CopyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and @@ -67,6 +66,6 @@ func copyInSigSetWithSize(t *kernel.Task, addr hostarch.Addr) (hostarch.Addr, ui maskSize := uint(hostarch.ByteOrder.Uint64(in[8:])) return maskAddr, maskSize, nil default: - return 0, 0, syserror.ENOSYS + return 0, 0, linuxerr.ENOSYS } } diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index 4ce3430e2..2f00c3783 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -26,7 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/usermem" ) @@ -138,7 +138,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if count > 0 || linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return uintptr(count), nil, nil } - return 0, nil, syserror.ConvertIntr(err, syserror.EINTR) + return 0, nil, syserr.ConvertIntr(err, linuxerr.EINTR) } } @@ -216,7 +216,7 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) // It is not presently supported (ENOSYS indicates no support on this // architecture). 
func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } // LINT.IfChange @@ -355,7 +355,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } cbAddr = hostarch.Addr(cbAddrP) default: - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } // Copy in this callback. diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index daa151bb4..6c807124c 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -22,7 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" ) @@ -109,7 +109,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func waitEpoll(t *kernel.Task, fd int32, eventsAddr hostarch.Addr, max int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) { r, err := syscalls.WaitEpoll(t, fd, max, timeoutInNanos) if err != nil { - return 0, nil, syserror.ConvertIntr(err, syserror.EINTR) + return 0, nil, syserr.ConvertIntr(err, linuxerr.EINTR) } if len(r) != 0 { diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 3528d325f..e79b92fb6 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -30,7 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/fasync" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // fileOpAt performs an operation on the second last component in the path. 
@@ -122,7 +122,7 @@ func copyInPath(t *kernel.Task, addr hostarch.Addr, allowEmpty bool) (path strin return "", false, err } if path == "" && !allowEmpty { - return "", false, syserror.ENOENT + return "", false, linuxerr.ENOENT } // If the path ends with a /, then checks must be enforced in various @@ -162,7 +162,7 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin if fs.IsDir(d.Inode.StableAttr) { // Don't allow directories to be opened writable. if fileFlags.Write { - return syserror.EISDIR + return linuxerr.EISDIR } } else { // If O_DIRECTORY is set, but the file is not a directory, then fail. @@ -177,7 +177,7 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin file, err := d.Inode.GetFile(t, d, fileFlags) if err != nil { - return syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } defer file.DecRef(t) @@ -215,7 +215,7 @@ func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod return err } if dirPath { - return syserror.ENOENT + return linuxerr.ENOENT } return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { @@ -308,7 +308,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode return 0, err } if dirPath { - return 0, syserror.ENOENT + return 0, linuxerr.ENOENT } fileFlags := linuxToFlags(flags) @@ -416,7 +416,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode // Create a new fs.File. 
newFile, err = found.Inode.GetFile(t, found, fileFlags) if err != nil { - return syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } defer newFile.DecRef(t) case linuxerr.Equals(linuxerr.ENOENT, err): @@ -795,7 +795,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall defer file.DecRef(t) err := file.Flush(t) - return 0, nil, handleIOError(t, false /* partial */, err, syserror.EINTR, "close", file) + return 0, nil, handleIOError(t, false /* partial */, err, linuxerr.EINTR, "close", file) } // Dup implements linux syscall dup(2). @@ -1020,7 +1020,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) { - return 0, nil, syserror.EINTR + return 0, nil, linuxerr.EINTR } } return 0, nil, nil @@ -1036,7 +1036,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) { - return 0, nil, syserror.EINTR + return 0, nil, linuxerr.EINTR } } return 0, nil, nil @@ -1263,7 +1263,7 @@ func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hosta return err } if dirPath { - return syserror.ENOENT + return linuxerr.ENOENT } // The oldPath is copied in verbatim. 
This is because the symlink @@ -1273,7 +1273,7 @@ func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hosta return err } if oldPath == "" { - return syserror.ENOENT + return linuxerr.ENOENT } return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { @@ -1352,7 +1352,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int3 return err } if dirPath { - return syserror.ENOENT + return linuxerr.ENOENT } if allowEmpty && oldPath == "" { @@ -1439,7 +1439,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal allowEmpty := flags&linux.AT_EMPTY_PATH == linux.AT_EMPTY_PATH if allowEmpty && !t.HasCapabilityIn(linux.CAP_DAC_READ_SEARCH, t.UserNamespace().Root()) { - return 0, nil, syserror.ENOENT + return 0, nil, linuxerr.ENOENT } return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty) @@ -1455,7 +1455,7 @@ func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarc return 0, err } if dirPath { - return 0, syserror.ENOENT + return 0, linuxerr.ENOENT } err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { @@ -1579,7 +1579,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { if fs.IsDir(d.Inode.StableAttr) { - return syserror.EISDIR + return linuxerr.EISDIR } // In contrast to open(O_TRUNC), truncate(2) is only valid for file // types. 
@@ -2131,7 +2131,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, linuxerr.ESPIPE } if fs.IsDir(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EISDIR + return 0, nil, linuxerr.EISDIR } if !fs.IsRegular(file.Dirent.Inode.StableAttr) { return 0, nil, linuxerr.ENODEV @@ -2189,7 +2189,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) { - return 0, nil, syserror.EINTR + return 0, nil, linuxerr.EINTR } } case linux.LOCK_SH: @@ -2201,7 +2201,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) { - return 0, nil, syserror.EINTR + return 0, nil, linuxerr.EINTR } } case linux.LOCK_UN: diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index 717cec04d..bcdd7b633 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -23,7 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // futexWaitRestartBlock encapsulates the state required to restart futex(2) @@ -75,7 +75,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo } t.Futex().WaitComplete(w, t) - return 0, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } // futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is @@ -103,7 +103,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add 
// The wait was unsuccessful for some reason other than interruption. Simply // forward the error. - if err != syserror.ErrInterrupted { + if err != linuxerr.ErrInterrupted { return 0, err } @@ -111,7 +111,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add // The wait duration was absolute, restart with the original arguments. if forever { - return 0, syserror.ERESTARTSYS + return 0, linuxerr.ERESTARTSYS } // The wait duration was relative, restart with the remaining duration. @@ -122,7 +122,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add val: val, mask: mask, }) - return 0, syserror.ERESTART_RESTARTBLOCK + return 0, linuxerr.ERESTART_RESTARTBLOCK } func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool) error { @@ -150,7 +150,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch. } t.Futex().WaitComplete(w, t) - return syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } func tryLockPI(t *kernel.Task, addr hostarch.Addr, private bool) error { @@ -280,11 +280,11 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI: t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS default: // We don't even know about this command. 
- return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } } diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 917717e31..9f7a5ae8a 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -83,7 +82,7 @@ func getdents(t *kernel.Task, fd int32, addr hostarch.Addr, size int, f func(*di ds := newDirentSerializer(f, w, t.Arch(), size) rerr := dir.Readdir(t, ds) - switch err := handleIOError(t, ds.Written() > 0, rerr, syserror.ERESTARTSYS, "getdents", dir); err { + switch err := handleIOError(t, ds.Written() > 0, rerr, linuxerr.ERESTARTSYS, "getdents", dir); err { case nil: dir.Dirent.InotifyEvent(linux.IN_ACCESS, 0) return uintptr(ds.Written()), nil diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go index bf71a9af3..4a5712a29 100644 --- a/pkg/sentry/syscalls/linux/sys_lseek.go +++ b/pkg/sentry/syscalls/linux/sys_lseek.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -49,7 +48,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } offset, serr := file.Seek(t, sw, offset) - err := handleIOError(t, false /* partialResult */, serr, syserror.ERESTARTSYS, "lseek", file) + err := handleIOError(t, false /* partialResult */, serr, linuxerr.ERESTARTSYS, "lseek", file) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index cee621791..7efd17d40 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -24,7 +24,7 @@ import ( 
"gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // Brk implements linux syscall brk(2). @@ -211,7 +211,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca case linux.MADV_REMOVE: // These "suggestions" have application-visible side effects, so we // have to indicate that we don't support them. - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS case linux.MADV_HWPOISON: // Only privileged processes are allowed to poison pages. return 0, nil, linuxerr.EPERM @@ -235,18 +235,18 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // rounded up to the next multiple of the page size." - mincore(2) la, ok := hostarch.Addr(length).RoundUp() if !ok { - return 0, nil, syserror.ENOMEM + return 0, nil, linuxerr.ENOMEM } ar, ok := addr.ToRange(uint64(la)) if !ok { - return 0, nil, syserror.ENOMEM + return 0, nil, linuxerr.ENOMEM } // Pretend that all mapped pages are "resident in core". mapped := t.MemoryManager().VirtualMemorySizeRange(ar) // "ENOMEM: addr to addr + length contained unmapped memory." if mapped != uint64(la) { - return 0, nil, syserror.ENOMEM + return 0, nil, linuxerr.ENOMEM } resident := bytes.Repeat([]byte{1}, int(mapped/hostarch.PageSize)) _, err := t.CopyOutBytes(vec, resident) @@ -277,7 +277,7 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall }) // MSync calls fsync, the same interrupt conversion rules apply, see // mm/msync.c, fsync POSIX.1-2008. - return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } // Mlock implements linux syscall mlock(2). 
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index a80c84fcd..ee4dbbc64 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -25,7 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" ) @@ -185,7 +185,7 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration pfd[i].Events |= linux.POLLHUP | linux.POLLERR } remainingTimeout, n, err := pollBlock(t, pfd, timeout) - err = syserror.ConvertIntr(err, syserror.EINTR) + err = syserr.ConvertIntr(err, linuxerr.EINTR) // The poll entries are copied out regardless of whether // any are set or not. This aligns with the Linux behavior. @@ -295,7 +295,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad // Do the syscall, then count the number of bits set. if _, _, err = pollBlock(t, pfd, timeout); err != nil { - return 0, syserror.ConvertIntr(err, syserror.EINTR) + return 0, syserr.ConvertIntr(err, linuxerr.EINTR) } // r, w, and e are currently event mask bitsets; unset bits corresponding @@ -411,7 +411,7 @@ func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duratio nfds: nfds, timeout: remainingTimeout, }) - return 0, syserror.ERESTART_RESTARTBLOCK + return 0, linuxerr.ERESTART_RESTARTBLOCK } return n, err } @@ -465,7 +465,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Note that this means that if err is nil but copyErr is not, copyErr is // ignored. This is consistent with Linux. 
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } @@ -495,7 +495,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr) // See comment in Ppoll. if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } @@ -540,7 +540,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr) // See comment in Ppoll. if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index a16b6b4d6..2ef1e6404 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -219,6 +219,21 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return 0, nil, t.DropBoundingCapability(cp) + case linux.PR_SET_CHILD_SUBREAPER: + // "If arg2 is nonzero, set the "child subreaper" attribute of + // the calling process; if arg2 is zero, unset the attribute." + // + // TODO(gvisor.dev/issues/2323): We only support setting, and + // only if the task is already TID 1 in the PID namespace, + // because it already acts as a subreaper in that case. 
+ isPid1 := t.PIDNamespace().IDOfTask(t) == kernel.InitTID + if args[1].Int() != 0 && isPid1 { + return 0, nil, nil + } + + t.Kernel().EmitUnimplementedEvent(t) + return 0, nil, linuxerr.EINVAL + case linux.PR_GET_TIMING, linux.PR_SET_TIMING, linux.PR_GET_TSC, @@ -230,7 +245,6 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall linux.PR_MCE_KILL, linux.PR_MCE_KILL_GET, linux.PR_GET_TID_ADDRESS, - linux.PR_SET_CHILD_SUBREAPER, linux.PR_GET_CHILD_SUBREAPER, linux.PR_GET_THP_DISABLE, linux.PR_SET_THP_DISABLE, diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index b54a3a11f..18ea23913 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -72,7 +71,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC n, err := readv(t, file, dst) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "read", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "read", file) } // Readahead implements readahead(2). @@ -152,7 +151,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := preadv(t, file, dst, offset) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pread64", file) } // Readv implements linux syscall readv(2). 
@@ -182,7 +181,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall n, err := readv(t, file, dst) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "readv", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "readv", file) } // Preadv implements linux syscall preadv(2). @@ -223,7 +222,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := preadv(t, file, dst, offset) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv", file) } // Preadv2 implements linux syscall preadv2(2). @@ -281,17 +280,17 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if offset == -1 { n, err := readv(t, file, dst) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file) } n, err := preadv(t, file, dst, offset) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file) } func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { n, err := f.Readv(t, dst) - if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { + if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking { if n > 0 { // Queue notification if we read anything. 
f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) @@ -304,7 +303,7 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { var deadline ktime.Time if s, ok := f.FileOperations.(socket.Socket); ok { dl := s.RecvTimeout() - if dl < 0 && err == syserror.ErrWouldBlock { + if dl < 0 && err == linuxerr.ErrWouldBlock { return n, err } if dl > 0 { @@ -326,14 +325,14 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { // other than "would block". n, err = f.Readv(t, dst) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -351,7 +350,7 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { n, err := f.Preadv(t, dst, offset) - if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { + if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking { if n > 0 { // Queue notification if we read anything. f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) @@ -372,7 +371,7 @@ func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (i // other than "would block". 
n, err = f.Preadv(t, dst, offset+total) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index a12e1c915..7210333d2 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/syserror" ) // rlimit describes an implementation of 'struct rlimit', which may vary from @@ -44,7 +43,7 @@ func newRlimit(t *kernel.Task) (rlimit, error) { // On 64-bit system, struct rlimit and struct rlimit64 are identical. return &rlimit64{}, nil default: - return nil, syserror.ENOSYS + return nil, linuxerr.ENOSYS } } diff --git a/pkg/sentry/syscalls/linux/sys_rseq.go b/pkg/sentry/syscalls/linux/sys_rseq.go index 5fe196647..8328a3742 100644 --- a/pkg/sentry/syscalls/linux/sys_rseq.go +++ b/pkg/sentry/syscalls/linux/sys_rseq.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // RSeq implements syscall rseq(2). @@ -33,7 +32,7 @@ func RSeq(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Event for applications that want rseq on a configuration // that doesn't support them. 
t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } switch flags { diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index 45608f3fa..03871d713 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -25,7 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/signalfd" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // "For a process to have permission to send a signal it must @@ -348,7 +348,7 @@ func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Pause implements linux syscall pause(2). func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND) + return 0, nil, syserr.ConvertIntr(t.Block(nil), linuxerr.ERESTARTNOHAND) } // RtSigpending implements linux syscall rt_sigpending(2). @@ -496,7 +496,7 @@ func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. t.SetSavedSignalMask(oldmask) // Perform the wait. - return 0, nil, syserror.ConvertIntr(t.Block(nil), syserror.ERESTARTNOHAND) + return 0, nil, syserr.ConvertIntr(t.Block(nil), linuxerr.ERESTARTNOHAND) } // RestartSyscall implements the linux syscall restart_syscall(2). @@ -512,7 +512,7 @@ func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne // function is never null by (re)initializing it with one that translates // the restart into EINTR. We'll emulate that behaviour. t.Debugf("Restart block missing in restart_syscall(2). Did ptrace inject a return value of ERESTART_RESTARTBLOCK?") - return 0, nil, syserror.EINTR + return 0, nil, linuxerr.EINTR } // sharedSignalfd is shared between the two calls. 
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 06eb8f319..50ddbc142 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -30,7 +30,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -260,7 +259,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Capture address and call syscall implementation. @@ -270,7 +269,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } blocking := !file.Flags().NonBlocking - return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(s.Connect(t, a, blocking).ToError(), linuxerr.ERESTARTSYS) } // accept is the implementation of the accept syscall. It is called by accept @@ -291,7 +290,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, // Extract the socket. 
s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } // Call the syscall implementation for this socket, then copy the @@ -301,7 +300,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, peerRequested := addrLen != 0 nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } if peerRequested { // NOTE(magi): Linux does not give you an error if it can't @@ -350,7 +349,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Capture address and call syscall implementation. @@ -377,7 +376,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if backlog > maxListenBacklog { @@ -415,7 +414,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Validate how, then call syscall implementation. @@ -446,7 +445,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Read the length. Reject negative values. @@ -527,7 +526,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Extract the socket. 
s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if optLen < 0 { @@ -565,7 +564,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Get the socket name and copy it to the caller. @@ -593,7 +592,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Get the socket peer name and copy it to the caller. @@ -626,7 +625,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. @@ -683,7 +682,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. 
s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if file.Flags().NonBlocking { @@ -763,7 +762,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags if msg.ControlLen == 0 && msg.NameLen == 0 { n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) if err != nil { - return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(err.ToError(), linuxerr.ERESTARTSYS) } if !cms.Unix.Empty() { mflags |= linux.MSG_CTRUNC @@ -785,7 +784,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags } n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } defer cms.Release(t) @@ -848,7 +847,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } if file.Flags().NonBlocking { @@ -874,7 +873,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) cm.Release(t) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } // Copy the address to the caller. @@ -921,7 +920,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. 
@@ -963,7 +962,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. @@ -1060,7 +1059,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostar // Call the syscall implementation. n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) - err = handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file) + err = handleIOError(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendmsg", file) // Control messages should be released on error as well as for zero-length // messages, which are discarded by the receiver. if n == 0 || err != nil { @@ -1087,7 +1086,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags // Extract the socket. s, ok := file.FileOperations.(socket.Socket) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } if file.Flags().NonBlocking { @@ -1122,7 +1121,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags // Call the syscall implementation. n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)}) - return uintptr(n), handleIOError(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file) + return uintptr(n), handleIOError(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendto", file) } // SendTo implements the linux syscall sendto(2). 
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 34d87ac1f..8c8847efa 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -46,9 +45,9 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB for { n, err = fs.Splice(t, outFile, inFile, opts) - if n != 0 || err != syserror.ErrWouldBlock { + if n != 0 || err != linuxerr.ErrWouldBlock { break - } else if err == syserror.ErrWouldBlock && nonBlocking { + } else if err == linuxerr.ErrWouldBlock && nonBlocking { break } @@ -177,7 +176,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // We can only pass a single file to handleIOError, so pick inFile // arbitrarily. This is used only for debugging purposes. - return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "sendfile", inFile) + return uintptr(n), nil, handleIOError(t, false, err, linuxerr.ERESTARTSYS, "sendfile", inFile) } // Splice implements splice(2). @@ -287,7 +286,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } // See above; inFile is chosen arbitrarily here. - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "splice", inFile) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "splice", inFile) } // Tee imlements tee(2). @@ -340,5 +339,5 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo } // See above; inFile is chosen arbitrarily here. 
- return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "tee", inFile) + return uintptr(n), nil, handleIOError(t, false, err, linuxerr.ERESTARTSYS, "tee", inFile) } diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go index 6278bef21..0c22599bf 100644 --- a/pkg/sentry/syscalls/linux/sys_sync.go +++ b/pkg/sentry/syscalls/linux/sys_sync.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // LINT.IfChange @@ -58,7 +58,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall defer file.DecRef(t) err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncAll) - return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } // Fdatasync implements linux syscall fdatasync(2). @@ -74,7 +74,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys defer file.DecRef(t) err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncData) - return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } // SyncFileRange implements linux syscall sync_file_rage(2) @@ -112,7 +112,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel if uflags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 && uflags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 { t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } // SYNC_FILE_RANGE_WRITE initiates write-out of all dirty pages in the @@ -137,7 +137,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel err = file.Fsync(t, offset, fs.FileMaxOffset, fs.SyncData) } - return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(err, 
linuxerr.ERESTARTSYS) } // LINT.ThenChange(vfs2/sync.go) diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go index ba372f9e3..15acb2b8b 100644 --- a/pkg/sentry/syscalls/linux/sys_syslog.go +++ b/pkg/sentry/syscalls/linux/sys_syslog.go @@ -18,7 +18,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -57,6 +56,6 @@ func Syslog(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case _SYSLOG_ACTION_SIZE_BUFFER: return logBufLen, nil, nil default: - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } } diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 981cdd985..d74173c56 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/loader" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -111,7 +110,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host } atEmptyPath := flags&linux.AT_EMPTY_PATH != 0 if !atEmptyPath && len(pathname) == 0 { - return 0, nil, syserror.ENOENT + return 0, nil, linuxerr.ENOENT } resolveFinal := flags&linux.AT_SYMLINK_NOFOLLOW == 0 @@ -244,7 +243,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error { wopts.Events |= kernel.EventGroupContinue } if options&linux.WNOHANG == 0 { - wopts.BlockInterruptErr = syserror.ERESTARTSYS + wopts.BlockInterruptErr = linuxerr.ERESTARTSYS } if options&linux.WNOTHREAD == 0 { wopts.SiblingChildren = true diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index 674e74f82..4adc8b8a4 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ 
-25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) // The most significant 29 bits hold either a pid or a file descriptor. @@ -214,7 +213,7 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem host case linuxerr.Equals(linuxerr.ETIMEDOUT, err): // Slept for entire timeout. return nil - case err == syserror.ErrInterrupted: + case err == linuxerr.ErrInterrupted: // Interrupted. remaining := end.Sub(c.Now()) if remaining <= 0 { @@ -235,9 +234,9 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem host end: end, rem: rem, }) - return syserror.ERESTART_RESTARTBLOCK + return linuxerr.ERESTART_RESTARTBLOCK } - return syserror.ERESTARTNOHAND + return linuxerr.ERESTARTNOHAND default: panic(fmt.Sprintf("Impossible BlockWithTimer error %v", err)) } diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index 45eef4feb..d39a0a6f5 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -18,9 +18,9 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) const nsecPerSec = int64(time.Second) @@ -29,7 +29,7 @@ const nsecPerSec = int64(time.Second) func Getitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { if t.Arch().Width() != 8 { // Definition of linux.ItimerVal assumes 64-bit architecture. 
- return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } timerID := args[0].Int() @@ -51,7 +51,7 @@ func Getitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys func Setitimer(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { if t.Arch().Width() != 8 { // Definition of linux.ItimerVal assumes 64-bit architecture. - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } timerID := args[0].Int() diff --git a/pkg/sentry/syscalls/linux/sys_tls_amd64.go b/pkg/sentry/syscalls/linux/sys_tls_amd64.go index 8c6cd7511..bde672d67 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_amd64.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // ArchPrctl implements linux syscall arch_prctl(2). @@ -39,7 +38,7 @@ func ArchPrctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } default: - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } case linux.ARCH_SET_FS: fsbase := args[1].Uint64() diff --git a/pkg/sentry/syscalls/linux/sys_tls_arm64.go b/pkg/sentry/syscalls/linux/sys_tls_arm64.go index ff4ac4d6d..dfa684387 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_arm64.go @@ -18,12 +18,12 @@ package linux import ( + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // ArchPrctl is not defined for ARM64. 
func ArchPrctl(*kernel.Task, arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 872168606..4a4ef5046 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -72,7 +71,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall n, err := writev(t, file, src) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "write", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "write", file) } // Pwrite64 implements linux syscall pwrite64(2). @@ -119,7 +118,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc n, err := pwritev(t, file, src, offset) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwrite64", file) } // Writev implements linux syscall writev(2). @@ -149,7 +148,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := writev(t, file, src) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "writev", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "writev", file) } // Pwritev implements linux syscall pwritev(2). 
@@ -190,7 +189,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := pwritev(t, file, src, offset) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev", file) } // Pwritev2 implements linux syscall pwritev2(2). @@ -251,17 +250,17 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if offset == -1 { n, err := writev(t, file, src) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file) } n, err := pwritev(t, file, src, offset) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file) + return uintptr(n), nil, handleIOError(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file) } func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { n, err := f.Writev(t, src) - if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { + if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking { if n > 0 { // Queue notification if we wrote anything. f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) @@ -274,7 +273,7 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { var deadline ktime.Time if s, ok := f.FileOperations.(socket.Socket); ok { dl := s.SendTimeout() - if dl < 0 && err == syserror.ErrWouldBlock { + if dl < 0 && err == linuxerr.ErrWouldBlock { return n, err } if dl > 0 { @@ -296,14 +295,14 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { // anything other than "would block". 
n, err = f.Writev(t, src) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -321,7 +320,7 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (int64, error) { n, err := f.Pwritev(t, src, offset) - if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { + if err != linuxerr.ErrWouldBlock || f.Flags().NonBlocking { if n > 0 { // Queue notification if we wrote anything. f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) @@ -342,7 +341,7 @@ func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) ( // anything other than "would block". n, err = f.Pwritev(t, src, offset+total) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index b327e27d6..d90652a3f 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // copyTimespecIn copies a Timespec from the untrusted app range to the kernel. 
@@ -38,7 +37,7 @@ func copyTimespecIn(t *kernel.Task, addr hostarch.Addr) (linux.Timespec, error) ts.Nsec = int64(hostarch.ByteOrder.Uint64(in[8:])) return ts, nil default: - return linux.Timespec{}, syserror.ENOSYS + return linux.Timespec{}, linuxerr.ENOSYS } } @@ -52,7 +51,7 @@ func copyTimespecOut(t *kernel.Task, addr hostarch.Addr, ts *linux.Timespec) err _, err := t.CopyOutBytes(addr, out) return err default: - return syserror.ENOSYS + return linuxerr.ENOSYS } } @@ -70,7 +69,7 @@ func copyTimevalIn(t *kernel.Task, addr hostarch.Addr) (linux.Timeval, error) { tv.Usec = int64(hostarch.ByteOrder.Uint64(in[8:])) return tv, nil default: - return linux.Timeval{}, syserror.ENOSYS + return linux.Timeval{}, linuxerr.ENOSYS } } @@ -84,7 +83,7 @@ func copyTimevalOut(t *kernel.Task, addr hostarch.Addr, tv *linux.Timeval) error _, err := t.CopyOutBytes(addr, out) return err default: - return syserror.ENOSYS + return linuxerr.ENOSYS } } diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index a73f096ff..1e3bd2a50 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -73,7 +73,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserr", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index a8fa86cdc..0b57c0f7c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/mm" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -56,7 +55,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } cbAddr = hostarch.Addr(cbAddrP) default: - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } // Copy in this callback. 
diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go index 38818c175..fcf2e25de 100644 --- a/pkg/sentry/syscalls/linux/vfs2/execve.go +++ b/pkg/sentry/syscalls/linux/vfs2/execve.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/loader" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Execve implements linux syscall execve(2). @@ -83,7 +82,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr host // do_open_execat(fd=AT_FDCWD)), and the loader package is currently // incapable of handling this correctly. if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { - return 0, nil, syserror.ENOENT + return 0, nil, linuxerr.ENOENT } dirfile, dirfileFlags := t.FDTable().GetVFS2(dirfd) if dirfile == nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 2cfb12cad..2198aa065 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Close implements Linux syscall close(2). @@ -42,7 +41,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall defer file.DecRef(t) err := file.OnClose(t) - return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, syserror.EINTR, "close", file) + return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, linuxerr.EINTR, "close", file) } // Dup implements Linux syscall dup(2). 
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index 534355237..f19f0fd41 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Link implements Linux syscall link(2). @@ -46,7 +45,7 @@ func linkat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd return linuxerr.EINVAL } if flags&linux.AT_EMPTY_PATH != 0 && !t.HasCapability(linux.CAP_DAC_READ_SEARCH) { - return syserror.ENOENT + return linuxerr.ENOENT } oldpath, err := copyInPath(t, oldpathAddr) @@ -320,7 +319,7 @@ func symlinkat(t *kernel.Task, targetAddr hostarch.Addr, newdirfd int32, linkpat return err } if len(target) == 0 { - return syserror.ENOENT + return linuxerr.ENOENT } linkpath, err := copyInPath(t, linkpathAddr) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go index 2bb783a85..38796d4db 100644 --- a/pkg/sentry/syscalls/linux/vfs2/path.go +++ b/pkg/sentry/syscalls/linux/vfs2/path.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) func copyInPath(t *kernel.Task, addr hostarch.Addr) (fspath.Path, error) { @@ -44,7 +43,7 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA if !path.Absolute { if !path.HasComponents() && !bool(shouldAllowEmptyPath) { root.DecRef(t) - return taskPathOperation{}, syserror.ENOENT + return taskPathOperation{}, linuxerr.ENOENT } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go index 042aa4c97..204051cd0 100644 --- a/pkg/sentry/syscalls/linux/vfs2/poll.go 
+++ b/pkg/sentry/syscalls/linux/vfs2/poll.go @@ -20,15 +20,14 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" - - "gvisor.dev/gvisor/pkg/hostarch" ) // fileCap is the maximum allowable files for poll & select. This has no @@ -189,7 +188,7 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration pfd[i].Events |= linux.POLLHUP | linux.POLLERR } remainingTimeout, n, err := pollBlock(t, pfd, timeout) - err = syserror.ConvertIntr(err, syserror.EINTR) + err = syserr.ConvertIntr(err, linuxerr.EINTR) // The poll entries are copied out regardless of whether // any are set or not. This aligns with the Linux behavior. @@ -299,7 +298,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad // Do the syscall, then count the number of bits set. if _, _, err = pollBlock(t, pfd, timeout); err != nil { - return 0, syserror.ConvertIntr(err, syserror.EINTR) + return 0, syserr.ConvertIntr(err, linuxerr.EINTR) } // r, w, and e are currently event mask bitsets; unset bits corresponding @@ -417,7 +416,7 @@ func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duratio nfds: nfds, timeout: remainingTimeout, }) - return 0, syserror.ERESTART_RESTARTBLOCK + return 0, linuxerr.ERESTART_RESTARTBLOCK } return n, err } @@ -464,7 +463,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Note that this means that if err is nil but copyErr is not, copyErr is // ignored. This is consistent with Linux. 
if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } @@ -494,7 +493,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr) // See comment in Ppoll. if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } @@ -541,7 +540,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr) // See comment in Ppoll. if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { - err = syserror.ERESTARTNOHAND + err = linuxerr.ERESTARTNOHAND } return n, nil, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index fe8aa06da..4e7dc5080 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -63,7 +62,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC n, err := read(t, file, dst, vfs.ReadOptions{}) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "read", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "read", file) } // Readv implements Linux syscall readv(2). 
@@ -88,12 +87,12 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall n, err := read(t, file, dst, vfs.ReadOptions{}) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "readv", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "readv", file) } func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { n, err := file.Read(t, dst, opts) - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { return n, err } @@ -115,14 +114,14 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt // "would block". n, err = file.Read(t, dst, opts) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -166,7 +165,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := pread(t, file, dst, offset, vfs.ReadOptions{}) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pread64", file) } // Preadv implements Linux syscall preadv(2). 
@@ -197,7 +196,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := pread(t, file, dst, offset, vfs.ReadOptions{}) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv", file) } // Preadv2 implements Linux syscall preadv2(2). @@ -243,12 +242,12 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err = pread(t, file, dst, offset, opts) } t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "preadv2", file) } func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { n, err := file.PRead(t, dst, offset, opts) - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { return n, err } @@ -270,14 +269,14 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of // "would block". n, err = file.PRead(t, dst, offset+total, opts) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. 
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -314,7 +313,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall n, err := write(t, file, src, vfs.WriteOptions{}) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "write", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "write", file) } // Writev implements Linux syscall writev(2). @@ -339,12 +338,12 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := write(t, file, src, vfs.WriteOptions{}) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "writev", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "writev", file) } func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { n, err := file.Write(t, src, opts) - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { return n, err } @@ -366,14 +365,14 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op // "would block". n, err = file.Write(t, src, opts) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. 
if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } @@ -416,7 +415,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc n, err := pwrite(t, file, src, offset, vfs.WriteOptions{}) t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwrite64", file) } // Pwritev implements Linux syscall pwritev(2). @@ -447,7 +446,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := pwrite(t, file, src, offset, vfs.WriteOptions{}) t.IOUsage().AccountReadSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev", file) } // Pwritev2 implements Linux syscall pwritev2(2). @@ -493,12 +492,12 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc n, err = pwrite(t, file, src, offset, opts) } t.IOUsage().AccountWriteSyscall(n) - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "pwritev2", file) } func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { n, err := file.PWrite(t, src, offset, opts) - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { return n, err } @@ -520,14 +519,14 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o // "would block". 
n, err = file.PWrite(t, src, offset+total, opts) total += n - if err != syserror.ErrWouldBlock { + if err != linuxerr.ErrWouldBlock { break } // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { - err = syserror.ErrWouldBlock + err = linuxerr.ErrWouldBlock } break } diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index b5a3b92c5..e608572b4 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const chmodMask = 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX @@ -432,7 +431,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa start := root if !path.Absolute { if !path.HasComponents() && !bool(shouldAllowEmptyPath) { - return syserror.ENOENT + return linuxerr.ENOENT } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() @@ -465,7 +464,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa } func handleSetSizeError(t *kernel.Task, err error) error { - if err == syserror.ErrExceedsFileSizeLimit { + if err == linuxerr.ErrExceedsFileSizeLimit { // Convert error to EFBIG and send a SIGXFSZ per setrlimit(2). 
t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t)) return linuxerr.EFBIG diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index 0c2e0720b..48be5a88d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -30,10 +31,7 @@ import ( slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/pkg/hostarch" ) // maxAddrLen is the maximum socket address length we're willing to accept. @@ -264,7 +262,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Capture address and call syscall implementation. @@ -274,7 +272,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0 - return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(s.Connect(t, a, blocking).ToError(), linuxerr.ERESTARTSYS) } // accept is the implementation of the accept syscall. It is called by accept @@ -295,7 +293,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, // Extract the socket. 
s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } // Call the syscall implementation for this socket, then copy the @@ -305,7 +303,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, peerRequested := addrLen != 0 nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } if peerRequested { // NOTE(magi): Linux does not give you an error if it can't @@ -354,7 +352,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Capture address and call syscall implementation. @@ -381,7 +379,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if backlog > maxListenBacklog { @@ -419,7 +417,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Validate how, then call syscall implementation. @@ -450,7 +448,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Read the length. Reject negative values. @@ -531,7 +529,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Extract the socket. 
s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if optLen < 0 { @@ -569,7 +567,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Get the socket name and copy it to the caller. @@ -597,7 +595,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Get the socket peer name and copy it to the caller. @@ -630,7 +628,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. @@ -687,7 +685,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. 
s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -767,7 +765,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl if msg.ControlLen == 0 && msg.NameLen == 0 { n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) if err != nil { - return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(err.ToError(), linuxerr.ERESTARTSYS) } if !cms.Unix.Empty() { mflags |= linux.MSG_CTRUNC @@ -789,7 +787,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl } n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } defer cms.Release(t) @@ -852,7 +850,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -878,7 +876,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, fla n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) cm.Release(t) if e != nil { - return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) + return 0, syserr.ConvertIntr(e.ToError(), linuxerr.ERESTARTSYS) } // Copy the address to the caller. @@ -925,7 +923,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Extract the socket. 
s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. @@ -967,7 +965,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, nil, syserror.ENOTSOCK + return 0, nil, linuxerr.ENOTSOCK } // Reject flags that we don't handle yet. @@ -1064,7 +1062,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio // Call the syscall implementation. n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) - err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file) + err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendmsg", file) // Control messages should be released on error as well as for zero-length // messages, which are discarded by the receiver. if n == 0 || err != nil { @@ -1091,7 +1089,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) if !ok { - return 0, syserror.ENOTSOCK + return 0, linuxerr.ENOTSOCK } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -1126,7 +1124,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags // Call the syscall implementation. n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)}) - return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file) + return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), linuxerr.ERESTARTSYS, "sendto", file) } // SendTo implements the linux syscall sendto(2). 
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index d8009123f..0205f09e0 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -151,7 +150,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal panic("at least one end of splice must be a pipe") } - if n != 0 || err != syserror.ErrWouldBlock || nonBlock { + if n != 0 || err != linuxerr.ErrWouldBlock || nonBlock { break } if err = dw.waitForBoth(t); err != nil { @@ -173,7 +172,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // We can only pass a single file to handleIOError, so pick inFile arbitrarily. // This is used only for debugging purposes. - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "splice", outFile) } // Tee implements Linux syscall tee(2). @@ -241,7 +240,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo defer dw.destroy() for { n, err = pipe.Tee(t, outPipeFD, inPipeFD, count) - if n != 0 || err != syserror.ErrWouldBlock || nonBlock { + if n != 0 || err != linuxerr.ErrWouldBlock || nonBlock { break } if err = dw.waitForBoth(t); err != nil { @@ -251,7 +250,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if n != 0 { // If a partial write is completed, the error is dropped. Log it here. 
- if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { + if err != nil && err != io.EOF && err != linuxerr.ErrWouldBlock { log.Debugf("tee completed a partial write with error: %v", err) err = nil } @@ -259,7 +258,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo // We can only pass a single file to handleIOError, so pick inFile arbitrarily. // This is used only for debugging purposes. - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "tee", inFile) + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, linuxerr.ERESTARTSYS, "tee", inFile) } // Sendfile implements linux system call sendfile(2). @@ -360,10 +359,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc break } if err == nil && t.Interrupted() { - err = syserror.ErrInterrupted + err = linuxerr.ErrInterrupted break } - if err == syserror.ErrWouldBlock && !nonBlock { + if err == linuxerr.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) } if err != nil { @@ -389,7 +388,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc var writeN int64 writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{}) wbuf = wbuf[writeN:] - if err == syserror.ErrWouldBlock && !nonBlock { + if err == linuxerr.ErrWouldBlock && !nonBlock { err = dw.waitForOut(t) } if err != nil { @@ -420,10 +419,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc break } if err == nil && t.Interrupted() { - err = syserror.ErrInterrupted + err = linuxerr.ErrInterrupted break } - if err == syserror.ErrWouldBlock && !nonBlock { + if err == linuxerr.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) } if err != nil { @@ -441,7 +440,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } if total != 0 { - if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { + if err != nil && err != 
io.EOF && err != linuxerr.ErrWouldBlock { // If a partial write is completed, the error is dropped. Log it here. log.Debugf("sendfile completed a partial write with error: %v", err) err = nil @@ -450,7 +449,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // We can only pass a single file to handleIOError, so pick inFile arbitrarily. // This is used only for debugging purposes. - return uintptr(total), nil, slinux.HandleIOErrorVFS2(t, total != 0, err, syserror.ERESTARTSYS, "sendfile", inFile) + return uintptr(total), nil, slinux.HandleIOErrorVFS2(t, total != 0, err, linuxerr.ERESTARTSYS, "sendfile", inFile) } // dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index ba1d30823..adaf8db3f 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Stat implements Linux syscall stat(2). 
@@ -70,7 +69,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flag start := root if !path.Absolute { if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { - return syserror.ENOENT + return linuxerr.ENOENT } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() @@ -182,7 +181,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall start := root if !path.Absolute { if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { - return 0, nil, syserror.ENOENT + return 0, nil, linuxerr.ENOENT } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index d0ffc7c32..cfc693422 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -19,7 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/syserr" ) // Sync implements Linux syscall sync(2). 
@@ -108,12 +108,12 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 && flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 { t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS } if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 { if err := file.Sync(t); err != nil { - return 0, nil, syserror.ConvertIntr(err, syserror.ERESTARTSYS) + return 0, nil, syserr.ConvertIntr(err, linuxerr.ERESTARTSYS) } } return 0, nil, nil diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go index 511fb8b28..cfcc21271 100644 --- a/pkg/sentry/syscalls/syscalls.go +++ b/pkg/sentry/syscalls/syscalls.go @@ -31,7 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Supported returns a syscall that is fully supported. @@ -103,10 +102,10 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne return 0, nil, linuxerr.EPERM } t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOSYS + return 0, nil, linuxerr.ENOSYS }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, linuxerr.EPERM, c.String(), syserror.ENOSYS), + Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, linuxerr.EPERM, c.String(), linuxerr.ENOSYS), URLs: urls, } } diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index 36d999c47..c21971322 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -39,7 +39,6 @@ go_library( "//pkg/log", "//pkg/metric", "//pkg/sync", - "//pkg/syserror", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index a2032162d..914574543 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -116,7 +116,6 @@ 
go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/uniqueid", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", @@ -137,7 +136,6 @@ go_test( "//pkg/errors/linuxerr", "//pkg/sentry/contexttest", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index befe3ca25..04bc4d10c 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -259,7 +258,7 @@ func (ep *EpollInstance) ModifyInterest(file *FileDescription, num int32, event num: num, }] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } // Update epi for the next call to ep.ReadEvents(). @@ -295,7 +294,7 @@ func (ep *EpollInstance) DeleteInterest(file *FileDescription, num int32) error num: num, }] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } // Unregister from the file so that epi will no longer be readied. diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index a875fdeca..5dab069ed 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -25,7 +25,6 @@ import ( fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -56,7 +55,7 @@ func (FileDescriptionDefaultImpl) OnClose(ctx context.Context) error { // StatFS implements FileDescriptionImpl.StatFS analogously to // super_operations::statfs == NULL in Linux. 
func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, error) { - return linux.Statfs{}, syserror.ENOSYS + return linux.Statfs{}, linuxerr.ENOSYS } // Allocate implements FileDescriptionImpl.Allocate analogously to @@ -175,27 +174,27 @@ type DirectoryFileDescriptionDefaultImpl struct{} // Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate. func (DirectoryFileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.EISDIR + return linuxerr.EISDIR } // PRead implements FileDescriptionImpl.PRead. func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Read implements FileDescriptionImpl.Read. func (DirectoryFileDescriptionDefaultImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // PWrite implements FileDescriptionImpl.PWrite. func (DirectoryFileDescriptionDefaultImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // Write implements FileDescriptionImpl.Write. 
func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EISDIR + return 0, linuxerr.EISDIR } // DentryMetadataFileDescriptionImpl may be embedded by implementations of @@ -368,7 +367,7 @@ func (fd *DynamicBytesFileDescriptionImpl) pwriteLocked(ctx context.Context, src writable, ok := fd.data.(WritableDynamicBytesSource) if !ok { - return 0, syserror.EIO + return 0, linuxerr.EIO } n, err := writable.Write(ctx, src, offset) if err != nil { diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 3423dede1..e34a8c11b 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -157,10 +156,10 @@ func TestGenCountFD(t *testing.T) { // Write and PWrite fails. 
if _, err := fd.Write(ctx, ioseq, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) + t.Errorf("Write: got err %v, wanted %v", err, linuxerr.EIO) } if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) + t.Errorf("Write: got err %v, wanted %v", err, linuxerr.EIO) } } diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 088beb8e2..17d94b341 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -209,7 +208,7 @@ func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOpt if i.events.Empty() { // Nothing to read yet, tell caller to block. - return 0, syserror.ErrWouldBlock + return 0, linuxerr.ErrWouldBlock } var writeLen int64 diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go index cbe4d8c2d..1853cdca0 100644 --- a/pkg/sentry/vfs/lock.go +++ b/pkg/sentry/vfs/lock.go @@ -17,8 +17,8 @@ package vfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/syserror" ) // FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2) @@ -47,9 +47,9 @@ func (fl *FileLocks) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerID i // Return an appropriate error for the unsuccessful lock attempt, depending on // whether this is a blocking or non-blocking operation. if block == nil { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } - return syserror.ERESTARTSYS + return linuxerr.ERESTARTSYS } // UnlockBSD releases a BSD-style lock on the entire file. 
@@ -69,9 +69,9 @@ func (fl *FileLocks) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPI // Return an appropriate error for the unsuccessful lock attempt, depending on // whether this is a blocking or non-blocking operation. if block == nil { - return syserror.ErrWouldBlock + return linuxerr.ErrWouldBlock } - return syserror.ERESTARTSYS + return linuxerr.ERESTARTSYS } // UnlockPOSIX releases a POSIX-style lock on a file region. diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 4d6b59a26..05a416775 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) // A Mount is a replacement of a Dentry (Mount.key.point) from one Filesystem @@ -225,7 +224,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr vdDentry.mu.Unlock() vfs.mountMu.Unlock() vd.DecRef(ctx) - return syserror.ENOENT + return linuxerr.ENOENT } // vd might have been mounted over between vfs.GetDentryAt() and // vfs.mountMu.Lock(). diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index e4da15009..7cc68a157 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -16,9 +16,9 @@ package vfs import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) var fspathBuilderPool = sync.Pool{ @@ -137,7 +137,7 @@ loop: // Linux's sys_getcwd(). 
func (vfs *VirtualFilesystem) PathnameForGetcwd(ctx context.Context, vfsroot, vd VirtualDentry) (string, error) { if vd.dentry.IsDead() { - return "", syserror.ENOENT + return "", linuxerr.ENOENT } b := getFSPathBuilder() diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index 4744514bd..953d31876 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/syserror" ) // AccessTypes is a bitmask of Unix file permissions. @@ -195,7 +194,7 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt return err } if limit < int64(stat.Size) { - return syserror.ErrExceedsFileSizeLimit + return linuxerr.ErrExceedsFileSizeLimit } } if stat.Mask&linux.STATX_MODE != 0 { @@ -282,7 +281,7 @@ func CheckLimit(ctx context.Context, offset, size int64) (int64, error) { return size, nil } if offset >= int64(fileSizeLimit) { - return 0, syserror.ErrExceedsFileSizeLimit + return 0, linuxerr.ErrExceedsFileSizeLimit } remaining := int64(fileSizeLimit) - offset if remaining < size { diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index 6f58f33ce..7fd7f000d 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // ResolvingPath represents the state of an in-progress path resolution, shared @@ -331,7 +330,7 @@ func (rp *ResolvingPath) HandleSymlink(target string) error { return linuxerr.ELOOP } if len(target) == 0 { - return syserror.ENOENT + return linuxerr.ENOENT } rp.symlinks++ targetPath := fspath.Parse(target) diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index eb3c60610..1b2a668c0 100644 --- 
a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -48,7 +48,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // A VirtualFilesystem (VFS for short) combines Filesystems in trees of Mounts. @@ -281,7 +280,7 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential if newpop.Path.Absolute { return linuxerr.EEXIST } - return syserror.ENOENT + return linuxerr.ENOENT } if newpop.FollowFinalSymlink { oldVD.DecRef(ctx) @@ -318,7 +317,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia if pop.Path.Absolute { return linuxerr.EEXIST } - return syserror.ENOENT + return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink") @@ -348,7 +347,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia } // MknodAt creates a file of the given mode at the given path. It returns an -// error from the syserror package. +// error from the linuxerr package. func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. 
@@ -356,7 +355,7 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia if pop.Path.Absolute { return linuxerr.EEXIST } - return syserror.ENOENT + return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink") @@ -494,7 +493,7 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti if oldpop.Path.Absolute { return linuxerr.EBUSY } - return syserror.ENOENT + return linuxerr.ENOENT } if oldpop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink") @@ -515,7 +514,7 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti if newpop.Path.Absolute { return linuxerr.EBUSY } - return syserror.ENOENT + return linuxerr.ENOENT } if newpop.FollowFinalSymlink { oldParentVD.DecRef(ctx) @@ -556,7 +555,7 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia if pop.Path.Absolute { return linuxerr.EBUSY } - return syserror.ENOENT + return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink") @@ -639,7 +638,7 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent if pop.Path.Absolute { return linuxerr.EEXIST } - return syserror.ENOENT + return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink") @@ -673,7 +672,7 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti if pop.Path.Absolute { return linuxerr.EBUSY } - return syserror.ENOENT + return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink") diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go index e00d9467d..e4701b464 100644 --- a/pkg/sync/mutex_unsafe.go +++ 
b/pkg/sync/mutex_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build go1.13 && !go1.18 -// +build go1.13,!go1.18 +//go:build go1.13 && !go1.19 +// +build go1.13,!go1.19 // When updating the build constraint (above), check that syncMutex matches the // standard library sync.Mutex definition. diff --git a/pkg/sync/runtime_unsafe.go b/pkg/sync/runtime_unsafe.go index 49d4109a9..f6e6a4f7b 100644 --- a/pkg/sync/runtime_unsafe.go +++ b/pkg/sync/runtime_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build go1.13 && !go1.18 -// +build go1.13,!go1.18 +//go:build go1.13 && !go1.19 +// +build go1.13,!go1.19 // //go:linkname directives type-checked by checklinkname. Any other // non-linkname assumptions outside the Go 1 compatibility guarantee should diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD index ceee494fc..1cd5d641d 100644 --- a/pkg/syserr/BUILD +++ b/pkg/syserr/BUILD @@ -14,7 +14,6 @@ go_library( "//pkg/abi/linux/errno", "//pkg/errors", "//pkg/errors/linuxerr", - "//pkg/syserror", "//pkg/tcpip", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/syserr/syserr.go b/pkg/syserr/syserr.go index 558240008..a5e386e38 100644 --- a/pkg/syserr/syserr.go +++ b/pkg/syserr/syserr.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux/errno" "gvisor.dev/gvisor/pkg/errors" "gvisor.dev/gvisor/pkg/errors/linuxerr" - "gvisor.dev/gvisor/pkg/syserror" ) // Error represents an internal error. @@ -52,12 +51,12 @@ func New(message string, linuxTranslation errno.Errno) *Error { } e := error(unix.Errno(err.errno)) - // syserror.ErrWouldBlock gets translated to linuxerr.EWOULDBLOCK and + // linuxerr.ErrWouldBlock gets translated to linuxerr.EWOULDBLOCK and // enables proper blocking semantics. 
This should temporary address the // class of blocking bugs that keep popping up with the current state of // the error space. if err.errno == linuxerr.EWOULDBLOCK.Errno() { - e = syserror.ErrWouldBlock + e = linuxerr.ErrWouldBlock } linuxBackwardsTranslations[err.errno] = linuxBackwardsTranslation{err: e, ok: true} @@ -287,8 +286,14 @@ func FromError(err error) *Error { return FromHost(unix.Errno(linuxErr.Errno())) } - if errno, ok := syserror.TranslateError(err); ok { - return FromHost(errno) - } panic("unknown error: " + err.Error()) } + +// ConvertIntr converts the provided error code (err) to another one (intr) if +// the first error corresponds to an interrupted operation. +func ConvertIntr(err, intr error) error { + if err == linuxerr.ErrInterrupted { + return intr + } + return err +} diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD deleted file mode 100644 index 76bee5a64..000000000 --- a/pkg/syserror/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "syserror", - srcs = ["syserror.go"], - visibility = ["//visibility:public"], - deps = ["@org_golang_x_sys//unix:go_default_library"], -) diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go deleted file mode 100644 index b24edb364..000000000 --- a/pkg/syserror/syserror.go +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Package syserror contains syscall error codes exported as error interface -// instead of Errno. This allows for fast comparison and returns when the -// comparand or return value is of type error because there is no need to -// convert from Errno to an interface, i.e., runtime.convT2I isn't called. -package syserror - -import ( - "errors" - - "golang.org/x/sys/unix" -) - -// The following variables have the same meaning as their syscall equivalent. -var ( - EIDRM = error(unix.EIDRM) - EINTR = error(unix.EINTR) - EIO = error(unix.EIO) - EISDIR = error(unix.EISDIR) - ENOENT = error(unix.ENOENT) - ENOEXEC = error(unix.ENOEXEC) - ENOMEM = error(unix.ENOMEM) - ENOTSOCK = error(unix.ENOTSOCK) - ENOSPC = error(unix.ENOSPC) - ENOSYS = error(unix.ENOSYS) -) - -var ( - // ErrWouldBlock is an internal error used to indicate that an operation - // cannot be satisfied immediately, and should be retried at a later - // time, possibly when the caller has received a notification that the - // operation may be able to complete. It is used by implementations of - // the kio.File interface. - ErrWouldBlock = errors.New("request would block") - - // ErrInterrupted is returned if a request is interrupted before it can - // complete. - ErrInterrupted = errors.New("request was interrupted") - - // ErrExceedsFileSizeLimit is returned if a request would exceed the - // file's size limit. - ErrExceedsFileSizeLimit = errors.New("exceeds file size limit") -) - -// errorMap is the map used to convert generic errors into errnos. -var errorMap = map[error]unix.Errno{} - -// errorUnwrappers is an array of unwrap functions to extract typed errors. -var errorUnwrappers = []func(error) (unix.Errno, bool){} - -// AddErrorTranslation allows modules to populate the error map by adding their -// own translations during initialization. Returns if the error translation is -// accepted or not. A pre-existing translation will not be overwritten by the -// new translation. 
-func AddErrorTranslation(from error, to unix.Errno) bool { - if _, ok := errorMap[from]; ok { - return false - } - - errorMap[from] = to - return true -} - -// AddErrorUnwrapper registers an unwrap method that can extract a concrete error -// from a typed, but not initialized, error. -func AddErrorUnwrapper(unwrap func(e error) (unix.Errno, bool)) { - errorUnwrappers = append(errorUnwrappers, unwrap) -} - -// TranslateError translates errors to errnos, it will return false if -// the error was not registered. -func TranslateError(from error) (unix.Errno, bool) { - if err, ok := errorMap[from]; ok { - return err, true - } - // Try to unwrap the error if we couldn't match an error - // exactly. This might mean that a package has its own - // error type. - for _, unwrap := range errorUnwrappers { - if err, ok := unwrap(from); ok { - return err, true - } - } - return 0, false -} - -// ConvertIntr converts the provided error code (err) to another one (intr) if -// the first error corresponds to an interrupted operation. -func ConvertIntr(err, intr error) error { - if err == ErrInterrupted { - return intr - } - return err -} - -// SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel -// include/linux/errno.h. These errnos are never returned to userspace -// directly, but are used to communicate the expected behavior of an -// interrupted syscall from the syscall to signal handling. -type SyscallRestartErrno int - -// These numeric values are significant because ptrace syscall exit tracing can -// observe them. -// -// For all of the following errnos, if the syscall is not interrupted by a -// signal delivered to a user handler, the syscall is restarted. -const ( - // ERESTARTSYS is returned by an interrupted syscall to indicate that it - // should be converted to EINTR if interrupted by a signal delivered to a - // user handler without SA_RESTART set, and restarted otherwise. 
- ERESTARTSYS = SyscallRestartErrno(512) - - // ERESTARTNOINTR is returned by an interrupted syscall to indicate that it - // should always be restarted. - ERESTARTNOINTR = SyscallRestartErrno(513) - - // ERESTARTNOHAND is returned by an interrupted syscall to indicate that it - // should be converted to EINTR if interrupted by a signal delivered to a - // user handler, and restarted otherwise. - ERESTARTNOHAND = SyscallRestartErrno(514) - - // ERESTART_RESTARTBLOCK is returned by an interrupted syscall to indicate - // that it should be restarted using a custom function. The interrupted - // syscall must register a custom restart function by calling - // Task.SetRestartSyscallFn. - ERESTART_RESTARTBLOCK = SyscallRestartErrno(516) -) - -// Error implements error.Error. -func (e SyscallRestartErrno) Error() string { - // Descriptions are borrowed from strace. - switch e { - case ERESTARTSYS: - return "to be restarted if SA_RESTART is set" - case ERESTARTNOINTR: - return "to be restarted" - case ERESTARTNOHAND: - return "to be restarted if no handler" - case ERESTART_RESTARTBLOCK: - return "interrupted by signal" - default: - return "(unknown interrupt error)" - } -} - -// SyscallRestartErrnoFromReturn returns the SyscallRestartErrno represented by -// rv, the value in a syscall return register. 
-func SyscallRestartErrnoFromReturn(rv uintptr) (SyscallRestartErrno, bool) { - switch int(rv) { - case -int(ERESTARTSYS): - return ERESTARTSYS, true - case -int(ERESTARTNOINTR): - return ERESTARTNOINTR, true - case -int(ERESTARTNOHAND): - return ERESTARTNOHAND, true - case -int(ERESTART_RESTARTBLOCK): - return ERESTART_RESTARTBLOCK, true - default: - return 0, false - } -} - -func init() { - AddErrorTranslation(ErrWouldBlock, unix.EWOULDBLOCK) - AddErrorTranslation(ErrInterrupted, unix.EINTR) - AddErrorTranslation(ErrExceedsFileSizeLimit, unix.EFBIG) -} diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index 4758a99ad..c3e4c3455 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -31,7 +31,6 @@ go_library( "//pkg/refs", "//pkg/refsvfs2", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index d23210503..fa2131c28 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -174,7 +173,7 @@ func (d *Device) Write(data []byte) (int64, error) { return 0, linuxerr.EBADFD } if !endpoint.IsAttached() { - return 0, syserror.EIO + return 0, linuxerr.EIO } dataLen := int64(len(data)) @@ -249,7 +248,7 @@ func (d *Device) Read() ([]byte, error) { for { info, ok := endpoint.Read() if !ok { - return nil, syserror.ErrWouldBlock + return nil, linuxerr.ErrWouldBlock } v, ok := d.encodePkt(&info) diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go index 5642c86f8..34ac62444 100644 --- a/pkg/tcpip/socketops.go +++ b/pkg/tcpip/socketops.go @@ -17,6 +17,7 @@ package tcpip import ( "sync/atomic" + "gvisor.dev/gvisor/pkg/atomicbitops" 
"gvisor.dev/gvisor/pkg/sync" ) @@ -56,6 +57,11 @@ type SocketOptionsHandler interface { // OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE. OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) + + // WakeupWriters is invoked when the send buffer size for an endpoint is + // changed. The handler notifies the writers if the send buffer size is + // increased with setsockopt(2) for TCP endpoints. + WakeupWriters() } // DefaultSocketOptionsHandler is an embeddable type that implements no-op @@ -97,6 +103,9 @@ func (*DefaultSocketOptionsHandler) OnSetSendBufferSize(v int64) (newSz int64) { return v } +// WakeupWriters implements SocketOptionsHandler.WakeupWriters. +func (*DefaultSocketOptionsHandler) WakeupWriters() {} + // OnSetReceiveBufferSize implements SocketOptionsHandler.OnSetReceiveBufferSize. func (*DefaultSocketOptionsHandler) OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) { return v @@ -207,24 +216,16 @@ type SocketOptions struct { // will not change. getSendBufferLimits GetSendBufferLimits `state:"manual"` - // sendBufSizeMu protects sendBufferSize and calls to - // handler.OnSetSendBufferSize. - sendBufSizeMu sync.Mutex `state:"nosave"` - // sendBufferSize determines the send buffer size for this socket. - sendBufferSize int64 + sendBufferSize atomicbitops.AlignedAtomicInt64 // getReceiveBufferLimits provides the handler to get the min, default and // max size for receive buffer. It is initialized at the creation time and // will not change. getReceiveBufferLimits GetReceiveBufferLimits `state:"manual"` - // receiveBufSizeMu protects receiveBufferSize and calls to - // handler.OnSetReceiveBufferSize. - receiveBufSizeMu sync.Mutex `state:"nosave"` - // receiveBufferSize determines the receive buffer size for this socket. - receiveBufferSize int64 + receiveBufferSize atomicbitops.AlignedAtomicInt64 // mu protects the access to the below fields. 
mu sync.Mutex `state:"nosave"` @@ -614,6 +615,11 @@ func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error { return nil } +// GetSendBufferSize gets value for SO_SNDBUF option. +func (so *SocketOptions) GetSendBufferSize() int64 { + return so.sendBufferSize.Load() +} + // SendBufferLimits returns the [min, max) range of allowable send buffer // sizes. func (so *SocketOptions) SendBufferLimits() (min, max int64) { @@ -621,22 +627,21 @@ func (so *SocketOptions) SendBufferLimits() (min, max int64) { return int64(limits.Min), int64(limits.Max) } -// GetSendBufferSize gets value for SO_SNDBUF option. -func (so *SocketOptions) GetSendBufferSize() int64 { - so.sendBufSizeMu.Lock() - defer so.sendBufSizeMu.Unlock() - return so.sendBufferSize -} - // SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the // stack handler should be invoked to set the send buffer size. func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) { - so.sendBufSizeMu.Lock() - defer so.sendBufSizeMu.Unlock() if notify { sendBufferSize = so.handler.OnSetSendBufferSize(sendBufferSize) } - so.sendBufferSize = sendBufferSize + so.sendBufferSize.Store(sendBufferSize) + if notify { + so.handler.WakeupWriters() + } +} + +// GetReceiveBufferSize gets value for SO_RCVBUF option. +func (so *SocketOptions) GetReceiveBufferSize() int64 { + return so.receiveBufferSize.Load() } // ReceiveBufferLimits returns the [min, max) range of allowable receive buffer @@ -646,20 +651,12 @@ func (so *SocketOptions) ReceiveBufferLimits() (min, max int64) { return int64(limits.Min), int64(limits.Max) } -// GetReceiveBufferSize gets value for SO_RCVBUF option. -func (so *SocketOptions) GetReceiveBufferSize() int64 { - so.receiveBufSizeMu.Lock() - defer so.receiveBufSizeMu.Unlock() - return so.receiveBufferSize -} - // SetReceiveBufferSize sets the value of the SO_RCVBUF option, optionally // notifying the owning endpoint. 
func (so *SocketOptions) SetReceiveBufferSize(receiveBufferSize int64, notify bool) { - so.receiveBufSizeMu.Lock() - defer so.receiveBufSizeMu.Unlock() if notify { - receiveBufferSize = so.handler.OnSetReceiveBufferSize(receiveBufferSize, so.receiveBufferSize) + oldSz := so.receiveBufferSize.Load() + receiveBufferSize = so.handler.OnSetReceiveBufferSize(receiveBufferSize, oldSz) } - so.receiveBufferSize = receiveBufferSize + so.receiveBufferSize.Store(receiveBufferSize) } diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9192d8433..29c22bfd4 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -282,14 +282,12 @@ func (pk *PacketBuffer) headerView(typ headerType) tcpipbuffer.View { return v } -// Clone makes a shallow copy of pk. -// -// Clone should be called in such cases so that no modifications is done to -// underlying packet payload. +// Clone makes a semi-deep copy of pk. The underlying packet payload is +// shared. Hence, no modifications is done to underlying packet payload. func (pk *PacketBuffer) Clone() *PacketBuffer { return &PacketBuffer{ PacketBufferEntry: pk.PacketBufferEntry, - buf: pk.buf, + buf: pk.buf.Clone(), reserved: pk.reserved, pushed: pk.pushed, consumed: pk.consumed, @@ -321,14 +319,14 @@ func (pk *PacketBuffer) Network() header.Network { } } -// CloneToInbound makes a shallow copy of the packet buffer to be used as an -// inbound packet. +// CloneToInbound makes a semi-deep copy of the packet buffer (similar to +// Clone) to be used as an inbound packet. // // See PacketBuffer.Data for details about how a packet buffer holds an inbound // packet. func (pk *PacketBuffer) CloneToInbound() *PacketBuffer { newPk := &PacketBuffer{ - buf: pk.buf, + buf: pk.buf.Clone(), // Treat unfilled header portion as reserved. 
reserved: pk.AvailableHeaderBytes(), } diff --git a/pkg/tcpip/stack/packet_buffer_test.go b/pkg/tcpip/stack/packet_buffer_test.go index a8da34992..87b023445 100644 --- a/pkg/tcpip/stack/packet_buffer_test.go +++ b/pkg/tcpip/stack/packet_buffer_test.go @@ -123,6 +123,32 @@ func TestPacketHeaderPush(t *testing.T) { } } +func TestPacketBufferClone(t *testing.T) { + data := concatViews(makeView(20), makeView(30), makeView(40)) + pk := NewPacketBuffer(PacketBufferOptions{ + // Make a copy of data to make sure our truth data won't be taint by + // PacketBuffer. + Data: buffer.NewViewFromBytes(data).ToVectorisedView(), + }) + + bytesToDelete := 30 + originalSize := data.Size() + + clonedPks := []*PacketBuffer{ + pk.Clone(), + pk.CloneToInbound(), + } + pk.Data().DeleteFront(bytesToDelete) + if got, want := pk.Data().Size(), originalSize-bytesToDelete; got != want { + t.Errorf("original packet was not changed: size expected = %d, got = %d", want, got) + } + for _, clonedPk := range clonedPks { + if got := clonedPk.Data().Size(); got != originalSize { + t.Errorf("cloned packet should not be modified: expected size = %d, got = %d", originalSize, got) + } + } +} + func TestPacketHeaderConsume(t *testing.T) { for _, test := range []struct { name string diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go index 90a8ba6cf..93ea83cdc 100644 --- a/pkg/tcpip/stack/tcp.go +++ b/pkg/tcpip/stack/tcp.go @@ -386,6 +386,12 @@ type TCPSndBufState struct { // SndMTU is the smallest MTU seen in the control packets received. SndMTU int + + // AutoTuneSndBufDisabled indicates that the auto tuning of send buffer + // is disabled. + // + // Must be accessed using atomic operations. 
+ AutoTuneSndBufDisabled uint32 } // TCPEndpointStateInner contains the members of TCPEndpointState used directly diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 044123185..355719beb 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1717,6 +1717,27 @@ func (e *endpoint) OnSetReceiveBufferSize(rcvBufSz, oldSz int64) (newSz int64) { return rcvBufSz } +// OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize. +func (e *endpoint) OnSetSendBufferSize(sz int64) int64 { + atomic.StoreUint32(&e.sndQueueInfo.TCPSndBufState.AutoTuneSndBufDisabled, 1) + return sz +} + +// WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters. +func (e *endpoint) WakeupWriters() { + e.LockUser() + defer e.UnlockUser() + + sendBufferSize := e.getSendBufferSize() + e.sndQueueInfo.sndQueueMu.Lock() + notify := (sendBufferSize - e.sndQueueInfo.SndBufUsed) >= e.sndQueueInfo.SndBufUsed>>1 + e.sndQueueInfo.sndQueueMu.Unlock() + + if notify { + e.waiterQueue.Notify(waiter.WritableEvents) + } +} + // SetSockOptInt sets a socket option. func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { // Lower 2 bits represents ECN bits. RFC 3168, section 23.1 @@ -2329,6 +2350,9 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) tcp e.segmentQueue.mu.Unlock() e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0) e.setEndpointState(StateEstablished) + // Set the new auto tuned send buffer size after entering + // established state. + e.ops.SetSendBufferSize(e.computeTCPSendBufferSize(), false /* notify */) } if run { @@ -2763,13 +2787,20 @@ func (e *endpoint) updateSndBufferUsage(v int) { e.sndQueueInfo.sndQueueMu.Lock() notify := e.sndQueueInfo.SndBufUsed >= sendBufferSize>>1 e.sndQueueInfo.SndBufUsed -= v + + // Get the new send buffer size with auto tuning, but do not set it + // unless we decide to notify the writers. 
+ newSndBufSz := e.computeTCPSendBufferSize() + // We only notify when there is half the sendBufferSize available after // a full buffer event occurs. This ensures that we don't wake up // writers to queue just 1-2 segments and go back to sleep. - notify = notify && e.sndQueueInfo.SndBufUsed < sendBufferSize>>1 + notify = notify && e.sndQueueInfo.SndBufUsed < int(newSndBufSz)>>1 e.sndQueueInfo.sndQueueMu.Unlock() if notify { + // Set the new send buffer size calculated from auto tuning. + e.ops.SetSendBufferSize(newSndBufSz, false /* notify */) e.waiterQueue.Notify(waiter.WritableEvents) } } @@ -3091,3 +3122,36 @@ func GetTCPReceiveBufferLimits(s tcpip.StackHandler) tcpip.ReceiveBufferSizeOpti Max: ss.Max, } } + +// computeTCPSendBufferSize implements auto tuning of send buffer size and +// returns the new send buffer size. +func (e *endpoint) computeTCPSendBufferSize() int64 { + curSndBufSz := int64(e.getSendBufferSize()) + + // Auto tuning is disabled when the user explicitly sets the send + // buffer size with SO_SNDBUF option. + if disabled := atomic.LoadUint32(&e.sndQueueInfo.TCPSndBufState.AutoTuneSndBufDisabled); disabled == 1 { + return curSndBufSz + } + + const packetOverheadFactor = 2 + curMSS := e.snd.MaxPayloadSize + numSeg := InitialCwnd + if numSeg < e.snd.SndCwnd { + numSeg = e.snd.SndCwnd + } + + // SndCwnd indicates the number of segments that can be sent. This means + // that the sender can send upto #SndCwnd segments and the send buffer + // size should be set to SndCwnd*MSS to accommodate sending of all the + // segments. 
+ newSndBufSz := int64(numSeg * curMSS * packetOverheadFactor) + if newSndBufSz < curSndBufSz { + return curSndBufSz + } + if ss := GetTCPSendBufferLimits(e.stack); int64(ss.Max) < newSndBufSz { + newSndBufSz = int64(ss.Max) + } + + return newSndBufSz +} diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 92a66f17e..64302f576 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -1415,9 +1415,6 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { ackLeft -= datalen } - // Update the send buffer usage and notify potential waiters. - s.ep.updateSndBufferUsage(int(acked)) - // Clear SACK information for all acked data. s.ep.scoreboard.Delete(s.SndUna) @@ -1437,6 +1434,9 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { } } + // Update the send buffer usage and notify potential waiters. + s.ep.updateSndBufferUsage(int(acked)) + // It is possible for s.outstanding to drop below zero if we get // a retransmit timeout, reset outstanding to zero but later // get an ack that cover previously sent data. diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 031f01357..db6b0955a 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -7964,3 +7964,95 @@ func generateRandomPayload(t *testing.T, n int) []byte { } return buf } + +func TestSendBufferTuning(t *testing.T) { + const maxPayload = 536 + const mtu = header.TCPMinimumSize + header.IPv4MinimumSize + maxTCPOptionSize + maxPayload + const packetOverheadFactor = 2 + + testCases := []struct { + name string + autoTuningDisabled bool + }{ + {"autoTuningDisabled", true}, + {"autoTuningEnabled", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := context.New(t, mtu) + defer c.Cleanup() + + // Set the stack option for send buffer size. 
+ const defaultSndBufSz = maxPayload * tcp.InitialCwnd + const maxSndBufSz = defaultSndBufSz * 10 + { + opt := tcpip.TCPSendBufferSizeRangeOption{Min: 1, Default: defaultSndBufSz, Max: maxSndBufSz} + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { + t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err) + } + } + + c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */) + + oldSz := c.EP.SocketOptions().GetSendBufferSize() + if oldSz != defaultSndBufSz { + t.Fatalf("Wrong send buffer size got %d want %d", oldSz, defaultSndBufSz) + } + + if tc.autoTuningDisabled { + c.EP.SocketOptions().SetSendBufferSize(defaultSndBufSz, true /* notify */) + } + + data := make([]byte, maxPayload) + for i := range data { + data[i] = byte(i) + } + + w, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&w, waiter.WritableEvents) + defer c.WQ.EventUnregister(&w) + + bytesRead := 0 + for { + // Packets will be sent till the send buffer + // size is reached. + var r bytes.Reader + r.Reset(data[bytesRead : bytesRead+maxPayload]) + _, err := c.EP.Write(&r, tcpip.WriteOptions{}) + if cmp.Equal(&tcpip.ErrWouldBlock{}, err) { + break + } + + c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, 0) + bytesRead += maxPayload + data = append(data, data...) + } + + // Send an ACK and wait for connection to become writable again. + c.SendAck(seqnum.Value(context.TestInitialSequenceNumber).Add(1), bytesRead) + select { + case <-ch: + if err := c.EP.LastError(); err != nil { + t.Fatalf("Write failed: %s", err) + } + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for connection") + } + + outSz := int64(defaultSndBufSz) + if !tc.autoTuningDisabled { + // Calculate the new auto tuned send buffer. 
+ var info tcpip.TCPInfoOption + if err := c.EP.GetSockOpt(&info); err != nil { + t.Fatalf("GetSockOpt failed: %v", err) + } + outSz = (int64(info.SndCwnd) * packetOverheadFactor * (maxPayload)) + } + + if newSz := c.EP.SocketOptions().GetSendBufferSize(); newSz != outSz { + t.Fatalf("Wrong send buffer size, got %d want %d", newSz, outSz) + } + }) + } +} diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 82a3f2287..108580508 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -266,7 +266,7 @@ func (e *endpoint) Close() { for mem := range e.multicastMemberships { e.stack.LeaveGroup(e.NetProto, mem.nicID, mem.multicastAddr) } - e.multicastMemberships = make(map[multicastMembership]struct{}) + e.multicastMemberships = nil // Close the receive list and drain it. e.rcvMu.Lock() diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index cde1038ed..f46a00e42 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -429,7 +429,7 @@ type IOSequence struct { // return 0, nil // } // if f.availableBytes == 0 { -// return 0, syserror.ErrWouldBlock +// return 0, linuxerr.ErrWouldBlock // } // return ioseq.CopyOutFrom(..., reader) // diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index e5b0ec3ae..ae32b86e6 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -57,20 +57,12 @@ const ( // ContMgrExecuteAsync executes a command in a container. ContMgrExecuteAsync = "containerManager.ExecuteAsync" - // ContMgrPause pauses the sandbox (note that individual containers cannot be - // paused). - ContMgrPause = "containerManager.Pause" - // ContMgrProcesses lists processes running in a container. ContMgrProcesses = "containerManager.Processes" // ContMgrRestore restores a container from a statefile. ContMgrRestore = "containerManager.Restore" - // ContMgrResume unpauses the paused sandbox (note that individual containers - // cannot be resumed). 
- ContMgrResume = "containerManager.Resume" - // ContMgrSignal sends a signal to a container. ContMgrSignal = "containerManager.Signal" @@ -111,6 +103,29 @@ const ( LoggingChange = "Logging.Change" ) +// Lifecycle related commands (see lifecycle.go for more details). +const ( + LifecyclePause = "Lifecycle.Pause" + LifecycleResume = "Lifecycle.Resume" +) + +// Filesystem related commands (see fs.go for more details). +const ( + FsCat = "Fs.Cat" +) + +// Usage related commands (see usage.go for more details). +const ( + UsageCollect = "Usage.Collect" + UsageUsageFD = "Usage.UsageFD" + UsageReduce = "Usage.Reduce" +) + +// Events related commands (see events.go for more details). +const ( + EventsAttachDebugEmitter = "Events.AttachDebugEmitter" +) + // ControlSocketAddr generates an abstract unix socket name for the given ID. func ControlSocketAddr(id string) string { return fmt.Sprintf("\x00runsc-sandbox.%s", id) @@ -151,7 +166,11 @@ func newController(fd int, l *Loader) (*controller, error) { } ctrl.srv.Register(&debug{}) + ctrl.srv.Register(&control.Events{}) ctrl.srv.Register(&control.Logging{}) + ctrl.srv.Register(&control.Lifecycle{l.k}) + ctrl.srv.Register(&control.Fs{l.k}) + ctrl.srv.Register(&control.Usage{l.k}) if l.root.conf.ProfileEnable { ctrl.srv.Register(control.NewProfile(l.k)) @@ -340,17 +359,6 @@ func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { return state.Save(o, nil) } -// Pause suspends a sandbox. -func (cm *containerManager) Pause(_, _ *struct{}) error { - log.Debugf("containerManager.Pause") - // TODO(gvisor.dev/issues/6243): save/restore not supported w/ hostinet - if cm.l.root.conf.Network == config.NetworkHost { - return errors.New("pause not supported when using hostinet") - } - cm.l.k.Pause() - return nil -} - // RestoreOpts contains options related to restoring a container's file system. 
type RestoreOpts struct { // FilePayload contains the state file to be restored, followed by the @@ -482,13 +490,6 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return nil } -// Resume unpauses a sandbox. -func (cm *containerManager) Resume(_, _ *struct{}) error { - log.Debugf("containerManager.Resume") - cm.l.k.Unpause() - return nil -} - // Wait waits for the init process in the given container. func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error { log.Debugf("containerManager.Wait, cid: %s", *cid) diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go index c21648a32..cf5be34cd 100644 --- a/runsc/boot/strace.go +++ b/runsc/boot/strace.go @@ -35,9 +35,14 @@ func enableStrace(conf *config.Config) error { } strace.LogMaximumSize = max + sink := strace.SinkTypeLog + if conf.StraceEvent { + sink = strace.SinkTypeEvent + } + if len(conf.StraceSyscalls) == 0 { - strace.EnableAll(strace.SinkTypeLog) + strace.EnableAll(sink) return nil } - return strace.Enable(strings.Split(conf.StraceSyscalls, ","), strace.SinkTypeLog) + return strace.Enable(strings.Split(conf.StraceSyscalls, ","), sink) } diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 39c8ff603..c5e32807d 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -36,6 +36,7 @@ go_library( "statefile.go", "symbolize.go", "syscalls.go", + "usage.go", "verity_prepare.go", "wait.go", ], @@ -95,10 +96,10 @@ go_test( "//runsc/config", "//runsc/container", "//runsc/mitigate", - "//runsc/mitigate/mock", "//runsc/specutils", "@com_github_google_go_cmp//cmp:go_default_library", "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", + "@com_github_google_subcommands//:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", ], diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index da81cf048..f773ccca0 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -48,6 
+48,7 @@ type Debug struct { delay time.Duration duration time.Duration ps bool + cat stringSlice } // Name implements subcommands.Command. @@ -81,6 +82,7 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).") f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.") f.BoolVar(&d.ps, "ps", false, "lists processes") + f.Var(&d.cat, "cat", "reads files and print to standard output") } // Execute implements subcommands.Command.Execute. @@ -367,5 +369,11 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return subcommands.ExitFailure } + if d.cat != nil { + if err := c.Cat(d.cat, os.Stdout); err != nil { + return Errorf("Cat failed: %v", err) + } + } + return subcommands.ExitSuccess } diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index c1d029d7f..08246e543 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -33,6 +33,10 @@ type Events struct { intervalSec int // If true, events will print a single group of stats and exit. stats bool + // If true, events will dump all filtered events to stdout. + stream bool + // filters for streamed events. + filters stringSlice } // Name implements subcommands.Command.Name. @@ -62,6 +66,8 @@ OPTIONS: func (evs *Events) SetFlags(f *flag.FlagSet) { f.IntVar(&evs.intervalSec, "interval", 5, "set the stats collection interval, in seconds") f.BoolVar(&evs.stats, "stats", false, "display the container's stats then exit") + f.BoolVar(&evs.stream, "stream", false, "dump all filtered events to stdout") + f.Var(&evs.filters, "filters", "only display matching events") } // Execute implements subcommands.Command.Execute. 
@@ -79,6 +85,13 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa Fatalf("loading sandbox: %v", err) } + if evs.stream { + if err := c.Stream(evs.filters, os.Stdout); err != nil { + Fatalf("Stream failed: %v", err) + } + return subcommands.ExitSuccess + } + // Repeatedly get stats from the container. for { // Get the event and print it as JSON. diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go index f4e65adb8..1aada5968 100644 --- a/runsc/cmd/mitigate.go +++ b/runsc/cmd/mitigate.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "io/ioutil" + "os" "runtime" "github.com/google/subcommands" @@ -29,8 +30,8 @@ import ( const ( // cpuInfo is the path used to parse CPU info. cpuInfo = "/proc/cpuinfo" - // allPossibleCPUs is the path used to enable CPUs. - allPossibleCPUs = "/sys/devices/system/cpu/possible" + // Path to enable/disable SMT. + smtPath = "/sys/devices/system/cpu/smt/control" ) // Mitigate implements subcommands.Command for the "mitigate" command. @@ -39,10 +40,10 @@ type Mitigate struct { dryRun bool // Reverse mitigate by turning on all CPU cores. reverse bool - // Path to file to read to create CPUSet. - path string // Extra data for post mitigate operations. data string + // Control to mitigate/reverse smt. + control machineControl } // Name implements subcommands.command.name. @@ -56,12 +57,12 @@ func (*Mitigate) Synopsis() string { } // Usage implements Usage for cmd.Mitigate. -func (m Mitigate) Usage() string { +func (m *Mitigate) Usage() string { return fmt.Sprintf(`mitigate [flags] -mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. 
+mitigate mitigates a system to the "MDS" vulnerability by writing "off" to %q. CPUs can be restored by writing "on" to the same file or rebooting your system. -The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.%s`, m.usage()) +The command can be reversed with --reverse, which writes "on" to the file above.%s`, smtPath, m.usage()) } // SetFlags sets flags for the command Mitigate. @@ -74,104 +75,110 @@ func (m *Mitigate) SetFlags(f *flag.FlagSet) { // Execute implements subcommands.Command.Execute. func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { if runtime.GOARCH == "arm64" || runtime.GOARCH == "arm" { - log.Warningf("As ARM is not affected by MDS, mitigate does not support") - return subcommands.ExitFailure + log.Warningf("As ARM is not affected by MDS, mitigate does not support ARM machines.") + // Set reverse flag so that we still perform post mitigate operations. mitigate reverse is a noop in this case. + m.reverse = true } if f.NArg() != 0 { f.Usage() return subcommands.ExitUsageError } + m.control = &machineControlImpl{} + return m.execute() +} - m.path = cpuInfo - if m.reverse { - m.path = allPossibleCPUs +// execute executes mitigate operations. Separate from Execute method for +// easier mocking.
+func (m *Mitigate) execute() subcommands.ExitStatus { + beforeSet, err := m.control.getCPUs() + if err != nil { + return Errorf("Get before CPUSet failed: %v", err) } + log.Infof("CPUs before: %s", beforeSet.String()) - set, err := m.doExecute() - if err != nil { - return Errorf("Execute failed: %v", err) + if err := m.doEnableDisable(beforeSet); err != nil { + return Errorf("Enabled/Disable action failed on %q: %v", smtPath, err) } - if m.data == "" { - return subcommands.ExitSuccess + afterSet, err := m.control.getCPUs() + if err != nil { + return Errorf("Get after CPUSet failed: %v", err) } + log.Infof("CPUs after: %s", afterSet.String()) - if err = m.postMitigate(set); err != nil { + if err = m.postMitigate(afterSet); err != nil { return Errorf("Post Mitigate failed: %v", err) } return subcommands.ExitSuccess } -// Execute executes the Mitigate command. -func (m *Mitigate) doExecute() (mitigate.CPUSet, error) { - if m.dryRun { - log.Infof("Running with DryRun. No cpu settings will be changed.") - } - data, err := ioutil.ReadFile(m.path) - if err != nil { - return nil, fmt.Errorf("failed to read %s: %w", m.path, err) - } +// doEnableDisable does either enable or disable operation based on flags. +func (m *Mitigate) doEnableDisable(set mitigate.CPUSet) error { if m.reverse { - set, err := m.doReverse(data) - if err != nil { - return nil, fmt.Errorf("reverse operation failed: %w", err) + if m.dryRun { + log.Infof("Skipping reverse action because dryrun is set.") + return nil } - return set, nil + return m.control.enable() } - set, err := m.doMitigate(data) - if err != nil { - return nil, fmt.Errorf("mitigate operation failed: %w", err) + if m.dryRun { + log.Infof("Skipping mitigate action because dryrun is set.") + return nil } - return set, nil + if set.IsVulnerable() { + return m.control.disable() + } + log.Infof("CPUs not vulnerable. 
Skipping disable call.") + return nil } -func (m *Mitigate) doMitigate(data []byte) (mitigate.CPUSet, error) { - set, err := mitigate.NewCPUSet(data) - if err != nil { - return nil, err - } +// Interface to wrap interactions with underlying machine. Done +// so testing with mocks can be done hermetically. +type machineControl interface { + enable() error + disable() error + isEnabled() (bool, error) + getCPUs() (mitigate.CPUSet, error) +} - log.Infof("Mitigate found the following CPUs...") - log.Infof("%s", set) +// Implementation of SMT control interaction with the underlying machine. +type machineControlImpl struct{} - disableList := set.GetShutdownList() - log.Infof("Disabling threads on thread pairs.") - for _, t := range disableList { - log.Infof("Disable thread: %s", t) - if m.dryRun { - continue - } - if err := t.Disable(); err != nil { - return nil, fmt.Errorf("error disabling thread: %s err: %w", t, err) - } - } - log.Infof("Shutdown successful.") - return set, nil +func (*machineControlImpl) enable() error { + return checkFileExistsOnWrite("enable", "on") } -func (m *Mitigate) doReverse(data []byte) (mitigate.CPUSet, error) { - set, err := mitigate.NewCPUSetFromPossible(data) - if err != nil { - return nil, err - } +func (*machineControlImpl) disable() error { + return checkFileExistsOnWrite("disable", "off") +} - log.Infof("Reverse mitigate found the following CPUs...") - log.Infof("%s", set) +// Writes data to SMT control. If file not found, logs file not exist error and returns nil +// error, which is done because machines without the file pointed to by smtPath only have one +// thread per core in the first place. Otherwise returns error from ioutil.WriteFile. +func checkFileExistsOnWrite(op, data string) error { + err := ioutil.WriteFile(smtPath, []byte(data), 0644) + if err != nil && os.IsExist(err) { + log.Infof("File %q does not exist for operation %s. 
This machine probably has no smt control.", smtPath, op) + return nil + } + return err +} - enableList := set.GetRemainingList() +func (*machineControlImpl) isEnabled() (bool, error) { + data, err := ioutil.ReadFile(cpuInfo) + return string(data) == "on", err +} - log.Infof("Enabling all CPUs...") - for _, t := range enableList { - log.Infof("Enabling thread: %s", t) - if m.dryRun { - continue - } - if err := t.Enable(); err != nil { - return nil, fmt.Errorf("error enabling thread: %s err: %w", t, err) - } +func (*machineControlImpl) getCPUs() (mitigate.CPUSet, error) { + data, err := ioutil.ReadFile(cpuInfo) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", cpuInfo, err) + } + set, err := mitigate.NewCPUSet(string(data)) + if err != nil { + return nil, fmt.Errorf("getCPUs: %v", err) } - log.Infof("Enable successful.") return set, nil } diff --git a/runsc/cmd/mitigate_test.go b/runsc/cmd/mitigate_test.go index 51755d9f3..294fc645c 100644 --- a/runsc/cmd/mitigate_test.go +++ b/runsc/cmd/mitigate_test.go @@ -18,144 +18,133 @@ package cmd import ( - "fmt" - "io/ioutil" - "os" - "strings" "testing" - "gvisor.dev/gvisor/runsc/mitigate/mock" + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/mitigate" ) -type executeTestCase struct { - name string - mitigateData string - mitigateError error - mitigateCPU int - reverseData string - reverseError error - reverseCPU int +type mockMachineControl struct { + enabled bool + cpus mitigate.CPUSet } -func TestExecute(t *testing.T) { +func (m *mockMachineControl) enable() error { + m.enabled = true + return nil +} - partial := `processor : 1 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -physical id : 0 -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -power management: -` +func (m *mockMachineControl) disable() error { + if m.cpus.IsVulnerable() { + m.enabled = false + } + return nil +} +func (m *mockMachineControl) 
isEnabled() (bool, error) { + return m.enabled, nil +} + +func (m *mockMachineControl) getCPUs() (mitigate.CPUSet, error) { + set := m.cpus + if !m.enabled { + set = m.cpus[:len(m.cpus)/2] + } + + // Instead of just returning the created CPU set stored in this struct, call + // NewCPUSet to exercise that code path as the machineControlImpl would. + return mitigate.NewCPUSet(set.String()) +} + +type executeTestCase struct { + name string + cpu mitigate.MockCPU + mitigateWantCPUs int + mitigateError subcommands.ExitStatus + mitigateWantEnabled bool + reverseWantCPUs int + reverseError subcommands.ExitStatus + reverseWantEnabled bool + dryrun bool +} + +func TestExecute(t *testing.T) { for _, tc := range []executeTestCase{ { - name: "CascadeLake4", - mitigateData: mock.CascadeLake4.MakeCPUString(), - mitigateCPU: 2, - reverseData: mock.CascadeLake4.MakeSysPossibleString(), - reverseCPU: 4, + name: "CascadeLake4", + cpu: mitigate.CascadeLake4, + mitigateWantCPUs: 2, + mitigateWantEnabled: false, + reverseWantCPUs: 4, + reverseWantEnabled: true, }, { - name: "Empty", - mitigateData: "", - mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`), - reverseData: "", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: ""`), + name: "CascadeLake4DryRun", + cpu: mitigate.CascadeLake4, + mitigateWantCPUs: 4, + mitigateWantEnabled: true, + reverseWantCPUs: 4, + reverseWantEnabled: true, + dryrun: true, }, { - name: "Partial", - mitigateData: `processor : 0 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -physical id : 0 -core id : 0 -cpu cores : 1 -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -power management::84 - -` + partial, - mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial), - reverseData: "1-", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: %q`, "1-"), + name: "AMD8", + cpu: 
mitigate.AMD8, + mitigateWantCPUs: 8, + mitigateWantEnabled: true, + reverseWantCPUs: 8, + reverseWantEnabled: true, + }, + { + name: "Empty", + cpu: mitigate.Empty, + mitigateError: Errorf(`mitigate operation failed: no cpus found for: ""`), + reverseError: Errorf(`mitigate operation failed: no cpus found for: ""`), }, } { t.Run(tc.name, func(t *testing.T) { + set := tc.cpu.MakeCPUSet() m := &Mitigate{ - dryRun: true, + control: &mockMachineControl{ + enabled: true, + cpus: set, + }, + dryRun: tc.dryrun, } - m.doExecuteTest(t, "Mitigate", tc.mitigateData, tc.mitigateCPU, tc.mitigateError) + t.Run("Mitigate", func(t *testing.T) { + m.doExecuteTest(t, tc.mitigateWantEnabled, tc.mitigateWantCPUs, tc.mitigateError) + }) m.reverse = true - m.doExecuteTest(t, "Reverse", tc.reverseData, tc.reverseCPU, tc.reverseError) + t.Run("Reverse", func(t *testing.T) { + m.doExecuteTest(t, tc.reverseWantEnabled, tc.reverseWantCPUs, tc.reverseError) + }) }) } } -func TestExecuteSmoke(t *testing.T) { - smokeMitigate, err := ioutil.ReadFile(cpuInfo) - if err != nil { - t.Fatalf("Failed to read %s: %v", cpuInfo, err) +// doExecuteTest runs Execute with the mitigate operation and reverse operation. +func (m *Mitigate) doExecuteTest(t *testing.T, wantEnabled bool, wantCPUs int, wantErr subcommands.ExitStatus) { + subError := m.execute() + if subError != wantErr { + t.Fatalf("Mitigate error mismatch: want: %v got: %v", wantErr, subError) } - m := &Mitigate{ - dryRun: true, + // case where test should end in error or we don't care + // about how many cpus are returned. 
+ if wantErr != subcommands.ExitSuccess { + log.Infof("return") + return } - m.doExecuteTest(t, "Mitigate", string(smokeMitigate), 0, nil) - - smokeReverse, err := ioutil.ReadFile(allPossibleCPUs) - if err != nil { - t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err) + gotEnabled, _ := m.control.isEnabled() + if wantEnabled != gotEnabled { + t.Fatalf("Incorrect enabled state: want: %t got: %t", wantEnabled, gotEnabled) } - m.reverse = true - m.doExecuteTest(t, "Reverse", string(smokeReverse), 0, nil) -} - -// doExecuteTest runs Execute with the mitigate operation and reverse operation. -func (m *Mitigate) doExecuteTest(t *testing.T, name, data string, want int, wantErr error) { - t.Run(name, func(t *testing.T) { - file, err := ioutil.TempFile("", "outfile.txt") - if err != nil { - t.Fatalf("Failed to create tmpfile: %v", err) - } - defer os.Remove(file.Name()) - - if _, err := file.WriteString(data); err != nil { - t.Fatalf("Failed to write to file: %v", err) - } - - // Set fields for mitigate and dryrun to keep test hermetic. - m.path = file.Name() - - set, err := m.doExecute() - if err = checkErr(wantErr, err); err != nil { - t.Fatalf("Mitigate error mismatch: %v", err) - } - - // case where test should end in error or we don't care - // about how many cpus are returned. - if wantErr != nil || want < 1 { - return - } - got := len(set.GetRemainingList()) - if want != got { - t.Fatalf("Failed wrong number of remaining CPUs: want %d, got %d", want, got) - } - - }) -} - -// checkErr checks error for equality. 
-func checkErr(want, got error) error { - switch { - case want == nil && got == nil: - case want == nil || got == nil || want.Error() != strings.Trim(got.Error(), " "): - return fmt.Errorf("got: %v want: %v", got, want) + gotCPUs, _ := m.control.getCPUs() + if len(gotCPUs) != wantCPUs { + t.Fatalf("Incorrect number of CPUs: want: %d got: %d", wantCPUs, len(gotCPUs)) } - return nil } diff --git a/runsc/cmd/usage.go b/runsc/cmd/usage.go new file mode 100644 index 000000000..d2aeafa28 --- /dev/null +++ b/runsc/cmd/usage.go @@ -0,0 +1,93 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" +) + +// Usage implements subcommands.Command for the "usage" command. +type Usage struct { + full bool + fd bool +} + +// Name implements subcommands.Command.Name. +func (*Usage) Name() string { + return "usage" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Usage) Synopsis() string { + return "Usage shows application memory usage across various categories in bytes." +} + +// Usage implements subcommands.Command.Usage. +func (*Usage) Usage() string { + return `usage [flags] <container id> - print memory usages to standard output.` +} + +// SetFlags implements subcommands.Command.SetFlags. 
+func (u *Usage) SetFlags(f *flag.FlagSet) { + f.BoolVar(&u.full, "full", false, "enumerate all usage by categories") + f.BoolVar(&u.fd, "fd", false, "retrieves a subset of usage through the established usage FD") +} + +// Execute implements subcommands.Command.Execute. +func (u *Usage) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() < 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*config.Config) + + cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) + if err != nil { + Fatalf("loading container: %v", err) + } + + if !u.fd { + m, err := cont.Usage(u.full) + if err != nil { + Fatalf("usage failed: %v", err) + } + if err := json.NewEncoder(os.Stdout).Encode(m); err != nil { + Fatalf("Encode MemoryUsage failed: %v", err) + } + } else { + m, err := cont.UsageFD() + if err != nil { + Fatalf("usagefd failed: %v", err) + } + + mapped, unknown, total, err := m.Fetch() + if err != nil { + Fatalf("Fetch memory usage failed: %v", err) + } + + fmt.Printf("Mapped %v, Unknown %v, Total %v\n", mapped, unknown, total) + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/verity_prepare.go b/runsc/cmd/verity_prepare.go index 85d762a51..44c1d05db 100644 --- a/runsc/cmd/verity_prepare.go +++ b/runsc/cmd/verity_prepare.go @@ -82,7 +82,7 @@ func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...inte }, Process: &specs.Process{ Cwd: absRoot, - Args: []string{c.tool, "--path", "/verityroot"}, + Args: []string{c.tool, "--path", "/verityroot", "--rawpath", "/rawroot"}, Env: os.Environ(), Capabilities: specutils.AllCapabilities(), }, @@ -94,6 +94,11 @@ func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...inte Type: "bind", Options: []string{"verity.roothash="}, }, + { + Source: c.dir, + Destination: "/rawroot", + Type: "bind", + }, }, } diff --git a/runsc/config/config.go 
b/runsc/config/config.go index cc4650180..b811a170a 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -117,6 +117,10 @@ type Config struct { // StraceLogSize is the max size of data blobs to display. StraceLogSize uint `flag:"strace-log-size"` + // StraceEvent indicates sending strace to events if true. Strace is + // sent to log if false. + StraceEvent bool `flag:"strace-event"` + // DisableSeccomp indicates whether seccomp syscall filters should be // disabled. Pardon the double negation, but default to enabled is important. DisableSeccomp bool diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 6f1b5927a..8fde31167 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -56,6 +56,7 @@ func RegisterFlags() { flag.Bool("strace", false, "enable strace.") flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.") flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.") + flag.Bool("strace-event", false, "send strace to event.") // Flags that control sandbox runtime behavior. flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.") diff --git a/runsc/container/container.go b/runsc/container/container.go index 6a9a07afe..50b0dd5e7 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -646,6 +646,36 @@ func (c *Container) Resume() error { return c.saveLocked() } +// Cat prints out the content of the files. +func (c *Container) Cat(files []string, out *os.File) error { + log.Debugf("Cat in container, cid: %s, files: %+v", c.ID, files) + return c.Sandbox.Cat(c.ID, files, out) +} + +// Usage displays memory used by the application. 
+func (c *Container) Usage(full bool) (control.MemoryUsage, error) { + log.Debugf("Usage in container, cid: %s, full: %v", c.ID, full) + return c.Sandbox.Usage(c.ID, full) +} + +// UsageFD shows application memory usage using two donated FDs. +func (c *Container) UsageFD() (*control.MemoryUsageRecord, error) { + log.Debugf("UsageFD in container, cid: %s", c.ID) + return c.Sandbox.UsageFD(c.ID) +} + +// Reduce requests that the sentry attempt to reduce its memory usage. +func (c *Container) Reduce(wait bool) error { + log.Debugf("Reduce in container, cid: %s", c.ID) + return c.Sandbox.Reduce(c.ID, wait) +} + +// Stream dumps all events to out. +func (c *Container) Stream(filters []string, out *os.File) error { + log.Debugf("Stream in container, cid: %s", c.ID) + return c.Sandbox.Stream(c.ID, filters, out) +} + // State returns the metadata of the container. func (c *Container) State() specs.State { return specs.State{ diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 5fb4a3672..681f5c1a9 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -442,6 +442,11 @@ func configs(t *testing.T, opts ...configOption) map[string]*config.Config { return all } +// sleepSpecConf returns a spec that runs "sleep 1000" together with a test config. +func sleepSpecConf(t *testing.T) (*specs.Spec, *config.Config) { + return testutil.NewSpecWithArgs("sleep", "1000"), testutil.TestConfig(t) +} + // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle. // It verifies after each step that the container can be loaded from disk, and // has the correct status. @@ -455,7 +460,7 @@ func TestLifecycle(t *testing.T) { t.Run(name, func(t *testing.T) { // The container will just sleep for a long time. We will kill it before // it finishes sleeping.
- spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -903,7 +908,7 @@ func TestExecProcList(t *testing.T) { for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -1422,8 +1427,7 @@ func TestPauseResume(t *testing.T) { // with calls to pause and resume and that pausing and resuming only // occurs given the correct state. func TestPauseResumeStatus(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "20") - conf := testutil.TestConfig(t) + spec, conf := sleepSpecConf(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1490,7 +1494,7 @@ func TestCapabilities(t *testing.T) { for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1640,7 +1644,7 @@ func TestMountNewDir(t *testing.T) { func TestReadonlyRoot(t *testing.T) { for name, conf := range configs(t, all...) 
{ t.Run(name, func(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) spec.Root.Readonly = true _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -1692,7 +1696,7 @@ func TestReadonlyMount(t *testing.T) { if err != nil { t.Fatalf("ioutil.TempDir() failed: %v", err) } - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) spec.Mounts = append(spec.Mounts, specs.Mount{ Destination: dir, Source: dir, @@ -1852,7 +1856,7 @@ func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { "baz-" + testutil.RandomContainerID(), } for _, cid := range cids { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2229,7 +2233,7 @@ func TestMountPropagation(t *testing.T) { t.Fatalf("mount(%q, MS_SHARED): %v", srcMnt, err) } - spec := testutil.NewSpecWithArgs("sleep", "1000") + spec, conf := sleepSpecConf(t) priv := filepath.Join(tmpDir, "priv") slave := filepath.Join(tmpDir, "slave") @@ -2248,7 +2252,6 @@ func TestMountPropagation(t *testing.T) { }, } - conf := testutil.TestConfig(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2563,12 +2566,11 @@ func TestRlimits(t *testing.T) { // TestRlimitsExec sets limit to number of open files and checks that the limit // is propagated to exec'd processes. 
func TestRlimitsExec(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, conf := sleepSpecConf(t) spec.Process.Rlimits = []specs.POSIXRlimit{ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, } - conf := testutil.TestConfig(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2597,3 +2599,233 @@ func TestRlimitsExec(t *testing.T) { t.Errorf("ulimit result, got: %q, want: %q", got, want) } } + +// TestCat creates a file and checks that cat generates the expected output. +func TestCat(t *testing.T) { + f, err := ioutil.TempFile(testutil.TmpDir(), "test-case") + if err != nil { + t.Fatalf("ioutil.TempFile failed: %v", err) + } + defer os.RemoveAll(f.Name()) + + content := "test-cat" + if _, err := f.WriteString(content); err != nil { + t.Fatalf("f.WriteString(): %v", err) + } + f.Close() + + spec, conf := sleepSpecConf(t) + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + + cont, err := New(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + + if err := cont.Cat([]string{f.Name()}, w); err != nil { + t.Fatalf("error cat from container: %v", err) + } + + buf := make([]byte, 1024) + if _, err := r.Read(buf); err != nil { + t.Fatalf("Read out: %v", err) + } + if got, want := string(buf), content; !strings.Contains(got, want) { + t.Errorf("out got %s, want include %s", buf, want) + } +} + +// TestUsage checks that usage generates the expected memory usage. 
+func TestUsage(t *testing.T) { + spec, conf := sleepSpecConf(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + + cont, err := New(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + for _, full := range []bool{false, true} { + m, err := cont.Usage(full) + if err != nil { + t.Fatalf("error usage from container: %v", err) + } + if m.Mapped == 0 { + t.Errorf("Usage mapped got zero") + } + if m.Total == 0 { + t.Errorf("Usage total got zero") + } + if full { + if m.System == 0 { + t.Errorf("Usage system got zero") + } + if m.Anonymous == 0 { + t.Errorf("Usage anonymous got zero") + } + } + } +} + +// TestUsageFD checks that usagefd generates the expected memory usage. 
+func TestUsageFD(t *testing.T) { + spec, conf := sleepSpecConf(t) + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + + cont, err := New(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + m, err := cont.UsageFD() + if err != nil { + t.Fatalf("error usageFD from container: %v", err) + } + + mapped, unknown, total, err := m.Fetch() + if err != nil { + t.Fatalf("error Fetch memory usage: %v", err) + } + + if mapped == 0 { + t.Errorf("UsageFD Mapped got zero") + } + if unknown == 0 { + t.Errorf("UsageFD unknown got zero") + } + if total == 0 { + t.Errorf("UsageFD total got zero") + } +} + +// TestReduce checks that reduce call succeeds. +func TestReduce(t *testing.T) { + spec, conf := sleepSpecConf(t) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + + cont, err := New(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + if err := cont.Reduce(false); err != nil { + t.Fatalf("error reduce from container: %v", err) + } +} + +// TestStream checks that Stream dumps expected events. 
+func TestStream(t *testing.T) { + spec, conf := sleepSpecConf(t) + conf.Strace = true + conf.StraceEvent = true + conf.StraceSyscalls = "" + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + + cont, err := New(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + defer cont.Destroy() + + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } + + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + + // Spawn a new thread to Stream events as it blocks indefinitely. + go func() { + cont.Stream(nil, w) + }() + + buf := make([]byte, 1024) + if _, err := r.Read(buf); err != nil { + t.Fatalf("Read out: %v", err) + } + + // A syscall strace event includes "Strace". + if got, want := string(buf), "Strace"; !strings.Contains(got, want) { + t.Errorf("out got %s, want include %s", buf, want) + } +} diff --git a/runsc/mitigate/BUILD b/runsc/mitigate/BUILD index 1238890fc..9f4a7ba8d 100644 --- a/runsc/mitigate/BUILD +++ b/runsc/mitigate/BUILD @@ -4,7 +4,10 @@ package(licenses = ["notice"]) go_library( name = "mitigate", - srcs = ["mitigate.go"], + srcs = [ + "mitigate.go", + "mock.go", + ], visibility = [ "//runsc:__subpackages__", ], @@ -16,8 +19,5 @@ go_test( size = "small", srcs = ["mitigate_test.go"], library = ":mitigate", - deps = [ - "//runsc/mitigate/mock", - "@com_github_google_go_cmp//cmp:go_default_library", - ], + deps = ["@com_github_google_go_cmp//cmp:go_default_library"], ) diff --git a/runsc/mitigate/mitigate.go b/runsc/mitigate/mitigate.go index 9f29ec873..00e5bf2a9 100644 --- a/runsc/mitigate/mitigate.go +++ b/runsc/mitigate/mitigate.go @@ -19,10 +19,7 @@ package mitigate import ( "fmt" - "io/ioutil" - "os" "regexp" - "sort" "strconv" "strings" ) @@ -39,128 +36,20 @@ const 
( physicalIDKey = "physical id" coreIDKey = "core id" bugsKey = "bugs" - - // Path to shutdown a CPU. - cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" ) // CPUSet contains a map of all CPUs on the system, mapped // by Physical ID and CoreIDs. threads with the same // Core and Physical ID are Hyperthread pairs. -type CPUSet map[threadID]*ThreadGroup +type CPUSet []*CPU // NewCPUSet creates a CPUSet from data read from /proc/cpuinfo. -func NewCPUSet(data []byte) (CPUSet, error) { - processors, err := getThreads(string(data)) - if err != nil { - return nil, err - } - - set := make(CPUSet) - for _, p := range processors { - // Each ID is of the form physicalID:coreID. Hyperthread pairs - // have identical physical and core IDs. We need to match - // Hyperthread pairs so that we can shutdown all but one per - // pair. - core, ok := set[p.id] - if !ok { - core = &ThreadGroup{} - set[p.id] = core - } - core.isVulnerable = core.isVulnerable || p.IsVulnerable() - core.threads = append(core.threads, p) - } - - // We need to make sure we shutdown the lowest number processor per - // thread group. - for _, tg := range set { - sort.Slice(tg.threads, func(i, j int) bool { - return tg.threads[i].processorNumber < tg.threads[j].processorNumber - }) - } - return set, nil -} - -// NewCPUSetFromPossible makes a cpuSet data read from -// /sys/devices/system/cpu/possible. This is used in enable operations -// where the caller simply wants to enable all CPUS. -func NewCPUSetFromPossible(data []byte) (CPUSet, error) { - threads, err := GetThreadsFromPossible(data) - if err != nil { - return nil, err - } - - // We don't care if a CPU is vulnerable or not, we just - // want to return a list of all CPUs on the host. - set := CPUSet{ - threads[0].id: &ThreadGroup{ - threads: threads, - isVulnerable: false, - }, - } - return set, nil -} - -// String implements the String method for CPUSet. 
-func (c CPUSet) String() string { - ret := "" - for _, tg := range c { - ret += fmt.Sprintf("%s\n", tg) - } - return ret -} - -// GetRemainingList returns the list of threads that will remain active -// after mitigation. -func (c CPUSet) GetRemainingList() []Thread { - threads := make([]Thread, 0, len(c)) - for _, core := range c { - // If we're vulnerable, take only one thread from the pair. - if core.isVulnerable { - threads = append(threads, core.threads[0]) - continue - } - // Otherwise don't shutdown anything. - threads = append(threads, core.threads...) - } - return threads -} - -// GetShutdownList returns the list of threads that will be shutdown on -// mitigation. -func (c CPUSet) GetShutdownList() []Thread { - threads := make([]Thread, 0) - for _, core := range c { - // Only if we're vulnerable do shutdown anything. In this case, - // shutdown all but the first entry. - if core.isVulnerable && len(core.threads) > 1 { - threads = append(threads, core.threads[1:]...) - } - } - return threads -} - -// ThreadGroup represents Hyperthread pairs on the same physical/core ID. -type ThreadGroup struct { - threads []Thread - isVulnerable bool -} - -// String implements the String method for threadGroup. -func (c ThreadGroup) String() string { - ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) - for _, processor := range c.threads { - ret += fmt.Sprintf("%s\n", processor) - } - return ret -} - -// getThreads returns threads structs from reading /proc/cpuinfo. -func getThreads(data string) ([]Thread, error) { +func NewCPUSet(data string) (CPUSet, error) { // Each processor entry should start with the // processor key. Find the beginings of each. r := buildRegex(processorKey) indices := r.FindAllStringIndex(data, -1) + if len(indices) < 1 { return nil, fmt.Errorf("no cpus found for: %q", data) } @@ -172,193 +61,132 @@ func getThreads(data string) ([]Thread, error) { // indexes (e.g. data[index[i], index[i+1]]). 
// There should be len(indicies) - 1 CPUs // since the last index is the end of the string. - cpus := make([]Thread, 0, len(indices)) + var set CPUSet // Find each string that represents a CPU. These begin "processor". for i := 1; i < len(indices); i++ { start := indices[i-1][0] end := indices[i][0] // Parse the CPU entry, which should be between start/end. - c, err := newThread(data[start:end]) + c, err := newCPU(data[start:end]) if err != nil { return nil, err } - cpus = append(cpus, c) + set = append(set, c) } - return cpus, nil + return set, nil } -// GetThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. -func GetThreadsFromPossible(data []byte) ([]Thread, error) { - possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) - matches := possibleRegex.FindStringSubmatch(string(data)) - if len(matches) != 4 { - return nil, fmt.Errorf("mismatch regex from possible: %q", string(data)) - } - - // If matches[3] is empty, we only have one cpu entry. - if matches[3] == "" { - matches[3] = matches[1] - } - - begin, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse begin: %v", err) - } - end, err := strconv.ParseInt(matches[3], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse end: %v", err) - } - if begin > end || begin < 0 || end < 0 { - return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) - } - - ret := make([]Thread, 0, end-begin) - for i := begin; i <= end; i++ { - ret = append(ret, Thread{ - processorNumber: i, - id: threadID{ - physicalID: 0, // we don't care about id for enable ops. - coreID: 0, - }, - }) +// IsVulnerable checks if this CPUSet is vulnerable to MDS. +func (c CPUSet) IsVulnerable() bool { + for _, cpu := range c { + if cpu.IsVulnerable() { + return true + } } - - return ret, nil + return false } -// threadID for each thread is defined by the physical and -// core IDs. 
If equal, two threads are Hyperthread pairs. -type threadID struct { - physicalID int64 - coreID int64 +// String implements the String method for CPUSet. +func (c CPUSet) String() string { + parts := make([]string, len(c)) + for i, cpu := range c { + parts[i] = cpu.String() + } + return strings.Join(parts, "\n") } -// Thread represents pertinent info about a single hyperthread in a pair. -type Thread struct { +// CPU represents pertinent info about a single hyperthread in a pair. +type CPU struct { processorNumber int64 // the processor number of this CPU. vendorID string // the vendorID of CPU (e.g. AuthenticAMD). cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). - id threadID // id for this thread + physicalID int64 // Physical ID of this CPU. + coreID int64 // Core ID of this CPU. bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. } -// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. 
-func newThread(data string) (Thread, error) { - empty := Thread{} +func newCPU(data string) (*CPU, error) { processor, err := parseProcessor(data) if err != nil { - return empty, err + return nil, err } vendorID, err := parseVendorID(data) if err != nil { - return empty, err + return nil, err } cpuFamily, err := parseCPUFamily(data) if err != nil { - return empty, err + return nil, err } model, err := parseModel(data) if err != nil { - return empty, err + return nil, err } physicalID, err := parsePhysicalID(data) if err != nil { - return empty, err + return nil, err } coreID, err := parseCoreID(data) if err != nil { - return empty, err + return nil, err } bugs, err := parseBugs(data) if err != nil { - return empty, err + return nil, err } - return Thread{ + return &CPU{ processorNumber: processor, vendorID: vendorID, cpuFamily: cpuFamily, model: model, - id: threadID{ - physicalID: physicalID, - coreID: coreID, - }, - bugs: bugs, + physicalID: physicalID, + coreID: coreID, + bugs: bugs, }, nil } -// String implements the String method for thread. -func (t Thread) String() string { - template := `CPU: %d -CPU ID: %+v -Vendor: %s -Family/Model: %d/%d -Bugs: %s +// String implements the String method for CPU. +func (t *CPU) String() string { + template := `%s: %d +%s: %s +%s: %d +%s: %d +%s: %d +%s: %d +%s: %s ` - bugs := make([]string, 0) + var bugs []string for bug := range t.bugs { bugs = append(bugs, bug) } - return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) -} - -// Enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. -func (t Thread) Enable() error { - // Linux ensures that "cpu0" is always online. 
- if t.processorNumber == 0 { - return nil - } - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - f, err := os.OpenFile(cpuPath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - return fmt.Errorf("failed to open file %s: %v", cpuPath, err) - } - if _, err = f.Write([]byte{'1'}); err != nil { - return fmt.Errorf("failed to write '1' to %s: %v", cpuPath, err) - } - return nil -} - -// Disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. -func (t Thread) Disable() error { - // The core labeled "cpu0" can never be taken offline via this method. - // Linux will return EPERM if the user even creates a file at the /sys - // path above. - if t.processorNumber == 0 { - return fmt.Errorf("invalid shutdown operation: cpu0 cannot be disabled") - } - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) + return fmt.Sprintf(template, + processorKey, t.processorNumber, + vendorIDKey, t.vendorID, + cpuFamilyKey, t.cpuFamily, + modelKey, t.model, + physicalIDKey, t.physicalID, + coreIDKey, t.coreID, + bugsKey, strings.Join(bugs, " ")) } // IsVulnerable checks if a CPU is vulnerable to mds. -func (t Thread) IsVulnerable() bool { +func (t *CPU) IsVulnerable() bool { _, ok := t.bugs[mds] return ok } -// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online -// If the file does not exist (ioutil returns in error), we assume the CPU is on. -func (t Thread) isActive() bool { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - data, err := ioutil.ReadFile(cpuPath) - if err != nil { - return true - } - return len(data) > 0 && data[0] != '0' -} - // SimilarTo checks family/model/bugs fields for equality of two // processors. 
-func (t Thread) SimilarTo(other Thread) bool { +func (t *CPU) SimilarTo(other *CPU) bool { if t.vendorID != other.vendorID { return false } diff --git a/runsc/mitigate/mitigate_test.go b/runsc/mitigate/mitigate_test.go index a1d80581e..e79d879e9 100644 --- a/runsc/mitigate/mitigate_test.go +++ b/runsc/mitigate/mitigate_test.go @@ -18,90 +18,53 @@ package mitigate import ( - "fmt" "io/ioutil" "strings" "testing" - - "gvisor.dev/gvisor/runsc/mitigate/mock" ) // TestMockCPUSet tests mock cpu test cases against the cpuSet functions. func TestMockCPUSet(t *testing.T) { for _, tc := range []struct { - testCase mock.CPU + testCase MockCPU isVulnerable bool }{ { - testCase: mock.AMD8, + testCase: AMD8, isVulnerable: false, }, { - testCase: mock.Haswell2, + testCase: Haswell2, isVulnerable: true, }, { - testCase: mock.Haswell2core, + testCase: Haswell2core, isVulnerable: true, }, { - testCase: mock.CascadeLake2, + testCase: CascadeLake2, isVulnerable: true, }, { - testCase: mock.CascadeLake4, + testCase: CascadeLake4, isVulnerable: true, }, } { t.Run(tc.testCase.Name, func(t *testing.T) { - data := tc.testCase.MakeCPUString() - set, err := NewCPUSet([]byte(data)) + data := tc.testCase.MakeCPUSet().String() + set, err := NewCPUSet(data) if err != nil { t.Fatalf("Failed to create cpuSet: %v", err) } - t.Logf("data: %s", data) - - for _, tg := range set { - if err := checkSorted(tg.threads); err != nil { - t.Fatalf("Failed to sort cpuSet: %v", err) - } - } - - remaining := set.GetRemainingList() - // In the non-vulnerable case, no cores should be shutdown so all should remain. 
- want := tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore - if tc.isVulnerable { - want = tc.testCase.PhysicalCores * tc.testCase.Cores - } - - if want != len(remaining) { - t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining)) - } - - if !tc.isVulnerable { - return - } - - // If the set is vulnerable, we expect only 1 thread per hyperthread pair. - for _, r := range remaining { - if _, ok := set[r.id]; !ok { - t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r) - } - delete(set, r.id) - } - - possible := tc.testCase.MakeSysPossibleString() - set, err = NewCPUSetFromPossible([]byte(possible)) - if err != nil { - t.Fatalf("Failed to make cpuSet: %v", err) + if tc.testCase.NumCPUs() != len(set) { + t.Fatalf("Got wrong number of CPUs: want: %d got: %d", tc.testCase.NumCPUs(), len(set)) } - want = tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore - got := len(set.GetRemainingList()) - if got != want { - t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got) + if set.IsVulnerable() != tc.isVulnerable { + t.Fatalf("incorrect vulnerable value: got: %t want: %t", set.IsVulnerable(), tc.isVulnerable) } + t.Logf("data: %s", data) }) } } @@ -117,15 +80,13 @@ physical id: 0 core id : 0 bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit ` - want := Thread{ + want := CPU{ processorNumber: 0, vendorID: "GenuineIntel", cpuFamily: 6, model: 85, - id: threadID{ - physicalID: 0, - coreID: 0, - }, + physicalID: 0, + coreID: 0, bugs: map[string]struct{}{ "cpu_meltdown": {}, "spectre_v1": {}, @@ -139,7 +100,7 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa }, } - got, err := newThread(data) + got, err := newCPU(data) if err != nil { t.Fatalf("getCpu failed with error: %v", err) } @@ -154,12 +115,12 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass 
l1tf mds swapgs taa } func TestInvalid(t *testing.T) { - result, err := getThreads(`something not a processor`) + result, err := newCPU(`something not a processor`) if err == nil { t.Fatalf("getCPU set didn't return an error: %+v", result) } - if !strings.Contains(err.Error(), "no cpus") { + if !strings.Contains(err.Error(), "failed to match key \"processor\"") { t.Fatalf("Incorrect error returned: %v", err) } } @@ -221,7 +182,7 @@ cache_alignment : 64 address sizes : 46 bits physical, 48 bits virtual power management: ` - cpuSet, err := getThreads(data) + cpuSet, err := NewCPUSet(data) if err != nil { t.Fatalf("getCPUSet failed: %v", err) } @@ -231,7 +192,7 @@ power management: t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet)) } - wantCPU := Thread{ + wantCPU := CPU{ vendorID: "GenuineIntel", cpuFamily: 6, model: 63, @@ -260,17 +221,11 @@ func TestReadFile(t *testing.T) { t.Fatalf("Failed to read cpuinfo: %v", err) } - set, err := NewCPUSet(data) + set, err := NewCPUSet(string(data)) if err != nil { t.Fatalf("Failed to parse CPU data %v\n%s", err, data) } - for _, tg := range set { - if err := checkSorted(tg.threads); err != nil { - t.Fatalf("Failed to sort cpuSet: %v", err) - } - } - if len(set) < 1 { t.Fatalf("Failed to parse any CPUs: %d", len(set)) } @@ -383,7 +338,7 @@ power management:` }, } { t.Run(tc.name, func(t *testing.T) { - set, err := getThreads(tc.cpuString) + set, err := NewCPUSet(tc.cpuString) if err != nil { t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString) } @@ -404,98 +359,3 @@ power management:` }) } } - -func TestReverse(t *testing.T) { - const noParse = "-1-" - for _, tc := range []struct { - name string - output string - wantErr error - wantCount int - }{ - { - name: "base", - output: "0-7", - wantErr: nil, - wantCount: 8, - }, - { - name: "huge", - output: "0-111", - wantErr: nil, - wantCount: 112, - }, - { - name: "not zero", - output: "50-53", - wantErr: nil, - wantCount: 4, - }, - { - name: "small", - 
output: "0", - wantErr: nil, - wantCount: 1, - }, - { - name: "invalid order", - output: "10-6", - wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6), - }, - { - name: "no parse", - output: noParse, - wantErr: fmt.Errorf(`mismatch regex from possible: %q`, noParse), - }, - } { - t.Run(tc.name, func(t *testing.T) { - threads, err := GetThreadsFromPossible([]byte(tc.output)) - - switch { - case tc.wantErr == nil: - if err != nil { - t.Fatalf("Wanted nil err, got: %v", err) - } - case err == nil: - t.Fatalf("Want error: %v got: %v", tc.wantErr, err) - default: - if tc.wantErr.Error() != err.Error() { - t.Fatalf("Want error: %v got error: %v", tc.wantErr, err) - } - } - - if len(threads) != tc.wantCount { - t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads)) - } - }) - } -} - -func TestReverseSmoke(t *testing.T) { - data, err := ioutil.ReadFile("/sys/devices/system/cpu/possible") - if err != nil { - t.Fatalf("Failed to read from possible: %v", err) - } - threads, err := GetThreadsFromPossible(data) - if err != nil { - t.Fatalf("Could not parse possible output: %v", err) - } - - if len(threads) <= 0 { - t.Fatalf("Didn't get any CPU cores: %d", len(threads)) - } -} - -func checkSorted(threads []Thread) error { - if len(threads) < 2 { - return nil - } - last := threads[0].processorNumber - for _, t := range threads[1:] { - if last >= t.processorNumber { - return fmt.Errorf("threads out of order: thread %d before %d", t.processorNumber, last) - } - last = t.processorNumber - } - return nil -} diff --git a/runsc/mitigate/mock/mock.go b/runsc/mitigate/mock.go index 12c59e356..4588ae2ed 100644 --- a/runsc/mitigate/mock/mock.go +++ b/runsc/mitigate/mock.go @@ -12,26 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package mock contains mock CPUs for mitigate tests. 
-package mock +package mitigate -import "fmt" +import "strings" -// CPU represents data from CPUs that will be mitigated. -type CPU struct { +// MockCPU represents data from CPUs that will be mitigated. +type MockCPU struct { Name string VendorID string - Family int - Model int + Family int64 + Model int64 ModelName string Bugs string - PhysicalCores int - Cores int - ThreadsPerCore int + PhysicalCores int64 + Cores int64 + ThreadsPerCore int64 } // CascadeLake2 is a two core Intel CascadeLake machine. -var CascadeLake2 = CPU{ +var CascadeLake2 = MockCPU{ Name: "CascadeLake", VendorID: "GenuineIntel", Family: 6, @@ -44,7 +43,7 @@ var CascadeLake2 = CPU{ } // CascadeLake4 is a four core Intel CascadeLake machine. -var CascadeLake4 = CPU{ +var CascadeLake4 = MockCPU{ Name: "CascadeLake", VendorID: "GenuineIntel", Family: 6, @@ -57,7 +56,7 @@ var CascadeLake4 = CPU{ } // Haswell2 is a two core Intel Haswell machine. -var Haswell2 = CPU{ +var Haswell2 = MockCPU{ Name: "Haswell", VendorID: "GenuineIntel", Family: 6, @@ -70,7 +69,7 @@ var Haswell2 = CPU{ } // Haswell2core is a 2 core Intel Haswell machine with no hyperthread pairs. -var Haswell2core = CPU{ +var Haswell2core = MockCPU{ Name: "Haswell2Physical", VendorID: "GenuineIntel", Family: 6, @@ -83,7 +82,7 @@ var Haswell2core = CPU{ } // AMD2 is an two core AMD machine. -var AMD2 = CPU{ +var AMD2 = MockCPU{ Name: "AMD", VendorID: "AuthenticAMD", Family: 23, @@ -96,7 +95,7 @@ var AMD2 = CPU{ } // AMD8 is an eight core AMD machine. -var AMD8 = CPU{ +var AMD8 = MockCPU{ Name: "AMD", VendorID: "AuthenticAMD", Family: 23, @@ -108,47 +107,39 @@ var AMD8 = CPU{ ThreadsPerCore: 2, } -// MakeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase -func (tc CPU) MakeCPUString() string { - template := `processor : %d -vendor_id : %s -cpu family : %d -model : %d -model name : %s -physical id : %d -core id : %d -cpu cores : %d -bugs : %s - -` +// Empty is an empty CPU set. 
+var Empty = MockCPU{ + Name: "Empty", +} - ret := `` - for i := 0; i < tc.PhysicalCores; i++ { - for j := 0; j < tc.Cores; j++ { - for k := 0; k < tc.ThreadsPerCore; k++ { +// MakeCPUSet makes a cpuSet from a MockCPU. +func (tc MockCPU) MakeCPUSet() CPUSet { + bugs := make(map[string]struct{}) + for _, bug := range strings.Split(tc.Bugs, " ") { + bugs[bug] = struct{}{} + } + var cpus CPUSet = []*CPU{} + for i := int64(0); i < tc.PhysicalCores; i++ { + for j := int64(0); j < tc.Cores; j++ { + for k := int64(0); k < tc.ThreadsPerCore; k++ { processorNum := (i*tc.Cores+j)*tc.ThreadsPerCore + k - ret += fmt.Sprintf(template, - processorNum, /*processor*/ - tc.VendorID, /*vendor_id*/ - tc.Family, /*cpu family*/ - tc.Model, /*model*/ - tc.ModelName, /*model name*/ - i, /*physical id*/ - j, /*core id*/ - k, /*cpu cores*/ - tc.Bugs, /*bugs*/ - ) + cpu := &CPU{ + processorNumber: processorNum, + vendorID: tc.VendorID, + cpuFamily: tc.Family, + model: tc.Model, + physicalID: i, + coreID: j, + bugs: bugs, + } + cpus = append(cpus, cpu) } } } - return ret + return cpus } -// MakeSysPossibleString makes a string representing a the contents of /sys/devices/system/cpu/possible. -func (tc CPU) MakeSysPossibleString() string { - max := tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore - if max == 1 { - return "0" - } - return fmt.Sprintf("0-%d", max-1) +// NumCPUs returns the number of CPUs for this CPU. 
+func (tc MockCPU) NumCPUs() int { + return int(tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore) } diff --git a/runsc/mitigate/mock/BUILD b/runsc/mitigate/mock/BUILD deleted file mode 100644 index 5019ff9ee..000000000 --- a/runsc/mitigate/mock/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "mock", - srcs = ["mock.go"], - visibility = [ - "//runsc:__subpackages__", - ], -) diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index bc4a3fa32..d625230dd 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -17,12 +17,14 @@ go_library( "//pkg/control/client", "//pkg/control/server", "//pkg/coverage", + "//pkg/eventchannel", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/platform", "//pkg/sync", "//pkg/tcpip/header", "//pkg/tcpip/stack", + "//pkg/unet", "//pkg/urpc", "//runsc/boot", "//runsc/boot/platforms", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 5fb7dc834..9fbce6bd6 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -35,10 +35,12 @@ import ( "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" @@ -981,7 +983,7 @@ func (s *Sandbox) Pause(cid string) error { } defer conn.Close() - if err := conn.Call(boot.ContMgrPause, nil, nil); err != nil { + if err := conn.Call(boot.LifecyclePause, nil, nil); err != nil { return fmt.Errorf("pausing container %q: %v", cid, err) } return nil @@ -996,12 +998,114 @@ func (s *Sandbox) Resume(cid string) error { } defer conn.Close() - if err := conn.Call(boot.ContMgrResume, nil, nil); err != nil { + if err := 
conn.Call(boot.LifecycleResume, nil, nil); err != nil { return fmt.Errorf("resuming container %q: %v", cid, err) } return nil } +// Cat sends the cat call for a container in the sandbox. +func (s *Sandbox) Cat(cid string, files []string, out *os.File) error { + log.Debugf("Cat sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.FsCat, &control.CatOpts{ + Files: files, + FilePayload: urpc.FilePayload{Files: []*os.File{out}}, + }, nil); err != nil { + return fmt.Errorf("Cat container %q: %v", cid, err) + } + return nil +} + +// Usage sends the collect call for a container in the sandbox. +func (s *Sandbox) Usage(cid string, Full bool) (control.MemoryUsage, error) { + log.Debugf("Usage sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return control.MemoryUsage{}, err + } + defer conn.Close() + + var m control.MemoryUsage + err = conn.Call(boot.UsageCollect, &control.MemoryUsageOpts{ + Full: Full, + }, &m) + return m, err +} + +// UsageFD sends the usagefd call for a container in the sandbox. +func (s *Sandbox) UsageFD(cid string) (*control.MemoryUsageRecord, error) { + log.Debugf("Usage sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return nil, err + } + defer conn.Close() + + var m control.MemoryUsageFile + if err := conn.Call(boot.UsageUsageFD, &control.MemoryUsageFileOpts{ + Version: 1, + }, &m); err != nil { + return nil, fmt.Errorf("UsageFD failed: %v", err) + } + + if len(m.FilePayload.Files) != 2 { + return nil, fmt.Errorf("wants exactly two fds") + } + + return control.NewMemoryUsageRecord(*m.FilePayload.Files[0], *m.FilePayload.Files[1]) +} + +// Reduce sends the reduce call for a container in the sandbox. 
+func (s *Sandbox) Reduce(cid string, wait bool) error { + log.Debugf("Reduce sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + return conn.Call(boot.UsageReduce, &control.UsageReduceOpts{ + Wait: wait, + }, nil) +} + +// Stream sends the AttachDebugEmitter call for a container in the sandbox, and +// dumps filtered events to out. +func (s *Sandbox) Stream(cid string, filters []string, out *os.File) error { + log.Debugf("Stream sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + r, w, err := unet.SocketPair(false) + if err != nil { + return err + } + + wfd, err := w.Release() + if err != nil { + return fmt.Errorf("failed to release write socket FD: %v", err) + } + + if err := conn.Call(boot.EventsAttachDebugEmitter, &control.EventsOpts{ + FilePayload: urpc.FilePayload{Files: []*os.File{ + os.NewFile(uintptr(wfd), "event sink"), + }}, + }, nil); err != nil { + return fmt.Errorf("AttachDebugEmitter failed: %v", err) + } + + return eventchannel.ProcessAll(r, filters, out) +} + // IsRunning returns true if the sandbox or gofer process is running. 
func (s *Sandbox) IsRunning() bool { if s.Pid != 0 { diff --git a/test/benchmarks/base/startup_test.go b/test/benchmarks/base/startup_test.go index 05a43ad17..197241622 100644 --- a/test/benchmarks/base/startup_test.go +++ b/test/benchmarks/base/startup_test.go @@ -34,15 +34,19 @@ func BenchmarkStartupEmpty(b *testing.B) { defer machine.CleanUp() ctx := context.Background() + b.StopTimer() + b.ResetTimer() for i := 0; i < b.N; i++ { harness.DebugLog(b, "Running container: %d", i) container := machine.GetContainer(ctx, b) - defer container.CleanUp(ctx) - if _, err := container.Run(ctx, dockerutil.RunOpts{ + b.StartTimer() + if err := container.Spawn(ctx, dockerutil.RunOpts{ Image: "benchmarks/alpine", - }, "true"); err != nil { - b.Fatalf("failed to run container: %v", err) + }, "sleep", "100"); err != nil { + b.Fatalf("failed to start container: %v", err) } + b.StopTimer() + container.CleanUp(ctx) harness.DebugLog(b, "Ran container: %d", i) } } diff --git a/test/benchmarks/tools/fio_test.go b/test/benchmarks/tools/fio_test.go index a98277150..3b1f852ce 100644 --- a/test/benchmarks/tools/fio_test.go +++ b/test/benchmarks/tools/fio_test.go @@ -86,7 +86,7 @@ func TestFio(t *testing.T) { fio := Fio{} // WriteBandwidth. 
got, err := fio.parseBandwidth(sampleData, false) - var want float64 = 1753471.0 * 1024 + want := 1753471.0 * 1024 if err != nil { t.Fatalf("parse failed with err: %v", err) } else if got != want { diff --git a/test/perf/BUILD b/test/perf/BUILD index e43c5eff9..59923da47 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -164,6 +164,20 @@ syscall_test( syscall_test( size = "large", debug = False, + test = "//test/perf/linux:verity_randread_benchmark", + vfs1 = False, +) + +syscall_test( + size = "large", + debug = False, test = "//test/perf/linux:verity_open_read_close_benchmark", vfs1 = False, ) + +syscall_test( + size = "large", + debug = False, + test = "//test/perf/linux:verity_stat_benchmark", + vfs1 = False, +) diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD index fed5778df..020a69ab5 100644 --- a/test/perf/linux/BUILD +++ b/test/perf/linux/BUILD @@ -426,6 +426,25 @@ cc_binary( ) cc_binary( + name = "verity_randread_benchmark", + testonly = 1, + srcs = [ + "verity_randread_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:verity_util", + ], +) + +cc_binary( name = "verity_open_read_close_benchmark", testonly = 1, srcs = [ @@ -443,3 +462,23 @@ cc_binary( "//test/util:verity_util", ], ) + +cc_binary( + name = "verity_stat_benchmark", + testonly = 1, + srcs = [ + "verity_stat_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:verity_util", + "@com_google_absl//absl/strings", + ], +) diff --git a/test/perf/linux/randread_benchmark.cc b/test/perf/linux/randread_benchmark.cc index b0eb8c24e..11b56a8cb 100644 --- a/test/perf/linux/randread_benchmark.cc +++ 
b/test/perf/linux/randread_benchmark.cc @@ -85,7 +85,7 @@ void BM_RandRead(benchmark::State& state) { unsigned int seed = 1; for (auto _ : state) { TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(), - rand_r(&seed) % kFileSize) == size); + rand_r(&seed) % (kFileSize - buf.size())) == size); } state.SetBytesProcessed(static_cast<int64_t>(size) * diff --git a/test/perf/linux/verity_randread_benchmark.cc b/test/perf/linux/verity_randread_benchmark.cc new file mode 100644 index 000000000..4178cfad8 --- /dev/null +++ b/test/perf/linux/verity_randread_benchmark.cc @@ -0,0 +1,108 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/verity_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Create a 1GB file that will be read from at random positions. This should +// invalidate any performance gains from caching. +const uint64_t kFileSize = Megabytes(1024); + +// How many bytes to write at once to initialize the file used to read from. +const uint32_t kWriteSize = 65536; + +// Largest benchmarked read unit. 
+const uint32_t kMaxRead = Megabytes(64); + +// Global test state, initialized once per process lifetime. +struct GlobalState { + explicit GlobalState() { + // Mount a tmpfs file system to be wrapped by a verity fs. + tmp_dir_ = TempPath::CreateDir().ValueOrDie(); + TEST_CHECK(mount("", tmp_dir_.path().c_str(), "tmpfs", 0, "") == 0); + file_ = TempPath::CreateFileIn(tmp_dir_.path()).ValueOrDie(); + filename_ = std::string(Basename(file_.path())); + + FileDescriptor fd = Open(file_.path(), O_WRONLY).ValueOrDie(); + + // Try to minimize syscalls by using maximum size writev() requests. + std::vector<char> buffer(kWriteSize); + RandomizeBuffer(buffer.data(), buffer.size()); + const std::vector<std::vector<struct iovec>> iovecs_list = + GenerateIovecs(kFileSize + kMaxRead, buffer.data(), buffer.size()); + for (const auto& iovecs : iovecs_list) { + TEST_CHECK(writev(fd.get(), iovecs.data(), iovecs.size()) >= 0); + } + verity_dir_ = + MountVerity(tmp_dir_.path(), {EnableTarget(filename_, O_RDONLY)}) + .ValueOrDie(); + } + TempPath tmp_dir_; + TempPath file_; + std::string verity_dir_; + std::string filename_; +}; + +GlobalState& GetGlobalState() { + // This gets created only once throughout the lifetime of the process. + // Use a dynamically allocated object (that is never deleted) to avoid order + // of destruction of static storage variables issues. + static GlobalState* const state = + // The actual file size is the maximum random seek range (kFileSize) + the + // maximum read size so we can read that number of bytes at the end of the + // file. 
+ new GlobalState(); + return *state; +} + +void BM_VerityRandRead(benchmark::State& state) { + const int size = state.range(0); + + GlobalState& global_state = GetGlobalState(); + FileDescriptor verity_fd = ASSERT_NO_ERRNO_AND_VALUE(Open( + JoinPath(global_state.verity_dir_, global_state.filename_), O_RDONLY)); + std::vector<char> buf(size); + + unsigned int seed = 1; + for (auto _ : state) { + TEST_CHECK(PreadFd(verity_fd.get(), buf.data(), buf.size(), + rand_r(&seed) % kFileSize) == size); + } + + state.SetBytesProcessed(static_cast<int64_t>(size) * + static_cast<int64_t>(state.iterations())); +} + +BENCHMARK(BM_VerityRandRead)->Range(1, kMaxRead)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/verity_stat_benchmark.cc b/test/perf/linux/verity_stat_benchmark.cc new file mode 100644 index 000000000..b43a9e266 --- /dev/null +++ b/test/perf/linux/verity_stat_benchmark.cc @@ -0,0 +1,84 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/verity_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Creates a file in a nested directory hierarchy at least `depth` directories +// deep, and stats that file multiple times. +void BM_VerityStat(benchmark::State& state) { + // Create nested directories with given depth. + int depth = state.range(0); + + // Mount a tmpfs file system to be wrapped by a verity fs. + TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TEST_CHECK(mount("", top_dir.path().c_str(), "tmpfs", 0, "") == 0); + std::string dir_path = top_dir.path(); + std::string child_path = ""; + std::vector<EnableTarget> targets; + + while (depth-- > 0) { + // Don't use TempPath because it will make paths too long to use. + // + // The top_dir destructor will clean up this whole tree. + dir_path = JoinPath(dir_path, absl::StrCat(depth)); + ASSERT_NO_ERRNO(Mkdir(dir_path, 0755)); + child_path = JoinPath(child_path, Basename(dir_path)); + targets.emplace_back(EnableTarget(child_path, O_RDONLY)); + } + + // Create the file that will be stat'd. + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir_path)); + + targets.emplace_back( + EnableTarget(JoinPath(child_path, Basename(file.path())), O_RDONLY)); + + // Reverse the targets because verity should be enabled from the lowest level. 
+ std::reverse(targets.begin(), targets.end()); + + std::string verity_dir = + TEST_CHECK_NO_ERRNO_AND_VALUE(MountVerity(top_dir.path(), targets)); + + struct stat st; + for (auto _ : state) { + ASSERT_THAT(stat(JoinPath(verity_dir, targets[0].path).c_str(), &st), + SyscallSucceeds()); + } +} + +BENCHMARK(BM_VerityStat)->Range(1, 100)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 7129a797b..01ee432cb 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -7,6 +7,11 @@ package( exports_files( [ + "packet_socket.cc", + "packet_socket_raw.cc", + "raw_socket.cc", + "raw_socket_hdrincl.cc", + "raw_socket_icmp.cc", "socket.cc", "socket_inet_loopback.cc", "socket_inet_loopback_isolated.cc", @@ -1443,6 +1448,7 @@ cc_binary( deps = [ ":unix_domain_socket_test_util", "//test/util:capability_util", + "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", @@ -1461,6 +1467,7 @@ cc_binary( deps = [ ":unix_domain_socket_test_util", "//test/util:capability_util", + "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:socket_util", "@com_google_absl//absl/base:core_headers", @@ -4173,9 +4180,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "//test/util:signal_util", "//test/util:temp_path", - "//test/util:test_main", "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", ], ) diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc index f29891571..ca23dfeee 100644 --- a/test/syscalls/linux/cgroup.cc +++ b/test/syscalls/linux/cgroup.cc @@ -279,6 +279,23 @@ TEST(Cgroup, UnmountRepeated) { EXPECT_THAT(umount(c.Path().c_str()), SyscallFailsWithErrno(EINVAL)); } +TEST(Cgroup, Create) { + SKIP_IF(!CgroupsAvailable()); + Mounter 
m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + ASSERT_NO_ERRNO(c.CreateChild("child1")); + EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(Exists(c.Path()))); +} + +TEST(Cgroup, SubcontainerInitiallyEmpty) { + SKIP_IF(!CgroupsAvailable()); + Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir())); + Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("")); + Cgroup child = ASSERT_NO_ERRNO_AND_VALUE(c.CreateChild("child1")); + auto procs = ASSERT_NO_ERRNO_AND_VALUE(child.Procs()); + EXPECT_TRUE(procs.empty()); +} + TEST(MemoryCgroup, MemoryUsageInBytes) { SKIP_IF(!CgroupsAvailable()); diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc index 4f9ca1a65..8b208f99a 100644 --- a/test/syscalls/linux/link.cc +++ b/test/syscalls/linux/link.cc @@ -142,7 +142,8 @@ TEST(LinkTest, OldnameIsEmpty) { TEST(LinkTest, OldnameDoesNotExist) { const std::string oldname = NewTempAbsPath(); const std::string newname = NewTempAbsPath(); - EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(link(oldname.c_str(), newname.c_str()), + SyscallFailsWithErrno(ENOENT)); } TEST(LinkTest, NewnameCannotExist) { diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc index d4f89527c..dbc21833f 100644 --- a/test/syscalls/linux/lseek.cc +++ b/test/syscalls/linux/lseek.cc @@ -121,7 +121,8 @@ TEST(LseekTest, InvalidFD) { } TEST(LseekTest, DirCurEnd) { - const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY)); ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); } diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index 4a450742b..dbd1c93ae 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -445,9 +445,10 @@ TEST(MemfdTest, SealsAreInodeLevelProperties) { // Tmpfs files also 
support seals, but are created with F_SEAL_SEAL. TEST(MemfdTest, TmpfsFilesHaveSealSeal) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp"))); + std::string tmpdir = GetAbsoluteTestTmpdir(); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(tmpdir.c_str()))); const TempPath tmpfs_file = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn("/tmp")); + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmpdir.c_str())); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfs_file.path(), O_RDWR, 0644)); EXPECT_THAT(fcntl(fd.get(), F_GET_SEALS), diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 93a6d9cde..fda176261 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -793,6 +793,19 @@ class MMapFileTest : public MMapTest { ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds()); } + bool FSSupportsMap() const { + bool supported = true; + void* ret = mmap(nullptr, 1, PROT_NONE, MAP_PRIVATE, fd_.get(), 0); + if (ret == MAP_FAILED && errno != ENODEV) { + supported = false; + } + if (ret != MAP_FAILED) { + munmap(ret, 1); + } + + return supported; + } + ssize_t Read(char* buf, size_t count) { ssize_t len = 0; do { @@ -840,12 +853,14 @@ class MMapFileParamTest // MAP_POPULATE allowed. // There isn't a good way to verify it actually did anything. TEST_P(MMapFileParamTest, MapPopulate) { + SKIP_IF(!FSSupportsMap()); ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), SyscallSucceeds()); } // MAP_POPULATE on a short file. TEST_P(MMapFileParamTest, MapPopulateShort) { + SKIP_IF(!FSSupportsMap()); ASSERT_THAT( Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), SyscallSucceeds()); @@ -853,6 +868,7 @@ TEST_P(MMapFileParamTest, MapPopulateShort) { // Read contents from mapped file. 
TEST_F(MMapFileTest, Read) { + SKIP_IF(!FSSupportsMap()); size_t len = strlen(kFileContents); ASSERT_EQ(len, Write(kFileContents, len)); @@ -866,6 +882,7 @@ TEST_F(MMapFileTest, Read) { // Map at an offset. TEST_F(MMapFileTest, MapOffset) { + SKIP_IF(!FSSupportsMap()); ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds()); size_t len = strlen(kFileContents); @@ -881,6 +898,7 @@ TEST_F(MMapFileTest, MapOffset) { } TEST_F(MMapFileTest, MapOffsetBeyondEnd) { + SKIP_IF(!FSSupportsMap()); SetupGvisorDeathTest(); uintptr_t addr; @@ -897,6 +915,7 @@ TEST_F(MMapFileTest, MapOffsetBeyondEnd) { // Verify mmap fails when sum of length and offset overflows. TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) { + SKIP_IF(!FSSupportsMap()); const size_t length = static_cast<size_t>(-kPageSize); const off_t offset = kPageSize; ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset), @@ -905,6 +924,7 @@ TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) { // MAP_PRIVATE PROT_WRITE is allowed on read-only FDs. TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { + SKIP_IF(!FSSupportsMap()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); @@ -921,6 +941,7 @@ TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { // MAP_SHARED PROT_WRITE not allowed on read-only FDs. TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { + SKIP_IF(!FSSupportsMap()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); @@ -932,6 +953,7 @@ TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { // Mmap not allowed on O_PATH FDs. TEST_F(MMapFileTest, MmapFileWithOpath) { + SKIP_IF(!FSSupportsMap()); SKIP_IF(IsRunningWithVFS1()); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = @@ -944,6 +966,7 @@ TEST_F(MMapFileTest, MmapFileWithOpath) { // The FD must be readable. 
TEST_P(MMapFileParamTest, WriteOnlyFd) { + SKIP_IF(!FSSupportsMap()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); @@ -955,6 +978,7 @@ TEST_P(MMapFileParamTest, WriteOnlyFd) { // Overwriting the contents of a file mapped MAP_SHARED PROT_READ // should cause the new data to be reflected in the mapping. TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { + SKIP_IF(!FSSupportsMap()); // Start from scratch. EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); @@ -994,6 +1018,7 @@ TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { // Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected // in the mapping. TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { + SKIP_IF(!FSSupportsMap()); // Start from scratch. EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); @@ -1034,6 +1059,7 @@ TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { // Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the // mapping and the file. TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { + SKIP_IF(!FSSupportsMap()); // Start from scratch. EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); @@ -1077,6 +1103,7 @@ TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { // Write data to mapped file. TEST_F(MMapFileTest, WriteShared) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), @@ -1101,6 +1128,7 @@ TEST_F(MMapFileTest, WriteShared) { // Write data to portion of mapped page beyond the end of the file. // These writes are not reflected in the file. TEST_F(MMapFileTest, WriteSharedBeyondEnd) { + SKIP_IF(!FSSupportsMap()); // The file is only half of a page. We map an entire page. Writes to the // end of the mapping must not be reflected in the file. 
uintptr_t addr; @@ -1137,6 +1165,7 @@ TEST_F(MMapFileTest, WriteSharedBeyondEnd) { // The portion of a mapped page that becomes part of the file after a truncate // is reflected in the file. TEST_F(MMapFileTest, WriteSharedTruncateUp) { + SKIP_IF(!FSSupportsMap()); // The file is only half of a page. We map an entire page. Writes to the // end of the mapping must not be reflected in the file. uintptr_t addr; @@ -1174,6 +1203,7 @@ TEST_F(MMapFileTest, WriteSharedTruncateUp) { } TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { + SKIP_IF(!FSSupportsMap()); // Start from scratch. EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); @@ -1213,6 +1243,7 @@ TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { } TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { + SKIP_IF(!FSSupportsMap()); // The file is only half of a page. We map an entire page. Writes to the // end of the mapping must not be reflected in the file. uintptr_t addr; @@ -1247,6 +1278,7 @@ TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { } TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { + SKIP_IF(!FSSupportsMap()); SetupGvisorDeathTest(); // Start from scratch. @@ -1277,6 +1309,7 @@ TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { } TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { + SKIP_IF(!FSSupportsMap()); SetupGvisorDeathTest(); uintptr_t addr; @@ -1298,6 +1331,7 @@ TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { } TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { + SKIP_IF(!FSSupportsMap()); // Start from scratch. EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); @@ -1327,6 +1361,7 @@ TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { // Page can still be accessed and contents are intact after truncating a partial // page. TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { + SKIP_IF(!FSSupportsMap()); // Expand the file to a full page. 
EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); @@ -1354,6 +1389,7 @@ TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { // MAP_PRIVATE writes are not carried through to the underlying file. TEST_F(MMapFileTest, WritePrivate) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_.get(), 0), @@ -1378,6 +1414,7 @@ TEST_F(MMapFileTest, WritePrivate) { // SIGBUS raised when reading or writing past end of a mapped file. TEST_P(MMapFileParamTest, SigBusDeath) { + SKIP_IF(!FSSupportsMap()); SetupGvisorDeathTest(); uintptr_t addr; @@ -1406,6 +1443,7 @@ TEST_P(MMapFileParamTest, SigBusDeath) { // // See b/27877699. TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), SyscallSucceeds()); @@ -1424,6 +1462,7 @@ TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { // Tests that SIGBUS is not raised when reading or writing from a file-mapped // page containing EOF, *after* the EOF. TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), SyscallSucceeds()); @@ -1446,6 +1485,7 @@ TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { // page cache (which does not yet support writing to shared mappings), a bug // caused reads to fail unnecessarily on such mappings. See b/28913513. TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; size_t len = strlen(kFileContents); @@ -1463,6 +1503,7 @@ TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { // read past end of file (resulting in a fault in sentry context in the gVisor // case). See b/28913513. 
TEST_F(MMapFileTest, InternalSigBus) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_.get(), 0), @@ -1483,6 +1524,7 @@ TEST_F(MMapFileTest, InternalSigBus) { // /dev/zero to a shared mapping (so that the SIGBUS isn't caught during // copy-on-write breaking). TEST_F(MMapFileTest, InternalSigBusZeroing) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), @@ -1578,6 +1620,7 @@ TEST_F(MMapTest, NoReserve) { // Map more than the gVisor page-cache map unit (64k) and ensure that // it is consistent with reading from the file. TEST_F(MMapFileTest, Bug38498194) { + SKIP_IF(!FSSupportsMap()); // Choose a sufficiently large map unit. constexpr int kSize = 4 * 1024 * 1024; EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds()); @@ -1606,6 +1649,7 @@ TEST_F(MMapFileTest, Bug38498194) { // Tests that reading from a file to a memory mapping of the same file does not // deadlock. See b/34813270. TEST_F(MMapFileTest, SelfRead) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), @@ -1618,6 +1662,7 @@ TEST_F(MMapFileTest, SelfRead) { // Tests that writing to a file from a memory mapping of the same file does not // deadlock. Regression test for b/34813270. 
TEST_F(MMapFileTest, SelfWrite) { + SKIP_IF(!FSSupportsMap()); uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), SyscallSucceeds()); @@ -1633,8 +1678,12 @@ TEST(MMapDeathTest, TruncateAfterCOWBreak) { auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR)); ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds()); - auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( - nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + + auto maybe_mapping = Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd.get(), 0); + // Does FS support mmap? + SKIP_IF(maybe_mapping.error().errno_value() == ENODEV); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(std::move(maybe_mapping)); // Write to this mapping, causing the page to be copied for write. memset(mapping.ptr(), 'a', mapping.len()); @@ -1661,8 +1710,12 @@ TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY)); ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds()); - auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( - Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0)); + auto maybe_mapping = + Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0); + // Does FS support mmap? + SKIP_IF(maybe_mapping.error().errno_value() == ENODEV); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(std::move(maybe_mapping)); + std::vector<char> buf(kPageSize); // The test passes if this survives. std::copy(static_cast<char*>(mapping.ptr()), diff --git a/test/syscalls/linux/msgqueue.cc b/test/syscalls/linux/msgqueue.cc index 6804478fd..aaf46625d 100644 --- a/test/syscalls/linux/msgqueue.cc +++ b/test/syscalls/linux/msgqueue.cc @@ -13,14 +13,18 @@ // limitations under the License. 
#include <errno.h> +#include <signal.h> #include <sys/ipc.h> #include <sys/msg.h> #include <sys/types.h> +#include "absl/synchronization/notification.h" #include "absl/time/clock.h" #include "test/util/capability_util.h" +#include "test/util/signal_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" namespace gvisor { namespace testing { @@ -36,10 +40,16 @@ constexpr int msgMax = 8192; // Maximum number of bytes in a single message. constexpr int msgSsz = 16; // Message segment size. constexpr int msgTql = msgMnb; // Maximum number of messages on all queues. +constexpr int kInterruptSignal = SIGALRM; + // Queue is a RAII class used to automatically clean message queues. class Queue { public: explicit Queue(int id) : id_(id) {} + Queue(const Queue&) = delete; + Queue& operator=(const Queue&) = delete; + + Queue(Queue&& other) { id_ = other.release(); } ~Queue() { if (id_ >= 0) { @@ -59,6 +69,14 @@ class Queue { int id_ = -1; }; +PosixErrorOr<Queue> Msgget(key_t key, int flags) { + int id = msgget(key, flags); + if (id == -1) { + return PosixError(errno, absl::StrFormat("msgget(%d, %d)", key, flags)); + } + return Queue(id); +} + // Default size for messages. constexpr size_t msgSize = 50; @@ -78,14 +96,19 @@ bool operator==(msgbuf& a, msgbuf& b) { return a.mtype == b.mtype; } +// msgmax represents a buffer for the largest possible single message. +struct msgmax { + int64_t mtype; + char mtext[msgMax]; +}; + // Test simple creation and retrieval for msgget(2). 
TEST(MsgqueueTest, MsgGet) { const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const key_t key = ftok(keyfile.path().c_str(), 1); ASSERT_THAT(key, SyscallSucceeds()); - Queue queue(msgget(key, IPC_CREAT)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(key, IPC_CREAT)); EXPECT_THAT(msgget(key, 0), SyscallSucceedsWithValue(queue.get())); } @@ -97,27 +120,20 @@ TEST(MsgqueueTest, MsgGetFail) { EXPECT_THAT(msgget(key, 0), SyscallFailsWithErrno(ENOENT)); - Queue queue(msgget(key, IPC_CREAT)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(key, IPC_CREAT)); EXPECT_THAT(msgget(key, IPC_CREAT | IPC_EXCL), SyscallFailsWithErrno(EEXIST)); } // Test using msgget(2) with IPC_PRIVATE option. TEST(MsgqueueTest, MsgGetIpcPrivate) { - Queue queue1(msgget(IPC_PRIVATE, 0)); - ASSERT_THAT(queue1.get(), SyscallSucceeds()); - - Queue queue2(msgget(IPC_PRIVATE, 0)); - ASSERT_THAT(queue2.get(), SyscallSucceeds()); - + Queue queue1 = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0)); + Queue queue2 = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0)); EXPECT_NE(queue1.get(), queue2.get()); } // Test simple msgsnd and msgrcv. TEST(MsgqueueTest, MsgOpSimple) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, "A message."}; msgbuf rcv; @@ -132,8 +148,7 @@ TEST(MsgqueueTest, MsgOpSimple) { // Test msgsnd and msgrcv of an empty message. TEST(MsgqueueTest, MsgOpEmpty) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, ""}; msgbuf rcv; @@ -145,8 +160,7 @@ TEST(MsgqueueTest, MsgOpEmpty) { // Test truncation of message with MSG_NOERROR flag. 
TEST(MsgqueueTest, MsgOpTruncate) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, ""}; msgbuf rcv; @@ -160,8 +174,7 @@ TEST(MsgqueueTest, MsgOpTruncate) { // Test msgsnd and msgrcv using invalid arguments. TEST(MsgqueueTest, MsgOpInvalidArgs) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, ""}; @@ -178,8 +191,7 @@ TEST(MsgqueueTest, MsgOpInvalidArgs) { // Test non-blocking msgrcv with an empty queue. TEST(MsgqueueTest, MsgOpNoMsg) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf rcv; EXPECT_THAT(msgrcv(queue.get(), &rcv, sizeof(rcv.mtext) + 1, 0, IPC_NOWAIT), @@ -189,8 +201,7 @@ TEST(MsgqueueTest, MsgOpNoMsg) { // Test non-blocking msgrcv with a non-empty queue, but no messages of wanted // type. TEST(MsgqueueTest, MsgOpNoMsgType) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, ""}; ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), @@ -202,8 +213,7 @@ TEST(MsgqueueTest, MsgOpNoMsgType) { // Test msgrcv with a larger size message than wanted, and truncation disabled. TEST(MsgqueueTest, MsgOpTooBig) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, ""}; ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), @@ -215,8 +225,7 @@ TEST(MsgqueueTest, MsgOpTooBig) { // Test receiving messages based on type. 
TEST(MsgqueueTest, MsgRcvType) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); // Send messages in an order and receive them in reverse, based on type, // which shouldn't block. @@ -242,8 +251,7 @@ TEST(MsgqueueTest, MsgRcvType) { // Test using MSG_EXCEPT to receive a different-type message. TEST(MsgqueueTest, MsgExcept) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); std::map<int64_t, msgbuf> typeToBuf = { {1, msgbuf{1, "Message 1."}}, @@ -268,8 +276,7 @@ TEST(MsgqueueTest, MsgExcept) { // Test msgrcv with a negative type. TEST(MsgqueueTest, MsgRcvTypeNegative) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); // When msgtyp is negative, msgrcv returns the first message with mtype less // than or equal to the absolute value. @@ -292,8 +299,7 @@ TEST(MsgqueueTest, MsgRcvTypeNegative) { TEST(MsgqueueTest, MsgOpPermissions) { AutoCapability cap(CAP_IPC_OWNER, false); - Queue queue(msgget(IPC_PRIVATE, 0000)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0000)); msgbuf buf{1, ""}; @@ -305,8 +311,7 @@ TEST(MsgqueueTest, MsgOpPermissions) { // Test limits for messages and queues. TEST(MsgqueueTest, MsgOpLimits) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, "A message."}; @@ -315,13 +320,6 @@ TEST(MsgqueueTest, MsgOpLimits) { SyscallFailsWithErrno(EINVAL)); // Limit for queue. - // Use a buffer with the maximum mount of bytes that can be transformed to - // make it easier to exhaust the queue limit. 
- struct msgmax { - int64_t mtype; - char mtext[msgMax]; - }; - msgmax limit{1, ""}; for (size_t i = 0, msgCount = msgMnb / msgMax; i < msgCount; i++) { EXPECT_THAT(msgsnd(queue.get(), &limit, sizeof(limit.mtext), 0), @@ -341,11 +339,14 @@ bool MsgCopySupported() { // test if errno == ENOSYS. This means that the test will always run on // gVisor, but may be skipped on native linux. - Queue queue(msgget(IPC_PRIVATE, 0600)); - + auto maybe_id = Msgget(IPC_PRIVATE, 0600); + if (!maybe_id.ok()) { + return false; + } + Queue queue(std::move(maybe_id.ValueOrDie())); msgbuf buf{1, "Test message."}; - msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0); + msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0); return !(msgrcv(queue.get(), &buf, sizeof(buf.mtext) + 1, 0, MSG_COPY | IPC_NOWAIT) == -1 && errno == ENOSYS); @@ -355,9 +356,7 @@ bool MsgCopySupported() { TEST(MsgqueueTest, MsgCopy) { SKIP_IF(!MsgCopySupported()); - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf bufs[5] = { msgbuf{1, "Message 1."}, msgbuf{2, "Message 2."}, msgbuf{3, "Message 3."}, msgbuf{4, "Message 4."}, msgbuf{5, "Message 5."}, @@ -391,9 +390,7 @@ TEST(MsgqueueTest, MsgCopy) { TEST(MsgqueueTest, MsgCopyInvalidArgs) { SKIP_IF(!MsgCopySupported()); - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf rcv; EXPECT_THAT(msgrcv(queue.get(), &rcv, msgSize, 1, MSG_COPY), SyscallFailsWithErrno(EINVAL)); @@ -407,9 +404,7 @@ TEST(MsgqueueTest, MsgCopyInvalidArgs) { TEST(MsgqueueTest, MsgCopyInvalidIndex) { SKIP_IF(!MsgCopySupported()); - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf rcv; EXPECT_THAT(msgrcv(queue.get(), &rcv, msgSize, -3, MSG_COPY | IPC_NOWAIT), 
SyscallFailsWithErrno(ENOMSG)); @@ -420,52 +415,41 @@ TEST(MsgqueueTest, MsgCopyInvalidIndex) { // Test msgrcv (most probably) blocking on an empty queue. TEST(MsgqueueTest, MsgRcvBlocking) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf buf{1, "A message."}; - const pid_t child_pid = fork(); - if (child_pid == 0) { + ScopedThread t([&] { msgbuf rcv; - TEST_PCHECK(RetryEINTR(msgrcv)(queue.get(), &rcv, sizeof(buf.mtext) + 1, 0, - 0) == sizeof(buf.mtext) && - buf == rcv); - _exit(0); - } + ASSERT_THAT( + RetryEINTR(msgrcv)(queue.get(), &rcv, sizeof(buf.mtext) + 1, 0, 0), + SyscallSucceedsWithValue(sizeof(buf.mtext))); + EXPECT_TRUE(rcv == buf); + }); // Sleep to try and make msgrcv block before sending a message. absl::SleepFor(absl::Milliseconds(150)); EXPECT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), SyscallSucceeds()); - - int status; - ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), - SyscallSucceedsWithValue(child_pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); } // Test msgrcv (most probably) waiting for a specific-type message. TEST(MsgqueueTest, MsgRcvTypeBlocking) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgbuf bufs[5] = {{1, "A message."}, {1, "A message."}, {1, "A message."}, {1, "A message."}, {2, "A different message."}}; - const pid_t child_pid = fork(); - if (child_pid == 0) { + ScopedThread t([&] { msgbuf buf = bufs[4]; // Buffer that should be received. 
msgbuf rcv; - TEST_PCHECK(RetryEINTR(msgrcv)(queue.get(), &rcv, sizeof(buf.mtext) + 1, 2, - 0) == sizeof(buf.mtext) && - buf == rcv); - _exit(0); - } + ASSERT_THAT( + RetryEINTR(msgrcv)(queue.get(), &rcv, sizeof(buf.mtext) + 1, 2, 0), + SyscallSucceedsWithValue(sizeof(buf.mtext))); + EXPECT_TRUE(rcv == buf); + }); // Sleep to try and make msgrcv block before sending messages. absl::SleepFor(absl::Milliseconds(150)); @@ -475,42 +459,29 @@ TEST(MsgqueueTest, MsgRcvTypeBlocking) { EXPECT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), SyscallSucceeds()); } - - int status; - ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), - SyscallSucceedsWithValue(child_pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); } // Test msgsnd (most probably) blocking on a full queue. TEST(MsgqueueTest, MsgSndBlocking) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); - - // Use a buffer with the maximum mount of bytes that can be transformed to - // make it easier to exhaust the queue limit. - struct msgmax { - int64_t mtype; - char mtext[msgMax]; - }; - + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); msgmax buf{1, ""}; // Has max amount of bytes. const size_t msgCount = msgMnb / msgMax; // Number of messages that can be // sent without blocking. - const pid_t child_pid = fork(); - if (child_pid == 0) { + ScopedThread t([&] { // Fill the queue. for (size_t i = 0; i < msgCount; i++) { - TEST_PCHECK(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0) == 0); + ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), + SyscallSucceeds()); } // Next msgsnd should block. - TEST_PCHECK(RetryEINTR(msgsnd)(queue.get(), &buf, sizeof(buf.mtext), 0) == - 0); - _exit(0); - } + ASSERT_THAT(RetryEINTR(msgsnd)(queue.get(), &buf, sizeof(buf.mtext), 0), + SyscallSucceeds()); + }); + + const DisableSave ds; // Too many syscalls. 
// To increase the chance of the last msgsnd blocking before doing a msgrcv, // we use MSG_COPY option to copy the last index in the queue. As long as @@ -527,29 +498,16 @@ TEST(MsgqueueTest, MsgSndBlocking) { EXPECT_THAT(msgrcv(queue.get(), &rcv, sizeof(buf.mtext), 0, 0), SyscallSucceedsWithValue(sizeof(buf.mtext))); - - int status; - ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), - SyscallSucceedsWithValue(child_pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); } // Test removing a queue while a blocking msgsnd is executing. TEST(MsgqueueTest, MsgSndRmWhileBlocking) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); - // Use a buffer with the maximum mount of bytes that can be transformed to - // make it easier to exhaust the queue limit. - struct msgmax { - int64_t mtype; - char mtext[msgMax]; - }; + // Number of messages that can be sent without blocking. + const size_t msgCount = msgMnb / msgMax; - const size_t msgCount = msgMnb / msgMax; // Number of messages that can be - // sent without blocking. - const pid_t child_pid = fork(); - if (child_pid == 0) { + ScopedThread t([&] { // Fill the queue. msgmax buf{1, ""}; for (size_t i = 0; i < msgCount; i++) { @@ -559,11 +517,12 @@ TEST(MsgqueueTest, MsgSndRmWhileBlocking) { // Next msgsnd should block. Because we're repeating on EINTR, msgsnd may // race with msgctl(IPC_RMID) and return EINVAL. - TEST_PCHECK(RetryEINTR(msgsnd)(queue.get(), &buf, sizeof(buf.mtext), 0) == - -1 && - (errno == EIDRM || errno == EINVAL)); - _exit(0); - } + EXPECT_THAT(RetryEINTR(msgsnd)(queue.get(), &buf, sizeof(buf.mtext), 0), + SyscallFails()); + EXPECT_TRUE((errno == EIDRM || errno == EINVAL)); + }); + + const DisableSave ds; // Too many syscalls. // Similar to MsgSndBlocking, we do this to increase the chance of msgsnd // blocking before removing the queue. 
@@ -575,89 +534,165 @@ TEST(MsgqueueTest, MsgSndRmWhileBlocking) { absl::SleepFor(absl::Milliseconds(100)); EXPECT_THAT(msgctl(queue.release(), IPC_RMID, nullptr), SyscallSucceeds()); - - int status; - ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), - SyscallSucceedsWithValue(child_pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); } // Test removing a queue while a blocking msgrcv is executing. TEST(MsgqueueTest, MsgRcvRmWhileBlocking) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); - const pid_t child_pid = fork(); - if (child_pid == 0) { + ScopedThread t([&] { // Because we're repeating on EINTR, msgsnd may race with msgctl(IPC_RMID) // and return EINVAL. msgbuf rcv; - TEST_PCHECK(RetryEINTR(msgrcv)(queue.get(), &rcv, 1, 2, 0) == -1 && - (errno == EIDRM || errno == EINVAL)); - _exit(0); - } + EXPECT_THAT(RetryEINTR(msgrcv)(queue.get(), &rcv, 1, 2, 0), SyscallFails()); + EXPECT_TRUE(errno == EIDRM || errno == EINVAL); + }); // Sleep to try and make msgrcv block before sending messages. absl::SleepFor(absl::Milliseconds(150)); EXPECT_THAT(msgctl(queue.release(), IPC_RMID, nullptr), SyscallSucceeds()); - - int status; - ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), - SyscallSucceedsWithValue(child_pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); } // Test a collection of msgsnd/msgrcv operations in different processes. TEST(MsgqueueTest, MsgOpGeneral) { - Queue queue(msgget(IPC_PRIVATE, 0600)); - ASSERT_THAT(queue.get(), SyscallSucceeds()); + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); + + // Create multiple sending/receiving threads that send messages back and + // forth. There's a matching recv for each send, so by the end of the test, + // all threads should succeed and return. 
+ const std::vector<msgbuf> msgs = { + msgbuf{1, "Message 1."}, msgbuf{2, "Message 2."}, msgbuf{3, "Message 3."}, + msgbuf{4, "Message 4."}, msgbuf{5, "Message 5."}}; - // Create 50 sending, and 50 receiving processes. There are only 5 messages to - // be sent and received, each with a different type. All messages will be sent - // and received equally (10 of each.) By the end of the test all processes - // should unblock and return normally. - const size_t msgCount = 5; - std::map<int64_t, msgbuf> typeToBuf = {{1, msgbuf{1, "Message 1."}}, - {2, msgbuf{2, "Message 2."}}, - {3, msgbuf{3, "Message 3."}}, - {4, msgbuf{4, "Message 4."}}, - {5, msgbuf{5, "Message 5."}}}; - - std::vector<pid_t> children; - - const size_t pCount = 50; - for (size_t i = 1; i <= pCount; i++) { - const pid_t child_pid = fork(); - if (child_pid == 0) { - msgbuf buf = typeToBuf[(i % msgCount) + 1]; + auto receiver = [&](int i) { + return [i, &msgs, &queue]() { + const msgbuf& target = msgs[i]; msgbuf rcv; - TEST_PCHECK(RetryEINTR(msgrcv)(queue.get(), &rcv, sizeof(buf.mtext) + 1, - (i % msgCount) + 1, - 0) == sizeof(buf.mtext) && - buf == rcv); - _exit(0); - } - children.push_back(child_pid); - } + EXPECT_THAT(RetryEINTR(msgrcv)(queue.get(), &rcv, + sizeof(target.mtext) + 1, target.mtype, 0), + SyscallSucceedsWithValue(sizeof(target.mtext))); + EXPECT_EQ(rcv.mtype, target.mtype); + EXPECT_EQ(0, memcmp(rcv.mtext, target.mtext, sizeof(target.mtext))); + }; + }; - for (size_t i = 1; i <= pCount; i++) { - const pid_t child_pid = fork(); - if (child_pid == 0) { - msgbuf buf = typeToBuf[(i % msgCount) + 1]; - TEST_PCHECK(RetryEINTR(msgsnd)(queue.get(), &buf, sizeof(buf.mtext), 0) == - 0); - _exit(0); - } - children.push_back(child_pid); - } + ScopedThread r1(receiver(0)); + ScopedThread r2(receiver(1)); + ScopedThread r3(receiver(2)); + ScopedThread r4(receiver(3)); + ScopedThread r5(receiver(4)); + ScopedThread r6(receiver(0)); + ScopedThread r7(receiver(1)); + ScopedThread r8(receiver(2)); + 
ScopedThread r9(receiver(3)); + ScopedThread r10(receiver(4)); + + auto sender = [&](int i) { + return [i, &msgs, &queue]() { + const msgbuf& target = msgs[i]; + EXPECT_THAT( + RetryEINTR(msgsnd)(queue.get(), &target, sizeof(target.mtext), 0), + SyscallSucceeds()); + }; + }; - for (auto const& pid : children) { - int status; - ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), - SyscallSucceedsWithValue(pid)); - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + ScopedThread s1(sender(0)); + ScopedThread s2(sender(1)); + ScopedThread s3(sender(2)); + ScopedThread s4(sender(3)); + ScopedThread s5(sender(4)); + ScopedThread s6(sender(0)); + ScopedThread s7(sender(1)); + ScopedThread s8(sender(2)); + ScopedThread s9(sender(3)); + ScopedThread s10(sender(4)); +} + +void empty_sighandler(int sig, siginfo_t* info, void* context) {} + +TEST(MsgqueueTest, InterruptRecv) { + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); + char buf[64]; + + absl::Notification done, exit; + + // Thread calling msgrcv with no corresponding send. It would block forever, + // but we'll interrupt with a signal below. + ScopedThread t([&] { + struct sigaction sa = {}; + sa.sa_sigaction = empty_sighandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto cleanup_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kInterruptSignal, sa)); + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSignalMask(SIG_UNBLOCK, kInterruptSignal)); + + EXPECT_THAT(msgrcv(queue.get(), &buf, sizeof(buf), 0, 0), + SyscallFailsWithErrno(EINTR)); + + done.Notify(); + exit.WaitForNotification(); + }); + + const DisableSave ds; // Too many syscalls. + + // We want the signal to arrive while msgrcv is blocking, but not after the + // thread has exited. Signals that arrive before msgrcv are no-ops. + do { + EXPECT_THAT(kill(getpid(), kInterruptSignal), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(100)); // Rate limit. 
+ } while (!done.HasBeenNotified()); + + exit.Notify(); + t.Join(); +} + +TEST(MsgqueueTest, InterruptSend) { + Queue queue = ASSERT_NO_ERRNO_AND_VALUE(Msgget(IPC_PRIVATE, 0600)); + msgmax buf{1, ""}; + // Number of messages that can be sent without blocking. + const size_t msgCount = msgMnb / msgMax; + + // Fill the queue. + for (size_t i = 0; i < msgCount; i++) { + ASSERT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), + SyscallSucceeds()); } + + absl::Notification done, exit; + + // Thread calling msgsnd on a full queue. It would block forever, but we'll + // interrupt with a signal below. + ScopedThread t([&] { + struct sigaction sa = {}; + sa.sa_sigaction = empty_sighandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto cleanup_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kInterruptSignal, sa)); + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSignalMask(SIG_UNBLOCK, kInterruptSignal)); + + EXPECT_THAT(msgsnd(queue.get(), &buf, sizeof(buf.mtext), 0), + SyscallFailsWithErrno(EINTR)); + + done.Notify(); + exit.WaitForNotification(); + }); + + const DisableSave ds; // Too many syscalls. + + // We want the signal to arrive while msgsnd is blocking, but not after the + // thread has exited. Signals that arrive before msgsnd are no-ops. + do { + EXPECT_THAT(kill(getpid(), kInterruptSignal), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(100)); // Rate limit. + } while (!done.HasBeenNotified()); + + exit.Notify(); + t.Join(); } // Test msgctl with IPC_STAT option. @@ -833,3 +868,16 @@ TEST(MsgqueueTest, MsgCtlMsgInfo) { } // namespace } // namespace testing } // namespace gvisor + +int main(int argc, char** argv) { + // Some tests depend on delivering a signal to the main thread. Block the + // target signal so that any other threads created by TestInit will also have + // the signal blocked. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, gvisor::testing::kInterruptSignal); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index 98339277b..ca4ab0aad 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -14,13 +14,13 @@ #include <arpa/inet.h> #include <ifaddrs.h> -#include <linux/capability.h> -#include <linux/if_arp.h> -#include <linux/if_packet.h> #include <net/ethernet.h> +#include <net/if.h> +#include <net/if_arp.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/udp.h> +#include <netpacket/packet.h> #include <poll.h> #include <sys/ioctl.h> #include <sys/socket.h> @@ -31,6 +31,7 @@ #include "absl/base/internal/endian.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/socket_util.h" #include "test/util/test_util.h" @@ -85,7 +86,7 @@ void SendUDPMessage(int sock) { // Send an IP packet and make sure ETH_P_<something else> doesn't pick it up. 
TEST(BasicCookedPacketTest, WrongType) { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) { ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, ETH_P_PUP), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -123,7 +124,7 @@ class CookedPacketTest : public ::testing::TestWithParam<int> { }; void CookedPacketTest::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) { ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -149,7 +150,7 @@ void CookedPacketTest::SetUp() { void CookedPacketTest::TearDown() { // TearDown will be run even if we skip the test. - if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) { EXPECT_THAT(close(socket_), SyscallSucceeds()); } } diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index 07beb8ba0..61714d1da 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -13,14 +13,13 @@ // limitations under the License. 
#include <arpa/inet.h> -#include <linux/capability.h> -#include <linux/filter.h> -#include <linux/if_arp.h> -#include <linux/if_packet.h> #include <net/ethernet.h> +#include <net/if.h> +#include <net/if_arp.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/udp.h> +#include <netpacket/packet.h> #include <poll.h> #include <sys/ioctl.h> #include <sys/socket.h> @@ -32,6 +31,7 @@ #include "absl/base/internal/endian.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/capability_util.h" +#include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/socket_util.h" #include "test/util/test_util.h" @@ -100,7 +100,7 @@ class RawPacketTest : public ::testing::TestWithParam<int> { }; void RawPacketTest::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) { ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, htons(GetParam())), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -150,7 +150,7 @@ void RawPacketTest::SetUp() { void RawPacketTest::TearDown() { // TearDown will be run even if we skip the test. - if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())) { EXPECT_THAT(close(s_), SyscallSucceeds()); } } @@ -340,7 +340,7 @@ TEST_P(RawPacketTest, Send) { // Check that setting SO_RCVBUF below min is clamped to the minimum // receive buffer size. TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); // Discover minimum receive buf size by trying to set it to zero. // See: @@ -373,7 +373,7 @@ TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) { // Check that setting SO_RCVBUF above max is clamped to the maximum // receive buffer size. 
TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); // Discover max buf size by trying to set the largest possible buffer size. constexpr int kRcvBufSz = 0xffffffff; @@ -400,7 +400,7 @@ TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) { // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. TEST_P(RawPacketTest, SetSocketRecvBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int max = 0; int min = 0; @@ -449,7 +449,7 @@ TEST_P(RawPacketTest, SetSocketRecvBuf) { // Check that setting SO_SNDBUF below min is clamped to the minimum // receive buffer size. TEST_P(RawPacketTest, SetSocketSendBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); // Discover minimum buffer size by trying to set it to zero. constexpr int kSndBufSz = 0; @@ -480,7 +480,7 @@ TEST_P(RawPacketTest, SetSocketSendBufBelowMin) { // Check that setting SO_SNDBUF above max is clamped to the maximum // send buffer size. TEST_P(RawPacketTest, SetSocketSendBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); // Discover maximum buffer size by trying to set it to a large value. constexpr int kSndBufSz = 0xffffffff; @@ -507,7 +507,7 @@ TEST_P(RawPacketTest, SetSocketSendBufAboveMax) { // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. 
TEST_P(RawPacketTest, SetSocketSendBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int max = 0; int min = 0; @@ -551,7 +551,7 @@ TEST_P(RawPacketTest, SetSocketSendBuf) { } TEST_P(RawPacketTest, GetSocketError) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int val = 0; socklen_t val_len = sizeof(val); @@ -561,7 +561,7 @@ TEST_P(RawPacketTest, GetSocketError) { } TEST_P(RawPacketTest, GetSocketErrorBind) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); { // Bind to the loopback device. @@ -627,7 +627,7 @@ TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) { } TEST_P(RawPacketTest, GetSocketDetachFilter) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int val = 0; socklen_t val_len = sizeof(val); @@ -636,7 +636,7 @@ TEST_P(RawPacketTest, GetSocketDetachFilter) { } TEST_P(RawPacketTest, SetAndGetSocketLinger) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int level = SOL_SOCKET; int type = SO_LINGER; @@ -657,7 +657,7 @@ TEST_P(RawPacketTest, SetAndGetSocketLinger) { } TEST_P(RawPacketTest, GetSocketAcceptConn) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); int got = -1; socklen_t length = sizeof(got); @@ -673,7 +673,7 @@ INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest, class RawPacketMsgSizeTest : public ::testing::TestWithParam<TestAddress> {}; TEST_P(RawPacketMsgSizeTest, SendTooLong) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + 
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); TestAddress addr = GetParam().WithPort(kPort); @@ -690,8 +690,11 @@ TEST_P(RawPacketMsgSizeTest, SendTooLong) { SyscallFailsWithErrno(EMSGSIZE)); } +// TODO(https://fxbug.dev/76957): Run this test on Fuchsia once splice is +// available. +#ifndef __Fuchsia__ TEST_P(RawPacketMsgSizeTest, SpliceTooLong) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HavePacketSocketCapability())); const char buf[65536] = {}; int fds[2]; @@ -718,6 +721,7 @@ TEST_P(RawPacketMsgSizeTest, SpliceTooLong) { EXPECT_THAT(n, SyscallSucceedsWithValue(sizeof(buf))); } } +#endif // __Fuchsia__ INSTANTIATE_TEST_SUITE_P(AllRawPacketMsgSizeTest, RawPacketMsgSizeTest, ::testing::Values(V4Loopback(), V6Loopback())); diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index 0bba86846..209801e36 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -13,11 +13,13 @@ // limitations under the License. 
#include <fcntl.h> /* Obtain O_* constant definitions */ +#include <linux/futex.h> #include <linux/magic.h> #include <signal.h> #include <sys/ioctl.h> #include <sys/statfs.h> #include <sys/uio.h> +#include <syscall.h> #include <unistd.h> #include <vector> @@ -50,6 +52,9 @@ std::atomic<int> global_num_signals_received = 0; void SigRecordingHandler(int signum, siginfo_t* siginfo, void* unused_ucontext) { global_num_signals_received++; + ASSERT_THAT(syscall(SYS_futex, &global_num_signals_received, + FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT_MAX, 0, 0, 0), + SyscallSucceeds()); } PosixErrorOr<Cleanup> RegisterSignalHandler(int signum) { @@ -61,11 +66,14 @@ PosixErrorOr<Cleanup> RegisterSignalHandler(int signum) { return ScopedSigaction(signum, handler); } -void WaitForSignalDelivery(absl::Duration timeout, int max_expected) { - absl::Time wait_start = absl::Now(); - while (global_num_signals_received < max_expected && - absl::Now() - wait_start < timeout) { - absl::SleepFor(absl::Milliseconds(10)); +void WaitForSignalDelivery(int expected) { + while (1) { + int v = global_num_signals_received; + if (v >= expected) { + break; + } + RetryEINTR(syscall)(SYS_futex, &global_num_signals_received, + FUTEX_WAIT | FUTEX_PRIVATE_FLAG, v, 0, 0, 0); } } @@ -371,7 +379,7 @@ TEST_P(PipeTest, ReaderSideCloses) { EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)), SyscallFailsWithErrno(EPIPE)); - WaitForSignalDelivery(absl::Seconds(1), 1); + WaitForSignalDelivery(1); ASSERT_EQ(global_num_signals_received, 1); } @@ -411,7 +419,7 @@ TEST_P(PipeTest, BlockWriteClosed) { notify.WaitForNotification(); ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); - WaitForSignalDelivery(absl::Seconds(1), 1); + WaitForSignalDelivery(1); ASSERT_EQ(global_num_signals_received, 1); t.Join(); @@ -443,7 +451,7 @@ TEST_P(PipeTest, BlockPartialWriteClosed) { // Unblock the above. 
ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); - WaitForSignalDelivery(absl::Seconds(1), 2); + WaitForSignalDelivery(2); ASSERT_EQ(global_num_signals_received, 2); t.Join(); diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc index 25b0e63d4..286b3d168 100644 --- a/test/syscalls/linux/prctl.cc +++ b/test/syscalls/linux/prctl.cc @@ -214,6 +214,12 @@ TEST(PrctlTest, RootDumpability) { SyscallFailsWithErrno(EINVAL)); } +TEST(PrctlTest, SetGetSubreaper) { + // Setting subreaper on PID 1 works vacuously because PID 1 is always a + // subreaper. + EXPECT_THAT(prctl(PR_SET_CHILD_SUBREAPER, 1), SyscallSucceeds()); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 4cbe30fc1..162c0b665 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -152,6 +152,22 @@ TEST(ProcNetDev, Format) { EXPECT_GT(entries.size(), 0); } +// GetMibsAllocationSysctl retuns a value of the net.core.mibs_allocation +// sysctl./proc/sys/net/core/mibs_allocation +// +// When mibs_allocation is unset, a netns creation inherits MIB from init +// network namespace. Otherwise, MIBS is allocated for each namespace. +int GetMibsAllocationSysctl() { + auto ret = GetContents("/proc/sys/net/core/mibs_allocation"); + if (!ret.ok()) { + // The current kernel doesn't support mibs_allocation. 
+ return 1; + } + int32_t val; + EXPECT_TRUE(absl::SimpleAtoi(ret.ValueOrDie(), &val)); + return val; +} + PosixErrorOr<uint64_t> GetSNMPMetricFromProc(const std::string snmp, const std::string& type, const std::string& item) { @@ -226,12 +242,21 @@ TEST(ProcNetSnmp, TcpReset) { newAttemptFails = ASSERT_NO_ERRNO_AND_VALUE( GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails")); - EXPECT_EQ(oldActiveOpens, newActiveOpens - 1); - EXPECT_EQ(oldOutRsts, newOutRsts - 1); - EXPECT_EQ(oldAttemptFails, newAttemptFails - 1); + if (GetMibsAllocationSysctl()) { + EXPECT_EQ(oldActiveOpens, newActiveOpens - 1); + EXPECT_EQ(oldOutRsts, newOutRsts - 1); + EXPECT_EQ(oldAttemptFails, newAttemptFails - 1); + } else { + // System-wide statistics can have some noise. + EXPECT_LE(oldOutRsts, newOutRsts - 1); + EXPECT_LE(oldAttemptFails, newAttemptFails - 1); + } } TEST(ProcNetSnmp, TcpEstab) { + // System-wide statistics can have some noise. + SKIP_IF(GetMibsAllocationSysctl() == 0); + // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. DisableSave ds; @@ -355,8 +380,14 @@ TEST(ProcNetSnmp, UdpNoPorts) { newNoPorts = ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts")); - EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); - EXPECT_EQ(oldNoPorts, newNoPorts - 1); + if (GetMibsAllocationSysctl()) { + EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_EQ(oldNoPorts, newNoPorts - 1); + } else { + // System-wide statistics can have some noise. 
+ EXPECT_LE(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_LE(oldNoPorts, newNoPorts - 1); + } } TEST(ProcNetSnmp, UdpIn) { @@ -405,8 +436,14 @@ TEST(ProcNetSnmp, UdpIn) { newInDatagrams = ASSERT_NO_ERRNO_AND_VALUE( GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams")); - EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); - EXPECT_EQ(oldInDatagrams, newInDatagrams - 1); + if (GetMibsAllocationSysctl()) { + EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_EQ(oldInDatagrams, newInDatagrams - 1); + } else { + // System-wide statistics can have some noise. + EXPECT_LE(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_LE(oldInDatagrams, newInDatagrams - 1); + } } TEST(ProcNetSnmp, CheckNetStat) { diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc index e19fe8f6b..66f0e6ca4 100644 --- a/test/syscalls/linux/raw_socket.cc +++ b/test/syscalls/linux/raw_socket.cc @@ -13,8 +13,6 @@ // limitations under the License. #include <arpa/inet.h> -#include <linux/capability.h> -#include <linux/filter.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip6.h> @@ -99,7 +97,7 @@ class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> { }; void RawSocketTest::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -123,7 +121,7 @@ void RawSocketTest::SetUp() { void RawSocketTest::TearDown() { // TearDown will be run even if we skip the test. - if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { EXPECT_THAT(close(s_), SyscallSucceeds()); } } @@ -132,7 +130,7 @@ void RawSocketTest::TearDown() { // BasicRawSocket::Setup creates the first one, so we only have to create one // more here. 
TEST_P(RawSocketTest, MultipleCreation) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int s2; ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); @@ -142,7 +140,7 @@ TEST_P(RawSocketTest, MultipleCreation) { // Test that shutting down an unconnected socket fails. TEST_P(RawSocketTest, FailShutdownWithoutConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); @@ -150,7 +148,7 @@ TEST_P(RawSocketTest, FailShutdownWithoutConnect) { // Shutdown is a no-op for raw sockets (and datagram sockets in general). TEST_P(RawSocketTest, ShutdownWriteNoop) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), @@ -165,7 +163,7 @@ TEST_P(RawSocketTest, ShutdownWriteNoop) { // Shutdown is a no-op for raw sockets (and datagram sockets in general). TEST_P(RawSocketTest, ShutdownReadNoop) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), @@ -182,14 +180,14 @@ TEST_P(RawSocketTest, ShutdownReadNoop) { // Test that listen() fails. TEST_P(RawSocketTest, FailListen) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); } // Test that accept() fails. 
TEST_P(RawSocketTest, FailAccept) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr saddr; socklen_t addrlen; @@ -197,7 +195,7 @@ TEST_P(RawSocketTest, FailAccept) { } TEST_P(RawSocketTest, BindThenGetSockName) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_); ASSERT_THAT(bind(s_, addr, AddrLen()), SyscallSucceeds()); @@ -221,7 +219,7 @@ TEST_P(RawSocketTest, BindThenGetSockName) { } TEST_P(RawSocketTest, ConnectThenGetSockName) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_); ASSERT_THAT(connect(s_, addr, AddrLen()), SyscallSucceeds()); @@ -246,7 +244,7 @@ TEST_P(RawSocketTest, ConnectThenGetSockName) { // Test that getpeername() returns nothing before connect(). TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr saddr; socklen_t addrlen = sizeof(saddr); @@ -256,7 +254,7 @@ TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { // Test that getpeername() returns something after connect(). TEST_P(RawSocketTest, GetPeerName) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), @@ -270,7 +268,7 @@ TEST_P(RawSocketTest, GetPeerName) { // Test that the socket is writable immediately. 
TEST_P(RawSocketTest, PollWritableImmediately) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct pollfd pfd = {}; pfd.fd = s_; @@ -280,7 +278,7 @@ TEST_P(RawSocketTest, PollWritableImmediately) { // Test that the socket isn't readable before receiving anything. TEST_P(RawSocketTest, PollNotReadableInitially) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Try to receive data with MSG_DONTWAIT, which returns immediately if there's // nothing to be read. @@ -291,7 +289,7 @@ TEST_P(RawSocketTest, PollNotReadableInitially) { // Test that the socket becomes readable once something is written to it. TEST_P(RawSocketTest, PollTriggeredOnWrite) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Write something so that there's data to be read. // Arbitrary. @@ -306,7 +304,7 @@ TEST_P(RawSocketTest, PollTriggeredOnWrite) { // Test that we can connect() to a valid IP (loopback). TEST_P(RawSocketTest, ConnectToLoopback) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), @@ -315,7 +313,7 @@ TEST_P(RawSocketTest, ConnectToLoopback) { // Test that calling send() without connect() fails. TEST_P(RawSocketTest, SendWithoutConnectFails) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Arbitrary. constexpr char kBuf[] = "Endgame was good"; @@ -325,7 +323,7 @@ TEST_P(RawSocketTest, SendWithoutConnectFails) { // Wildcard Bind. 
TEST_P(RawSocketTest, BindToWildcard) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr_storage addr; addr = {}; @@ -346,16 +344,15 @@ TEST_P(RawSocketTest, BindToWildcard) { // Bind to localhost. TEST_P(RawSocketTest, BindToLocalhost) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); - ASSERT_THAT( - bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), - SyscallSucceeds()); + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); } // Bind to a different address. TEST_P(RawSocketTest, BindToInvalid) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); struct sockaddr_storage bind_addr = addr_; if (Family() == AF_INET) { @@ -367,13 +364,14 @@ TEST_P(RawSocketTest, BindToInvalid) { memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr)); sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to. } - ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), - AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL)); + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), AddrLen()), + SyscallFailsWithErrno(EADDRNOTAVAIL)); } // Send and receive an packet. TEST_P(RawSocketTest, SendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Arbitrary. constexpr char kBuf[] = "TB12"; @@ -388,7 +386,7 @@ TEST_P(RawSocketTest, SendAndReceive) { // We should be able to create multiple raw sockets for the same protocol and // receive the same packet on both. 
TEST_P(RawSocketTest, MultipleSocketReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int s2; ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); @@ -403,11 +401,11 @@ TEST_P(RawSocketTest, MultipleSocketReceive) { // Receive it on socket 2. std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen()); - ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(), - recv_buf2.size())); + ASSERT_NO_FATAL_FAILURE( + ReceiveBufFrom(s2, recv_buf2.data(), recv_buf2.size())); - EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), - recv_buf2.data() + HdrLen(), sizeof(kBuf)), + EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), recv_buf2.data() + HdrLen(), + sizeof(kBuf)), 0); ASSERT_THAT(close(s2), SyscallSucceeds()); @@ -415,7 +413,7 @@ TEST_P(RawSocketTest, MultipleSocketReceive) { // Test that connect sends packets to the right place. TEST_P(RawSocketTest, SendAndReceiveViaConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), @@ -434,11 +432,10 @@ TEST_P(RawSocketTest, SendAndReceiveViaConnect) { // Bind to localhost, then send and receive packets. TEST_P(RawSocketTest, BindSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); - ASSERT_THAT( - bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), - SyscallSucceeds()); + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); // Arbitrary. constexpr char kBuf[] = "DR16"; @@ -452,11 +449,10 @@ TEST_P(RawSocketTest, BindSendAndReceive) { // Bind and connect to localhost and send/receive packets. 
TEST_P(RawSocketTest, BindConnectSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); - ASSERT_THAT( - bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), - SyscallSucceeds()); + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), SyscallSucceeds()); @@ -474,7 +470,7 @@ TEST_P(RawSocketTest, BindConnectSendAndReceive) { // Check that setting SO_RCVBUF below min is clamped to the minimum // receive buffer size. TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Discover minimum receive buf size by trying to set it to zero. // See: @@ -507,7 +503,7 @@ TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { // Check that setting SO_RCVBUF above max is clamped to the maximum // receive buffer size. TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Discover max buf size by trying to set the largest possible buffer size. constexpr int kRcvBufSz = 0xffffffff; @@ -534,7 +530,7 @@ TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. TEST_P(RawSocketTest, SetSocketRecvBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int max = 0; int min = 0; @@ -584,7 +580,7 @@ TEST_P(RawSocketTest, SetSocketRecvBuf) { // Check that setting SO_SNDBUF below min is clamped to the minimum // receive buffer size. 
TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Discover minimum buffer size by trying to set it to zero. constexpr int kSndBufSz = 0; @@ -615,7 +611,7 @@ TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { // Check that setting SO_SNDBUF above max is clamped to the maximum // send buffer size. TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Discover maximum buffer size by trying to set it to a large value. constexpr int kSndBufSz = 0xffffffff; @@ -642,7 +638,7 @@ TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. TEST_P(RawSocketTest, SetSocketSendBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int max = 0; int min = 0; @@ -688,11 +684,10 @@ TEST_P(RawSocketTest, SetSocketSendBuf) { // Test that receive buffer limits are not enforced when the recv buffer is // empty. TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); - ASSERT_THAT( - bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), - SyscallSucceeds()); + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), SyscallSucceeds()); @@ -719,9 +714,7 @@ TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { // Receive the packet and make sure it's identical. 
std::vector<char> recv_buf(buf.size() + HdrLen()); ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ( - memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), - 0); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); } { @@ -734,9 +727,7 @@ TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { // Receive the packet and make sure it's identical. std::vector<char> recv_buf(buf.size() + HdrLen()); ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ( - memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), - 0); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); } } @@ -750,11 +741,10 @@ TEST_P(RawSocketTest, RecvBufLimits) { if (Protocol() == IPPROTO_TCP) { return; } - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); - ASSERT_THAT( - bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), - SyscallSucceeds()); + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), SyscallSucceeds()); @@ -814,9 +804,7 @@ TEST_P(RawSocketTest, RecvBufLimits) { // Receive the packet and make sure it's identical. std::vector<char> recv_buf(buf.size() + HdrLen()); ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), - buf.size()), - 0); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); } // Assert that the last packet is dropped because the receive buffer should @@ -885,7 +873,7 @@ TEST_P(RawSocketTest, GetSocketDetachFilter) { // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. 
TEST(RawSocketTest, IPv6ProtoRaw) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int sock; ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW), @@ -902,7 +890,7 @@ TEST(RawSocketTest, IPv6ProtoRaw) { } TEST(RawSocketTest, IPv6SendMsg) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int sock; ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), @@ -930,7 +918,7 @@ TEST(RawSocketTest, IPv6SendMsg) { } TEST_P(RawSocketTest, ConnectOnIPv6Socket) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int sock; ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index f1d8fd295..d45bd07bc 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <linux/capability.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> @@ -63,7 +62,7 @@ class RawHDRINCL : public ::testing::Test { }; void RawHDRINCL::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_RAW), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -81,7 +80,7 @@ void RawHDRINCL::SetUp() { void RawHDRINCL::TearDown() { // TearDown will be run even if we skip the test. 
- if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { EXPECT_THAT(close(socket_), SyscallSucceeds()); } } diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc index 27d3fffee..3f9717284 100644 --- a/test/syscalls/linux/raw_socket_icmp.cc +++ b/test/syscalls/linux/raw_socket_icmp.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <linux/capability.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> @@ -77,7 +76,7 @@ class RawSocketICMPTest : public ::testing::Test { }; void RawSocketICMPTest::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), SyscallFailsWithErrno(EPERM)); GTEST_SKIP(); @@ -95,7 +94,7 @@ void RawSocketICMPTest::SetUp() { void RawSocketICMPTest::TearDown() { // TearDown will be run even if we skip the test. - if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { EXPECT_THAT(close(s_), SyscallSucceeds()); } } @@ -103,7 +102,7 @@ void RawSocketICMPTest::TearDown() { // We'll only read an echo in this case, as the kernel won't respond to the // malformed ICMP checksum. TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence, // and ID. None of that should matter for raw sockets - the kernel should @@ -132,7 +131,7 @@ TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) { // Send and receive an ICMP packet. 
TEST_F(RawSocketICMPTest, SendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID. // None of that should matter for raw sockets - the kernel should still give @@ -152,7 +151,7 @@ TEST_F(RawSocketICMPTest, SendAndReceive) { // We should be able to create multiple raw sockets for the same protocol and // receive the same packet on both. TEST_F(RawSocketICMPTest, MultipleSocketReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); FileDescriptor s2 = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)); @@ -215,7 +214,7 @@ TEST_F(RawSocketICMPTest, MultipleSocketReceive) { // A raw ICMP socket and ping socket should both receive the ICMP packets // intended for the ping socket. TEST_F(RawSocketICMPTest, RawAndPingSockets) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); FileDescriptor ping_sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); @@ -265,7 +264,7 @@ TEST_F(RawSocketICMPTest, RawAndPingSockets) { // while a ping socket should not. Neither should be able to receieve a short // malformed packet. TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); FileDescriptor ping_sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); @@ -306,7 +305,7 @@ TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) { // while ping socket should not. // Neither should be able to receieve a short malformed packet. 
TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); FileDescriptor ping_sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); @@ -345,7 +344,7 @@ TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) { // Test that connect() sends packets to the right place. TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), @@ -369,7 +368,7 @@ TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) { // Bind to localhost, then send and receive packets. TEST_F(RawSocketICMPTest, BindSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), @@ -392,7 +391,7 @@ TEST_F(RawSocketICMPTest, BindSendAndReceive) { // Bind and connect to localhost and send/receive packets. TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); ASSERT_THAT( bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), @@ -418,7 +417,7 @@ TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) { // Set and get SO_LINGER. TEST_F(RawSocketICMPTest, SetAndGetSocketLinger) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int level = SOL_SOCKET; int type = SO_LINGER; @@ -440,7 +439,7 @@ TEST_F(RawSocketICMPTest, SetAndGetSocketLinger) { // Test getsockopt for SO_ACCEPTCONN. 
TEST_F(RawSocketICMPTest, GetSocketAcceptConn) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); int got = -1; socklen_t length = sizeof(got); diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 72f888659..19dc80d0c 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -765,7 +765,7 @@ TEST_F(StatTest, StatxSymlink) { SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); - std::string parent_dir = "/tmp"; + std::string parent_dir = GetAbsoluteTestTmpdir(); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(parent_dir, test_file_name_)); std::string p = link.path(); diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc index d4ea8e026..d057cdc09 100644 --- a/test/syscalls/linux/statfs.cc +++ b/test/syscalls/linux/statfs.cc @@ -28,7 +28,7 @@ namespace testing { namespace { TEST(StatfsTest, CannotStatBadPath) { - auto temp_file = NewTempAbsPathInDir("/tmp"); + auto temp_file = NewTempAbsPath(); struct statfs st; EXPECT_THAT(statfs(temp_file.c_str(), &st), SyscallFailsWithErrno(ENOENT)); diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index cb77986c2..3fbbf1423 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -2088,6 +2088,66 @@ TEST_P(SimpleTcpSocketTest, ConnectUnspecifiedAddress) { } } +// Tests that send will return EWOULDBLOCK initially with large buffer and will +// succeed after the send buffer size is increased. +TEST_P(TcpSocketTest, SendUnblocksOnSendBufferIncrease) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(first_fd, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(first_fd, F_SETFL, opts), SyscallSucceeds()); + + // Get maximum buffer size by trying to set it to a large value. 
+ constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max_buffer_sz = 0; + socklen_t max_len = sizeof(max_buffer_sz); + ASSERT_THAT( + getsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &max_buffer_sz, &max_len), + SyscallSucceeds()); + + int buffer_sz = max_buffer_sz >> 2; + EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &buffer_sz, + sizeof(buffer_sz)), + SyscallSucceedsWithValue(0)); + + // Create a large buffer that will be used for sending. + std::vector<char> buffer(max_buffer_sz); + + // Write until we receive an error. + while (RetryEINTR(send)(first_fd, buffer.data(), buffer.size(), 0) != -1) { + // Sleep to give linux a chance to move data from the send buffer to the + // receive buffer. + usleep(10000); // 10ms. + } + + // The last error should have been EWOULDBLOCK. + ASSERT_EQ(errno, EWOULDBLOCK); + + ScopedThread send_thread([this]() { + int flags = 0; + ASSERT_THAT(flags = fcntl(first_fd, F_GETFL), SyscallSucceeds()); + EXPECT_THAT(fcntl(first_fd, F_SETFL, flags & ~O_NONBLOCK), + SyscallSucceeds()); + + // Expect the send() to succeed. + char buffer; + ASSERT_THAT(RetryEINTR(send)(first_fd, &buffer, sizeof(buffer), 0), + SyscallSucceeds()); + }); + + // Set SO_SNDBUF to maximum buffer size allowed. 
+ buffer_sz = max_buffer_sz >> 1; + EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &buffer_sz, + sizeof(buffer_sz)), + SyscallSucceedsWithValue(0)); + + send_thread.Join(); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); diff --git a/test/util/BUILD b/test/util/BUILD index 4a4401ba8..b92af1c27 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -8,13 +8,20 @@ package( cc_library( name = "capability_util", testonly = 1, - srcs = ["capability_util.cc"], - hdrs = ["capability_util.h"], + srcs = [ + "fuchsia_capability_util.cc", + "linux_capability_util.cc", + ], + hdrs = [ + "capability_util.h", + "linux_capability_util.h", + ], deps = [ ":cleanup", ":memory_util", ":posix_error", ":save_util", + ":socket_util", ":test_util", "@com_google_absl//absl/strings", ], diff --git a/test/util/capability_util.h b/test/util/capability_util.h index c4b0feade..318a43feb 100644 --- a/test/util/capability_util.h +++ b/test/util/capability_util.h @@ -17,112 +17,30 @@ #ifndef GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_ #define GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_ -#ifdef __linux__ - -#include <errno.h> -#include <linux/capability.h> -#include <sys/syscall.h> -#include <unistd.h> - -#include "test/util/cleanup.h" -#include "test/util/posix_error.h" -#include "test/util/save_util.h" -#include "test/util/test_util.h" - -#ifndef _LINUX_CAPABILITY_VERSION_3 -#error Expecting _LINUX_CAPABILITY_VERSION_3 support +#if defined(__Fuchsia__) +// Nothing to include. +#elif defined(__linux__) +#include "test/util/linux_capability_util.h" +#else +#error "Unhandled platform" #endif namespace gvisor { namespace testing { -// HaveCapability returns true if the process has the specified EFFECTIVE -// capability. 
-inline PosixErrorOr<bool> HaveCapability(int cap) { - if (!cap_valid(cap)) { - return PosixError(EINVAL, "Invalid capability"); - } - - struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; - struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; - RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); - MaybeSave(); - - return (caps[CAP_TO_INDEX(cap)].effective & CAP_TO_MASK(cap)) != 0; -} - -// SetCapability sets the specified EFFECTIVE capability. -inline PosixError SetCapability(int cap, bool set) { - if (!cap_valid(cap)) { - return PosixError(EINVAL, "Invalid capability"); - } - - struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; - struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; - RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); - MaybeSave(); - - if (set) { - caps[CAP_TO_INDEX(cap)].effective |= CAP_TO_MASK(cap); - } else { - caps[CAP_TO_INDEX(cap)].effective &= ~CAP_TO_MASK(cap); - } - header = {_LINUX_CAPABILITY_VERSION_3, 0}; - RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capset, &header, &caps)); - MaybeSave(); - - return NoError(); -} - -// DropPermittedCapability drops the specified PERMITTED. The EFFECTIVE -// capabilities must be a subset of PERMITTED, so those are dropped as well. 
-inline PosixError DropPermittedCapability(int cap) { - if (!cap_valid(cap)) { - return PosixError(EINVAL, "Invalid capability"); - } - - struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; - struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; - RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); - MaybeSave(); - - caps[CAP_TO_INDEX(cap)].effective &= ~CAP_TO_MASK(cap); - caps[CAP_TO_INDEX(cap)].permitted &= ~CAP_TO_MASK(cap); - - header = {_LINUX_CAPABILITY_VERSION_3, 0}; - RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capset, &header, &caps)); - MaybeSave(); - - return NoError(); -} - -PosixErrorOr<bool> CanCreateUserNamespace(); - -class AutoCapability { - public: - AutoCapability(int cap, bool set) : cap_(cap), set_(set) { - const bool has = EXPECT_NO_ERRNO_AND_VALUE(HaveCapability(cap)); - if (set != has) { - EXPECT_NO_ERRNO(SetCapability(cap_, set_)); - applied_ = true; - } - } - - ~AutoCapability() { - if (applied_) { - EXPECT_NO_ERRNO(SetCapability(cap_, !set_)); - } - } +// HaveRawIPSocketCapability returns whether or not the process has access to +// raw IP sockets. +// +// Returns an error when raw IP socket access cannot be determined. +PosixErrorOr<bool> HaveRawIPSocketCapability(); - private: - int cap_; - bool set_; - bool applied_ = false; -}; +// HavePacketSocketCapability returns whether or not the process has access to +// packet sockets. +// +// Returns an error when packet socket access cannot be determined. 
+PosixErrorOr<bool> HavePacketSocketCapability(); } // namespace testing } // namespace gvisor -#endif // __linux__ - #endif // GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_ diff --git a/test/util/cgroup_util.cc b/test/util/cgroup_util.cc index 977993f41..df3c57b87 100644 --- a/test/util/cgroup_util.cc +++ b/test/util/cgroup_util.cc @@ -25,12 +25,26 @@ namespace gvisor { namespace testing { -Cgroup::Cgroup(std::string_view path) : cgroup_path_(path) { +Cgroup::Cgroup(absl::string_view path) : cgroup_path_(path) { id_ = ++Cgroup::next_id_; std::cerr << absl::StreamFormat("[cg#%d] <= %s", id_, cgroup_path_) << std::endl; } +PosixErrorOr<Cgroup> Cgroup::RecursivelyCreate(absl::string_view path) { + RETURN_IF_ERRNO(RecursivelyCreateDir(path)); + return Cgroup(path); +} + +PosixErrorOr<Cgroup> Cgroup::Create(absl::string_view path) { + RETURN_IF_ERRNO(Mkdir(path)); + return Cgroup(path); +} + +PosixErrorOr<Cgroup> Cgroup::CreateChild(absl::string_view name) const { + return Cgroup::Create(JoinPath(Path(), name)); +} + PosixErrorOr<std::string> Cgroup::ReadControlFile( absl::string_view name) const { std::string buf; @@ -93,7 +107,7 @@ PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::ParsePIDList( absl::string_view data) const { absl::flat_hash_set<pid_t> res; std::vector<absl::string_view> lines = absl::StrSplit(data, '\n'); - for (const std::string_view& line : lines) { + for (const absl::string_view& line : lines) { if (line.empty()) { continue; } diff --git a/test/util/cgroup_util.h b/test/util/cgroup_util.h index e3f696a89..ccc7219e3 100644 --- a/test/util/cgroup_util.h +++ b/test/util/cgroup_util.h @@ -34,8 +34,20 @@ class Cgroup { uint64_t id() const { return id_; } + // RecursivelyCreate creates cgroup specified by path, including all + // components leading up to path. Path should end inside a cgroupfs mount. If + // path already exists, RecursivelyCreate does nothing and silently succeeds. 
+ static PosixErrorOr<Cgroup> RecursivelyCreate(std::string_view path); + + // Creates a new cgroup at path. The parent directory must exist and be a + // cgroupfs directory. + static PosixErrorOr<Cgroup> Create(std::string_view path); + const std::string& Path() const { return cgroup_path_; } + // Creates a child cgroup under this cgroup with the given name. + PosixErrorOr<Cgroup> CreateChild(std::string_view name) const; + std::string Relpath(absl::string_view leaf) const { return JoinPath(cgroup_path_, leaf); } diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 483ae848d..253411858 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -201,7 +201,8 @@ PosixError UnlinkAt(const FileDescriptor& dfd, absl::string_view path, PosixError Mkdir(absl::string_view path, int mode) { int res = mkdir(std::string(path).c_str(), mode); if (res < 0) { - return PosixError(errno, absl::StrCat("mkdir ", path, " mode ", mode)); + return PosixError(errno, + absl::StrFormat("mkdir \"%s\" mode %#o", path, mode)); } return NoError(); diff --git a/test/util/fuchsia_capability_util.cc b/test/util/fuchsia_capability_util.cc new file mode 100644 index 000000000..bbe8643e7 --- /dev/null +++ b/test/util/fuchsia_capability_util.cc @@ -0,0 +1,73 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef __Fuchsia__ + +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <sys/socket.h> + +#include "test/util/socket_util.h" + +namespace gvisor { +namespace testing { + +// On Linux, access to raw IP and packet socket is controlled by a single +// capability (CAP_NET_RAW). However on Fuchsia, access to raw IP and packet +// sockets are controlled by separate capabilities/protocols. + +namespace { + +PosixErrorOr<bool> HaveSocketCapability(int domain, int type, int protocol) { + // Fuchsia does not have a platform supported way to check the protocols made + // available to a sandbox. As a workaround, simply try to create the specified + // socket and assume no access if we get a no permissions error. + auto s = Socket(domain, type, protocol); + if (s.ok()) { + return true; + } + if (s.error().errno_value() == EPERM) { + return false; + } + return s.error(); +} + +} // namespace + +PosixErrorOr<bool> HaveRawIPSocketCapability() { + static PosixErrorOr<bool> result(false); + static std::once_flag once; + + std::call_once(once, [&]() { + result = HaveSocketCapability(AF_INET, SOCK_RAW, IPPROTO_UDP); + }); + + return result; +} + +PosixErrorOr<bool> HavePacketSocketCapability() { + static PosixErrorOr<bool> result(false); + static std::once_flag once; + + std::call_once(once, [&]() { + result = HaveSocketCapability(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + }); + + return result; +} + +} // namespace testing +} // namespace gvisor + +#endif // __Fuchsia__ diff --git a/test/util/capability_util.cc b/test/util/linux_capability_util.cc index 3bf218128..7218aa4ac 100644 --- a/test/util/capability_util.cc +++ b/test/util/linux_capability_util.cc @@ -14,7 +14,7 @@ #ifdef __linux__ -#include "test/util/capability_util.h" +#include "test/util/linux_capability_util.h" #include <linux/capability.h> #include <sched.h> @@ -32,6 +32,14 @@ namespace gvisor { namespace testing { +PosixErrorOr<bool> HaveRawIPSocketCapability() { + return HaveCapability(CAP_NET_RAW); 
+} + +PosixErrorOr<bool> HavePacketSocketCapability() { + return HaveCapability(CAP_NET_RAW); +} + PosixErrorOr<bool> CanCreateUserNamespace() { // The most reliable way to determine if userns creation is possible is by // trying to create one; see below. diff --git a/test/util/linux_capability_util.h b/test/util/linux_capability_util.h new file mode 100644 index 000000000..be94ebd19 --- /dev/null +++ b/test/util/linux_capability_util.h @@ -0,0 +1,128 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utilities for testing capabilities on Linux. + +#ifndef GVISOR_TEST_UTIL_LINUX_CAPABILITY_UTIL_H_ +#define GVISOR_TEST_UTIL_LINUX_CAPABILITY_UTIL_H_ + +#ifdef __linux__ + +#include <errno.h> +#include <linux/capability.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "test/util/cleanup.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +#ifndef _LINUX_CAPABILITY_VERSION_3 +#error Expecting _LINUX_CAPABILITY_VERSION_3 support +#endif + +namespace gvisor { +namespace testing { + +// HaveCapability returns true if the process has the specified EFFECTIVE +// capability. 
+inline PosixErrorOr<bool> HaveCapability(int cap) { + if (!cap_valid(cap)) { + return PosixError(EINVAL, "Invalid capability"); + } + + struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; + struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; + RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); + MaybeSave(); + + return (caps[CAP_TO_INDEX(cap)].effective & CAP_TO_MASK(cap)) != 0; +} + +// SetCapability sets the specified EFFECTIVE capability. +inline PosixError SetCapability(int cap, bool set) { + if (!cap_valid(cap)) { + return PosixError(EINVAL, "Invalid capability"); + } + + struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; + struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; + RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); + MaybeSave(); + + if (set) { + caps[CAP_TO_INDEX(cap)].effective |= CAP_TO_MASK(cap); + } else { + caps[CAP_TO_INDEX(cap)].effective &= ~CAP_TO_MASK(cap); + } + header = {_LINUX_CAPABILITY_VERSION_3, 0}; + RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capset, &header, &caps)); + MaybeSave(); + + return NoError(); +} + +// DropPermittedCapability drops the specified PERMITTED. The EFFECTIVE +// capabilities must be a subset of PERMITTED, so those are dropped as well. 
+inline PosixError DropPermittedCapability(int cap) { + if (!cap_valid(cap)) { + return PosixError(EINVAL, "Invalid capability"); + } + + struct __user_cap_header_struct header = {_LINUX_CAPABILITY_VERSION_3, 0}; + struct __user_cap_data_struct caps[_LINUX_CAPABILITY_U32S_3] = {}; + RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capget, &header, &caps)); + MaybeSave(); + + caps[CAP_TO_INDEX(cap)].effective &= ~CAP_TO_MASK(cap); + caps[CAP_TO_INDEX(cap)].permitted &= ~CAP_TO_MASK(cap); + + header = {_LINUX_CAPABILITY_VERSION_3, 0}; + RETURN_ERROR_IF_SYSCALL_FAIL(syscall(__NR_capset, &header, &caps)); + MaybeSave(); + + return NoError(); +} + +PosixErrorOr<bool> CanCreateUserNamespace(); + +class AutoCapability { + public: + AutoCapability(int cap, bool set) : cap_(cap), set_(set) { + const bool has = EXPECT_NO_ERRNO_AND_VALUE(HaveCapability(cap)); + if (set != has) { + EXPECT_NO_ERRNO(SetCapability(cap_, set_)); + applied_ = true; + } + } + + ~AutoCapability() { + if (applied_) { + EXPECT_NO_ERRNO(SetCapability(cap_, !set_)); + } + } + + private: + int cap_; + bool set_; + bool applied_ = false; +}; + +} // namespace testing +} // namespace gvisor + +#endif // __linux__ + +#endif // GVISOR_TEST_UTIL_LINUX_CAPABILITY_UTIL_H_ diff --git a/tools/bazel.mk b/tools/bazel.mk index 60b50cfb0..5893c7c7e 100644 --- a/tools/bazel.mk +++ b/tools/bazel.mk @@ -84,7 +84,7 @@ DOCKER_RUN_OPTIONS += -v "$(shell readlink -m $(GCLOUD_CONFIG)):$(GCLOUD_CONFIG) DOCKER_RUN_OPTIONS += -v "/tmp:/tmp" DOCKER_EXEC_OPTIONS := --user $(UID):$(GID) DOCKER_EXEC_OPTIONS += --interactive -ifeq (true,$(shell test -t 0 && echo true)) +ifeq (true,$(shell test -t 1 && echo true)) DOCKER_EXEC_OPTIONS += --tty endif @@ -199,9 +199,17 @@ build_paths = \ | xargs -r -n 1 -I {} readlink -f "{}" \ | xargs -r -n 1 -I {} bash -c 'set -xeuo pipefail; $(2)') +debian_paths = \ + (set -euo pipefail; \ + $(call wrapper,$(BAZEL) build $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1)) && \ + $(call wrapper,$(BAZEL) cquery 
$(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1) --output=starlark --starlark:file=debian/show_paths.bzl) \ + | xargs -r -n 1 -I {} readlink -f "{}" \ + | xargs -r -n 1 -I {} bash -c 'set -xeuo pipefail; $(2)') + clean = $(call header,CLEAN) && $(call wrapper,$(BAZEL) clean) build = $(call header,BUILD $(1)) && $(call build_paths,$(1),echo {}) copy = $(call header,COPY $(1) $(2)) && $(call build_paths,$(1),cp -fa {} $(2)) +deb_copy = $(call header,COPY $(1) $(2)) && $(call debian_paths,$(1),cp -fa {} $(2)) run = $(call header,RUN $(1) $(2)) && $(call build_paths,$(1),{} $(2)) sudo = $(call header,SUDO $(1) $(2)) && $(call build_paths,$(1),sudo -E {} $(2)) test = $(call header,TEST $(1)) && $(call wrapper,$(BAZEL) test $(TEST_OPTIONS) $(1)) diff --git a/tools/go_generics/rules_tests/template_test.go b/tools/go_generics/rules_tests/template_test.go index b2a3446ef..6f4d140da 100644 --- a/tools/go_generics/rules_tests/template_test.go +++ b/tools/go_generics/rules_tests/template_test.go @@ -20,14 +20,16 @@ import ( ) func TestMax(t *testing.T) { - var a int = max(10, 20) + var a int + a = max(10, 20) if a != 20 { t.Errorf("Bad result of max, got %v, want %v", a, 20) } } func TestIntConst(t *testing.T) { - var a int = add(10) + var a int + a = add(10) if a != 30 { t.Errorf("Bad result of add, got %v, want %v", a, 30) } diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go index 7216388a0..3cf00b5dd 100644 --- a/tools/go_stateify/main.go +++ b/tools/go_stateify/main.go @@ -362,7 +362,12 @@ func main() { fmt.Fprintf(outputFile, " stateSourceObject.LoadWait(%d, &%s.%s)\n", fields[name], recv, name) } emitSaveValue := func(name, typName string) { - fmt.Fprintf(outputFile, " var %sValue %s = %s.save%s()\n", name, typName, recv, camelCased(name)) + // Emit typName to be more robust against code generation bugs, + // but instead of one line make two lines to silence ST1023 + // finding (i.e. 
avoid nogo finding: "should omit type $typName + // from declaration; it will be inferred from the right-hand side") + fmt.Fprintf(outputFile, " var %sValue %s\n", name, typName) + fmt.Fprintf(outputFile, " %sValue = %s.save%s()\n", name, recv, camelCased(name)) fmt.Fprintf(outputFile, " stateSinkObject.SaveValue(%d, %sValue)\n", fields[name], name) } emitSave := func(name string) { diff --git a/tools/nogo/analyzers.go b/tools/nogo/analyzers.go index 6705fc905..db8bbdb8a 100644 --- a/tools/nogo/analyzers.go +++ b/tools/nogo/analyzers.go @@ -117,11 +117,11 @@ func register(all []*analysis.Analyzer) { func init() { // Add all staticcheck analyzers. for _, a := range staticcheck.Analyzers { - AllAnalyzers = append(AllAnalyzers, a) + AllAnalyzers = append(AllAnalyzers, a.Analyzer) } // Add all stylecheck analyzers. for _, a := range stylecheck.Analyzers { - AllAnalyzers = append(AllAnalyzers, a) + AllAnalyzers = append(AllAnalyzers, a.Analyzer) } // Register lists. diff --git a/tools/verity/measure_tool.go b/tools/verity/measure_tool.go index 0d314ae70..4a0bc497a 100644 --- a/tools/verity/measure_tool.go +++ b/tools/verity/measure_tool.go @@ -21,12 +21,14 @@ import ( "io/ioutil" "log" "os" + "strings" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" ) var path = flag.String("path", "", "path to the verity file system.") +var rawpath = flag.String("rawpath", "", "path to the raw file system.") const maxDigestSize = 64 @@ -40,6 +42,14 @@ func main() { if *path == "" { log.Fatalf("no path provided") } + if *rawpath == "" { + log.Fatalf("no rawpath provided") + } + // TODO(b/182315468): Optimize the Merkle tree generate process to + // allow only updating certain files/directories. 
+ if err := clearMerkle(*rawpath); err != nil { + log.Fatalf("Failed to clear merkle files in %s: %v", *rawpath, err) + } if err := enableDir(*path); err != nil { log.Fatalf("Failed to enable file system %s: %v", *path, err) } @@ -49,6 +59,26 @@ func main() { } } +func clearMerkle(path string) error { + files, err := ioutil.ReadDir(path) + if err != nil { + return err + } + + for _, file := range files { + if file.IsDir() { + if err := clearMerkle(path + "/" + file.Name()); err != nil { + return err + } + } else if strings.HasPrefix(file.Name(), ".merkle.verity") { + if err := os.Remove(path + "/" + file.Name()); err != nil { + return err + } + } + } + return nil +} + // enableDir enables verity features on all the files and sub-directories within // path. func enableDir(path string) error { |