Diffstat (limited to 'pkg')
595 files changed, 6781 insertions, 4297 deletions
diff --git a/pkg/abi/abi_linux.go b/pkg/abi/abi_linux.go index 3059479bd..008bbca08 100644 --- a/pkg/abi/abi_linux.go +++ b/pkg/abi/abi_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package abi diff --git a/pkg/abi/linux/arch_amd64.go b/pkg/abi/linux/arch_amd64.go index 0be31e755..064c0a6da 100644 --- a/pkg/abi/linux/arch_amd64.go +++ b/pkg/abi/linux/arch_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/epoll_amd64.go b/pkg/abi/linux/epoll_amd64.go index 7e74b1143..7d5b9fdfb 100644 --- a/pkg/abi/linux/epoll_amd64.go +++ b/pkg/abi/linux/epoll_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/epoll_arm64.go b/pkg/abi/linux/epoll_arm64.go index a35939cc9..5e5960d32 100644 --- a/pkg/abi/linux/epoll_arm64.go +++ b/pkg/abi/linux/epoll_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go index 6b72364ea..ab404b17e 100644 --- a/pkg/abi/linux/file_amd64.go +++ b/pkg/abi/linux/file_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go index 6492c9038..6234955ab 100644 --- a/pkg/abi/linux/file_arm64.go +++ b/pkg/abi/linux/file_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go index e722971f1..e970b5b4a 100644 --- a/pkg/abi/linux/ptrace_amd64.go +++ b/pkg/abi/linux/ptrace_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go index 3d0906565..91e5af56b 100644 --- a/pkg/abi/linux/ptrace_arm64.go +++ b/pkg/abi/linux/ptrace_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/sem_amd64.go b/pkg/abi/linux/sem_amd64.go index ab980cb4f..cabd2d4b8 100644 --- a/pkg/abi/linux/sem_amd64.go +++ b/pkg/abi/linux/sem_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/abi/linux/sem_arm64.go b/pkg/abi/linux/sem_arm64.go index 521468fb1..a0c467dc4 100644 --- a/pkg/abi/linux/sem_arm64.go +++ b/pkg/abi/linux/sem_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build arm64 // +build arm64 package linux diff --git a/pkg/abi/linux/wait.go b/pkg/abi/linux/wait.go index 4bdc280d1..710729138 100644 --- a/pkg/abi/linux/wait.go +++ b/pkg/abi/linux/wait.go @@ -14,6 +14,10 @@ package linux +import ( + "fmt" +) + // Options for waitpid(2), wait4(2), and/or waitid(2), from // include/uapi/linux/wait.h. const ( @@ -34,3 +38,124 @@ const ( P_PID = 0x1 P_PGID = 0x2 ) + +// WaitStatus represents a thread status, as returned by the wait* family of +// syscalls. +type WaitStatus uint32 + +// WaitStatusExit returns a WaitStatus representing the given exit status. +func WaitStatusExit(status int32) WaitStatus { + return WaitStatus(uint32(status) << 8) +} + +// WaitStatusTerminationSignal returns a WaitStatus representing termination by +// the given signal. +func WaitStatusTerminationSignal(sig Signal) WaitStatus { + return WaitStatus(uint32(sig)) +} + +// WaitStatusStopped returns a WaitStatus representing stoppage by the given +// signal or ptrace trap code. +func WaitStatusStopped(code uint32) WaitStatus { + return WaitStatus(code<<8 | 0x7f) +} + +// WaitStatusContinued returns a WaitStatus representing continuation by +// SIGCONT. +func WaitStatusContinued() WaitStatus { + return WaitStatus(0xffff) +} + +// WithCoreDump returns a copy of ws that indicates that a core dump was +// generated. +// +// Preconditions: ws.Signaled(). +func (ws WaitStatus) WithCoreDump() WaitStatus { + return ws | 0x80 +} + +// Exited returns true if ws represents an exit status, consistent with +// WIFEXITED. +func (ws WaitStatus) Exited() bool { + return ws&0x7f == 0 +} + +// Signaled returns true if ws represents a termination by signal, consistent +// with WIFSIGNALED. +func (ws WaitStatus) Signaled() bool { + // ws&0x7f != 0 (exited) and ws&0x7f != 0x7f (stopped or continued) + return ((ws&0x7f)+1)>>1 != 0 +} + +// CoreDumped returns true if ws indicates that a core dump was produced, +// consistent with WCOREDUMP. +// +// Preconditions: ws.Signaled(). +func (ws WaitStatus) CoreDumped() bool { + return ws&0x80 != 0 +} + +// Stopped returns true if ws represents a stoppage, consistent with +// WIFSTOPPED. +func (ws WaitStatus) Stopped() bool { + return ws&0xff == 0x7f +} + +// Continued returns true if ws represents a continuation by SIGCONT, +// consistent with WIFCONTINUED. +func (ws WaitStatus) Continued() bool { + return ws == 0xffff +} + +// ExitStatus returns the lower 8 bits of the exit status represented by ws, +// consistent with WEXITSTATUS. +// +// Preconditions: ws.Exited(). +func (ws WaitStatus) ExitStatus() uint32 { + return uint32((ws & 0xff00) >> 8) +} + +// TerminationSignal returns the termination signal represented by ws, +// consistent with WTERMSIG. +// +// Preconditions: ws.Signaled(). +func (ws WaitStatus) TerminationSignal() Signal { + return Signal(ws & 0x7f) +} + +// StopSignal returns the stop signal represented by ws, consistent with +// WSTOPSIG. +// +// Preconditions: ws.Stopped(). +func (ws WaitStatus) StopSignal() Signal { + return Signal((ws & 0xff00) >> 8) +} + +// PtraceEvent returns the PTRACE_EVENT_* field in ws. +// +// Preconditions: ws.Stopped(). +func (ws WaitStatus) PtraceEvent() uint32 { + return uint32(ws >> 16) +} + +// String implements fmt.Stringer.String. 
+func (ws WaitStatus) String() string { + switch { + case ws.Exited(): + return fmt.Sprintf("exit status %d", ws.ExitStatus()) + case ws.Signaled(): + if ws.CoreDumped() { + return fmt.Sprintf("killed by signal %d (core dumped)", ws.TerminationSignal()) + } + return fmt.Sprintf("killed by signal %d", ws.TerminationSignal()) + case ws.Stopped(): + if ev := ws.PtraceEvent(); ev != 0 { + return fmt.Sprintf("stopped by signal %d (PTRACE_EVENT %d)", ws.StopSignal(), ev) + } + return fmt.Sprintf("stopped by signal %d", ws.StopSignal()) + case ws.Continued(): + return "continued" + default: + return fmt.Sprintf("unknown status %#x", uint32(ws)) + } +} diff --git a/pkg/atomicbitops/aligned_32bit_unsafe.go b/pkg/atomicbitops/aligned_32bit_unsafe.go index df706b453..383f81ff2 100644 --- a/pkg/atomicbitops/aligned_32bit_unsafe.go +++ b/pkg/atomicbitops/aligned_32bit_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm || mips || 386 // +build arm mips 386 package atomicbitops diff --git a/pkg/atomicbitops/aligned_64bit.go b/pkg/atomicbitops/aligned_64bit.go index 1544c7814..2c421d920 100644 --- a/pkg/atomicbitops/aligned_64bit.go +++ b/pkg/atomicbitops/aligned_64bit.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !arm && !mips && !386 // +build !arm,!mips,!386 package atomicbitops diff --git a/pkg/atomicbitops/atomicbitops.go b/pkg/atomicbitops/atomicbitops.go index 1be081719..4c4606a58 100644 --- a/pkg/atomicbitops/atomicbitops.go +++ b/pkg/atomicbitops/atomicbitops.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || arm64 // +build amd64 arm64 // Package atomicbitops provides extensions to the sync/atomic package. diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go index 3b2898256..474c0c815 100644 --- a/pkg/atomicbitops/atomicbitops_noasm.go +++ b/pkg/atomicbitops/atomicbitops_noasm.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !amd64 && !arm64 // +build !amd64,!arm64 package atomicbitops diff --git a/pkg/bits/uint64_arch.go b/pkg/bits/uint64_arch.go index 9f23eff77..fc5634167 100644 --- a/pkg/bits/uint64_arch.go +++ b/pkg/bits/uint64_arch.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || arm64 // +build amd64 arm64 package bits diff --git a/pkg/bits/uint64_arch_amd64_asm.s b/pkg/bits/uint64_arch_amd64_asm.s index 8ff364181..2931b5d56 100644 --- a/pkg/bits/uint64_arch_amd64_asm.s +++ b/pkg/bits/uint64_arch_amd64_asm.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 TEXT ·TrailingZeros64(SB),$0-16 diff --git a/pkg/bits/uint64_arch_arm64_asm.s b/pkg/bits/uint64_arch_arm64_asm.s index 814ba562d..eb8d4d280 100644 --- a/pkg/bits/uint64_arch_arm64_asm.s +++ b/pkg/bits/uint64_arch_arm64_asm.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
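For illustration, a minimal sketch of how the new linux.WaitStatus helpers added in pkg/abi/linux/wait.go compose; linux.SIGKILL is assumed to be the Signal constant already defined elsewhere in that package:

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/abi/linux"
)

func main() {
	// Exit status 2 lives in bits 8-15, so Exited()/ExitStatus() recover it.
	ws := linux.WaitStatusExit(2)
	fmt.Println(ws.Exited(), ws.ExitStatus(), ws) // true 2 "exit status 2"

	// Termination by SIGKILL, with the core-dump bit (0x80) set.
	ws = linux.WaitStatusTerminationSignal(linux.SIGKILL).WithCoreDump()
	fmt.Println(ws.Signaled(), ws.CoreDumped(), ws) // true true "killed by signal 9 (core dumped)"
}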
+//go:build arm64 // +build arm64 TEXT ·TrailingZeros64(SB),$0-16 diff --git a/pkg/bits/uint64_arch_generic.go b/pkg/bits/uint64_arch_generic.go index 9dd2098d1..83b23a3fc 100644 --- a/pkg/bits/uint64_arch_generic.go +++ b/pkg/bits/uint64_arch_generic.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !amd64 && !arm64 // +build !amd64,!arm64 package bits diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go index 629dae8f4..889568177 100644 --- a/pkg/control/server/server.go +++ b/pkg/control/server/server.go @@ -22,6 +22,7 @@ package server import ( "os" + "time" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" @@ -65,13 +66,13 @@ func (s *Server) Wait() { // Stop stops the server. Note that this function should only be called once // and the server should not be used afterwards. -func (s *Server) Stop() { +func (s *Server) Stop(timeout time.Duration) { s.socket.Close() s.Wait() // This will cause existing clients to be terminated safely. If the // registered handlers have a Stop callback, it will be called. - s.server.Stop() + s.server.Stop(timeout) } // StartServing starts listening for connect and spawns the main service diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go index b33a20802..0fabee92b 100644 --- a/pkg/coverage/coverage.go +++ b/pkg/coverage/coverage.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package coverage provides an interface through which Go coverage data can // be collected, converted to kcov format, and exposed to userspace. // diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go index 98c6ec62f..6e61d562f 100644 --- a/pkg/cpuid/cpuid_arm64.go +++ b/pkg/cpuid/cpuid_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package cpuid diff --git a/pkg/cpuid/cpuid_arm64_test.go b/pkg/cpuid/cpuid_arm64_test.go index a34f67779..16b1c064a 100644 --- a/pkg/cpuid/cpuid_arm64_test.go +++ b/pkg/cpuid/cpuid_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package cpuid diff --git a/pkg/cpuid/cpuid_parse_x86_test.go b/pkg/cpuid/cpuid_parse_x86_test.go index d60fdb550..36dd20552 100644 --- a/pkg/cpuid/cpuid_parse_x86_test.go +++ b/pkg/cpuid/cpuid_parse_x86_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go index 392711e8f..dc17cade8 100644 --- a/pkg/cpuid/cpuid_x86.go +++ b/pkg/cpuid/cpuid_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/cpuid/cpuid_x86_test.go b/pkg/cpuid/cpuid_x86_test.go index bacf345c8..92a2d9f81 100644 --- a/pkg/cpuid/cpuid_x86_test.go +++ b/pkg/cpuid/cpuid_x86_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build 386 || amd64 // +build 386 amd64 package cpuid diff --git a/pkg/crypto/crypto_stdlib.go b/pkg/crypto/crypto_stdlib.go index 514592b08..69e867386 100644 --- a/pkg/crypto/crypto_stdlib.go +++ b/pkg/crypto/crypto_stdlib.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package crypto import ( diff --git a/pkg/errors/linuxerr/BUILD b/pkg/errors/linuxerr/BUILD index 8afc9688c..201727780 100644 --- a/pkg/errors/linuxerr/BUILD +++ b/pkg/errors/linuxerr/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux/errno", "//pkg/errors", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/errors/linuxerr/linuxerr.go b/pkg/errors/linuxerr/linuxerr.go index bbdcdecd0..f9f8412e0 100644 --- a/pkg/errors/linuxerr/linuxerr.go +++ b/pkg/errors/linuxerr/linuxerr.go @@ -20,6 +20,7 @@ package linuxerr import ( "fmt" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux/errno" "gvisor.dev/gvisor/pkg/errors" ) @@ -165,6 +166,8 @@ var ( EWOULDBLOCK = EAGAIN EDEADLOCK = EDEADLK ENONET = ENOENT + ENOATTR = ENODATA + ENOTSUP = EOPNOTSUPP ) // A nil *errors.Error denotes no error and is placed at the 0 index of @@ -325,3 +328,22 @@ func ErrorFromErrno(e errno.Errno) *errors.Error { } panic(fmt.Sprintf("invalid error requested with errno: %d", e)) } + +// Equals compars a linuxerr to a given error +// TODO(b/34162363): Remove when syserror is removed. +func Equals(e *errors.Error, err error) bool { + if err == nil { + return e == NOERROR || e == nil + } + if e == nil { + return err == NOERROR || err == unix.Errno(0) + } + + switch err.(type) { + case *errors.Error: + return e == err + case unix.Errno, error: + return unix.Errno(e.Errno()) == err + } + return false +} diff --git a/pkg/errors/linuxerr/linuxerr_test.go b/pkg/errors/linuxerr/linuxerr_test.go index a81dd9560..f09d61b02 100644 --- a/pkg/errors/linuxerr/linuxerr_test.go +++ b/pkg/errors/linuxerr/linuxerr_test.go @@ -16,6 +16,8 @@ package syserror_test import ( "errors" + "io" + "io/fs" "syscall" "testing" @@ -42,7 +44,7 @@ func BenchmarkAssignLinuxerr(b *testing.B) { func BenchmarkAssignSyserror(b *testing.B) { for i := b.N; i > 0; i-- { - globalError = syserror.EINVAL + globalError = linuxerr.ENOMSG } } @@ -67,10 +69,10 @@ func BenchmarkCompareLinuxerr(b *testing.B) { } func BenchmarkCompareSyserror(b *testing.B) { - globalError = syserror.EAGAIN + globalError = linuxerr.EAGAIN j := 0 for i := b.N; i > 0; i-- { - if globalError == syserror.EINVAL { + if globalError == linuxerr.EACCES { j++ } } @@ -107,15 +109,15 @@ func BenchmarkSwitchLinuxerr(b *testing.B) { } func BenchmarkSwitchSyserror(b *testing.B) { - globalError = syserror.EPERM + globalError = linuxerr.EPERM j := 0 for i := b.N; i > 0; i-- { switch globalError { - case syserror.EINVAL: + case linuxerr.EACCES: j++ case syserror.EINTR: j += 2 - case syserror.EAGAIN: + case linuxerr.EAGAIN: j += 3 } } @@ -243,3 +245,62 @@ func TestSyscallErrnoToErrors(t *testing.T) { }) } } + +// TestEqualsMethod tests that the Equals method correctly compares syerror, +// unix.Errno and linuxerr. +// TODO (b/34162363): Remove this. 
+func TestEqualsMethod(t *testing.T) { + for _, tc := range []struct { + name string + linuxErr []*gErrors.Error + err []error + equal bool + }{ + { + name: "compare nil", + linuxErr: []*gErrors.Error{nil, linuxerr.NOERROR}, + err: []error{nil, linuxerr.NOERROR, unix.Errno(0)}, + equal: true, + }, + { + name: "linuxerr nil error not", + linuxErr: []*gErrors.Error{nil, linuxerr.NOERROR}, + err: []error{unix.Errno(1), linuxerr.EPERM, linuxerr.EACCES}, + equal: false, + }, + { + name: "linuxerr not nil error nil", + linuxErr: []*gErrors.Error{linuxerr.ENOENT}, + err: []error{nil, unix.Errno(0), linuxerr.NOERROR}, + equal: false, + }, + { + name: "equal errors", + linuxErr: []*gErrors.Error{linuxerr.ESRCH}, + err: []error{linuxerr.ESRCH, linuxerr.ESRCH, unix.Errno(linuxerr.ESRCH.Errno())}, + equal: true, + }, + { + name: "unequal errors", + linuxErr: []*gErrors.Error{linuxerr.ENOENT}, + err: []error{linuxerr.ESRCH, linuxerr.ESRCH, unix.Errno(linuxerr.ESRCH.Errno())}, + equal: false, + }, + { + name: "other error", + linuxErr: []*gErrors.Error{nil, linuxerr.NOERROR, linuxerr.E2BIG, linuxerr.EINVAL}, + err: []error{fs.ErrInvalid, io.EOF}, + equal: false, + }, + } { + t.Run(tc.name, func(t *testing.T) { + for _, le := range tc.linuxErr { + for _, e := range tc.err { + if linuxerr.Equals(le, e) != tc.equal { + t.Fatalf("Expected %t from Equals method for linuxerr: %s %T and error: %s %T", tc.equal, le, le, e, e) + } + } + } + }) + } +} diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index a264ae2f0..ad15d3672 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -12,13 +12,13 @@ go_library( visibility = ["//:sandbox"], deps = [ ":eventchannel_go_proto", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sync", "//pkg/unet", "@org_golang_google_protobuf//encoding/prototext:go_default_library", "@org_golang_google_protobuf//proto:go_default_library", "@org_golang_google_protobuf//types/known/anypb:go_default_library", - "@org_golang_x_sys//unix:go_default_library", "@org_golang_x_time//rate:go_default_library", ], ) diff --git a/pkg/eventchannel/event.go b/pkg/eventchannel/event.go index 98dfeb1f5..2be2d9d37 100644 --- a/pkg/eventchannel/event.go +++ b/pkg/eventchannel/event.go @@ -23,9 +23,9 @@ import ( "encoding/binary" "fmt" - "golang.org/x/sys/unix" "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" + "gvisor.dev/gvisor/pkg/errors/linuxerr" pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" @@ -155,7 +155,7 @@ func (s *socketEmitter) Emit(msg proto.Message) (bool, error) { for done := 0; done < len(p); { n, err := s.socket.Write(p[done:]) if err != nil { - return (err == unix.EPIPE), err + return linuxerr.Equals(linuxerr.EPIPE, err), err } done += n } diff --git a/pkg/eventchannel/event_any.go b/pkg/eventchannel/event_any.go index a5549f6cd..13f300061 100644 --- a/pkg/eventchannel/event_any.go +++ b/pkg/eventchannel/event_any.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package eventchannel import ( diff --git a/pkg/fdchannel/fdchannel_unsafe.go b/pkg/fdchannel/fdchannel_unsafe.go index 1f24a448d..f9a201eeb 100644 --- a/pkg/fdchannel/fdchannel_unsafe.go +++ b/pkg/fdchannel/fdchannel_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
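For illustration, a minimal usage sketch of the new linuxerr.Equals helper, mirroring how pkg/eventchannel now checks write errors against linuxerr.EPIPE; the error values here are made up for the example:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

func main() {
	// A raw host errno compares equal to the corresponding linuxerr value.
	var err error = unix.EPIPE
	fmt.Println(linuxerr.Equals(linuxerr.EPIPE, err)) // true

	// nil and NOERROR are both treated as "no error".
	fmt.Println(linuxerr.Equals(linuxerr.NOERROR, nil)) // true
	fmt.Println(linuxerr.Equals(linuxerr.EPIPE, nil))   // false
}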
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris // +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Package fdchannel implements passing file descriptors between processes over diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go index 1290d5d10..152557143 100644 --- a/pkg/fdnotifier/fdnotifier.go +++ b/pkg/fdnotifier/fdnotifier.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package fdnotifier contains an adapter that translates IO events (e.g., a diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go index 493ea8375..db917303f 100644 --- a/pkg/fdnotifier/poll_unsafe.go +++ b/pkg/fdnotifier/poll_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdnotifier diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index 9730b88c1..c810c7946 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -10,9 +10,7 @@ go_library( "flipcall_unsafe.go", "futex_linux.go", "io.go", - "packet_window_allocator.go", - "packet_window_mmap_amd64.go", - "packet_window_mmap_arm64.go", + "packet_window.go", ], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/flipcall/ctrl_futex.go b/pkg/flipcall/ctrl_futex.go index 2e8452a02..5d2ee4018 100644 --- a/pkg/flipcall/ctrl_futex.go +++ b/pkg/flipcall/ctrl_futex.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package flipcall import ( diff --git a/pkg/flipcall/flipcall.go b/pkg/flipcall/flipcall.go index 8d8309a73..f0e4ff487 100644 --- a/pkg/flipcall/flipcall.go +++ b/pkg/flipcall/flipcall.go @@ -22,6 +22,7 @@ import ( "sync/atomic" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/memutil" ) // An Endpoint provides the ability to synchronously transfer data and control @@ -96,9 +97,9 @@ func (ep *Endpoint) Init(side EndpointSide, pwd PacketWindowDescriptor, opts ... if pwd.Length > math.MaxUint32 { return fmt.Errorf("packet window size (%d) exceeds maximum (%d)", pwd.Length, math.MaxUint32) } - m, e := packetWindowMmap(pwd) - if e != 0 { - return fmt.Errorf("failed to mmap packet window: %v", e) + m, err := memutil.MapFile(0, uintptr(pwd.Length), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset)) + if err != nil { + return fmt.Errorf("failed to mmap packet window: %v", err) } ep.packet = m ep.dataCap = uint32(pwd.Length) - uint32(PacketHeaderBytes) diff --git a/pkg/flipcall/futex_linux.go b/pkg/flipcall/futex_linux.go index c212f05f1..4bb85939b 100644 --- a/pkg/flipcall/futex_linux.go +++ b/pkg/flipcall/futex_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build linux // +build linux package flipcall diff --git a/pkg/flipcall/packet_window_allocator.go b/pkg/flipcall/packet_window.go index 9122c97b7..9122c97b7 100644 --- a/pkg/flipcall/packet_window_allocator.go +++ b/pkg/flipcall/packet_window.go diff --git a/pkg/gohacks/gohacks_unsafe.go b/pkg/gohacks/gohacks_unsafe.go index 374aac2b4..09fc14787 100644 --- a/pkg/gohacks/gohacks_unsafe.go +++ b/pkg/gohacks/gohacks_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // Check type signatures when updating Go version. diff --git a/pkg/goid/goid.go b/pkg/goid/goid.go index 193b2c2d4..85fb2f6d4 100644 --- a/pkg/goid/goid.go +++ b/pkg/goid/goid.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check type signatures when updating Go version. diff --git a/pkg/hostarch/hostarch_arm64.go b/pkg/hostarch/hostarch_arm64.go index a31a8aeeb..a65c810a5 100644 --- a/pkg/hostarch/hostarch_arm64.go +++ b/pkg/hostarch/hostarch_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package hostarch diff --git a/pkg/hostarch/hostarch_x86.go b/pkg/hostarch/hostarch_x86.go index af6ef2b7f..00bf668f3 100644 --- a/pkg/hostarch/hostarch_x86.go +++ b/pkg/hostarch/hostarch_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package hostarch diff --git a/pkg/iovec/BUILD b/pkg/iovec/BUILD deleted file mode 100644 index f4e9a6af9..000000000 --- a/pkg/iovec/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "iovec", - srcs = ["iovec.go"], - visibility = ["//:sandbox"], - deps = ["@org_golang_x_sys//unix:go_default_library"], -) - -go_test( - name = "iovec_test", - size = "small", - srcs = ["iovec_test.go"], - library = ":iovec", - deps = ["@org_golang_x_sys//unix:go_default_library"], -) diff --git a/pkg/iovec/iovec.go b/pkg/iovec/iovec.go deleted file mode 100644 index a281c05b6..000000000 --- a/pkg/iovec/iovec.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build linux - -// Package iovec provides helpers to interact with vectorized I/O on host -// system. -package iovec - -import ( - "golang.org/x/sys/unix" -) - -// MaxIovs is the maximum number of iovecs host platform can accept. -var MaxIovs = 1024 - -// Builder is a builder for slice of unix.Iovec. 
-type Builder struct { - iovec []unix.Iovec - storage [8]unix.Iovec - - // overflow tracks the last buffer when iovec length is at MaxIovs. - overflow []byte -} - -// Add adds buf to b preparing to be written. Zero-length buf won't be added. -func (b *Builder) Add(buf []byte) { - if len(buf) == 0 { - return - } - if b.iovec == nil { - b.iovec = b.storage[:0] - } - if len(b.iovec) >= MaxIovs { - b.addByAppend(buf) - return - } - - b.iovec = append(b.iovec, unix.Iovec{Base: &buf[0]}) - b.iovec[len(b.iovec)-1].SetLen(len(buf)) - - // Keep the last buf if iovec is at max capacity. We will need to append to it - // for later bufs. - if len(b.iovec) == MaxIovs { - n := len(buf) - b.overflow = buf[:n:n] - } -} - -func (b *Builder) addByAppend(buf []byte) { - b.overflow = append(b.overflow, buf...) - b.iovec[len(b.iovec)-1] = unix.Iovec{Base: &b.overflow[0]} - b.iovec[len(b.iovec)-1].SetLen(len(b.overflow)) -} - -// Build returns the final Iovec slice. The length of returned iovec will not -// excceed MaxIovs. -func (b *Builder) Build() []unix.Iovec { - return b.iovec -} diff --git a/pkg/iovec/iovec_test.go b/pkg/iovec/iovec_test.go deleted file mode 100644 index f6deb4208..000000000 --- a/pkg/iovec/iovec_test.go +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build linux - -package iovec - -import ( - "bytes" - "fmt" - "testing" - "unsafe" - - "golang.org/x/sys/unix" -) - -func TestBuilderEmpty(t *testing.T) { - var builder Builder - iovecs := builder.Build() - if got, want := len(iovecs), 0; got != want { - t.Errorf("len(iovecs) = %d, want %d", got, want) - } -} - -func TestBuilderBuild(t *testing.T) { - a := []byte{1, 2} - b := []byte{3, 4, 5} - - var builder Builder - builder.Add(a) - builder.Add(b) - builder.Add(nil) // Nil slice won't be added. - builder.Add([]byte{}) // Empty slice won't be added. - iovecs := builder.Build() - - if got, want := len(iovecs), 2; got != want { - t.Fatalf("len(iovecs) = %d, want %d", got, want) - } - for i, data := range [][]byte{a, b} { - if got, want := *iovecs[i].Base, data[0]; got != want { - t.Fatalf("*iovecs[%d].Base = %d, want %d", i, got, want) - } - if got, want := iovecs[i].Len, uint64(len(data)); got != want { - t.Fatalf("iovecs[%d].Len = %d, want %d", i, got, want) - } - } -} - -func TestBuilderBuildMaxIov(t *testing.T) { - for _, test := range []struct { - numIov int - }{ - { - numIov: MaxIovs - 1, - }, - { - numIov: MaxIovs, - }, - { - numIov: MaxIovs + 1, - }, - { - numIov: MaxIovs + 10, - }, - } { - name := fmt.Sprintf("numIov=%v", test.numIov) - t.Run(name, func(t *testing.T) { - var data []byte - var builder Builder - for i := 0; i < test.numIov; i++ { - buf := []byte{byte(i)} - builder.Add(buf) - data = append(data, buf...) - } - iovec := builder.Build() - - // Check the expected length of iovec. 
- wantNum := test.numIov - if wantNum > MaxIovs { - wantNum = MaxIovs - } - if got, want := len(iovec), wantNum; got != want { - t.Errorf("len(iovec) = %d, want %d", got, want) - } - - // Test a real read-write. - var fds [2]int - if err := unix.Pipe(fds[:]); err != nil { - t.Fatalf("Pipe: %v", err) - } - defer unix.Close(fds[0]) - defer unix.Close(fds[1]) - - wrote, _, e := unix.RawSyscall(unix.SYS_WRITEV, uintptr(fds[1]), uintptr(unsafe.Pointer(&iovec[0])), uintptr(len(iovec))) - if int(wrote) != len(data) || e != 0 { - t.Fatalf("writev: %v, %v; want %v, 0", wrote, e, len(data)) - } - - got := make([]byte, len(data)) - if n, err := unix.Read(fds[0], got); n != len(got) || err != nil { - t.Fatalf("read: %v, %v; want %v, nil", n, err, len(got)) - } - - if !bytes.Equal(got, data) { - t.Errorf("read: got data %v, want %v", got, data) - } - }) - } -} diff --git a/pkg/memutil/BUILD b/pkg/memutil/BUILD index 9d07d98b4..bea595286 100644 --- a/pkg/memutil/BUILD +++ b/pkg/memutil/BUILD @@ -4,7 +4,11 @@ package(licenses = ["notice"]) go_library( name = "memutil", - srcs = ["memutil_unsafe.go"], + srcs = [ + "memfd_linux_unsafe.go", + "memutil.go", + "mmap.go", + ], visibility = ["//visibility:public"], deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/memutil/memutil_unsafe.go b/pkg/memutil/memfd_linux_unsafe.go index 6676d1ce3..2179c92f3 100644 --- a/pkg/memutil/memutil_unsafe.go +++ b/pkg/memutil/memfd_linux_unsafe.go @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux -// Package memutil provides a wrapper for the memfd_create() system call. package memutil import ( diff --git a/pkg/flipcall/packet_window_mmap_arm64.go b/pkg/memutil/memutil.go index 87ad1a4a1..3185882fd 100644 --- a/pkg/flipcall/packet_window_mmap_arm64.go +++ b/pkg/memutil/memutil.go @@ -1,4 +1,4 @@ -// Copyright 2020 The gVisor Authors. +// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,14 +12,5 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build arm64 - -package flipcall - -import "golang.org/x/sys/unix" - -// Return a memory mapping of the pwd in memory that can be shared outside the sandbox. -func packetWindowMmap(pwd PacketWindowDescriptor) (uintptr, unix.Errno) { - m, _, err := unix.RawSyscall6(unix.SYS_MMAP, 0, uintptr(pwd.Length), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset)) - return m, err -} +// Package memutil provides utilities for working with shared memory files. +package memutil diff --git a/pkg/flipcall/packet_window_mmap_amd64.go b/pkg/memutil/mmap.go index ced587a2a..7a55d1b28 100644 --- a/pkg/flipcall/packet_window_mmap_amd64.go +++ b/pkg/memutil/mmap.go @@ -12,12 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -package flipcall +//go:build go1.1 +// +build go1.1 -import "golang.org/x/sys/unix" +package memutil -// Return a memory mapping of the pwd in memory that can be shared outside the sandbox. 
-func packetWindowMmap(pwd PacketWindowDescriptor) (uintptr, unix.Errno) { - m, _, err := unix.RawSyscall6(unix.SYS_MMAP, 0, uintptr(pwd.Length), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset)) - return m, err +import ( + "golang.org/x/sys/unix" +) + +// MapFile returns a memory mapping configured by the given options as per +// mmap(2). +func MapFile(addr, len, prot, flags, fd, offset uintptr) (uintptr, error) { + m, _, e := unix.RawSyscall6(unix.SYS_MMAP, addr, len, prot, flags, fd, offset) + if e != 0 { + return 0, e + } + return m, nil } diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go index ac7868ad9..0b961d3d9 100644 --- a/pkg/merkletree/merkletree.go +++ b/pkg/merkletree/merkletree.go @@ -151,21 +151,21 @@ type VerityDescriptor struct { Mode uint32 UID uint32 GID uint32 - Children map[string]struct{} + Children []string SymlinkTarget string RootHash []byte } -func (d *VerityDescriptor) String() string { +func (d *VerityDescriptor) encode() []byte { b := new(bytes.Buffer) e := gob.NewEncoder(b) - e.Encode(d.Children) - return fmt.Sprintf("Name: %s, Size: %d, Mode: %d, UID: %d, GID: %d, Children: %v, Symlink: %s, RootHash: %v", d.Name, d.FileSize, d.Mode, d.UID, d.GID, b.Bytes(), d.SymlinkTarget, d.RootHash) + e.Encode(d) + return b.Bytes() } // verify generates a hash from d, and compares it with expected. func (d *VerityDescriptor) verify(expected []byte, hashAlgorithms int) error { - h, err := hashData([]byte(d.String()), hashAlgorithms) + h, err := hashData(d.encode(), hashAlgorithms) if err != nil { return err } @@ -210,7 +210,7 @@ type GenerateParams struct { GID uint32 // Children is a map of children names for a directory. It should be // empty for a regular file. - Children map[string]struct{} + Children []string // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. SymlinkTarget string // HashAlgorithms is the algorithms used to hash data. @@ -242,7 +242,7 @@ func Generate(params *GenerateParams) ([]byte, error) { // If file is a symlink do not generate root hash for file content. if params.SymlinkTarget != "" { - return hashData([]byte(descriptor.String()), params.HashAlgorithms) + return hashData(descriptor.encode(), params.HashAlgorithms) } layout, err := InitLayout(params.Size, params.HashAlgorithms, params.DataAndTreeInSameFile) @@ -315,7 +315,7 @@ func Generate(params *GenerateParams) ([]byte, error) { numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock() } descriptor.RootHash = root - return hashData([]byte(descriptor.String()), params.HashAlgorithms) + return hashData(descriptor.encode(), params.HashAlgorithms) } // VerifyParams contains the params used to verify a portion of a file against @@ -339,7 +339,7 @@ type VerifyParams struct { GID uint32 // Children is a map of children names for a directory. It should be // empty for a regular file. - Children map[string]struct{} + Children []string // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. SymlinkTarget string // HashAlgorithms is the algorithms used to hash data. 
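For illustration, a minimal sketch of the new memutil.MapFile helper that replaces flipcall's per-arch packetWindowMmap wrappers; the memfd setup via unix.MemfdCreate is only for the example:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/memutil"
)

func main() {
	// Create a one-page shared memory file to map.
	fd, err := unix.MemfdCreate("example", 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)
	if err := unix.Ftruncate(fd, 4096); err != nil {
		panic(err)
	}

	// MapFile wraps mmap(2); flipcall now uses it to map packet windows.
	addr, err := memutil.MapFile(0, 4096, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, uintptr(fd), 0)
	if err != nil {
		panic(err)
	}
	fmt.Printf("mapped %d bytes at %#x\n", 4096, addr)
}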
diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go index 5d6f8df1b..1447fd139 100644 --- a/pkg/merkletree/merkletree_test.go +++ b/pkg/merkletree/merkletree_test.go @@ -206,112 +206,112 @@ func TestGenerate(t *testing.T) { data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, - expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27}, + expectedHash: []byte{78, 38, 225, 107, 61, 246, 26, 6, 71, 163, 254, 97, 112, 200, 87, 232, 190, 87, 231, 160, 119, 124, 61, 229, 49, 126, 90, 223, 134, 51, 77, 182}, }, { name: "OnePageZeroesSHA256SameFile", data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, - expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27}, + expectedHash: []byte{78, 38, 225, 107, 61, 246, 26, 6, 71, 163, 254, 97, 112, 200, 87, 232, 190, 87, 231, 160, 119, 124, 61, 229, 49, 126, 90, 223, 134, 51, 77, 182}, }, { name: "OnePageZeroesSHA512SeparateFile", data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, - expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14}, + expectedHash: []byte{221, 45, 182, 132, 61, 212, 227, 145, 150, 131, 98, 221, 195, 5, 89, 21, 188, 36, 250, 101, 85, 78, 197, 253, 193, 23, 74, 219, 28, 108, 77, 47, 65, 79, 123, 144, 50, 245, 109, 72, 71, 80, 24, 77, 158, 95, 242, 185, 109, 163, 105, 183, 67, 106, 55, 194, 223, 46, 12, 242, 165, 203, 172, 254}, }, { name: "OnePageZeroesSHA512SameFile", data: bytes.Repeat([]byte{0}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, - expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14}, + expectedHash: []byte{221, 45, 182, 132, 61, 212, 227, 145, 150, 131, 98, 221, 195, 5, 89, 21, 188, 36, 250, 101, 85, 78, 197, 253, 193, 23, 74, 219, 28, 108, 77, 47, 65, 79, 123, 144, 50, 245, 109, 72, 71, 80, 24, 77, 158, 95, 242, 185, 109, 163, 105, 183, 67, 106, 55, 194, 223, 46, 12, 242, 165, 203, 172, 254}, }, { name: "MultiplePageZeroesSHA256SeparateFile", data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, - expectedHash: []byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185}, + expectedHash: []byte{131, 122, 73, 143, 4, 202, 193, 156, 218, 169, 196, 223, 70, 100, 117, 191, 241, 113, 134, 11, 229, 231, 105, 157, 156, 0, 66, 213, 122, 145, 174, 8}, }, { name: "MultiplePageZeroesSHA256SameFile", data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, - expectedHash: 
[]byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185}, + expectedHash: []byte{131, 122, 73, 143, 4, 202, 193, 156, 218, 169, 196, 223, 70, 100, 117, 191, 241, 113, 134, 11, 229, 231, 105, 157, 156, 0, 66, 213, 122, 145, 174, 8}, }, { name: "MultiplePageZeroesSHA512SeparateFile", data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, - expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81}, + expectedHash: []byte{211, 48, 232, 110, 240, 51, 99, 241, 123, 138, 42, 76, 94, 86, 59, 200, 3, 246, 137, 148, 189, 226, 111, 103, 146, 29, 12, 218, 40, 182, 33, 99, 193, 163, 238, 26, 184, 13, 165, 187, 68, 173, 139, 9, 208, 59, 0, 192, 180, 50, 221, 35, 43, 119, 194, 16, 64, 84, 116, 63, 158, 195, 194, 226}, }, { name: "MultiplePageZeroesSHA512SameFile", data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, - expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81}, + expectedHash: []byte{211, 48, 232, 110, 240, 51, 99, 241, 123, 138, 42, 76, 94, 86, 59, 200, 3, 246, 137, 148, 189, 226, 111, 103, 146, 29, 12, 218, 40, 182, 33, 99, 193, 163, 238, 26, 184, 13, 165, 187, 68, 173, 139, 9, 208, 59, 0, 192, 180, 50, 221, 35, 43, 119, 194, 16, 64, 84, 116, 63, 158, 195, 194, 226}, }, { name: "SingleASHA256SeparateFile", data: []byte{'a'}, hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, - expectedHash: []byte{90, 124, 194, 100, 206, 242, 75, 152, 47, 249, 16, 27, 136, 161, 223, 228, 121, 241, 126, 158, 126, 122, 100, 120, 117, 15, 81, 78, 201, 133, 119, 111}, + expectedHash: []byte{26, 47, 238, 138, 235, 244, 140, 231, 129, 240, 155, 252, 219, 44, 46, 72, 57, 249, 139, 88, 132, 238, 86, 108, 181, 115, 96, 72, 99, 210, 134, 47}, }, { name: "SingleASHA256SameFile", data: []byte{'a'}, hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, - expectedHash: []byte{90, 124, 194, 100, 206, 242, 75, 152, 47, 249, 16, 27, 136, 161, 223, 228, 121, 241, 126, 158, 126, 122, 100, 120, 117, 15, 81, 78, 201, 133, 119, 111}, + expectedHash: []byte{26, 47, 238, 138, 235, 244, 140, 231, 129, 240, 155, 252, 219, 44, 46, 72, 57, 249, 139, 88, 132, 238, 86, 108, 181, 115, 96, 72, 99, 210, 134, 47}, }, { name: "SingleASHA512SeparateFile", data: []byte{'a'}, hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, - expectedHash: []byte{24, 10, 13, 25, 113, 62, 169, 99, 151, 70, 166, 113, 81, 81, 163, 85, 5, 25, 29, 15, 46, 37, 104, 120, 142, 218, 52, 178, 187, 83, 30, 166, 101, 87, 70, 196, 188, 61, 123, 20, 13, 254, 126, 52, 212, 111, 75, 203, 33, 233, 233, 47, 181, 161, 43, 193, 131, 41, 99, 33, 164, 73, 89, 152}, + expectedHash: []byte{44, 30, 224, 12, 102, 119, 163, 171, 119, 175, 212, 121, 231, 188, 125, 171, 79, 28, 144, 234, 75, 122, 44, 75, 15, 101, 173, 92, 233, 109, 234, 60, 173, 
148, 125, 85, 94, 234, 95, 91, 16, 196, 88, 175, 23, 129, 226, 110, 24, 238, 5, 49, 186, 128, 72, 188, 193, 180, 207, 193, 203, 119, 40, 191}, }, { name: "SingleASHA512SameFile", data: []byte{'a'}, hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, - expectedHash: []byte{24, 10, 13, 25, 113, 62, 169, 99, 151, 70, 166, 113, 81, 81, 163, 85, 5, 25, 29, 15, 46, 37, 104, 120, 142, 218, 52, 178, 187, 83, 30, 166, 101, 87, 70, 196, 188, 61, 123, 20, 13, 254, 126, 52, 212, 111, 75, 203, 33, 233, 233, 47, 181, 161, 43, 193, 131, 41, 99, 33, 164, 73, 89, 152}, + expectedHash: []byte{44, 30, 224, 12, 102, 119, 163, 171, 119, 175, 212, 121, 231, 188, 125, 171, 79, 28, 144, 234, 75, 122, 44, 75, 15, 101, 173, 92, 233, 109, 234, 60, 173, 148, 125, 85, 94, 234, 95, 91, 16, 196, 88, 175, 23, 129, 226, 110, 24, 238, 5, 49, 186, 128, 72, 188, 193, 180, 207, 193, 203, 119, 40, 191}, }, { name: "OnePageASHA256SeparateFile", data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: false, - expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1}, + expectedHash: []byte{166, 254, 83, 46, 241, 111, 18, 47, 79, 6, 181, 197, 176, 143, 211, 204, 53, 5, 245, 134, 172, 95, 97, 131, 236, 132, 197, 138, 123, 78, 43, 13}, }, { name: "OnePageASHA256SameFile", data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256, dataAndTreeInSameFile: true, - expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1}, + expectedHash: []byte{166, 254, 83, 46, 241, 111, 18, 47, 79, 6, 181, 197, 176, 143, 211, 204, 53, 5, 245, 134, 172, 95, 97, 131, 236, 132, 197, 138, 123, 78, 43, 13}, }, { name: "OnePageASHA512SeparateFile", data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: false, - expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145}, + expectedHash: []byte{23, 69, 6, 79, 39, 232, 90, 246, 62, 55, 4, 229, 47, 36, 230, 24, 233, 47, 55, 36, 26, 139, 196, 78, 242, 12, 194, 77, 109, 81, 151, 188, 63, 201, 127, 235, 81, 214, 91, 200, 19, 232, 240, 14, 197, 1, 99, 224, 18, 213, 203, 242, 44, 102, 25, 62, 90, 189, 106, 107, 129, 61, 115, 39}, }, { name: "OnePageASHA512SameFile", data: bytes.Repeat([]byte{'a'}, hostarch.PageSize), hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512, dataAndTreeInSameFile: true, - expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145}, + expectedHash: []byte{23, 69, 6, 79, 39, 232, 90, 246, 62, 55, 4, 229, 47, 36, 230, 24, 233, 47, 55, 36, 26, 139, 196, 78, 242, 12, 194, 77, 109, 81, 151, 188, 63, 201, 127, 235, 81, 214, 91, 200, 19, 232, 240, 14, 197, 1, 99, 224, 18, 213, 203, 242, 44, 102, 25, 62, 90, 189, 106, 107, 129, 61, 115, 39}, }, } @@ -324,7 +324,7 @@ func 
TestGenerate(t *testing.T) { Mode: defaultMode, UID: defaultUID, GID: defaultGID, - Children: make(map[string]struct{}), + Children: []string{}, HashAlgorithms: tc.hashAlgorithms, TreeReader: &tree, TreeWriter: &tree, @@ -366,7 +366,7 @@ func prepareVerify(t *testing.T, dataSize int64, hashAlgorithm int, dataAndTreeI Mode: defaultMode, UID: defaultUID, GID: defaultGID, - Children: make(map[string]struct{}), + Children: []string{}, HashAlgorithms: hashAlgorithm, TreeReader: &tree, TreeWriter: &tree, @@ -398,7 +398,7 @@ func prepareVerify(t *testing.T, dataSize int64, hashAlgorithm int, dataAndTreeI Mode: defaultMode, UID: defaultUID, GID: defaultGID, - Children: make(map[string]struct{}), + Children: []string{}, HashAlgorithms: hashAlgorithm, ReadOffset: verifyStart, ReadSize: verifySize, @@ -627,7 +627,7 @@ func TestVerifyModifiedChildren(t *testing.T) { t.Run(tc.name, func(t *testing.T) { var buf bytes.Buffer _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf) - params.Children["abc"] = struct{}{} + params.Children = append(params.Children, "abc") if _, err := Verify(¶ms); errors.Is(err, nil) { t.Errorf("Verification succeeded when expected to fail") } diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD index 0a6a5d215..c08792751 100644 --- a/pkg/metric/BUILD +++ b/pkg/metric/BUILD @@ -4,13 +4,16 @@ package(licenses = ["notice"]) go_library( name = "metric", - srcs = ["metric.go"], + srcs = [ + "metric.go", + ], visibility = ["//:sandbox"], deps = [ ":metric_go_proto", "//pkg/eventchannel", "//pkg/log", "//pkg/sync", + "@org_golang_google_protobuf//types/known/timestamppb", ], ) @@ -18,6 +21,9 @@ proto_library( name = "metric", srcs = ["metric.proto"], visibility = ["//:sandbox"], + deps = [ + "@com_google_protobuf//:timestamp_proto", + ], ) go_test( diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index 4829ae7ce..ac38ec894 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -20,7 +20,9 @@ import ( "fmt" "sort" "sync/atomic" + "time" + "google.golang.org/protobuf/types/known/timestamppb" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto" @@ -54,6 +56,27 @@ var ( }) ) +// InitStage is the name of a Sentry initialization stage. +type InitStage string + +// List of all Sentry initialization stages. +var ( + InitRestoreConfig InitStage = "restore_config" + InitExecConfig InitStage = "exec_config" + InitRestore InitStage = "restore" + InitCreateProcess InitStage = "create_process" + InitTaskStart InitStage = "task_start" + + // allStages is the list of allowed stages. + allStages = []InitStage{ + InitRestoreConfig, + InitExecConfig, + InitRestore, + InitCreateProcess, + InitTaskStart, + } +) + // Uint64Metric encapsulates a uint64 that represents some kind of metric to be // monitored. We currently support metrics with at most one field. // @@ -98,6 +121,10 @@ func Initialize() error { for _, v := range allMetrics.m { m.Metrics = append(m.Metrics, v.metadata) } + m.Stages = make([]string, 0, len(allStages)) + for _, s := range allStages { + m.Stages = append(m.Stages, string(s)) + } if err := eventchannel.Emit(&m); err != nil { return fmt.Errorf("unable to emit metric initialize event: %w", err) } @@ -287,34 +314,66 @@ func (m *Uint64Metric) IncrementBy(v uint64, fieldValues ...string) { } } -// metricSet holds named metrics. 
+// stageTiming contains timing data for an initialization stage. +type stageTiming struct { + stage InitStage + started time.Time + // ended is the zero time when the stage has not ended yet. + ended time.Time +} + +// inProgress returns whether this stage hasn't ended yet. +func (s stageTiming) inProgress() bool { + return !s.started.IsZero() && s.ended.IsZero() +} + +// metricSet holds metric data. type metricSet struct { + // Map of metrics. m map[string]customUint64Metric + + // mu protects the fields below. + mu sync.RWMutex + + // Information about the stages reached by the Sentry. Only appended to, so + // reading a shallow copy of the slice header concurrently is safe. + finished []stageTiming + + // The current stage in progress. + currentStage stageTiming } // makeMetricSet returns a new metricSet. func makeMetricSet() metricSet { return metricSet{ - m: make(map[string]customUint64Metric), + m: make(map[string]customUint64Metric), + finished: make([]stageTiming, 0, len(allStages)), } } // Values returns a snapshot of all values in m. func (m *metricSet) Values() metricValues { - vals := make(metricValues) + m.mu.Lock() + stages := m.finished[:] + m.mu.Unlock() + + vals := metricValues{ + m: make(map[string]interface{}, len(m.m)), + stages: stages, + } for k, v := range m.m { fields := v.metadata.GetFields() switch len(fields) { case 0: - vals[k] = v.value() + vals.m[k] = v.value() case 1: values := fields[0].GetAllowedValues() fieldsMap := make(map[string]uint64) for _, fieldValue := range values { fieldsMap[fieldValue] = v.value(fieldValue) } - vals[k] = fieldsMap + vals.m[k] = fieldsMap default: panic(fmt.Sprintf("Unsupported number of metric fields: %d", len(fields))) } @@ -322,10 +381,16 @@ func (m *metricSet) Values() metricValues { return vals } -// metricValues contains a copy of the values of all metrics. It is a map -// with key as metric name and value can be either uint64 or map[string]uint64 -// to support metrics with one field. -type metricValues map[string]interface{} +// metricValues contains a copy of the values of all metrics. +type metricValues struct { + // m is a map with key as metric name and value can be either uint64 or + // map[string]uint64 to support metrics with one field. + m map[string]interface{} + + // Information on when initialization stages were reached. Does not include + // the currently-ongoing stage, if any. + stages []stageTiming +} var ( // emitMu protects metricsAtLastEmit and ensures that all emitted @@ -354,8 +419,8 @@ func EmitMetricUpdate() { m := pb.MetricUpdate{} // On the first call metricsAtLastEmit will be empty. Include all // metrics then. - for k, v := range snapshot { - prev, ok := metricsAtLastEmit[k] + for k, v := range snapshot.m { + prev, ok := metricsAtLastEmit.m[k] switch t := v.(type) { case uint64: // Metric exists and value did not change. 
@@ -386,8 +451,23 @@ func EmitMetricUpdate() { } } + for s := len(metricsAtLastEmit.stages); s < len(snapshot.stages); s++ { + newStage := snapshot.stages[s] + m.StageTiming = append(m.StageTiming, &pb.StageTiming{ + Stage: string(newStage.stage), + Started: ×tamppb.Timestamp{ + Seconds: newStage.started.Unix(), + Nanos: int32(newStage.started.Nanosecond()), + }, + Ended: ×tamppb.Timestamp{ + Seconds: newStage.ended.Unix(), + Nanos: int32(newStage.ended.Nanosecond()), + }, + }) + } + metricsAtLastEmit = snapshot - if len(m.Metrics) == 0 { + if len(m.Metrics) == 0 && len(m.StageTiming) == 0 { return } @@ -399,9 +479,52 @@ func EmitMetricUpdate() { for _, metric := range m.Metrics { log.Debugf("%s: %+v", metric.Name, metric.Value) } + for _, stage := range m.StageTiming { + duration := time.Duration(stage.Ended.Seconds-stage.Started.Seconds)*time.Second + time.Duration(stage.Ended.Nanos-stage.Started.Nanos)*time.Nanosecond + log.Debugf("Stage %s took %v", stage.GetStage(), duration) + } } if err := eventchannel.Emit(&m); err != nil { log.Warningf("Unable to emit metrics: %s", err) } } + +// StartStage should be called when an initialization stage is started. +// It returns a function that must be called to indicate that the stage ended. +// Alternatively, future calls to StartStage will implicitly indicate that the +// previous stage ended. +// Stage information will be emitted in the next call to EmitMetricUpdate after +// a stage has ended. +// +// This function may (and is expected to) be called prior to final +// initialization of this metric library, as it has to capture early stages +// of Sentry initialization. +func StartStage(stage InitStage) func() { + now := time.Now() + allMetrics.mu.Lock() + defer allMetrics.mu.Unlock() + if allMetrics.currentStage.inProgress() { + endStage(now) + } + allMetrics.currentStage.stage = stage + allMetrics.currentStage.started = now + return func() { + now := time.Now() + allMetrics.mu.Lock() + defer allMetrics.mu.Unlock() + // The current stage may have been ended by another call to StartStage, so + // double-check prior to clearing the current stage. + if allMetrics.currentStage.inProgress() && allMetrics.currentStage.stage == stage { + endStage(now) + } + } +} + +// endStage marks allMetrics.currentStage as ended, adding it to the list of +// finished stages. It assumes allMetrics.mu is locked. +func endStage(when time.Time) { + allMetrics.currentStage.ended = when + allMetrics.finished = append(allMetrics.finished, allMetrics.currentStage) + allMetrics.currentStage = stageTiming{} +} diff --git a/pkg/metric/metric.proto b/pkg/metric/metric.proto index 53c8b4b50..d466b6904 100644 --- a/pkg/metric/metric.proto +++ b/pkg/metric/metric.proto @@ -16,6 +16,8 @@ syntax = "proto3"; package gvisor; +import "google/protobuf/timestamp.proto"; + // MetricMetadata contains all of the metadata describing a single metric. message MetricMetadata { // name is the unique name of the metric, usually in a "directory" format @@ -63,6 +65,7 @@ message MetricMetadata { // future MetricUpdates. message MetricRegistration { repeated MetricMetadata metrics = 1; + repeated string stages = 2; } // MetricValue the value of a metric at a single point in time. @@ -79,9 +82,20 @@ message MetricValue { repeated string field_values = 4; } +// StageTiming represents a new stage that's been reached by the Sentry. 
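For illustration, a minimal sketch of the new metric.StartStage API; the enclosing package and function are hypothetical, standing in for wherever Sentry initialization happens:

package boot // hypothetical caller

import "gvisor.dev/gvisor/pkg/metric"

func configureExec() {
	// Mark the start of the exec-config stage; the returned func ends it.
	// The timing is included in the next EmitMetricUpdate after it ends.
	endStage := metric.StartStage(metric.InitExecConfig)
	defer endStage()

	// ... build the exec configuration here ...
}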
+message StageTiming { + string stage = 1; + google.protobuf.Timestamp started = 2; + google.protobuf.Timestamp ended = 3; +} + // MetricUpdate contains new values for multiple distinct metrics. // // Metrics whose values have not changed are not included. message MetricUpdate { repeated MetricValue metrics = 1; + // Timing information of initialization stages reached since last update. + // The first MetricUpdate will include multiple entries, since metric + // initialization happens relatively late in the Sentry startup process. + repeated StageTiming stage_timing = 2; } diff --git a/pkg/metric/metric_test.go b/pkg/metric/metric_test.go index 1b4a9e73a..0654bdf07 100644 --- a/pkg/metric/metric_test.go +++ b/pkg/metric/metric_test.go @@ -16,6 +16,7 @@ package metric import ( "testing" + "time" "google.golang.org/protobuf/proto" "gvisor.dev/gvisor/pkg/eventchannel" @@ -352,3 +353,147 @@ func TestEmitMetricUpdateWithFields(t *testing.T) { t.Errorf("Field value weird2 not found: %+v", emitter) } } + +func TestMetricUpdateStageTiming(t *testing.T) { + defer reset() + + expectedTimings := map[InitStage]struct{ min, max time.Duration }{} + measureStage := func(stage InitStage, body func()) { + stageStarted := time.Now() + endStage := StartStage(stage) + bodyStarted := time.Now() + body() + bodyEnded := time.Now() + endStage() + stageEnded := time.Now() + + expectedTimings[stage] = struct{ min, max time.Duration }{ + min: bodyEnded.Sub(bodyStarted), + max: stageEnded.Sub(stageStarted), + } + } + checkStage := func(got *pb.StageTiming, want InitStage) { + if InitStage(got.GetStage()) != want { + t.Errorf("%v: got stage %q expected %q", got, got.GetStage(), want) + } + timingBounds, found := expectedTimings[want] + if !found { + t.Fatalf("invalid init stage name %q", want) + } + started := got.Started.AsTime() + ended := got.Ended.AsTime() + duration := ended.Sub(started) + if duration < timingBounds.min { + t.Errorf("stage %v: lasted %v, expected at least %v", want, duration, timingBounds.min) + } else if duration > timingBounds.max { + t.Errorf("stage %v: lasted %v, expected no more than %v", want, duration, timingBounds.max) + } + } + + // Test that it's legit to go through stages before metric registration. + measureStage("before_first_update_1", func() { + time.Sleep(100 * time.Millisecond) + }) + measureStage("before_first_update_2", func() { + time.Sleep(100 * time.Millisecond) + }) + + fooMetric, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription) + if err != nil { + t.Fatalf("Cannot register /foo: %v", err) + } + emitter.Reset() + Initialize() + EmitMetricUpdate() + + // We should have gotten the metric registration and the first MetricUpdate. 
+ if len(emitter) != 2 { + t.Fatalf("emitter has %d messages (%v), expected %d", len(emitter), emitter, 2) + } + + if registration, ok := emitter[0].(*pb.MetricRegistration); !ok { + t.Errorf("first message is not MetricRegistration: %T / %v", emitter[0], emitter[0]) + } else if len(registration.Stages) != len(allStages) { + t.Errorf("MetricRegistration has %d stages (%v), expected %d (%v)", len(registration.Stages), registration.Stages, len(allStages), allStages) + } else { + for i := 0; i < len(allStages); i++ { + if InitStage(registration.Stages[i]) != allStages[i] { + t.Errorf("MetricRegistration.Stages[%d]: got %q want %q", i, registration.Stages[i], allStages[i]) + } + } + } + + if firstUpdate, ok := emitter[1].(*pb.MetricUpdate); !ok { + t.Errorf("second message is not MetricUpdate: %T / %v", emitter[1], emitter[1]) + } else if len(firstUpdate.StageTiming) != 2 { + t.Errorf("MetricUpdate has %d stage timings (%v), expected %d", len(firstUpdate.StageTiming), firstUpdate.StageTiming, 2) + } else { + checkStage(firstUpdate.StageTiming[0], "before_first_update_1") + checkStage(firstUpdate.StageTiming[1], "before_first_update_2") + } + + // Ensure re-emitting doesn't cause another event to be sent. + emitter.Reset() + EmitMetricUpdate() + if len(emitter) != 0 { + t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 0) + } + + // Generate monitoring data, we should get an event with no stages. + fooMetric.Increment() + emitter.Reset() + EmitMetricUpdate() + if len(emitter) != 1 { + t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1) + } else if update, ok := emitter[0].(*pb.MetricUpdate); !ok { + t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1]) + } else if len(update.StageTiming) != 0 { + t.Errorf("unexpected stage timing information: %v", update.StageTiming) + } + + // Now generate new stages. + measureStage("foo_stage_1", func() { + time.Sleep(100 * time.Millisecond) + }) + measureStage("foo_stage_2", func() { + time.Sleep(100 * time.Millisecond) + }) + emitter.Reset() + EmitMetricUpdate() + if len(emitter) != 1 { + t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1) + } else if update, ok := emitter[0].(*pb.MetricUpdate); !ok { + t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1]) + } else if len(update.Metrics) != 0 { + t.Errorf("MetricUpdate has %d metric value changes (%v), expected %d", len(update.Metrics), update.Metrics, 0) + } else if len(update.StageTiming) != 2 { + t.Errorf("MetricUpdate has %d stages (%v), expected %d", len(update.StageTiming), update.StageTiming, 2) + } else { + checkStage(update.StageTiming[0], "foo_stage_1") + checkStage(update.StageTiming[1], "foo_stage_2") + } + + // Now try generating data for both metrics and stages. 
+ fooMetric.Increment() + measureStage("last_stage_1", func() { + time.Sleep(100 * time.Millisecond) + }) + measureStage("last_stage_2", func() { + time.Sleep(100 * time.Millisecond) + }) + fooMetric.Increment() + emitter.Reset() + EmitMetricUpdate() + if len(emitter) != 1 { + t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1) + } else if update, ok := emitter[0].(*pb.MetricUpdate); !ok { + t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1]) + } else if len(update.Metrics) != 1 { + t.Errorf("MetricUpdate has %d metric value changes (%v), expected %d", len(update.Metrics), update.Metrics, 1) + } else if len(update.StageTiming) != 2 { + t.Errorf("MetricUpdate has %d stages (%v), expected %d", len(update.StageTiming), update.StageTiming, 2) + } else { + checkStage(update.StageTiming[0], "last_stage_1") + checkStage(update.StageTiming[1], "last_stage_2") + } +} diff --git a/pkg/procid/procid.go b/pkg/procid/procid.go index 78b92422c..e0d42819d 100644 --- a/pkg/procid/procid.go +++ b/pkg/procid/procid.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package procid provides a way to get the current system thread identifier. package procid diff --git a/pkg/procid/procid_amd64.s b/pkg/procid/procid_amd64.s index c4307c523..b5bbfff90 100644 --- a/pkg/procid/procid_amd64.s +++ b/pkg/procid/procid_amd64.s @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build amd64 -// +build go1.8 -// +build !go1.18 +//go:build amd64 && go1.8 && !go1.18 && go1.1 +// +build amd64,go1.8,!go1.18,go1.1 #include "textflag.h" diff --git a/pkg/procid/procid_arm64.s b/pkg/procid/procid_arm64.s index c1c409f3c..772d96289 100644 --- a/pkg/procid/procid_arm64.s +++ b/pkg/procid/procid_arm64.s @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build arm64 -// +build go1.8 -// +build !go1.18 +//go:build arm64 && go1.8 && !go1.18 && go1.1 +// +build arm64,go1.8,!go1.18,go1.1 #include "textflag.h" diff --git a/pkg/rand/rand.go b/pkg/rand/rand.go index a2714784d..be0e85fdb 100644 --- a/pkg/rand/rand.go +++ b/pkg/rand/rand.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !linux // +build !linux // Package rand implements a cryptographically secure pseudorandom number diff --git a/pkg/refsvfs2/refs.go b/pkg/refsvfs2/refs.go index ef8beb659..fe3e4a1ca 100644 --- a/pkg/refsvfs2/refs.go +++ b/pkg/refsvfs2/refs.go @@ -28,6 +28,11 @@ type RefCounter interface { // DecRef decrements the object's reference count. Users of refs_template.Refs // may specify a destructor to be called once the reference count reaches zero. DecRef(ctx context.Context) +} + +// TryRefCounter is like RefCounter but allow the ref increment to be tried. +type TryRefCounter interface { + RefCounter // TryIncRef attempts to increment the reference count, but may fail if all // references have already been dropped, in which case it returns false. If diff --git a/pkg/refsvfs2/refs_template.go b/pkg/refsvfs2/refs_template.go index 1102c8adc..55b0a60a1 100644 --- a/pkg/refsvfs2/refs_template.go +++ b/pkg/refsvfs2/refs_template.go @@ -101,7 +101,7 @@ func (r *Refs) IncRef() { } } -// TryIncRef implements refs.RefCounter.TryIncRef. 
+// TryIncRef implements refs.TryRefCounter.TryIncRef. // // To do this safely without a loop, a speculative reference is first acquired // on the object. This allows multiple concurrent TryIncRef calls to distinguish diff --git a/pkg/ring0/aarch64.go b/pkg/ring0/aarch64.go index 3bda594f9..96c884844 100644 --- a/pkg/ring0/aarch64.go +++ b/pkg/ring0/aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go index 76776c65c..24f6e4cde 100644 --- a/pkg/ring0/defs_amd64.go +++ b/pkg/ring0/defs_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 diff --git a/pkg/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go index 0125690d2..3e212516f 100644 --- a/pkg/ring0/defs_arm64.go +++ b/pkg/ring0/defs_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/entry_amd64.go b/pkg/ring0/entry_amd64.go index d87b1fd00..afd646b0b 100644 --- a/pkg/ring0/entry_amd64.go +++ b/pkg/ring0/entry_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 @@ -31,6 +32,13 @@ import ( // executed from kernel mode or not and the appropriate stub is called. func sysenter() +// addrOfSysenter returns the start address of sysenter. +// +// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal +// wrapper function rather than the function itself. We must reference from +// assembly to get the ABI0 (i.e., primary) address. +func addrOfSysenter() uintptr + // swapgs swaps the current GS value. // // This must be called prior to sysret/iret. @@ -39,6 +47,9 @@ func swapgs() // jumpToKernel jumps to the kernel version of the current RIP. func jumpToKernel() +// jumpToUser jumps to the user version of the current RIP. +func jumpToUser() + // sysret returns to userspace from a system call. // // The return code is the vector that interrupted execution. @@ -65,7 +76,12 @@ func exception() // This is used when processing kernel exceptions and syscalls. func resume() -// Start is the CPU entrypoint. +// start is the CPU entrypoint. +// +// See requirements below. +func start() + +// AddrOfStart return the address of the CPU entrypoint. // // The following start conditions must be satisfied: // @@ -78,7 +94,11 @@ func resume() // * c.EFER() should be the current EFER value. // // The CPU state will be set to c.Registers(). -func Start() +// +// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal +// wrapper function rather than the function itself. We must reference from +// assembly to get the ABI0 (i.e., primary) address. +func AddrOfStart() uintptr // Exception stubs. func divideByZero() @@ -104,28 +124,56 @@ func virtualizationException() func securityException() func syscallInt80() +// These returns the start address of the functions above. +// +// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal +// wrapper function rather than the function itself. We must reference from +// assembly to get the ABI0 (i.e., primary) address. 
+func addrOfDivideByZero() uintptr +func addrOfDebug() uintptr +func addrOfNMI() uintptr +func addrOfBreakpoint() uintptr +func addrOfOverflow() uintptr +func addrOfBoundRangeExceeded() uintptr +func addrOfInvalidOpcode() uintptr +func addrOfDeviceNotAvailable() uintptr +func addrOfDoubleFault() uintptr +func addrOfCoprocessorSegmentOverrun() uintptr +func addrOfInvalidTSS() uintptr +func addrOfSegmentNotPresent() uintptr +func addrOfStackSegmentFault() uintptr +func addrOfGeneralProtectionFault() uintptr +func addrOfPageFault() uintptr +func addrOfX87FloatingPointException() uintptr +func addrOfAlignmentCheck() uintptr +func addrOfMachineCheck() uintptr +func addrOfSimdFloatingPointException() uintptr +func addrOfVirtualizationException() uintptr +func addrOfSecurityException() uintptr +func addrOfSyscallInt80() uintptr + // Exception handler index. -var handlers = map[Vector]func(){ - DivideByZero: divideByZero, - Debug: debug, - NMI: nmi, - Breakpoint: breakpoint, - Overflow: overflow, - BoundRangeExceeded: boundRangeExceeded, - InvalidOpcode: invalidOpcode, - DeviceNotAvailable: deviceNotAvailable, - DoubleFault: doubleFault, - CoprocessorSegmentOverrun: coprocessorSegmentOverrun, - InvalidTSS: invalidTSS, - SegmentNotPresent: segmentNotPresent, - StackSegmentFault: stackSegmentFault, - GeneralProtectionFault: generalProtectionFault, - PageFault: pageFault, - X87FloatingPointException: x87FloatingPointException, - AlignmentCheck: alignmentCheck, - MachineCheck: machineCheck, - SIMDFloatingPointException: simdFloatingPointException, - VirtualizationException: virtualizationException, - SecurityException: securityException, - SyscallInt80: syscallInt80, +var handlers = map[Vector]uintptr{ + DivideByZero: addrOfDivideByZero(), + Debug: addrOfDebug(), + NMI: addrOfNMI(), + Breakpoint: addrOfBreakpoint(), + Overflow: addrOfOverflow(), + BoundRangeExceeded: addrOfBoundRangeExceeded(), + InvalidOpcode: addrOfInvalidOpcode(), + DeviceNotAvailable: addrOfDeviceNotAvailable(), + DoubleFault: addrOfDoubleFault(), + CoprocessorSegmentOverrun: addrOfCoprocessorSegmentOverrun(), + InvalidTSS: addrOfInvalidTSS(), + SegmentNotPresent: addrOfSegmentNotPresent(), + StackSegmentFault: addrOfStackSegmentFault(), + GeneralProtectionFault: addrOfGeneralProtectionFault(), + PageFault: addrOfPageFault(), + X87FloatingPointException: addrOfX87FloatingPointException(), + AlignmentCheck: addrOfAlignmentCheck(), + MachineCheck: addrOfMachineCheck(), + SIMDFloatingPointException: addrOfSimdFloatingPointException(), + VirtualizationException: addrOfVirtualizationException(), + SecurityException: addrOfSecurityException(), + SyscallInt80: addrOfSyscallInt80(), } diff --git a/pkg/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s index f59747df3..520bd9f57 100644 --- a/pkg/ring0/entry_amd64.s +++ b/pkg/ring0/entry_amd64.s @@ -88,11 +88,33 @@ #define LOAD_KERNEL_STACK(entry) \ MOVQ ENTRY_STACK_TOP(entry), SP; +// ADDR_OF_FUNC defines a function named 'name' that returns the address of +// 'symbol'. +#define ADDR_OF_FUNC(name, symbol) \ +TEXT name,$0-8; \ + MOVQ $symbol, AX; \ + MOVQ AX, ret+0(FP); \ + RET + // See kernel.go. TEXT ·Halt(SB),NOSPLIT,$0 HLT RET +// See kernel_amd64.go. +TEXT ·HaltAndWriteFSBase(SB),NOSPLIT,$8-8 + HLT + + // Restore FS_BASE. + MOVQ regs+0(FP), AX + MOVQ PTRACE_FS_BASE(AX), AX + + PUSHQ AX // First argument (FS_BASE) + CALL ·writeFS(SB) + POPQ AX + + RET + // See entry_amd64.go. 
TEXT ·swapgs(SB),NOSPLIT,$0 SWAP_GS() @@ -107,8 +129,29 @@ TEXT ·jumpToKernel(SB),NOSPLIT,$0 MOVQ AX, 0(SP) RET +// jumpToUser changes execution to the user address space. +// +// This works by changing the return value to the user version. +TEXT ·jumpToUser(SB),NOSPLIT,$0 + // N.B. we can't access KernelStartAddress from the upper half (data + // pages not available), so just naively clear all the upper bits. + // We are assuming a 47-bit virtual address space. + MOVQ $0x00007fffffffffff, AX + MOVQ 0(SP), BX + ANDQ BX, AX // Future return value. + MOVQ AX, 0(SP) + RET + // See entry_amd64.go. TEXT ·sysret(SB),NOSPLIT,$0-24 + // Set application FS. We can't do this in Go because Go code needs FS. + MOVQ regs+8(FP), AX + MOVQ PTRACE_FS_BASE(AX), AX + + PUSHQ AX + CALL ·writeFS(SB) + POPQ AX + CALL ·jumpToKernel(SB) // Save original state and stack. sysenter() or exception() // from APP(gr3) will switch to this stack, set the return @@ -142,6 +185,14 @@ TEXT ·sysret(SB),NOSPLIT,$0-24 // See entry_amd64.go. TEXT ·iret(SB),NOSPLIT,$0-24 + // Set application FS. We can't do this in Go because Go code needs FS. + MOVQ regs+8(FP), AX + MOVQ PTRACE_FS_BASE(AX), AX + + PUSHQ AX // First argument (FS_BASE) + CALL ·writeFS(SB) + POPQ AX + CALL ·jumpToKernel(SB) // Save original state and stack. sysenter() or exception() // from APP(gr3) will switch to this stack, set the return @@ -184,13 +235,29 @@ TEXT ·resume(SB),NOSPLIT,$0 IRET() // See entry_amd64.go. -TEXT ·Start(SB),NOSPLIT,$0 +TEXT ·start(SB),NOSPLIT,$0 + // N.B. This is the vCPU entrypoint. It is not called from Go code and + // thus pushes and pops values on the stack until calling into Go + // (startGo) because we aren't usually a typical Go assembly frame. + PUSHQ $0x0 // Previous frame pointer. MOVQ SP, BP // Set frame pointer. - PUSHQ AX // First argument (CPU). - CALL ·start(SB) // Call Go hook. + + PUSHQ AX // Save CPU. + + // Set up environment required by Go before calling startGo: Go needs + // FS_BASE and floating point initialized. + MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX + PUSHQ BX // First argument (FS_BASE) + CALL ·writeFS(SB) + POPQ BX + + // First argument (CPU) already at bottom of stack. + CALL ·startGo(SB) // Call Go hook. JMP ·resume(SB) // Restore to registers. +ADDR_OF_FUNC(·AddrOfStart(SB), ·start(SB)); + // See entry_amd64.go. TEXT ·sysenter(SB),NOSPLIT,$0 // _RFLAGS_IOPL0 is always set in the user mode and it is never set in @@ -218,6 +285,18 @@ user: MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code. MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user. + CALL ·jumpToUser(SB) + + // Restore kernel FS_BASE. + MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU. + MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX + + PUSHQ BX // First argument (FS_BASE) + CALL ·writeFS(SB) + POPQ BX + + MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU. + // Return to the kernel, where the frame is: // // vector (sp+32) @@ -252,6 +331,8 @@ kernel: POPQ AX // Pop vCPU. JMP ·resume(SB) +ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB)); + // exception is a generic exception handler. // // There are two cases handled: @@ -298,6 +379,16 @@ user: MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX) MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX) + CALL ·jumpToUser(SB) + + // Restore kernel FS_BASE. + MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU. + MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX + + PUSHQ BX // First argument (FS_BASE) + CALL ·writeFS(SB) + POPQ BX + // Copy out and return. MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU. MOVQ 0(SP), BX // Load vector. 
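The ADDR_OF_FUNC/addrOf* machinery above exists because, from Go 1.17 on, referring to an assembly function from Go resolves to its ABIInternal wrapper rather than the routine itself; the IDT and MSR setup needs the ABI0 entry point, which only assembly can name directly. A minimal standalone sketch of the same pattern (hypothetical symbols, not part of this change):

    // trampoline.go
    package example

    // target is implemented in trampoline_amd64.s.
    func target()

    // addrOfTarget returns the ABI0 address of target; it is also implemented
    // in assembly so that no wrapper address can leak in.
    func addrOfTarget() uintptr

    // trampoline_amd64.s
    #include "textflag.h"

    TEXT ·target(SB),NOSPLIT,$0
        RET

    TEXT ·addrOfTarget(SB),NOSPLIT,$0-8
        MOVQ $·target(SB), AX
        MOVQ AX, ret+0(FP)
        RET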
@@ -336,36 +427,38 @@ kernel: POPQ AX // Pop vCPU. JMP ·resume(SB) -#define EXCEPTION_WITH_ERROR(value, symbol) \ +#define EXCEPTION_WITH_ERROR(value, symbol, addr) \ +ADDR_OF_FUNC(addr, symbol); \ TEXT symbol,NOSPLIT,$0; \ PUSHQ $value; \ JMP ·exception(SB); -#define EXCEPTION_WITHOUT_ERROR(value, symbol) \ +#define EXCEPTION_WITHOUT_ERROR(value, symbol, addr) \ +ADDR_OF_FUNC(addr, symbol); \ TEXT symbol,NOSPLIT,$0; \ PUSHQ $0x0; \ PUSHQ $value; \ JMP ·exception(SB); -EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB)) -EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB)) -EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB)) -EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB)) -EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB)) -EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB)) -EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB)) -EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB)) -EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB)) -EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB)) -EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB)) -EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB)) -EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB)) -EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB)) -EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB)) -EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB)) -EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB)) -EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB)) -EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB)) -EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB)) -EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB)) -EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB)) +EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB), ·addrOfDivideByZero(SB)) +EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB), ·addrOfDebug(SB)) +EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB), ·addrOfNMI(SB)) +EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB), ·addrOfBreakpoint(SB)) +EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB), ·addrOfOverflow(SB)) +EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB), ·addrOfBoundRangeExceeded(SB)) +EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB), ·addrOfInvalidOpcode(SB)) +EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB), ·addrOfDeviceNotAvailable(SB)) +EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB), ·addrOfDoubleFault(SB)) +EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB), ·addrOfCoprocessorSegmentOverrun(SB)) +EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB), ·addrOfInvalidTSS(SB)) +EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB), ·addrOfSegmentNotPresent(SB)) +EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB), ·addrOfStackSegmentFault(SB)) +EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB), ·addrOfGeneralProtectionFault(SB)) +EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB), ·addrOfPageFault(SB)) +EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB), ·addrOfX87FloatingPointException(SB)) +EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB), ·addrOfAlignmentCheck(SB)) +EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB), ·addrOfMachineCheck(SB)) +EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB), ·addrOfSimdFloatingPointException(SB)) 
+EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB), ·addrOfVirtualizationException(SB)) +EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB), ·addrOfSecurityException(SB)) +EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB), ·addrOfSyscallInt80(SB)) diff --git a/pkg/ring0/entry_arm64.go b/pkg/ring0/entry_arm64.go index 62a93f3d6..299036478 100644 --- a/pkg/ring0/entry_arm64.go +++ b/pkg/ring0/entry_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index f63af8b76..23ec33f92 100644 --- a/pkg/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 @@ -19,12 +20,20 @@ package ring0 import ( "encoding/binary" "reflect" + "sync" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/sentry/arch" ) +// HaltAndWriteFSBase halts execution. On resume, it sets FS_BASE from the +// value in regs. +func HaltAndWriteFSBase(regs *arch.Registers) + // init initializes architecture-specific state. func (k *Kernel) init(maxCPUs int) { + initSentryXCR0() + entrySize := reflect.TypeOf(kernelEntry{}).Size() var ( entries []kernelEntry @@ -240,7 +249,6 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { // Perform the switch. swapgs() // GS will be swapped on return. - WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS. WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS. LoadFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy in floating point. if switchOpts.FullRestore { @@ -249,38 +257,58 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { vector = sysret(c, regs, uintptr(userCR3)) } SaveFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy out floating point. - WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS. RestoreKernelFPState() // escapes: no. Restore kernel MXCSR. return } -var sentryXCR0 = xgetbv(0) +var ( + sentryXCR0 uintptr + sentryXCR0Once sync.Once +) -// start is the CPU entrypoint. +// initSentryXCR0 saves a value of XCR0 in the host mode. It is used to +// initialize XCR0 of guest vCPU-s. +func initSentryXCR0() { + sentryXCR0Once.Do(func() { sentryXCR0 = xgetbv(0) }) +} + +// startGo is the CPU entrypoint. // -// This is called from the Start asm stub (see entry_amd64.go); on return the +// This is called from the start asm stub (see entry_amd64.go); on return the // registers in c.registers will be restored (not segments). // +// Note that any code written in Go should adhere to Go expected environment: +// * Initialized floating point state (required for optimizations using +// floating point instructions). +// * Go TLS in FS_BASE (this is required by splittable functions, calls into +// the runtime, calls to assembly functions (Go 1.17+ ABI wrappers access +// TLS)). +// //go:nosplit -func start(c *CPU) { - // Save per-cpu & FS segment. +func startGo(c *CPU) { + // Save per-cpu. WriteGS(kernelAddr(c.kernelEntry)) - WriteFS(uintptr(c.registers.Fs_base)) + // + // TODO(mpratt): Note that per the note above, this should be done + // before entering Go code. 
However for simplicity we leave it here for + // now, since the small critical sections with undefined FPU state + // should only contain very limited use of floating point instructions + // (notably, use of XMM15 as a zero register). fninit() // Need to sync XCR0 with the host, because xsave and xrstor can be // called from different contexts. xsetbv(0, sentryXCR0) // Set the syscall target. - wrmsr(_MSR_LSTAR, kernelFunc(sysenter)) + wrmsr(_MSR_LSTAR, kernelFunc(addrOfSysenter())) wrmsr(_MSR_SYSCALL_MASK, KernelFlagsClear|_RFLAGS_DF) // NOTE: This depends on having the 64-bit segments immediately // following the 32-bit user segments. This is simply the way the // sysret instruction is designed to work (it assumes they follow). wrmsr(_MSR_STAR, uintptr(uint64(Kcode)<<32|uint64(Ucode32)<<48)) - wrmsr(_MSR_CSTAR, kernelFunc(sysenter)) + wrmsr(_MSR_CSTAR, kernelFunc(addrOfSysenter())) } // SetCPUIDFaulting sets CPUID faulting per the boolean value. diff --git a/pkg/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go index 21db910a2..79f85ff50 100644 --- a/pkg/ring0/kernel_arm64.go +++ b/pkg/ring0/kernel_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/kernel_unsafe.go b/pkg/ring0/kernel_unsafe.go index 16955ad91..04c60d0a7 100644 --- a/pkg/ring0/kernel_unsafe.go +++ b/pkg/ring0/kernel_unsafe.go @@ -35,7 +35,6 @@ func kernelAddr(obj interface{}) uintptr { // kernelFunc returns the address of the given function. // //go:nosplit -func kernelFunc(fn func()) uintptr { - fnptr := (**uintptr)(unsafe.Pointer(&fn)) - return KernelStartAddress | **fnptr +func kernelFunc(fn uintptr) uintptr { + return KernelStartAddress | fn } diff --git a/pkg/ring0/lib_amd64.go b/pkg/ring0/lib_amd64.go index 3e6bb9663..05c394ff5 100644 --- a/pkg/ring0/lib_amd64.go +++ b/pkg/ring0/lib_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 @@ -43,8 +44,8 @@ func xsave(*byte) // xsaveopt uses xsaveopt to save floating point state. func xsaveopt(*byte) -// WriteFS sets the GS address (set by init). -var WriteFS func(addr uintptr) +// writeFS sets the FS base address (selects one of wrfsbase or wrfsmsr). +func writeFS(addr uintptr) // wrfsbase writes to the GS base address. func wrfsbase(addr uintptr) @@ -116,10 +117,8 @@ func Init(featureSet *cpuid.FeatureSet) { LoadFloatingPoint = fxrstor } if hasFSGSBASE { - WriteFS = wrfsbase WriteGS = wrgsbase } else { - WriteFS = wrfsmsr WriteGS = wrgsmsr } } diff --git a/pkg/ring0/lib_amd64.s b/pkg/ring0/lib_amd64.s index 70a43e79e..8ed98fc84 100644 --- a/pkg/ring0/lib_amd64.s +++ b/pkg/ring0/lib_amd64.s @@ -80,6 +80,29 @@ TEXT ·xsaveopt(SB),NOSPLIT,$0-8 BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; RET +// writeFS writes to the FS base. +// +// This is written in assembly because it must be safe to call before the Go +// environment is set up. See comment on start(). +// +// Preconditions: must be running in the lower address space, as it accesses +// global data. +TEXT ·writeFS(SB),NOSPLIT,$8-8 + MOVQ addr+0(FP), AX + + CMPB ·hasFSGSBASE(SB), $1 + JNE msr + + PUSHQ AX + CALL ·wrfsbase(SB) + POPQ AX + RET +msr: + PUSHQ AX + CALL ·wrfsmsr(SB) + POPQ AX + RET + // wrfsbase writes to the FS base. 
// // The code corresponds to: diff --git a/pkg/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go index 5eabd4296..a72a6926d 100644 --- a/pkg/ring0/lib_arm64.go +++ b/pkg/ring0/lib_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/offsets_amd64.go b/pkg/ring0/offsets_amd64.go index ca4075b09..75f6218b3 100644 --- a/pkg/ring0/offsets_amd64.go +++ b/pkg/ring0/offsets_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ring0 @@ -95,6 +96,6 @@ func Emit(w io.Writer) { fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer()) fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer()) fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer()) - fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_FS_BASE 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer()) + fmt.Fprintf(w, "#define PTRACE_GS_BASE 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer()) } diff --git a/pkg/ring0/offsets_arm64.go b/pkg/ring0/offsets_arm64.go index 03adaa6b0..60b2c4074 100644 --- a/pkg/ring0/offsets_arm64.go +++ b/pkg/ring0/offsets_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ring0 diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go index 86eb00a4f..aa2a5c984 100644 --- a/pkg/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/ring0/pagetables/pagetables_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go index a13c616ae..c27b3b10a 100644 --- a/pkg/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/ring0/pagetables/pagetables_amd64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go index 2514b9ac5..1c919ec7d 100644 --- a/pkg/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/ring0/pagetables/pagetables_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go index e43698173..dc98d8452 100644 --- a/pkg/ring0/pagetables/pagetables_x86.go +++ b/pkg/ring0/pagetables/pagetables_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build 386 || amd64 // +build 386 amd64 package pagetables diff --git a/pkg/ring0/pagetables/pcids_aarch64.go b/pkg/ring0/pagetables/pcids_aarch64.go index fbfd41d83..ad492d039 100644 --- a/pkg/ring0/pagetables/pcids_aarch64.go +++ b/pkg/ring0/pagetables/pcids_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/pagetables/pcids_aarch64.s b/pkg/ring0/pagetables/pcids_aarch64.s index e9d62d768..cfcedba71 100644 --- a/pkg/ring0/pagetables/pcids_aarch64.s +++ b/pkg/ring0/pagetables/pcids_aarch64.s @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 #include "funcdata.h" diff --git a/pkg/ring0/pagetables/pcids_x86.go b/pkg/ring0/pagetables/pcids_x86.go index 91fc5e8dd..2a107ea70 100644 --- a/pkg/ring0/pagetables/pcids_x86.go +++ b/pkg/ring0/pagetables/pcids_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build i386 || amd64 // +build i386 amd64 package pagetables diff --git a/pkg/ring0/pagetables/walker_amd64.go b/pkg/ring0/pagetables/walker_amd64.go index eb4fbcc31..ca5e2f85f 100644 --- a/pkg/ring0/pagetables/walker_amd64.go +++ b/pkg/ring0/pagetables/walker_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package pagetables diff --git a/pkg/ring0/pagetables/walker_arm64.go b/pkg/ring0/pagetables/walker_arm64.go index 5ed881c7a..e32dbda2d 100644 --- a/pkg/ring0/pagetables/walker_arm64.go +++ b/pkg/ring0/pagetables/walker_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package pagetables diff --git a/pkg/ring0/x86.go b/pkg/ring0/x86.go index 34fbc1c35..7c96cca6b 100644 --- a/pkg/ring0/x86.go +++ b/pkg/ring0/x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build 386 || amd64 // +build 386 amd64 package ring0 diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 8ffa1db37..062250d69 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -74,8 +74,8 @@ func Install(rules SyscallRules) error { } // Perform the actual installation. - if errno := SetFilter(instrs); errno != 0 { - return fmt.Errorf("failed to set filter: %v", errno) + if err := SetFilter(instrs); err != nil { + return fmt.Errorf("failed to set filter: %v", err) } log.Infof("Seccomp filters installed.") diff --git a/pkg/seccomp/seccomp_amd64.go b/pkg/seccomp/seccomp_amd64.go index 00bf332c1..9cd003bc5 100644 --- a/pkg/seccomp/seccomp_amd64.go +++ b/pkg/seccomp/seccomp_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package seccomp diff --git a/pkg/seccomp/seccomp_arm64.go b/pkg/seccomp/seccomp_arm64.go index b62133f21..adcf73e72 100644 --- a/pkg/seccomp/seccomp_arm64.go +++ b/pkg/seccomp/seccomp_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build arm64 // +build arm64 package seccomp diff --git a/pkg/seccomp/seccomp_test_victim_amd64.go b/pkg/seccomp/seccomp_test_victim_amd64.go index efb8604ec..5c1ecc301 100644 --- a/pkg/seccomp/seccomp_test_victim_amd64.go +++ b/pkg/seccomp/seccomp_test_victim_amd64.go @@ -15,6 +15,7 @@ // Test binary used to test that seccomp filters are properly constructed and // indeed kill the process on violation. +//go:build amd64 // +build amd64 package main diff --git a/pkg/seccomp/seccomp_test_victim_arm64.go b/pkg/seccomp/seccomp_test_victim_arm64.go index 97cb5f5fe..9647e2758 100644 --- a/pkg/seccomp/seccomp_test_victim_arm64.go +++ b/pkg/seccomp/seccomp_test_victim_arm64.go @@ -15,6 +15,7 @@ // Test binary used to test that seccomp filters are properly constructed and // indeed kill the process on violation. +//go:build arm64 // +build arm64 package main diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index 7202591df..6701b5542 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -15,6 +15,8 @@ package seccomp import ( + "fmt" + "runtime" "unsafe" "golang.org/x/sys/unix" @@ -22,12 +24,56 @@ import ( ) // SetFilter installs the given BPF program. +func SetFilter(instrs []linux.BPFInstruction) error { + // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See + // seccomp(2) for details. + // + // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole + // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must + // remain on the same thread. no_new_privs will be propagated to other + // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in + // kernel/seccomp.c:seccomp_sync_threads(). + runtime.LockOSThread() + defer runtime.UnlockOSThread() + if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { + return errno + } + + sockProg := linux.SockFprog{ + Len: uint16(len(instrs)), + Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), + } + tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + // "On error, if SECCOMP_FILTER_FLAG_TSYNC was used, the return value is + // the ID of the thread that caused the synchronization failure. (This ID + // is a kernel thread ID of the type returned by clone(2) and gettid(2).)" + // - seccomp(2) + if tid != 0 { + return fmt.Errorf("couldn't synchronize filter to TID %d", tid) + } + return nil +} + +// SetFilterInChild is equivalent to SetFilter, but: +// +// - It is safe to call after runtime.syscall_runtime_AfterForkInChild. +// +// - It requires that the calling goroutine cannot be moved to another thread, +// which either requires that runtime.LockOSThread() is in effect or that the +// caller is in fact in a fork()ed child process. // -// This is safe to call from an afterFork context. +// - Since fork()ed child processes cannot perform heap allocation, it returns +// a unix.Errno rather than an error. // +// - The race instrumentation has to be disabled for all functions that are +// called in a forked child. +// +//go:norace //go:nosplit -func SetFilter(instrs []linux.BPFInstruction) unix.Errno { - // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details. 
+func SetFilterInChild(instrs []linux.BPFInstruction) unix.Errno { if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { return errno } @@ -36,12 +82,22 @@ func SetFilter(instrs []linux.BPFInstruction) unix.Errno { Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), } - return seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + tid, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + if tid != 0 { + // Return an errno that seccomp(2) doesn't to uniquely identify this + // case. Since this case occurs if another thread has a conflicting + // filter set, "name not unique on network" is at least suggestive? + return unix.ENOTUNIQ + } + return 0 } func isKillProcessAvailable() (bool, error) { action := uint32(linux.SECCOMP_RET_KILL_PROCESS) - if errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 { + if _, errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 { // EINVAL: SECCOMP_GET_ACTION_AVAIL not in this kernel yet. // EOPNOTSUPP: SECCOMP_RET_KILL_PROCESS not supported. if errno == unix.EINVAL || errno == unix.EOPNOTSUPP { @@ -55,9 +111,7 @@ func isKillProcessAvailable() (bool, error) { // seccomp calls seccomp(2). This is safe to call from an afterFork context. // //go:nosplit -func seccomp(op, flags uint32, ptr unsafe.Pointer) unix.Errno { - if _, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)); errno != 0 { - return errno - } - return 0 +func seccomp(op, flags uint32, ptr unsafe.Pointer) (uintptr, unix.Errno) { + n, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)) + return n, errno } diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 61dacd2fb..e0dbc436d 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -28,13 +28,13 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/cpuid", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", "//pkg/sentry/arch/fpu", "//pkg/sentry/limits", - "//pkg/syserror", "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index 08789f517..9a827e84f 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch @@ -22,10 +23,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.dev/gvisor/pkg/syserror" ) // Registers represents the CPU registers for this architecture. 
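A note on the seccomp_unsafe.go split above: SetFilter is now the ordinary-Go entry point (it may allocate and returns a descriptive error), while SetFilterInChild is the post-fork variant that avoids allocation and reports a raw unix.Errno. A hypothetical caller-side sketch, not taken from this change:

    package example

    import (
        "fmt"

        "golang.org/x/sys/unix"
        "gvisor.dev/gvisor/pkg/abi/linux"
        "gvisor.dev/gvisor/pkg/seccomp"
    )

    // installFilter is the normal path, called from regular Go code.
    func installFilter(instrs []linux.BPFInstruction) error {
        if err := seccomp.SetFilter(instrs); err != nil {
            return fmt.Errorf("installing seccomp filter: %w", err)
        }
        return nil
    }

    // installFilterInChild sketches the post-fork path: no allocation and no
    // error formatting; on failure the child simply exits with the errno value.
    func installFilterInChild(instrs []linux.BPFInstruction) {
        if errno := seccomp.SetFilterInChild(instrs); errno != 0 {
            unix.RawSyscall(unix.SYS_EXIT_GROUP, uintptr(errno), 0, 0)
        }
    }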
@@ -233,11 +234,11 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, switch regset { case _NT_PRSTATUS: if maxlen < ptraceRegistersSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return s.PtraceGetRegs(dst) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } @@ -246,11 +247,11 @@ func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, switch regset { case _NT_PRSTATUS: if maxlen < ptraceRegistersSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return s.PtraceSetRegs(src) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index d6b4d2357..e7cb24102 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index 348f238fd..0d27a1f22 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index b2b94c304..6da13f26e 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package arch diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index e8e52d3a8..96e9a6949 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || 386 // +build amd64 386 package arch @@ -23,10 +24,10 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch/fpu" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.dev/gvisor/pkg/syserror" ) // Registers represents the CPU registers for this architecture. 
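The syserror-to-linuxerr moves in these arch files are part of a broader migration: the sentinel values now come from pkg/errors/linuxerr, and call sites that compared errors with == against syserror constants switch to linuxerr.Equals (as the copy_up.go hunk later in this diff does), which is written to match either error representation while both remain in use. A small hypothetical helper showing the comparison style, not part of this change:

    package example

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/errors/linuxerr"
    )

    // describeRegSetErr is a hypothetical translation of the errors produced by
    // the Ptrace*RegSet methods above into more descriptive ones.
    func describeRegSetErr(err error) error {
        switch {
        case err == nil:
            return nil
        case linuxerr.Equals(linuxerr.EINVAL, err):
            return fmt.Errorf("unknown register set: %w", err)
        case linuxerr.Equals(linuxerr.EFAULT, err):
            return fmt.Errorf("destination buffer smaller than the register set: %w", err)
        default:
            return err
        }
    }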
@@ -353,7 +354,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, switch regset { case _NT_PRSTATUS: if maxlen < ptraceRegistersSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return s.PtraceGetRegs(dst) case _NT_PRFPREG: @@ -361,7 +362,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, case _NT_X86_XSTATE: return s.fpState.PtraceGetXstateRegs(dst, maxlen, s.FeatureSet) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } @@ -370,7 +371,7 @@ func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, switch regset { case _NT_PRSTATUS: if maxlen < ptraceRegistersSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return s.PtraceSetRegs(src) case _NT_PRFPREG: @@ -378,7 +379,7 @@ func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, case _NT_X86_XSTATE: return s.fpState.PtraceSetXstateRegs(src, maxlen, s.FeatureSet) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go index 5d7b99bd9..bb5ff7f7f 100644 --- a/pkg/sentry/arch/arch_x86_impl.go +++ b/pkg/sentry/arch/arch_x86_impl.go @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (amd64 || 386) && go1.1 // +build amd64 386 +// +build go1.1 package arch diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD index 4e4f20639..6cdd21b1b 100644 --- a/pkg/sentry/arch/fpu/BUILD +++ b/pkg/sentry/arch/fpu/BUILD @@ -13,9 +13,9 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/cpuid", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sync", - "//pkg/syserror", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go index f0ba26736..e422f67a1 100644 --- a/pkg/sentry/arch/fpu/fpu_amd64.go +++ b/pkg/sentry/arch/fpu/fpu_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 || i386 // +build amd64 i386 package fpu @@ -21,9 +22,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // initX86FPState (defined in asm files) sets up initial state. @@ -70,7 +71,7 @@ const ptraceFPRegsSize = 512 // PtraceGetFPRegs implements Context.PtraceGetFPRegs. func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) { if maxlen < ptraceFPRegsSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return dst.Write((*s)[:ptraceFPRegsSize]) @@ -79,7 +80,7 @@ func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) { // PtraceSetFPRegs implements Context.PtraceSetFPRegs. func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) { if maxlen < ptraceFPRegsSize { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } var f [ptraceFPRegsSize]byte diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go index 46634661f..49e641722 100644 --- a/pkg/sentry/arch/fpu/fpu_arm64.go +++ b/pkg/sentry/arch/fpu/fpu_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build arm64 // +build arm64 package fpu diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 58e28dbba..dbd4336f9 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 80df90076..ee22ec512 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/arch/syscalls_amd64.go b/pkg/sentry/arch/syscalls_amd64.go index 3859f41ee..c021ba072 100644 --- a/pkg/sentry/arch/syscalls_amd64.go +++ b/pkg/sentry/arch/syscalls_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package arch diff --git a/pkg/sentry/arch/syscalls_arm64.go b/pkg/sentry/arch/syscalls_arm64.go index 95dfd1e90..7146c9e44 100644 --- a/pkg/sentry/arch/syscalls_arm64.go +++ b/pkg/sentry/arch/syscalls_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package arch diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go index 8a500a515..7613dfcbc 100644 --- a/pkg/sentry/control/logging.go +++ b/pkg/sentry/control/logging.go @@ -50,20 +50,20 @@ type LoggingArgs struct { // enable strace at all. If this flag is false then a completely // pristine copy of the syscall table will be swapped in. This // approach is used to remain consistent with an empty strace - // whitelist meaning trace all system calls. + // allowlist meaning trace all system calls. EnableStrace bool - // Strace is the whitelist of syscalls to trace to log. If this - // and StraceEventWhitelist are empty trace all system calls. - StraceWhitelist []string + // Strace is the allowlist of syscalls to trace to log. If this + // and StraceEventAllowlist are empty trace all system calls. + StraceAllowlist []string // SetEventStrace is a flag used to indicate that event strace // related arguments were passed in. SetEventStrace bool - // StraceEventWhitelist is the whitelist of syscalls to trace + // StraceEventAllowlist is the allowlist of syscalls to trace // to event log. - StraceEventWhitelist []string + StraceEventAllowlist []string } // Logging provides functions related to logging. @@ -107,13 +107,13 @@ func (l *Logging) Change(args *LoggingArgs, code *int) error { func (l *Logging) configureStrace(args *LoggingArgs) error { if args.EnableStrace { - // Install the whitelist specified. - if len(args.StraceWhitelist) > 0 { - if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil { + // Install the allowlist specified. + if len(args.StraceAllowlist) > 0 { + if err := strace.Enable(args.StraceAllowlist, strace.SinkTypeLog); err != nil { return err } } else { - // For convenience, if strace is enabled but whitelist + // For convenience, if strace is enabled but allowlist // is empty, enable everything to log. 
strace.EnableAll(strace.SinkTypeLog) } @@ -125,8 +125,8 @@ func (l *Logging) configureStrace(args *LoggingArgs) error { } func (l *Logging) configureEventStrace(args *LoggingArgs) error { - if len(args.StraceEventWhitelist) > 0 { - if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil { + if len(args.StraceEventAllowlist) > 0 { + if err := strace.Enable(args.StraceEventAllowlist, strace.SinkTypeEvent); err != nil { return err } } else { diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 221e98a01..6352ea71a 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -126,7 +126,7 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { // Wait for completion. newTG.WaitExited() - *waitStatus = newTG.ExitStatus().Status() + *waitStatus = uint32(newTG.ExitStatus()) return nil } @@ -223,7 +223,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI _ = fd.Close() } }() - ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds) + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, args.KUID, args.KGID, fds) if err != nil { return nil, 0, nil, nil, err } diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go index 62eaca965..4c83b8e8e 100644 --- a/pkg/sentry/control/state.go +++ b/pkg/sentry/control/state.go @@ -17,6 +17,7 @@ package control import ( "errors" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/state" @@ -67,7 +68,7 @@ func (s *State) Save(o *SaveOpts, _ *struct{}) error { log.Warningf("Save failed: exiting...") s.Kernel.SetSaveError(err) } - s.Kernel.Kill(kernel.ExitStatus{}) + s.Kernel.Kill(linux.WaitStatusExit(0)) }, } return saveOpts.Save(s.Kernel.SupervisorContext(), s.Kernel, s.Watchdog) diff --git a/pkg/sentry/devices/tundev/BUILD b/pkg/sentry/devices/tundev/BUILD index 8b38d574d..60c971030 100644 --- a/pkg/sentry/devices/tundev/BUILD +++ b/pkg/sentry/devices/tundev/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/fsimpl/devtmpfs", @@ -16,7 +17,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/socket/netstack", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/tcpip/link/tun", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go index a12eeb8e7..b4e2a6d91 100644 --- a/pkg/sentry/devices/tundev/tundev.go +++ b/pkg/sentry/devices/tundev/tundev.go @@ -18,6 +18,7 @@ package tundev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/link/tun" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -77,11 +77,11 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg switch request { case linux.TUNSETIFF: if !t.HasCapability(linux.CAP_NET_ADMIN) { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } stack, ok := t.NetworkContext().(*netstack.Stack) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var req 
linux.IFReq @@ -104,7 +104,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg return 0, err default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fdimport/BUILD b/pkg/sentry/fdimport/BUILD index 6b4f8b0ed..563e96e0d 100644 --- a/pkg/sentry/fdimport/BUILD +++ b/pkg/sentry/fdimport/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/fs/host", "//pkg/sentry/fsimpl/host", "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", ], ) diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go index badd5b073..f2b9630eb 100644 --- a/pkg/sentry/fdimport/fdimport.go +++ b/pkg/sentry/fdimport/fdimport.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/host" hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -31,9 +32,9 @@ import ( // sets up TTY for the first 3 FDs in the slice representing stdin, stdout, // stderr. Used FDs are either closed or released. It's safe for the caller to // close any remaining files upon return. -func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { +func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { if kernel.VFS2Enabled { - ttyFile, err := importVFS2(ctx, fdTable, console, fds) + ttyFile, err := importVFS2(ctx, fdTable, console, uid, gid, fds) return nil, ttyFile, err } ttyFile, err := importFS(ctx, fdTable, console, fds) @@ -89,7 +90,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds [] return ttyFile.FileOperations.(*host.TTYFileOperations), nil } -func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) { +func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) { k := kernel.KernelFromContext(ctx) if k == nil { return nil, fmt.Errorf("cannot find kernel from context") @@ -103,7 +104,13 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi // Import the file as a host TTY file. 
if ttyFile == nil { var err error - appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), true /* isTTY */) + appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{ + Savable: true, + IsTTY: true, + VirtualOwner: true, + UID: uid, + GID: gid, + }) if err != nil { return nil, err } @@ -121,7 +128,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi } } else { var err error - appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), false /* isTTY */) + appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{ + Savable: true, + VirtualOwner: true, + UID: uid, + GID: gid, + }) if err != nil { return nil, err } diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 0dc100f9b..58fe1e77c 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -48,6 +48,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/p9", @@ -110,12 +111,12 @@ go_test( deps = [ ":fs", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/kernel/contexttest", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index 5aa668873..a8591052c 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -161,7 +162,7 @@ func doCopyUp(ctx context.Context, d *Dirent) error { // then try to take copyMu for writing here, we'd deadlock. t := d.Inode.overlay.lower.StableAttr.Type if t != RegularFile && t != Directory && t != Symlink { - return syserror.EINVAL + return linuxerr.EINVAL } // Wait to get exclusive access to the upper Inode. 
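For readers tracking the fdimport change above, here is a minimal sketch of the new call shape: hostvfs2.NewFD with a NewFDOptions value in place of the old ImportFD call. The importOne helper, its package, and its parameters are illustrative placeholders, not code from this change.

package example // hypothetical package, for illustration only

import (
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/fd"
	hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
	"gvisor.dev/gvisor/pkg/sentry/kernel"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
)

// importOne imports a single host FD with virtual ownership, mirroring the
// options used by importVFS2 in the diff above.
func importOne(ctx context.Context, k *kernel.Kernel, hostFD *fd.FD, uid auth.KUID, gid auth.KGID) (*vfs.FileDescription, error) {
	return hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{
		Savable:      true, // the imported FD participates in save/restore
		IsTTY:        false,
		VirtualOwner: true, // report uid/gid below rather than the host owner
		UID:          uid,
		GID:          gid,
	})
}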
@@ -410,7 +411,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error return err } lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX) - if err != nil && err != syserror.EOPNOTSUPP { + if err != nil && !linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { return err } diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 23a3a9a2d..e28a8961b 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/rand", "//pkg/safemem", diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 77e8d222a..1abf11142 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -17,6 +17,7 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/link/tun" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -98,11 +98,11 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user switch request { case linux.TUNSETIFF: if !t.HasCapability(linux.CAP_NET_ADMIN) { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } stack, ok := t.NetworkContext().(*netstack.Stack) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var req linux.IFReq @@ -125,7 +125,7 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user return 0, err default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 9d5d40954..ad8ff227e 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -22,6 +22,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" @@ -487,11 +488,11 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be // expensive, if possible release the lock and re-acquire it. if walkMayUnlock { - d.mu.Unlock() + d.mu.Unlock() // +checklocksforce: results in an inconsistent block. } c, err := d.Inode.Lookup(ctx, name) if walkMayUnlock { - d.mu.Lock() + d.mu.Lock() // +checklocksforce: see above. } // No dice. if err != nil { @@ -593,21 +594,27 @@ func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool { // lockDirectory should be called for any operation that changes this `d`s // children (creating or removing them). -func (d *Dirent) lockDirectory() func() { +// +checklocksacquire:d.dirMu +// +checklocksacquire:d.mu +func (d *Dirent) lockDirectory() { renameMu.RLock() d.dirMu.Lock() d.mu.Lock() - return func() { - d.mu.Unlock() - d.dirMu.Unlock() - renameMu.RUnlock() - } +} + +// unlockDirectory is the reverse of lockDirectory. 
+// +checklocksrelease:d.dirMu +// +checklocksrelease:d.mu +func (d *Dirent) unlockDirectory() { + d.mu.Unlock() + d.dirMu.Unlock() + renameMu.RUnlock() // +checklocksforce: see lockDirectory. } // Create creates a new regular file in this directory. func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags FileFlags, perms FilePermissions) (*File, error) { - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Does something already exist? if d.exists(ctx, root, name) { @@ -669,8 +676,8 @@ func (d *Dirent) finishCreate(ctx context.Context, child *Dirent, name string) { // genericCreate executes create if name does not exist. Removes a negative Dirent at name if // create succeeds. func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, create func() error) error { - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Does something already exist? if d.exists(ctx, root, name) { @@ -857,7 +864,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, // Once we have written entries for "." and "..", future errors from // IterateDir will be hidden. if !IsDir(d.Inode.StableAttr) { - return 0, syserror.ENOTDIR + return 0, linuxerr.ENOTDIR } // This is a special case for lseek(fd, 0, SEEK_END). @@ -963,7 +970,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err // // See Linux equivalent in fs/namespace.c:do_add_mount. if IsSymlink(inode.StableAttr) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // Dirent that'll replace d. @@ -1020,8 +1027,8 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath panic("Dirent.Remove: root must not be nil") } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Try to walk to the node. child, err := d.walk(ctx, root, name, false /* may unlock */) @@ -1081,8 +1088,8 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) panic("Dirent.Remove: root must not be nil") } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Check for dots. if name == "." { @@ -1258,17 +1265,15 @@ func (d *Dirent) dropExtendedReference() { d.Inode.MountSource.fscache.Remove(d) } -// lockForRename takes locks on oldParent and newParent as required by Rename -// and returns a function that will unlock the locks taken. The returned -// function must be called even if a non-nil error is returned. -func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) (func(), error) { +// lockForRename takes locks on oldParent and newParent as required by Rename. +// On return, unlockForRename must always be called, even with an error. +// +checklocksacquire:oldParent.mu +// +checklocksacquire:newParent.mu +func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) error { renameMu.Lock() if oldParent == newParent { oldParent.mu.Lock() - return func() { - oldParent.mu.Unlock() - renameMu.Unlock() - }, nil + return nil // +checklocksforce: only one lock exists. } // Renaming between directories is a bit subtle: @@ -1296,11 +1301,7 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName // itself. 
err = unix.EINVAL } - return func() { - newParent.mu.Unlock() - oldParent.mu.Unlock() - renameMu.Unlock() - }, err + return err } child = p } @@ -1309,17 +1310,27 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName // have no relationship; in either case we can do this: newParent.mu.Lock() oldParent.mu.Lock() - return func() { + return nil +} + +// unlockForRename is the opposite of lockForRename. +// +checklocksrelease:oldParent.mu +// +checklocksrelease:newParent.mu +func unlockForRename(oldParent, newParent *Dirent) { + if oldParent == newParent { oldParent.mu.Unlock() - newParent.mu.Unlock() - renameMu.Unlock() - }, nil + renameMu.Unlock() // +checklocksforce: only one lock exists. + return + } + newParent.mu.Unlock() + oldParent.mu.Unlock() + renameMu.Unlock() // +checklocksforce: not tracked. } func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error { uattr, err := d.Inode.UnstableAttr(ctx) if err != nil { - return syserror.EPERM + return linuxerr.EPERM } if !uattr.Perms.Sticky { return nil @@ -1332,7 +1343,7 @@ func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error { vuattr, err := victim.Inode.UnstableAttr(ctx) if err != nil { - return syserror.EPERM + return linuxerr.EPERM } if vuattr.Owner.UID == creds.EffectiveKUID { return nil @@ -1340,7 +1351,7 @@ func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error { if victim.Inode.CheckCapability(ctx, linux.CAP_FOWNER) { return nil } - return syserror.EPERM + return linuxerr.EPERM } // MayDelete determines whether `name`, a child of `d`, can be deleted or @@ -1352,8 +1363,8 @@ func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error return err } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() victim, err := d.walk(ctx, root, name, true /* may unlock */) if err != nil { @@ -1374,7 +1385,7 @@ func (d *Dirent) mayDelete(ctx context.Context, victim *Dirent) error { } if victim.IsRoot() { - return syserror.EBUSY + return linuxerr.EBUSY } return nil @@ -1391,8 +1402,8 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string } // Acquire global renameMu lock, and mu locks on oldParent/newParent. - unlock, err := lockForRename(oldParent, oldName, newParent, newName) - defer unlock() + err := lockForRename(oldParent, oldName, newParent, newName) + defer unlockForRename(oldParent, newParent) if err != nil { return err } @@ -1439,7 +1450,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // replaced is the dirent that is being overwritten by rename. 
replaced, err := newParent.walk(ctx, root, newName, false /* may unlock */) if err != nil { - if err != syserror.ENOENT { + if !linuxerr.Equals(linuxerr.ENOENT, err) { return err } diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index 2120f2bad..5c889c861 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -13,6 +13,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fdnotifier", "//pkg/log", @@ -38,6 +39,8 @@ go_test( library = ":fdpipe", deps = [ "//pkg/context", + "//pkg/errors", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fdnotifier", "//pkg/hostarch", diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index 757b7d511..f8a29816b 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" @@ -158,7 +159,7 @@ func (p *pipeOperations) Write(ctx context.Context, file *fs.File, src usermem.I // isBlockError unwraps os errors and checks if they are caused by EAGAIN or // EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock. func isBlockError(err error) bool { - if err == syserror.EAGAIN || err == syserror.EWOULDBLOCK { + if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) { return true } if pe, ok := err.(*os.PathError); ok { diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 7b3ff191f..89d8be741 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -25,8 +25,8 @@ import ( "github.com/google/uuid" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -515,8 +515,8 @@ func assertReaderHungup(t *testing.T, desc string, reader io.Reader) bool { } func assertWriterHungup(t *testing.T, desc string, writer io.Writer) bool { - if _, err := writer.Write([]byte("hello")); unwrapError(err) != unix.EPIPE { - t.Errorf("%s: write to self after hangup got error %v, want %v", desc, err, unix.EPIPE) + if _, err := writer.Write([]byte("hello")); !linuxerr.Equals(linuxerr.EPIPE, unwrapError(err)) { + t.Errorf("%s: write to self after hangup got error %v, want %v", desc, err, linuxerr.EPIPE) return false } return true diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index ab0e9dac7..4c8905a7e 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -21,14 +21,15 @@ import ( "testing" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/errors" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/pkg/hostarch" ) func singlePipeFD() (int, error) { @@ -209,17 +210,17 @@ func TestPipeRequest(t *testing.T) { { desc: "ReadDir on pipe returns ENOTDIR", context: &ReadDir{}, - err: unix.ENOTDIR, + err: linuxerr.ENOTDIR, }, { desc: "Fsync on pipe returns EINVAL", context: &Fsync{}, - err: unix.EINVAL, + err: linuxerr.EINVAL, }, { 
desc: "Seek on pipe returns ESPIPE", context: &Seek{}, - err: unix.ESPIPE, + err: linuxerr.ESPIPE, }, { desc: "Readv on pipe from empty buffer returns nil", @@ -248,7 +249,7 @@ func TestPipeRequest(t *testing.T) { desc: "Writev on pipe from non-empty buffer and closed partner returns EPIPE", context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, flags: fs.FileFlags{Write: true}, - err: unix.EPIPE, + err: linuxerr.EPIPE, }, { desc: "Writev on pipe from non-empty buffer and open partner succeeds", @@ -307,7 +308,11 @@ func TestPipeRequest(t *testing.T) { t.Errorf("%s: unknown request type %T", test.desc, test.context) } - if unwrapError(err) != test.err { + if linuxErr, ok := test.err.(*errors.Error); ok { + if !linuxerr.Equals(linuxErr, unwrapError(err)) { + t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) + } + } else if test.err != unwrapError(err) { t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) } } diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 696613f3a..06c07c807 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -18,6 +18,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -357,7 +358,7 @@ func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opt } if !o.isMappableLocked() { - return syserror.ENODEV + return linuxerr.ENODEV } // FIXME(jamieliu): This is a copy/paste of fsutil.GenericConfigureMMap, @@ -407,7 +408,7 @@ func (f *overlayFileOperations) Ioctl(ctx context.Context, overlayFile *File, io // copy up on any ioctl would be too drastic. In the future, it can have a // list of ioctls that are safe to send to lower and a list that triggers a // copy up. - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } return f.upper.FileOperations.Ioctl(ctx, f.upper, io, args) } @@ -417,7 +418,7 @@ func (f *overlayFileOperations) FifoSize(ctx context.Context, overlayFile *File) err = f.onTop(ctx, overlayFile, func(file *File, ops FileOperations) error { sz, ok := ops.(FifoSizer) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } rv, err = sz.FifoSize(ctx, file) return err @@ -432,11 +433,11 @@ func (f *overlayFileOperations) SetFifoSize(size int64) (rv int64, err error) { if f.upper == nil { // Named pipes cannot be copied up and changes to the lower are prohibited. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } sz, ok := f.upper.FileOperations.(FifoSizer) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } return sz.SetFifoSize(size) } diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index 44587bb37..a346c316b 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -80,23 +80,33 @@ func AsyncBarrier() { // Async executes a function asynchronously. // // Async must not be called recursively. +// +checklocksignore func Async(f func()) { workMu.RLock() - go func() { // S/R-SAFE: AsyncBarrier must be called. - defer workMu.RUnlock() // Ensure RUnlock in case of panic. - f() - }() + go asyncWork(f) // S/R-SAFE: AsyncBarrier must be called. +} + +// +checklocksignore +func asyncWork(f func()) { + // Ensure RUnlock in case of panic. + defer workMu.RUnlock() + f() } // AsyncWithContext is just like Async, except that it calls the asynchronous // function with the given context as argument. 
This function exists to avoid // needing to allocate an extra function on the heap in a hot path. +// +checklocksignore func AsyncWithContext(ctx context.Context, f func(context.Context)) { workMu.RLock() - go func() { // S/R-SAFE: AsyncBarrier must be called. - defer workMu.RUnlock() // Ensure RUnlock in case of panic. - f(ctx) - }() + go asyncWorkWithContext(ctx, f) +} + +// +checklocksignore +func asyncWorkWithContext(ctx context.Context, f func(context.Context)) { + // Ensure RUnlock in case of panic. + defer workMu.RUnlock() + f(ctx) } // AsyncErrorBarrier waits for all outstanding asynchronous work to complete, or diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 6469cc3a9..6bf2d51cb 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -76,6 +76,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/safemem", @@ -106,13 +107,13 @@ go_test( library = ":fsutil", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/safemem", "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go index dc9efa5df..00b3bb29b 100644 --- a/pkg/sentry/fs/fsutil/file.go +++ b/pkg/sentry/fs/fsutil/file.go @@ -18,6 +18,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -45,7 +46,7 @@ func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, // Does the Inode represents a non-seekable type? if fs.IsPipe(inode.StableAttr) || fs.IsSocket(inode.StableAttr) { - return current, syserror.ESPIPE + return current, linuxerr.ESPIPE } // Does the Inode represent a character device? @@ -63,12 +64,12 @@ func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, switch inode.StableAttr.Type { case fs.RegularFile, fs.SpecialFile, fs.BlockDevice: if offset < 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } return offset, nil case fs.Directory, fs.SpecialDirectory: if offset != 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } // SEEK_SET to 0 moves the directory "cursor" to the beginning. if dirCursor != nil { @@ -76,22 +77,22 @@ func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, } return 0, nil default: - return current, syserror.EINVAL + return current, linuxerr.EINVAL } case fs.SeekCurrent: switch inode.StableAttr.Type { case fs.RegularFile, fs.SpecialFile, fs.BlockDevice: if current+offset < 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } return current + offset, nil case fs.Directory, fs.SpecialDirectory: if offset != 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } return current, nil default: - return current, syserror.EINVAL + return current, linuxerr.EINVAL } case fs.SeekEnd: switch inode.StableAttr.Type { @@ -103,14 +104,14 @@ func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, } sz := uattr.Size if sz+offset < 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } return sz + offset, nil // FIXME(b/34778850): This is not universally correct. // Remove SpecialDirectory. 
case fs.SpecialDirectory: if offset != 0 { - return current, syserror.EINVAL + return current, linuxerr.EINVAL } // SEEK_END to 0 moves the directory "cursor" to the end. // @@ -121,12 +122,12 @@ func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, // futile (EOF will always be the result). return fs.FileMaxOffset, nil default: - return current, syserror.EINVAL + return current, linuxerr.EINVAL } } // Not a valid seek request. - return current, syserror.EINVAL + return current, linuxerr.EINVAL } // FileGenericSeek implements fs.FileOperations.Seek for files that use a @@ -152,7 +153,7 @@ type FileNoSeek struct{} // Seek implements fs.FileOperations.Seek. func (FileNoSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // FilePipeSeek implements fs.FileOperations.Seek and can be used for files @@ -161,7 +162,7 @@ type FilePipeSeek struct{} // Seek implements fs.FileOperations.Seek. func (FilePipeSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // FileNotDirReaddir implements fs.FileOperations.Readdir for non-directories. @@ -169,7 +170,7 @@ type FileNotDirReaddir struct{} // Readdir implements fs.FileOperations.FileNotDirReaddir. func (FileNotDirReaddir) Readdir(context.Context, *fs.File, fs.DentrySerializer) (int64, error) { - return 0, syserror.ENOTDIR + return 0, linuxerr.ENOTDIR } // FileNoFsync implements fs.FileOperations.Fsync for files that don't support @@ -178,7 +179,7 @@ type FileNoFsync struct{} // Fsync implements fs.FileOperations.Fsync. func (FileNoFsync) Fsync(context.Context, *fs.File, int64, int64, fs.SyncType) error { - return syserror.EINVAL + return linuxerr.EINVAL } // FileNoopFsync implements fs.FileOperations.Fsync for files that don't need @@ -204,7 +205,7 @@ type FileNoMMap struct{} // ConfigureMMap implements fs.FileOperations.ConfigureMMap. func (FileNoMMap) ConfigureMMap(context.Context, *fs.File, *memmap.MMapOpts) error { - return syserror.ENODEV + return linuxerr.ENODEV } // GenericConfigureMMap implements fs.FileOperations.ConfigureMMap for most @@ -222,7 +223,7 @@ type FileNoIoctl struct{} // Ioctl implements fs.FileOperations.Ioctl. func (FileNoIoctl) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // FileNoSplice implements fs.FileOperations.ReadFrom and @@ -345,7 +346,7 @@ func NewFileStaticContentReader(b []byte) FileStaticContentReader { // Read implements fs.FileOperations.Read. func (scr *FileStaticContentReader) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset >= int64(len(scr.content)) { return 0, nil @@ -367,7 +368,7 @@ type FileNoRead struct{} // Read implements fs.FileOperations.Read. func (FileNoRead) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // FileNoWrite implements fs.FileOperations.Write to return EINVAL. @@ -375,7 +376,7 @@ type FileNoWrite struct{} // Write implements fs.FileOperations.Write. func (FileNoWrite) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // FileNoopRead implement fs.FileOperations.Read as a noop. 
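The syserror-to-linuxerr migration in these files also changes how callers match errors: linuxerr sentinels are compared with linuxerr.Equals rather than ==, which also covers errors unwrapped from host syscalls. A minimal sketch of that pattern follows; the helper name and package are hypothetical, but the comparison mirrors the EOPNOTSUPP handling in fs/copy_up.go above.

package example // hypothetical

import "gvisor.dev/gvisor/pkg/errors/linuxerr"

// ignoreNotSupported treats EOPNOTSUPP as non-fatal, the same pattern
// copyAttributesLocked applies to ListXattr results. Any other error is
// propagated unchanged.
func ignoreNotSupported(err error) error {
	if err != nil && !linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
		return err
	}
	return nil
}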
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 85e7e35db..7c2de04c1 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -17,6 +17,7 @@ package fsutil import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -218,7 +219,7 @@ func (i *InodeSimpleExtendedAttributes) GetXattr(_ context.Context, _ *fs.Inode, value, ok := i.xattrs[name] i.mu.RUnlock() if !ok { - return "", syserror.ENOATTR + return "", linuxerr.ENOATTR } return value, nil } @@ -229,17 +230,17 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode, defer i.mu.Unlock() if i.xattrs == nil { if flags&linux.XATTR_REPLACE != 0 { - return syserror.ENODATA + return linuxerr.ENODATA } i.xattrs = make(map[string]string) } _, ok := i.xattrs[name] if ok && flags&linux.XATTR_CREATE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } if !ok && flags&linux.XATTR_REPLACE != 0 { - return syserror.ENODATA + return linuxerr.ENODATA } i.xattrs[name] = value @@ -265,7 +266,7 @@ func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Ino delete(i.xattrs, name) return nil } - return syserror.ENOATTR + return linuxerr.ENOATTR } // staticFile is a file with static contents. It is returned by @@ -331,52 +332,52 @@ type InodeNotDirectory struct{} // Lookup implements fs.InodeOperations.Lookup. func (InodeNotDirectory) Lookup(context.Context, *fs.Inode, string) (*fs.Dirent, error) { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // Create implements fs.InodeOperations.Create. func (InodeNotDirectory) Create(context.Context, *fs.Inode, string, fs.FileFlags, fs.FilePermissions) (*fs.File, error) { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // CreateLink implements fs.InodeOperations.CreateLink. func (InodeNotDirectory) CreateLink(context.Context, *fs.Inode, string, string) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // CreateHardLink implements fs.InodeOperations.CreateHardLink. func (InodeNotDirectory) CreateHardLink(context.Context, *fs.Inode, *fs.Inode, string) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // CreateDirectory implements fs.InodeOperations.CreateDirectory. func (InodeNotDirectory) CreateDirectory(context.Context, *fs.Inode, string, fs.FilePermissions) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Bind implements fs.InodeOperations.Bind. func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, transport.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // CreateFifo implements fs.InodeOperations.CreateFifo. func (InodeNotDirectory) CreateFifo(context.Context, *fs.Inode, string, fs.FilePermissions) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Remove implements fs.InodeOperations.Remove. func (InodeNotDirectory) Remove(context.Context, *fs.Inode, string) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. func (InodeNotDirectory) RemoveDirectory(context.Context, *fs.Inode, string) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Rename implements fs.FileOperations.Rename. 
func (InodeNotDirectory) Rename(context.Context, *fs.Inode, *fs.Inode, string, *fs.Inode, string, bool) error { - return syserror.EINVAL + return linuxerr.EINVAL } // InodeNotSocket can be used by Inodes that are not sockets. @@ -392,7 +393,7 @@ type InodeNotTruncatable struct{} // Truncate implements fs.InodeOperations.Truncate. func (InodeNotTruncatable) Truncate(context.Context, *fs.Inode, int64) error { - return syserror.EINVAL + return linuxerr.EINVAL } // InodeIsDirTruncate implements fs.InodeOperations.Truncate for directories. @@ -416,7 +417,7 @@ type InodeNotRenameable struct{} // Rename implements fs.InodeOperations.Rename. func (InodeNotRenameable) Rename(context.Context, *fs.Inode, *fs.Inode, string, *fs.Inode, string, bool) error { - return syserror.EINVAL + return linuxerr.EINVAL } // InodeNotOpenable can be used by Inodes that cannot be opened. @@ -448,12 +449,12 @@ type InodeNotSymlink struct{} // Readlink implements fs.InodeOperations.Readlink. func (InodeNotSymlink) Readlink(context.Context, *fs.Inode) (string, error) { - return "", syserror.ENOLINK + return "", linuxerr.ENOLINK } // Getlink implements fs.InodeOperations.Getlink. func (InodeNotSymlink) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { - return nil, syserror.ENOLINK + return nil, linuxerr.ENOLINK } // InodeNoExtendedAttributes can be used by Inodes that do not support @@ -462,22 +463,22 @@ type InodeNoExtendedAttributes struct{} // GetXattr implements fs.InodeOperations.GetXattr. func (InodeNoExtendedAttributes) GetXattr(context.Context, *fs.Inode, string, uint64) (string, error) { - return "", syserror.EOPNOTSUPP + return "", linuxerr.EOPNOTSUPP } // SetXattr implements fs.InodeOperations.SetXattr. func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, string, uint32) error { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } // ListXattr implements fs.InodeOperations.ListXattr. func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } // RemoveXattr implements fs.InodeOperations.RemoveXattr. func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } // InodeNoopRelease implements fs.InodeOperations.Release as a noop. @@ -512,7 +513,7 @@ type InodeNotAllocatable struct{} // Allocate implements fs.InodeOperations.Allocate. func (InodeNotAllocatable) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } // InodeNoopAllocate implements fs.InodeOperations.Allocate as a noop. 
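As a rough illustration of the xattr flag handling retained above (now returning linuxerr sentinels), the sketch below exercises InodeSimpleExtendedAttributes directly. The package and function name are hypothetical, and the nil context and inode are acceptable only because this mixin ignores both parameters.

package example // hypothetical

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
)

// xattrFlagDemo walks through the XATTR_CREATE/XATTR_REPLACE behavior shown
// in SetXattr above, starting from the usable zero value of the mixin.
func xattrFlagDemo() error {
	var x fsutil.InodeSimpleExtendedAttributes

	// XATTR_REPLACE on a name that was never set yields ENODATA.
	if err := x.SetXattr(nil, nil, "user.key", "v1", linux.XATTR_REPLACE); !linuxerr.Equals(linuxerr.ENODATA, err) {
		return err
	}
	// A plain set succeeds; XATTR_CREATE on the same name then yields EEXIST.
	if err := x.SetXattr(nil, nil, "user.key", "v1", 0); err != nil {
		return err
	}
	if err := x.SetXattr(nil, nil, "user.key", "v2", linux.XATTR_CREATE); !linuxerr.Equals(linuxerr.EEXIST, err) {
		return err
	}
	return nil
}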
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go index e107c3096..25e76d9f2 100644 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ b/pkg/sentry/fs/fsutil/inode_cached_test.go @@ -20,13 +20,13 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -244,7 +244,7 @@ func (*sliceBackingFile) FD() int { } func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } type noopMappingSpace struct{} diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index 94cb05246..c08301d19 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -26,6 +26,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/hostarch", "//pkg/log", diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index da3178527..9ff64a8b6 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" @@ -476,7 +477,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi switch d.Inode.StableAttr.Type { case fs.Socket: if i.session().overrides != nil { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } return i.getFileSocket(ctx, d, flags) case fs.Pipe: @@ -676,7 +677,7 @@ func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string // Getlink implementfs fs.InodeOperations.Getlink. func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { if !fs.IsSymlink(i.fileState.sattr) { - return nil, syserror.ENOLINK + return nil, linuxerr.ENOLINK } return nil, fs.ErrResolveViaReadlink } @@ -714,7 +715,7 @@ func (i *inodeOperations) configureMMap(file *fs.File, opts *memmap.MMapOpts) er if i.fileState.hostMappable != nil { return fsutil.GenericConfigureMMap(file, i.fileState.hostMappable, opts) } - return syserror.ENODEV + return linuxerr.ENODEV } func init() { diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index e2af1d2ae..19f91f010 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -112,13 +112,6 @@ func (i *inodeFileState) loadLoading(_ struct{}) { // +checklocks:i.loading func (i *inodeFileState) afterLoad() { load := func() (err error) { - // See comment on i.loading(). - defer func() { - if err == nil { - i.loading.Unlock() - } - }() - // Manually restore the p9.File. name, ok := i.s.inodeMappings[i.sattr.InodeID] if !ok { @@ -167,6 +160,9 @@ func (i *inodeFileState) afterLoad() { i.savedUAttr = nil } + // See comment on i.loading(). This only unlocks on the + // non-error path. + i.loading.Unlock() // +checklocksforce: per comment. 
return nil } diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 940838a44..88d83060c 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/device" @@ -43,10 +44,11 @@ func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode { // policy. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { if len(name) > maxFilenameLen { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } - cp := i.session().cachePolicy + s := i.session() + cp := s.cachePolicy if cp.cacheReaddir() { // Check to see if we have readdirCache that indicates the // child does not exist. Avoid holding readdirMu longer than @@ -66,7 +68,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string // Get a p9.File for name. qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name}) if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { if cp.cacheNegativeDirents() { // Return a negative Dirent. It will stay cached until something // is created over it. @@ -77,7 +79,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, err } - if i.session().overrides != nil { + if s.overrides != nil { // Check if file belongs to a internal named pipe. Note that it doesn't need // to check for sockets because it's done in newInodeOperations below. deviceKey := device.MultiDeviceKey{ @@ -85,13 +87,13 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string SecondaryDevice: i.session().connID, Inode: qids[0].Path, } - unlock := i.session().overrides.lock() - if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil { - unlock() + s.overrides.lock() + if pipeInode := s.overrides.getPipe(deviceKey); pipeInode != nil { + s.overrides.unlock() pipeInode.IncRef() return fs.NewDirent(ctx, pipeInode, name), nil } - unlock() + s.overrides.unlock() } // Construct the Inode operations. @@ -106,7 +108,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string // Ownership is currently ignored. func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { if len(name) > maxFilenameLen { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } // Create replaces the directory fid with the newly created/opened @@ -195,7 +197,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string // CreateLink uses Create to create a symlink between oldname and newname. func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { if len(newname) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } owner := fs.FileOwnerFromContext(ctx) @@ -209,29 +211,32 @@ func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname // CreateHardLink implements InodeOperations.CreateHardLink. 
func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, target *fs.Inode, newName string) error { if len(newName) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } targetOpts, ok := target.InodeOperations.(*inodeOperations) if !ok { - return syserror.EXDEV + return linuxerr.EXDEV } if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(inode) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(inode) { // Increase link count. targetOpts.cachingInodeOps.IncLinks(ctx) } + i.touchModificationAndStatusChangeTime(ctx, inode) return nil } // CreateDirectory uses Create to create a directory named s under inodeOperations. -func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s string, perm fs.FilePermissions) error { - if len(s) > maxFilenameLen { - return syserror.ENAMETOOLONG +func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { + if len(name) > maxFilenameLen { + return linuxerr.ENAMETOOLONG } // If the parent directory has setgid enabled, change the new directory's @@ -246,16 +251,18 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s perm.SetGID = true } - if _, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { + if _, err := i.fileState.file.mkdir(ctx, name, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(dir) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(dir) { // Increase link count. // // N.B. This will update the modification time. i.cachingInodeOps.IncLinks(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. i.markDirectoryDirty() } @@ -265,16 +272,17 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s // Bind implements InodeOperations.Bind. func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { if len(name) > maxFilenameLen { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } - if i.session().overrides == nil { - return nil, syserror.EOPNOTSUPP + s := i.session() + if s.overrides == nil { + return nil, linuxerr.EOPNOTSUPP } // Stabilize the override map while creation is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) if err != nil { @@ -283,22 +291,23 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) + s.overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) return childDir, nil } // CreateFifo implements fs.InodeOperations.CreateFifo. func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } owner := fs.FileOwnerFromContext(ctx) mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe // N.B. 
FIFOs use major/minor numbers 0. + s := i.session() if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - if i.session().overrides == nil || err != syserror.EPERM { + if s.overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) { return err } // If gofer doesn't support mknod, check if we can create an internal fifo. @@ -310,13 +319,14 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st } func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { - if i.session().overrides == nil { - return syserror.EPERM + s := i.session() + if s.overrides == nil { + return linuxerr.EPERM } // Stabilize the override map while creation is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) if err != nil { @@ -335,7 +345,7 @@ func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode) + s.overrides.addPipe(fileOps.fileState.key, childDir, inode) return nil } @@ -382,11 +392,12 @@ func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, // Remove implements InodeOperations.Remove. func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } + s := i.session() var key *device.MultiDeviceKey - if i.session().overrides != nil { + if s.overrides != nil { // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { @@ -401,8 +412,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string } // Stabilize the override map while deletion is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() } } } @@ -411,7 +422,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return err } if key != nil { - i.session().overrides.remove(ctx, *key) + s.overrides.remove(ctx, *key) } i.touchModificationAndStatusChangeTime(ctx, dir) @@ -421,18 +432,20 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string // Remove implements InodeOperations.RemoveDirectory. func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } // 0x200 = AT_REMOVEDIR. if err := i.fileState.file.unlinkAt(ctx, name, 0x200); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(dir) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(dir) { // Decrease link count and updates atime. i.cachingInodeOps.DecLinks(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. i.markDirectoryDirty() } @@ -442,12 +455,12 @@ func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, na // Rename renames this node. 
func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { if len(newName) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } // Don't allow renames across different mounts. if newParent.MountSource != oldParent.MountSource { - return syserror.EXDEV + return linuxerr.EXDEV } // Unwrap the new parent to a *inodeOperations. @@ -462,12 +475,13 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } // Is the renamed entity a directory? Fix link counts. + s := i.session() if fs.IsDir(i.fileState.sattr) { // Update cached state. - if i.session().cachePolicy.cacheUAttrs(oldParent) { + if s.cachePolicy.cacheUAttrs(oldParent) { oldParentInodeOperations.cachingInodeOps.DecLinks(ctx) } - if i.session().cachePolicy.cacheUAttrs(newParent) { + if s.cachePolicy.cacheUAttrs(newParent) { // Only IncLinks if there is a new addition to // newParent. If this is replacement, then the total // count remains the same. @@ -476,7 +490,7 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } } } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Mark old directory dirty. oldParentInodeOperations.markDirectoryDirty() if oldParent != newParent { @@ -486,17 +500,18 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } // Rename always updates ctime. - if i.session().cachePolicy.cacheUAttrs(inode) { + if s.cachePolicy.cacheUAttrs(inode) { i.cachingInodeOps.TouchStatusChangeTime(ctx) } return nil } func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) { - if i.session().cachePolicy.cacheUAttrs(inode) { + s := i.session() + if s.cachePolicy.cacheUAttrs(inode) { i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. i.markDirectoryDirty() } diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 7cf3522ff..b7debeecb 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -98,9 +98,14 @@ func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) { // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *overrideMaps) lock() func() { +// +checklocksacquire:e.mu +func (e *overrideMaps) lock() { e.mu.Lock() - return func() { e.mu.Unlock() } +} + +// +checklocksrelease:e.mu +func (e *overrideMaps) unlock() { + e.mu.Unlock() } // getBoundEndpoint returns the bound endpoint mapped to the given key. @@ -366,8 +371,8 @@ func newOverrideMaps() *overrideMaps { // fillKeyMap populates key and dirent maps upon restore from saved pathmap. func (s *session) fillKeyMap(ctx context.Context) error { - unlock := s.overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() for ep, dirPath := range s.overrides.pathMap { _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) @@ -394,8 +399,8 @@ func (s *session) fillKeyMap(ctx context.Context) error { // fillPathMap populates paths for overrides from dirents in direntMap // before save. 
func (s *session) fillPathMap(ctx context.Context) error { - unlock := s.overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() for _, endpoint := range s.overrides.keyMap { mountRoot := endpoint.dirent.MountRoot() diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 8a1c69ac2..1fd8a0910 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -32,10 +32,11 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return nil } - if i.session().overrides != nil { - unlock := i.session().overrides.lock() - defer unlock() - ep := i.session().overrides.getBoundEndpoint(i.fileState.key) + s := i.session() + if s.overrides != nil { + s.overrides.lock() + defer s.overrides.unlock() + ep := s.overrides.getBoundEndpoint(i.fileState.key) if ep != nil { return ep } diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 3c45f6cc5..24fc6305c 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -28,9 +28,9 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fdnotifier", - "//pkg/iovec", "//pkg/log", "//pkg/marshal/primitive", "//pkg/refs", @@ -40,6 +40,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/hostfd", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index 07bd078b7..77c08a7ce 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -19,6 +19,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" @@ -268,7 +269,7 @@ func (f *fileOperations) Flush(context.Context, *fs.File) error { // ConfigureMMap implements fs.FileOperations.ConfigureMMap. func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error { if !canMap(file.Dirent.Inode) { - return syserror.ENODEV + return linuxerr.ENODEV } return fsutil.GenericConfigureMMap(file, f.iops.cachingInodeOps, opts) } diff --git a/pkg/sentry/fs/host/host.go b/pkg/sentry/fs/host/host.go index 081ba1dd8..9f6dbd7e9 100644 --- a/pkg/sentry/fs/host/host.go +++ b/pkg/sentry/fs/host/host.go @@ -17,8 +17,8 @@ package host import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" ) // filesystem is a host filesystem. @@ -40,7 +40,7 @@ func (*filesystem) Name() string { // Mount returns an error. Mounting hostfs is not allowed. func (*filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, dataObj interface{}) (*fs.Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // AllowUserMount prohibits users from using mount(2) with this file system. 
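Several of the changes above (overrideMaps in gofer/session.go, Dirent locking in fs/dirent.go) replace closure-returning lock helpers with paired lock/unlock methods so the checklocks analyzer can follow the mutex across calls. A generic sketch of that pattern, using a hypothetical registry type rather than the real ones:

package example // hypothetical

import "gvisor.dev/gvisor/pkg/sync"

// registry is a stand-in type; the real code applies this shape to
// overrideMaps and Dirent.
type registry struct {
	mu      sync.Mutex
	entries map[string]int
}

// lock acquires mu. The annotation lets checklocks verify that every caller
// path eventually calls unlock.
// +checklocksacquire:r.mu
func (r *registry) lock() {
	r.mu.Lock()
}

// unlock releases mu, the reverse of lock.
// +checklocksrelease:r.mu
func (r *registry) unlock() {
	r.mu.Unlock()
}

// add shows the caller side: explicit lock with a deferred unlock, instead of
// deferring a returned closure.
func (r *registry) add(name string, v int) {
	r.lock()
	defer r.unlock()
	if r.entries == nil {
		r.entries = make(map[string]int)
	}
	r.entries[name] = v
}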
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index e299b532c..5f6af2067 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -17,6 +17,7 @@ package host import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" @@ -113,7 +114,7 @@ func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMa return nil } if mask.UID || mask.GID { - return syserror.EPERM + return linuxerr.EPERM } if mask.Perms { if err := unix.Fchmod(i.FD(), uint32(attr.Perms.LinuxMode())); err != nil { @@ -224,48 +225,48 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string // Create implements fs.InodeOperations.Create. func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // CreateDirectory implements fs.InodeOperations.CreateDirectory. func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - return syserror.EPERM + return linuxerr.EPERM } // CreateLink implements fs.InodeOperations.CreateLink. func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { - return syserror.EPERM + return linuxerr.EPERM } // CreateHardLink implements fs.InodeOperations.CreateHardLink. func (*inodeOperations) CreateHardLink(context.Context, *fs.Inode, *fs.Inode, string) error { - return syserror.EPERM + return linuxerr.EPERM } // CreateFifo implements fs.InodeOperations.CreateFifo. func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePermissions) error { - return syserror.EPERM + return linuxerr.EPERM } // Remove implements fs.InodeOperations.Remove. func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { - return syserror.EPERM + return linuxerr.EPERM } // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { - return syserror.EPERM + return linuxerr.EPERM } // Rename implements fs.InodeOperations.Rename. func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { - return syserror.EPERM + return linuxerr.EPERM } // Bind implements fs.InodeOperations.Bind. func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } // BoundEndpoint implements fs.InodeOperations.BoundEndpoint. @@ -276,7 +277,7 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. // GetFile implements fs.InodeOperations.GetFile. func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { if fs.IsSocket(d.Inode.StableAttr) { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } return newFile(ctx, d, flags, i), nil @@ -313,7 +314,7 @@ func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermM // SetOwner implements fs.InodeOperations.SetOwner. 
func (i *inodeOperations) SetOwner(context.Context, *fs.Inode, fs.FileOwner) error { - return syserror.EPERM + return linuxerr.EPERM } // SetPermissions implements fs.InodeOperations.SetPermissions. @@ -392,7 +393,7 @@ func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string // Getlink implements fs.InodeOperations.Getlink. func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { if !fs.IsSymlink(i.fileState.sattr) { - return nil, syserror.ENOLINK + return nil, linuxerr.ENOLINK } return nil, fs.ErrResolveViaReadlink } diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 46a2dc47d..54c421775 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/refs" @@ -31,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/waiter" @@ -211,9 +211,9 @@ func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMess if n < totalLen && err == nil { // The host only returns a short write if it would otherwise // block (and only for stream sockets). - err = syserror.EAGAIN + err = linuxerr.EAGAIN } - if n > 0 && err != syserror.EAGAIN { + if n > 0 && !linuxerr.Equals(linuxerr.EAGAIN, err) { // The caller may need to block to send more data, but // otherwise there isn't anything that can be done about an // error with a partial write. diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go index 7380d75e7..d98e3c6d1 100644 --- a/pkg/sentry/fs/host/socket_iovec.go +++ b/pkg/sentry/fs/host/socket_iovec.go @@ -16,8 +16,8 @@ package host import ( "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/iovec" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/hostfd" ) // LINT.IfChange @@ -66,13 +66,13 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec if length > maxlen { if truncate { stopLen = maxlen - err = syserror.EAGAIN + err = linuxerr.EAGAIN } else { - return 0, nil, nil, syserror.EMSGSIZE + return 0, nil, nil, linuxerr.EMSGSIZE } } - if iovsRequired > iovec.MaxIovs { + if iovsRequired > hostfd.MaxSendRecvMsgIov { // The kernel will reject our call if we pass this many iovs. // Use a single intermediate buffer instead. b := make([]byte, stopLen) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 1183727ab..6f38b25c3 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -17,6 +17,7 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -126,7 +127,7 @@ func (t *TTYFileOperations) Release(ctx context.Context) { func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // Ignore arg[0]. 
This is the real FD: @@ -167,7 +168,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO pidns := kernel.PIDNamespaceFromContext(ctx) if pidns == nil { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } t.mu.Lock() @@ -191,8 +192,8 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { // drivers/tty/tty_io.c:tiocspgrp() converts -EIO from // tty_check_change() to -ENOTTY. - if err == syserror.EIO { - return 0, syserror.ENOTTY + if linuxerr.Equals(linuxerr.EIO, err) { + return 0, linuxerr.ENOTTY } return 0, err } @@ -200,7 +201,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO // Check that calling task's process group is in the TTY // session. if task.ThreadGroup().Session() != t.session { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } var pgIDP primitive.Int32 @@ -211,19 +212,19 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO // pgID must be non-negative. if pgID < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Process group with pgID must exist in this PID namespace. pidns := task.PIDNamespace() pg := pidns.ProcessGroupWithID(pgID) if pg == nil { - return 0, syserror.ESRCH + return 0, linuxerr.ESRCH } // Check that new process group is in the TTY session. if pg.Session() != t.session { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } t.fgProcessGroup = pg @@ -283,7 +284,7 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO unimpl.EmitUnimplementedEvent(ctx) fallthrough default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index ab74724a3..e7db79189 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -19,12 +19,12 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) func nodeType(s *unix.Stat_t) fs.InodeType { @@ -98,7 +98,7 @@ type dirInfo struct { // isBlockError unwraps os errors and checks if they are caused by EAGAIN or // EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock. func isBlockError(err error) bool { - if err == syserror.EAGAIN || err == syserror.EWOULDBLOCK { + if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) { return true } if pe, ok := err.(*os.PathError); ok { diff --git a/pkg/sentry/fs/host/util_amd64_unsafe.go b/pkg/sentry/fs/host/util_amd64_unsafe.go index 21782f1da..e90629f4e 100644 --- a/pkg/sentry/fs/host/util_amd64_unsafe.go +++ b/pkg/sentry/fs/host/util_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package host diff --git a/pkg/sentry/fs/host/util_arm64_unsafe.go b/pkg/sentry/fs/host/util_arm64_unsafe.go index ed8f5242a..9fbb93726 100644 --- a/pkg/sentry/fs/host/util_arm64_unsafe.go +++ b/pkg/sentry/fs/host/util_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
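The recurring change in the hunks above is the one shown in host/util.go's isBlockError and host/socket.go's Send: direct equality checks such as err == syserror.EAGAIN are replaced by linuxerr.Equals(linuxerr.EAGAIN, err), so the check keeps working regardless of which package produced the error value. The sketch below is illustrative only and not part of the patch; it assumes linuxerr.Equals also matches an equivalent errno-valued error, which is inferred from its use in isBlockError rather than stated in this diff.

package example

import (
	"os"

	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// wouldBlock mirrors isBlockError from the hunk above: report EAGAIN or
// EWOULDBLOCK, unwrapping *os.PathError so errors returned by the os package
// are classified the same way as raw errno values.
func wouldBlock(err error) bool {
	if linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) {
		return true
	}
	if pe, ok := err.(*os.PathError); ok {
		return wouldBlock(pe.Err)
	}
	return false
}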
+//go:build arm64 // +build arm64 package host diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 41a3c2047..ec204e5cf 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -17,6 +17,7 @@ package fs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" @@ -298,7 +299,7 @@ func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error { func (i *Inode) CheckPermission(ctx context.Context, p PermMask) error { // First check the outer-most mounted filesystem. if p.Write && i.MountSource.Flags.ReadOnly { - return syserror.EROFS + return linuxerr.EROFS } if i.overlay != nil { @@ -312,7 +313,7 @@ func (i *Inode) CheckPermission(ctx context.Context, p PermMask) error { // we should not attempt to modify the writable layer if it // is mounted read-only. if p.Write && overlayUpperMountSource(i.MountSource).Flags.ReadOnly { - return syserror.EROFS + return linuxerr.EROFS } } @@ -324,7 +325,7 @@ func (i *Inode) check(ctx context.Context, p PermMask) error { return overlayCheck(ctx, i.overlay, p) } if !i.InodeOperations.Check(ctx, i, p) { - return syserror.EACCES + return linuxerr.EACCES } return nil } diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 2bbfb72ef..98e9fb2b1 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -283,7 +283,7 @@ type InodeOperations interface { // // Any error returned from Getlink other than ErrResolveViaReadlink // indicates the caller's inability to traverse this Inode as a link - // (e.g. syserror.ENOLINK indicates that the Inode is not a link, + // (e.g. linuxerr.ENOLINK indicates that the Inode is not a link, // syscall.EPERM indicates that traversing the link is not allowed, etc). Getlink(context.Context, *Inode) (*Dirent, error) diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index e97afc626..c47b9ce58 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" @@ -71,7 +72,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name // A file could have been created over a whiteout, so we need to // check if something exists in the upper file system first. child, err := parent.upper.Lookup(ctx, name) - if err != nil && err != syserror.ENOENT { + if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { // We encountered an error that an overlay cannot handle, // we must propagate it to the caller. parent.copyMu.RUnlock() @@ -125,7 +126,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name // Check the lower file system. child, err := parent.lower.Lookup(ctx, name) // Same song and dance as above. - if err != nil && err != syserror.ENOENT { + if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { // Don't leak resources. 
if upperInode != nil { upperInode.DecRef(ctx) @@ -343,7 +344,7 @@ func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child * return err } if ser.Written() != 0 { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } if child.Inode.overlay.upper != nil { @@ -374,7 +375,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena // Maybe some day we can allow the more complicated case of // non-overlay X overlay renames, but that's not necessary right now. if renamed.Inode.overlay == nil || newParent.Inode.overlay == nil || oldParent.Inode.overlay == nil { - return syserror.EXDEV + return linuxerr.EXDEV } if replacement { @@ -396,7 +397,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena // newName has been removed out from under us. That's fine; // filesystems where that can happen must handle stale // 'replaced'. - if err != nil && err != syserror.ENOENT { + if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { return err } if err == nil { @@ -420,7 +421,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena // need to bother checking for them. if len(children) > 0 { replaced.DecRef(ctx) - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } @@ -552,7 +553,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin // Don't forward the value of the extended attribute if it would // unexpectedly change the behavior of a wrapping overlay layer. if isXattrOverlay(name) { - return "", syserror.ENODATA + return "", linuxerr.ENODATA } o.copyMu.RLock() @@ -568,7 +569,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin func overlaySetXattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error { // Don't allow changes to overlay xattrs through a setxattr syscall. if isXattrOverlay(name) { - return syserror.EPERM + return linuxerr.EPERM } if err := copyUp(ctx, d); err != nil { @@ -600,7 +601,7 @@ func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[st func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error { // Don't allow changes to overlay xattrs through a removexattr syscall. if isXattrOverlay(name) { - return syserror.EPERM + return linuxerr.EPERM } if err := copyUp(ctx, d); err != nil { @@ -687,7 +688,7 @@ func overlayGetlink(ctx context.Context, o *overlayEntry) (*Dirent, error) { dirent.DecRef(ctx) // Claim that the path is not accessible. 
- err = syserror.EACCES + err = linuxerr.EACCES log.Warningf("Getlink not supported in overlay for %q", name) } return nil, err diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index aa9851b26..a3800d700 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -18,11 +18,11 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/syserror" ) func TestLookup(t *testing.T) { @@ -191,11 +191,11 @@ func TestLookup(t *testing.T) { } { t.Run(test.desc, func(t *testing.T) { dirent, err := test.dir.Lookup(ctx, test.name) - if test.found && (err == syserror.ENOENT || dirent.IsNegative()) { + if test.found && (linuxerr.Equals(linuxerr.ENOENT, err) || dirent.IsNegative()) { t.Fatalf("lookup %q expected to find positive dirent, got dirent %v err %v", test.name, dirent, err) } if !test.found { - if err != syserror.ENOENT && !dirent.IsNegative() { + if !linuxerr.Equals(linuxerr.ENOENT, err) && !dirent.IsNegative() { t.Errorf("lookup %q expected to return ENOENT or negative dirent, got dirent %v err %v", test.name, dirent, err) } // Nothing more to check. @@ -389,7 +389,7 @@ func (d *dir) GetXattr(_ context.Context, _ *fs.Inode, name string, _ uint64) (s return "y", nil } } - return "", syserror.ENOATTR + return "", linuxerr.ENOATTR } // GetFile implements InodeOperations.GetFile. diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index 1b83643db..ee28b0f99 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -116,23 +117,23 @@ func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask { // Seek implements FileOperations.Seek. func (*Inotify) Seek(context.Context, *File, SeekWhence, int64) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Readdir implements FileOperatons.Readdir. func (*Inotify) Readdir(context.Context, *File, DentrySerializer) (int64, error) { - return 0, syserror.ENOTDIR + return 0, linuxerr.ENOTDIR } // Write implements FileOperations.Write. func (*Inotify) Write(context.Context, *File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Read implements FileOperations.Read. func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ int64) (int64, error) { if dst.NumBytes() < inotifyEventBaseSize { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } i.evMu.Lock() @@ -156,7 +157,7 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i // write some events out. return writeLen, nil } - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Linux always dequeues an available event as long as there's enough @@ -183,7 +184,7 @@ func (*Inotify) WriteTo(context.Context, *File, io.Writer, int64, bool) (int64, // Fsync implements FileOperations.Fsync. func (*Inotify) Fsync(context.Context, *File, int64, int64, SyncType) error { - return syserror.EINVAL + return linuxerr.EINVAL } // ReadFrom implements FileOperations.ReadFrom. 
@@ -198,7 +199,7 @@ func (*Inotify) Flush(context.Context, *File) error { // ConfigureMMap implements FileOperations.ConfigureMMap. func (*Inotify) ConfigureMMap(context.Context, *File, *memmap.MMapOpts) error { - return syserror.ENODEV + return linuxerr.ENODEV } // UnstableAttr implements FileOperations.UnstableAttr. @@ -222,7 +223,7 @@ func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.S return 0, err default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } @@ -329,7 +330,7 @@ func (i *Inotify) RmWatch(ctx context.Context, wd int32) error { watch, ok := i.watches[wd] if !ok { i.mu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } // Remove the watch from this instance. diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go index 1d6ea5736..fba7b961b 100644 --- a/pkg/sentry/fs/mock.go +++ b/pkg/sentry/fs/mock.go @@ -16,7 +16,7 @@ package fs import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // MockInodeOperations implements InodeOperations for testing Inodes. @@ -109,7 +109,7 @@ func (n *MockInodeOperations) SetPermissions(context.Context, *Inode, FilePermis // SetOwner implements fs.InodeOperations.SetOwner. func (*MockInodeOperations) SetOwner(context.Context, *Inode, FileOwner) error { - return syserror.EINVAL + return linuxerr.EINVAL } // SetTimestamps implements fs.InodeOperations.SetTimestamps. @@ -172,5 +172,5 @@ func (n *MockInodeOperations) RemoveDirectory(context.Context, *Inode, string) e // Getlink implements fs.InodeOperations.Getlink. func (n *MockInodeOperations) Getlink(context.Context, *Inode) (*Dirent, error) { - return nil, syserror.ENOLINK + return nil, linuxerr.ENOLINK } diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 243098a09..10146af4e 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -20,10 +20,10 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // DefaultTraversalLimit provides a sensible default traversal limit that may @@ -281,7 +281,7 @@ func (mns *MountNamespace) withMountLocked(node *Dirent, fn func() error) error // Linux allows mounting over the root (?). It comes with a strange set // of semantics. We'll just not do this for now. if node.parent == nil { - return syserror.EBUSY + return linuxerr.EBUSY } // For both mount and unmount, we take this lock so we can swap out the @@ -357,7 +357,7 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly orig, ok := mns.mounts[node] if !ok { // node is not a mount point. - return syserror.EINVAL + return linuxerr.EINVAL } if orig.previous == nil { @@ -380,7 +380,7 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly if refs := m.DirentRefs(); refs < 2 { panic(fmt.Sprintf("have %d refs on unmount, expect 2 or more", refs)) } else if refs != 2 { - return syserror.EBUSY + return linuxerr.EBUSY } } @@ -497,7 +497,7 @@ func (mns *MountNamespace) FindLink(ctx context.Context, root, wd *Dirent, path if current != root { if !IsDir(current.Inode.StableAttr) { current.DecRef(ctx) // Drop reference from above. - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := current.Inode.CheckPermission(ctx, PermMask{Execute: true}); err != nil { current.DecRef(ctx) // Drop reference from above. 
@@ -566,8 +566,8 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema // Resolve the path. target, err := node.Inode.Getlink(ctx) - switch err { - case nil: + switch { + case err == nil: // Make sure we didn't exhaust the traversal budget. if *remainingTraversals == 0 { target.DecRef(ctx) @@ -577,11 +577,11 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema node.DecRef(ctx) // Drop the original reference. return target, nil - case unix.ENOLINK: + case linuxerr.Equals(linuxerr.ENOLINK, err): // Not a symlink. return node, nil - case ErrResolveViaReadlink: + case err == ErrResolveViaReadlink: defer node.DecRef(ctx) // See above. // First, check if we should traverse. diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index f96f5a3e5..7e72e47b5 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -19,11 +19,11 @@ import ( "strings" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // The virtual filesystem implements an overlay configuration. For a high-level @@ -218,7 +218,7 @@ func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExist // We don't support copying up from character devices, // named pipes, or anything weird (like proc files). log.Warningf("%s not supported in lower filesytem", lower.StableAttr.Type) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } return &overlayEntry{ diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 7af7e0b45..e6d74b949 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -30,6 +30,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go index 24426b225..379429ab2 100644 --- a/pkg/sentry/fs/proc/exec_args.go +++ b/pkg/sentry/fs/proc/exec_args.go @@ -21,11 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -104,7 +104,7 @@ var _ fs.FileOperations = (*execArgFile)(nil) // Read reads the exec arg from the process's address space.. 
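The MountNamespace.resolve hunk above shows a knock-on effect of the migration: because linuxerr.Equals is a function call rather than a comparable sentinel, the old value switch (switch err { case nil: ... case unix.ENOLINK: ... }) can no longer be used and is rewritten as a tagless switch. A minimal sketch of that shape follows; classifyGetlink and its return strings are hypothetical placeholders, not code from the patch.

package example

import "gvisor.dev/gvisor/pkg/errors/linuxerr"

// classifyGetlink shows the tagless-switch pattern used in resolve() above:
// each case is an arbitrary boolean expression, so sentinel comparisons and
// linuxerr.Equals calls can be mixed in one switch.
func classifyGetlink(err error) string {
	switch {
	case err == nil:
		return "followed the link"
	case linuxerr.Equals(linuxerr.ENOLINK, err):
		return "not a symlink; use the node as-is"
	default:
		return "propagate the error"
	}
}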
func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } m, err := getTaskMM(f.t) diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 91c35eea9..187e9a921 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -34,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -291,7 +291,7 @@ func (n *netSnmp) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s continue } if err := n.s.Statistics(stat, line.prefix); err != nil { - if err == syserror.EOPNOTSUPP { + if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) } else { log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 2f2a9f920..546b57287 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -21,6 +21,7 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" @@ -130,7 +131,7 @@ func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { } // Who is reading this link? - return "", syserror.EINVAL + return "", linuxerr.EINVAL } // threadSelf is more magical than "self" link. @@ -154,7 +155,7 @@ func (s *threadSelf) Readlink(ctx context.Context, inode *fs.Inode) (string, err } // Who is reading this link? - return "", syserror.EINVAL + return "", linuxerr.EINVAL } // Lookup loads an Inode at name into a Dirent. diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 713b81e08..90bd32345 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -9,13 +9,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/proc/device", "//pkg/sentry/kernel/time", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index b01688b1d..77270814e 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -20,13 +20,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -204,7 +204,7 @@ var _ fs.FileOperations = (*seqFileOperations)(nil) // Write implements fs.FileOperations.Write. 
func (*seqFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } // Read implements fs.FileOperations.Read. diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 4893af56b..71f37d582 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -592,7 +592,7 @@ func (pf *portRangeFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSe // Port numbers must be uint16s. if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if err := pf.inode.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil { diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index ae5ed25f9..edd62b857 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -46,7 +47,7 @@ import ( // no longer in use. func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) { if t.ExitState() == kernel.TaskExitDead { - return nil, syserror.ESRCH + return nil, linuxerr.ESRCH } var m *mm.MemoryManager t.WithMuLocked(func(t *kernel.Task) { @@ -61,9 +62,9 @@ func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) { func checkTaskState(t *kernel.Task) error { switch t.ExitState() { case kernel.TaskExitZombie: - return syserror.EACCES + return linuxerr.EACCES case kernel.TaskExitDead: - return syserror.ESRCH + return linuxerr.ESRCH } return nil } @@ -272,7 +273,7 @@ func (e *exe) executable() (file fsbridge.File, err error) { e.t.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - err = syserror.EACCES + err = linuxerr.EACCES return } @@ -281,7 +282,7 @@ func (e *exe) executable() (file fsbridge.File, err error) { // (with locks held). file = mm.Executable() if file == nil { - err = syserror.ESRCH + err = linuxerr.ESRCH } }) return @@ -290,7 +291,7 @@ func (e *exe) executable() (file fsbridge.File, err error) { // Readlink implements fs.InodeOperations. func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { if !kernel.ContextCanTrace(ctx, e.t, false) { - return "", syserror.EACCES + return "", linuxerr.EACCES } // Pull out the executable for /proc/TID/exe. @@ -323,7 +324,7 @@ func newCwd(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode // Readlink implements fs.InodeOperations. 
func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { if !kernel.ContextCanTrace(ctx, e.t, false) { - return "", syserror.EACCES + return "", linuxerr.EACCES } if err := checkTaskState(e.t); err != nil { return "", err @@ -331,14 +332,14 @@ func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { cwd := e.t.FSContext().WorkingDirectory() if cwd == nil { // It could have raced with process deletion. - return "", syserror.ESRCH + return "", linuxerr.ESRCH } defer cwd.DecRef(ctx) root := fs.RootFromContext(ctx) if root == nil { // It could have raced with process deletion. - return "", syserror.ESRCH + return "", linuxerr.ESRCH } defer root.DecRef(ctx) @@ -380,7 +381,7 @@ func (n *namespaceSymlink) Readlink(ctx context.Context, inode *fs.Inode) (strin // Getlink implements fs.InodeOperations.Getlink. func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) { if !kernel.ContextCanTrace(ctx, n.t, false) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } if err := checkTaskState(n.t); err != nil { return nil, err @@ -448,7 +449,7 @@ func (m *memData) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileF // Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS // Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH if !kernel.ContextCanTrace(ctx, m.t, true) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } if err := checkTaskState(m.t); err != nil { return nil, err @@ -473,7 +474,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen n, readErr := mm.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) if n > 0 { if _, err := dst.CopyOut(ctx, buf[:n]); err != nil { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return int64(n), nil } @@ -867,7 +868,7 @@ var _ fs.FileOperations = (*commFile)(nil) // Read implements fs.FileOperations.Read. func (f *commFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } buf := []byte(f.t.Name() + "\n") @@ -922,7 +923,7 @@ type auxvecFile struct { // Read implements fs.FileOperations.Read. func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } m, err := getTaskMM(f.t) @@ -1003,7 +1004,7 @@ func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F // Read implements fs.FileOperations.Read. 
func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if f.t.ExitState() == kernel.TaskExitDead { - return 0, syserror.ESRCH + return 0, linuxerr.ESRCH } var buf bytes.Buffer fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj()) @@ -1030,7 +1031,7 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS } if f.t.ExitState() == kernel.TaskExitDead { - return 0, syserror.ESRCH + return 0, linuxerr.ESRCH } if err := f.t.SetOOMScoreAdj(v); err != nil { return 0, err diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index 30d5ad4cf..fcdc1e7bd 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -21,12 +21,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -108,7 +108,7 @@ const maxIDMapLines = 5 // Read implements fs.FileOperations.Read. func (imfo *idMapFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var entries []auth.IDMapEntry if imfo.iops.gids { @@ -134,7 +134,7 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u // the file ..." - user_namespaces(7) srclen := src.NumBytes() if srclen >= hostarch.PageSize || offset != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } b := make([]byte, srclen) if _, err := src.CopyIn(ctx, b); err != nil { @@ -154,7 +154,7 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u } lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1) if len(lines) > maxIDMapLines { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } entries := make([]auth.IDMapEntry, len(lines)) @@ -162,7 +162,7 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u var e auth.IDMapEntry _, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length) if err != nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } entries[i] = e } diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go index c0f6fb802..ac896f963 100644 --- a/pkg/sentry/fs/proc/uptime.go +++ b/pkg/sentry/fs/proc/uptime.go @@ -20,10 +20,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -74,7 +74,7 @@ type uptimeFile struct { // Read implements fs.FileOperations.Read. 
func (f *uptimeFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } now := ktime.NowFromContext(ctx) diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index 4a3d9636b..b46567cf8 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -14,6 +14,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index 19990f9db..33023af77 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" @@ -178,7 +179,7 @@ func (d *Dir) Children() ([]string, map[string]fs.DentAttr) { func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) { inode, ok := d.children[name] if !ok { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } delete(d.children, name) @@ -208,7 +209,7 @@ func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, er // Remove removes the named non-directory. func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error { if len(name) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } d.mu.Lock() @@ -226,7 +227,7 @@ func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error { // RemoveDirectory removes the named directory. func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) error { if len(name) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } d.mu.Lock() @@ -240,7 +241,7 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err if ok, err := hasChildren(ctx, childInode); err != nil { return err } else if ok { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } // Child was empty. Proceed with removal. @@ -259,7 +260,7 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err // with a reference. func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) { if len(p) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } d.mu.Lock() @@ -292,7 +293,7 @@ func (d *Dir) walkLocked(ctx context.Context, p string) (*fs.Inode, error) { // makeInodeOperations. It is the common logic for creating a new child. func (d *Dir) createInodeOperationsCommon(ctx context.Context, name string, makeInodeOperations func() (*fs.Inode, error)) (*fs.Inode, error) { if len(name) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } d.mu.Lock() @@ -311,7 +312,7 @@ func (d *Dir) createInodeOperationsCommon(ctx context.Context, name string, make // Create creates a new Inode with the given name and returns its File. 
func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perms fs.FilePermissions) (*fs.File, error) { if d.CreateOps == nil || d.CreateOps.NewFile == nil { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { @@ -333,7 +334,7 @@ func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.F // CreateLink returns a new link. func (d *Dir) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname string) error { if d.CreateOps == nil || d.CreateOps.NewSymlink == nil { - return syserror.EACCES + return linuxerr.EACCES } _, err := d.createInodeOperationsCommon(ctx, newname, func() (*fs.Inode, error) { return d.NewSymlink(ctx, dir, oldname) @@ -344,7 +345,7 @@ func (d *Dir) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname st // CreateHardLink creates a new hard link. func (d *Dir) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inode, name string) error { if len(name) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } d.mu.Lock() @@ -362,7 +363,7 @@ func (d *Dir) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inod // CreateDirectory returns a new subdirectory. func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error { if d.CreateOps == nil || d.CreateOps.NewDir == nil { - return syserror.EACCES + return linuxerr.EACCES } _, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { return d.NewDir(ctx, dir, perms) @@ -373,7 +374,7 @@ func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, p // Bind implements fs.InodeOperations.Bind. func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) { if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { return d.NewBoundEndpoint(ctx, dir, ep, perms) @@ -392,7 +393,7 @@ func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport // CreateFifo implements fs.InodeOperations.CreateFifo. 
func (d *Dir) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error { if d.CreateOps == nil || d.CreateOps.NewFifo == nil { - return syserror.EACCES + return linuxerr.EACCES } _, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { return d.NewFifo(ctx, dir, perms) @@ -496,14 +497,14 @@ func hasChildren(ctx context.Context, inode *fs.Inode) (bool, error) { func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, newParent fs.InodeOperations, newName string, replacement bool) error { op, ok := oldParent.(*Dir) if !ok { - return syserror.EXDEV + return linuxerr.EXDEV } np, ok := newParent.(*Dir) if !ok { - return syserror.EXDEV + return linuxerr.EXDEV } if len(newName) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } np.mu.Lock() @@ -521,7 +522,7 @@ func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, n if ok, err := hasChildren(ctx, replaced); err != nil { return err } else if ok { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go index d0c565879..dc9d27bb3 100644 --- a/pkg/sentry/fs/ramfs/socket.go +++ b/pkg/sentry/fs/ramfs/socket.go @@ -17,10 +17,10 @@ package ramfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -64,7 +64,7 @@ func (s *Socket) BoundEndpoint(*fs.Inode, string) transport.BoundEndpoint { // GetFile implements fs.FileOperations.GetFile. func (s *Socket) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } // +stateify savable diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 33da82868..fff4befb2 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/syserror" ) @@ -28,7 +29,7 @@ import ( func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, error) { // Verify basic file flag permissions. if !dst.Flags().Write || !src.Flags().Read { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Check whether or not the objects being sliced are stream-oriented @@ -139,7 +140,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, // Attempt to do a WriteTo; this is likely the most efficient. n, err := src.FileOperations.WriteTo(ctx, src, w, opts.Length, opts.Dup) - if n == 0 && err == syserror.ENOSYS && !opts.Dup { + if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup { // Attempt as a ReadFrom. If a WriteTo, a ReadFrom may also be // more efficient than a copy if buffers are cached or readily // available. (It's unlikely that they can actually be donated). @@ -151,7 +152,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, // if we block at some point, we could lose data. If the source is // not a pipe then reading is not destructive; if the destination // is a regular file, then it is guaranteed not to block writing. 
- if n == 0 && err == syserror.ENOSYS && !opts.Dup && (!dstPipe || !srcPipe) { + if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup && (!dstPipe || !srcPipe) { // Fallback to an in-kernel copy. n, err = io.Copy(w, &io.LimitedReader{ R: r, diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index c7977a217..0148b33cf 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -8,6 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index c8ebe256c..093a14c1f 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -20,6 +20,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" @@ -121,7 +122,7 @@ func (t *TimerOperations) EventUnregister(e *waiter.Entry) { func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { const sizeofUint64 = 8 if dst.NumBytes() < sizeofUint64 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if val := atomic.SwapUint64(&t.val, 0); val != 0 { var buf [sizeofUint64]byte @@ -138,7 +139,7 @@ func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.I // Write implements fs.FileOperations.Write. func (t *TimerOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Notify implements ktime.TimerListener.Notify. diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 90398376a..511fffb43 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -15,6 +15,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/safemem", "//pkg/sentry/device", @@ -30,7 +31,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index 7faa822f0..1974523bf 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -31,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -150,7 +150,7 @@ func (*fileInodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldPare // GetFile implements fs.InodeOperations.GetFile. 
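Returning to fs/splice.go a few hunks up: the patch keeps the existing three-tier strategy and only changes the error test. Splice first tries src.WriteTo, falls back to dst.ReadFrom when that reports ENOSYS, and finally falls back to an in-kernel io.Copy when neither side implements a fast path and at most one endpoint is a pipe (so the copy cannot lose data on a blocked write). The sketch below is a simplified illustration of that cascade; copyWithFallback and fastPath are hypothetical stand-ins for the WriteTo/ReadFrom attempts, not gVisor APIs.

package example

import (
	"io"

	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// copyWithFallback mirrors the cascade in fs.Splice above: try a donation-style
// fast path first, then fall back to a plain bounded copy when the endpoints
// report ENOSYS (i.e. no optimized transfer is available).
func copyWithFallback(dst io.Writer, src io.Reader, limit int64,
	fastPath func(io.Writer, io.Reader, int64) (int64, error)) (int64, error) {
	n, err := fastPath(dst, src, limit)
	if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) {
		// No fast path; do an in-kernel copy, bounded by limit so we never
		// read more from src than the caller asked to splice.
		return io.Copy(dst, &io.LimitedReader{R: src, N: limit})
	}
	return n, err
}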
func (f *fileInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { if fs.IsSocket(d.Inode.StableAttr) { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } if flags.Write { @@ -217,7 +217,7 @@ func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size in fallthrough case oldSize > size && f.seals&linux.F_SEAL_SHRINK != 0: // Shrink sealed f.dataMu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } if oldSize != size { @@ -278,7 +278,7 @@ func (f *fileInodeOperations) Allocate(ctx context.Context, _ *fs.Inode, offset, // Check if current seals allow growth. if f.seals&linux.F_SEAL_GROW != 0 { - return syserror.EPERM + return linuxerr.EPERM } f.attr.Size = newSize @@ -455,13 +455,13 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes())) if end == math.MaxInt64 { // Overflow. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check if seals prevent either file growth or all writes. switch { case rw.f.seals&linux.F_SEAL_WRITE != 0: // Write sealed - return 0, syserror.EPERM + return 0, linuxerr.EPERM case end > rw.f.attr.Size && rw.f.seals&linux.F_SEAL_GROW != 0: // Grow sealed // When growth is sealed, Linux effectively allows writes which would // normally grow the file to partially succeed up to the current EOF, @@ -482,7 +482,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) } if end <= rw.offset { // Truncation would result in no data being written. - return 0, syserror.EPERM + return 0, linuxerr.EPERM } } @@ -550,7 +550,7 @@ func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingS // Reject writable mapping if F_SEAL_WRITE is set. if f.seals&linux.F_SEAL_WRITE != 0 && writable { - return syserror.EPERM + return linuxerr.EPERM } f.mappings.AddMapping(ms, ar, offset, writable) @@ -655,7 +655,7 @@ func GetSeals(inode *fs.Inode) (uint32, error) { return f.seals, nil } // Not a memfd inode. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // AddSeals adds new file seals to a memfd inode. @@ -668,13 +668,13 @@ func AddSeals(inode *fs.Inode, val uint32) error { if f.seals&linux.F_SEAL_SEAL != 0 { // Seal applied which prevents addition of any new seals. - return syserror.EPERM + return linuxerr.EPERM } // F_SEAL_WRITE can only be added if there are no active writable maps. if f.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 { if f.writableMappingPages > 0 { - return syserror.EBUSY + return linuxerr.EBUSY } } @@ -683,5 +683,5 @@ func AddSeals(inode *fs.Inode, val uint32) error { return nil } // Not a memfd inode. 
- return syserror.EINVAL + return linuxerr.EINVAL } diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 6aa8ff331..9a835b556 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) var fsInfo = fs.Info{ @@ -49,7 +49,7 @@ var fsInfo = fs.Info{ func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { // Don't allow renames across different mounts. if newParent.MountSource != oldParent.MountSource { - return syserror.EXDEV + return linuxerr.EXDEV } op := oldParent.InodeOperations.(*Dir) diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 86ada820e..5933cb67b 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -17,6 +17,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/refs", diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 13c9dbe7d..3242dcb6a 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -170,54 +171,54 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str // // Creation is never allowed. func (d *dirInodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } // CreateDirectory implements fs.InodeOperations.CreateDirectory. // // Creation is never allowed. func (d *dirInodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - return syserror.EACCES + return linuxerr.EACCES } // CreateLink implements fs.InodeOperations.CreateLink. // // Creation is never allowed. func (d *dirInodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname string) error { - return syserror.EACCES + return linuxerr.EACCES } // CreateHardLink implements fs.InodeOperations.CreateHardLink. // // Creation is never allowed. func (d *dirInodeOperations) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inode, name string) error { - return syserror.EACCES + return linuxerr.EACCES } // CreateFifo implements fs.InodeOperations.CreateFifo. // // Creation is never allowed. func (d *dirInodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - return syserror.EACCES + return linuxerr.EACCES } // Remove implements fs.InodeOperations.Remove. // // Removal is never allowed. func (d *dirInodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { - return syserror.EPERM + return linuxerr.EPERM } // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. // // Removal is never allowed. 
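Looking back at the tmpfs/inode_file.go hunks above, the memfd seal checks keep their semantics and only swap error packages: F_SEAL_SHRINK blocks truncation below the current size, F_SEAL_GROW blocks growth (in Truncate, Allocate, and WriteFromBlocks), F_SEAL_WRITE blocks writes and new writable mappings, and F_SEAL_SEAL freezes the seal set itself. A compact sketch of that gating logic follows, using only the linux ABI constants and linuxerr values that appear in the diff; the sealedFile type and its fields are invented for illustration and omit the real locking.

package example

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// sealedFile is a toy stand-in for tmpfs's fileInodeOperations.
type sealedFile struct {
	seals            uint32
	size             int64
	writableMappings int
}

// truncate mirrors the size-change checks in Truncate/Allocate above.
func (f *sealedFile) truncate(newSize int64) error {
	switch {
	case newSize > f.size && f.seals&linux.F_SEAL_GROW != 0:
		return linuxerr.EPERM // growth sealed
	case newSize < f.size && f.seals&linux.F_SEAL_SHRINK != 0:
		return linuxerr.EPERM // shrink sealed
	}
	f.size = newSize
	return nil
}

// addSeals mirrors AddSeals above.
func (f *sealedFile) addSeals(val uint32) error {
	if f.seals&linux.F_SEAL_SEAL != 0 {
		// A prior F_SEAL_SEAL freezes the seal set itself.
		return linuxerr.EPERM
	}
	if f.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 && f.writableMappings > 0 {
		// F_SEAL_WRITE cannot be added while writable mappings exist.
		return linuxerr.EBUSY
	}
	f.seals |= val
	return nil
}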
func (d *dirInodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { - return syserror.EPERM + return linuxerr.EPERM } // Bind implements fs.InodeOperations.Bind. func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index 13f4901db..0e5916380 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -16,9 +16,9 @@ package tty import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" ) // ptsDevice is the pseudo-filesystem device. @@ -64,7 +64,7 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou // No options are supported. if data != "" { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } return newDir(ctx, fs.NewMountSource(ctx, &superOperations{}, f, flags)), nil diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 1cf869b62..88d6703a8 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -17,13 +17,13 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -157,7 +157,7 @@ func (mf *masterFileOperations) Ioctl(ctx context.Context, file *fs.File, io use t := kernel.TaskFromContext(ctx) if t == nil { // ioctl(2) may only be called from a task goroutine. - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } switch cmd := args[1].Uint(); cmd { @@ -201,7 +201,7 @@ func (mf *masterFileOperations) Ioctl(ctx context.Context, file *fs.File, io use return mf.t.setForegroundProcessGroup(ctx, args, true /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fs/tty/replica.go b/pkg/sentry/fs/tty/replica.go index 0e3eea3bd..ca5bc7535 100644 --- a/pkg/sentry/fs/tty/replica.go +++ b/pkg/sentry/fs/tty/replica.go @@ -17,12 +17,12 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -142,7 +142,7 @@ func (sf *replicaFileOperations) Ioctl(ctx context.Context, file *fs.File, io us t := kernel.TaskFromContext(ctx) if t == nil { // ioctl(2) may only be called from a task goroutine. 
- return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } switch cmd := args[1].Uint(); cmd { @@ -179,7 +179,7 @@ func (sf *replicaFileOperations) Ioctl(ctx context.Context, file *fs.File, io us return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD index 66e949c95..4acc73ee0 100644 --- a/pkg/sentry/fs/user/BUILD +++ b/pkg/sentry/fs/user/BUILD @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/log", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go index 124bc95ed..f6eaab2bd 100644 --- a/pkg/sentry/fs/user/path.go +++ b/pkg/sentry/fs/user/path.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -93,7 +94,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s binPath := path.Join(p, name) traversals := uint(linux.MaxSymlinkTraversals) d, err := mns.FindInode(ctx, root, nil, binPath, &traversals) - if err == syserror.ENOENT || err == syserror.EACCES { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.EACCES, err) { // Didn't find it here. continue } @@ -142,7 +143,7 @@ func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNam Flags: linux.O_RDONLY, } dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.EACCES, err) { // Didn't find it here. continue } diff --git a/pkg/sentry/fsbridge/BUILD b/pkg/sentry/fsbridge/BUILD index 6c798f0bd..4631db2bb 100644 --- a/pkg/sentry/fsbridge/BUILD +++ b/pkg/sentry/fsbridge/BUILD @@ -13,12 +13,12 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsbridge/fs.go b/pkg/sentry/fsbridge/fs.go index 9785fd62a..527bde181 100644 --- a/pkg/sentry/fsbridge/fs.go +++ b/pkg/sentry/fsbridge/fs.go @@ -20,10 +20,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -127,7 +127,7 @@ func (l *fsLookup) OpenPath(ctx context.Context, path string, opts vfs.OpenOptio defer d.DecRef(ctx) if !resolveFinal && fs.IsSymlink(d.Inode.StableAttr) { - return nil, syserror.ELOOP + return nil, linuxerr.ELOOP } fsPerm := openOptionsToPermMask(&opts) @@ -138,13 +138,13 @@ func (l *fsLookup) OpenPath(ctx context.Context, path string, opts vfs.OpenOptio // If they claim it's a directory, then make sure. 
if strings.HasSuffix(path, "/") { if d.Inode.StableAttr.Type != fs.Directory { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } } if opts.FileExec && d.Inode.StableAttr.Type != fs.RegularFile { ctx.Infof("%q is not a regular file: %v", path, d.Inode.StableAttr.Type) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } f, err := d.Inode.GetFile(ctx, d, flagsToFileFlags(opts.Flags)) diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD index 37efb641a..4c9c5b344 100644 --- a/pkg/sentry/fsimpl/cgroupfs/BUILD +++ b/pkg/sentry/fsimpl/cgroupfs/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/coverage", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go index fe9871bdd..4290ffe0d 100644 --- a/pkg/sentry/fsimpl/cgroupfs/base.go +++ b/pkg/sentry/fsimpl/cgroupfs/base.go @@ -23,10 +23,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -253,7 +253,7 @@ func parseInt64FromString(ctx context.Context, src usermem.IOSequence, offset in // Note: This also handles zero-len writes if offset is beyond the end // of src, or src is empty. ctx.Warningf("cgroupfs.parseInt64FromString: failed to parse %q: %v", string(buf), err) - return 0, int64(n), syserror.EINVAL + return 0, int64(n), linuxerr.EINVAL } return val, int64(n), nil diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go index 05d7eb4ce..24e28a51f 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -62,12 +62,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -167,7 +167,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt maxCachedDentries, err = strconv.ParseUint(str, 10, 64) if err != nil { ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } } @@ -195,7 +195,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if _, ok := mopts["all"]; ok { if len(wantControllers) > 0 { ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: other controllers specified with all: %v", wantControllers) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } delete(mopts, "all") @@ -209,7 +209,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if len(mopts) != 0 { ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: unknown options: %v", mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } k := kernel.KernelFromContext(ctx) @@ -294,7 +294,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err) rootD.DecRef(ctx) 
fs.VFSFilesystem().DecRef(ctx) - return nil, nil, syserror.EBUSY + return nil, nil, linuxerr.EBUSY } // Move all existing tasks to the root of the new hierarchy. @@ -364,7 +364,7 @@ func (*dir) Keep() bool { // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // Open implements kernfs.Inode.Open. diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 6af3c3781..f981ff296 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -29,6 +29,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/marshal", "//pkg/marshal/primitive", @@ -59,5 +60,6 @@ go_test( "//pkg/abi/linux", "//pkg/sentry/contexttest", "//pkg/usermem", + "//pkg/waiter", ], ) diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index e75954105..7a488e9fd 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -56,7 +57,7 @@ func (*FilesystemType) Name() string { func (fstype *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { // No data allowed. if opts.Data != "" { - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fstype.initOnce.Do(func() { diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go index 448390cfe..1ef07d702 100644 --- a/pkg/sentry/fsimpl/devpts/devpts_test.go +++ b/pkg/sentry/fsimpl/devpts/devpts_test.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) func TestSimpleMasterToReplica(t *testing.T) { @@ -54,3 +55,36 @@ func TestSimpleMasterToReplica(t *testing.T) { t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) } } + +type callback func(*waiter.Entry, waiter.EventMask) + +func (cb callback) Callback(entry *waiter.Entry, mask waiter.EventMask) { + cb(entry, mask) +} + +func TestEchoDeadlock(t *testing.T) { + ctx := contexttest.Context(t) + termios := linux.DefaultReplicaTermios + termios.LocalFlags |= linux.ECHO + ld := newLineDiscipline(termios) + outBytes := make([]byte, 32) + dst := usermem.BytesIOSequence(outBytes) + entry := &waiter.Entry{Callback: callback(func(*waiter.Entry, waiter.EventMask) { + ld.inputQueueRead(ctx, dst) + })} + ld.masterWaiter.EventRegister(entry, waiter.ReadableEvents) + defer ld.masterWaiter.EventUnregister(entry) + inBytes := []byte("hello, tty\n") + n, err := ld.inputQueueWrite(ctx, usermem.BytesIOSequence(inBytes)) + if err != nil { + t.Fatalf("inputQueueWrite: %v", err) + } + if int(n) != len(inBytes) { + t.Fatalf("read wrong length: got %d, want %d", n, len(inBytes)) + } + outStr := string(outBytes[:n]) + inStr := string(inBytes) + if outStr != inStr { + t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) + } +} diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go 
b/pkg/sentry/fsimpl/devpts/line_discipline.go index e94a5bac3..9cb21e83b 100644 --- a/pkg/sentry/fsimpl/devpts/line_discipline.go +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -70,6 +70,10 @@ const ( // +------------------------| output queue |<--------------------------+ // (outputQueueRead) +--------------+ (outputQueueWrite) // +// There is special handling for the ECHO option, where bytes written to the +// input queue are also output back to the terminal by being written to +// l.outQueue by the input queue transformer. +// // Lock order: // termiosMu // inQueue.mu @@ -126,7 +130,6 @@ func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArgument // setTermios sets a linux.Termios for the tty. func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.Lock() - defer l.termiosMu.Unlock() oldCanonEnabled := l.termios.LEnabled(linux.ICANON) // We must copy a Termios struct, not KernelTermios. var t linux.Termios @@ -141,7 +144,10 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument l.inQueue.pushWaitBufLocked(l) l.inQueue.readable = true l.inQueue.mu.Unlock() + l.termiosMu.Unlock() l.replicaWaiter.Notify(waiter.ReadableEvents) + } else { + l.termiosMu.Unlock() } return 0, err @@ -179,28 +185,37 @@ func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, io usermem.IO, args func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { l.termiosMu.RLock() - defer l.termiosMu.RUnlock() - n, pushed, err := l.inQueue.read(ctx, dst, l) + n, pushed, notifyEcho, err := l.inQueue.read(ctx, dst, l) + l.termiosMu.RUnlock() if err != nil { return 0, err } if n > 0 { - l.masterWaiter.Notify(waiter.WritableEvents) + if notifyEcho { + l.masterWaiter.Notify(waiter.ReadableEvents | waiter.WritableEvents) + } else { + l.masterWaiter.Notify(waiter.WritableEvents) + } if pushed { l.replicaWaiter.Notify(waiter.ReadableEvents) } return n, nil + } else if notifyEcho { + l.masterWaiter.Notify(waiter.ReadableEvents) } return 0, syserror.ErrWouldBlock } func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { l.termiosMu.RLock() - defer l.termiosMu.RUnlock() - n, err := l.inQueue.write(ctx, src, l) + n, notifyEcho, err := l.inQueue.write(ctx, src, l) + l.termiosMu.RUnlock() if err != nil { return 0, err } + if notifyEcho { + l.masterWaiter.Notify(waiter.ReadableEvents) + } if n > 0 { l.replicaWaiter.Notify(waiter.ReadableEvents) return n, nil @@ -214,8 +229,9 @@ func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { l.termiosMu.RLock() - defer l.termiosMu.RUnlock() - n, pushed, err := l.outQueue.read(ctx, dst, l) + // Ignore notifyEcho, as it cannot happen when reading from the output queue. + n, pushed, _, err := l.outQueue.read(ctx, dst, l) + l.termiosMu.RUnlock() if err != nil { return 0, err } @@ -231,8 +247,9 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { l.termiosMu.RLock() - defer l.termiosMu.RUnlock() - n, err := l.outQueue.write(ctx, src, l) + // Ignore notifyEcho, as it cannot happen when writing to the output queue. 
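The line_discipline.go changes above stop issuing waiter notifications while the termios/queue locks are held: setTermios, inputQueueRead and inputQueueWrite now release the lock first and notify afterwards, driven by the returned notifyEcho flag, which is what the new TestEchoDeadlock exercises. A toy sketch of the compute-under-lock, notify-after-unlock shape; waitQueue and lineDiscipline here are simplified stand-ins for waiter.Queue and the real line discipline:

package main

import (
	"fmt"
	"sync"
)

// waitQueue is a toy stand-in for waiter.Queue: Notify runs callbacks
// synchronously, just as a registered waiter.Entry callback would.
type waitQueue struct {
	mu        sync.Mutex
	callbacks []func()
}

func (w *waitQueue) Notify() {
	w.mu.Lock()
	cbs := append([]func(){}, w.callbacks...)
	w.mu.Unlock()
	for _, cb := range cbs {
		cb() // may re-enter the line discipline
	}
}

type lineDiscipline struct {
	mu     sync.Mutex // stands in for the termios/queue locks
	inBuf  []byte
	master waitQueue
}

// write appends to the input buffer under mu, but defers the echo
// notification until after mu is released. If Notify ran while mu was
// still held, a callback that re-enters read would self-deadlock.
func (l *lineDiscipline) write(b []byte) int {
	l.mu.Lock()
	l.inBuf = append(l.inBuf, b...)
	notifyEcho := len(b) > 0 // pretend ECHO is always on
	l.mu.Unlock()
	if notifyEcho {
		l.master.Notify()
	}
	return len(b)
}

func (l *lineDiscipline) read() []byte {
	l.mu.Lock()
	defer l.mu.Unlock()
	out := l.inBuf
	l.inBuf = nil
	return out
}

func main() {
	ld := &lineDiscipline{}
	ld.master.callbacks = append(ld.master.callbacks, func() {
		fmt.Printf("echo reader got %q\n", ld.read())
	})
	ld.write([]byte("hello, tty\n"))
}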
+ n, _, err := l.outQueue.write(ctx, src, l) + l.termiosMu.RUnlock() if err != nil { return 0, err } @@ -246,7 +263,8 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq // transformer is a helper interface to make it easier to stateify queue. type transformer interface { // transform functions require queue's mutex to be held. - transform(*lineDiscipline, *queue, []byte) int + // The boolean indicates whether there was any echoed bytes. + transform(*lineDiscipline, *queue, []byte) (int, bool) } // outputQueueTransformer implements transformer. It performs line discipline @@ -261,7 +279,7 @@ type outputQueueTransformer struct{} // Preconditions: // * l.termiosMu must be held for reading. // * q.mu must be held. -func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int { +func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) (int, bool) { // transformOutput is effectively always in noncanonical mode, as the // master termios never has ICANON set. @@ -270,7 +288,7 @@ func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte if len(q.readBuf) > 0 { q.readable = true } - return len(buf) + return len(buf), false } var ret int @@ -321,7 +339,7 @@ func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte if len(q.readBuf) > 0 { q.readable = true } - return ret + return ret, false } // inputQueueTransformer implements transformer. It performs line discipline @@ -334,15 +352,17 @@ type inputQueueTransformer struct{} // transformed according to flags set in the termios struct. See // drivers/tty/n_tty.c:n_tty_receive_char_special for an analogous kernel // function. +// It returns an extra boolean indicating whether any characters need to be +// echoed, in which case we need to notify readers. // // Preconditions: // * l.termiosMu must be held for reading. // * q.mu must be held. -func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int { +func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) (int, bool) { // If there's a line waiting to be read in canonical mode, don't write // anything else to the read buffer. if l.termios.LEnabled(linux.ICANON) && q.readable { - return 0 + return 0, false } maxBytes := nonCanonMaxBytes @@ -351,6 +371,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) } var ret int + var notifyEcho bool for len(buf) > 0 && len(q.readBuf) < canonMaxBytes { size := l.peek(buf) cBytes := append([]byte{}, buf[:size]...) @@ -397,7 +418,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) // Anything written to the readBuf will have to be echoed. if l.termios.LEnabled(linux.ECHO) { l.outQueue.writeBytes(cBytes, l) - l.masterWaiter.Notify(waiter.ReadableEvents) + notifyEcho = true } // If we finish a line, make it available for reading. @@ -412,7 +433,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) q.readable = true } - return ret + return ret, notifyEcho } // shouldDiscard returns whether c should be discarded. 
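With the transformer interface above now returning (bytes consumed, echoed?), the input-queue transformer no longer notifies the master waiter itself; callers OR the flags from each processed chunk and notify once after the locks are dropped. A reduced sketch of that interface shape; echoingTransformer and pushWaitBuf are illustrative names, not the real queue code:

package main

import "fmt"

// transformer mirrors the shape of the updated interface: instead of
// notifying waiters itself, a transform reports whether it echoed bytes.
type transformer interface {
	transform(buf []byte) (n int, echo bool)
}

type echoingTransformer struct {
	readBuf []byte
	echoed  []byte
}

func (t *echoingTransformer) transform(buf []byte) (int, bool) {
	t.readBuf = append(t.readBuf, buf...)
	t.echoed = append(t.echoed, buf...) // pretend ECHO is enabled
	return len(buf), len(buf) > 0
}

// pushWaitBuf drains queued chunks through the transformer and
// aggregates the echo flags, so the caller can issue a single
// notification once it is safe to do so.
func pushWaitBuf(t transformer, waitBuf [][]byte) (total int, notifyEcho bool) {
	for _, chunk := range waitBuf {
		n, echo := t.transform(chunk)
		total += n
		notifyEcho = notifyEcho || echo
	}
	return total, notifyEcho
}

func main() {
	tr := &echoingTransformer{}
	n, echo := pushWaitBuf(tr, [][]byte{[]byte("he"), []byte("llo\n")})
	fmt.Println(n, echo, string(tr.echoed))
}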
In canonical mode, if diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 93c031c89..9a1a245dc 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -17,6 +17,7 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -80,7 +80,7 @@ func (mi *masterInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs // SetStat implements kernfs.Inode.SetStat func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { if opts.Stat.Mask&linux.STATX_SIZE != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } return mi.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) } @@ -132,7 +132,7 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args t := kernel.TaskFromContext(ctx) if t == nil { // ioctl(2) may only be called from a task goroutine. - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } switch cmd := args[1].Uint(); cmd { @@ -177,7 +177,7 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args return mfd.t.setForegroundProcessGroup(ctx, args, true /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index 47b0f1599..ff1d89955 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -98,17 +98,19 @@ func (q *queue) readableSize(t *kernel.Task, io usermem.IO, args arch.SyscallArg } -// read reads from q to userspace. It returns the number of bytes read as well -// as whether the read caused more readable data to become available (whether +// read reads from q to userspace. It returns: +// - The number of bytes read +// - Whether the read caused more readable data to become available (whether // data was pushed from the wait buffer to the read buffer). +// - Whether any data was echoed back (need to notify readers). // // Preconditions: l.termiosMu must be held for reading. -func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, error) { +func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, bool, error) { q.mu.Lock() defer q.mu.Unlock() if !q.readable { - return 0, false, syserror.ErrWouldBlock + return 0, false, false, syserror.ErrWouldBlock } if dst.NumBytes() > canonMaxBytes { @@ -131,19 +133,20 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl return n, nil })) if err != nil { - return 0, false, err + return 0, false, false, err } // Move data from the queue's wait buffer to its read buffer. - nPushed := q.pushWaitBufLocked(l) + nPushed, notifyEcho := q.pushWaitBufLocked(l) - return int64(n), nPushed > 0, nil + return int64(n), nPushed > 0, notifyEcho, nil } // write writes to q from userspace. +// The returned boolean indicates whether any data was echoed back. // // Preconditions: l.termiosMu must be held for reading. 
-func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, error) { +func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, bool, error) { q.mu.Lock() defer q.mu.Unlock() @@ -173,44 +176,49 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip return n, nil })) if err != nil { - return 0, err + return 0, false, err } // Push data from the wait to the read buffer. - q.pushWaitBufLocked(l) + _, notifyEcho := q.pushWaitBufLocked(l) - return n, nil + return n, notifyEcho, nil } // writeBytes writes to q from b. +// The returned boolean indicates whether any data was echoed back. // // Preconditions: l.termiosMu must be held for reading. -func (q *queue) writeBytes(b []byte, l *lineDiscipline) { +func (q *queue) writeBytes(b []byte, l *lineDiscipline) bool { q.mu.Lock() defer q.mu.Unlock() // Write to the wait buffer. q.waitBufAppend(b) - q.pushWaitBufLocked(l) + _, notifyEcho := q.pushWaitBufLocked(l) + return notifyEcho } // pushWaitBufLocked fills the queue's read buffer with data from the wait // buffer. +// The returned boolean indicates whether any data was echoed back. // // Preconditions: // * l.termiosMu must be held for reading. // * q.mu must be locked. -func (q *queue) pushWaitBufLocked(l *lineDiscipline) int { +func (q *queue) pushWaitBufLocked(l *lineDiscipline) (int, bool) { if q.waitBufLen == 0 { - return 0 + return 0, false } // Move data from the wait to the read buffer. var total int var i int + var notifyEcho bool for i = 0; i < len(q.waitBuf); i++ { - n := q.transform(l, q, q.waitBuf[i]) + n, echo := q.transform(l, q, q.waitBuf[i]) total += n + notifyEcho = notifyEcho || echo if n != len(q.waitBuf[i]) { // The read buffer filled up without consuming the // entire buffer. @@ -223,7 +231,7 @@ func (q *queue) pushWaitBufLocked(l *lineDiscipline) int { q.waitBuf = q.waitBuf[i:] q.waitBufLen -= uint64(total) - return total + return total, notifyEcho } // Precondition: q.mu must be locked. diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go index 96d2054cb..e251897b4 100644 --- a/pkg/sentry/fsimpl/devpts/replica.go +++ b/pkg/sentry/fsimpl/devpts/replica.go @@ -17,13 +17,13 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -92,7 +92,7 @@ func (ri *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vf // SetStat implements kernfs.Inode.SetStat func (ri *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { if opts.Stat.Mask&linux.STATX_SIZE != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } return ri.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) } @@ -141,7 +141,7 @@ func (rfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg t := kernel.TaskFromContext(ctx) if t == nil { // ioctl(2) may only be called from a task goroutine. 
- return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } switch cmd := args[1].Uint(); cmd { @@ -179,7 +179,7 @@ func (rfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg return rfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index 2dbc6bfd5..5e8b464a0 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -47,6 +47,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fspath", "//pkg/log", @@ -88,13 +89,13 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/marshal/primitive", "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/ext/disklayout", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/test/testutil", "//pkg/usermem", "@com_github_google_go_cmp//cmp:go_default_library", diff --git a/pkg/sentry/fsimpl/ext/block_map_file.go b/pkg/sentry/fsimpl/ext/block_map_file.go index 1165234f9..79719faed 100644 --- a/pkg/sentry/fsimpl/ext/block_map_file.go +++ b/pkg/sentry/fsimpl/ext/block_map_file.go @@ -18,6 +18,7 @@ import ( "io" "math" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/syserror" ) @@ -84,7 +85,7 @@ func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { } if off < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } offset := uint64(off) diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go index 512b70ede..cc067c20e 100644 --- a/pkg/sentry/fsimpl/ext/directory.go +++ b/pkg/sentry/fsimpl/ext/directory.go @@ -17,12 +17,12 @@ package ext import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // directory represents a directory inode. It holds the childList in memory. @@ -218,7 +218,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { if whence != linux.SEEK_SET && whence != linux.SEEK_CUR { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } dir := fd.inode().impl.(*directory) @@ -234,7 +234,7 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in if offset < 0 { // lseek(2) specifies that EINVAL should be returned if the resulting offset // is negative. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } n := int64(len(dir.childMap)) diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go index 38fb7962b..80854b501 100644 --- a/pkg/sentry/fsimpl/ext/ext.go +++ b/pkg/sentry/fsimpl/ext/ext.go @@ -22,12 +22,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Name is the name of this filesystem. 
@@ -133,13 +133,13 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // mount(2) specifies that EINVAL should be returned if the superblock is // invalid. fs.vfsfs.DecRef(ctx) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Refuse to mount if the filesystem is incompatible. if !isCompatible(fs.sb) { fs.vfsfs.DecRef(ctx) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fs.bgs, err = readBlockGroups(dev, fs.sb) diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index d9fd4590c..db712e71f 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -26,12 +26,12 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/pkg/usermem" ) @@ -173,7 +173,7 @@ func TestSeek(t *testing.T) { } // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -1, linux.SEEK_SET); err != syserror.EINVAL { + if _, err := fd.Seek(ctx, -1, linux.SEEK_SET); !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("expected error EINVAL but got %v", err) } @@ -187,7 +187,7 @@ func TestSeek(t *testing.T) { } // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -(size + 2), linux.SEEK_CUR); err != syserror.EINVAL { + if _, err := fd.Seek(ctx, -(size + 2), linux.SEEK_CUR); !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("expected error EINVAL but got %v", err) } @@ -204,7 +204,7 @@ func TestSeek(t *testing.T) { } // EINVAL should be returned if the resulting offset is negative. - if _, err := fd.Seek(ctx, -(size + 1), linux.SEEK_END); err != syserror.EINVAL { + if _, err := fd.Seek(ctx, -(size + 1), linux.SEEK_END); !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("expected error EINVAL but got %v", err) } } diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go index 778460107..f449bc8bd 100644 --- a/pkg/sentry/fsimpl/ext/extent_file.go +++ b/pkg/sentry/fsimpl/ext/extent_file.go @@ -18,6 +18,7 @@ import ( "io" "sort" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/syserror" ) @@ -65,7 +66,7 @@ func (f *extentFile) buildExtTree() error { if f.root.Header.NumEntries > 4 { // read(2) specifies that EINVAL should be returned if the file is unsuitable // for reading. 
- return syserror.EINVAL + return linuxerr.EINVAL } f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries) @@ -145,7 +146,7 @@ func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) { } if off < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if uint64(off) >= f.regFile.inode.diskInode.Size() { diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go index 90b086468..2e9033c1d 100644 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ b/pkg/sentry/fsimpl/ext/file_description.go @@ -17,8 +17,8 @@ package ext import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // fileDescription is embedded by ext implementations of @@ -49,7 +49,7 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) if opts.Stat.Mask == 0 { return nil } - return syserror.EPERM + return linuxerr.EPERM } // SetStat implements vfs.FileDescriptionImpl.StatFS. diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index d4fc484a2..bcc7588da 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -88,7 +89,7 @@ var _ vfs.FilesystemImpl = (*filesystem)(nil) // * inode == vfsd.Impl().(*Dentry).inode. func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { if !inode.isDir() { - return nil, nil, syserror.ENOTDIR + return nil, nil, linuxerr.ENOTDIR } if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, nil, err @@ -180,7 +181,7 @@ func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.De } } if rp.MustBeDir() && !inode.isDir() { - return nil, nil, syserror.ENOTDIR + return nil, nil, linuxerr.ENOTDIR } return vfsd, inode, nil } @@ -209,7 +210,7 @@ func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (* } } if !inode.isDir() { - return nil, nil, syserror.ENOTDIR + return nil, nil, linuxerr.ENOTDIR } return vfsd, inode, nil } @@ -301,7 +302,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op if opts.CheckSearchable { if !inode.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -331,7 +332,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // EROFS is returned if write access is needed. if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 { - return nil, syserror.EROFS + return nil, linuxerr.EROFS } return inode.open(rp, vfsd, &opts) } @@ -344,7 +345,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } symlink, ok := inode.impl.(*symlink) if !ok { - return "", syserror.EINVAL + return "", linuxerr.EINVAL } return symlink.target, nil } @@ -389,33 +390,33 @@ func (fs *filesystem) Sync(ctx context.Context) error { // LinkAt implements vfs.FilesystemImpl.LinkAt. 
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } if _, _, err := fs.walk(ctx, rp, true); err != nil { return err } - return syserror.EROFS + return linuxerr.EROFS } // MkdirAt implements vfs.FilesystemImpl.MkdirAt. func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } if _, _, err := fs.walk(ctx, rp, true); err != nil { return err } - return syserror.EROFS + return linuxerr.EROFS } // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } _, _, err := fs.walk(ctx, rp, true) @@ -423,7 +424,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } - return syserror.EROFS + return linuxerr.EROFS } // RenameAt implements vfs.FilesystemImpl.RenameAt. @@ -437,7 +438,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return err } - return syserror.EROFS + return linuxerr.EROFS } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. @@ -448,10 +449,10 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } if !inode.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EROFS + return linuxerr.EROFS } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. @@ -461,13 +462,13 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts return err } - return syserror.EROFS + return linuxerr.EROFS } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } _, _, err := fs.walk(ctx, rp, true) @@ -475,7 +476,7 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return err } - return syserror.EROFS + return linuxerr.EROFS } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. @@ -489,7 +490,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return syserror.EISDIR } - return syserror.EROFS + return linuxerr.EROFS } // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. @@ -503,7 +504,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // TODO(b/134676337): Support sockets. - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. @@ -512,7 +513,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si if err != nil { return nil, err } - return nil, syserror.ENOTSUP + return nil, linuxerr.ENOTSUP } // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. @@ -521,7 +522,7 @@ func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return "", err } - return "", syserror.ENOTSUP + return "", linuxerr.ENOTSUP } // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. @@ -530,7 +531,7 @@ func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return err } - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 
@@ -539,7 +540,7 @@ func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, if err != nil { return err } - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index 4a555bf72..46658f855 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -147,7 +148,7 @@ func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { return &f.inode, nil default: // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } @@ -196,7 +197,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt case *symlink: if opts.Flags&linux.O_PATH == 0 { // Can't open symlinks without O_PATH. - return nil, syserror.ELOOP + return nil, linuxerr.ELOOP } var fd symlinkFD fd.LockFD.Init(&in.locks) diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index 5ad9befcd..6613f0e1d 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -19,11 +19,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -110,7 +110,7 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { // write(2) specifies that EBADF must be returned if the fd is not open for // writing. - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Write implements vfs.FileDescriptionImpl.Write. @@ -124,7 +124,7 @@ func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts // IterDirents implements vfs.FileDescriptionImpl.IterDirents. func (fd *regularFileFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Seek implements vfs.FileDescriptionImpl.Seek. @@ -139,10 +139,10 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) ( case linux.SEEK_END: offset += int64(fd.inode().diskInode.Size()) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.off = offset return offset, nil @@ -151,5 +151,5 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) ( // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { // TODO(b/134676337): Implement mmap(2). 
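The ext Seek hunks above fold the whence handling into a switch and return EINVAL for unknown whence values or for a negative resulting offset, matching lseek(2). A standalone sketch of that arithmetic, using a plain error value in place of linuxerr.EINVAL:

package main

import (
	"errors"
	"fmt"
)

// Whence values, matching lseek(2).
const (
	seekSet = 0
	seekCur = 1
	seekEnd = 2
)

var errEINVAL = errors.New("invalid argument") // stand-in for EINVAL

// seek computes the new file offset the same way the Seek hunks above
// do: unknown whence values and negative results both fail with EINVAL.
func seek(cur, size, offset int64, whence int32) (int64, error) {
	switch whence {
	case seekSet:
		// offset is absolute.
	case seekCur:
		offset += cur
	case seekEnd:
		offset += size
	default:
		return 0, errEINVAL
	}
	if offset < 0 {
		return 0, errEINVAL
	}
	return offset, nil
}

func main() {
	fmt.Println(seek(10, 100, -5, seekCur)) // 5 <nil>
	fmt.Println(seek(10, 100, -1, seekSet)) // 0 invalid argument
	fmt.Println(seek(10, 100, 0, 7))        // 0 invalid argument
}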
- return syserror.ENODEV + return linuxerr.ENODEV } diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go index 5e2bcc837..385651dc3 100644 --- a/pkg/sentry/fsimpl/ext/symlink.go +++ b/pkg/sentry/fsimpl/ext/symlink.go @@ -16,9 +16,9 @@ package ext import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -81,35 +81,35 @@ func (fd *symlinkFD) Release(context.Context) {} // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Read implements vfs.FileDescriptionImpl.Read. func (fd *symlinkFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *symlinkFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Write implements vfs.FileDescriptionImpl.Write. func (fd *symlinkFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // IterDirents implements vfs.FileDescriptionImpl.IterDirents. func (fd *symlinkFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *symlinkFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. 
func (fd *symlinkFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return syserror.EBADF + return linuxerr.EBADF } diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 3a4777fbe..871df5984 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -46,6 +46,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/marshal", @@ -76,6 +77,7 @@ go_test( library = ":fuse", deps = [ "//pkg/abi/linux", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal", "//pkg/sentry/fsimpl/testutil", diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go index 077bf9307..d404edaf0 100644 --- a/pkg/sentry/fsimpl/fuse/connection.go +++ b/pkg/sentry/fsimpl/fuse/connection.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -252,11 +252,11 @@ func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) { } if !conn.connected { - return nil, syserror.ENOTCONN + return nil, linuxerr.ENOTCONN } if conn.connInitError { - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } fut, err := conn.callFuture(t, r) @@ -306,7 +306,7 @@ func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureRes conn.mu.Unlock() // we checked connected before, // this must be due to aborted connection. - return nil, syserror.ECONNABORTED + return nil, linuxerr.ECONNABORTED } conn.mu.Unlock() diff --git a/pkg/sentry/fsimpl/fuse/connection_test.go b/pkg/sentry/fsimpl/fuse/connection_test.go index 78ea6a31e..1fddd858e 100644 --- a/pkg/sentry/fsimpl/fuse/connection_test.go +++ b/pkg/sentry/fsimpl/fuse/connection_test.go @@ -19,9 +19,9 @@ import ( "testing" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) // TestConnectionInitBlock tests if initialization @@ -104,7 +104,7 @@ func TestConnectionAbort(t *testing.T) { // After abort, Call() should return directly with ENOTCONN. req := conn.NewRequest(creds, 0, 0, 0, testObj) _, err = conn.Call(task, req) - if err != syserror.ENOTCONN { + if !linuxerr.Equals(linuxerr.ENOTCONN, err) { t.Fatalf("Incorrect error code received for Call() after connection aborted") } diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go index 5d2bae14e..dab1e779d 100644 --- a/pkg/sentry/fsimpl/fuse/dev.go +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -18,6 +18,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -122,7 +123,7 @@ func (fd *DeviceFD) Release(ctx context.Context) { func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 
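The /dev/fuse hunks here all begin with the same guards: EPERM while no FUSE filesystem is mounted on the device FD, ENODEV once that filesystem has been unmounted. A toy model of that guard-first structure; deviceFD and the error values below are illustrative, not the sentry's types:

package main

import (
	"errors"
	"fmt"
)

var (
	errEPERM  = errors.New("operation not permitted") // stand-in for EPERM
	errENODEV = errors.New("no such device")          // stand-in for ENODEV
)

// deviceFD is a toy model of the /dev/fuse file description: operations
// make no sense before a FUSE filesystem is mounted on it, and fail
// differently once that filesystem has been unmounted.
type deviceFD struct {
	mounted  bool
	umounted bool
	buf      []byte
}

func (fd *deviceFD) checkUsable() error {
	if !fd.mounted {
		return errEPERM
	}
	if fd.umounted {
		return errENODEV
	}
	return nil
}

func (fd *deviceFD) Write(p []byte) (int, error) {
	if err := fd.checkUsable(); err != nil {
		return 0, err
	}
	fd.buf = append(fd.buf, p...)
	return len(p), nil
}

func main() {
	fd := &deviceFD{}
	_, err := fd.Write([]byte("x"))
	fmt.Println(err) // operation not permitted

	fd.mounted = true
	n, err := fd.Write([]byte("x"))
	fmt.Println(n, err) // 1 <nil>

	fd.umounted = true
	_, err = fd.Write([]byte("x"))
	fmt.Println(err) // no such device
}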
if fd.fs == nil { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } return 0, syserror.ENOSYS @@ -132,7 +133,7 @@ func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset in func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. if fd.fs == nil { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } // We require that any Read done on this filesystem have a sane minimum @@ -149,7 +150,7 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R // If the read buffer is too small, error out. if dst.NumBytes() < int64(minBuffSize) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.mu.Lock() @@ -234,7 +235,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. if fd.fs == nil { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } return 0, syserror.ENOSYS @@ -251,12 +252,12 @@ func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs. func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. if fd.fs == nil { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } // Return ENODEV if the filesystem is umounted. if fd.fs.umounted { - return 0, syserror.ENODEV + return 0, linuxerr.ENODEV } var cn, n int64 @@ -293,7 +294,7 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt // Assert that the header isn't read into the writeBuf yet. if fd.writeCursor >= hdrLen { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // We don't have the full common response header yet. @@ -322,7 +323,7 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt if !ok { // Server sent us a response for a request we never sent, // or for which we already received a reply (e.g. aborted), an unlikely event. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } delete(fd.completions, hdr.Unique) @@ -391,7 +392,7 @@ func (fd *DeviceFD) EventUnregister(e *waiter.Entry) { func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. if fd.fs == nil { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } return 0, syserror.ENOSYS @@ -434,7 +435,7 @@ func (fd *DeviceFD) sendError(ctx context.Context, errno int32, unique linux.FUS if !ok { // A response for a request we never sent, // or for which we already received a reply (e.g. aborted). 
- return syserror.EINVAL + return linuxerr.EINVAL } delete(fd.completions, respHdr.Unique) diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 167c899e2..172cbd88f 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -121,30 +122,30 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt deviceDescriptorStr, ok := mopts["fd"] if !ok { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option fd missing") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } delete(mopts, "fd") deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */) if err != nil { ctx.Debugf("fusefs.FilesystemType.GetFilesystem: invalid fd: %q (%v)", deviceDescriptorStr, err) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name()) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fuseFDGeneric := kernelTask.GetFileVFS2(int32(deviceDescriptor)) if fuseFDGeneric == nil { - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } defer fuseFDGeneric.DecRef(ctx) fuseFD, ok := fuseFDGeneric.Impl().(*DeviceFD) if !ok { log.Warningf("%s.GetFilesystem: device FD is %T, not a FUSE device", fsType.Name, fuseFDGeneric) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Parse and set all the other supported FUSE mount options. 
@@ -154,17 +155,17 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt uid, err := strconv.ParseUint(uidStr, 10, 32) if err != nil { log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), uidStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } kuid := creds.UserNamespace.MapToKUID(auth.UID(uid)) if !kuid.Ok() { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped uid: %d", uid) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.uid = kuid } else { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option user_id missing") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if gidStr, ok := mopts["group_id"]; ok { @@ -172,17 +173,17 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt gid, err := strconv.ParseUint(gidStr, 10, 32) if err != nil { log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), gidStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } kgid := creds.UserNamespace.MapToKGID(auth.GID(gid)) if !kgid.Ok() { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped gid: %d", gid) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.gid = kgid } else { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option group_id missing") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if modeStr, ok := mopts["rootmode"]; ok { @@ -190,12 +191,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt mode, err := strconv.ParseUint(modeStr, 8, 32) if err != nil { log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.rootMode = linux.FileMode(mode) } else { ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option rootmode missing") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Set the maxInFlightRequests option. @@ -206,7 +207,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt maxRead, err := strconv.ParseUint(maxReadStr, 10, 32) if err != nil { log.Warningf("%s.GetFilesystem: invalid max_read: max_read=%s", fsType.Name(), maxReadStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if maxRead < fuseMinMaxRead { maxRead = fuseMinMaxRead @@ -229,7 +230,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Check for unparsed options. if len(mopts) != 0 { log.Warningf("%s.GetFilesystem: unsupported or unknown options: %v", fsType.Name(), mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Create a new FUSE filesystem. 
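The fusefs GetFilesystem hunks above treat fd, user_id, group_id and rootmode as mandatory mount options and clamp max_read to a minimum. A compact sketch of that parsing flow under the assumption that options arrive as a key/value map; fuseOpts, parseFUSEOpts and minMaxRead are made-up names for illustration:

package main

import (
	"fmt"
	"strconv"
)

const minMaxRead = 4096 // illustrative floor, in the spirit of fuseMinMaxRead

type fuseOpts struct {
	fd       int32
	uid      uint32
	gid      uint32
	rootMode uint32
	maxRead  uint32
}

// parseFUSEOpts mirrors the structure of the option parsing above:
// fd, user_id, group_id and rootmode are mandatory, max_read is
// optional and clamped to a minimum.
func parseFUSEOpts(mopts map[string]string) (fuseOpts, error) {
	var opts fuseOpts
	fdStr, ok := mopts["fd"]
	if !ok {
		return opts, fmt.Errorf("mandatory mount option fd missing")
	}
	fd, err := strconv.ParseInt(fdStr, 10, 32)
	if err != nil {
		return opts, fmt.Errorf("invalid fd: %q", fdStr)
	}
	opts.fd = int32(fd)

	for _, req := range []struct {
		key  string
		dst  *uint32
		base int
	}{
		{"user_id", &opts.uid, 10},
		{"group_id", &opts.gid, 10},
		{"rootmode", &opts.rootMode, 8},
	} {
		s, ok := mopts[req.key]
		if !ok {
			return opts, fmt.Errorf("mandatory mount option %s missing", req.key)
		}
		v, err := strconv.ParseUint(s, req.base, 32)
		if err != nil {
			return opts, fmt.Errorf("invalid %s: %q", req.key, s)
		}
		*req.dst = uint32(v)
	}

	opts.maxRead = ^uint32(0)
	if s, ok := mopts["max_read"]; ok {
		v, err := strconv.ParseUint(s, 10, 32)
		if err != nil {
			return opts, fmt.Errorf("invalid max_read: %q", s)
		}
		if v < minMaxRead {
			v = minMaxRead
		}
		opts.maxRead = uint32(v)
	}
	return opts, nil
}

func main() {
	opts, err := parseFUSEOpts(map[string]string{
		"fd": "7", "user_id": "1000", "group_id": "1000", "rootmode": "40000", "max_read": "512",
	})
	fmt.Printf("%+v %v\n", opts, err)
}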
@@ -258,7 +259,7 @@ func newFUSEFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, fsTyp conn, err := newFUSEConnection(ctx, fuseFD, opts) if err != nil { log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fs := &filesystem{ @@ -375,7 +376,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a creds.RealKGID != i.fs.opts.gid || creds.EffectiveKGID != i.fs.opts.gid || creds.SavedKGID != i.fs.opts.gid { - return syserror.EACCES + return linuxerr.EACCES } } @@ -393,10 +394,10 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr isDir := i.InodeAttrs.Mode().IsDir() // return error if specified to open directory but inode is not a directory. if !isDir && opts.Mode.IsDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if opts.Flags&linux.O_LARGEFILE == 0 && atomic.LoadUint64(&i.size) > linux.MAX_NON_LFS { - return nil, syserror.EOVERFLOW + return nil, linuxerr.EOVERFLOW } var fd *fileDescription @@ -418,7 +419,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.Open: couldn't get kernel task from context") - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // Build the request. @@ -440,7 +441,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr if err != nil { return nil, err } - if err := res.Error(); err == syserror.ENOSYS && !isDir { + if err := res.Error(); linuxerr.Equals(linuxerr.ENOSYS, err) && !isDir { i.fs.conn.noOpen = true } else if err != nil { return nil, err @@ -512,7 +513,7 @@ func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } in := linux.FUSECreateIn{ CreateMeta: linux.FUSECreateMeta{ @@ -552,7 +553,7 @@ func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) err kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID) - return syserror.EINVAL + return linuxerr.EINVAL } in := linux.FUSEUnlinkIn{Name: name} req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_UNLINK, &in) @@ -596,7 +597,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, payload) res, err := i.fs.conn.Call(kernelTask, req) @@ -626,13 +627,13 @@ func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, // Readlink implements kernfs.Inode.Readlink. 
func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) { if i.Mode().FileType()&linux.S_IFLNK == 0 { - return "", syserror.EINVAL + return "", linuxerr.EINVAL } if len(i.link) == 0 { kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.Readlink: couldn't get kernel task from context") - return "", syserror.EINVAL + return "", linuxerr.EINVAL } req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{}) res, err := i.fs.conn.Call(kernelTask, req) @@ -728,7 +729,7 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp task := kernel.TaskFromContext(ctx) if task == nil { log.Warningf("couldn't get kernel task from context") - return linux.FUSEAttr{}, syserror.EINVAL + return linux.FUSEAttr{}, linuxerr.EINVAL } creds := auth.CredentialsFromContext(ctx) @@ -833,7 +834,7 @@ func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre task := kernel.TaskFromContext(ctx) if task == nil { log.Warningf("couldn't get kernel task from context") - return syserror.EINVAL + return linuxerr.EINVAL } // We should retain the original file type when assigning new mode. diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go index 66ea889f9..35d0ab6f4 100644 --- a/pkg/sentry/fsimpl/fuse/read_write.go +++ b/pkg/sentry/fsimpl/fuse/read_write.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -39,7 +40,7 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui t := kernel.TaskFromContext(ctx) if t == nil { log.Warningf("fusefs.Read: couldn't get kernel task from context") - return nil, 0, syserror.EINVAL + return nil, 0, linuxerr.EINVAL } // Round up to a multiple of page size. @@ -155,7 +156,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, t := kernel.TaskFromContext(ctx) if t == nil { log.Warningf("fusefs.Read: couldn't get kernel task from context") - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // One request cannnot exceed either maxWrite or maxPages. diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go index 5bdd096c3..6c4de3507 100644 --- a/pkg/sentry/fsimpl/fuse/regular_file.go +++ b/pkg/sentry/fsimpl/fuse/regular_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -39,14 +40,14 @@ type regularFileFD struct { // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } size := dst.NumBytes() @@ -56,7 +57,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs } else if size > math.MaxUint32 { // FUSE only supports uint32 for size. // Overflow. 
- return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // TODO(gvisor.dev/issue/3678): Add direct IO support. @@ -143,14 +144,14 @@ func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts // final offset should be ignored by PWrite. func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if offset < 0 { - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, offset, syserror.EOPNOTSUPP + return 0, offset, linuxerr.EOPNOTSUPP } inode := fd.inode() @@ -171,11 +172,11 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off if srclen > math.MaxUint32 { // FUSE only supports uint32 for size. // Overflow. - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } if end := offset + srclen; end < offset { // Overflow. - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } srclen, err = vfs.CheckLimit(ctx, offset, srclen) diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 368272f12..752060044 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -49,6 +49,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fdnotifier", "//pkg/fspath", diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 177e42649..5c48a9fee 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refsvfs2" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) func (d *dentry) isDir() bool { @@ -297,7 +297,7 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in switch whence { case linux.SEEK_SET: if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset == 0 { // Ensure that the next call to fd.IterDirents() calls @@ -309,13 +309,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in case linux.SEEK_CUR: offset += fd.off if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Don't clear fd.dirents in this case, even if offset == 0. fd.off = offset return fd.off, nil default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index eb09d54c3..05b776c2e 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" @@ -38,26 +39,14 @@ import ( // Sync implements vfs.FilesystemImpl.Sync. func (fs *filesystem) Sync(ctx context.Context) error { // Snapshot current syncable dentries and special file FDs. 
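The fuse pwrite hunk above layers three range checks before issuing any request: negative offsets, sizes that do not fit the uint32 the FUSE protocol carries, and offset+size wrapping past int64. A self-contained sketch of exactly those guards, with a plain error value standing in for EINVAL:

package main

import (
	"errors"
	"fmt"
	"math"
)

var errEINVAL = errors.New("invalid argument") // stand-in for EINVAL

// checkWriteRange applies the same three guards as the pwrite hunk:
// no negative offsets, sizes must fit in a uint32, and offset+size
// must not overflow int64.
func checkWriteRange(offset, size int64) error {
	if offset < 0 {
		return errEINVAL
	}
	if size > math.MaxUint32 {
		return errEINVAL
	}
	if end := offset + size; end < offset {
		return errEINVAL
	}
	return nil
}

func main() {
	fmt.Println(checkWriteRange(0, 4096))             // <nil>
	fmt.Println(checkWriteRange(-1, 10))              // invalid argument
	fmt.Println(checkWriteRange(math.MaxInt64-5, 10)) // invalid argument (end wraps)
}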
- fs.renameMu.RLock() fs.syncMu.Lock() ds := make([]*dentry, 0, len(fs.syncableDentries)) for d := range fs.syncableDentries { - // It's safe to use IncRef here even though fs.syncableDentries doesn't - // hold references since we hold fs.renameMu. Note that we can't use - // TryIncRef since cached dentries at zero references should still be - // synced. - d.IncRef() ds = append(ds, d) } - fs.renameMu.RUnlock() sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs)) for sffd := range fs.specialFileFDs { - // As above, fs.specialFileFDs doesn't hold references. However, unlike - // dentries, an FD that has reached zero references can't be - // resurrected, so we can use TryIncRef. - if sffd.vfsfd.TryIncRef() { - sffds = append(sffds, sffd) - } + sffds = append(sffds, sffd) } fs.syncMu.Unlock() @@ -67,9 +56,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync syncable dentries. for _, d := range ds { - err := d.syncCachedFile(ctx, true /* forFilesystemSync */) - d.DecRef(ctx) - if err != nil { + if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil { ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err) if retErr == nil { retErr = err @@ -80,9 +67,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync special files, which may be writable but do not use dentry shared // handles (so they won't be synced by the above). for _, sffd := range sffds { - err := sffd.sync(ctx, true /* forFilesystemSync */) - sffd.vfsfd.DecRef(ctx) - if err != nil { + if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil { ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err) if retErr == nil { retErr = err @@ -146,6 +131,7 @@ func putDentrySlice(ds *[]*dentry) { // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) { fs.renameMu.RUnlock() if *dsp == nil { @@ -158,6 +144,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp ** putDentrySlice(*dsp) } +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() @@ -186,7 +173,7 @@ func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[] // Postconditions: The returned dentry's cached metadata is up to date. 
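The gofer changes above also annotate helpers that release renameMu on behalf of their caller with +checklocksrelease, so gVisor's checklocks analyzer accepts the asymmetric lock/unlock split. A minimal example of that pattern; the filesystem type and methods here are stand-ins, only the annotation comment follows the form used in the hunk:

package main

import (
	"fmt"
	"sync"
)

type filesystem struct {
	renameMu sync.RWMutex
	cached   []string
}

// walk acquires renameMu for reading; the matching unlock happens in
// unlockAndCheckCaching below, which is why that helper carries the
// release annotation instead of this function.
func (fs *filesystem) walk(name string) string {
	fs.renameMu.RLock()
	defer fs.unlockAndCheckCaching(name)
	return "resolved:" + name
}

// +checklocksrelease:fs.renameMu
func (fs *filesystem) unlockAndCheckCaching(name string) {
	fs.cached = append(fs.cached, name)
	fs.renameMu.RUnlock()
}

func main() {
	fs := &filesystem{}
	fmt.Println(fs.walk("a/b/c"))
	fmt.Println(fs.cached)
}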
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) { if !d.isDir() { - return nil, false, syserror.ENOTDIR + return nil, false, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, false, err @@ -244,7 +231,7 @@ afterSymlink: // * dentry at name has been revalidated func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) { if len(name) > maxFilenameLen { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } if child, ok := parent.children[name]; ok || parent.isSynthetic() { if child == nil { @@ -255,7 +242,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name) if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { parent.cacheNegativeLookupLocked(name) } return nil, err @@ -302,7 +289,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving } } if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -330,7 +317,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, } } if rp.MustBeDir() && !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -359,7 +346,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } name := rp.Component() if name == "." || name == ".." { - return syserror.EEXIST + return linuxerr.EEXIST } if parent.isDeleted() { return syserror.ENOENT @@ -372,20 +359,20 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir defer parent.dirMu.Unlock() if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } // Check for existence only if caching information is available. Otherwise, // don't check for existence just yet. We will check for existence if the // checks for writability fail below. Existence check is done by the creation // RPCs themselves. if child, ok := parent.children[name]; ok && child != nil { - return syserror.EEXIST + return linuxerr.EEXIST } checkExistence := func() error { - if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && err != syserror.ENOENT { + if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { return err } else if child != nil { - return syserror.EEXIST + return linuxerr.EEXIST } return nil } @@ -412,7 +399,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } if parent.isSynthetic() { if createInSyntheticDir == nil { - return syserror.EPERM + return linuxerr.EPERM } if err := createInSyntheticDir(parent, name); err != nil { return err @@ -469,10 +456,10 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b name := rp.Component() if dir { if name == "." { - return syserror.EINVAL + return linuxerr.EINVAL } if name == ".." { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } else { if name == "." || name == ".." { @@ -539,8 +526,8 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b if child.syntheticChildren != 0 { // This is definitely not an empty directory, irrespective of // fs.opts.interop. 
- vfsObj.AbortDeleteDentry(&child.vfsd) - return syserror.ENOTEMPTY + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil. + return linuxerr.ENOTEMPTY } // If InteropModeShared is in effect and the first call to // PrepareDeleteDentry above succeeded, then child wasn't @@ -549,13 +536,13 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // still exist) would be a waste of time. if child.cachedMetadataAuthoritative() { if !child.isDir() { - vfsObj.AbortDeleteDentry(&child.vfsd) - return syserror.ENOTDIR + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. + return linuxerr.ENOTDIR } for _, grandchild := range child.children { if grandchild != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) - return syserror.ENOTEMPTY + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. + return linuxerr.ENOTEMPTY } } } @@ -564,14 +551,14 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } else { // child must be a non-directory file. if child != nil && child.isDir() { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. return syserror.EISDIR } if rp.MustBeDir() { if child != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. } - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } if parent.isSynthetic() { @@ -582,7 +569,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b err = parent.file.unlinkAt(ctx, name, flags) if err != nil { if child != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. } return err } @@ -600,7 +587,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } if child != nil { - vfsObj.CommitDeleteDentry(ctx, &child.vfsd) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above. child.setDeleted() if child.isSynthetic() { parent.syntheticChildren-- @@ -642,7 +629,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op } if opts.CheckSearchable { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -674,11 +661,11 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, _ **[]*dentry) error { if rp.Mount() != vd.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } d := vd.Dentry().Impl().(*dentry) if d.isDir() { - return syserror.EPERM + return linuxerr.EPERM } gid := auth.KGID(atomic.LoadUint32(&d.gid)) uid := auth.KUID(atomic.LoadUint32(&d.uid)) @@ -690,7 +677,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
return syserror.ENOENT } if d.nlink == math.MaxUint32 { - return syserror.EMLINK + return linuxerr.EMLINK } if err := parent.file.link(ctx, d.file, childName); err != nil { return err @@ -715,7 +702,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v mode |= linux.S_ISGID } if _, err := parent.file.mkdir(ctx, name, p9.FileMode(mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid)); err != nil { - if !opts.ForSyntheticMountpoint || err == syserror.EEXIST { + if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) { return err } ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err) @@ -734,7 +721,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v }, func(parent *dentry, name string) error { if !opts.ForSyntheticMountpoint { // Can't create non-synthetic files in synthetic directories. - return syserror.EPERM + return linuxerr.EPERM } parent.createSyntheticChildLocked(&createSyntheticOpts{ name: name, @@ -752,7 +739,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) error { creds := rp.Credentials() _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) - if err != syserror.EPERM { + if !linuxerr.Equals(linuxerr.EPERM, err) { return err } @@ -764,8 +751,8 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v switch { case err == nil: // Step succeeded, another file exists. - return syserror.EEXIST - case err != syserror.ENOENT: + return linuxerr.EEXIST + case !linuxerr.Equals(linuxerr.ENOENT, err): // Unexpected error. return err } @@ -793,7 +780,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return nil } // Retain error from gofer if synthetic file cannot be created internally. - return syserror.EPERM + return linuxerr.EPERM }, nil) } @@ -804,7 +791,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // support, and it isn't clear that there's any way to implement this in // 9P. if opts.Flags&linux.O_TMPFILE != 0 { - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } mayCreate := opts.Flags&linux.O_CREAT != 0 mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) @@ -827,7 +814,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf return nil, syserror.EISDIR } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } if !start.cachedMetadataAuthoritative() { // Refresh dentry's attributes before opening. @@ -862,10 +849,10 @@ afterTrailingSymlink: // Determine whether or not we need to create a file. parent.dirMu.Lock() child, _, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) - if err == syserror.ENOENT && mayCreate { + if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate { if parent.isSynthetic() { parent.dirMu.Unlock() - return nil, syserror.EPERM + return nil, linuxerr.EPERM } fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds) parent.dirMu.Unlock() @@ -876,7 +863,7 @@ afterTrailingSymlink: return nil, err } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } // Open existing child or follow symlink. 
if child.isSymlink() && rp.ShouldFollowSymlink() { @@ -891,7 +878,7 @@ afterTrailingSymlink: goto afterTrailingSymlink } if rp.MustBeDir() && !child.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } child.IncRef() defer child.DecRef(ctx) @@ -942,7 +929,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open return nil, syserror.EISDIR } if opts.Flags&linux.O_DIRECT != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if !d.isSynthetic() { if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil { @@ -962,10 +949,10 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open return &fd.vfsfd, nil case linux.S_IFLNK: // Can't open symlinks without O_PATH, which is handled at the VFS layer. - return nil, syserror.ELOOP + return nil, linuxerr.ELOOP case linux.S_IFSOCK: if d.isSynthetic() { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } if d.fs.iopts.OpenSocketsByConnecting { return d.openSocketByConnecting(ctx, opts) @@ -998,7 +985,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { if opts.Flags&linux.O_DIRECT != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fdObj, err := d.file.connect(ctx, p9.AnonymousSocket) if err != nil { @@ -1019,7 +1006,7 @@ func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptio func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { ats := vfs.AccessTypesForOpenFlags(opts) if opts.Flags&linux.O_DIRECT != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // We assume that the server silently inserts O_NONBLOCK in the open flags // for all named pipes (because all existing gofers do this). @@ -1033,7 +1020,7 @@ func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs. retry: h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0) if err != nil { - if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && err == syserror.ENXIO { + if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) { // An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails // with ENXIO if opening the same named pipe with O_WRONLY would // block because there are no readers of the pipe. @@ -1187,7 +1174,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st return "", err } if !d.isSymlink() { - return "", syserror.EINVAL + return "", linuxerr.EINVAL } return d.readlink(ctx, rp.Mount()) } @@ -1204,24 +1191,24 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } if opts.Flags&^linux.RENAME_NOREPLACE != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 { // Requires 9P support to synchronize with other remote filesystem // users. - return syserror.EINVAL + return linuxerr.EINVAL } newName := rp.Component() if newName == "." || newName == ".." 
{ if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } - return syserror.EBUSY + return linuxerr.EBUSY } mnt := rp.Mount() if mnt != oldParentVD.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } if err := mnt.CheckBeginWrite(); err != nil { return err @@ -1260,7 +1247,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } if renamed.isDir() { if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { - return syserror.EINVAL + return linuxerr.EINVAL } if oldParent != newParent { if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil { @@ -1269,7 +1256,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } } else { if opts.MustBeDir || rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } @@ -1284,13 +1271,13 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return syserror.ENOENT } replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds) - if err != nil && err != syserror.ENOENT { + if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { return err } var replacedVFSD *vfs.Dentry if replaced != nil { if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } replacedVFSD = &replaced.vfsd if replaced.isDir() { @@ -1298,11 +1285,11 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return syserror.EISDIR } if genericIsAncestorDentry(replaced, renamed) { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } else { if rp.MustBeDir() || renamed.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } } @@ -1507,7 +1494,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath return d.endpoint, nil } } - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 
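Throughout the filesystem.go hunks above, returns of syserror.E* become linuxerr.E* and equality checks such as err == syserror.ENOENT become linuxerr.Equals(linuxerr.ENOENT, err). The sketch below is not the real linuxerr package, only a minimal illustration of why an errno-number Equals helper replaces direct comparison once equivalent errors can be produced by more than one error type:

package main

import "fmt"

// errno stands in for an error type backed by a Linux errno value.
type errno struct {
	code int
	msg  string
}

func (e *errno) Error() string { return e.msg }

var ENOENT = &errno{code: 2, msg: "no such file or directory"}

// equals matches by errno number rather than by identity, so an ENOENT
// produced elsewhere (e.g. converted from a host errno) still compares equal.
func equals(want *errno, got error) bool {
	e, ok := got.(*errno)
	return ok && e.code == want.code
}

func main() {
	var err error = &errno{code: 2, msg: "ENOENT from another layer"}
	fmt.Println(err == ENOENT)       // false: different values
	fmt.Println(equals(ENOENT, err)) // true: same errno number
}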
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index cf69e1b7a..ec8d58cc9 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -46,6 +46,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" @@ -318,7 +319,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt mfp := pgalloc.MemoryFileProviderFromContext(ctx) if mfp == nil { ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } mopts := vfs.GenericParseMountOptions(opts.Data) @@ -354,7 +355,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fsopts.interop = InteropModeShared default: ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: %s=%s", moptCache, cache) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } } @@ -365,7 +366,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt dfltuid, err := strconv.ParseUint(dfltuidstr, 10, 32) if err != nil { ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltUID, dfltuidstr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // In Linux, dfltuid is interpreted as a UID and is converted to a KUID // in the caller's user namespace, but goferfs isn't @@ -378,7 +379,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt dfltgid, err := strconv.ParseUint(dfltgidstr, 10, 32) if err != nil { ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltGID, dfltgidstr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.dfltgid = auth.KGID(dfltgid) } @@ -390,7 +391,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt msize, err := strconv.ParseUint(msizestr, 10, 32) if err != nil { ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: %s=%s", moptMsize, msizestr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.msize = uint32(msize) } @@ -409,7 +410,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt maxCachedDentries, err := strconv.ParseUint(str, 10, 64) if err != nil { ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } fsopts.maxCachedDentries = maxCachedDentries } @@ -433,14 +434,14 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Check for unparsed options. if len(mopts) != 0 { ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Handle internal options. iopts, ok := opts.InternalData.(InternalFilesystemOptions) if opts.InternalData != nil && !ok { ctx.Warningf("gofer.FilesystemType.GetFilesystem: GetFilesystemOptions.InternalData has type %T, wanted gofer.InternalFilesystemOptions", opts.InternalData) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // If !ok, iopts being the zero value is correct. 
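The GetFilesystem hunks above convert every mount-option failure (bad cache policy, dfltuid, dfltgid, msize, dentry cache limit, or leftover unparsed options) into linuxerr.EINVAL. A rough, hypothetical sketch of that parse-validate-delete shape, with "msize" used only as an example key and errInvalid standing in for linuxerr.EINVAL:

package main

import (
	"errors"
	"fmt"
	"strconv"
	"strings"
)

// errInvalid stands in for linuxerr.EINVAL in this sketch.
var errInvalid = errors.New("invalid argument")

// parseOptions splits "key=value,key=value" mount data into a map.
func parseOptions(data string) map[string]string {
	mopts := make(map[string]string)
	for _, opt := range strings.Split(data, ",") {
		if kv := strings.SplitN(opt, "=", 2); len(kv) == 2 {
			mopts[kv[0]] = kv[1]
		}
	}
	return mopts
}

// getOptions mirrors the shape above: each recognized option is validated and
// removed from the map, and any leftover option fails the mount.
func getOptions(data string) (msize uint32, err error) {
	mopts := parseOptions(data)
	if str, ok := mopts["msize"]; ok {
		delete(mopts, "msize")
		v, perr := strconv.ParseUint(str, 10, 32)
		if perr != nil {
			return 0, errInvalid
		}
		msize = uint32(v)
	}
	if len(mopts) != 0 {
		return 0, errInvalid // unknown options remain
	}
	return msize, nil
}

func main() {
	fmt.Println(getOptions("msize=1048576"))   // 1048576 <nil>
	fmt.Println(getOptions("msize=not-a-num")) // 0 invalid argument
	fmt.Println(getOptions("bogus=1"))         // 0 invalid argument
}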
@@ -503,7 +504,7 @@ func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int trans, ok := mopts[moptTransport] if !ok || trans != transportModeFD { ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as '%s=%s'", moptTransport, transportModeFD) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } delete(mopts, moptTransport) @@ -511,28 +512,28 @@ func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int rfdstr, ok := mopts[moptReadFD] if !ok { ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as '%s=<file descriptor>'", moptReadFD) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } delete(mopts, moptReadFD) rfd, err := strconv.Atoi(rfdstr) if err != nil { ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: %s=%s", moptReadFD, rfdstr) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } wfdstr, ok := mopts[moptWriteFD] if !ok { ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as '%s=<file descriptor>'", moptWriteFD) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } delete(mopts, moptWriteFD) wfd, err := strconv.Atoi(wfdstr) if err != nil { ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: %s=%s", moptWriteFD, wfdstr) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } if rfd != wfd { ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD (%d) and write FD (%d) must be equal", rfd, wfd) - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } return rfd, nil } @@ -581,10 +582,10 @@ func (fs *filesystem) Release(ctx context.Context) { d.dataMu.Unlock() // Close host FDs if they exist. if d.readFD >= 0 { - unix.Close(int(d.readFD)) + _ = unix.Close(int(d.readFD)) } if d.writeFD >= 0 && d.readFD != d.writeFD { - unix.Close(int(d.writeFD)) + _ = unix.Close(int(d.writeFD)) } d.readFD = -1 d.writeFD = -1 @@ -946,10 +947,10 @@ func (d *dentry) cachedMetadataAuthoritative() bool { // updateFromP9Attrs is called to update d's metadata after an update from the // remote filesystem. // Precondition: d.metadataMu must be locked. +// +checklocks:d.metadataMu func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) { if mask.Mode { if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want { - d.metadataMu.Unlock() panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got)) } atomic.StoreUint32(&d.mode, uint32(attr.Mode)) @@ -988,6 +989,7 @@ func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) { // Preconditions: !d.isSynthetic(). // Preconditions: d.metadataMu is locked. +// +checklocks:d.metadataMu func (d *dentry) refreshSizeLocked(ctx context.Context) error { d.handleMu.RLock() @@ -1019,6 +1021,7 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error { // Preconditions: // * !d.isSynthetic(). // * d.metadataMu is locked. +// +checklocks:d.metadataMu func (d *dentry) updateFromGetattrLocked(ctx context.Context) error { // Use d.readFile or d.writeFile, which represent 9P FIDs that have been // opened, in preference to d.file, which represents a 9P fid that has not. @@ -1043,7 +1046,8 @@ func (d *dentry) updateFromGetattrLocked(ctx context.Context) error { _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask()) if handleMuRLocked { - d.handleMu.RUnlock() // must be released before updateFromP9AttrsLocked() + // handleMu must be released before updateFromP9AttrsLocked(). + d.handleMu.RUnlock() // +checklocksforce: complex case. 
} if err != nil { return err @@ -1090,7 +1094,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs return nil } if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 { - return syserror.EPERM + return linuxerr.EPERM } mode := linux.FileMode(atomic.LoadUint32(&d.mode)) if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil { @@ -1110,7 +1114,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs case linux.S_IFDIR: return syserror.EISDIR default: - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -1288,7 +1292,7 @@ func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats // to the remote filesystem. This is inconsistent with Linux's 9p client, // but consistent with other filesystems (e.g. FUSE). if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } mode := linux.FileMode(atomic.LoadUint32(&d.mode)) kuid := auth.KUID(atomic.LoadUint32(&d.uid)) @@ -1469,7 +1473,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo if d.isDeleted() { d.watches.HandleDeletion(ctx) } - d.destroyLocked(ctx) + d.destroyLocked(ctx) // +checklocksforce: renameMu must be acquired at this point. return } // If d still has inotify watches and it is not deleted or invalidated, it @@ -1497,7 +1501,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo delete(d.parent.children, d.name) d.parent.dirMu.Unlock() } - d.destroyLocked(ctx) + d.destroyLocked(ctx) // +checklocksforce: see above. return } @@ -1526,7 +1530,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo d.fs.renameMu.Lock() defer d.fs.renameMu.Unlock() } - d.fs.evictCachedDentryLocked(ctx) + d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above. } } @@ -1543,6 +1547,7 @@ func (d *dentry) removeFromCacheLocked() { // Precondition: fs.renameMu must be locked for writing; it may be temporarily // unlocked. +// +checklocks:fs.renameMu func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) { for fs.cachedDentriesLen != 0 { fs.evictCachedDentryLocked(ctx) @@ -1551,6 +1556,7 @@ func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) { // Preconditions: // * fs.renameMu must be locked for writing; it may be temporarily unlocked. +// +checklocks:fs.renameMu func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { fs.cacheMu.Lock() victim := fs.cachedDentries.Back() @@ -1587,7 +1593,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { // will try to acquire fs.renameMu (which we have already acquired). Hence, // fs.renameMu will synchronize the destroy attempts. victim.cachingMu.Unlock() - victim.destroyLocked(ctx) + victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs. } // destroyLocked destroys the dentry. @@ -1597,6 +1603,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { // * d.refs == 0. // * d.parent.children[d.name] != d, i.e. d is not reachable by path traversal // from its former parent dentry. 
+// +checklocks:d.fs.renameMu func (d *dentry) destroyLocked(ctx context.Context) { switch atomic.LoadInt64(&d.refs) { case 0: @@ -1630,18 +1637,18 @@ func (d *dentry) destroyLocked(ctx context.Context) { d.dataMu.Unlock() // Clunk open fids and close open host FDs. if !d.readFile.isNil() { - d.readFile.close(ctx) + _ = d.readFile.close(ctx) } if !d.writeFile.isNil() && d.readFile != d.writeFile { - d.writeFile.close(ctx) + _ = d.writeFile.close(ctx) } d.readFile = p9file{} d.writeFile = p9file{} if d.readFD >= 0 { - unix.Close(int(d.readFD)) + _ = unix.Close(int(d.readFD)) } if d.writeFD >= 0 && d.readFD != d.writeFD { - unix.Close(int(d.writeFD)) + _ = unix.Close(int(d.writeFD)) } d.readFD = -1 d.writeFD = -1 @@ -1703,7 +1710,7 @@ func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size ui func (d *dentry) getXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) { if d.file.isNil() { - return "", syserror.ENODATA + return "", linuxerr.ENODATA } if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { return "", err @@ -1713,7 +1720,7 @@ func (d *dentry) getXattr(ctx context.Context, creds *auth.Credentials, opts *vf func (d *dentry) setXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetXattrOptions) error { if d.file.isNil() { - return syserror.EPERM + return linuxerr.EPERM } if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { return err @@ -1723,7 +1730,7 @@ func (d *dentry) setXattr(ctx context.Context, creds *auth.Credentials, opts *vf func (d *dentry) removeXattr(ctx context.Context, creds *auth.Credentials, name string) error { if d.file.isNil() { - return syserror.EPERM + return linuxerr.EPERM } if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { return err @@ -1763,7 +1770,7 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool openReadable := !d.readFile.isNil() || read openWritable := !d.writeFile.isNil() || write h, err := openHandle(ctx, d.file, openReadable, openWritable, trunc) - if err == syserror.EACCES && (openReadable != read || openWritable != write) { + if linuxerr.Equals(linuxerr.EACCES, err) && (openReadable != read || openWritable != write) { // It may not be possible to use a single handle for both // reading and writing, since permissions on the file may have // changed to e.g. disallow reading after previously being diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go index c7bf10007..398288ee3 100644 --- a/pkg/sentry/fsimpl/gofer/host_named_pipe.go +++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/syserror" ) @@ -78,7 +79,7 @@ func nonblockingPipeHasWriter(fd int32) (bool, error) { defer tempPipeMu.Unlock() // Copy 1 byte from fd into the temporary pipe. n, err := unix.Tee(int(fd), tempPipeWriteFD, 1, unix.SPLICE_F_NONBLOCK) - if err == syserror.EAGAIN { + if linuxerr.Equals(linuxerr.EAGAIN, err) { // The pipe represented by fd is empty, but has a writer. 
return true, nil } diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index eed05e369..91405fe66 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" @@ -34,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -124,14 +124,14 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs }() if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } // Check for reading at EOF before calling into MM (but not under @@ -194,14 +194,14 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off // offset should be ignored by PWrite. func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if offset < 0 { - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, offset, syserror.EOPNOTSUPP + return 0, offset, linuxerr.EOPNOTSUPP } d := fd.dentry() @@ -297,7 +297,7 @@ func (fd *regularFileFD) writeCache(ctx context.Context, d *dentry, offset int64 pgstart := hostarch.PageRoundDown(uint64(offset)) pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes())) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mr := memmap.MappableRange{pgstart, pgend} var freed []memmap.FileRange @@ -652,20 +652,20 @@ func regularFileSeekLocked(ctx context.Context, d *dentry, fdOffset, offset int6 offset += size case linux.SEEK_DATA: if offset > size { - return 0, syserror.ENXIO + return 0, linuxerr.ENXIO } // Use offset as specified. case linux.SEEK_HOLE: if offset > size { - return 0, syserror.ENXIO + return 0, linuxerr.ENXIO } offset = size } default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } return offset, nil } @@ -678,28 +678,28 @@ func (fd *regularFileFD) Sync(ctx context.Context) error { // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { d := fd.dentry() - switch d.fs.opts.interop { - case InteropModeExclusive: - // Any mapping is fine. - case InteropModeWritethrough: - // Shared writable mappings require a host FD, since otherwise we can't - // synchronously flush memory-mapped writes to the remote file. - if opts.Private || !opts.MaxPerms.Write { - break - } - fallthrough - case InteropModeShared: - // All mappings require a host FD to be coherent with other filesystem - // users. - if d.fs.opts.forcePageCache { - // Whether or not we have a host FD, we're not allowed to use it. - return syserror.ENODEV - } - if atomic.LoadInt32(&d.mmapFD) < 0 { - return syserror.ENODEV + // Force sentry page caching at your own risk. 
+ if !d.fs.opts.forcePageCache { + switch d.fs.opts.interop { + case InteropModeExclusive: + // Any mapping is fine. + case InteropModeWritethrough: + // Shared writable mappings require a host FD, since otherwise we + // can't synchronously flush memory-mapped writes to the remote + // file. + if opts.Private || !opts.MaxPerms.Write { + break + } + fallthrough + case InteropModeShared: + // All mappings require a host FD to be coherent with other + // filesystem users. + if atomic.LoadInt32(&d.mmapFD) < 0 { + return linuxerr.ENODEV + } + default: + panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) } - default: - panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) } // After this point, d may be used as a memmap.Mappable. d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init) @@ -708,12 +708,12 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt } func (d *dentry) mayCachePages() bool { - if d.fs.opts.interop == InteropModeShared { - return false - } if d.fs.opts.forcePageCache { return true } + if d.fs.opts.interop == InteropModeShared { + return false + } return atomic.LoadInt32(&d.mmapFD) >= 0 } diff --git a/pkg/sentry/fsimpl/gofer/revalidate.go b/pkg/sentry/fsimpl/gofer/revalidate.go index 8f81f0822..226790a11 100644 --- a/pkg/sentry/fsimpl/gofer/revalidate.go +++ b/pkg/sentry/fsimpl/gofer/revalidate.go @@ -247,16 +247,16 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF if found && !d.isSynthetic() { // First dentry is where the search is starting, just update attributes // since it cannot be replaced. - d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) + d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: acquired by lockAllMetadata. } - d.metadataMu.Unlock() + d.metadataMu.Unlock() // +checklocksforce: see above. continue } // Note that synthetic dentries will always fails the comparison check // below. if !found || d.qidPath != stats[i].QID.Path { - d.metadataMu.Unlock() + d.metadataMu.Unlock() // +checklocksforce: see above. if !found && d.isSynthetic() { // We have a synthetic file, and no remote file has arisen to replace // it. @@ -298,7 +298,7 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF } // The file at this path hasn't changed. Just update cached metadata. - d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) + d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: see above. d.metadataMu.Unlock() } @@ -354,6 +354,7 @@ func (r *revalidateState) add(name string, d *dentry) { r.dentries = append(r.dentries, d) } +// +checklocksignore func (r *revalidateState) lockAllMetadata() { for _, d := range r.dentries { d.metadataMu.Lock() @@ -372,6 +373,7 @@ func (r *revalidateState) popFront() *dentry { // reset releases all metadata locks and resets all fields to allow this // instance to be reused. +// +checklocksignore func (r *revalidateState) reset() { if r.locked { // Unlock any remaining dentries. 
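Most of the annotations added in this change (+checklocks, +checklocksrelease, +checklocksforce, +checklocksignore) do not alter behavior; they are directives for gVisor's checklocks static analyzer describing which locks a function expects to hold, release, or wants the tool to ignore at a given call site. A small, hypothetical example of the two most common forms (the code runs identically with or without the comments):

package main

import (
	"fmt"
	"sync"
)

type counter struct {
	mu sync.Mutex
	n  int
}

// incLocked requires that c.mu is already held by the caller, which is what
// the +checklocks annotation tells the analyzer.
// +checklocks:c.mu
func (c *counter) incLocked() {
	c.n++
}

// incAndUnlock releases c.mu on behalf of the caller; +checklocksrelease
// documents that hand-off so call sites are checked accordingly.
// +checklocksrelease:c.mu
func (c *counter) incAndUnlock() {
	c.incLocked()
	c.mu.Unlock()
}

func main() {
	c := &counter{}
	c.mu.Lock()
	c.incAndUnlock()
	fmt.Println(c.n) // 1
}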
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go index 83e841a51..e67422a2f 100644 --- a/pkg/sentry/fsimpl/gofer/save_restore.go +++ b/pkg/sentry/fsimpl/gofer/save_restore.go @@ -21,13 +21,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) type saveRestoreContextID int @@ -92,7 +92,7 @@ func (fd *specialFileFD) savePipeData(ctx context.Context) error { fd.buf = append(fd.buf, buf[:n]...) } if err != nil { - if err == io.EOF || err == syserror.EAGAIN { + if err == io.EOF || linuxerr.Equals(linuxerr.EAGAIN, err) { break } return err diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index c12444b7e..4b59c1c3c 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/p9" @@ -41,6 +42,11 @@ import ( type specialFileFD struct { fileDescription + // releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe + // to access fd.handle without locking for operations that require a ref to + // be held by the caller, e.g. vfs.FileDescriptionImpl implementations. + releaseMu sync.RWMutex `state:"nosave"` + // handle is used for file I/O. handle is immutable. handle handle `state:"nosave"` @@ -116,7 +122,10 @@ func (fd *specialFileFD) Release(ctx context.Context) { if fd.haveQueue { fdnotifier.RemoveFD(fd.handle.fd) } + fd.releaseMu.Lock() fd.handle.close(ctx) + fd.releaseMu.Unlock() + fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) fs.syncMu.Lock() delete(fs.specialFileFDs, fd) @@ -183,14 +192,14 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs }() if fd.seekable && offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } if d := fd.dentry(); d.cachedMetadataAuthoritative() { @@ -228,7 +237,7 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs // Just buffer the read instead. buf := make([]byte, dst.NumBytes()) n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) - if err == syserror.EAGAIN { + if linuxerr.Equals(linuxerr.EAGAIN, err) { err = syserror.ErrWouldBlock } if n == 0 { @@ -263,14 +272,14 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off // offset should be ignored by PWrite. func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if fd.seekable && offset < 0 { - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. 
if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, offset, syserror.EOPNOTSUPP + return 0, offset, linuxerr.EOPNOTSUPP } d := fd.dentry() @@ -316,7 +325,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off return 0, offset, copyErr } n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset)) - if err == syserror.EAGAIN { + if linuxerr.Equals(linuxerr.EAGAIN, err) { err = syserror.ErrWouldBlock } // Update offset if the offset is valid. @@ -354,7 +363,7 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { if !fd.seekable { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } fd.mu.Lock() defer fd.mu.Unlock() @@ -372,6 +381,13 @@ func (fd *specialFileFD) Sync(ctx context.Context) error { } func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error { + // Locks to ensure it didn't race with fd.Release(). + fd.releaseMu.RLock() + defer fd.releaseMu.RUnlock() + + if !fd.handle.isOpen() { + return nil + } err := func() error { // If we have a host FD, fsyncing it is likely to be faster than an fsync // RPC. diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go index 2ec819f86..dbd834c67 100644 --- a/pkg/sentry/fsimpl/gofer/symlink.go +++ b/pkg/sentry/fsimpl/gofer/symlink.go @@ -41,7 +41,7 @@ func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) { d.haveTarget = true d.target = target } - d.dataMu.Unlock() + d.dataMu.Unlock() // +checklocksforce: guaranteed locked from above. } return target, err } diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index b94dfeb7f..476545d00 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -45,10 +45,10 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fdnotifier", "//pkg/fspath", "//pkg/hostarch", - "//pkg/iovec", "//pkg/log", "//pkg/marshal/primitive", "//pkg/refs", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index a81f550b1..89aa7b3d9 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -24,6 +24,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" @@ -41,6 +42,36 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// These are the modes that are stored with virtualOwner. +const virtualOwnerModes = linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID + +// +stateify savable +type virtualOwner struct { + // This field is initialized at creation time and is immutable. + enabled bool + + // mu protects the fields below and they can be accessed using atomic memory + // operations. + mu sync.Mutex `state:"nosave"` + uid uint32 + gid uint32 + // mode is also stored, otherwise setting the host file to `0000` could remove + // access to the file. + mode uint32 +} + +func (v *virtualOwner) atomicUID() uint32 { + return atomic.LoadUint32(&v.uid) +} + +func (v *virtualOwner) atomicGID() uint32 { + return atomic.LoadUint32(&v.gid) +} + +func (v *virtualOwner) atomicMode() uint32 { + return atomic.LoadUint32(&v.mode) +} + // inode implements kernfs.Inode. 
// // +stateify savable @@ -97,6 +128,11 @@ type inode struct { // Event queue for blocking operations. queue waiter.Queue + // virtualOwner caches ownership and permission information to override the + // underlying file owner and permission. This is used to allow the unstrusted + // application to change these fields without affecting the host. + virtualOwner virtualOwner + // If haveBuf is non-zero, hostFD represents a pipe, and buf contains data // read from the pipe from previous calls to inode.beforeSave(). haveBuf // and buf are protected by bufMu. haveBuf is accessed using atomic memory @@ -109,12 +145,12 @@ type inode struct { func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fileType linux.FileMode, isTTY bool) (*inode, error) { // Determine if hostFD is seekable. _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR) - seekable := err != syserror.ESPIPE + seekable := !linuxerr.Equals(linuxerr.ESPIPE, err) // We expect regular files to be seekable, as this is required for them to // be memory-mappable. if !seekable && fileType == unix.S_IFREG { ctx.Infof("host.newInode: host FD %d is a non-seekable regular file", hostFD) - return nil, syserror.ESPIPE + return nil, linuxerr.ESPIPE } i := &inode{ @@ -146,7 +182,7 @@ func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fil type NewFDOptions struct { // If Savable is true, the host file descriptor may be saved/restored by // numeric value; the sandbox API requires a corresponding host FD with the - // same numeric value to be provieded at time of restore. + // same numeric value to be provided at time of restore. Savable bool // If IsTTY is true, the file descriptor is a TTY. @@ -156,6 +192,12 @@ type NewFDOptions struct { // the new file description will inherit flags from hostFD. HaveFlags bool Flags uint32 + + // VirtualOwner allow the host file to have owner and permissions different + // than the underlying host file. + VirtualOwner bool + UID auth.KUID + GID auth.KGID } // NewFD returns a vfs.FileDescription representing the given host file @@ -167,8 +209,8 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) } // Retrieve metadata. - var s unix.Stat_t - if err := unix.Fstat(hostFD, &s); err != nil { + var stat unix.Stat_t + if err := unix.Fstat(hostFD, &stat); err != nil { return nil, err } @@ -182,11 +224,19 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) flags = uint32(flagsInt) } - d := &kernfs.Dentry{} - i, err := newInode(ctx, fs, hostFD, opts.Savable, linux.FileMode(s.Mode).FileType(), opts.IsTTY) + fileType := linux.FileMode(stat.Mode).FileType() + i, err := newInode(ctx, fs, hostFD, opts.Savable, fileType, opts.IsTTY) if err != nil { return nil, err } + if opts.VirtualOwner { + i.virtualOwner.enabled = true + i.virtualOwner.uid = uint32(opts.UID) + i.virtualOwner.gid = uint32(opts.GID) + i.virtualOwner.mode = stat.Mode + } + + d := &kernfs.Dentry{} d.Init(&fs.Filesystem, i) // i.open will take a reference on d. @@ -195,15 +245,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) // For simplicity, fileDescription.offset is set to 0. Technically, we // should only set to 0 on files that are not seekable (sockets, pipes, // etc.), and use the offset from the host fd otherwise when importing. - return i.open(ctx, d, mnt, flags) -} - -// ImportFD sets up and returns a vfs.FileDescription from a donated fd. 
-func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { - return NewFD(ctx, mnt, hostFD, &NewFDOptions{ - Savable: true, - IsTTY: isTTY, - }) + return i.open(ctx, d, mnt, fileType, flags) } // filesystemType implements vfs.FilesystemType. @@ -269,7 +311,7 @@ func (fs *filesystem) MountOptions() string { // CheckPermissions implements kernfs.Inode.CheckPermissions. func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { return err } return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid)) @@ -278,7 +320,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a // Mode implements kernfs.Inode.Mode. func (i *inode) Mode() linux.FileMode { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { // Retrieving the mode from the host fd using fstat(2) should not fail. // If the syscall does not succeed, something is fundamentally wrong. panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err)) @@ -289,10 +331,10 @@ func (i *inode) Mode() linux.FileMode { // Stat implements kernfs.Inode.Stat. func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { if opts.Mask&linux.STATX__RESERVED != 0 { - return linux.Statx{}, syserror.EINVAL + return linux.Statx{}, linuxerr.EINVAL } if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { - return linux.Statx{}, syserror.EINVAL + return linux.Statx{}, linuxerr.EINVAL } fs := vfsfs.Impl().(*filesystem) @@ -301,11 +343,11 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp mask := opts.Mask & linux.STATX_ALL var s unix.Statx_t err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s) - if err == syserror.ENOSYS { + if linuxerr.Equals(linuxerr.ENOSYS, err) { // Fallback to fstat(2), if statx(2) is not supported on the host. // // TODO(b/151263641): Remove fallback. - return i.fstat(fs) + return i.statxFromStat(fs) } if err != nil { return linux.Statx{}, err @@ -329,19 +371,35 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp // device numbers. 
ls.Mask |= s.Mask & linux.STATX_ALL if s.Mask&linux.STATX_TYPE != 0 { - ls.Mode |= s.Mode & linux.S_IFMT + if i.virtualOwner.enabled { + ls.Mode |= uint16(i.virtualOwner.atomicMode()) & linux.S_IFMT + } else { + ls.Mode |= s.Mode & linux.S_IFMT + } } if s.Mask&linux.STATX_MODE != 0 { - ls.Mode |= s.Mode &^ linux.S_IFMT + if i.virtualOwner.enabled { + ls.Mode |= uint16(i.virtualOwner.atomicMode()) &^ linux.S_IFMT + } else { + ls.Mode |= s.Mode &^ linux.S_IFMT + } } if s.Mask&linux.STATX_NLINK != 0 { ls.Nlink = s.Nlink } if s.Mask&linux.STATX_UID != 0 { - ls.UID = s.Uid + if i.virtualOwner.enabled { + ls.UID = i.virtualOwner.atomicUID() + } else { + ls.UID = s.Uid + } } if s.Mask&linux.STATX_GID != 0 { - ls.GID = s.Gid + if i.virtualOwner.enabled { + ls.GID = i.virtualOwner.atomicGID() + } else { + ls.GID = s.Gid + } } if s.Mask&linux.STATX_ATIME != 0 { ls.Atime = unixToLinuxStatxTimestamp(s.Atime) @@ -365,7 +423,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp return ls, nil } -// fstat is a best-effort fallback for inode.Stat() if the host does not +// statxFromStat is a best-effort fallback for inode.Stat() if the host does not // support statx(2). // // We ignore the mask and sync flags in opts and simply supply @@ -373,9 +431,9 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp // of a mask or sync flags. fstat(2) does not provide any metadata // equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so // those fields remain empty. -func (i *inode) fstat(fs *filesystem) (linux.Statx, error) { +func (i *inode) statxFromStat(fs *filesystem) (linux.Statx, error) { var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + if err := i.stat(&s); err != nil { return linux.Statx{}, err } @@ -399,7 +457,21 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) { }, nil } +func (i *inode) stat(stat *unix.Stat_t) error { + if err := unix.Fstat(i.hostFD, stat); err != nil { + return err + } + if i.virtualOwner.enabled { + stat.Uid = i.virtualOwner.atomicUID() + stat.Gid = i.virtualOwner.atomicGID() + stat.Mode = i.virtualOwner.atomicMode() + } + return nil +} + // SetStat implements kernfs.Inode.SetStat. +// +// +checklocksignore func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { s := &opts.Stat @@ -407,11 +479,22 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if m == 0 { return nil } - if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 { - return syserror.EPERM + supportedModes := uint32(linux.STATX_MODE | linux.STATX_SIZE | linux.STATX_ATIME | linux.STATX_MTIME) + if i.virtualOwner.enabled { + if m&virtualOwnerModes != 0 { + // Take lock if any of the virtual owner fields will be updated. 
+ i.virtualOwner.mu.Lock() + defer i.virtualOwner.mu.Unlock() + } + + supportedModes |= virtualOwnerModes } + if m&^supportedModes != 0 { + return linuxerr.EPERM + } + var hostStat unix.Stat_t - if err := unix.Fstat(i.hostFD, &hostStat); err != nil { + if err := i.stat(&hostStat); err != nil { return err } if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil { @@ -419,13 +502,17 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre } if m&linux.STATX_MODE != 0 { - if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { - return err + if i.virtualOwner.enabled { + i.virtualOwner.mode = uint32(opts.Stat.Mode) + } else { + if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { + return err + } } } if m&linux.STATX_SIZE != 0 { if hostStat.Mode&linux.S_IFMT != linux.S_IFREG { - return syserror.EINVAL + return linuxerr.EINVAL } if err := unix.Ftruncate(i.hostFD, int64(s.Size)); err != nil { return err @@ -448,6 +535,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre return err } } + if i.virtualOwner.enabled { + if m&linux.STATX_UID != 0 { + i.virtualOwner.uid = opts.Stat.UID + } + if m&linux.STATX_GID != 0 { + i.virtualOwner.gid = opts.Stat.GID + } + } return nil } @@ -470,18 +565,17 @@ func (i *inode) DecRef(ctx context.Context) { func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { // Once created, we cannot re-open a socket fd through /proc/[pid]/fd/. if i.Mode().FileType() == linux.S_IFSOCK { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } - return i.open(ctx, d, rp.Mount(), opts.Flags) -} - -func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) { - var s unix.Stat_t - if err := unix.Fstat(i.hostFD, &s); err != nil { + var stat unix.Stat_t + if err := i.stat(&stat); err != nil { return nil, err } - fileType := s.Mode & linux.FileTypeMask + fileType := linux.FileMode(stat.Mode).FileType() + return i.open(ctx, d, rp.Mount(), fileType, opts.Flags) +} +func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, fileType linux.FileMode, flags uint32) (*vfs.FileDescription, error) { // Constrain flags to a subset we can handle. // // TODO(gvisor.dev/issue/2601): Support O_NONBLOCK by adding RWF_NOWAIT to pread/pwrite calls. @@ -491,7 +585,7 @@ func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flag case unix.S_IFSOCK: if i.isTTY { log.Warningf("cannot use host socket fd %d as TTY", i.hostFD) - return nil, syserror.ENOTTY + return nil, linuxerr.ENOTTY } ep, err := newEndpoint(ctx, i.hostFD, &i.queue) @@ -529,7 +623,7 @@ func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flag default: log.Warningf("cannot import host fd %d with file type %o", i.hostFD, fileType) - return nil, syserror.EPERM + return nil, linuxerr.EPERM } } @@ -584,12 +678,12 @@ func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, off // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. 
if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } i := f.inode if !i.seekable { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } return readFromHostFD(ctx, i.hostFD, dst, offset, opts.Flags) @@ -601,7 +695,7 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } i := f.inode @@ -660,7 +754,7 @@ func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, off // PWrite implements vfs.FileDescriptionImpl.PWrite. func (f *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { if !f.inode.seekable { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } return f.writeToHostFD(ctx, src, offset, opts.Flags) @@ -700,7 +794,7 @@ func (f *fileDescription) writeToHostFD(ctx context.Context, src usermem.IOSeque hostFD := f.inode.hostFD // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } writer := hostfd.GetReadWriterAt(int32(hostFD), offset, flags) n, err := src.CopyInTo(ctx, writer) @@ -721,7 +815,7 @@ func (f *fileDescription) writeToHostFD(ctx context.Context, src usermem.IOSeque func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (int64, error) { i := f.inode if !i.seekable { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } f.offsetMu.Lock() @@ -730,17 +824,17 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i switch whence { case linux.SEEK_SET: if offset < 0 { - return f.offset, syserror.EINVAL + return f.offset, linuxerr.EINVAL } f.offset = offset case linux.SEEK_CUR: // Check for overflow. Note that underflow cannot occur, since f.offset >= 0. if offset > math.MaxInt64-f.offset { - return f.offset, syserror.EOVERFLOW + return f.offset, linuxerr.EOVERFLOW } if f.offset+offset < 0 { - return f.offset, syserror.EINVAL + return f.offset, linuxerr.EINVAL } f.offset += offset @@ -753,10 +847,10 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i // Check for overflow. Note that underflow cannot occur, since size >= 0. if offset > math.MaxInt64-size { - return f.offset, syserror.EOVERFLOW + return f.offset, linuxerr.EOVERFLOW } if size+offset < 0 { - return f.offset, syserror.EINVAL + return f.offset, linuxerr.EINVAL } f.offset = size + offset @@ -773,7 +867,7 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i default: // Invalid whence. - return f.offset, syserror.EINVAL + return f.offset, linuxerr.EINVAL } return f.offset, nil @@ -790,7 +884,7 @@ func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts // NOTE(b/38213152): Technically, some obscure char devices can be memory // mapped, but we only allow regular files. 
if f.inode.ftype != unix.S_IFREG { - return syserror.ENODEV + return linuxerr.ENODEV } i := f.inode i.CachedMappable.InitFileMapperOnce() diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go index ca85f5601..709d5747d 100644 --- a/pkg/sentry/fsimpl/host/socket.go +++ b/pkg/sentry/fsimpl/host/socket.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/socket/control" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/waiter" @@ -158,9 +158,9 @@ func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMess if n < totalLen && err == nil { // The host only returns a short write if it would otherwise // block (and only for stream sockets). - err = syserror.EAGAIN + err = linuxerr.EAGAIN } - if n > 0 && err != syserror.EAGAIN { + if n > 0 && !linuxerr.Equals(linuxerr.EAGAIN, err) { // The caller may need to block to send more data, but // otherwise there isn't anything that can be done about an // error with a partial write. diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go index b123a63ee..292b44c43 100644 --- a/pkg/sentry/fsimpl/host/socket_iovec.go +++ b/pkg/sentry/fsimpl/host/socket_iovec.go @@ -16,8 +16,8 @@ package host import ( "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/iovec" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/sentry/hostfd" ) // copyToMulti copies as many bytes from src to dst as possible. @@ -64,13 +64,13 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec if length > maxlen { if truncate { stopLen = maxlen - err = syserror.EAGAIN + err = linuxerr.EAGAIN } else { - return 0, nil, nil, syserror.EMSGSIZE + return 0, nil, nil, linuxerr.EMSGSIZE } } - if iovsRequired > iovec.MaxIovs { + if iovsRequired > hostfd.MaxSendRecvMsgIov { // The kernel will reject our call if we pass this many iovs. // Use a single intermediate buffer instead. b := make([]byte, stopLen) diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 0f9e20a84..7f6ce4ee5 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -17,6 +17,7 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -147,7 +148,7 @@ func (t *TTYFileDescription) Write(ctx context.Context, src usermem.IOSequence, func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // Ignore arg[0]. 
This is the real FD: @@ -188,7 +189,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch pidns := kernel.PIDNamespaceFromContext(ctx) if pidns == nil { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } t.mu.Lock() @@ -211,15 +212,15 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { // drivers/tty/tty_io.c:tiocspgrp() converts -EIO from tty_check_change() // to -ENOTTY. - if err == syserror.EIO { - return 0, syserror.ENOTTY + if linuxerr.Equals(linuxerr.EIO, err) { + return 0, linuxerr.ENOTTY } return 0, err } // Check that calling task's process group is in the TTY session. if task.ThreadGroup().Session() != t.session { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } var pgIDP primitive.Int32 @@ -230,19 +231,19 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch // pgID must be non-negative. if pgID < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Process group with pgID must exist in this PID namespace. pidns := task.PIDNamespace() pg := pidns.ProcessGroupWithID(pgID) if pg == nil { - return 0, syserror.ESRCH + return 0, linuxerr.ESRCH } // Check that new process group is in the TTY session. if pg.Session() != t.session { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } t.fgProcessGroup = pg @@ -302,7 +303,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch unimpl.EmitUnimplementedEvent(ctx) fallthrough default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go index 63b465859..95d7ebe2e 100644 --- a/pkg/sentry/fsimpl/host/util.go +++ b/pkg/sentry/fsimpl/host/util.go @@ -17,7 +17,7 @@ package host import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec { @@ -44,5 +44,5 @@ func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp { // isBlockError checks if an error is EAGAIN or EWOULDBLOCK. // If so, they can be transformed into syserror.ErrWouldBlock. 
func isBlockError(err error) bool { - return err == syserror.EAGAIN || err == syserror.EWOULDBLOCK + return linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err) } diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index b7d13cced..d53937db6 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -104,6 +104,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/log", @@ -135,6 +136,7 @@ go_test( ":kernfs", "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 84b1c3745..9d7526e47 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -71,7 +71,7 @@ func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, d *D // inode attributes to be changed. Override SetStat() making it call // f.InodeAttrs to allow it. func (*DynamicBytesFile) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // DynamicBytesFD implements vfs.FileDescriptionImpl for an FD backed by a @@ -137,5 +137,5 @@ func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error { // DynamicBytesFiles are immutable. - return syserror.EPERM + return linuxerr.EPERM } diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index e55111af0..8b008dc10 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -248,10 +249,10 @@ func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int panic(fmt.Sprintf("Invalid GenericDirectoryFD.seekEnd = %v", fd.seekEnd)) } default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.off = offset return offset, nil diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 8fac53c60..a97473f7d 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" @@ -39,7 +40,7 @@ import ( // Postcondition: Caller must call fs.processDeferredDecRefs*. 
func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, mayFollowSymlinks bool) (*Dentry, error) { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // Directory searchable? if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { @@ -70,7 +71,7 @@ afterSymlink: return d.parent, nil } if len(name) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } d.dirMu.Lock() next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, d.children[name]) @@ -169,7 +170,7 @@ func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingP } } if rp.MustBeDir() && !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -196,7 +197,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving } } if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -214,13 +215,13 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string return err } if name == "." || name == ".." { - return syserror.EEXIST + return linuxerr.EEXIST } if len(name) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } if _, ok := parent.children[name]; ok { - return syserror.EEXIST + return linuxerr.EEXIST } if parent.VFSDentry().IsDead() { return syserror.ENOENT @@ -237,7 +238,7 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) error { parent := d.parent if parent == nil { - return syserror.EBUSY + return linuxerr.EBUSY } if parent.vfsd.IsDead() { return syserror.ENOENT @@ -317,7 +318,7 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op if opts.CheckSearchable { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -344,7 +345,7 @@ func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa // LinkAt implements vfs.FilesystemImpl.LinkAt. func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } fs.mu.Lock() defer fs.processDeferredDecRefs(ctx) @@ -364,7 +365,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.ENOENT } if rp.Mount() != vd.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } if err := rp.Mount().CheckBeginWrite(); err != nil { return err @@ -373,7 +374,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. d := vd.Dentry().Impl().(*Dentry) if d.isDir() { - return syserror.EPERM + return linuxerr.EPERM } childI, err := parent.inode.NewLink(ctx, pc, d.inode) @@ -389,7 +390,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 
func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } fs.mu.Lock() defer fs.processDeferredDecRefs(ctx) @@ -411,7 +412,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v defer rp.Mount().EndWrite() childI, err := parent.inode.NewDir(ctx, pc, opts) if err != nil { - if !opts.ForSyntheticMountpoint || err == syserror.EEXIST { + if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) { return err } childI = newSyntheticDirectory(ctx, rp.Credentials(), opts.Mode) @@ -425,7 +426,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } fs.mu.Lock() defer fs.processDeferredDecRefs(ctx) @@ -511,7 +512,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf return nil, syserror.EISDIR } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { return nil, err @@ -542,11 +543,11 @@ afterTrailingSymlink: return nil, syserror.EISDIR } if len(pc) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } // Determine whether or not we need to create a file. child, err := fs.stepExistingLocked(ctx, rp, parent, false /* mayFollowSymlinks */) - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { // Already checked for searchability above; now check for writability. if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { return nil, err @@ -576,7 +577,7 @@ afterTrailingSymlink: } // Open existing file or follow symlink. if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } if rp.ShouldFollowSymlink() && child.isSymlink() { targetVD, targetPathname, err := child.inode.Getlink(ctx, rp.Mount()) @@ -622,7 +623,7 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } if !d.isSymlink() { fs.mu.RUnlock() - return "", syserror.EINVAL + return "", linuxerr.EINVAL } // Inode.Readlink() cannot be called holding fs locks. @@ -648,13 +649,13 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // Only RENAME_NOREPLACE is supported. if opts.Flags&^linux.RENAME_NOREPLACE != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 mnt := rp.Mount() if mnt != oldParentVD.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } if err := mnt.CheckBeginWrite(); err != nil { return err @@ -680,17 +681,19 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa newName := rp.Component() if newName == "." || newName == ".." { if noReplace { - return syserror.EEXIST + return linuxerr.EEXIST } - return syserror.EBUSY + return linuxerr.EBUSY } - switch err := checkCreateLocked(ctx, rp.Credentials(), newName, dstDir); err { - case nil: + + err = checkCreateLocked(ctx, rp.Credentials(), newName, dstDir) + switch { + case err == nil: // Ok, continue with rename as replacement. - case syserror.EEXIST: + case linuxerr.Equals(linuxerr.EEXIST, err): if noReplace { // Won't overwrite existing node since RENAME_NOREPLACE was requested. 
- return syserror.EEXIST + return linuxerr.EEXIST } dst = dstDir.children[newName] if dst == nil { @@ -749,7 +752,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa fs.deferDecRef(replaced) replaceVFSD = replaced.VFSDentry() } - virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) + virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay. return nil } @@ -771,10 +774,10 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } if !d.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } if d.inode.HasChildren() { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } virtfs := rp.VirtualFilesystem() parentDentry := d.parent @@ -785,7 +788,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error defer mntns.DecRef(ctx) vfsd := d.VFSDentry() if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { - return err + return err // +checklocksforce: vfsd is not locked. } if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil { @@ -841,7 +844,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { if rp.Done() { - return syserror.EEXIST + return linuxerr.EEXIST } fs.mu.Lock() defer fs.processDeferredDecRefs(ctx) @@ -927,7 +930,7 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { return nil, err } - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. @@ -940,7 +943,7 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si return nil, err } // kernfs currently does not support extended attributes. - return nil, syserror.ENOTSUP + return nil, linuxerr.ENOTSUP } // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. @@ -953,7 +956,7 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt return "", err } // kernfs currently does not support extended attributes. - return "", syserror.ENOTSUP + return "", linuxerr.ENOTSUP } // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. @@ -966,7 +969,7 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt return err } // kernfs currently does not support extended attributes. - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. @@ -979,7 +982,7 @@ func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, return err } // kernfs currently does not support extended attributes. - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // PrependPath implements vfs.FilesystemImpl.PrependPath. 
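The hunks above consistently replace direct sentinel comparisons such as err == syserror.EEXIST with linuxerr.Equals(linuxerr.EEXIST, err). A minimal standalone sketch of that comparison-by-errno idea follows; the errnoError type and equals helper are illustrative stand-ins, not the real linuxerr internals.

package main

import (
	"errors"
	"fmt"

	"golang.org/x/sys/unix"
)

// errnoError is a hypothetical error type carrying a Linux errno, standing in
// for the sentinels defined by pkg/errors/linuxerr.
type errnoError struct {
	errno unix.Errno
	msg   string
}

func (e *errnoError) Error() string { return e.msg }

var eexist = &errnoError{unix.EEXIST, "file exists"}

// equals reports whether err carries the same errno as want, regardless of the
// concrete error type. This mirrors the motivation for linuxerr.Equals: plain
// == breaks once two packages return different values for the same errno.
func equals(want *errnoError, err error) bool {
	if err == nil {
		return false
	}
	var e *errnoError
	if errors.As(err, &e) {
		return e.errno == want.errno
	}
	var sys unix.Errno
	if errors.As(err, &sys) {
		return sys == want.errno
	}
	return false
}

func main() {
	// A raw unix.Errno and the package sentinel compare equal by errno,
	// even though err == eexist would be false.
	var err error = unix.EEXIST
	fmt.Println(equals(eexist, err)) // true
}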
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 3d0866ecf..a42fc79b4 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -61,27 +62,27 @@ type InodeDirectoryNoNewChildren struct{} // NewFile implements Inode.NewFile. func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewDir implements Inode.NewDir. func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewLink implements Inode.NewLink. func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewSymlink implements Inode.NewSymlink. func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewNode implements Inode.NewNode. func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // InodeNotDirectory partially implements the Inode interface, specifically the @@ -158,12 +159,12 @@ type InodeNotSymlink struct{} // Readlink implements Inode.Readlink. func (InodeNotSymlink) Readlink(context.Context, *vfs.Mount) (string, error) { - return "", syserror.EINVAL + return "", linuxerr.EINVAL } // Getlink implements Inode.Getlink. func (InodeNotSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry, string, error) { - return vfs.VirtualDentry{}, "", syserror.EINVAL + return vfs.VirtualDentry{}, "", linuxerr.EINVAL } // InodeAttrs partially implements the Inode interface, specifically the @@ -285,7 +286,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut // allowed by kernfs files but does not do anything. If some other behavior is // needed, the embedder should consider extending SetStat. if opts.Stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 { - return syserror.EPERM + return linuxerr.EPERM } if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() { return syserror.EISDIR @@ -510,7 +511,7 @@ func (o *OrderedChildren) insert(name string, child Inode, static bool) error { o.mu.Lock() defer o.mu.Unlock() if _, ok := o.set[name]; ok { - return syserror.EEXIST + return linuxerr.EEXIST } s := &slot{ name: name, @@ -569,7 +570,7 @@ func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error { // Unlink implements Inode.Unlink. func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error { if !o.writable { - return syserror.EPERM + return linuxerr.EPERM } o.mu.Lock() defer o.mu.Unlock() @@ -599,15 +600,15 @@ func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) e // Postcondition: reference on any replaced dentry transferred to caller. 
func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error { if !o.writable { - return syserror.EPERM + return linuxerr.EPERM } dst, ok := dstDir.(interface{}).(*OrderedChildren) if !ok { - return syserror.EXDEV + return linuxerr.EXDEV } if !dst.writable { - return syserror.EPERM + return linuxerr.EPERM } // Note: There's a potential deadlock below if concurrent calls to Rename @@ -653,7 +654,7 @@ type InodeSymlink struct { // Open implements Inode.Open. func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return nil, syserror.ELOOP + return nil, linuxerr.ELOOP } // StaticDirectory is a standard implementation of a directory with static @@ -709,7 +710,7 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De // SetStat implements Inode.SetStat not allowing inode attributes to be changed. func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // DecRef implements Inode.DecRef. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 6f699c9cd..0e2867d49 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -52,7 +52,7 @@ // vfs.VirtualFilesystem.mountMu // vfs.Dentry.mu // (inode implementation locks, if any) -// kernfs.Filesystem.droppedDentriesMu +// kernfs.Filesystem.deferredDecRefsMu package kernfs import ( @@ -76,12 +76,12 @@ import ( type Filesystem struct { vfsfs vfs.Filesystem - droppedDentriesMu sync.Mutex `state:"nosave"` + deferredDecRefsMu sync.Mutex `state:"nosave"` - // droppedDentries is a list of dentries waiting to be DecRef()ed. This is + // deferredDecRefs is a list of dentries waiting to be DecRef()ed. This is // used to defer dentry destruction until mu can be acquired for - // writing. Protected by droppedDentriesMu. - droppedDentries []*Dentry + // writing. Protected by deferredDecRefsMu. + deferredDecRefs []refsvfs2.RefCounter // mu synchronizes the lifetime of Dentries on this filesystem. Holding it // for reading guarantees continued existence of any resolved dentries, but @@ -131,25 +131,49 @@ type Filesystem struct { // deferDecRef defers dropping a dentry ref until the next call to // processDeferredDecRefs{,Locked}. See comment on Filesystem.mu. // This may be called while Filesystem.mu or Dentry.dirMu is locked. -func (fs *Filesystem) deferDecRef(d *Dentry) { - fs.droppedDentriesMu.Lock() - fs.droppedDentries = append(fs.droppedDentries, d) - fs.droppedDentriesMu.Unlock() +func (fs *Filesystem) deferDecRef(d refsvfs2.RefCounter) { + fs.deferredDecRefsMu.Lock() + fs.deferredDecRefs = append(fs.deferredDecRefs, d) + fs.deferredDecRefsMu.Unlock() +} + +// SafeDecRefFD safely DecRef the FileDescription making sure DecRef is deferred +// in case Filesystem.mu is held. See comment on Filesystem.mu. +func (fs *Filesystem) SafeDecRefFD(ctx context.Context, fd *vfs.FileDescription) { + if d, ok := fd.Dentry().Impl().(*Dentry); ok && d.fs == fs { + // Only defer if dentry belongs to this filesystem, since locks cannot cross + // filesystems. + fs.deferDecRef(fd) + return + } + fd.DecRef(ctx) +} + +// SafeDecRef safely DecRef the virtual dentry making sure DecRef is deferred +// in case Filesystem.mu is held. See comment on Filesystem.mu. 
+func (fs *Filesystem) SafeDecRef(ctx context.Context, vd vfs.VirtualDentry) { + if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { + // Only defer if dentry belongs to this filesystem, since locks cannot cross + // filesystems. + fs.deferDecRef(&vd) + return + } + vd.DecRef(ctx) } // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the -// droppedDentries list. See comment on Filesystem.mu. +// deferredDecRefs list. See comment on Filesystem.mu. // // Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked. func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { - fs.droppedDentriesMu.Lock() - for _, d := range fs.droppedDentries { - // Defer the DecRef call so that we are not holding droppedDentriesMu + fs.deferredDecRefsMu.Lock() + for _, d := range fs.deferredDecRefs { + // Defer the DecRef call so that we are not holding deferredDecRefsMu // when DecRef is called. defer d.DecRef(ctx) } - fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. - fs.droppedDentriesMu.Unlock() + fs.deferredDecRefs = fs.deferredDecRefs[:0] // Keep slice memory for reuse. + fs.deferredDecRefsMu.Unlock() } // VFSFilesystem returns the generic vfs filesystem object. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 1cd3137e6..609887943 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -22,12 +22,12 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -94,7 +94,7 @@ type attrs struct { } func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } type readonlyDir struct { @@ -196,15 +196,15 @@ func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (k } func (*dir) NewLink(context.Context, string, kernfs.Inode) (kernfs.Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } func (*dir) NewSymlink(context.Context, string, string) (kernfs.Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (kernfs.Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } func (fsType) Name() string { @@ -318,10 +318,10 @@ func TestDirFDReadWrite(t *testing.T) { defer fd.DecRef(sys.Ctx) // Read/Write should fail for directory FDs. 
- if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR { + if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) { t.Fatalf("Read for directory FD failed with unexpected error: %v", err) } - if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EBADF { + if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); !linuxerr.Equals(linuxerr.EBADF, err) { t.Fatalf("Write for directory FD failed with unexpected error: %v", err) } } diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go index a0736c0d6..4adf76ce6 100644 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ b/pkg/sentry/fsimpl/kernfs/symlink.go @@ -17,9 +17,9 @@ package kernfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // StaticSymlink provides an Inode implementation for symlinks that point to @@ -62,5 +62,5 @@ func (s *StaticSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry, // SetStat implements Inode.SetStat not allowing inode attributes to be changed. func (*StaticSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go index 11694c392..c91d23b56 100644 --- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // syntheticDirectory implements kernfs.Inode for a directory created by @@ -65,13 +65,13 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, // NewFile implements Inode.NewFile. func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewDir implements Inode.NewDir. func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) { if !opts.ForSyntheticMountpoint { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } subdirI := newSyntheticDirectory(ctx, auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask) if err := dir.OrderedChildren.Insert(name, subdirI); err != nil { @@ -84,17 +84,17 @@ func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs // NewLink implements Inode.NewLink. func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewSymlink implements Inode.NewSymlink. func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // NewNode implements Inode.NewNode. 
func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } // DecRef implements Inode.DecRef. diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index 5504476c8..ed730e215 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -29,6 +29,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/log", diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index 45aa5a494..1f85a1f0d 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -51,13 +52,13 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { // Can be copied-up. default: // Can't be copied-up. - return syserror.EPERM + return linuxerr.EPERM } // Ensure that our parent directory is copied-up. if d.parent == nil { // d is a filesystem root with no upper layer. - return syserror.EROFS + return linuxerr.EROFS } if err := d.parent.copyUpLocked(ctx); err != nil { return err @@ -271,7 +272,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } if upperStat.Mask&linux.STATX_INO == 0 { cleanupUndoCopyUp() - return syserror.EREMOTE + return linuxerr.EREMOTE } atomic.StoreUint32(&d.devMajor, upperStat.DevMajor) atomic.StoreUint32(&d.devMinor, upperStat.DevMinor) @@ -349,7 +350,7 @@ func (d *dentry) copyXattrsLocked(ctx context.Context) error { lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0) if err != nil { - if err == syserror.EOPNOTSUPP { + if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { // There are no guarantees as to the contents of lowerXattrs. return nil } diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go index df4492346..ad3cdbb56 100644 --- a/pkg/sentry/fsimpl/overlay/directory.go +++ b/pkg/sentry/fsimpl/overlay/directory.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) func (d *dentry) isDir() bool { @@ -69,7 +69,7 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string return nil } // Non-whiteout file in the directory prevents rmdir. - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY })) if err != nil { readdirErr = err @@ -88,7 +88,7 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string } if stat.RdevMajor != 0 || stat.RdevMinor != 0 { // This file is a real character device, not a whiteout. 
- readdirErr = syserror.ENOTEMPTY + readdirErr = linuxerr.ENOTEMPTY return false } whiteouts[maybeWhiteoutName] = isUpper @@ -256,7 +256,7 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in switch whence { case linux.SEEK_SET: if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset == 0 { // Ensure that the next call to fd.IterDirents() calls @@ -268,13 +268,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in case linux.SEEK_CUR: offset += fd.off if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Don't clear fd.dirents in this case, even if offset == 0. fd.off = offset return fd.off, nil default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 6b6fa0bd5..5e89928c5 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -86,7 +87,7 @@ func putDentrySlice(ds *[]*dentry) { // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. // -// +checklocks:fs.renameMu +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) { fs.renameMu.RUnlock() if *dsp == nil { @@ -112,7 +113,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]* putDentrySlice(*dsp) } -// +checklocks:fs.renameMu +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() @@ -137,7 +138,7 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de // * !rp.Done(). func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, lookupLayer, error) { if !d.isDir() { - return nil, lookupLayerNone, syserror.ENOTDIR + return nil, lookupLayerNone, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, lookupLayerNone, err @@ -218,7 +219,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str Start: parentVD, Path: childPath, }, &vfs.GetDentryOptions{}) - if err == syserror.ENOENT || err == syserror.ENAMETOOLONG { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { // The file doesn't exist on this layer. Proceed to the next one. return true } @@ -245,7 +246,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str return false } if stat.Mask&mask != mask { - lookupErr = syserror.EREMOTE + lookupErr = linuxerr.EREMOTE return false } @@ -352,7 +353,7 @@ func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, nam }, &vfs.StatOptions{ Mask: linux.STATX_TYPE, }) - if err == syserror.ENOENT || err == syserror.ENAMETOOLONG { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { // The file doesn't exist on this layer. Proceed to the next // one. 
return true @@ -365,7 +366,7 @@ func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, nam // Linux's overlayfs tends to return EREMOTE in cases where a file // is unusable for reasons that are not better captured by another // errno. - lookupErr = syserror.EREMOTE + lookupErr = linuxerr.EREMOTE return false } if isWhiteout(&stat) { @@ -437,7 +438,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving d = next } if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -457,7 +458,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, d = next } if rp.MustBeDir() && !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -479,7 +480,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } name := rp.Component() if name == "." || name == ".." { - return syserror.EEXIST + return linuxerr.EEXIST } if parent.vfsd.IsDead() { return syserror.ENOENT @@ -494,14 +495,14 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir // Determine if a file already exists at name. if _, ok := parent.children[name]; ok { - return syserror.EEXIST + return linuxerr.EEXIST } childLayer, err := fs.lookupLayerLocked(ctx, parent, name) if err != nil { return err } if childLayer.existsInOverlay() { - return syserror.EEXIST + return linuxerr.EEXIST } if !dir && rp.MustBeDir() { @@ -592,7 +593,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op } if opts.CheckSearchable { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -620,11 +621,11 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error { if rp.Mount() != vd.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } old := vd.Dentry().Impl().(*dentry) if old.isDir() { - return syserror.EPERM + return linuxerr.EPERM } if err := old.copyUpLocked(ctx); err != nil { return err @@ -725,7 +726,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error { // Disallow attempts to create whiteouts. if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 { - return syserror.EPERM + return linuxerr.EPERM } vfsObj := fs.vfsfs.VirtualFilesystem() pop := vfs.PathOperation{ @@ -782,7 +783,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf return nil, syserror.EISDIR } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } if start.isRegularFile() && mayWrite { if err := start.copyUpLocked(ctx); err != nil { @@ -811,7 +812,7 @@ afterTrailingSymlink: // Determine whether or not we need to create a file. 
parent.dirMu.Lock() child, topLookupLayer, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) - if err == syserror.ENOENT && mayCreate { + if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate { fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout) parent.dirMu.Unlock() return fd, err @@ -822,7 +823,7 @@ afterTrailingSymlink: } // Open existing child or follow symlink. if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } if child.isSymlink() && rp.ShouldFollowSymlink() { target, err := child.readlink(ctx) @@ -836,7 +837,7 @@ afterTrailingSymlink: goto afterTrailingSymlink } if rp.MustBeDir() && !child.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if child.isRegularFile() && mayWrite { if err := child.copyUpLocked(ctx); err != nil { @@ -871,7 +872,7 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts * return nil, syserror.EISDIR } if opts.Flags&linux.O_DIRECT != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fd := &directoryFD{} fd.LockFD.Init(&d.locks) @@ -1027,19 +1028,19 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } if opts.Flags&^linux.RENAME_NOREPLACE != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } newName := rp.Component() if newName == "." || newName == ".." { if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } - return syserror.EBUSY + return linuxerr.EBUSY } mnt := rp.Mount() if mnt != oldParentVD.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } if err := mnt.CheckBeginWrite(); err != nil { return err @@ -1064,7 +1065,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } if renamed.isDir() { if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { - return syserror.EINVAL + return linuxerr.EINVAL } if oldParent != newParent { if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil { @@ -1073,7 +1074,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } } else { if opts.MustBeDir || rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } @@ -1094,12 +1095,12 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa whiteouts map[string]bool ) replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds) - if err != nil && err != syserror.ENOENT { + if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { return err } if replaced != nil { if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } replacedVFSD = &replaced.vfsd if replaced.isDir() { @@ -1107,7 +1108,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return syserror.EISDIR } if genericIsAncestorDentry(replaced, renamed) { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } replaced.dirMu.Lock() defer replaced.dirMu.Unlock() @@ -1117,7 +1118,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } } else { if rp.MustBeDir() || renamed.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } } @@ -1177,7 +1178,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa Root: replaced.upperVD, Start: replaced.upperVD, Path: fspath.Parse(whiteoutName), - }); err != nil && err != syserror.EEXIST { + }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, 
err) { panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RenameAt failure: %v", err)) } } @@ -1285,10 +1286,10 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error defer rp.Mount().EndWrite() name := rp.Component() if name == "." { - return syserror.EINVAL + return linuxerr.EINVAL } if name == ".." { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) @@ -1309,7 +1310,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } if !child.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } if err := parent.mayDelete(rp.Credentials(), child); err != nil { return err @@ -1344,7 +1345,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error Root: child.upperVD, Start: child.upperVD, Path: fspath.Parse(whiteoutName), - }); err != nil && err != syserror.EEXIST { + }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err)) } } @@ -1535,7 +1536,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return syserror.EISDIR } if rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) @@ -1658,7 +1659,7 @@ func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Crede // Return EOPNOTSUPP when fetching an overlay attribute. // See fs/overlayfs/super.c:ovl_own_xattr_get(). if isOverlayXattr(opts.Name) { - return "", syserror.EOPNOTSUPP + return "", linuxerr.EOPNOTSUPP } // Analogous to fs/overlayfs/super.c:ovl_other_xattr_get(). @@ -1696,7 +1697,7 @@ func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mo // Return EOPNOTSUPP when setting an overlay attribute. // See fs/overlayfs/super.c:ovl_own_xattr_set(). if isOverlayXattr(opts.Name) { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } // Analogous to fs/overlayfs/super.c:ovl_other_xattr_set(). @@ -1741,7 +1742,7 @@ func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs // Linux passes the remove request to xattr_handler->set. // See fs/xattr.c:vfs_removexattr(). if isOverlayXattr(name) { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } if err := mnt.CheckBeginWrite(); err != nil { diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index 454c20d4f..46d9f1f1d 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -40,13 +40,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Name is the default filesystem name. 
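The xattr hunks above hide overlay-internal attributes from callers, returning EOPNOTSUPP when the name is an overlay xattr (mirroring ovl_own_xattr_get/set in Linux). A small sketch of that filter; the "trusted.overlay." prefix is an assumption here, not taken from the diff.

package main

import (
	"fmt"
	"strings"

	"golang.org/x/sys/unix"
)

// isOverlayXattr is illustrative: it treats any name under the assumed
// "trusted.overlay." namespace as overlay-internal.
func isOverlayXattr(name string) bool {
	return strings.HasPrefix(name, "trusted.overlay.")
}

// getXattr shows the policy from the hunks above: overlay-internal attributes
// are refused with EOPNOTSUPP; everything else would be passed through to the
// appropriate layer.
func getXattr(name string) (string, error) {
	if isOverlayXattr(name) {
		return "", unix.EOPNOTSUPP
	}
	// Pass-through to the upper/lower layer would go here.
	return "", unix.ENODATA
}

func main() {
	_, err := getXattr("trusted.overlay.opaque")
	fmt.Println(err) // operation not supported

	_, err = getXattr("user.comment")
	fmt.Println(err) // no data available
}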
@@ -135,7 +135,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fsopts, ok := fsoptsRaw.(FilesystemOptions) if fsoptsRaw != nil && !ok { ctx.Infof("overlay.FilesystemType.GetFilesystem: GetFilesystemOptions.InternalData has type %T, wanted overlay.FilesystemOptions or nil", fsoptsRaw) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } vfsroot := vfs.RootFromContext(ctx) if vfsroot.Ok() { @@ -145,7 +145,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if upperPathname, ok := mopts["upperdir"]; ok { if fsopts.UpperRoot.Ok() { ctx.Infof("overlay.FilesystemType.GetFilesystem: both upperdir and FilesystemOptions.UpperRoot are specified") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } delete(mopts, "upperdir") // Linux overlayfs also requires a workdir when upperdir is @@ -154,7 +154,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt upperPath := fspath.Parse(upperPathname) if !upperPath.Absolute { ctx.Infof("overlay.FilesystemType.GetFilesystem: upperdir %q must be absolute", upperPathname) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } upperRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{ Root: vfsroot, @@ -181,7 +181,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if lowerPathnamesStr, ok := mopts["lowerdir"]; ok { if len(fsopts.LowerRoots) != 0 { ctx.Infof("overlay.FilesystemType.GetFilesystem: both lowerdir and FilesystemOptions.LowerRoots are specified") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } delete(mopts, "lowerdir") lowerPathnames := strings.Split(lowerPathnamesStr, ":") @@ -189,7 +189,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt lowerPath := fspath.Parse(lowerPathname) if !lowerPath.Absolute { ctx.Infof("overlay.FilesystemType.GetFilesystem: lowerdir %q must be absolute", lowerPathname) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } lowerRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{ Root: vfsroot, @@ -216,21 +216,21 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if len(mopts) != 0 { ctx.Infof("overlay.FilesystemType.GetFilesystem: unused options: %v", mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if len(fsopts.LowerRoots) == 0 { ctx.Infof("overlay.FilesystemType.GetFilesystem: at least one lower layer is required") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if len(fsopts.LowerRoots) < 2 && !fsopts.UpperRoot.Ok() { ctx.Infof("overlay.FilesystemType.GetFilesystem: at least two lower layers are required when no upper layer is present") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } const maxLowerLayers = 500 // Linux: fs/overlay/super.c:OVL_MAX_STACK if len(fsopts.LowerRoots) > maxLowerLayers { ctx.Infof("overlay.FilesystemType.GetFilesystem: %d lower layers specified, maximum %d", len(fsopts.LowerRoots), maxLowerLayers) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Take extra references held by the filesystem. 
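The option-parsing hunks above reject relative upperdir/lowerdir paths and cap the number of lower layers. A standalone sketch of that validation shape; the 500-layer cap follows the diff, while the helper itself is illustrative rather than the gVisor code.

package main

import (
	"fmt"
	"strings"
)

// maxLowerLayers mirrors the OVL_MAX_STACK-style cap referenced in the diff.
const maxLowerLayers = 500

// parseLowerDirs splits a lowerdir mount option on ':' and applies the same
// kinds of checks the overlay filesystem performs: absolute paths only, at
// least one layer, and a bounded layer count.
func parseLowerDirs(lowerdir string) ([]string, error) {
	if lowerdir == "" {
		return nil, fmt.Errorf("lowerdir: at least one lower layer is required")
	}
	layers := strings.Split(lowerdir, ":")
	if len(layers) > maxLowerLayers {
		return nil, fmt.Errorf("lowerdir: %d layers specified, maximum %d", len(layers), maxLowerLayers)
	}
	for _, l := range layers {
		if !strings.HasPrefix(l, "/") {
			return nil, fmt.Errorf("lowerdir %q must be absolute", l)
		}
	}
	return layers, nil
}

func main() {
	layers, err := parseLowerDirs("/lower1:/lower2")
	fmt.Println(layers, err) // [/lower1 /lower2] <nil>

	_, err = parseLowerDirs("relative/path")
	fmt.Println(err) // lowerdir "relative/path" must be absolute
}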
@@ -277,13 +277,13 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if rootStat.Mask&rootStatMask != rootStatMask { root.destroyLocked(ctx) fs.vfsfs.DecRef(ctx) - return nil, nil, syserror.EREMOTE + return nil, nil, linuxerr.EREMOTE } if isWhiteout(&rootStat) { ctx.Infof("overlay.FilesystemType.GetFilesystem: filesystem root is a whiteout") root.destroyLocked(ctx) fs.vfsfs.DecRef(ctx) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } root.mode = uint32(rootStat.Mode) root.uid = rootStat.UID diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go index 82491a0f8..156ffeaeb 100644 --- a/pkg/sentry/fsimpl/overlay/regular_file.go +++ b/pkg/sentry/fsimpl/overlay/regular_file.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -26,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -415,7 +415,7 @@ func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOp // Only permit mmap of regular files, since other file types may have // unpredictable behavior when mmapped (e.g. /dev/zero). if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG { - return syserror.ENODEV + return linuxerr.ENODEV } // Get a Mappable for the current top layer. diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD index 278ee3c92..a50510031 100644 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/sentry/fsimpl/kernfs", @@ -16,6 +17,5 @@ go_library( "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 08aedc2ad..af09195a7 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // +stateify savable @@ -152,7 +152,7 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth. if opts.Stat.Mask == 0 { return nil } - return syserror.EPERM + return linuxerr.EPERM } // Open implements kernfs.Inode.Open. 
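The pipefs hunk above keeps SetStat as a successful no-op when the stat mask is empty and refuses any real attribute change with EPERM. A minimal sketch of that shape, using a stand-in options struct rather than the real vfs.SetStatOptions.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// setStatOptions stands in for vfs.SetStatOptions: Mask is a bitmask of
// STATX_* fields the caller wants to change.
type setStatOptions struct {
	Mask uint32
}

// setStat mirrors the pipefs policy: an empty mask is a successful no-op, and
// any requested change is refused with EPERM.
func setStat(opts setStatOptions) error {
	if opts.Mask == 0 {
		return nil
	}
	return unix.EPERM
}

func main() {
	fmt.Println(setStat(setStatOptions{Mask: 0}))               // <nil>
	fmt.Println(setStat(setStatOptions{Mask: unix.STATX_MODE})) // operation not permitted
}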
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 2b628bd55..1d3d2d95f 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -81,6 +81,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/refs", @@ -119,6 +120,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/testutil", @@ -127,7 +129,6 @@ go_test( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index ce8f55b1f..f2697c12d 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -21,11 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -76,7 +76,7 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF maxCachedDentries, err = strconv.ParseUint(str, 10, 64) if err != nil { ctx.Warningf("proc.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index c53cc0122..d99f90b36 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -180,7 +181,7 @@ func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // DecRef implements kernfs.Inode.DecRef. diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index d05cc1508..cbbc0935a 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -20,12 +20,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // taskInode represents the inode for /proc/PID/ directory. 
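The proc hunk above parses the dentry_cache_limit mount option with strconv.ParseUint and maps a parse failure to EINVAL. A standalone sketch of that pattern; the option name comes from the diff, while the surrounding helper and the default value passed in main are illustrative.

package main

import (
	"fmt"
	"strconv"

	"golang.org/x/sys/unix"
)

// parseDentryCacheLimit is an illustrative stand-in for the mount-option
// handling in proc's GetFilesystem: a missing option keeps the default, and a
// malformed value is reported as EINVAL rather than a Go parse error.
func parseDentryCacheLimit(mopts map[string]string, def uint64) (uint64, error) {
	str, ok := mopts["dentry_cache_limit"]
	if !ok {
		return def, nil
	}
	limit, err := strconv.ParseUint(str, 10, 64)
	if err != nil {
		return 0, unix.EINVAL
	}
	return limit, nil
}

func main() {
	limit, err := parseDentryCacheLimit(map[string]string{"dentry_cache_limit": "1000"}, 1500)
	fmt.Println(limit, err) // 1000 <nil>

	_, err = parseDentryCacheLimit(map[string]string{"dentry_cache_limit": "bogus"}, 1500)
	fmt.Println(err) // invalid argument
}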
@@ -49,7 +49,7 @@ var _ kernfs.Inode = (*taskInode)(nil) func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) { if task.ExitState() == kernel.TaskExitDead { - return nil, syserror.ESRCH + return nil, linuxerr.ESRCH } contents := map[string]kernfs.Inode{ @@ -65,8 +65,8 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "io": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0400, newIO(task, isThreadGroup)), "maps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mapsData{task: task}), "mem": fs.newMemInode(ctx, task, fs.NextIno(), 0400), - "mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{task: task}), - "mounts": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{task: task}), + "mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{fs: fs, task: task}), + "mounts": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{fs: fs, task: task}), "net": fs.newTaskNetDir(ctx, task), "ns": fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0511, map[string]kernfs.Inode{ "net": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), "net"), @@ -124,7 +124,7 @@ func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.D // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // DecRef implements kernfs.Inode.DecRef. diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index 4718fac7a..dfc0a924e 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -42,12 +42,12 @@ func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) return file, flags } -func taskFDExists(ctx context.Context, t *kernel.Task, fd int32) bool { +func taskFDExists(ctx context.Context, fs *filesystem, t *kernel.Task, fd int32) bool { file, _ := getTaskFD(t, fd) if file == nil { return false } - file.DecRef(ctx) + fs.SafeDecRefFD(ctx, file) return true } @@ -145,7 +145,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err return nil, syserror.ENOENT } fd := int32(fdInt) - if !taskFDExists(ctx, i.task, fd) { + if !taskFDExists(ctx, i.fs, i.task, fd) { return nil, syserror.ENOENT } return i.fs.newFDSymlink(ctx, i.task, fd, i.fs.NextIno()), nil @@ -198,6 +198,7 @@ type fdSymlink struct { kernfs.InodeNoopRefCount kernfs.InodeSymlink + fs *filesystem task *kernel.Task fd int32 } @@ -206,6 +207,7 @@ var _ kernfs.Inode = (*fdSymlink)(nil) func (fs *filesystem) newFDSymlink(ctx context.Context, task *kernel.Task, fd int32, ino uint64) kernfs.Inode { inode := &fdSymlink{ + fs: fs, task: task, fd: fd, } @@ -218,9 +220,9 @@ func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) if file == nil { return "", syserror.ENOENT } - defer file.DecRef(ctx) + defer s.fs.SafeDecRefFD(ctx, file) root := vfs.RootFromContext(ctx) - defer root.DecRef(ctx) + defer s.fs.SafeDecRef(ctx, root) // Note: it's safe to reenter kernfs from Readlink if needed to resolve path. 
return s.task.Kernel().VFS().PathnameWithDeleted(ctx, root, file.VirtualDentry()) @@ -231,7 +233,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen if file == nil { return vfs.VirtualDentry{}, "", syserror.ENOENT } - defer file.DecRef(ctx) + defer s.fs.SafeDecRefFD(ctx, file) vd := file.VirtualDentry() vd.IncRef() return vd, "", nil @@ -239,7 +241,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen // Valid implements kernfs.Inode.Valid. func (s *fdSymlink) Valid(ctx context.Context) bool { - return taskFDExists(ctx, s.task, s.fd) + return taskFDExists(ctx, s.fs, s.task, s.fd) } // fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory. @@ -279,10 +281,11 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, return nil, syserror.ENOENT } fd := int32(fdInt) - if !taskFDExists(ctx, i.task, fd) { + if !taskFDExists(ctx, i.fs, i.task, fd) { return nil, syserror.ENOENT } data := &fdInfoData{ + fs: i.fs, task: i.task, fd: fd, } @@ -316,6 +319,7 @@ func (i *fdInfoDirInode) DecRef(ctx context.Context) { type fdInfoData struct { kernfs.DynamicBytesFile + fs *filesystem task *kernel.Task fd int32 } @@ -328,7 +332,7 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { if file == nil { return syserror.ENOENT } - defer file.DecRef(ctx) + defer d.fs.SafeDecRefFD(ctx, file) // TODO(b/121266871): Include pos, locks, and other data. For now we only // have flags. // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt @@ -339,5 +343,5 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Valid implements kernfs.Inode.Valid. func (d *fdInfoData) Valid(ctx context.Context) bool { - return taskFDExists(ctx, d.task, d.fd) + return taskFDExists(ctx, d.fs, d.task, d.fd) } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index b294dfd6a..5bb6bc372 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fsbridge" @@ -70,9 +71,9 @@ func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) { func checkTaskState(t *kernel.Task) error { switch t.ExitState() { case kernel.TaskExitZombie: - return syserror.EACCES + return linuxerr.EACCES case kernel.TaskExitDead: - return syserror.ESRCH + return linuxerr.ESRCH } return nil } @@ -109,7 +110,7 @@ var _ dynamicInode = (*auxvData)(nil) // Generate implements vfs.DynamicBytesSource.Generate. func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error { if d.task.ExitState() == kernel.TaskExitDead { - return syserror.ESRCH + return linuxerr.ESRCH } m, err := getMMIncRef(d.task) if err != nil { @@ -159,7 +160,7 @@ var _ dynamicInode = (*cmdlineData)(nil) // Generate implements vfs.DynamicBytesSource.Generate. 
func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { if d.task.ExitState() == kernel.TaskExitDead { - return syserror.ESRCH + return linuxerr.ESRCH } m, err := getMMIncRef(d.task) if err != nil { @@ -227,7 +228,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { if int(arEnvv.Length()) > remaining { end, ok := arEnvv.Start.AddLength(uint64(remaining)) if !ok { - return syserror.EFAULT + return linuxerr.EFAULT } arEnvv.End = end } @@ -325,7 +326,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in // the file ..." - user_namespaces(7) srclen := src.NumBytes() if srclen >= hostarch.PageSize || offset != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } b := make([]byte, srclen) if _, err := src.CopyIn(ctx, b); err != nil { @@ -345,7 +346,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in } lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1) if len(lines) > maxIDMapLines { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } entries := make([]auth.IDMapEntry, len(lines)) @@ -353,7 +354,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in var e auth.IDMapEntry _, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length) if err != nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } entries[i] = e } @@ -408,7 +409,7 @@ func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.De // Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS // Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH if !kernel.ContextCanTrace(ctx, f.task, true) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } if err := checkTaskState(f.task); err != nil { return nil, err @@ -422,7 +423,7 @@ func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.De // SetStat implements kernfs.Inode.SetStat. func (*memInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } var _ vfs.FileDescriptionImpl = (*memFD)(nil) @@ -461,10 +462,10 @@ func (fd *memFD) Seek(ctx context.Context, offset int64, whence int32) (int64, e case linux.SEEK_CUR: offset += fd.offset default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.offset = offset return offset, nil @@ -485,7 +486,7 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64 n, readErr := m.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true}) if n > 0 { if _, err := dst.CopyOut(ctx, buf[:n]); err != nil { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } return int64(n), nil } @@ -512,7 +513,7 @@ func (fd *memFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, e // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *memFD) SetStat(context.Context, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // Release implements vfs.FileDescriptionImpl.Release. @@ -762,7 +763,7 @@ var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil) // Generate implements vfs.DynamicBytesSource.Generate. 
func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error { if o.task.ExitState() == kernel.TaskExitDead { - return syserror.ESRCH + return linuxerr.ESRCH } fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj()) return nil @@ -784,7 +785,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset } if o.task.ExitState() == kernel.TaskExitDead { - return 0, syserror.ESRCH + return 0, linuxerr.ESRCH } if err := o.task.SetOOMScoreAdj(v); err != nil { return 0, err @@ -802,13 +803,17 @@ type exeSymlink struct { kernfs.InodeNoopRefCount kernfs.InodeSymlink + fs *filesystem task *kernel.Task } var _ kernfs.Inode = (*exeSymlink)(nil) func (fs *filesystem) newExeSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode { - inode := &exeSymlink{task: task} + inode := &exeSymlink{ + fs: fs, + task: task, + } inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) return inode } @@ -819,14 +824,14 @@ func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) if err != nil { return "", err } - defer exec.DecRef(ctx) + defer s.fs.SafeDecRef(ctx, exec) root := vfs.RootFromContext(ctx) if !root.Ok() { // It could have raced with process deletion. - return "", syserror.ESRCH + return "", linuxerr.ESRCH } - defer root.DecRef(ctx) + defer s.fs.SafeDecRef(ctx, root) vfsObj := exec.Mount().Filesystem().VirtualFilesystem() name, _ := vfsObj.PathnameWithDeleted(ctx, root, exec) @@ -836,7 +841,7 @@ func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) // Getlink implements kernfs.Inode.Getlink. func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) { if !kernel.ContextCanTrace(ctx, s.task, false) { - return vfs.VirtualDentry{}, "", syserror.EACCES + return vfs.VirtualDentry{}, "", linuxerr.EACCES } if err := checkTaskState(s.task); err != nil { return vfs.VirtualDentry{}, "", err @@ -847,7 +852,7 @@ func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent s.task.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - err = syserror.EACCES + err = linuxerr.EACCES return } @@ -856,7 +861,7 @@ func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent // (with locks held). exec = mm.Executable() if exec == nil { - err = syserror.ESRCH + err = linuxerr.ESRCH } }) if err != nil { @@ -878,13 +883,17 @@ type cwdSymlink struct { kernfs.InodeNoopRefCount kernfs.InodeSymlink + fs *filesystem task *kernel.Task } var _ kernfs.Inode = (*cwdSymlink)(nil) func (fs *filesystem) newCwdSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode { - inode := &cwdSymlink{task: task} + inode := &cwdSymlink{ + fs: fs, + task: task, + } inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) return inode } @@ -895,14 +904,14 @@ func (s *cwdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) if err != nil { return "", err } - defer cwd.DecRef(ctx) + defer s.fs.SafeDecRef(ctx, cwd) root := vfs.RootFromContext(ctx) if !root.Ok() { // It could have raced with process deletion. 
- return "", syserror.ESRCH + return "", linuxerr.ESRCH } - defer root.DecRef(ctx) + defer s.fs.SafeDecRef(ctx, root) vfsObj := cwd.Mount().Filesystem().VirtualFilesystem() name, _ := vfsObj.PathnameWithDeleted(ctx, root, cwd) @@ -912,7 +921,7 @@ func (s *cwdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) // Getlink implements kernfs.Inode.Getlink. func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) { if !kernel.ContextCanTrace(ctx, s.task, false) { - return vfs.VirtualDentry{}, "", syserror.EACCES + return vfs.VirtualDentry{}, "", linuxerr.EACCES } if err := checkTaskState(s.task); err != nil { return vfs.VirtualDentry{}, "", err @@ -920,8 +929,9 @@ func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent cwd := s.task.FSContext().WorkingDirectoryVFS2() if !cwd.Ok() { // It could have raced with process deletion. - return vfs.VirtualDentry{}, "", syserror.ESRCH + return vfs.VirtualDentry{}, "", linuxerr.ESRCH } + // The reference is transferred to the caller. return cwd, "", nil } @@ -931,6 +941,7 @@ func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent type mountInfoData struct { kernfs.DynamicBytesFile + fs *filesystem task *kernel.Task } @@ -951,7 +962,7 @@ func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Root has been destroyed. Don't try to read mounts. return nil } - defer rootDir.DecRef(ctx) + defer i.fs.SafeDecRef(ctx, rootDir) i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf) return nil } @@ -962,6 +973,7 @@ func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { type mountsData struct { kernfs.DynamicBytesFile + fs *filesystem task *kernel.Task } @@ -982,7 +994,7 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Root has been destroyed. Don't try to read mounts. return nil } - defer rootDir.DecRef(ctx) + defer i.fs.SafeDecRef(ctx, rootDir) i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf) return nil } @@ -1123,7 +1135,7 @@ func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error // exit this file show a task is in no cgroups, which is incorrect. Instead, // once a task has left its cgroups, we return an error. 
if d.task.ExitState() >= kernel.TaskExitInitiated { - return syserror.ESRCH + return linuxerr.ESRCH } d.task.GenerateProcTaskCgroup(buf) diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index 177cb828f..ab47ea5a7 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -33,7 +34,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -679,7 +679,7 @@ func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { continue } if err := d.stack.Statistics(stat, line.prefix); err != nil { - if err == syserror.EOPNOTSUPP { + if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) } else { log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 045ed7a2d..03bed22a3 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -53,7 +54,7 @@ func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error t := kernel.TaskFromContext(ctx) if t == nil { // Who is reading this link? - return "", syserror.EINVAL + return "", linuxerr.EINVAL } tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) if tgid == 0 { @@ -69,7 +70,7 @@ func (s *selfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualD // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // +stateify savable @@ -94,7 +95,7 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, t := kernel.TaskFromContext(ctx) if t == nil { // Who is reading this link? - return "", syserror.EINVAL + return "", linuxerr.EINVAL } tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) tid := s.pidns.IDOfTask(t) @@ -111,7 +112,7 @@ func (s *threadSelfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vi // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. 
func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // dynamicBytesFileSetAttr implements a special file that allows inode diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 2bc98a94f..99f64a9d8 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/usermem" ) @@ -209,7 +209,7 @@ func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error { func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // No need to handle partial writes thus far. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -257,7 +257,7 @@ func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // No need to handle partial writes thus far. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -311,7 +311,7 @@ func (d *tcpMemData) Generate(ctx context.Context, buf *bytes.Buffer) error { func (d *tcpMemData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // No need to handle partial writes thus far. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -396,7 +396,7 @@ func (ipf *ipForwarding) Generate(ctx context.Context, buf *bytes.Buffer) error func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // No need to handle partial writes thus far. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -449,7 +449,7 @@ func (pr *portRange) Generate(ctx context.Context, buf *bytes.Buffer) error { func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // No need to handle partial writes thus far. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -467,7 +467,7 @@ func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset i // Port numbers must be uint16s. 
if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if err := pr.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil { diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index e534fbca8..14f806c3c 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -23,13 +23,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -227,7 +227,7 @@ func TestTasks(t *testing.T) { defer fd.DecRef(s.Ctx) buf := make([]byte, 1) bufIOSeq := usermem.BytesIOSequence(buf) - if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR { + if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) { t.Errorf("wrong error reading directory: %v", err) } } @@ -237,7 +237,7 @@ func TestTasks(t *testing.T) { s.Creds, s.PathOpAtRoot("/proc/9999"), &vfs.OpenOptions{}, - ); err != syserror.ENOENT { + ); !linuxerr.Equals(linuxerr.ENOENT, err) { t.Fatalf("wrong error from vfsfs.OpenAt(/proc/9999): %v", err) } } diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go index e039ec45e..7240563d7 100644 --- a/pkg/sentry/fsimpl/proc/yama.go +++ b/pkg/sentry/fsimpl/proc/yama.go @@ -21,11 +21,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -56,7 +56,7 @@ func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { if offset != 0 { // Ignore partial writes. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if src.NumBytes() == 0 { return 0, nil @@ -73,7 +73,7 @@ func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, off // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL. 
if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } atomic.StoreInt32(s.level, v) diff --git a/pkg/sentry/fsimpl/sockfs/BUILD b/pkg/sentry/fsimpl/sockfs/BUILD index 9453277b8..9defca936 100644 --- a/pkg/sentry/fsimpl/sockfs/BUILD +++ b/pkg/sentry/fsimpl/sockfs/BUILD @@ -9,10 +9,10 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index 735756280..75934ecd0 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -20,11 +20,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // filesystemType implements vfs.FilesystemType. @@ -102,7 +102,7 @@ type inode struct { // Open implements kernfs.Inode.Open. func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } // StatFS implements kernfs.Inode.StatFS. diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 09043b572..1af0a5cbc 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/coverage", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go index b13f141a8..51f0bf3d8 100644 --- a/pkg/sentry/fsimpl/sys/kcov.go +++ b/pkg/sentry/fsimpl/sys/kcov.go @@ -17,13 +17,13 @@ package sys import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -85,11 +85,11 @@ func (fd *kcovFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallAr case linux.KCOV_DISABLE: if arg != 0 { // This arg is unused; it should be 0. 
- return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } return 0, fd.kcov.DisableTrace(ctx) default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 14eb10dcd..f322d2747 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -23,12 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -74,7 +74,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt maxCachedDentries, err = strconv.ParseUint(str, 10, 64) if err != nil { ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } } @@ -174,7 +174,7 @@ func (fs *filesystem) newDir(ctx context.Context, creds *auth.Credentials, mode // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } // Open implements kernfs.Inode.Open. diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD index 7ce7dc429..e6980a314 100644 --- a/pkg/sentry/fsimpl/timerfd/BUILD +++ b/pkg/sentry/fsimpl/timerfd/BUILD @@ -8,6 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index cbb8b67c5..655a1c76a 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -69,7 +70,7 @@ func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, clock ktime.Clock, func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { const sizeofUint64 = 8 if dst.NumBytes() < sizeofUint64 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if val := atomic.SwapUint64(&tfd.val, 0); val != 0 { var buf [sizeofUint64]byte diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index e21fddd7f..dc8b9bfeb 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -58,6 +58,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/log", @@ -94,6 +95,7 @@ go_test( ":tmpfs", "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/refs", "//pkg/sentry/contexttest", @@ -101,7 +103,6 @@ go_test( "//pkg/sentry/fs/tmpfs", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) @@ -118,6 +119,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/sentry/contexttest", "//pkg/sentry/fs/lock", diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go 
b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go index 3cc63e732..2c29343c1 100644 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/contexttest" @@ -30,7 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Differences from stat_benchmark: @@ -68,7 +68,7 @@ func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent rel = wd } else { // Need to extract the given FD. - return syserror.EBADF + return linuxerr.EBADF } // Lookup the node. @@ -146,7 +146,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) { for i := 0; i < b.N; i++ { err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } uattr, err := d.Inode.UnstableAttr(ctx) if err != nil { @@ -341,7 +341,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { for i := 0; i < b.N; i++ { err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } uattr, err := d.Inode.UnstableAttr(ctx) if err != nil { diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index e8d256495..c25494c0b 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // +stateify savable @@ -196,10 +196,10 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in case linux.SEEK_CUR: offset += fd.off default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // If the offset isn't changing (e.g. 
due to lseek(0, SEEK_CUR)), don't diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index f0f4297ef..8b04df038 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -45,7 +46,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { dir, ok := d.inode.impl.(*directory) if !ok { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -70,7 +71,7 @@ afterSymlink: return d.parent, nil } if len(name) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } child, ok := dir.childMap[name] if !ok { @@ -112,7 +113,7 @@ func walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) } dir, ok := d.inode.impl.(*directory) if !ok { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return dir, nil } @@ -132,7 +133,7 @@ func resolveLocked(ctx context.Context, rp *vfs.ResolvingPath) (*dentry, error) d = next } if rp.MustBeDir() && !d.inode.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -161,13 +162,13 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } name := rp.Component() if name == "." || name == ".." { - return syserror.EEXIST + return linuxerr.EEXIST } if len(name) > linux.NAME_MAX { - return syserror.ENAMETOOLONG + return linuxerr.ENAMETOOLONG } if _, ok := parentDir.childMap[name]; ok { - return syserror.EEXIST + return linuxerr.EEXIST } if !dir && rp.MustBeDir() { return syserror.ENOENT @@ -220,7 +221,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op } if opts.CheckSearchable { if !d.inode.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -246,12 +247,12 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error { if rp.Mount() != vd.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } d := vd.Dentry().Impl().(*dentry) i := d.inode if i.isDir() { - return syserror.EPERM + return linuxerr.EPERM } if err := vfs.MayLink(auth.CredentialsFromContext(ctx), linux.FileMode(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil { return err @@ -260,7 +261,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
return syserror.ENOENT } if i.nlink == maxLinks { - return syserror.EMLINK + return linuxerr.EMLINK } i.incLinksLocked() i.watches.Notify(ctx, "", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */) @@ -274,7 +275,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return fs.doCreateAt(ctx, rp, true /* dir */, func(parentDir *directory, name string) error { creds := rp.Credentials() if parentDir.inode.nlink == maxLinks { - return syserror.EMLINK + return linuxerr.EMLINK } parentDir.inode.incLinksLocked() // from child's ".." childDir := fs.newDirectory(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir) @@ -300,7 +301,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v case linux.S_IFSOCK: childInode = fs.newSocketFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, opts.Endpoint, parentDir) default: - return syserror.EINVAL + return linuxerr.EINVAL } child := fs.newDentry(childInode) parentDir.insertChildLocked(child, name) @@ -312,7 +313,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { if opts.Flags&linux.O_TMPFILE != 0 { // Not yet supported. - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } // Handle O_CREAT and !O_CREAT separately, since in the latter case we @@ -347,7 +348,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf return nil, syserror.EISDIR } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } start.IncRef() defer start.DecRef(ctx) @@ -372,7 +373,7 @@ afterTrailingSymlink: return nil, syserror.EISDIR } if len(name) > linux.NAME_MAX { - return nil, syserror.ENAMETOOLONG + return nil, linuxerr.ENAMETOOLONG } // Determine whether or not we need to create a file. child, ok := parentDir.childMap[name] @@ -401,7 +402,7 @@ afterTrailingSymlink: return fd, nil } if mustCreate { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } // Is the file mounted over? if err := rp.CheckMount(ctx, &child.vfsd); err != nil { @@ -418,7 +419,7 @@ afterTrailingSymlink: goto afterTrailingSymlink } if rp.MustBeDir() && !child.inode.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } child.IncRef() defer child.DecRef(ctx) @@ -466,13 +467,13 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open return &fd.vfsfd, nil case *symlink: // Can't open symlinks without O_PATH, which is handled at the VFS layer. - return nil, syserror.ELOOP + return nil, linuxerr.ELOOP case *namedPipe: return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags, &d.inode.locks) case *deviceFile: return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) case *socketFile: - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO default: panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) } @@ -488,7 +489,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } symlink, ok := d.inode.impl.(*symlink) if !ok { - return "", syserror.EINVAL + return "", linuxerr.EINVAL } symlink.inode.touchAtime(rp.Mount()) return symlink.target, nil @@ -506,19 +507,19 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if opts.Flags&^linux.RENAME_NOREPLACE != 0 { // TODO(b/145974740): Support other renameat2 flags. 
- return syserror.EINVAL + return linuxerr.EINVAL } newName := rp.Component() if newName == "." || newName == ".." { if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } - return syserror.EBUSY + return linuxerr.EBUSY } mnt := rp.Mount() if mnt != oldParentVD.Mount() { - return syserror.EXDEV + return linuxerr.EXDEV } if err := mnt.CheckBeginWrite(); err != nil { return err @@ -541,7 +542,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // mounted filesystem. if renamed.inode.isDir() { if renamed == &newParentDir.dentry || genericIsAncestorDentry(renamed, &newParentDir.dentry) { - return syserror.EINVAL + return linuxerr.EINVAL } if oldParentDir != newParentDir { // Writability is needed to change renamed's "..". @@ -551,7 +552,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } } else { if opts.MustBeDir || rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } @@ -561,7 +562,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa replaced, ok := newParentDir.childMap[newName] if ok { if opts.Flags&linux.RENAME_NOREPLACE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } replacedDir, ok := replaced.inode.impl.(*directory) if ok { @@ -569,19 +570,19 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return syserror.EISDIR } if len(replacedDir.childMap) != 0 { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } } else { if rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } if renamed.inode.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } } else { if renamed.inode.isDir() && newParentDir.inode.nlink == maxLinks { - return syserror.EMLINK + return linuxerr.EMLINK } } // tmpfs never calls VFS.InvalidateDentry(), so newParentDir.dentry can @@ -646,10 +647,10 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } name := rp.Component() if name == "." { - return syserror.EINVAL + return linuxerr.EINVAL } if name == ".." 
{ - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } child, ok := parentDir.childMap[name] if !ok { @@ -660,10 +661,10 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } childDir, ok := child.inode.impl.(*directory) if !ok { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } if len(childDir.childMap) != 0 { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } mnt := rp.Mount() if err := mnt.CheckBeginWrite(); err != nil { @@ -766,7 +767,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return syserror.EISDIR } if rp.MustBeDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } mnt := rp.Mount() if err := mnt.CheckBeginWrite(); err != nil { @@ -806,11 +807,11 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath switch impl := d.inode.impl.(type) { case *socketFile: if impl.ep == nil { - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } return impl.ep, nil default: - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } } diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go index 2f856ce36..418c7994e 100644 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -114,7 +115,7 @@ func TestNonblockingWriteError(t *testing.T) { } openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != syserror.ENXIO { + if !linuxerr.Equals(linuxerr.ENXIO, err) { t.Fatalf("expected ENXIO, but got error: %v", err) } } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index c45bddff6..0f2ac6144 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -33,7 +34,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -185,7 +185,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) { // Can we grow the file? if rf.seals&linux.F_SEAL_GROW != 0 { rf.dataMu.Unlock() - return false, syserror.EPERM + return false, linuxerr.EPERM } // We only need to update the file size. atomic.StoreUint64(&rf.size, newSize) @@ -196,7 +196,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) { // We are shrinking the file. First check if this is allowed. if rf.seals&linux.F_SEAL_SHRINK != 0 { rf.dataMu.Unlock() - return false, syserror.EPERM + return false, linuxerr.EPERM } // Update the file size. @@ -233,7 +233,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a // Reject writable mapping if F_SEAL_WRITE is set. 
if rf.seals&linux.F_SEAL_WRITE != 0 && writable { - return syserror.EPERM + return linuxerr.EPERM } rf.mappings.AddMapping(ms, ar, offset, writable) @@ -366,7 +366,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs fsmetric.TmpfsReads.Increment() if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since @@ -374,7 +374,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } if dst.NumBytes() == 0 { @@ -407,7 +407,7 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off // final offset should be ignored by PWrite. func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if offset < 0 { - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since @@ -415,7 +415,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { - return 0, offset, syserror.EOPNOTSUPP + return 0, offset, linuxerr.EOPNOTSUPP } srclen := src.NumBytes() @@ -432,7 +432,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off } if end := offset + srclen; end < offset { // Overflow. - return 0, offset, syserror.EINVAL + return 0, offset, linuxerr.EINVAL } srclen, err = vfs.CheckLimit(ctx, offset, srclen) @@ -476,10 +476,10 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) ( case linux.SEEK_END: offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size)) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.off = offset return offset, nil @@ -594,7 +594,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, // Check if seals prevent either file growth or all writes. switch { case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed - return 0, syserror.EPERM + return 0, linuxerr.EPERM case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed // When growth is sealed, Linux effectively allows writes which would // normally grow the file to partially succeed up to the current EOF, @@ -615,7 +615,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, } if end <= rw.off { // Truncation would result in no data being written. 
- return 0, syserror.EPERM + return 0, linuxerr.EPERM } } @@ -684,7 +684,7 @@ exitLoop: func GetSeals(fd *vfs.FileDescription) (uint32, error) { f, ok := fd.Impl().(*regularFileFD) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } rf := f.inode().impl.(*regularFile) rf.dataMu.RLock() @@ -696,7 +696,7 @@ func GetSeals(fd *vfs.FileDescription) (uint32, error) { func AddSeals(fd *vfs.FileDescription, val uint32) error { f, ok := fd.Impl().(*regularFileFD) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } rf := f.inode().impl.(*regularFile) rf.mapsMu.Lock() @@ -706,13 +706,13 @@ func AddSeals(fd *vfs.FileDescription, val uint32) error { if rf.seals&linux.F_SEAL_SEAL != 0 { // Seal applied which prevents addition of any new seals. - return syserror.EPERM + return linuxerr.EPERM } // F_SEAL_WRITE can only be added if there are no active writable maps. if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 { if rf.writableMappingPages > 0 { - return syserror.EBUSY + return linuxerr.EBUSY } } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 6b4367c42..79a54eef3 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -36,6 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -138,7 +139,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt mode, err := strconv.ParseUint(modeStr, 8, 32) if err != nil { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid mode: %q", modeStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } rootMode = linux.FileMode(mode & 07777) } @@ -149,12 +150,12 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt uid, err := strconv.ParseUint(uidStr, 10, 32) if err != nil { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid uid: %q", uidStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } kuid := creds.UserNamespace.MapToKUID(auth.UID(uid)) if !kuid.Ok() { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped uid: %d", uid) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } rootKUID = kuid } @@ -165,18 +166,18 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt gid, err := strconv.ParseUint(gidStr, 10, 32) if err != nil { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid gid: %q", gidStr) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } kgid := creds.UserNamespace.MapToKGID(auth.GID(gid)) if !kgid.Ok() { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped gid: %d", gid) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } rootKGID = kgid } if len(mopts) != 0 { ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unknown options: %v", mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } devMinor, err := vfsObj.GetAnonBlockDevMinor() @@ -527,7 +528,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. 
return nil } if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE) != 0 { - return syserror.EPERM + return linuxerr.EPERM } mode := linux.FileMode(atomic.LoadUint32(&i.mode)) if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil { @@ -557,7 +558,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs. case *directory: return syserror.EISDIR default: - return syserror.EINVAL + return linuxerr.EINVAL } } if mask&linux.STATX_UID != 0 { @@ -730,7 +731,7 @@ func checkXattrName(name string) error { if strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { return nil } - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } func (i *inode) listXattr(creds *auth.Credentials, size uint64) ([]string, error) { diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD index d473a922d..1d855234c 100644 --- a/pkg/sentry/fsimpl/verity/BUILD +++ b/pkg/sentry/fsimpl/verity/BUILD @@ -13,6 +13,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/marshal/primitive", @@ -41,6 +42,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/sentry/arch", "//pkg/sentry/fsimpl/testutil", @@ -48,7 +50,6 @@ go_test( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index 3582d14c9..930016a3e 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/merkletree" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -74,6 +75,7 @@ func putDentrySlice(ds *[]*dentry) { // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { fs.renameMu.RUnlock() if *ds == nil { @@ -89,6 +91,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*d putDentrySlice(*ds) } +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() @@ -113,7 +116,7 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de // * !rp.Done(). func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { @@ -195,7 +198,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi // The Merkle tree file for the child should have been created and // contains the expected xattrs. If the file or the xattr does not // exist, it indicates unexpected modifications to the file system. 
- if err == syserror.ENOENT || err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleOffsetInParentXattr, childPath, err)) } if err != nil { @@ -218,7 +221,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi // The parent Merkle tree file should have been created. If it's // missing, it indicates an unexpected modification to the file system. - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open parent Merkle file for %s: %v", childPath, err)) } if err != nil { @@ -238,7 +241,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi // The Merkle tree file for the child should have been created and // contains the expected xattrs. If the file or the xattr does not // exist, it indicates unexpected modifications to the file system. - if err == syserror.ENOENT || err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleSizeXattr, childPath, err)) } if err != nil { @@ -261,7 +264,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi Root: parent.lowerVD, Start: parent.lowerVD, }, &vfs.StatOptions{}) - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get parent stat for %s: %v", childPath, err)) } if err != nil { @@ -282,7 +285,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi Mode: uint32(parentStat.Mode), UID: parentStat.UID, GID: parentStat.GID, - Children: parent.childrenNames, + Children: parent.childrenList, HashAlgorithms: fs.alg.toLinuxHashAlg(), ReadOffset: int64(offset), ReadSize: int64(merkletree.DigestSize(fs.alg.toLinuxHashAlg())), @@ -327,7 +330,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry }, &vfs.OpenOptions{ Flags: linux.O_RDONLY, }) - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return fs.alertIntegrityViolation(fmt.Sprintf("Failed to open merkle file for %s: %v", childPath, err)) } if err != nil { @@ -341,7 +344,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry Size: sizeOfStringInt32, }) - if err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENODATA, err) { return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", merkleSizeXattr, childPath, err)) } if err != nil { @@ -359,7 +362,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry Size: sizeOfStringInt32, }) - if err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENODATA, err) { return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenOffsetXattr, childPath, err)) } if err != nil { @@ -375,7 +378,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry Size: sizeOfStringInt32, }) - if err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENODATA, err) { return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenSizeXattr, childPath, err)) } if err != nil { @@ -403,6 +406,9 @@ func (fs *filesystem) 
verifyStatAndChildrenLocked(ctx context.Context, d *dentry var buf bytes.Buffer d.hashMu.RLock() + + d.generateChildrenList() + params := &merkletree.VerifyParams{ Out: &buf, Tree: &fdReader, @@ -411,7 +417,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry Mode: uint32(stat.Mode), UID: stat.UID, GID: stat.GID, - Children: d.childrenNames, + Children: d.childrenList, HashAlgorithms: fs.alg.toLinuxHashAlg(), ReadOffset: 0, // Set read size to 0 so only the metadata is verified. @@ -465,7 +471,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s } childVD, err := parent.getLowerAt(ctx, vfsObj, name) - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { // The file was previously accessed. If the // file does not exist now, it indicates an // unexpected modification to the file system. @@ -480,7 +486,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s // The Merkle tree file was previous accessed. If it // does not exist now, it indicates an unexpected // modification to the file system. - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Expected Merkle file for target %s but none found", path)) } if err != nil { @@ -551,7 +557,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, } childVD, err := parent.getLowerAt(ctx, vfsObj, name) - if parent.verityEnabled() && err == syserror.ENOENT { + if parent.verityEnabled() && linuxerr.Equals(linuxerr.ENOENT, err) { return nil, fs.alertIntegrityViolation(fmt.Sprintf("file %s expected but not found", parentPath+"/"+name)) } if err != nil { @@ -564,7 +570,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name) if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { if parent.verityEnabled() { return nil, fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath+"/"+name)) } @@ -679,7 +685,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving d = next } if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -699,7 +705,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, d = next } if rp.MustBeDir() && !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return d, nil } @@ -708,7 +714,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { // Verity file system is read-only. if ats&vfs.MayWrite != 0 { - return syserror.EROFS + return linuxerr.EROFS } var ds *[]*dentry fs.renameMu.RLock() @@ -731,7 +737,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op } if opts.CheckSearchable { if !d.isDir() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err @@ -758,26 +764,26 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa // LinkAt implements vfs.FilesystemImpl.LinkAt. func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { // Verity file system is read-only. 
- return syserror.EROFS + return linuxerr.EROFS } // MkdirAt implements vfs.FilesystemImpl.MkdirAt. func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // OpenAt implements vfs.FilesystemImpl.OpenAt. func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { // Verity fs is read-only. if opts.Flags&(linux.O_WRONLY|linux.O_CREAT) != 0 { - return nil, syserror.EROFS + return nil, linuxerr.EROFS } var ds *[]*dentry @@ -826,7 +832,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf // Users should not open the Merkle tree files. Those are for verity fs // use only. if strings.Contains(d.name, merklePrefix) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } ats := vfs.AccessTypesForOpenFlags(opts) if err := d.checkPermissions(rp.Credentials(), ats); err != nil { @@ -835,7 +841,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf // Verity fs is read-only. if ats&vfs.MayWrite != 0 { - return nil, syserror.EROFS + return nil, linuxerr.EROFS } // Get the path to the target file. This is only used to provide path @@ -845,16 +851,23 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } + tmpOpts := *opts + + // Open the lowerFD with O_PATH if a symlink is opened for verity. + if tmpOpts.Flags&linux.O_NOFOLLOW != 0 && d.isSymlink() { + tmpOpts.Flags |= linux.O_PATH + } + // Open the file in the underlying file system. lowerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ Root: d.lowerVD, Start: d.lowerVD, - }, opts) + }, &tmpOpts) // The file should exist, as we succeeded in finding its dentry. If it's // missing, it indicates an unexpected modification to the file system. if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("File %s expected but not found", path)) } return nil, err @@ -877,7 +890,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf // dentry. If it's missing, it indicates an unexpected modification to // the file system. if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path)) } return nil, err @@ -887,7 +900,6 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf // be called if a verity FD is successfully created. 
defer merkleReader.DecRef(ctx) - lowerFlags := lowerFD.StatusFlags() lowerFDOpts := lowerFD.Options() var merkleWriter *vfs.FileDescription var parentMerkleWriter *vfs.FileDescription @@ -902,7 +914,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf Flags: linux.O_WRONLY | linux.O_APPEND, }) if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path)) } return nil, err @@ -919,7 +931,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf Flags: linux.O_WRONLY | linux.O_APPEND, }) if err != nil { - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { parentPath, _ := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.parent.lowerVD) return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath)) } @@ -940,7 +952,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf isDir: d.isDir(), } - if err := fd.vfsfd.Init(fd, lowerFlags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil { + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil { return nil, err } lowerFD.IncRef() @@ -969,19 +981,19 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // RenameAt implements vfs.FilesystemImpl.RenameAt. func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // StatAt implements vfs.FilesystemImpl.StatAt. @@ -1021,13 +1033,13 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. @@ -1038,7 +1050,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil { return nil, err } - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. @@ -1076,13 +1088,13 @@ func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { // Verity file system is read-only. 
- return syserror.EROFS + return linuxerr.EROFS } // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { // Verity file system is read-only. - return syserror.EROFS + return linuxerr.EROFS } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index 969003613..c5fa9855b 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -39,12 +39,14 @@ import ( "encoding/json" "fmt" "math" + "sort" "strconv" "strings" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -251,7 +253,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt hash, err := hex.DecodeString(encodedRootHash) if err != nil { ctx.Warningf("verity.FilesystemType.GetFilesystem: Failed to decode root hash: %v", err) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } rootHash = hash } @@ -269,19 +271,19 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Check for unparsed options. if len(mopts) != 0 { ctx.Warningf("verity.FilesystemType.GetFilesystem: unknown options: %v", mopts) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } // Handle internal options. iopts, ok := opts.InternalData.(InternalFilesystemOptions) if len(lowerPathname) == 0 && !ok { ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if len(lowerPathname) != 0 { if ok { ctx.Warningf("verity.FilesystemType.GetFilesystem: unexpected verity configs with specified lower path") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } iopts = InternalFilesystemOptions{ AllowRuntimeEnable: len(rootHash) == 0, @@ -300,7 +302,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt lowerPath := fspath.Parse(lowerPathname) if !lowerPath.Absolute { ctx.Infof("verity.FilesystemType.GetFilesystem: lower_path %q must be absolute", lowerPathname) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } var err error mountedLowerVD, err = vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{ @@ -358,7 +360,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // If runtime enable is allowed, the root merkle tree may be absent. We // should create the tree file. 
- if err == syserror.ENOENT && fs.allowRuntimeEnable { + if linuxerr.Equals(linuxerr.ENOENT, err) && fs.allowRuntimeEnable { lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ Root: lowerVD, Start: lowerVD, @@ -439,7 +441,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if !d.isDir() { ctx.Warningf("verity root must be a directory") - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } if !fs.allowRuntimeEnable { @@ -451,7 +453,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt Name: childrenOffsetXattr, Size: sizeOfStringInt32, }) - if err == syserror.ENOENT || err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err)) } if err != nil { @@ -470,7 +472,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt Name: childrenSizeXattr, Size: sizeOfStringInt32, }) - if err == syserror.ENOENT || err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) { return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err)) } if err != nil { @@ -487,7 +489,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt }, &vfs.OpenOptions{ Flags: linux.O_RDONLY, }) - if err == syserror.ENOENT { + if linuxerr.Equals(linuxerr.ENOENT, err) { return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err)) } if err != nil { @@ -508,6 +510,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { return nil, nil, err } + d.generateChildrenList() } d.vfsd.Init(d) @@ -564,6 +567,11 @@ type dentry struct { // populated by enableVerity. childrenNames is also protected by dirMu. childrenNames map[string]struct{} + // childrenList is a complete sorted list of childrenNames. This list + // is generated when verity is enabled, or the first time the file is + // verified in non runtime enable mode. + childrenList []string + // lowerVD is the VirtualDentry in the underlying file system. It is // never modified after initialized. lowerVD vfs.VirtualDentry @@ -749,6 +757,17 @@ func (d *dentry) verityEnabled() bool { return !d.fs.allowRuntimeEnable || len(d.hash) != 0 } +// generateChildrenList generates a sorted childrenList from childrenNames, and +// cache it in d for hashing. +func (d *dentry) generateChildrenList() { + if len(d.childrenList) == 0 && len(d.childrenNames) != 0 { + for child := range d.childrenNames { + d.childrenList = append(d.childrenList, child) + } + sort.Strings(d.childrenList) + } +} + // getLowerAt returns the dentry in the underlying file system, which is // represented by filename relative to d. func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) { @@ -857,13 +876,13 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { // Verity files are read-only. - return syserror.EPERM + return linuxerr.EPERM } // IterDirents implements vfs.FileDescriptionImpl.IterDirents. 
func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { if !fd.d.isDir() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } fd.mu.Lock() defer fd.mu.Unlock() @@ -921,14 +940,14 @@ func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) case linux.SEEK_END: n = int64(fd.d.size) default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset > math.MaxInt64-n { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } offset += n if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } fd.off = offset return offset, nil @@ -962,10 +981,12 @@ func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, ui return nil, 0, err } + fd.d.generateChildrenList() + params := &merkletree.GenerateParams{ TreeReader: &merkleReader, TreeWriter: &merkleWriter, - Children: fd.d.childrenNames, + Children: fd.d.childrenList, HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), Name: fd.d.name, Mode: uint32(stat.Mode), @@ -1007,7 +1028,7 @@ func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, ui default: // TODO(b/167728857): Investigate whether and how we should // enable other types of file. - return nil, 0, syserror.EINVAL + return nil, 0, linuxerr.EINVAL } hash, err := merkletree.Generate(params) return hash, uint64(params.Size), err @@ -1056,7 +1077,7 @@ func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error { // and stores its hash in its parent directory's Merkle tree. func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { if !fd.d.fs.allowRuntimeEnable { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } fd.d.fs.verityMu.Lock() @@ -1125,7 +1146,7 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) { t := kernel.TaskFromContext(ctx) if t == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var metadata linux.DigestMetadata @@ -1138,7 +1159,7 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hosta // enabled, in which case fd.d.hash should be set. if len(fd.d.hash) == 0 { if fd.d.fs.allowRuntimeEnable { - return 0, syserror.ENODATA + return 0, linuxerr.ENODATA } return 0, fd.d.fs.alertIntegrityViolation("Ioctl measureVerity: no hash found") } @@ -1148,7 +1169,7 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hosta return 0, err } if metadata.DigestSize < uint16(len(fd.d.hash)) { - return 0, syserror.EOVERFLOW + return 0, linuxerr.EOVERFLOW } // Populate the output digest size, since DigestSize is both input and @@ -1178,7 +1199,7 @@ func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr) t := kernel.TaskFromContext(ctx) if t == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } _, err := primitive.CopyInt32Out(t, flags, f) return 0, err @@ -1227,7 +1248,7 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of // The Merkle tree file for the child should have been created and // contains the expected xattrs. If the xattr does not exist, it // indicates unexpected modifications to the file system. 
- if err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENODATA, err) { return 0, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) } if err != nil { @@ -1261,7 +1282,7 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of Mode: fd.d.mode, UID: fd.d.uid, GID: fd.d.gid, - Children: fd.d.childrenNames, + Children: fd.d.childrenList, HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), ReadOffset: offset, ReadSize: dst.NumBytes(), @@ -1277,12 +1298,12 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EROFS + return 0, linuxerr.EROFS } // Write implements vfs.FileDescriptionImpl.Write. func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EROFS + return 0, linuxerr.EROFS } // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. @@ -1298,7 +1319,7 @@ func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapO // Check if mmap is allowed on the lower filesystem. if !opts.SentryOwnedContent { - return syserror.ENODEV + return linuxerr.ENODEV } return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts) } @@ -1349,7 +1370,7 @@ func (fd *fileDescription) Translate(ctx context.Context, required, optional mem // The Merkle tree file for the child should have been created and // contains the expected xattrs. If the xattr does not exist, it // indicates unexpected modifications to the file system. - if err == syserror.ENODATA { + if linuxerr.Equals(linuxerr.ENODATA, err) { return nil, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) } if err != nil { @@ -1433,7 +1454,7 @@ func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) { // mapped region. readOffset := off - int64(r.Offset) if readOffset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } bs.DropFirst64(uint64(readOffset)) view := bs.TakeFirst64(uint64(len(p))) diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go index 5c78a0019..af041bd50 100644 --- a/pkg/sentry/fsimpl/verity/verity_test.go +++ b/pkg/sentry/fsimpl/verity/verity_test.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" @@ -31,7 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -476,7 +476,7 @@ func TestOpenNonexistentFile(t *testing.T) { // Ensure open an unexpected file in the parent directory fails with // ENOENT rather than verification failure. - if _, err = openVerityAt(ctx, vfsObj, root, filename+"abc", linux.O_RDONLY, linux.ModeRegular); err != syserror.ENOENT { + if _, err = openVerityAt(ctx, vfsObj, root, filename+"abc", linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.ENOENT, err) { t.Errorf("OpenAt unexpected error: %v", err) } } @@ -767,7 +767,7 @@ func TestOpenDeletedFileFails(t *testing.T) { } // Ensure reopening the verity enabled file fails. 
- if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO { + if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("got OpenAt error: %v, expected EIO", err) } }) @@ -829,7 +829,7 @@ func TestOpenRenamedFileFails(t *testing.T) { } // Ensure reopening the verity enabled file fails. - if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO { + if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("got OpenAt error: %v, expected EIO", err) } }) @@ -899,7 +899,7 @@ func TestUnmodifiedSymlinkFileReadSucceeds(t *testing.T) { t.Fatalf("SymlinkAt: %v", err) } - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("openVerityAt symlink: %v", err) @@ -1034,7 +1034,7 @@ func TestDeletedSymlinkFileReadFails(t *testing.T) { t.Fatalf("SymlinkAt: %v", err) } - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("openVerityAt symlink: %v", err) @@ -1063,14 +1063,14 @@ func TestDeletedSymlinkFileReadFails(t *testing.T) { Root: root, Start: root, Path: fspath.Parse(symlink), - }); err != syserror.EIO { + }); !linuxerr.Equals(linuxerr.EIO, err) { t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err) } if tc.testWalk { fileInSymlinkDirectory := symlink + "/verity-test-file" // Ensure opening the verity enabled file in the symlink directory fails. - if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO { + if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("Open succeeded with modified symlink: %v", err) } } @@ -1136,7 +1136,7 @@ func TestModifiedSymlinkFileReadFails(t *testing.T) { } // Open symlink file to get the fd for ioctl in new step. - fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular) + fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular) if err != nil { t.Fatalf("OpenAt symlink: %v", err) } @@ -1195,14 +1195,14 @@ func TestModifiedSymlinkFileReadFails(t *testing.T) { Root: root, Start: root, Path: fspath.Parse(symlink), - }); err != syserror.EIO { + }); !linuxerr.Equals(linuxerr.EIO, err) { t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err) } if tc.testWalk { fileInSymlinkDirectory := symlink + "/verity-test-file" // Ensure opening the verity enabled file in the symlink directory fails. 
- if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO { + if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("Open succeeded with modified symlink: %v", err) } } diff --git a/pkg/sentry/hostfd/hostfd_linux.go b/pkg/sentry/hostfd/hostfd_linux.go index 1cabc848f..0131da22d 100644 --- a/pkg/sentry/hostfd/hostfd_linux.go +++ b/pkg/sentry/hostfd/hostfd_linux.go @@ -12,7 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package hostfd -// maxIov is the maximum permitted size of a struct iovec array. -const maxIov = 1024 // UIO_MAXIOV +// MaxReadWriteIov is the maximum permitted size of a struct iovec array in a +// readv, writev, preadv, or pwritev host syscall. +const MaxReadWriteIov = 1024 // UIO_MAXIOV + +// MaxSendRecvMsgIov is the maximum permitted size of a struct iovec array in a +// sendmsg or recvmsg host syscall. +const MaxSendRecvMsgIov = 1024 // UIO_MAXIOV diff --git a/pkg/sentry/hostfd/hostfd_unsafe.go b/pkg/sentry/hostfd/hostfd_unsafe.go index 03c6d2a16..a43311eb4 100644 --- a/pkg/sentry/hostfd/hostfd_unsafe.go +++ b/pkg/sentry/hostfd/hostfd_unsafe.go @@ -23,6 +23,11 @@ import ( "gvisor.dev/gvisor/pkg/safemem" ) +const ( + sizeofIovec = unsafe.Sizeof(unix.Iovec{}) + sizeofMsghdr = unsafe.Sizeof(unix.Msghdr{}) +) + // Preadv2 reads up to dsts.NumBytes() bytes from host file descriptor fd into // dsts. offset and flags are interpreted as for preadv2(2). // @@ -44,9 +49,9 @@ func Preadv2(fd int32, dsts safemem.BlockSeq, offset int64, flags uint32) (uint6 } } else { iovs := safemem.IovecsFromBlockSeq(dsts) - if len(iovs) > maxIov { - log.Debugf("hostfd.Preadv2: truncating from %d iovecs to %d", len(iovs), maxIov) - iovs = iovs[:maxIov] + if len(iovs) > MaxReadWriteIov { + log.Debugf("hostfd.Preadv2: truncating from %d iovecs to %d", len(iovs), MaxReadWriteIov) + iovs = iovs[:MaxReadWriteIov] } n, _, e = unix.Syscall6(unix.SYS_PREADV2, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(offset), 0 /* pos_h */, uintptr(flags)) } @@ -80,9 +85,9 @@ func Pwritev2(fd int32, srcs safemem.BlockSeq, offset int64, flags uint32) (uint } } else { iovs := safemem.IovecsFromBlockSeq(srcs) - if len(iovs) > maxIov { - log.Debugf("hostfd.Preadv2: truncating from %d iovecs to %d", len(iovs), maxIov) - iovs = iovs[:maxIov] + if len(iovs) > MaxReadWriteIov { + log.Debugf("hostfd.Preadv2: truncating from %d iovecs to %d", len(iovs), MaxReadWriteIov) + iovs = iovs[:MaxReadWriteIov] } n, _, e = unix.Syscall6(unix.SYS_PWRITEV2, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(offset), 0 /* pos_h */, uintptr(flags)) } diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index a82d641da..26614b029 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -226,6 +226,8 @@ go_library( "//pkg/context", "//pkg/coverage", "//pkg/cpuid", + "//pkg/errors", + "//pkg/errors/linuxerr", "//pkg/eventchannel", "//pkg/fspath", "//pkg/goid", @@ -299,6 +301,7 @@ go_test( deps = [ "//pkg/abi", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", @@ -310,6 +313,5 @@ go_test( "//pkg/sentry/time", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go 
b/pkg/sentry/kernel/abstract_socket_namespace.go index d100e58d7..5d86a04f3 100644 --- a/pkg/sentry/kernel/abstract_socket_namespace.go +++ b/pkg/sentry/kernel/abstract_socket_namespace.go @@ -27,7 +27,7 @@ import ( // +stateify savable type abstractEndpoint struct { ep transport.BoundEndpoint - socket refsvfs2.RefCounter + socket refsvfs2.TryRefCounter name string ns *AbstractSocketNamespace } @@ -57,7 +57,7 @@ func NewAbstractSocketNamespace() *AbstractSocketNamespace { // its backing socket. type boundEndpoint struct { transport.BoundEndpoint - socket refsvfs2.RefCounter + socket refsvfs2.TryRefCounter } // Release implements transport.BoundEndpoint.Release. @@ -89,7 +89,7 @@ func (a *AbstractSocketNamespace) BoundEndpoint(name string) transport.BoundEndp // // When the last reference managed by socket is dropped, ep may be removed from the // namespace. -func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.RefCounter) error { +func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.TryRefCounter) error { a.mu.Lock() defer a.mu.Unlock() @@ -109,7 +109,7 @@ func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep tran // Remove removes the specified socket at name from the abstract socket // namespace, if it has not yet been replaced. -func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.RefCounter) { +func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.TryRefCounter) { a.mu.Lock() defer a.mu.Unlock() diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 12180351d..7a1a36454 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -63,6 +63,7 @@ go_library( "//pkg/abi/linux", "//pkg/bits", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go index 3325fedcb..fc245c54b 100644 --- a/pkg/sentry/kernel/auth/credentials.go +++ b/pkg/sentry/kernel/auth/credentials.go @@ -16,7 +16,7 @@ package auth import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // Credentials contains information required to authorize privileged operations @@ -203,7 +203,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { // uid must be mapped. kuid := c.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return NoID, syserror.EINVAL + return NoID, linuxerr.EINVAL } // If c has CAP_SETUID, then it can use any UID in its user namespace. 
if c.HasCapability(linux.CAP_SETUID) { @@ -214,7 +214,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { if kuid == c.RealKUID || kuid == c.EffectiveKUID || kuid == c.SavedKUID { return kuid, nil } - return NoID, syserror.EPERM + return NoID, linuxerr.EPERM } // UseGID checks that c can use gid in its user namespace, then translates it @@ -222,7 +222,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { func (c *Credentials) UseGID(gid GID) (KGID, error) { kgid := c.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return NoID, syserror.EINVAL + return NoID, linuxerr.EINVAL } if c.HasCapability(linux.CAP_SETGID) { return kgid, nil @@ -230,7 +230,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) { if kgid == c.RealKGID || kgid == c.EffectiveKGID || kgid == c.SavedKGID { return kgid, nil } - return NoID, syserror.EPERM + return NoID, linuxerr.EPERM } // SetUID translates the provided uid to the root user namespace and updates c's @@ -239,7 +239,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) { func (c *Credentials) SetUID(uid UID) error { kuid := c.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } c.RealKUID = kuid c.EffectiveKUID = kuid @@ -253,7 +253,7 @@ func (c *Credentials) SetUID(uid UID) error { func (c *Credentials) SetGID(gid GID) error { kgid := c.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } c.RealKGID = kgid c.EffectiveKGID = kgid diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go index 28cbe159d..f06a374a0 100644 --- a/pkg/sentry/kernel/auth/id_map.go +++ b/pkg/sentry/kernel/auth/id_map.go @@ -17,7 +17,7 @@ package auth import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns. @@ -106,11 +106,11 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // than once to a uid_map file in a user namespace fails with the error // EPERM. Similar rules apply for gid_map files." - user_namespaces(7) if !ns.uidMapFromParent.IsEmpty() { - return syserror.EPERM + return linuxerr.EPERM } // "At least one line must be written to the file." if len(entries) == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // """ // In order for a process to write to the /proc/[pid]/uid_map @@ -121,12 +121,12 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // in the user namespace of the process pid. // """ if !c.HasCapabilityIn(linux.CAP_SETUID, ns) { - return syserror.EPERM + return linuxerr.EPERM } // "2. The writing process must either be in the user namespace of the process // pid or be in the parent user namespace of the process pid." if c.UserNamespace != ns && c.UserNamespace != ns.parent { - return syserror.EPERM + return linuxerr.EPERM } // """ // 3. (see trySetUIDMap) @@ -145,14 +145,14 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // parent user namespace to a user ID (group ID) in the user namespace. // """ if len(entries) != 1 || ns.parent.MapToKUID(UID(entries[0].FirstParentID)) != c.EffectiveKUID || entries[0].Length != 1 { - return syserror.EPERM + return linuxerr.EPERM } // """ // + The writing process must have the same effective user ID as the // process that created the user namespace. 
// """ if c.EffectiveKUID != ns.owner { - return syserror.EPERM + return linuxerr.EPERM } } // trySetUIDMap leaves data in maps if it fails. @@ -170,11 +170,11 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error { // checks for NoID. lastID := e.FirstID + e.Length if lastID <= e.FirstID { - return syserror.EINVAL + return linuxerr.EINVAL } lastParentID := e.FirstParentID + e.Length if lastParentID <= e.FirstParentID { - return syserror.EINVAL + return linuxerr.EINVAL } // "3. The mapped user IDs (group IDs) must in turn have a mapping in // the parent user namespace." @@ -182,14 +182,14 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error { // mappings when it's created, so SetUIDMap would have returned EPERM // without reaching this point if ns is root. if !ns.parent.allIDsMapped(&ns.parent.uidMapToParent, e.FirstParentID, lastParentID) { - return syserror.EPERM + return linuxerr.EPERM } // If either of these Adds fail, we have an overlapping range. if !ns.uidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.uidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { - return syserror.EINVAL + return linuxerr.EINVAL } } return nil @@ -202,24 +202,24 @@ func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) er ns.mu.Lock() defer ns.mu.Unlock() if !ns.gidMapFromParent.IsEmpty() { - return syserror.EPERM + return linuxerr.EPERM } if len(entries) == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if !c.HasCapabilityIn(linux.CAP_SETGID, ns) { - return syserror.EPERM + return linuxerr.EPERM } if c.UserNamespace != ns && c.UserNamespace != ns.parent { - return syserror.EPERM + return linuxerr.EPERM } if !c.HasCapabilityIn(linux.CAP_SETGID, ns.parent) { if len(entries) != 1 || ns.parent.MapToKGID(GID(entries[0].FirstParentID)) != c.EffectiveKGID || entries[0].Length != 1 { - return syserror.EPERM + return linuxerr.EPERM } // It's correct for this to still be UID. if c.EffectiveKUID != ns.owner { - return syserror.EPERM + return linuxerr.EPERM } // "In the case of gid_map, use of the setgroups(2) system call must // first be denied by writing "deny" to the /proc/[pid]/setgroups file @@ -239,20 +239,20 @@ func (ns *UserNamespace) trySetGIDMap(entries []IDMapEntry) error { for _, e := range entries { lastID := e.FirstID + e.Length if lastID <= e.FirstID { - return syserror.EINVAL + return linuxerr.EINVAL } lastParentID := e.FirstParentID + e.Length if lastParentID <= e.FirstParentID { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.parent.allIDsMapped(&ns.parent.gidMapToParent, e.FirstParentID, lastParentID) { - return syserror.EPERM + return linuxerr.EPERM } if !ns.gidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.gidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { - return syserror.EINVAL + return linuxerr.EINVAL } } return nil diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index 9dd52c860..40a406f9d 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -17,8 +17,8 @@ package auth import ( "math" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // A UserNamespace represents a user namespace. 
See user_namespaces(7) for @@ -105,7 +105,7 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { if c.UserNamespace.depth() >= maxUserNamespaceDepth { // "... Calls to unshare(2) or clone(2) that would cause this limit to // be exceeded fail with the error EUSERS." - user_namespaces(7) - return nil, syserror.EUSERS + return nil, linuxerr.EUSERS } // "EPERM: CLONE_NEWUSER was specified in flags, but either the effective // user ID or the effective group ID of the caller does not have a mapping @@ -114,10 +114,10 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { // process are mapped to user IDs and group IDs in the user namespace of // the calling process at the time of the call." - unshare(2) if !c.EffectiveKUID.In(c.UserNamespace).Ok() { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } if !c.EffectiveKGID.In(c.UserNamespace).Ok() { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } return &UserNamespace{ parent: c.UserNamespace, diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 6224a0cbd..6b2dd09da 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -8,12 +8,12 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/errors/linuxerr", "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 5d584dc45..473987a79 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -17,12 +17,12 @@ package fasync import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -248,7 +248,7 @@ func (a *FileAsync) Signal() linux.Signal { // to send SIGIO. func (a *FileAsync) SetSignal(signal linux.Signal) error { if signal != 0 && !signal.IsValid() { - return syserror.EINVAL + return linuxerr.EINVAL } a.mu.Lock() defer a.mu.Unlock() diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 62777faa8..8786a70b5 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -23,12 +23,12 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // FDFlags define flags for an individual descriptor. @@ -156,7 +156,7 @@ func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) { // Release any POSIX lock possibly held by the FDTable. 
if file.SupportsLocks() { err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF}) - if err != nil && err != syserror.ENOLCK { + if err != nil && !linuxerr.Equals(linuxerr.ENOLCK, err) { panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) } } diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 6c31e082c..cfdea5cf7 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -37,6 +37,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/sentry/memmap", @@ -53,8 +54,8 @@ go_test( library = ":futex", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index 0427cf3f4..f5c364c96 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -20,6 +20,7 @@ package futex import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" @@ -122,7 +123,7 @@ func check(t Target, addr hostarch.Addr, val uint32) error { return err } if cur != val { - return syserror.EAGAIN + return linuxerr.EAGAIN } return nil } @@ -332,7 +333,7 @@ func getKey(t Target, addr hostarch.Addr, private bool) (Key, error) { // Ensure the address is aligned. // It must be a DWORD boundary. if addr&0x3 != 0 { - return Key{}, syserror.EINVAL + return Key{}, linuxerr.EINVAL } if private { return Key{Kind: KindPrivate, Offset: uint64(addr)}, nil @@ -397,8 +398,8 @@ func (m *Manager) Fork() *Manager { } // lockBucket returns a locked bucket for the given key. -func (m *Manager) lockBucket(k *Key) *bucket { - var b *bucket +// +checklocksacquire:b.mu +func (m *Manager) lockBucket(k *Key) (b *bucket) { if k.Kind == KindSharedMappable { b = m.sharedBucket } else { @@ -409,7 +410,9 @@ func (m *Manager) lockBucket(k *Key) *bucket { } // lockBuckets returns locked buckets for the given keys. -func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { +// +checklocksacquire:b1.mu +// +checklocksacquire:b2.mu +func (m *Manager) lockBuckets(k1, k2 *Key) (b1 *bucket, b2 *bucket) { // Buckets must be consistently ordered to avoid circular lock // dependencies. We order buckets in m.privateBuckets by index (lowest // index first), and all buckets in m.privateBuckets precede @@ -419,8 +422,8 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { if k1.Kind != KindSharedMappable && k2.Kind != KindSharedMappable { i1 := bucketIndexForAddr(k1.addr()) i2 := bucketIndexForAddr(k2.addr()) - b1 := &m.privateBuckets[i1] - b2 := &m.privateBuckets[i2] + b1 = &m.privateBuckets[i1] + b2 = &m.privateBuckets[i2] switch { case i1 < i2: b1.mu.Lock() @@ -431,19 +434,30 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { default: b1.mu.Lock() } - return b1, b2 + return b1, b2 // +checklocksforce } // At least one of b1 or b2 should be m.sharedBucket. - b1 := m.sharedBucket - b2 := m.sharedBucket + b1 = m.sharedBucket + b2 = m.sharedBucket if k1.Kind != KindSharedMappable { b1 = m.lockBucket(k1) } else if k2.Kind != KindSharedMappable { b2 = m.lockBucket(k2) } m.sharedBucket.mu.Lock() - return b1, b2 + return b1, b2 // +checklocksforce +} + +// unlockBuckets unlocks two buckets. 
+// +checklocksrelease:b1.mu +// +checklocksrelease:b2.mu +func (m *Manager) unlockBuckets(b1, b2 *bucket) { + b1.mu.Unlock() + if b1 != b2 { + b2.mu.Unlock() + } + return // +checklocksforce } // Wake wakes up to n waiters matching the bitmask on the given addr. @@ -476,10 +490,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, c defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) if checkval { if err := check(t, addr, val); err != nil { @@ -526,10 +537,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwa defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) done := 0 cond, err := atomicOp(t, addr2, op) @@ -670,7 +678,7 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr hostarch.Addr, tid uint return false, err } if (cur & linux.FUTEX_TID_MASK) == tid { - return false, syserror.EDEADLK + return false, linuxerr.EDEADLK } if (cur & linux.FUTEX_TID_MASK) == 0 { @@ -745,7 +753,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu } if (cur & linux.FUTEX_TID_MASK) != tid { - return syserror.EPERM + return linuxerr.EPERM } var next *Waiter // Who's the next owner? @@ -773,7 +781,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu if prev != cur { // Let user mode handle CAS races. This is different than lock, which // retries when CAS fails. - return syserror.EAGAIN + return linuxerr.EAGAIN } return nil } @@ -790,7 +798,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu return err } if prev != cur { - return syserror.EINVAL + return linuxerr.EINVAL } b.wakeWaiterLocked(next) diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index deba44e5c..04c136f87 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -21,8 +21,8 @@ import ( "testing" "unsafe" - "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" ) @@ -488,7 +488,7 @@ func (t *testMutex) Lock() { // Wait for it to be "not locked". w := NewWaiter() err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0)) - if err == unix.EAGAIN { + if linuxerr.Equals(linuxerr.EAGAIN, err) { continue } if err != nil { diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go index 4b943106b..e8a71bec1 100644 --- a/pkg/sentry/kernel/kcov.go +++ b/pkg/sentry/kernel/kcov.go @@ -22,13 +22,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov @@ -125,19 +125,19 @@ func (kcov *Kcov) InitTrace(size uint64) error { defer kcov.mu.Unlock() if kcov.mode != linux.KCOV_MODE_DISABLED { - return syserror.EBUSY + return linuxerr.EBUSY } // To simplify all the logic around mapping, we require that the length of the // shared region is a multiple of the system page size. 
if (8*size)&(hostarch.PageSize-1) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // We need space for at least two uint64s to hold current position and a // single PC. if size < 2 || size > kcovAreaSizeMax { - return syserror.EINVAL + return linuxerr.EINVAL } kcov.size = size @@ -157,7 +157,7 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error { // KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call. if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil { - return syserror.EINVAL + return linuxerr.EINVAL } switch traceKind { @@ -165,13 +165,13 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error { kcov.mode = linux.KCOV_MODE_TRACE_PC case linux.KCOV_TRACE_CMP: // We do not support KCOV_MODE_TRACE_CMP. - return syserror.ENOTSUP + return linuxerr.ENOTSUP default: - return syserror.EINVAL + return linuxerr.EINVAL } if kcov.owningTask != nil && kcov.owningTask != t { - return syserror.EBUSY + return linuxerr.EBUSY } kcov.owningTask = t @@ -195,7 +195,7 @@ func (kcov *Kcov) DisableTrace(ctx context.Context) error { } if t != kcov.owningTask { - return syserror.EINVAL + return linuxerr.EINVAL } kcov.mode = linux.KCOV_MODE_INIT kcov.owningTask = nil @@ -237,7 +237,7 @@ func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro defer kcov.mu.Unlock() if kcov.mode != linux.KCOV_MODE_INIT { - return syserror.EINVAL + return linuxerr.EINVAL } if kcov.mappable == nil { diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 352c36ba9..df5160b67 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -1299,11 +1299,11 @@ func (k *Kernel) WaitExited() { } // Kill requests that all tasks in k immediately exit as if group exiting with -// status es. Kill does not wait for tasks to exit. -func (k *Kernel) Kill(es ExitStatus) { +// status ws. Kill does not wait for tasks to exit. +func (k *Kernel) Kill(ws linux.WaitStatus) { k.extMu.Lock() defer k.extMu.Unlock() - k.tasks.Kill(es) + k.tasks.Kill(ws) } // Pause requests that all tasks in k temporarily stop executing, and blocks diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go index 2e66ec587..5ffafb0d1 100644 --- a/pkg/sentry/kernel/kernel_opts.go +++ b/pkg/sentry/kernel/kernel_opts.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package kernel // SpecialOpts contains non-standard options for the kernel. 
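Note on the recurring pattern in the hunks above and below: return sites migrate from pkg/syserror sentinels (e.g. syserror.EINVAL) to the corresponding pkg/errors/linuxerr values, and comparison sites migrate from direct equality checks such as `err == syserror.ENOENT` to `linuxerr.Equals(linuxerr.ENOENT, err)`. The following is a minimal, hedged sketch of that call pattern only; it uses the linuxerr identifiers that appear verbatim in this diff, while the lookup helper is hypothetical and not part of the change.

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// lookup is a hypothetical stand-in for a VFS operation that can fail with
// ENOENT; it is not part of this change.
func lookup(name string) error {
	if name == "" {
		// New style: return the linuxerr sentinel directly.
		return linuxerr.ENOENT
	}
	return nil
}

func main() {
	err := lookup("")
	// New-style check used throughout this diff: match via linuxerr.Equals
	// rather than comparing err against a syserror sentinel.
	if linuxerr.Equals(linuxerr.ENOENT, err) {
		fmt.Println("not found")
	}
}

As suggested by how the call sites above are rewritten, the check no longer depends on the concrete error value returned by lower layers, only on the errno it represents; that reading is inferred from the usage in this diff rather than from the Equals implementation itself.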
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 34c617b08..94ebac7c5 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/safemem", @@ -47,6 +48,7 @@ go_test( library = ":pipe", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/syserror", diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 6497dc4ba..08786d704 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -17,6 +17,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" @@ -112,7 +113,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi // read side isn't open yet. if flags.NonBlocking { w.DecRef(ctx) - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } if !waitFor(&i.mu, &i.rWakeup, ctx) { @@ -130,10 +131,10 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi return rw, nil default: - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } func (*inodeOperations) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { - return syserror.EPIPE + return linuxerr.EPIPE } diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index d6fb0fdb8..d25cf658e 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -19,6 +19,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" @@ -258,7 +259,7 @@ func TestNonblockingWriteOpenFileNoReaders(t *testing.T) { ctx := newSleeperContext(t) f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != syserror.ENXIO { + if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); !linuxerr.Equals(linuxerr.ENXIO, err) { t.Fatalf("Nonblocking open for write failed unexpected error %v.", err) } } diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 06769931a..85e3ce9f4 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -22,6 +22,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -428,18 +429,18 @@ func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) { // SetFifoSize implements fs.FifoSizer.SetFifoSize. func (p *Pipe) SetFifoSize(size int64) (int64, error) { if size < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if size < MinimumPipeSize { size = MinimumPipeSize // Per spec. 
} if size > MaximumPipeSize { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } p.mu.Lock() defer p.mu.Unlock() if size < p.size { - return 0, syserror.EBUSY + return 0, linuxerr.EBUSY } p.max = size return size, nil diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go index dd60cba24..077c5d596 100644 --- a/pkg/sentry/kernel/pipe/pipe_unsafe.go +++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go @@ -23,6 +23,8 @@ import ( // concurrent calls cannot deadlock. // // Preconditions: x != y. +// +checklocksacquire:x.mu +// +checklocksacquire:y.mu func lockTwoPipes(x, y *Pipe) { // Lock the two pipes in order of increasing address. if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) { diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 3fa5d1d2f..c883a9014 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -86,7 +87,7 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error) if n > 0 { p.Notify(waiter.ReadableEvents) } - if err == unix.EPIPE { + if linuxerr.Equals(linuxerr.EPIPE, err) { // If we are returning EPIPE send SIGPIPE to the task. if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil { sendSig(linux.SIGPIPE) @@ -156,6 +157,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume // // mu must be held by the caller. waitFor returns with mu held, but it will // drop mu before blocking for any reader/writers. +// +checklocks:mu func waitFor(mu *sync.Mutex, wakeupChan *chan struct{}, sleeper amutex.Sleeper) bool { // Ideally this function would simply use a condition variable. However, the // wait needs to be interruptible via 'sleeper', so we must sychronize via a diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 95b948edb..077d5fd7f 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -17,6 +17,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -79,7 +80,7 @@ func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *v // Allocate implements vfs.FileDescriptionImpl.Allocate. func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error { - return syserror.ESPIPE + return linuxerr.ESPIPE } // Open opens the pipe represented by vp. @@ -90,7 +91,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s readable := vfs.MayReadFileWithOpenFlags(statusFlags) writable := vfs.MayWriteFileWithOpenFlags(statusFlags) if !readable && !writable { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fd, err := vp.newFD(mnt, vfsd, statusFlags, locks) @@ -131,7 +132,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // side isn't open yet. if statusFlags&linux.O_NONBLOCK != 0 { fd.DecRef(ctx) - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } // Wait for a reader to open the other end. 
if !waitFor(&vp.mu, &vp.rWakeup, ctx) { @@ -224,7 +225,7 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { // Allocate implements vfs.FileDescriptionImpl.Allocate. func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.ESPIPE + return linuxerr.ESPIPE } // EventRegister implements waiter.Waitable.EventRegister. @@ -415,7 +416,7 @@ func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { // Preconditions: count > 0. func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { if dst.pipe == src.pipe { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } lockTwoPipes(dst.pipe, src.pipe) diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go index d801a3d83..319754a42 100644 --- a/pkg/sentry/kernel/posixtimer.go +++ b/pkg/sentry/kernel/posixtimer.go @@ -18,8 +18,8 @@ import ( "math" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) // IntervalTimer represents a POSIX interval timer as described by @@ -175,7 +175,7 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux. break } if t.tg.nextTimerID == end { - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } @@ -214,16 +214,16 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux. target, ok := t.tg.pidns.tasks[ThreadID(sigev.Tid)] t.tg.pidns.owner.mu.RUnlock() if !ok || target.tg != t.tg { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } it.target = target default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if sigev.Notify != linux.SIGEV_NONE { it.signo = linux.Signal(sigev.Signo) if !it.signo.IsValid() { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } it.timer = ktime.NewTimer(c, it) @@ -238,7 +238,7 @@ func (t *Task) IntervalTimerDelete(id linux.TimerID) error { defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return syserror.EINVAL + return linuxerr.EINVAL } delete(t.tg.timers, id) it.DestroyTimer() @@ -251,7 +251,7 @@ func (t *Task) IntervalTimerSettime(id linux.TimerID, its linux.Itimerspec, abs defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return linux.Itimerspec{}, syserror.EINVAL + return linux.Itimerspec{}, linuxerr.EINVAL } newS, err := ktime.SettingFromItimerspec(its, abs, it.timer.Clock()) @@ -269,7 +269,7 @@ func (t *Task) IntervalTimerGettime(id linux.TimerID) (linux.Itimerspec, error) defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return linux.Itimerspec{}, syserror.EINVAL + return linux.Itimerspec{}, linuxerr.EINVAL } tm, s := it.timer.Get() @@ -285,7 +285,7 @@ func (t *Task) IntervalTimerGetoverrun(id linux.TimerID) (int32, error) { defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // By timer_create(2) invariant, either it.target == nil (in which case // it.overrunLast is immutably 0) or t.tg == it.target.tg; and the fact diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index a6287fd6a..21358ec92 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/mm" 
@@ -294,7 +295,7 @@ func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { // Precondition: the TaskSet mutex must be locked (for reading or writing). func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { - allowed, ok := t.k.ptraceExceptions[t] + allowed, ok := t.k.ptraceExceptions[t.tg.leader] if !ok { return false } @@ -481,7 +482,7 @@ func (t *Task) ptraceTraceme() error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if t.hasTracer() { - return syserror.EPERM + return linuxerr.EPERM } if t.parent == nil { // In Linux, only init can not have a parent, and init is assumed never @@ -497,7 +498,7 @@ func (t *Task) ptraceTraceme() error { return nil } if !t.parent.canTraceLocked(t, true) { - return syserror.EPERM + return linuxerr.EPERM } if t.parent.exitState != TaskExitNone { // Fail silently, as if we were successfully attached but then @@ -513,21 +514,21 @@ func (t *Task) ptraceTraceme() error { // ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller. func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { - return syserror.EPERM + return linuxerr.EPERM } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if !t.canTraceLocked(target, true) { - return syserror.EPERM + return linuxerr.EPERM } if target.hasTracer() { - return syserror.EPERM + return linuxerr.EPERM } // Attaching to zombies and dead tasks is not permitted; the exit // notification logic relies on this. Linux allows attaching to PF_EXITING // tasks, though. if target.exitState >= TaskExitZombie { - return syserror.EPERM + return linuxerr.EPERM } if seize { if err := target.ptraceSetOptionsLocked(opts); err != nil { @@ -651,6 +652,7 @@ func (t *Task) forgetTracerLocked() { // Preconditions: // * The signal mutex must be locked. // * The caller must be running on the task goroutine. +// +checklocks:t.tg.signalHandlers.mu func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool { if linux.Signal(info.Signo) == linux.SIGKILL { return false @@ -910,7 +912,7 @@ func (t *Task) ptraceExit() { return } t.tg.signalHandlers.mu.Lock() - status := t.exitStatus.Status() + status := t.exitStatus t.tg.signalHandlers.mu.Unlock() t.Debugf("Entering PTRACE_EVENT_EXIT stop") t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status)) @@ -938,7 +940,7 @@ func (t *Task) ptraceKill(target *Task) error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if target.Tracer() != t { - return syserror.ESRCH + return linuxerr.ESRCH } target.tg.signalHandlers.mu.Lock() defer target.tg.signalHandlers.mu.Unlock() @@ -962,7 +964,7 @@ func (t *Task) ptraceInterrupt(target *Task) error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if target.Tracer() != t { - return syserror.ESRCH + return linuxerr.ESRCH } if !target.ptraceSeized { return syserror.EIO @@ -994,7 +996,7 @@ func (t *Task) ptraceSetOptionsLocked(opts uintptr) error { linux.PTRACE_O_TRACEVFORK | linux.PTRACE_O_TRACEVFORKDONE) if opts&^valid != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } t.ptraceOpts = ptraceOptions{ ExitKill: opts&linux.PTRACE_O_EXITKILL != 0, @@ -1020,7 +1022,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { // specified by pid. 
target := t.tg.pidns.TaskWithID(pid) if target == nil { - return syserror.ESRCH + return linuxerr.ESRCH } // PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already @@ -1045,7 +1047,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() if target.Tracer() != t { t.tg.pidns.owner.mu.RUnlock() - return syserror.ESRCH + return linuxerr.ESRCH } if !target.ptraceFreeze() { t.tg.pidns.owner.mu.RUnlock() @@ -1053,7 +1055,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { // PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the // tracee to be in a ptrace-stop, otherwise they fail with ESRCH." - // ptrace(2) - return syserror.ESRCH + return linuxerr.ESRCH } t.tg.pidns.owner.mu.RUnlock() // Even if the target has a ptrace-stop active, the tracee's task goroutine @@ -1221,7 +1223,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() if target.ptraceSiginfo == nil { - return syserror.EINVAL + return linuxerr.EINVAL } _, err := target.ptraceSiginfo.CopyOut(t, data) return err @@ -1234,14 +1236,14 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() if target.ptraceSiginfo == nil { - return syserror.EINVAL + return linuxerr.EINVAL } target.ptraceSiginfo = &info return nil case linux.PTRACE_GETSIGMASK: if addr != linux.SignalSetSize { - return syserror.EINVAL + return linuxerr.EINVAL } mask := target.SignalMask() _, err := mask.CopyOut(t, data) @@ -1249,7 +1251,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { case linux.PTRACE_SETSIGMASK: if addr != linux.SignalSetSize { - return syserror.EINVAL + return linuxerr.EINVAL } var mask linux.SignalSet if _, err := mask.CopyIn(t, data); err != nil { diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 5ae05b5c3..63422e155 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kernel diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 46dd84cbc..27514d67b 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kernel diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go index 4bc5bca44..de352f4f2 100644 --- a/pkg/sentry/kernel/rseq.go +++ b/pkg/sentry/kernel/rseq.go @@ -18,9 +18,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/hostcpu" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -59,23 +59,23 @@ func (t *Task) RSeqAvailable() bool { func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr != 0 { if t.rseqAddr != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqSignature != signature { - return syserror.EINVAL + return linuxerr.EINVAL } - return syserror.EBUSY + return linuxerr.EBUSY } // rseq must be aligned and correctly sized. 
if addr&(linux.AlignOfRSeq-1) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if length != linux.SizeOfRSeq { - return syserror.EINVAL + return linuxerr.EINVAL } if _, ok := t.MemoryManager().CheckIORange(addr, linux.SizeOfRSeq); !ok { - return syserror.EFAULT + return linuxerr.EFAULT } t.rseqAddr = addr @@ -92,7 +92,7 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err) t.forceSignal(linux.SIGSEGV, false /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) - return syserror.EFAULT + return linuxerr.EFAULT } return nil @@ -103,16 +103,16 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { // Preconditions: The caller must be running on the task goroutine. func (t *Task) ClearRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqAddr != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if length != linux.SizeOfRSeq { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqSignature != signature { - return syserror.EPERM + return linuxerr.EPERM } if err := t.rseqClearCPU(); err != nil { @@ -152,10 +152,10 @@ func (t *Task) SetOldRSeqCriticalRegion(r OldRSeqCriticalRegion) error { return nil } if r.CriticalSection.Start >= r.CriticalSection.End { - return syserror.EINVAL + return linuxerr.EINVAL } if r.CriticalSection.Contains(r.Restart) { - return syserror.EINVAL + return linuxerr.EINVAL } // TODO(jamieliu): check that r.CriticalSection and r.Restart are in // the application address range, for consistency with Linux. @@ -187,7 +187,7 @@ func (t *Task) SetOldRSeqCPUAddr(addr hostarch.Addr) error { // unfortunate, but unlikely in a correct program. if err := t.rseqUpdateCPU(); err != nil { t.oldRSeqCPUAddr = 0 - return syserror.EINVAL // yes, EINVAL, not err or EFAULT + return linuxerr.EINVAL // yes, EINVAL, not err or EFAULT } return nil } diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 65e5427c1..a787c00a8 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -25,6 +25,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 47bb66b42..485c3a788 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -127,7 +128,7 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry { // exists. func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { if nsems < 0 || nsems > semsMax { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } r.mu.Lock() @@ -142,15 +143,15 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Check that caller can access semaphore set. creds := auth.CredentialsFromContext(ctx) if !set.checkPerms(creds, fs.PermsFromMode(mode)) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } // Validate parameters. 
if nsems > int32(set.Size()) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if create && exclusive { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } return set, nil } @@ -163,7 +164,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Zero is only valid if an existing set is found. if nsems == 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // Apply system limits. @@ -238,7 +239,7 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { set := r.semaphores[id] if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } index, found := r.findIndexByID(id) if !found { @@ -252,7 +253,7 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { // "The effective user ID of the calling process must match the creator or // owner of the semaphore set, or the caller must be privileged." if !set.checkCredentials(creds) && !set.checkCapability(creds) { - return syserror.EACCES + return linuxerr.EACCES } delete(r.semaphores, set.ID) @@ -370,7 +371,7 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File // "The effective UID of the calling process must match the owner or creator // of the semaphore set, or the caller must be privileged." if !s.checkCredentials(creds) && !s.checkCapability(creds) { - return syserror.EACCES + return linuxerr.EACCES } s.owner = owner @@ -395,7 +396,7 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem defer s.mu.Unlock() if !s.checkPerms(creds, permMask) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } return &linux.SemidDS{ @@ -417,7 +418,7 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem // SetVal overrides a semaphore value, waking up waiters as needed. func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Credentials, pid int32) error { if val < 0 || val > valueMax { - return syserror.ERANGE + return linuxerr.ERANGE } s.mu.Lock() @@ -425,12 +426,12 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred // "The calling process must have alter permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Write: true}) { - return syserror.EACCES + return linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return syserror.ERANGE + return linuxerr.ERANGE } // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. @@ -452,7 +453,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti for _, val := range vals { if val > valueMax { - return syserror.ERANGE + return linuxerr.ERANGE } } @@ -461,7 +462,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti // "The calling process must have alter permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Write: true}) { - return syserror.EACCES + return linuxerr.EACCES } for i, val := range vals { @@ -483,12 +484,12 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) { // "The calling process must have read permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return sem.value, nil } @@ -500,7 +501,7 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) { // "The calling process must have read permission on the semaphore set." 
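A few hunks further down, executeOps applies each Sembuf operation to a scratch copy of the semaphore values: a negative SemOp is a wait that either fails with ERANGE, blocks, or subtracts, and a positive SemOp is a signal that fails with ERANGE if it would exceed valueMax. The snippet below is a standalone, worked version of that arithmetic for a single value; the names and the valueMax constant are illustrative, and the zero-op branch follows semop(2)'s wait-for-zero semantics rather than anything shown in this diff.

package main

import (
	"errors"
	"fmt"
)

// valueMax mirrors the SEMVMX-style cap on a semaphore value; the exact
// constant here is only illustrative.
const valueMax = 32767

var (
	errRange      = errors.New("ERANGE")
	errWouldBlock = errors.New("would block")
)

// applyOp applies one semop-style operation to a scratch value: negative op =
// wait, positive op = signal, zero op = wait-for-zero, all bounded by valueMax.
func applyOp(val, op int) (int, error) {
	switch {
	case op < 0:
		if -op > valueMax {
			return val, errRange
		}
		if -op > val {
			return val, errWouldBlock // not enough resources; caller must sleep
		}
		return val + op, nil
	case op > 0:
		if val > valueMax-op {
			return val, errRange // would overflow the maximum value
		}
		return val + op, nil
	default:
		if val != 0 {
			return val, errWouldBlock
		}
		return val, nil
	}
}

func main() {
	v, err := applyOp(1, -1)
	fmt.Println(v, err) // 0 <nil>
	_, err = applyOp(0, -1)
	fmt.Println(err) // would block
	_, err = applyOp(valueMax, 1)
	fmt.Println(err) // ERANGE
}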
if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } vals := make([]uint16, s.Size()) @@ -517,12 +518,12 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) { // "The calling process must have read permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return sem.pid, nil } @@ -533,12 +534,12 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait // The calling process must have read permission on the semaphore set. if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } var cnt uint16 for w := sem.waiters.Front(); w != nil; w = w.Next() { @@ -581,7 +582,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr readOnly := true for _, op := range ops { if s.findSem(int32(op.SemNum)) == nil { - return nil, 0, syserror.EFBIG + return nil, 0, linuxerr.EFBIG } if op.SemOp != 0 { readOnly = false @@ -589,7 +590,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr } if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { - return nil, 0, syserror.EACCES + return nil, 0, linuxerr.EACCES } ch, num, err := s.executeOps(ctx, ops, pid) @@ -624,7 +625,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch if op.SemOp < 0 { // Handle 'wait' operation. if -op.SemOp > valueMax { - return nil, 0, syserror.ERANGE + return nil, 0, linuxerr.ERANGE } if -op.SemOp > tmpVals[op.SemNum] { // Not enough resources, must wait. @@ -639,7 +640,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch } else { // op.SemOp > 0: Handle 'signal' operation. if tmpVals[op.SemNum] > valueMax-op.SemOp { - return nil, 0, syserror.ERANGE + return nil, 0, linuxerr.ERANGE } } diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index ca9076406..f9f872522 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -16,7 +16,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // SessionID is the public identifier. @@ -120,8 +120,9 @@ func (pg *ProcessGroup) Originator() *ThreadGroup { // IsOrphan returns true if this process group is an orphan. func (pg *ProcessGroup) IsOrphan() bool { - pg.originator.TaskSet().mu.RLock() - defer pg.originator.TaskSet().mu.RUnlock() + ts := pg.originator.TaskSet() + ts.mu.RLock() + defer ts.mu.RUnlock() return pg.ancestors == 0 } @@ -277,14 +278,14 @@ func (tg *ThreadGroup) createSession() error { continue } if s.leader == tg { - return syserror.EPERM + return linuxerr.EPERM } if s.id == SessionID(id) { - return syserror.EPERM + return linuxerr.EPERM } for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { if pg.id == ProcessGroupID(id) { - return syserror.EPERM + return linuxerr.EPERM } } } @@ -369,17 +370,22 @@ func (tg *ThreadGroup) CreateProcessGroup() error { // Get the ID for this thread in the current namespace. id := tg.pidns.tgids[tg] + // Check whether a process still exists or not. 
+ if id == 0 { + return linuxerr.ESRCH + } + // Per above, check for a Session leader or existing group. for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { if s.leader.pidns != tg.pidns { continue } if s.leader == tg { - return syserror.EPERM + return linuxerr.EPERM } for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { if pg.id == ProcessGroupID(id) { - return syserror.EPERM + return linuxerr.EPERM } } } @@ -437,17 +443,17 @@ func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID // Lookup the ProcessGroup. pg := pidns.processGroups[pgid] if pg == nil { - return syserror.EPERM + return linuxerr.EPERM } // Disallow the join if an execve has performed, per POSIX. if checkExec && tg.execed { - return syserror.EACCES + return linuxerr.EACCES } // See if it's in the same session as ours. if pg.session != tg.processGroup.session { - return syserror.EPERM + return linuxerr.EPERM } // Join the group; adjust children. diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 1c3c0794f..5b69333fe 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -28,6 +28,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/refs", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index a73f1bdca..f7ac4c2b2 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -38,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -145,7 +146,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui // // Note that 'private' always implies the creation of a new segment // whether IPC_CREAT is specified or not. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } r.mu.Lock() @@ -169,20 +170,20 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui // memory segment, and does not have the CAP_IPC_OWNER // capability in the user namespace that governs its IPC // namespace." - man shmget(2) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } if size > shm.size { // "A segment for the given key exists, but size is greater than // the size of that segment." - man shmget(2) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if create && exclusive { // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a // shared memory segment already exists for key." 
// - man shmget(2) - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } shm.IncRef() @@ -200,7 +201,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui if val, ok := hostarch.Addr(size).RoundUp(); ok { sizeAligned = uint64(val) } else { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if numPages := sizeAligned / hostarch.PageSize; r.totalPages+numPages > linux.SHMALL { @@ -511,7 +512,7 @@ func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > s.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, s.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -558,7 +559,7 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta // "The calling process does not have the required permissions for the // requested attach type, and does not have the CAP_IPC_OWNER capability // in the user namespace that governs its IPC namespace." - man shmat(2) - return memmap.MMapOpts{}, syserror.EACCES + return memmap.MMapOpts{}, linuxerr.EACCES } return memmap.MMapOpts{ Length: s.size, @@ -595,7 +596,7 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { // read access for shmid, and the calling process does not have the // CAP_IPC_OWNER capability in the user namespace that governs its IPC // namespace." - man shmctl(2) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } var mode uint16 @@ -645,14 +646,14 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { defer s.mu.Unlock() if !s.checkOwnership(ctx) { - return syserror.EPERM + return linuxerr.EPERM } creds := auth.CredentialsFromContext(ctx) uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID)) gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID)) if !uid.Ok() || !gid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // User may only modify the lower 9 bits of the mode. All the other bits are diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 76d472292..1110ecca5 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index f58ec4194..47958e2d4 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -18,6 +18,7 @@ package signalfd import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -64,7 +65,7 @@ func New(ctx context.Context, mask linux.SignalSet) (*fs.File, error) { t := kernel.TaskFromContext(ctx) if t == nil { // No task context? Not valid. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // name matches fs/signalfd.c:signalfd4. 
dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[signalfd]") diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 2e3b4488a..59eeb253d 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -32,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -232,7 +232,7 @@ type Task struct { // exitStatus is the task's exit status. // // exitStatus is protected by the signal mutex. - exitStatus ExitStatus + exitStatus linux.WaitStatus // syscallRestartBlock represents a custom restart function to run in // restart_syscall(2) to resume an interrupted syscall. @@ -846,7 +846,7 @@ func (t *Task) OOMScoreAdj() int32 { // value should be between -1000 and 1000 inclusive. func (t *Task) SetOOMScoreAdj(adj int32) error { if adj > 1000 || adj < -1000 { - return syserror.EINVAL + return linuxerr.EINVAL } atomic.StoreInt32(&t.tg.oomScoreAdj, adj) return nil diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go index e574997f7..dd364ae50 100644 --- a/pkg/sentry/kernel/task_acct.go +++ b/pkg/sentry/kernel/task_acct.go @@ -18,10 +18,10 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // Getitimer implements getitimer(2). @@ -44,7 +44,7 @@ func (t *Task) Getitimer(id int32) (linux.ItimerVal, error) { s, _ = t.tg.itimerProfSetting.At(tm) t.tg.signalHandlers.mu.Unlock() default: - return linux.ItimerVal{}, syserror.EINVAL + return linux.ItimerVal{}, linuxerr.EINVAL } val, iv := ktime.SpecFromSetting(tm, s) return linux.ItimerVal{ @@ -105,7 +105,7 @@ func (t *Task) Setitimer(id int32, newitv linux.ItimerVal) (linux.ItimerVal, err return linux.ItimerVal{}, err } default: - return linux.ItimerVal{}, syserror.EINVAL + return linux.ItimerVal{}, linuxerr.EINVAL } oldval, oldiv := ktime.SpecFromSetting(tm, olds) return linux.ItimerVal{ diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go index ecbe8f920..b2520eecf 100644 --- a/pkg/sentry/kernel/task_block.go +++ b/pkg/sentry/kernel/task_block.go @@ -19,6 +19,7 @@ import ( "runtime/trace" "time" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -45,7 +46,7 @@ func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time. err := t.BlockWithDeadline(C, true, deadline) // Timeout, explicitly return a remaining duration of 0. - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return 0, err } @@ -162,7 +163,7 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error { region.End() t.SleepFinish(true) // We've timed out. 
- return syserror.ETIMEDOUT + return linuxerr.ETIMEDOUT } } diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go index 7c138e80f..828b90014 100644 --- a/pkg/sentry/kernel/task_cgroup.go +++ b/pkg/sentry/kernel/task_cgroup.go @@ -20,15 +20,13 @@ import ( "sort" "strings" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/syserror" ) // EnterInitialCgroups moves t into an initial set of cgroups. // // Precondition: t isn't in any cgroups yet, t.cgs is empty. -// -// +checklocksignore parent.mu is conditionally acquired. func (t *Task) EnterInitialCgroups(parent *Task) { var inherit map[Cgroup]struct{} if parent != nil { @@ -67,7 +65,7 @@ func (t *Task) EnterCgroup(c Cgroup) error { // // TODO(b/183137098): Implement cgroup migration. log.Warningf("Cgroup migration is not implemented") - return syserror.EBUSY + return linuxerr.EBUSY } } } diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 405771f3f..7e1347aa6 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -20,9 +20,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/cleanup" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -142,25 +142,25 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // address, any set of signal handlers must refer to the same address // space. if !opts.NewSignalHandlers && opts.NewAddressSpace { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // In order for the behavior of thread-group-directed signals to be sane, // all tasks in a thread group must share signal handlers. if !opts.NewThreadGroup && opts.NewSignalHandlers { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // All tasks in a thread group must be in the same PID namespace. if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // The two different ways of specifying a new PID namespace are // incompatible. if opts.NewPIDNamespace && t.childPIDNamespace != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Thread groups and FS contexts cannot span user namespaces. if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Pull task registers and FPU state, a cloned task will inherit the @@ -182,7 +182,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // in which it resides)." - clone(2). Neither chroot(2) nor // user_namespaces(7) document this. 
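The option checks at the top of Task.Clone above encode, in terms of CloneOptions, the same constraints that clone(2) states in terms of flags: a shared signal handler table requires a shared address space, and a shared thread group requires a shared signal handler table. Stated directly with CLONE_* flags (the mapping from options to flags is an assumption here, not code from the sentry), the rules look like this:

package main

import (
	"errors"
	"fmt"

	"golang.org/x/sys/unix"
)

// validateCloneFlags sketches the two flag rules reflected by the EINVAL
// checks above, expressed with the raw clone(2) flag bits.
func validateCloneFlags(flags uintptr) error {
	if flags&unix.CLONE_SIGHAND != 0 && flags&unix.CLONE_VM == 0 {
		return errors.New("EINVAL: CLONE_SIGHAND requires CLONE_VM")
	}
	if flags&unix.CLONE_THREAD != 0 && flags&unix.CLONE_SIGHAND == 0 {
		return errors.New("EINVAL: CLONE_THREAD requires CLONE_SIGHAND")
	}
	return nil
}

func main() {
	fmt.Println(validateCloneFlags(unix.CLONE_THREAD))                                       // EINVAL
	fmt.Println(validateCloneFlags(unix.CLONE_VM | unix.CLONE_SIGHAND | unix.CLONE_THREAD)) // <nil>
}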
if t.IsChrooted() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } userns, err = creds.NewChildUserNamespace() if err != nil { @@ -190,7 +190,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } } if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } utsns := t.UTSNamespace() @@ -241,7 +241,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } if opts.SetTLS { if !image.Arch.SetTLS(uintptr(opts.TLS)) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } } @@ -463,14 +463,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { // sense that clone(2) allows a task to share signal handlers and address // spaces with tasks in other thread groups. if opts.NewAddressSpace || opts.NewSignalHandlers { - return syserror.EINVAL + return linuxerr.EINVAL } creds := t.Credentials() if opts.NewThreadGroup { t.tg.signalHandlers.mu.Lock() if t.tg.tasksCount != 1 { t.tg.signalHandlers.mu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } t.tg.signalHandlers.mu.Unlock() // This isn't racy because we're the only living task, and therefore @@ -478,7 +478,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { } if opts.NewUserNamespace { if t.IsChrooted() { - return syserror.EPERM + return linuxerr.EPERM } newUserNS, err := creds.NewChildUserNamespace() if err != nil { @@ -494,7 +494,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN) if opts.NewPIDNamespace { if !haveCapSysAdmin { - return syserror.EPERM + return linuxerr.EPERM } t.childPIDNamespace = t.tg.pidns.NewChild(t.UserNamespace()) } @@ -503,14 +503,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { if opts.NewNetworkNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } t.netns = inet.NewNamespace(t.netns) } if opts.NewUTSNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } // Note that this must happen after NewUserNamespace, so the // new user namespace is used if there is one. @@ -519,7 +519,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { if opts.NewIPCNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC // namespace" diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index d115b8783..fbfcc19e5 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -28,66 +28,14 @@ import ( "errors" "fmt" "strconv" - "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) -// An ExitStatus is a value communicated from an exiting task or thread group -// to the party that reaps it. -// -// +stateify savable -type ExitStatus struct { - // Code is the numeric value passed to the call to exit or exit_group that - // caused the exit. If the exit was not caused by such a call, Code is 0. - Code int - - // Signo is the signal that caused the exit. If the exit was not caused by - // a signal, Signo is 0. 
- Signo int -} - -func (es ExitStatus) String() string { - var b strings.Builder - if code := es.Code; code != 0 { - if b.Len() != 0 { - b.WriteByte(' ') - } - _, _ = fmt.Fprintf(&b, "Code=%d", code) - } - if signal := es.Signo; signal != 0 { - if b.Len() != 0 { - b.WriteByte(' ') - } - _, _ = fmt.Fprintf(&b, "Signal=%d", signal) - } - return b.String() -} - -// Signaled returns true if the ExitStatus indicates that the exiting task or -// thread group was killed by a signal. -func (es ExitStatus) Signaled() bool { - return es.Signo != 0 -} - -// Status returns the numeric representation of the ExitStatus returned by e.g. -// the wait4() system call. -func (es ExitStatus) Status() uint32 { - return ((uint32(es.Code) & 0xff) << 8) | (uint32(es.Signo) & 0xff) -} - -// ShellExitCode returns the numeric exit code that Bash would return for an -// exit status of es. -func (es ExitStatus) ShellExitCode() int { - if es.Signaled() { - return 128 + es.Signo - } - return es.Code -} - // TaskExitState represents a step in the task exit path. // // "Exiting" and "exited" are often ambiguous; prefer to name specific states. @@ -163,13 +111,13 @@ func (t *Task) killedLocked() bool { return t.pendingSignals.pendingSet&linux.SignalSetOf(linux.SIGKILL) != 0 } -// PrepareExit indicates an exit with status es. +// PrepareExit indicates an exit with the given status. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) PrepareExit(es ExitStatus) { +func (t *Task) PrepareExit(ws linux.WaitStatus) { t.tg.signalHandlers.mu.Lock() defer t.tg.signalHandlers.mu.Unlock() - t.exitStatus = es + t.exitStatus = ws } // PrepareGroupExit indicates a group exit with status es to t's thread group. @@ -180,7 +128,7 @@ func (t *Task) PrepareExit(es ExitStatus) { // ptrace.) // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) PrepareGroupExit(es ExitStatus) { +func (t *Task) PrepareGroupExit(ws linux.WaitStatus) { t.tg.signalHandlers.mu.Lock() defer t.tg.signalHandlers.mu.Unlock() if t.tg.exiting || t.tg.execing != nil { @@ -198,8 +146,8 @@ func (t *Task) PrepareGroupExit(es ExitStatus) { return } t.tg.exiting = true - t.tg.exitStatus = es - t.exitStatus = es + t.tg.exitStatus = ws + t.exitStatus = ws for sibling := t.tg.tasks.Front(); sibling != nil; sibling = sibling.Next() { if sibling != t { sibling.killLocked() @@ -207,11 +155,11 @@ func (t *Task) PrepareGroupExit(es ExitStatus) { } } -// Kill requests that all tasks in ts exit as if group exiting with status es. +// Kill requests that all tasks in ts exit as if group exiting with status ws. // Kill does not wait for tasks to exit. // // Kill has no analogue in Linux; it's provided for save/restore only. 
-func (ts *TaskSet) Kill(es ExitStatus) { +func (ts *TaskSet) Kill(ws linux.WaitStatus) { ts.mu.Lock() defer ts.mu.Unlock() ts.Root.exiting = true @@ -219,7 +167,7 @@ func (ts *TaskSet) Kill(es ExitStatus) { t.tg.signalHandlers.mu.Lock() if !t.tg.exiting { t.tg.exiting = true - t.tg.exitStatus = es + t.tg.exitStatus = ws } t.killLocked() t.tg.signalHandlers.mu.Unlock() @@ -730,10 +678,10 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow())) if t.exitStatus.Signaled() { info.Code = linux.CLD_KILLED - info.SetStatus(int32(t.exitStatus.Signo)) + info.SetStatus(int32(t.exitStatus.TerminationSignal())) } else { info.Code = linux.CLD_EXITED - info.SetStatus(int32(t.exitStatus.Code)) + info.SetStatus(int32(t.exitStatus.ExitStatus())) } // TODO(b/72102453): Set utime, stime. return info @@ -741,7 +689,7 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S // ExitStatus returns t's exit status, which is only guaranteed to be // meaningful if t.ExitState() != TaskExitNone. -func (t *Task) ExitStatus() ExitStatus { +func (t *Task) ExitStatus() linux.WaitStatus { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() t.tg.signalHandlers.mu.Lock() @@ -751,7 +699,7 @@ func (t *Task) ExitStatus() ExitStatus { // ExitStatus returns the exit status that would be returned by a consuming // wait*() on tg. -func (tg *ThreadGroup) ExitStatus() ExitStatus { +func (tg *ThreadGroup) ExitStatus() linux.WaitStatus { tg.pidns.owner.mu.RLock() defer tg.pidns.owner.mu.RUnlock() tg.signalHandlers.mu.Lock() @@ -762,7 +710,9 @@ func (tg *ThreadGroup) ExitStatus() ExitStatus { return tg.leader.exitStatus } -// TerminationSignal returns the thread group's termination signal. +// TerminationSignal returns the thread group's termination signal, which is +// the signal that will be sent to its leader's parent when all threads have +// exited. func (tg *ThreadGroup) TerminationSignal() linux.Signal { tg.pidns.owner.mu.RLock() defer tg.pidns.owner.mu.RUnlock() @@ -888,8 +838,8 @@ type WaitResult struct { // Event is exactly one of the events defined above. Event waiter.EventMask - // Status is the numeric status associated with the event. - Status uint32 + // Status is the wait status associated with the event. + Status linux.WaitStatus } // Wait waits for an event from a thread group that is a child of t's thread @@ -942,7 +892,7 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) { if anyWaitableTasks { return nil, ErrNoWaitableEvent } - return nil, syserror.ECHILD + return nil, linuxerr.ECHILD } // Preconditions: The TaskSet mutex must be locked for writing. @@ -1042,7 +992,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace } pid := t.tg.pidns.tids[target] uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() - status := target.exitStatus.Status() + status := target.exitStatus if !opts.ConsumeEvent { return &WaitResult{ Task: target, @@ -1056,7 +1006,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace // differ from that reported by a consuming wait; the latter will return // the group exit code if one is available. if target.tg.exiting { - status = target.tg.exitStatus.Status() + status = target.tg.exitStatus } // t may be (in the thread group of) target's parent, tracer, or both. 
We // don't need to check for !exitTracerAcked because tracees are detached @@ -1122,12 +1072,11 @@ func (t *Task) waitCollectChildGroupStopLocked(target *Task, opts *WaitOptions) target.tg.groupStopWaitable = false } return &WaitResult{ - Task: target, - TID: pid, - UID: uid, - Event: EventChildGroupStop, - // There is no name for these status constants. - Status: (uint32(sig)&0xff)<<8 | 0x7f, + Task: target, + TID: pid, + UID: uid, + Event: EventChildGroupStop, + Status: linux.WaitStatusStopped(uint32(sig)), } } @@ -1148,7 +1097,7 @@ func (t *Task) waitCollectGroupContinueLocked(target *Task, opts *WaitOptions) * TID: pid, UID: uid, Event: EventGroupContinue, - Status: 0xffff, + Status: linux.WaitStatusContinued(), } } @@ -1176,7 +1125,7 @@ func (t *Task) waitCollectTraceeStopLocked(target *Task, opts *WaitOptions) *Wai TID: pid, UID: uid, Event: EventTraceeStop, - Status: uint32(code)<<8 | 0x7f, + Status: linux.WaitStatusStopped(uint32(code)), } } diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index 0325967e4..a9067b682 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -16,9 +16,9 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" ) // Credentials returns t's credentials. @@ -47,7 +47,7 @@ func (t *Task) HasCapability(cp linux.Capability) bool { func (t *Task) SetUID(uid auth.UID) error { // setuid considers -1 to be invalid. if !uid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } t.mu.Lock() @@ -56,7 +56,7 @@ func (t *Task) SetUID(uid auth.UID) error { creds := t.Credentials() kuid := creds.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // "setuid() sets the effective user ID of the calling process. If the // effective UID of the caller is root (more precisely: if the caller has @@ -70,7 +70,7 @@ func (t *Task) SetUID(uid auth.UID) error { // capability) and uid does not match the real UID or saved set-user-ID of // the calling process." if kuid != creds.RealKUID && kuid != creds.SavedKUID { - return syserror.EPERM + return linuxerr.EPERM } t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID) return nil @@ -87,26 +87,26 @@ func (t *Task) SetREUID(r, e auth.UID) error { if r.Ok() { newR = creds.UserNamespace.MapToKUID(r) if !newR.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } newE := creds.EffectiveKUID if e.Ok() { newE = creds.UserNamespace.MapToKUID(e) if !newE.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } if !creds.HasCapability(linux.CAP_SETUID) { // "Unprivileged processes may only set the effective user ID to the // real user ID, the effective user ID, or the saved set-user-ID." if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID { - return syserror.EPERM + return linuxerr.EPERM } // "Unprivileged users may only set the real user ID to the real user // ID or the effective user ID." if newR != creds.RealKUID && newR != creds.EffectiveKUID { - return syserror.EPERM + return linuxerr.EPERM } } // "If the real user ID is set (i.e., ruid is not -1) or the effective user @@ -223,7 +223,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // SetGID implements the semantics of setgid(2). 
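The task_exit.go changes above delete the kernel-local ExitStatus type, whose Status() method packed the exit code and signal by hand and whose call sites built stop and continue statuses inline ((uint32(sig)&0xff)<<8 | 0x7f and 0xffff), and switch everything to the linux.WaitStatus constructors so the wait(2)-compatible bit layout is produced in one place. The sketch below spells out that classic layout with illustrative helper names; it restates the encoding visible in the removed code, not the new linux package implementation.

package main

import "fmt"

// These helpers spell out the classic wait(2) status layout that
// ExitStatus.Status() used to build by hand. Names are illustrative.

func encodeExit(code uint32) uint32    { return code << 8 }     // exited: code in bits 8-15, low 7 bits zero
func encodeSignaled(sig uint32) uint32 { return sig & 0x7f }    // killed by signal: signal in low 7 bits
func encodeStopped(sig uint32) uint32  { return sig<<8 | 0x7f } // stopped: 0x7f marker in low byte
func encodeContinued() uint32          { return 0xffff }        // continued by SIGCONT

func exitStatusOf(ws uint32) uint32 { return (ws & 0xff00) >> 8 } // WEXITSTATUS
func termSignalOf(ws uint32) uint32 { return ws & 0x7f }          // WTERMSIG

func main() {
	ws := encodeExit(2)
	fmt.Printf("exit(2):    %#06x  WEXITSTATUS=%d\n", ws, exitStatusOf(ws))
	ws = encodeSignaled(9) // SIGKILL
	fmt.Printf("killed(9):  %#06x  WTERMSIG=%d\n", ws, termSignalOf(ws))
	fmt.Printf("stopped:    %#06x\n", encodeStopped(19)) // SIGSTOP
	fmt.Printf("continued:  %#06x\n", encodeContinued())
}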
func (t *Task) SetGID(gid auth.GID) error { if !gid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } t.mu.Lock() @@ -232,14 +232,14 @@ func (t *Task) SetGID(gid auth.GID) error { creds := t.Credentials() kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } if creds.HasCapability(linux.CAP_SETGID) { t.setKGIDsUncheckedLocked(kgid, kgid, kgid) return nil } if kgid != creds.RealKGID && kgid != creds.SavedKGID { - return syserror.EPERM + return linuxerr.EPERM } t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID) return nil @@ -255,22 +255,22 @@ func (t *Task) SetREGID(r, e auth.GID) error { if r.Ok() { newR = creds.UserNamespace.MapToKGID(r) if !newR.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } newE := creds.EffectiveKGID if e.Ok() { newE = creds.UserNamespace.MapToKGID(e) if !newE.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } if !creds.HasCapability(linux.CAP_SETGID) { if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID { - return syserror.EPERM + return linuxerr.EPERM } if newR != creds.RealKGID && newR != creds.EffectiveKGID { - return syserror.EPERM + return linuxerr.EPERM } } newS := creds.SavedKGID @@ -343,13 +343,13 @@ func (t *Task) SetExtraGIDs(gids []auth.GID) error { defer t.mu.Unlock() creds := t.Credentials() if !creds.HasCapability(linux.CAP_SETGID) { - return syserror.EPERM + return linuxerr.EPERM } kgids := make([]auth.KGID, len(gids)) for i, gid := range gids { kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } kgids[i] = kgid } @@ -367,25 +367,25 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili // "Permitted: This is a limiting superset for the effective capabilities // that the thread may assume." - capabilities(7) if effective & ^permitted != 0 { - return syserror.EPERM + return linuxerr.EPERM } creds := t.Credentials() // "It is also a limiting superset for the capabilities that may be added // to the inheritable set by a thread that does not have the CAP_SETPCAP // capability in its effective set." if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) { - return syserror.EPERM + return linuxerr.EPERM } // "If a thread drops a capability from its permitted set, it can never // reacquire that capability (unless it execve(2)s ..." if permitted & ^creds.PermittedCaps != 0 { - return syserror.EPERM + return linuxerr.EPERM } // "... if a capability is not in the bounding set, then a thread can't add // this capability to its inheritable set, even if it was in its permitted // capabilities ..." if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. creds.PermittedCaps = permitted @@ -402,7 +402,7 @@ func (t *Task) DropBoundingCapability(cp linux.Capability) error { defer t.mu.Unlock() creds := t.Credentials() if !creds.HasCapability(linux.CAP_SETPCAP) { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. creds.BoundingCaps &^= auth.CapabilitySetOf(cp) @@ -422,7 +422,7 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error { // If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN // in ns (by rule 3 in auth.Credentials.HasCapability). 
if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index 72b9a0384..8de08151a 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -235,7 +235,7 @@ func (t *Task) traceExitEvent() { if !trace.IsEnabled() { return } - trace.Logf(t.traceContext, traceCategory, "exit status: 0x%x", t.exitStatus.Status()) + trace.Logf(t.traceContext, traceCategory, "exit status: %s", t.exitStatus) } // traceExecEvent is called when a task calls exec. diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 068f25af1..054ff212f 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -377,7 +377,7 @@ func (app *runApp) execute(t *Task) taskRunState { default: // What happened? Can't continue. t.Warningf("Unexpected SwitchToApp error: %v", err) - t.PrepareExit(ExitStatus{Code: ExtractErrno(err, -1)}) + t.PrepareExit(linux.WaitStatusExit(int32(ExtractErrno(err, -1)))) return (*runExit)(nil) } } diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go index f142feab4..9d9fa76a6 100644 --- a/pkg/sentry/kernel/task_sched.go +++ b/pkg/sentry/kernel/task_sched.go @@ -23,12 +23,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // TaskGoroutineState is a coarse representation of the current execution @@ -601,7 +601,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error { // Ensure that at least 1 CPU is still allowed. if mask.NumCPUs() == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if t.k.useHostCores { diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 8ca61ed48..7065ac79c 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -22,6 +22,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -156,7 +157,8 @@ func (t *Task) PendingSignals() linux.SignalSet { // deliverSignal delivers the given signal and returns the following run state. func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState { - sigact := computeAction(linux.Signal(info.Signo), act) + sig := linux.Signal(info.Signo) + sigact := computeAction(sig, act) if t.haveSyscallReturn { if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok { @@ -197,14 +199,14 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu } // Attach an fault address if appropriate. - switch linux.Signal(info.Signo) { + switch sig { case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS: ucs.FaultAddr = info.Addr() } eventchannel.Emit(ucs) - t.PrepareGroupExit(ExitStatus{Signo: int(info.Signo)}) + t.PrepareGroupExit(linux.WaitStatusTerminationSignal(sig)) return (*runExit)(nil) case SignalActionStop: @@ -224,12 +226,12 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu // Send a forced SIGSEGV. 
If the signal that couldn't be delivered // was a SIGSEGV, force the handler to SIG_DFL. - t.forceSignal(linux.SIGSEGV, linux.Signal(info.Signo) == linux.SIGSEGV /* unconditional */) + t.forceSignal(linux.SIGSEGV, sig == linux.SIGSEGV /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) } default: - panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(linux.Signal(info.Signo), act))) + panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(sig, act))) } return (*runInterrupt)(nil) } @@ -338,7 +340,7 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. } if timeout == 0 { - return nil, syserror.EAGAIN + return nil, linuxerr.EAGAIN } // Unblock signals we're waiting for. Remember the original signal mask so @@ -359,8 +361,8 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. if info := t.dequeueSignalLocked(mask); info != nil { return info, nil } - if err == syserror.ETIMEDOUT { - return nil, syserror.EAGAIN + if err == linuxerr.ETIMEDOUT { + return nil, linuxerr.EAGAIN } return nil, err } @@ -369,9 +371,9 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. // // The following errors may be returned: // -// syserror.ESRCH - The task has exited. -// syserror.EINVAL - The signal is not valid. -// syserror.EAGAIN - THe signal is realtime, and cannot be queued. +// linuxerr.ESRCH - The task has exited. +// linuxerr.EINVAL - The signal is not valid. +// linuxerr.EAGAIN - THe signal is realtime, and cannot be queued. // func (t *Task) SendSignal(info *linux.SignalInfo) error { t.tg.pidns.owner.mu.RLock() @@ -406,14 +408,14 @@ func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error { func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error { if t.exitState == TaskExitDead { - return syserror.ESRCH + return linuxerr.ESRCH } sig := linux.Signal(info.Signo) if sig == 0 { return nil } if !sig.IsValid() { - return syserror.EINVAL + return linuxerr.EINVAL } // Signal side effects apply even if the signal is ultimately discarded. @@ -450,7 +452,7 @@ func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer * } if !q.enqueue(info, timer) { if sig.IsRealtime() { - return syserror.EAGAIN + return linuxerr.EAGAIN } t.Debugf("Discarding duplicate signal %d", sig) if timer != nil { @@ -505,7 +507,7 @@ func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) { // ignores tg.execing. if !tg.exiting { tg.exiting = true - tg.exitStatus = ExitStatus{Signo: int(linux.SIGKILL)} + tg.exitStatus = linux.WaitStatusTerminationSignal(linux.SIGKILL) } for t := tg.tasks.Front(); t != nil; t = t.Next() { t.killLocked() @@ -684,7 +686,7 @@ func (t *Task) SetSignalStack(alt linux.SignalStack) bool { // to *actptr (if actptr is not nil) and returns the old signal action. 
func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) { if !sig.IsValid() { - return linux.SigAction{}, syserror.EINVAL + return linux.SigAction{}, linuxerr.EINVAL } tg.pidns.owner.mu.RLock() @@ -695,7 +697,7 @@ func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) ( oldact := sh.actions[sig] if actptr != nil { if sig == linux.SIGKILL || sig == linux.SIGSTOP { - return oldact, syserror.EINVAL + return oldact, linuxerr.EINVAL } act := *actptr diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 41fd2d471..0565059c1 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -299,7 +300,7 @@ func (ns *PIDNamespace) allocateTID() (ThreadID, error) { // Did we do a full cycle? if tid == ns.last { // No tid available. - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } } diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 601fc0d3a..0586c9def 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -22,6 +22,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/errors" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/metric" @@ -159,7 +161,7 @@ func (t *Task) doSyscall() taskRunState { // ok case linux.SECCOMP_RET_KILL_THREAD: t.Debugf("Syscall %d: killed by seccomp", sysno) - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) case linux.SECCOMP_RET_TRACE: t.Debugf("Syscall %d: stopping for PTRACE_EVENT_SECCOMP", sysno) @@ -309,7 +311,7 @@ func (t *Task) doVsyscall(addr hostarch.Addr, sysno uintptr) taskRunState { return &runVsyscallAfterPtraceEventSeccomp{addr, sysno, caller} case linux.SECCOMP_RET_KILL_THREAD: t.Debugf("vsyscall %d: killed by seccomp", sysno) - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) default: panic(fmt.Sprintf("Unknown seccomp result %d", r)) @@ -336,7 +338,7 @@ func (r *runVsyscallAfterPtraceEventSeccomp) execute(t *Task) taskRunState { // Documentation/prctl/seccomp_filter.txt. On Linux, changing orig_ax or ip // causes do_exit(SIGSYS), and changing sp is ignored. if (sysno != ^uintptr(0) && sysno != r.sysno) || hostarch.Addr(t.Arch().IP()) != r.addr { - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) } if sysno == ^uintptr(0) { @@ -357,7 +359,7 @@ func (t *Task) doVsyscallInvoke(sysno uintptr, args arch.SyscallArguments, calle t.Arch().SetReturn(uintptr(rval)) } else { t.Debugf("vsyscall %d, caller %x: emulated syscall returned error: %v", sysno, t.Arch().Value(caller), err) - if err == syserror.EFAULT { + if linuxerr.Equals(linuxerr.EFAULT, err) { t.forceSignal(linux.SIGSEGV, false /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) // A return is not emulated in this case. 
@@ -379,6 +381,8 @@ func ExtractErrno(err error, sysno int) int { return 0 case unix.Errno: return int(err) + case *errors.Error: + return int(err.Errno()) case syserror.SyscallRestartErrno: return int(err) case *memmap.BusError: diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index fc6d9438a..8e2c36598 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserror" @@ -132,7 +133,7 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro case 8: const itemLen = 16 if _, ok := addr.AddLength(uint64(src.NumRanges()) * itemLen); !ok { - return syserror.EFAULT + return linuxerr.EFAULT } b := t.CopyScratchBuffer(itemLen) @@ -190,7 +191,7 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan case 8: const itemLen = 16 if _, ok := addr.AddLength(uint64(numIovecs) * itemLen); !ok { - return hostarch.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, linuxerr.EFAULT } b := t.CopyScratchBuffer(itemLen) @@ -202,11 +203,11 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8])) length := hostarch.ByteOrder.Uint64(b[8:16]) if length > math.MaxInt64 { - return hostarch.AddrRangeSeq{}, syserror.EINVAL + return hostarch.AddrRangeSeq{}, linuxerr.EINVAL } ar, ok := t.MemoryManager().CheckIORange(base, int64(length)) if !ok { - return hostarch.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, linuxerr.EFAULT } if numIovecs == 1 { @@ -252,7 +253,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO } ar, ok := t.MemoryManager().CheckIORange(addr, int64(length)) if !ok { - return usermem.IOSequence{}, syserror.EFAULT + return usermem.IOSequence{}, linuxerr.EFAULT } return usermem.IOSequence{ IO: t.MemoryManager(), @@ -270,7 +271,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO // Preconditions: Same as Task.CopyInIovecs. 
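Several of the EFAULT paths above (CopyOutIovecs, CopyInIovecs, and later the mmap/mremap length math) reduce to the same AddLength-style question: does start + length stay inside the address space? Below is a minimal sketch of that overflow check on bare uint64 addresses; checkedEnd is an illustrative helper under that assumption, not the hostarch API.

package main

import "fmt"

// checkedEnd returns start+length and reports whether the sum stayed inside
// the 64-bit address space. A wrapped sum means the requested range cannot be
// valid, so callers fail with EFAULT instead of touching it.
func checkedEnd(start, length uint64) (uint64, bool) {
	end := start + length
	if end < start {
		return 0, false // overflowed and wrapped around
	}
	return end, true
}

func main() {
	const itemLen = 16 // e.g. the size of a 64-bit struct iovec
	addr := uint64(0xffffffffffffff00)

	if end, ok := checkedEnd(addr, 32*itemLen); ok {
		fmt.Printf("iovec array ends at %#x\n", end)
	} else {
		fmt.Println("EFAULT: iovec array would wrap the address space")
	}
}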
func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) { if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV { - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } ars, err := t.CopyInIovecs(addr, iovcnt) if err != nil { @@ -312,7 +313,7 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) { tmm := cc.t.MemoryManager() cc.t.mu.Unlock() if !tmm.IncUsers() { - return nil, syserror.EFAULT + return nil, linuxerr.EFAULT } return tmm, nil } diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 4566e4c7c..2eda15303 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -19,13 +19,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // A ThreadGroup is a logical grouping of tasks that has widespread @@ -143,7 +143,7 @@ type ThreadGroup struct { // // While exiting is false, exitStatus is protected by the signal mutex. // When exiting becomes true, exitStatus becomes immutable. - exitStatus ExitStatus + exitStatus linux.WaitStatus // terminationSignal is the signal that this thread group's leader will // send to its parent when it exits. @@ -357,7 +357,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) // "The calling process must be a session leader and not have a // controlling terminal already." - tty_ioctl(4) if tg.processGroup.session.leader != tg || tg.tty != nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(tg.leader) @@ -371,7 +371,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) if tty.tg != nil && tg.processGroup.session != tty.tg.processGroup.session { // Stealing requires CAP_SYS_ADMIN in the root user namespace. if !hasAdmin || !steal { - return syserror.EPERM + return linuxerr.EPERM } // Steal the TTY away. Unlike TIOCNOTTY, don't send signals. for othertg := range tg.pidns.owner.Root.tgids { @@ -391,7 +391,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) } if !isReadable && !hasAdmin { - return syserror.EPERM + return linuxerr.EPERM } // Set the controlling terminal and foreground process group. @@ -419,7 +419,7 @@ func (tg *ThreadGroup) ReleaseControllingTTY(tty *TTY) error { if tg.tty == nil || tg.tty != tty { tg.signalHandlers.mu.Unlock() - return syserror.ENOTTY + return linuxerr.ENOTTY } // "If the process was session leader, then send SIGHUP and SIGCONT to @@ -473,7 +473,7 @@ func (tg *ThreadGroup) ForegroundProcessGroup(tty *TTY) (int32, error) { // "When fd does not refer to the controlling terminal of the calling // process, -1 is returned" - tcgetpgrp(3) if tg.tty != tty { - return -1, syserror.ENOTTY + return -1, linuxerr.ENOTTY } return int32(tg.processGroup.session.foreground.id), nil @@ -496,24 +496,24 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID) // tty must be the controlling terminal. if tg.tty != tty { - return -1, syserror.ENOTTY + return -1, linuxerr.ENOTTY } // pgid must be positive. 
if pgid < 0 { - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } // pg must not be empty. Empty process groups are removed from their // pid namespaces. pg, ok := tg.pidns.processGroups[pgid] if !ok { - return -1, syserror.ESRCH + return -1, linuxerr.ESRCH } // pg must be part of this process's session. if tg.processGroup.session != pg.session { - return -1, syserror.EPERM + return -1, linuxerr.EPERM } tg.processGroup.session.foreground.id = pgid diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 2817aa3ba..e293d9a0f 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -13,8 +13,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sync", - "//pkg/syserror", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 26aa34aa6..191b92811 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -22,8 +22,8 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -322,7 +322,7 @@ func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Sett // interpreted as a time relative to now. func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (Setting, error) { if value < 0 { - return Setting{}, syserror.EINVAL + return Setting{}, linuxerr.EINVAL } if value == 0 { return Setting{Period: interval}, nil @@ -338,7 +338,7 @@ func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (S // interpreted as an absolute time. func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) { if value.Before(ZeroTime) { - return Setting{}, syserror.EINVAL + return Setting{}, linuxerr.EINVAL } if value.IsZero() { return Setting{Period: interval}, nil diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go index dfc3c0719..b6039505a 100644 --- a/pkg/sentry/kernel/timekeeper_test.go +++ b/pkg/sentry/kernel/timekeeper_test.go @@ -17,12 +17,12 @@ package kernel import ( "testing" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/pgalloc" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // mockClocks is a sentrytime.Clocks that simply returns the times in the @@ -45,7 +45,7 @@ func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { case sentrytime.Realtime: return c.realtime, nil default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 80f862628..54bfed644 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/abi/linux/errno", "//pkg/context", "//pkg/cpuid", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/rand", diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 8fc3e2a79..577374fa4 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -476,7 +477,7 @@ func loadParsedELF(ctx 
context.Context, m *mm.MemoryManager, f fsbridge.File, in // the open path would return a different // error. ctx.Infof("PT_INTERP path is empty: %v", path) - return loadedELF{}, syserror.EACCES + return loadedELF{}, linuxerr.EACCES } } } @@ -517,13 +518,13 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in start, ok = start.AddLength(uint64(offset)) if !ok { ctx.Infof(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset)) - return loadedELF{}, syserror.EINVAL + return loadedELF{}, linuxerr.EINVAL } end, ok = end.AddLength(uint64(offset)) if !ok { ctx.Infof(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset)) - return loadedELF{}, syserror.EINVAL + return loadedELF{}, linuxerr.EINVAL } info.entry, ok = info.entry.AddLength(uint64(offset)) @@ -621,20 +622,20 @@ func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureS func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, initial loadedELF) (loadedELF, error) { info, err := parseHeader(ctx, f) if err != nil { - if err == syserror.ENOEXEC { + if linuxerr.Equals(linuxerr.ENOEXEC, err) { // Bad interpreter. - err = syserror.ELIBBAD + err = linuxerr.ELIBBAD } return loadedELF{}, err } if info.os != initial.os { ctx.Infof("Initial ELF OS %v and interpreter ELF OS %v differ", initial.os, info.os) - return loadedELF{}, syserror.ELIBBAD + return loadedELF{}, linuxerr.ELIBBAD } if info.arch != initial.arch { ctx.Infof("Initial ELF arch %v and interpreter ELF arch %v differ", initial.arch, info.arch) - return loadedELF{}, syserror.ELIBBAD + return loadedELF{}, linuxerr.ELIBBAD } // The interpreter is not given a load offset, as its location does not diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 8240173ae..86d0c54cd 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux/errno" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -113,7 +114,7 @@ func checkIsRegularFile(ctx context.Context, file fsbridge.File, filename string } if t != linux.ModeRegular { ctx.Infof("%q is not a regular file: %v", filename, t) - return syserror.EACCES + return linuxerr.EACCES } return nil } @@ -207,7 +208,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context args.File = nil } - return loadedELF{}, nil, nil, nil, syserror.ELOOP + return loadedELF{}, nil, nil, nil, linuxerr.ELOOP } // Load loads args.File into a MemoryManager. 
If args.File is nil, the path diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index fd54261fd..054ef1723 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" @@ -58,7 +59,7 @@ type byteFullReader struct { func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset >= int64(len(b.data)) { return 0, io.EOF diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index b417c2da7..69aff21b6 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -125,6 +125,7 @@ go_library( "//pkg/abi/linux", "//pkg/atomicbitops", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/refs", @@ -156,6 +157,7 @@ go_test( library = ":mm", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", @@ -163,7 +165,6 @@ go_test( "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 346866d3c..b7f765cd7 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -17,12 +17,12 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -158,11 +158,11 @@ func (ctx *AIOContext) Prepare() error { defer ctx.mu.Unlock() if ctx.dead { // Context died after the caller looked it up. - return syserror.EINVAL + return linuxerr.EINVAL } if ctx.outstanding >= ctx.maxOutstanding { // Context is busy. - return syserror.EAGAIN + return linuxerr.EAGAIN } ctx.outstanding++ return nil @@ -297,7 +297,7 @@ func (m *aioMappable) InodeID() uint64 { // Msync implements memmap.MappingIdentity.Msync. func (m *aioMappable) Msync(ctx context.Context, mr memmap.MappableRange) error { // Linux: aio_ring_fops.fsync == NULL - return syserror.EINVAL + return linuxerr.EINVAL } // AddMapping implements memmap.Mappable.AddMapping. @@ -305,7 +305,7 @@ func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar ho // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(ar.Length()) != aioRingBufferSize { - return syserror.EFAULT + return linuxerr.EFAULT } return nil } @@ -319,13 +319,13 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() // sets VM_DONTEXPAND). if offset != 0 || uint64(dstAR.Length()) != aioRingBufferSize { - return syserror.EFAULT + return linuxerr.EFAULT } // Require that the mapping correspond to a live AIOContext. Compare // Linux's fs/aio.c:aio_ring_mremap(). 
mm, ok := ms.(*MemoryManager) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } am := &mm.aioManager am.mu.Lock() @@ -333,12 +333,12 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s oldID := uint64(srcAR.Start) aioCtx, ok := am.contexts[oldID] if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } aioCtx.mu.Lock() defer aioCtx.mu.Unlock() if aioCtx.dead { - return syserror.EINVAL + return linuxerr.EINVAL } // Use the new ID for the AIOContext. am.contexts[uint64(dstAR.Start)] = aioCtx @@ -350,7 +350,7 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -399,7 +399,7 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint id := uint64(addr) if !mm.aioManager.newAIOContext(events, id) { mm.MUnmap(ctx, addr, aioRingBufferSize) - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } return id, nil } diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index 16f318ab3..5fcfeb473 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -16,10 +16,10 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -97,14 +97,14 @@ func translateIOError(ctx context.Context, err error) error { if logIOErrors { ctx.Debugf("MM I/O error: %v", err) } - return syserror.EFAULT + return linuxerr.EFAULT } // CopyOut implements usermem.IO.CopyOut. func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(src))) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if len(src) == 0 { @@ -147,7 +147,7 @@ func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { ar, ok := mm.CheckIORange(addr, int64(len(dst))) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if len(dst) == 0 { @@ -190,7 +190,7 @@ func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst [ func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { ar, ok := mm.CheckIORange(addr, toZero) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if toZero == 0 { @@ -231,7 +231,7 @@ func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZe // CopyOutFrom implements usermem.IO.CopyOutFrom. func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if ars.NumBytes() == 0 { @@ -276,7 +276,7 @@ func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRange // CopyInTo implements usermem.IO.CopyInTo. 
func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { if !mm.checkIOVec(ars) { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } if ars.NumBytes() == 0 { @@ -314,7 +314,7 @@ func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -339,7 +339,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error @@ -357,7 +357,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -382,7 +382,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error @@ -400,7 +400,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { ar, ok := mm.CheckIORange(addr, 4) if !ok { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Do AddressSpace IO if applicable. @@ -425,7 +425,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opt _, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { // Atomicity is unachievable across mappings. 
- return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } im := ims.Head() var err error diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index 1304b0a2f..84cb8158d 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -18,6 +18,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/contexttest" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -171,7 +171,7 @@ func TestIOAfterUnmap(t *testing.T) { } n, err = mm.CopyIn(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { + if !linuxerr.Equals(linuxerr.EFAULT, err) { t.Errorf("CopyIn got err %v want EFAULT", err) } if n != 0 { @@ -212,7 +212,7 @@ func TestIOAfterMProtect(t *testing.T) { // Without IgnorePermissions, CopyOut should no longer succeed. n, err = mm.CopyOut(ctx, addr, b, usermem.IOOpts{}) - if err != syserror.EFAULT { + if !linuxerr.Equals(linuxerr.EFAULT, err) { t.Errorf("CopyOut got err %v want EFAULT", err) } if n != 0 { @@ -249,7 +249,7 @@ func TestAIOPrepareAfterDestroy(t *testing.T) { mm.DestroyAIOContext(ctx, id) // Prepare should fail because aioCtx should be destroyed. - if err := aioCtx.Prepare(); err != syserror.EINVAL { + if err := aioCtx.Prepare(); !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("aioCtx.Prepare got err %v want nil", err) } else if err == nil { aioCtx.CancelPendingRequest() diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index 5583f62b2..9f4cc238f 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -18,12 +18,12 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // existingPMAsLocked checks that pmas exist for all addresses in ar, and @@ -116,7 +116,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar var alignerr error if !ok { end = ar.End.RoundDown() - alignerr = syserror.EFAULT + alignerr = linuxerr.EFAULT } ar = hostarch.AddrRange{ar.Start.RoundDown(), end} @@ -162,7 +162,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.Addr var alignerr error if !ok { end = ar.End.RoundDown() - alignerr = syserror.EFAULT + alignerr = linuxerr.EFAULT } ar = hostarch.AddrRange{ar.Start.RoundDown(), end} diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go index 3130be80c..94d5112a1 100644 --- a/pkg/sentry/mm/shm.go +++ b/pkg/sentry/mm/shm.go @@ -16,16 +16,16 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" - "gvisor.dev/gvisor/pkg/syserror" ) // DetachShm unmaps a sysv shared memory segment. func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) error { if addr != addr.RoundDown() { // "... shmaddr is not aligned on a page boundary." 
- man shmdt(2) - return syserror.EINVAL + return linuxerr.EINVAL } var detached *shm.Shm @@ -48,7 +48,7 @@ func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) erro if detached == nil { // There is no shared memory segment attached at addr. - return syserror.EINVAL + return linuxerr.EINVAL } // Remove all vmas that could have been created by the same attach. diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index e748b7ff8..69c6e77a7 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -16,11 +16,11 @@ package mm import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with @@ -94,7 +94,7 @@ func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, hostar func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > m.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -144,11 +144,11 @@ func (m *SpecialMappable) Length() uint64 { // leak (b/143656263). Delete this function along with VFS1. func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*SpecialMappable, error) { if length == 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } alignedLen, ok := hostarch.Addr(length).RoundUp() if !ok { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fr, err := mfp.MemoryFile().Allocate(uint64(alignedLen), usage.Anonymous) if err != nil { diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 7ad6b7c21..256eb4afb 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" @@ -36,7 +37,7 @@ import ( func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr, at hostarch.AccessType, sp hostarch.Addr) error { ar, ok := addr.RoundDown().ToRange(hostarch.PageSize) if !ok { - return syserror.EFAULT + return linuxerr.EFAULT } // Don't bother trying existingPMAsLocked; in most cases, if we did have @@ -74,7 +75,7 @@ func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr // MMap establishes a memory mapping. func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) { if opts.Length == 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } length, ok := hostarch.Addr(opts.Length).RoundUp() if !ok { @@ -85,7 +86,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar if opts.Mappable != nil { // Offset must be aligned. if hostarch.Addr(opts.Offset).RoundDown() != hostarch.Addr(opts.Offset) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Offset + length must not overflow. 
if end := opts.Offset + opts.Length; end < opts.Offset { @@ -99,19 +100,19 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostar // MAP_FIXED requires addr to be page-aligned; non-fixed mappings // don't. if opts.Fixed { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } opts.Addr = opts.Addr.RoundDown() } if !opts.MaxPerms.SupersetOf(opts.Perms) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } if opts.Unmap && !opts.Fixed { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if opts.GrowsDown && opts.Mappable != nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get the new vma. @@ -203,6 +204,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar h // * vseg.Range().IsSupersetOf(ar). // // Postconditions: mm.mappingMu will be unlocked. +// +checklocksrelease:mm.mappingMu func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { // See populateVMA above for commentary. if !vseg.ValuePtr().effectivePerms.Any() { @@ -281,18 +283,18 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, erro // MUnmap implements the semantics of Linux's munmap(2). func (mm *MemoryManager) MUnmap(ctx context.Context, addr hostarch.Addr, length uint64) error { if addr != addr.RoundDown() { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } la, ok := hostarch.Addr(length).RoundUp() if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -331,7 +333,7 @@ const ( func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (hostarch.Addr, error) { // "Note that old_address has to be page aligned." - mremap(2) if oldAddr.RoundDown() != oldAddr { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Linux treats an old_size that rounds up to 0 as 0, which is otherwise a @@ -340,13 +342,13 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS oldSize = uint64(oldSizeAddr) newSizeAddr, ok := hostarch.Addr(newSize).RoundUp() if !ok || newSizeAddr == 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } newSize = uint64(newSizeAddr) oldEnd, ok := oldAddr.AddLength(oldSize) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } mm.mappingMu.Lock() @@ -355,7 +357,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // All cases require that a vma exists at oldAddr. vseg := mm.vmas.FindSegment(oldAddr) if !vseg.Ok() { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Behavior matrix: @@ -379,7 +381,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit { - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } } @@ -402,7 +404,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // Check that oldEnd maps to the same vma as oldAddr. 
if vseg.End() < oldEnd { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // "Grow" the existing vma by creating a new mergeable one. vma := vseg.ValuePtr() @@ -450,15 +452,15 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS case MRemapMustMove: newAddr := opts.NewAddr if newAddr.RoundDown() != newAddr { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } var ok bool newAR, ok = newAddr.ToRange(newSize) if !ok { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if (hostarch.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that the new region is valid. @@ -492,7 +494,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // Check that oldEnd maps to the same vma as oldAddr. if vseg.End() < oldEnd { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } // Check against RLIMIT_AS. @@ -504,7 +506,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS if vma := vseg.ValuePtr(); vma.mappable != nil { // Check that offset+length does not overflow. if vma.off+uint64(newAR.Length()) < vma.off { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Inform the Mappable, if any, of the new mapping. if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.canWriteMappableLocked()); err != nil { @@ -590,7 +592,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldS // MProtect implements the semantics of Linux's mprotect(2). func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms hostarch.AccessType, growsDown bool) error { if addr.RoundDown() != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { return nil @@ -618,7 +620,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h } if growsDown { if !vseg.ValuePtr().growsDown { - return syserror.EINVAL + return linuxerr.EINVAL } if ar.End <= vseg.Start() { return syserror.ENOMEM @@ -644,7 +646,7 @@ func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms h // Check for permission validity before splitting vmas, for consistency // with Linux. if !vseg.ValuePtr().maxPerms.SupersetOf(effectivePerms) { - return syserror.EACCES + return linuxerr.EACCES } vseg = mm.vmas.Isolate(vseg, ar) @@ -711,7 +713,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. if addr < mm.brk.Start { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.EINVAL + return addr, linuxerr.EINVAL } // TODO(gvisor.dev/issue/156): This enforces RLIMIT_DATA, but is @@ -730,7 +732,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch. 
if !ok { addr = mm.brk.End mm.mappingMu.Unlock() - return addr, syserror.EFAULT + return addr, linuxerr.EFAULT } switch { @@ -780,7 +782,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u la, _ := hostarch.Addr(length + addr.PageOffset()).RoundUp() ar, ok := addr.RoundDown().ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -792,7 +794,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { mm.mappingMu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit { mm.mappingMu.Unlock() @@ -855,11 +857,11 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length u mm.activeMu.Unlock() mm.mappingMu.RUnlock() // Linux: mm/mlock.c:__mlock_posix_error_return() - if err == syserror.EFAULT { + if linuxerr.Equals(linuxerr.EFAULT, err) { return syserror.ENOMEM } - if err == syserror.ENOMEM { - return syserror.EAGAIN + if linuxerr.Equals(linuxerr.ENOMEM, err) { + return linuxerr.EAGAIN } return err } @@ -898,7 +900,7 @@ type MLockAllOpts struct { // depending on opts. func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error { if !opts.Current && !opts.Future { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -911,7 +913,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { mm.mappingMu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } if uint64(mm.vmas.Span()) > mlockLimit { mm.mappingMu.Unlock() @@ -970,7 +972,7 @@ func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint6 defer mm.mappingMu.RUnlock() vseg := mm.vmas.FindSegment(addr) if !vseg.Ok() { - return 0, 0, syserror.EFAULT + return 0, 0, linuxerr.EFAULT } vma := vseg.ValuePtr() return vma.numaPolicy, vma.numaNodemask, nil @@ -979,13 +981,13 @@ func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint6 // SetNumaPolicy implements the semantics of Linux's mbind(). func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error { if !addr.IsPageAligned() { - return syserror.EINVAL + return linuxerr.EINVAL } // Linux allows this to overflow. la, _ := hostarch.Addr(length).RoundUp() ar, ok := addr.ToRange(uint64(la)) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } if ar.Length() == 0 { return nil @@ -1003,7 +1005,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy if !vseg.Ok() || lastEnd < vseg.Start() { // "EFAULT: ... there was an unmapped hole in the specified memory // range specified [sic] by addr and len." 
- mbind(2) - return syserror.EFAULT + return linuxerr.EFAULT } vseg = mm.vmas.Isolate(vseg, ar) vma := vseg.ValuePtr() @@ -1021,7 +1023,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork bool) error { ar, ok := addr.ToRange(length) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.Lock() @@ -1047,7 +1049,7 @@ func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { ar, ok := addr.ToRange(length) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } mm.mappingMu.RLock() @@ -1063,7 +1065,7 @@ func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error { for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { vma := vseg.ValuePtr() if vma.mlockMode != memmap.MLockNone { - return syserror.EINVAL + return linuxerr.EINVAL } vsegAR := vseg.Range().Intersect(ar) // pseg should already correspond to either this vma or a later one, @@ -1114,7 +1116,7 @@ type MSyncOpts struct { // MSync implements the semantics of Linux's msync(). func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length uint64, opts MSyncOpts) error { if addr != addr.RoundDown() { - return syserror.EINVAL + return linuxerr.EINVAL } if length == 0 { return nil @@ -1150,7 +1152,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u vma := vseg.ValuePtr() if opts.Invalidate && vma.mlockMode != memmap.MLockNone { mm.mappingMu.RUnlock() - return syserror.EBUSY + return linuxerr.EBUSY } // It's only possible to have dirtied the Mappable through a shared // mapping. Don't check if the mapping is writable, because mprotect @@ -1191,7 +1193,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length u func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr hostarch.Addr) (futex.Key, error) { ar, ok := addr.ToRange(4) // sizeof(int32). if !ok { - return futex.Key{}, syserror.EFAULT + return futex.Key{}, linuxerr.EFAULT } mm.mappingMu.RLock() diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index 0d019e41d..5f8ab7ca3 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -66,14 +67,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur if mlockLimit == 0 { - return vmaIterator{}, hostarch.AddrRange{}, syserror.EPERM + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.EPERM } newLockedAS := mm.lockedAS + opts.Length if opts.Unmap { newLockedAS -= mm.mlockedBytesRangeLocked(ar) } if newLockedAS > mlockLimit { - return vmaIterator{}, hostarch.AddrRange{}, syserror.EAGAIN + return vmaIterator{}, hostarch.AddrRange{}, linuxerr.EAGAIN } } } @@ -288,7 +289,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang vma := vseg.ValuePtr() if addr < vseg.Start() { // TODO(jamieliu): Implement vma.growsDown here. 
- return vbegin, vgap, syserror.EFAULT + return vbegin, vgap, linuxerr.EFAULT } perms := vma.effectivePerms @@ -296,7 +297,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang perms = vma.maxPerms } if !perms.SupersetOf(at) { - return vbegin, vgap, syserror.EPERM + return vbegin, vgap, linuxerr.EPERM } addr = vseg.End() @@ -308,7 +309,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRang } // Ran out of vmas before ar.End. - return vbegin, vgap, syserror.EFAULT + return vbegin, vgap, linuxerr.EFAULT } // getVecVMAsLocked ensures that vmas exist for all addresses in ars, and diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index 57d73d770..d351869ef 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -85,6 +85,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/memutil", diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index d1a883da4..0c8542485 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -31,6 +31,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" @@ -674,7 +675,7 @@ func (f *MemoryFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (s panic(fmt.Sprintf("invalid range: %v", fr)) } if at.Execute { - return safemem.BlockSeq{}, syserror.EACCES + return safemem.BlockSeq{}, linuxerr.EACCES } chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift) @@ -944,7 +945,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func( // NOTE(b/165896008): mincore (which is passed as checkCommitted) // by f.UpdateUsage() might take a really long time. So unlock f.mu // while checkCommitted runs. - f.mu.Unlock() + f.mu.Unlock() // +checklocksforce err := checkCommitted(s, buf) f.mu.Lock() if err != nil { diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index d761bbdee..0567c8d32 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm @@ -74,8 +75,27 @@ func (c *vCPU) KernelSyscall() { // therefore be guaranteed that there is no floating point state to be // loaded on resuming from halt. We only worry about saving on exit. ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. - ring0.Halt() - ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment. + // N.B. Since KernelSyscall is called when the kernel makes a syscall, + // FS_BASE is already set for correct execution of this function. + // + // Refresher on syscall/exception handling: + // 1. When the sentry is in guest mode and makes a syscall, it goes to + // sysenter(), which saves the register state (including RIP of SYSCALL + // instruction) to vCPU.registers. + // 2. It then calls KernelSyscall, which rewinds the IP and executes + // HLT. + // 3. HLT does a VM-exit to bluepillHandler, which returns from the + // signal handler using vCPU.registers, directly to the SYSCALL + // instruction. + // 4. 
Later, when we want to re-use the vCPU (perhaps on a different + // host thread), we set the new thread's registers in vCPU.registers + // (as opposed to setting the KVM registers with KVM_SET_REGS). + // 5. KVM_RUN thus enters the guest with the old register state, + // immediately following the HLT instruction, returning here. + // 6. We then restore FS_BASE and the full registers from vCPU.register + // to return from sysenter() back to the desired bluepill point from + // the host. + ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. } // KernelException handles kernel exceptions. @@ -93,8 +113,8 @@ func (c *vCPU) KernelException(vector ring0.Vector) { } // See above. ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. - ring0.Halt() - ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment. + // See above. + ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. } // bluepillArchExit is called during bluepillEnter. diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index 198bafdea..4ba1d6f9c 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index 578852c3f..acb0cb05f 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm @@ -25,29 +26,6 @@ import ( var ( // The action for bluepillSignal is changed by sigaction(). bluepillSignal = unix.SIGILL - - // vcpuSErrBounce is the event of system error for bouncing KVM. - vcpuSErrBounce = kvmVcpuEvents{ - exception: exception{ - sErrPending: 1, - }, - } - - // vcpuSErrNMI is the event of system error to trigger sigbus. - vcpuSErrNMI = kvmVcpuEvents{ - exception: exception{ - sErrPending: 1, - sErrHasEsr: 1, - sErrEsr: _ESR_ELx_SERR_NMI, - }, - } - - // vcpuExtDabt is the event of ext_dabt. - vcpuExtDabt = kvmVcpuEvents{ - exception: exception{ - extDabtPending: 1, - }, - } ) // getTLS returns the value of TPIDR_EL0 register. diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index 07fc4f216..ee7dba828 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm @@ -80,11 +81,18 @@ func getHypercallID(addr uintptr) int { // //go:nosplit func bluepillStopGuest(c *vCPU) { + // vcpuSErrBounce is the event of system error for bouncing KVM. + vcpuSErrBounce := &kvmVcpuEvents{ + exception: exception{ + sErrPending: 1, + }, + } + if _, _, errno := unix.RawSyscall( // escapes: no. 
unix.SYS_IOCTL, uintptr(c.fd), _KVM_SET_VCPU_EVENTS, - uintptr(unsafe.Pointer(&vcpuSErrBounce))); errno != 0 { + uintptr(unsafe.Pointer(vcpuSErrBounce))); errno != 0 { throw("bounce sErr injection failed") } } @@ -93,12 +101,21 @@ func bluepillStopGuest(c *vCPU) { // //go:nosplit func bluepillSigBus(c *vCPU) { + // vcpuSErrNMI is the event of system error to trigger sigbus. + vcpuSErrNMI := &kvmVcpuEvents{ + exception: exception{ + sErrPending: 1, + sErrHasEsr: 1, + sErrEsr: _ESR_ELx_SERR_NMI, + }, + } + // Host must support ARM64_HAS_RAS_EXTN. if _, _, errno := unix.RawSyscall( // escapes: no. unix.SYS_IOCTL, uintptr(c.fd), _KVM_SET_VCPU_EVENTS, - uintptr(unsafe.Pointer(&vcpuSErrNMI))); errno != 0 { + uintptr(unsafe.Pointer(vcpuSErrNMI))); errno != 0 { if errno == unix.EINVAL { throw("No ARM64_HAS_RAS_EXTN feature in host.") } @@ -110,11 +127,18 @@ func bluepillSigBus(c *vCPU) { // //go:nosplit func bluepillExtDabt(c *vCPU) { + // vcpuExtDabt is the event of ext_dabt. + vcpuExtDabt := &kvmVcpuEvents{ + exception: exception{ + extDabtPending: 1, + }, + } + if _, _, errno := unix.RawSyscall( // escapes: no. unix.SYS_IOCTL, uintptr(c.fd), _KVM_SET_VCPU_EVENTS, - uintptr(unsafe.Pointer(&vcpuExtDabt))); errno != 0 { + uintptr(unsafe.Pointer(vcpuExtDabt))); errno != 0 { throw("ext_dabt injection failed") } } diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 6f87236ad..f63ab6aba 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. @@ -85,6 +85,13 @@ func bluepillGuestExit(c *vCPU, context unsafe.Pointer) { // signal stack. It should only execute raw system calls and functions that are // explicitly marked go:nosplit. // +// Ideally, this function should switch to gsignal, as runtime.sigtramp does, +// but that is tedious given all the runtime internals. That said, using +// gsignal inside a signal handler is not _required_, provided we avoid stack +// splits and allocations. Note that calling any splittable function here will +// be flaky; if the signal stack is below the G stack then we will trigger a +// split and crash. If above, we won't trigger a split. +// // +checkescape:all // //go:nosplit diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go index b9ed4a706..a5189d9e2 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64.go +++ b/pkg/sentry/platform/kvm/kvm_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go index b1cab89a0..c3fbbdc75 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
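The build-tag hunks throughout this change add the Go 1.17 //go:build form alongside the legacy // +build form rather than replacing it. As a reminder of how the two syntaxes correspond for a combined constraint like the one on bluepill_unsafe.go above, here is an illustrative header (the linux && amd64 terms are added purely for illustration and are not part of that file):

// The //go:build line is what Go 1.17+ toolchains read; the // +build lines
// keep older toolchains working. In the legacy form a comma means AND, a
// space means OR, and separate // +build lines are ANDed together.

//go:build go1.12 && !go1.18 && linux && amd64
// +build go1.12,!go1.18
// +build linux,amd64

package kvm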
+//go:build amd64 // +build amd64 package kvm @@ -28,7 +29,7 @@ import ( ) func TestSegments(t *testing.T) { - applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleSegments(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestSegments(regs) for { var si linux.SignalInfo @@ -55,7 +56,7 @@ func TestSegments(t *testing.T) { func stmxcsr(addr *uint32) func TestMXCSR(t *testing.T) { - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo switchOpts := ring0.SwitchOpts{ Registers: regs, diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go index 0c43d72f4..7fdb6ac64 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index b73340f0e..159808433 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_test.go b/pkg/sentry/platform/kvm/kvm_arm64_test.go index 0e3d84d95..b53e354da 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64_test.go +++ b/pkg/sentry/platform/kvm/kvm_arm64_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go b/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go index f07a9f34d..54d579a2b 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index fe570aff9..3a30286e2 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -120,13 +120,13 @@ func TestKernelFloatingPoint(t *testing.T) { }) } -func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) { +func applicationTest(t testHarness, useHostMappings bool, targetFn uintptr, fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) { // Initialize registers & page tables. var ( regs arch.Registers pt *pagetables.PageTables ) - testutil.SetTestTarget(®s, target) + testutil.SetTestTarget(®s, targetFn) kvmTest(t, func(k *KVM) { // Create new page tables. 
@@ -157,7 +157,7 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func } func TestApplicationSyscall(t *testing.T) { - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -171,7 +171,7 @@ func TestApplicationSyscall(t *testing.T) { } return false }) - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -187,7 +187,7 @@ func TestApplicationSyscall(t *testing.T) { } func TestApplicationFault(t *testing.T) { - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ @@ -202,7 +202,7 @@ func TestApplicationFault(t *testing.T) { } return false }) - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ @@ -219,7 +219,7 @@ func TestApplicationFault(t *testing.T) { } func TestRegistersSyscall(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleRegsSyscall(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. for { var si linux.SignalInfo @@ -242,7 +242,7 @@ func TestRegistersSyscall(t *testing.T) { } func TestRegistersFault(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleRegsFault(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. 
for { var si linux.SignalInfo @@ -266,7 +266,7 @@ func TestRegistersFault(t *testing.T) { } func TestBounce(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { go func() { time.Sleep(time.Millisecond) c.BounceToKernel() @@ -281,7 +281,7 @@ func TestBounce(t *testing.T) { } return false }) - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { go func() { time.Sleep(time.Millisecond) c.BounceToKernel() @@ -300,7 +300,7 @@ func TestBounce(t *testing.T) { } func TestBounceStress(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { randomSleep := func() { // O(hundreds of microseconds) is appropriate to ensure // different overlaps and different schedules. @@ -336,7 +336,7 @@ func TestBounceStress(t *testing.T) { func TestInvalidate(t *testing.T) { var data uintptr // Used below. - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, &data) // Read legitimate value. for { var si linux.SignalInfo @@ -377,7 +377,7 @@ func IsFault(err error, si *linux.SignalInfo) bool { } func TestEmptyAddressSpace(t *testing.T) { - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -391,7 +391,7 @@ func TestEmptyAddressSpace(t *testing.T) { } return false }) - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -467,7 +467,7 @@ func BenchmarkApplicationSyscall(b *testing.B) { i int // Iteration includes machine.Get() / machine.Put(). a int // Count for ErrContextInterrupt. ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -489,7 +489,7 @@ func BenchmarkApplicationSyscall(b *testing.B) { func BenchmarkKernelSyscall(b *testing.B) { // Note that the target passed here is irrelevant, we never execute SwitchToUser. 
- applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { // iteration does not include machine.Get() / machine.Put(). for i := 0; i < b.N; i++ { testutil.Getpid() @@ -504,7 +504,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { i int a int ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 7a10fd812..a96634381 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm @@ -136,7 +137,7 @@ func (c *vCPU) initArchState() error { } // Set the entrypoint for the kernel. - kernelUserRegs.RIP = uint64(reflect.ValueOf(ring0.Start).Pointer()) + kernelUserRegs.RIP = uint64(ring0.AddrOfStart()) kernelUserRegs.RAX = uint64(reflect.ValueOf(&c.CPU).Pointer()) kernelUserRegs.RSP = c.StackTop() kernelUserRegs.RFLAGS = ring0.KernelFlagsSet diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 83bcc7406..de798bb2c 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index edaccf9bc..7937a8481 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 1b0a6e0a7..1a4a9ce7d 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kvm @@ -140,22 +141,15 @@ func (c *vCPU) initArchState() error { // vbar_el1 reg.id = _KVM_ARM64_REGS_VBAR_EL1 - - fromLocation := reflect.ValueOf(ring0.Vectors).Pointer() - offset := fromLocation & (1<<11 - 1) - if offset != 0 { - offset = 1<<11 - offset - } - - toLocation := fromLocation + offset - data = uint64(ring0.KernelStartAddress | toLocation) + vectorLocation := reflect.ValueOf(ring0.Vectors).Pointer() + data = uint64(ring0.KernelStartAddress | vectorLocation) if err := c.setOneRegister(®); err != nil { return err } // Use the address of the exception vector table as // the MMIO address base. - arm64HypercallMMIOBase = toLocation + arm64HypercallMMIOBase = vectorLocation // Initialize the PCID database. 
if hasGuestPCID { diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index 49e1c7136..35660e827 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/platform/kvm/testutil/testutil.go b/pkg/sentry/platform/kvm/testutil/testutil.go index 5c1efa0fd..d8c273796 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil.go +++ b/pkg/sentry/platform/kvm/testutil/testutil.go @@ -23,23 +23,41 @@ import ( // Getpid executes a trivial system call. func Getpid() -// Touch touches the value in the first register. -func Touch() +// AddrOfGetpid returns the address of Getpid. +// +// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal +// wrapper function rather than the function itself. We must reference from +// assembly to get the ABI0 (i.e., primary) address. +func AddrOfGetpid() uintptr + +// AddrOfTouch returns the address of a function that touches the value in the +// first register. +func AddrOfTouch() uintptr +func touch() -// SyscallLoop executes a syscall and loops. -func SyscallLoop() +// AddrOfSyscallLoop returns the address of a function that executes a syscall +// and loops. +func AddrOfSyscallLoop() uintptr +func syscallLoop() -// SpinLoop spins on the CPU. -func SpinLoop() +// AddrOfSpinLoop returns the address of a function that spins on the CPU. +func AddrOfSpinLoop() uintptr +func spinLoop() -// HaltLoop immediately halts and loops. -func HaltLoop() +// AddrOfHaltLoop returns the address of a function that immediately halts and +// loops. +func AddrOfHaltLoop() uintptr +func haltLoop() -// TwiddleRegsFault twiddles registers then faults. -func TwiddleRegsFault() +// AddrOfTwiddleRegsFault returns the address of a function that twiddles +// registers then faults. +func AddrOfTwiddleRegsFault() uintptr +func twiddleRegsFault() -// TwiddleRegsSyscall twiddles registers then executes a syscall. -func TwiddleRegsSyscall() +// AddrOfTwiddleRegsSyscall returns the address of a function that twiddles +// registers then executes a syscall. +func AddrOfTwiddleRegsSyscall() uintptr +func twiddleRegsSyscall() // FloatingPointWorks is a floating point test. // diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go index 8048eedec..98c52b2f5 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go +++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package testutil @@ -22,12 +23,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" ) -// TwiddleSegments reads segments into known registers. -func TwiddleSegments() +// AddrOfTwiddleSegments return the address of a function that reads segments +// into known registers. +func AddrOfTwiddleSegments() uintptr +func twiddleSegments() // SetTestTarget sets the rip appropriately. -func SetTestTarget(regs *arch.Registers, fn func()) { - regs.Rip = uint64(reflect.ValueOf(fn).Pointer()) +func SetTestTarget(regs *arch.Registers, fn uintptr) { + regs.Rip = uint64(fn) } // SetTouchTarget sets rax appropriately. 
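The testutil rework above (and the assembly file that follows) exists because, under the Go 1.17 register-based ABI, taking the address of an assembly function from Go via reflect.ValueOf(fn).Pointer() can yield an ABIInternal wrapper rather than the ABI0 entry point; each entry point therefore gets an unexported assembly body plus an AddrOf* helper that returns the raw address. A schematic, runnable sketch of the resulting caller-side shape; the registers struct, the names, and the stubbed address below are placeholders for illustration, not the real package:

// Schematic sketch: test targets are now passed around as raw code addresses
// (uintptr) instead of func values. In the real package the AddrOf* helpers
// are implemented in assembly; a placeholder stands in here so this runs.
package main

import "fmt"

// registers is a stand-in for arch.Registers in this sketch.
type registers struct {
	Rip uint64
}

// addrOfTouch stands in for testutil.AddrOfTouch(); the real helper returns
// the ABI0 address of an assembly routine.
func addrOfTouch() uintptr {
	return 0x401000 // placeholder address, for illustration only
}

// setTestTarget mirrors the new testutil.SetTestTarget signature: it accepts
// an address rather than a func().
func setTestTarget(regs *registers, fn uintptr) {
	regs.Rip = uint64(fn)
}

func main() {
	var regs registers
	setTestTarget(&regs, addrOfTouch())
	fmt.Printf("test target entry point: %#x\n", regs.Rip)
}

The real helpers simply load the symbol address in assembly (MOVQ $·touch(SB), AX), as shown in testutil_amd64.s below.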
diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s index 491ec0c2a..65e7c05ea 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s +++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s @@ -25,27 +25,46 @@ TEXT ·Getpid(SB),NOSPLIT,$0 SYSCALL RET -TEXT ·Touch(SB),NOSPLIT,$0 +// func AddrOfGetpid() uintptr +TEXT ·AddrOfGetpid(SB), $0-8 + MOVQ $·Getpid(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·touch(SB),NOSPLIT,$0 start: MOVQ 0(AX), BX // deref AX MOVQ $39, AX // getpid SYSCALL JMP start -TEXT ·HaltLoop(SB),NOSPLIT,$0 -start: - HLT - JMP start +// func AddrOfTouch() uintptr +TEXT ·AddrOfTouch(SB), $0-8 + MOVQ $·touch(SB), AX + MOVQ AX, ret+0(FP) + RET -TEXT ·SyscallLoop(SB),NOSPLIT,$0 +TEXT ·syscallLoop(SB),NOSPLIT,$0 start: SYSCALL JMP start -TEXT ·SpinLoop(SB),NOSPLIT,$0 +// func AddrOfSyscallLoop() uintptr +TEXT ·AddrOfSyscallLoop(SB), $0-8 + MOVQ $·syscallLoop(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·spinLoop(SB),NOSPLIT,$0 start: JMP start +// func AddrOfSpinLoop() uintptr +TEXT ·AddrOfSpinLoop(SB), $0-8 + MOVQ $·spinLoop(SB), AX + MOVQ AX, ret+0(FP) + RET + TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 NO_LOCAL_POINTERS MOVQ $1, AX @@ -75,20 +94,32 @@ TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 NOTQ DI; \ NOTQ SP; -TEXT ·TwiddleRegsSyscall(SB),NOSPLIT,$0 +TEXT ·twiddleRegsSyscall(SB),NOSPLIT,$0 TWIDDLE_REGS() SYSCALL RET // never reached -TEXT ·TwiddleRegsFault(SB),NOSPLIT,$0 +// func AddrOfTwiddleRegsSyscall() uintptr +TEXT ·AddrOfTwiddleRegsSyscall(SB), $0-8 + MOVQ $·twiddleRegsSyscall(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·twiddleRegsFault(SB),NOSPLIT,$0 TWIDDLE_REGS() JMP AX // must fault RET // never reached +// func AddrOfTwiddleRegsFault() uintptr +TEXT ·AddrOfTwiddleRegsFault(SB), $0-8 + MOVQ $·twiddleRegsFault(SB), AX + MOVQ AX, ret+0(FP) + RET + #define READ_FS() BYTE $0x64; BYTE $0x48; BYTE $0x8b; BYTE $0x00; #define READ_GS() BYTE $0x65; BYTE $0x48; BYTE $0x8b; BYTE $0x00; -TEXT ·TwiddleSegments(SB),NOSPLIT,$0 +TEXT ·twiddleSegments(SB),NOSPLIT,$0 MOVQ $0x0, AX READ_GS() MOVQ AX, BX @@ -96,3 +127,9 @@ TEXT ·TwiddleSegments(SB),NOSPLIT,$0 READ_FS() SYSCALL RET // never reached + +// func AddrOfTwiddleSegments() uintptr +TEXT ·AddrOfTwiddleSegments(SB), $0-8 + MOVQ $·twiddleSegments(SB), AX + MOVQ AX, ret+0(FP) + RET diff --git a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go index c5235ca9d..6d0ba8252 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go +++ b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package testutil diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go index 4f7fe993a..07eda0ef3 100644 --- a/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build arm64 // +build arm64 package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go index 90b1ead56..13a55b784 100644 --- a/pkg/sentry/platform/ptrace/subprocess_amd64.go +++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package ptrace @@ -176,6 +177,7 @@ func patchSignalInfo(regs *arch.Registers, signalInfo *linux.SignalInfo) { // // This is safe to call in an afterFork context. // +//go:norace //go:nosplit func enableCpuidFault() { unix.RawSyscall6(unix.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0) diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go index e4257e3bf..8181db659 100644 --- a/pkg/sentry/platform/ptrace/subprocess_arm64.go +++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package ptrace diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 4f0260432..129ca52e2 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package ptrace @@ -120,6 +121,17 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro return nil, err } + return forkStub(flags, instrs) +} + +// In the child, this function must not acquire any locks, because they might +// have been locked at the time of the fork. This means no rescheduling, no +// malloc calls, and no new stack segments. For the same reason compiler does +// not race instrument it. +// +// +//go:norace +func forkStub(flags uintptr, instrs []linux.BPFInstruction) (*thread, error) { // Declare all variables up front in order to ensure that there's no // need for allocations between beforeFork & afterFork. var ( @@ -181,7 +193,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro // Set an aggressive BPF filter for the stub and all it's children. See // the description of the BPF program built above. - if errno := seccomp.SetFilter(instrs); errno != 0 { + if errno := seccomp.SetFilterInChild(instrs); errno != 0 { unix.RawSyscall(unix.SYS_EXIT, uintptr(errno), 0, 0) } diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go index 9c342c59b..f1e84059d 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux && (amd64 || arm64) // +build linux // +build amd64 arm64 @@ -26,6 +27,7 @@ import ( // unmaskAllSignals unmasks all signals on the current thread. 
// +//go:norace //go:nosplit func unmaskAllSignals() unix.Errno { var set linux.SignalSet diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go index 38b7b1a5e..ffd4665f4 100644 --- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build go1.12 -// +build !go1.18 +//go:build go1.12 && !go1.18 +// +build go1.12,!go1.18 // Check go:linkname function signatures when updating Go version. diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 2029e7cf4..b2fc84181 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/abi/linux", "//pkg/bits", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal", "//pkg/marshal/primitive", @@ -25,7 +26,6 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 235b9c306..00a5e729a 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" ) const maxInt = int(^uint(0) >> 1) @@ -70,7 +70,7 @@ func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) { file := t.GetFile(fd) if file == nil { files.Release(t) - return nil, syserror.EBADF + return nil, linuxerr.EBADF } files = append(files, file) } @@ -169,7 +169,7 @@ func NewSCMCredentials(t *kernel.Task, cred linux.ControlMessageCredentials) (SC return nil, err } if kernel.ThreadID(cred.PID) != t.ThreadGroup().ID() && !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.PIDNamespace().UserNamespace()) { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } return &scmCredentials{t, kuid, kgid}, nil } @@ -473,17 +473,17 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) for i := 0; i < len(buf); { if i+linux.SizeOfControlMessageHeader > len(buf) { - return cmsgs, syserror.EINVAL + return cmsgs, linuxerr.EINVAL } var h linux.ControlMessageHeader h.UnmarshalUnsafe(buf[i : i+linux.SizeOfControlMessageHeader]) if h.Length < uint64(linux.SizeOfControlMessageHeader) { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } if h.Length > uint64(len(buf)-i) { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } i += linux.SizeOfControlMessageHeader @@ -497,7 +497,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) numRights := rightsSize / linux.SizeOfControlMessageRight if len(fds)+numRights > linux.SCM_MAX_FD { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } for j := i; j < i+rightsSize; j += linux.SizeOfControlMessageRight { @@ -508,7 +508,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, 
buf []byte, width uint) case linux.SCM_CREDENTIALS: if length < linux.SizeOfControlMessageCredentials { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } var creds linux.ControlMessageCredentials @@ -522,7 +522,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.SO_TIMESTAMP: if length < linux.SizeOfTimeval { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } var ts linux.Timeval ts.UnmarshalUnsafe(buf[i : i+linux.SizeOfTimeval]) @@ -532,13 +532,13 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) default: // Unknown message type. - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } case linux.SOL_IP: switch h.Type { case linux.IP_TOS: if length < linux.SizeOfControlMessageTOS { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } cmsgs.IP.HasTOS = true var tos primitive.Uint8 @@ -548,7 +548,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.IP_PKTINFO: if length < linux.SizeOfControlMessageIPPacketInfo { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } cmsgs.IP.HasIPPacketInfo = true @@ -561,7 +561,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.IP_RECVORIGDSTADDR: var addr linux.SockAddrInet if length < addr.SizeBytes() { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } addr.UnmarshalUnsafe(buf[i : i+addr.SizeBytes()]) cmsgs.IP.OriginalDstAddress = &addr @@ -570,7 +570,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.IP_RECVERR: var errCmsg linux.SockErrCMsgIPv4 if length < errCmsg.SizeBytes() { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } errCmsg.UnmarshalBytes(buf[i : i+errCmsg.SizeBytes()]) @@ -578,13 +578,13 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) i += bits.AlignUp(length, width) default: - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } case linux.SOL_IPV6: switch h.Type { case linux.IPV6_TCLASS: if length < linux.SizeOfControlMessageTClass { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } cmsgs.IP.HasTClass = true var tclass primitive.Uint32 @@ -595,7 +595,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.IPV6_RECVORIGDSTADDR: var addr linux.SockAddrInet6 if length < addr.SizeBytes() { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } addr.UnmarshalUnsafe(buf[i : i+addr.SizeBytes()]) cmsgs.IP.OriginalDstAddress = &addr @@ -604,7 +604,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) case linux.IPV6_RECVERR: var errCmsg linux.SockErrCMsgIPv6 if length < errCmsg.SizeBytes() { - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } errCmsg.UnmarshalBytes(buf[i : i+errCmsg.SizeBytes()]) @@ -612,10 +612,10 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint) i += bits.AlignUp(length, width) default: - return socket.ControlMessages{}, 
syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } default: - return socket.ControlMessages{}, syserror.EINVAL + return socket.ControlMessages{}, linuxerr.EINVAL } } diff --git a/pkg/sentry/socket/control/control_vfs2.go b/pkg/sentry/socket/control/control_vfs2.go index 37d02948f..0a989cbeb 100644 --- a/pkg/sentry/socket/control/control_vfs2.go +++ b/pkg/sentry/socket/control/control_vfs2.go @@ -17,10 +17,10 @@ package control import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // SCMRightsVFS2 represents a SCM_RIGHTS socket control message. @@ -51,7 +51,7 @@ func NewSCMRightsVFS2(t *kernel.Task, fds []int32) (SCMRightsVFS2, error) { file := t.GetFileVFS2(fd) if file == nil { files.Release(t) - return nil, syserror.EBADF + return nil, linuxerr.EBADF } files = append(files, file) } diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 3c6511ead..3950caa0f 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fdnotifier", "//pkg/hostarch", "//pkg/log", diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index b9473da6c..38cb2c99c 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -20,6 +20,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" @@ -714,7 +715,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b } if ch != nil { if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index d3be2d825..ccf4f534d 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -20,12 +20,12 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -67,9 +67,25 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, args arch.SyscallArgument AddressSpaceActive: true, }) return 0, err - + case unix.SIOCGIFFLAGS: + cc := &usermem.IOCopyContext{ + Ctx: ctx, + IO: io, + Opts: usermem.IOOpts{ + AddressSpaceActive: true, + }, + } + var ifr linux.IFReq + if _, err := ifr.CopyIn(cc, args[2].Pointer()); err != nil { + return 0, err + } + if _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), cmd, uintptr(unsafe.Pointer(&ifr))); errno != 0 { + return 0, translateIOSyscallError(errno) + } + _, err := ifr.CopyOut(cc, args[2].Pointer()) + return 0, err default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go 
b/pkg/sentry/socket/hostinet/socket_vfs2.go index 5d55cc64d..cd6e34ecc 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -18,6 +18,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" @@ -26,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -105,7 +105,7 @@ func (s *socketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal // PRead implements vfs.FileDescriptionImpl.PRead. func (s *socketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Read implements vfs.FileDescriptionImpl. @@ -113,7 +113,7 @@ func (s *socketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } reader := hostfd.GetReadWriterAt(int32(s.fd), -1, opts.Flags) @@ -124,7 +124,7 @@ func (s *socketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // PWrite implements vfs.FileDescriptionImpl. func (s *socketVFS2) PWrite(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Write implements vfs.FileDescriptionImpl. @@ -132,7 +132,7 @@ func (s *socketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } writer := hostfd.GetReadWriterAt(int32(s.fd), -1, opts.Flags) diff --git a/pkg/sentry/socket/hostinet/sockopt_impl.go b/pkg/sentry/socket/hostinet/sockopt_impl.go index 8a783712e..2397e04e7 100644 --- a/pkg/sentry/socket/hostinet/sockopt_impl.go +++ b/pkg/sentry/socket/hostinet/sockopt_impl.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package hostinet import ( diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index cbb1e905d..7a4e78a5f 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -29,11 +29,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/usermem" @@ -320,12 +320,12 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. func (s *Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error { - return syserror.EACCES + return linuxerr.EACCES } // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. 
func (s *Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error { - return syserror.EACCES + return linuxerr.EACCES } // SupportsIPv6 implements inet.Stack.SupportsIPv6. @@ -340,7 +340,7 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { - return syserror.EACCES + return linuxerr.EACCES } // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. @@ -350,7 +350,7 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { - return syserror.EACCES + return linuxerr.EACCES } // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. @@ -360,7 +360,7 @@ func (s *Stack) TCPSACKEnabled() (bool, error) { // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. func (s *Stack) SetTCPSACKEnabled(bool) error { - return syserror.EACCES + return linuxerr.EACCES } // TCPRecovery implements inet.Stack.TCPRecovery. @@ -370,7 +370,7 @@ func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { // SetTCPRecovery implements inet.Stack.SetTCPRecovery. func (s *Stack) SetTCPRecovery(inet.TCPLossRecovery) error { - return syserror.EACCES + return linuxerr.EACCES } // getLine reads one line from proc file, with specified prefix. @@ -483,7 +483,7 @@ func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} // SetForwarding implements inet.Stack.SetForwarding. func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { - return syserror.EACCES + return linuxerr.EACCES } // PortRange implements inet.Stack.PortRange. @@ -494,5 +494,5 @@ func (*Stack) PortRange() (uint16, uint16) { // SetPortRange implements inet.Stack.SetPortRange. 
func (*Stack) SetPortRange(start uint16, end uint16) error { - return syserror.EACCES + return linuxerr.EACCES } diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go index d8bd86292..af31cbc5b 100644 --- a/pkg/sentry/socket/netfilter/ipv4.go +++ b/pkg/sentry/socket/netfilter/ipv4.go @@ -81,6 +81,8 @@ func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTG copy(entry.Entry.IP.SrcMask[:], rule.Filter.SrcMask) copy(entry.Entry.IP.OutputInterface[:], rule.Filter.OutputInterface) copy(entry.Entry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) + copy(entry.Entry.IP.InputInterface[:], rule.Filter.InputInterface) + copy(entry.Entry.IP.InputInterfaceMask[:], rule.Filter.InputInterfaceMask) if rule.Filter.DstInvert { entry.Entry.IP.InverseFlags |= linux.IPT_INV_DSTIP } diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go index c68230847..6cefe0b9c 100644 --- a/pkg/sentry/socket/netfilter/ipv6.go +++ b/pkg/sentry/socket/netfilter/ipv6.go @@ -81,6 +81,8 @@ func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6T copy(entry.Entry.IPv6.SrcMask[:], rule.Filter.SrcMask) copy(entry.Entry.IPv6.OutputInterface[:], rule.Filter.OutputInterface) copy(entry.Entry.IPv6.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) + copy(entry.Entry.IPv6.InputInterface[:], rule.Filter.InputInterface) + copy(entry.Entry.IPv6.InputInterfaceMask[:], rule.Filter.InputInterfaceMask) if rule.Filter.DstInvert { entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_DSTIP } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 6b83698ad..ed85404da 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -17,6 +17,7 @@ go_library( "//pkg/abi/linux/errno", "//pkg/bits", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal", "//pkg/marshal/primitive", diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index c9f784cf4..5c3ae26f8 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/abi/linux/errno" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -213,7 +214,7 @@ func (s *socketOpsCommon) ConnectedPasscred() bool { // Ioctl implements fs.FileOperations.Ioctl. func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) { // TODO(b/68878065): no ioctls supported. - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // ExtractSockAddr extracts the SockAddrNetlink from b. 
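A recurring rewrite in this diff replaces direct comparisons such as err == syserror.ETIMEDOUT with linuxerr.Equals(linuxerr.ETIMEDOUT, err): the same errno may be carried by different Go error values (a linuxerr sentinel, an old syserror value, a raw unix.Errno), so comparing by identity is too strict. A toy, self-contained illustration of the idea; errnoError and equals here are hypothetical stand-ins, not the real pkg/errors/linuxerr API:

// Toy illustration of errno-based error comparison. The real linuxerr package
// exposes sentinels such as linuxerr.ETIMEDOUT and an Equals helper; the types
// below are invented for this sketch.
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// errnoError is a hypothetical sentinel carrying a Linux errno.
type errnoError struct {
	errno unix.Errno
	msg   string
}

func (e *errnoError) Error() string { return e.msg }

var errTimedOut = &errnoError{unix.ETIMEDOUT, "connection timed out"}

// equals reports whether err represents the same errno as target, whether it
// is the sentinel itself or a raw unix.Errno from a syscall wrapper.
func equals(target *errnoError, err error) bool {
	switch e := err.(type) {
	case *errnoError:
		return e.errno == target.errno
	case unix.Errno:
		return e == target.errno
	default:
		return false
	}
}

func main() {
	var fromSyscall error = unix.ETIMEDOUT
	fmt.Println(fromSyscall == error(errTimedOut)) // false: distinct error values
	fmt.Println(equals(errTimedOut, fromSyscall))  // true: same errno
	fmt.Println(equals(errTimedOut, errTimedOut))  // true
}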
@@ -559,7 +560,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index 842036764..4d3cdea62 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -17,6 +17,7 @@ package netlink import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -108,12 +108,12 @@ func (s *SocketVFS2) EventUnregister(e *waiter.Entry) { // Ioctl implements vfs.FileDescriptionImpl. func (*SocketVFS2) Ioctl(context.Context, usermem.IO, arch.SyscallArguments) (uintptr, error) { // TODO(b/68878065): no ioctls supported. - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // PRead implements vfs.FileDescriptionImpl. func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Read implements vfs.FileDescriptionImpl. @@ -121,7 +121,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } if dst.NumBytes() == 0 { @@ -134,7 +134,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // PWrite implements vfs.FileDescriptionImpl. func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Write implements vfs.FileDescriptionImpl. @@ -142,7 +142,7 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. 
if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{}) diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 96c425619..e828982eb 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/abi/linux", "//pkg/abi/linux/errno", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/marshal", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 66d0fcb47..0f8cbe7e2 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -38,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/abi/linux/errno" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal" @@ -48,6 +49,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" @@ -1681,6 +1683,26 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int return nil } +func clampBufSize(newSz, min, max int64, ignoreMax bool) int64 { + // packetOverheadFactor is used to multiply the value provided by the user on + // a setsockopt(2) for setting the send/receive buffer sizes sockets. + const packetOverheadFactor = 2 + + if !ignoreMax && newSz > max { + newSz = max + } + + if newSz < math.MaxInt32/packetOverheadFactor { + newSz *= packetOverheadFactor + if newSz < min { + newSz = min + } + } else { + newSz = math.MaxInt32 + } + return newSz +} + // setSockOptSocket implements SetSockOpt when level is SOL_SOCKET. 
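clampBufSize, added above, now sits between the user-supplied SO_SNDBUF/SO_RCVBUF value and the endpoint: the request is capped at the endpoint's maximum (unless ignoreMax is true, the privileged SO_RCVBUFFORCE path), doubled by packetOverheadFactor, and raised to the minimum, with a guard against int32 overflow. A small runnable demonstration of that arithmetic; the min/max limits are invented for the example, the real ones come from ep.SocketOptions().SendBufferLimits() and ReceiveBufferLimits():

package main

import (
	"fmt"
	"math"
)

// clampBufSize reproduces the helper from the hunk above: cap at max (unless
// ignoreMax), double for packet overhead, raise to min, guard int32 overflow.
func clampBufSize(newSz, min, max int64, ignoreMax bool) int64 {
	const packetOverheadFactor = 2
	if !ignoreMax && newSz > max {
		newSz = max
	}
	if newSz < math.MaxInt32/packetOverheadFactor {
		newSz *= packetOverheadFactor
		if newSz < min {
			newSz = min
		}
	} else {
		newSz = math.MaxInt32
	}
	return newSz
}

func main() {
	// Illustrative limits only.
	const min, max = 4096, 262144

	fmt.Println(clampBufSize(1024, min, max, false))   // 4096: doubled to 2048, then raised to min
	fmt.Println(clampBufSize(8192, min, max, false))   // 16384: simply doubled
	fmt.Println(clampBufSize(500000, min, max, false)) // 524288: capped at max, then doubled
	fmt.Println(clampBufSize(500000, min, max, true))  // 1000000: SO_RCVBUFFORCE skips the max cap
}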
func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { switch name { @@ -1690,7 +1712,9 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam } v := hostarch.ByteOrder.Uint32(optVal) - ep.SocketOptions().SetSendBufferSize(int64(v), true /* notify */) + min, max := ep.SocketOptions().SendBufferLimits() + clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */) + ep.SocketOptions().SetSendBufferSize(clamped, true /* notify */) return nil case linux.SO_RCVBUF: @@ -1699,7 +1723,24 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam } v := hostarch.ByteOrder.Uint32(optVal) - ep.SocketOptions().SetReceiveBufferSize(int64(v), true /* notify */) + min, max := ep.SocketOptions().ReceiveBufferLimits() + clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */) + ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */) + return nil + + case linux.SO_RCVBUFFORCE: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + if creds := auth.CredentialsFromContext(t); !creds.HasCapability(linux.CAP_NET_ADMIN) { + return syserr.ErrNotPermitted + } + + v := hostarch.ByteOrder.Uint32(optVal) + min, max := ep.SocketOptions().ReceiveBufferLimits() + clamped := clampBufSize(int64(v), min, max, true /* ignoreMax */) + ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */) return nil case linux.SO_REUSEADDR: @@ -2809,7 +2850,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags if n > 0 { return n, msgFlags, senderAddr, senderAddrLen, controlMessages, nil } - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) @@ -2877,7 +2918,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b // became available between when we last checked and when we setup // the notification. if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return int(total), syserr.ErrTryAgain } // handleIOError will consume errors from t.Block if needed. @@ -3015,7 +3056,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc unimpl.EmitUnimplementedEvent(ctx) } - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // interfaceIoctl implements interface requests. diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 30f3ad153..edc160b1b 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -17,6 +17,7 @@ package netstack import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -104,7 +105,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } if dst.NumBytes() == 0 { @@ -125,7 +126,7 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs // All flags other than RWF_NOWAIT should be ignored. 
// TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } r := src.Reader(ctx) diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index eef5e6519..0fd0ad32c 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -18,10 +18,10 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -110,24 +110,24 @@ func convertAddr(addr inet.InterfaceAddr) (tcpip.ProtocolAddress, error) { switch addr.Family { case linux.AF_INET: if len(addr.Addr) != header.IPv4AddressSize { - return protocolAddress, syserror.EINVAL + return protocolAddress, linuxerr.EINVAL } if addr.PrefixLen > header.IPv4AddressSize*8 { - return protocolAddress, syserror.EINVAL + return protocolAddress, linuxerr.EINVAL } protocol = ipv4.ProtocolNumber address = tcpip.Address(addr.Addr) case linux.AF_INET6: if len(addr.Addr) != header.IPv6AddressSize { - return protocolAddress, syserror.EINVAL + return protocolAddress, linuxerr.EINVAL } if addr.PrefixLen > header.IPv6AddressSize*8 { - return protocolAddress, syserror.EINVAL + return protocolAddress, linuxerr.EINVAL } protocol = ipv6.ProtocolNumber address = tcpip.Address(addr.Addr) default: - return protocolAddress, syserror.ENOTSUP + return protocolAddress, linuxerr.ENOTSUP } protocolAddress = tcpip.ProtocolAddress{ diff --git a/pkg/sentry/socket/netstack/tun.go b/pkg/sentry/socket/netstack/tun.go index c7ed52702..e67fe9700 100644 --- a/pkg/sentry/socket/netstack/tun.go +++ b/pkg/sentry/socket/netstack/tun.go @@ -16,7 +16,7 @@ package netstack import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/tcpip/link/tun" ) @@ -41,7 +41,7 @@ func LinuxToTUNFlags(flags uint16) (tun.Flags, error) { // when there is no sk_filter. See __tun_chr_ioctl() in // net/drivers/tun.c. 
if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI|linux.IFF_ONE_QUEUE) != 0 { - return tun.Flags{}, syserror.EINVAL + return tun.Flags{}, linuxerr.EINVAL } return tun.Flags{ TUN: flags&linux.IFF_TUN != 0, diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index f5da3c509..658e90bb9 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -509,7 +509,6 @@ func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) { linux.SO_ATTACH_REUSEPORT_EBPF, linux.SO_CNX_ADVICE, linux.SO_DETACH_FILTER, - linux.SO_RCVBUFFORCE, linux.SO_SNDBUFFORCE: t.Kernel().EmitUnimplementedEvent(t) diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index c9cbefb3a..5c3cdef6a 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -39,6 +39,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/hostarch", "//pkg/log", diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index db7b1affe..8ccdadae9 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -23,6 +23,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" @@ -518,7 +519,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b } if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break @@ -719,7 +720,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags if total > 0 { err = nil } - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return int(total), msgFlags, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain } return int(total), msgFlags, nil, 0, socket.ControlMessages{}, syserr.FromError(err) diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index c39e317ff..8c5075a1c 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -17,6 +17,7 @@ package unix import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" @@ -29,7 +30,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -236,7 +236,7 @@ func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { Mode: linux.FileMode(linux.S_IFSOCK | uint(stat.Mode)&^t.FSContext().Umask()), Endpoint: bep, }) - if err == syserror.EEXIST { + if linuxerr.Equals(linuxerr.EEXIST, err) { return syserr.ErrAddressInUse } return syserr.FromError(err) @@ -253,7 +253,7 @@ func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal // PRead implements vfs.FileDescriptionImpl. func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Read implements vfs.FileDescriptionImpl. 
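The flag check in LinuxToTUNFlags above relies on Go's AND NOT operator: flags &^ allowed clears every permitted bit, so any nonzero remainder means an unsupported bit was set and EINVAL is returned. A minimal sketch of the idiom; the flag names and values below are made up for illustration:

package main

import (
	"errors"
	"fmt"
)

const (
	flagTun     uint16 = 1 << 0
	flagTap     uint16 = 1 << 1
	flagNoPI    uint16 = 1 << 12
	flagAllowed        = flagTun | flagTap | flagNoPI
)

// checkFlags rejects any bit outside the allowed set, mirroring the EINVAL
// path in LinuxToTUNFlags.
func checkFlags(flags uint16) error {
	if flags&^flagAllowed != 0 {
		return errors.New("unsupported flag bits set")
	}
	return nil
}

func main() {
	fmt.Println(checkFlags(flagTap | flagNoPI)) // <nil>
	fmt.Println(checkFlags(1 << 5))             // error: bit 5 is not in the allowed set
}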
@@ -261,7 +261,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } if dst.NumBytes() == 0 { @@ -282,7 +282,7 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs. // PWrite implements vfs.FileDescriptionImpl. func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Write implements vfs.FileDescriptionImpl. @@ -290,7 +290,7 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs // All flags other than RWF_NOWAIT should be ignored. // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. if opts.Flags != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } t := kernel.TaskFromContext(ctx) diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD index 3e801182c..7f02807c5 100644 --- a/pkg/sentry/state/BUILD +++ b/pkg/sentry/state/BUILD @@ -13,6 +13,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sentry/inet", "//pkg/sentry/kernel", @@ -20,7 +21,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/state/statefile", - "//pkg/syserror", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go index 2f0aba4e2..e9d544f3d 100644 --- a/pkg/sentry/state/state.go +++ b/pkg/sentry/state/state.go @@ -20,6 +20,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -27,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/state/statefile" - "gvisor.dev/gvisor/pkg/syserror" ) var previousMetadata map[string]string @@ -88,7 +88,7 @@ func (opts SaveOpts) Save(ctx context.Context, k *kernel.Kernel, w *watchdog.Wat // ENOSPC is a state file error. This error can only come from // writing the state file, and not from fs.FileOperations.Fsync // because we wrap those in kernel.TaskSet.flushWritesToFiles. - if err == syserror.ENOSPC { + if linuxerr.Equals(linuxerr.ENOSPC, err) { err = ErrStateFile{err} } diff --git a/pkg/sentry/state/state_metadata.go b/pkg/sentry/state/state_metadata.go index cefd20b9b..c42297c80 100644 --- a/pkg/sentry/state/state_metadata.go +++ b/pkg/sentry/state/state_metadata.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package state import ( diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go index 6ce1bb592..317c3c31c 100644 --- a/pkg/sentry/strace/linux64_amd64.go +++ b/pkg/sentry/strace/linux64_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build amd64 // +build amd64 package strace diff --git a/pkg/sentry/strace/linux64_arm64.go b/pkg/sentry/strace/linux64_arm64.go index ce5594301..65f27c810 100644 --- a/pkg/sentry/strace/linux64_arm64.go +++ b/pkg/sentry/strace/linux64_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package strace diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index af7088847..757ff2a40 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -133,6 +133,9 @@ func dump(t *kernel.Task, addr hostarch.Addr, size uint, maximumBlobSize uint) s } func path(t *kernel.Task, addr hostarch.Addr) string { + if addr == 0 { + return "<null>" + } path, err := t.CopyInString(addr, linux.PATH_MAX) if err != nil { return fmt.Sprintf("%#x (error decoding path: %s)", addr, err) @@ -816,10 +819,10 @@ func convertToSyscallFlag(sinks SinkType) uint32 { return ret } -// Enable enables the syscalls in whitelist in all syscall tables. +// Enable enables the syscalls in allowlist in all syscall tables. // // Preconditions: Initialize has been called. -func Enable(whitelist []string, sinks SinkType) error { +func Enable(allowlist []string, sinks SinkType) error { flags := convertToSyscallFlag(sinks) for _, table := range kernel.SyscallTables() { // Is this known? @@ -829,7 +832,7 @@ func Enable(whitelist []string, sinks SinkType) error { } // Convert to a set of system calls numbers. - wl, err := sys.ConvertToSysnoMap(whitelist) + wl, err := sys.ConvertToSysnoMap(allowlist) if err != nil { return err } diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index b8d1bd415..f2c55588f 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -11,6 +11,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/errors/linuxerr", "//pkg/sentry/arch", "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go index 3b4d79889..a69ed0746 100644 --- a/pkg/sentry/syscalls/epoll.go +++ b/pkg/sentry/syscalls/epoll.go @@ -18,10 +18,10 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -45,21 +45,21 @@ func AddEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask // Get epoll from the file descriptor. epollfile := t.GetFile(epfd) if epollfile == nil { - return syserror.EBADF + return linuxerr.EBADF } defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { - return syserror.EBADF + return linuxerr.EBADF } defer file.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) if !ok { - return syserror.EBADF + return linuxerr.EBADF } // Try to add the entry. @@ -71,21 +71,21 @@ func UpdateEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, m // Get epoll from the file descriptor. epollfile := t.GetFile(epfd) if epollfile == nil { - return syserror.EBADF + return linuxerr.EBADF } defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { - return syserror.EBADF + return linuxerr.EBADF } defer file.DecRef(t) // Extract the epollPoll operations. 
e, ok := epollfile.FileOperations.(*epoll.EventPoll) if !ok { - return syserror.EBADF + return linuxerr.EBADF } // Try to update the entry. @@ -97,21 +97,21 @@ func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error { // Get epoll from the file descriptor. epollfile := t.GetFile(epfd) if epollfile == nil { - return syserror.EBADF + return linuxerr.EBADF } defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { - return syserror.EBADF + return linuxerr.EBADF } defer file.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) if !ok { - return syserror.EBADF + return linuxerr.EBADF } // Try to remove the entry. @@ -123,14 +123,14 @@ func WaitEpoll(t *kernel.Task, fd int32, max int, timeoutInNanos int64) ([]linux // Get epoll from the file descriptor. epollfile := t.GetFile(fd) if epollfile == nil { - return nil, syserror.EBADF + return nil, linuxerr.EBADF } defer epollfile.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) if !ok { - return nil, syserror.EBADF + return nil, linuxerr.EBADF } // Try to read events and return right away if we got them or if the @@ -163,7 +163,7 @@ func WaitEpoll(t *kernel.Task, fd int32, max int, timeoutInNanos int64) ([]linux } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return nil, nil } diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 408a6c422..a2f612f45 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -64,6 +64,7 @@ go_library( "//pkg/abi/linux", "//pkg/bpf", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/marshal", diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 6eabfd219..76389fbe3 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -94,13 +95,13 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er if errno, ok := syserror.TranslateError(errOrig); ok { translatedErr = errno } - switch translatedErr { - case io.EOF: + switch { + case translatedErr == io.EOF: // EOF is always consumed. If this is a partial read/write // (result != 0), the application will see that, otherwise // they will see 0. return true, nil - case syserror.EFBIG: + case linuxerr.Equals(linuxerr.EFBIG, translatedErr): t := kernel.TaskFromContext(ctx) if t == nil { panic("I/O error should only occur from a context associated with a Task") @@ -112,8 +113,8 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er // Do not consume the error and return it as EFBIG. // Simultaneously send a SIGXFSZ per setrlimit(2). t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t)) - return true, syserror.EFBIG - case syserror.EINTR: + return true, linuxerr.EFBIG + case linuxerr.Equals(linuxerr.EINTR, translatedErr): // The syscall was interrupted. Return nil if it completed // partially, otherwise return the error code that the syscall // needs (to indicate to the kernel what it should do). 
@@ -128,21 +129,21 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er return true, errOrig } - switch translatedErr { - case syserror.EINTR: + switch { + case linuxerr.Equals(linuxerr.EINTR, translatedErr): // Syscall interrupted, but completed a partial // read/write. Like ErrWouldBlock, since we have a // partial read/write, we consume the error and return // the partial result. return true, nil - case syserror.EFAULT: + case linuxerr.Equals(linuxerr.EFAULT, translatedErr): // EFAULT is only shown the user if nothing was // read/written. If we read something (this case), they see // a partial read/write. They will then presumably try again // with an incremented buffer, which will EFAULT with // result == 0. return true, nil - case syserror.EPIPE: + case linuxerr.Equals(linuxerr.EPIPE, translatedErr): // Writes to a pipe or socket will return EPIPE if the other // side is gone. The partial write is returned. EPIPE will be // returned on the next call. @@ -150,15 +151,17 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr er // TODO(gvisor.dev/issue/161): In some cases SIGPIPE should // also be sent to the application. return true, nil - case syserror.ENOSPC: + case linuxerr.Equals(linuxerr.ENOSPC, translatedErr): // Similar to EPIPE. Return what we wrote this time, and let // ENOSPC be returned on the next call. return true, nil - case syserror.ECONNRESET, syserror.ETIMEDOUT: + case linuxerr.Equals(linuxerr.ECONNRESET, translatedErr): + fallthrough + case linuxerr.Equals(linuxerr.ETIMEDOUT, translatedErr): // For TCP sendfile connections, we may have a reset or timeout. But we // should just return n as the result. return true, nil - case syserror.EWOULDBLOCK: + case linuxerr.Equals(linuxerr.EWOULDBLOCK, translatedErr): // Syscall would block, but completed a partial read/write. // This case should only be returned by IssueIO for nonblocking // files. Since we have a partial read/write, we consume diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 090c5ffcb..f1cb5a2c8 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -18,6 +18,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -187,7 +188,7 @@ var AMD64 = &kernel.SyscallTable{ 132: syscalls.Supported("utime", Utime), 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. 
Only regular file and FIFO creation are supported.", nil), 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil), - 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), + 135: syscalls.ErrorWithEvent("personality", linuxerr.EINVAL, "Unable to change personality.", nil), 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil), 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), @@ -200,15 +201,15 @@ var AMD64 = &kernel.SyscallTable{ 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), - 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), + 148: syscalls.ErrorWithEvent("sched_rr_get_interval", linuxerr.EPERM, "", nil), 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), - 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil), - 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil), - 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil), + 154: syscalls.Error("modify_ldt", linuxerr.EPERM, "", nil), + 155: syscalls.Error("pivot_root", linuxerr.EPERM, "", nil), + 156: syscalls.Error("sysctl", linuxerr.EPERM, "Deprecated. 
Use /proc/sys instead.", nil), 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil), 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), @@ -300,9 +301,9 @@ var AMD64 = &kernel.SyscallTable{ 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), 247: syscalls.Supported("waitid", Waitid), - 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), - 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), - 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), + 248: syscalls.Error("add_key", linuxerr.EACCES, "Not available to user.", nil), + 249: syscalls.Error("request_key", linuxerr.EACCES, "Not available to user.", nil), + 250: syscalls.Error("keyctl", linuxerr.EACCES, "Not available to user.", nil), 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "Inotify events are only available inside the sandbox. Hard links are treated as different watch targets in gofer fs.", nil), @@ -350,17 +351,17 @@ var AMD64 = &kernel.SyscallTable{ 295: syscalls.Supported("preadv", Preadv), 296: syscalls.Supported("pwritev", Pwritev), 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), - 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), + 298: syscalls.ErrorWithEvent("perf_event_open", linuxerr.ENODEV, "No support for perf counters", nil), 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 302: syscalls.Supported("prlimit64", Prlimit64), - 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 303: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 304: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), - 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) + 308: syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) 309: syscalls.Supported("getcpu", Getcpu), 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), @@ -470,7 
+471,7 @@ var ARM64 = &kernel.SyscallTable{ 38: syscalls.Supported("renameat", Renameat), 39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), 40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), - 41: syscalls.Error("pivot_root", syserror.EPERM, "", nil), + 41: syscalls.Error("pivot_root", linuxerr.EPERM, "", nil), 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), 43: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), 44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), @@ -521,7 +522,7 @@ var ARM64 = &kernel.SyscallTable{ 89: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), 90: syscalls.Supported("capget", Capget), 91: syscalls.Supported("capset", Capset), - 92: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), + 92: syscalls.ErrorWithEvent("personality", linuxerr.EINVAL, "Unable to change personality.", nil), 93: syscalls.Supported("exit", Exit), 94: syscalls.Supported("exit_group", ExitGroup), 95: syscalls.Supported("waitid", Waitid), @@ -556,7 +557,7 @@ var ARM64 = &kernel.SyscallTable{ 124: syscalls.Supported("sched_yield", SchedYield), 125: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), 126: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), - 127: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), + 127: syscalls.ErrorWithEvent("sched_rr_get_interval", linuxerr.EPERM, "", nil), 128: syscalls.Supported("restart_syscall", RestartSyscall), 129: syscalls.Supported("kill", Kill), 130: syscalls.Supported("tkill", Tkill), @@ -646,9 +647,9 @@ var ARM64 = &kernel.SyscallTable{ 214: syscalls.Supported("brk", Brk), 215: syscalls.Supported("munmap", Munmap), 216: syscalls.Supported("mremap", Mremap), - 217: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), - 218: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), - 219: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), + 217: syscalls.Error("add_key", linuxerr.EACCES, "Not available to user.", nil), + 218: syscalls.Error("request_key", linuxerr.EACCES, "Not available to user.", nil), + 219: syscalls.Error("keyctl", linuxerr.EACCES, "Not available to user.", nil), 220: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), 221: syscalls.Supported("execve", Execve), 222: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. 
Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), @@ -670,18 +671,18 @@ var ARM64 = &kernel.SyscallTable{ 238: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), 239: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) 240: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), - 241: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), + 241: syscalls.ErrorWithEvent("perf_event_open", linuxerr.ENODEV, "No support for perf counters", nil), 242: syscalls.Supported("accept4", Accept4), 243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), 260: syscalls.Supported("wait4", Wait4), 261: syscalls.Supported("prlimit64", Prlimit64), 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 264: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 265: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 264: syscalls.Error("name_to_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 265: syscalls.Error("open_by_handle_at", linuxerr.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), 267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), - 268: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) + 268: syscalls.ErrorWithEvent("setns", linuxerr.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) 269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index e8c2d8f9e..9dea78085 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -29,7 +30,7 @@ import ( // syscalls are moved into this package, then they can be unexported. 
func CopyInSigSet(t *kernel.Task, sigSetAddr hostarch.Addr, size uint) (linux.SignalSet, error) { if size != linux.SignalSetSize { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } b := t.CopyScratchBuffer(8) if _, err := t.CopyInBytes(sigSetAddr, b); err != nil { diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index 70e8569a8..4ce3430e2 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -17,6 +17,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -42,7 +43,7 @@ func IoSetup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, err } if idIn != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } id, err := t.MemoryManager().NewAIOContext(t, uint32(nrEvents)) @@ -66,7 +67,7 @@ func IoDestroy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys ctx := t.MemoryManager().DestroyAIOContext(t, id) if ctx == nil { // Does not exist. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Drain completed requests amd wait for pending requests until there are no @@ -97,12 +98,12 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Sanity check arguments. if minEvents < 0 || minEvents > events { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ctx, ok := t.MemoryManager().LookupAIOContext(t, id) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Setup the timeout. @@ -114,7 +115,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S return 0, nil, err } if !d.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } deadline = t.Kernel().MonotonicClock().Now().Add(d.ToDuration()) haveDeadline = true @@ -134,7 +135,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S var err error v, err = waitForRequest(ctx, t, haveDeadline, deadline) if err != nil { - if count > 0 || err == syserror.ETIMEDOUT { + if count > 0 || linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return uintptr(count), nil, nil } return 0, nil, syserror.ConvertIntr(err, syserror.EINTR) @@ -171,7 +172,7 @@ func waitForRequest(ctx *mm.AIOContext, t *kernel.Task, haveDeadline bool, deadl done := ctx.WaitChannel() if done == nil { // Context has been destroyed. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if err := t.BlockWithDeadline(done, haveDeadline, deadline); err != nil { return nil, err @@ -184,7 +185,7 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) bytes := int(cb.Bytes) if bytes < 0 { // Linux also requires that this field fit in ssize_t. - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } // Since this I/O will be asynchronous with respect to t's task goroutine, @@ -206,7 +207,7 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) default: // Not a supported command. - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } } @@ -269,7 +270,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr host file := t.GetFile(cb.FD) if file == nil { // File not found. 
- return syserror.EBADF + return linuxerr.EBADF } defer file.DecRef(t) @@ -279,14 +280,14 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr host eventFile = t.GetFile(cb.ResFD) if eventFile == nil { // Bad FD. - return syserror.EBADF + return linuxerr.EBADF } defer eventFile.DecRef(t) // Check that it is an eventfd. if _, ok := eventFile.FileOperations.(*eventfd.EventOperations); !ok { // Not an event FD. - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -299,14 +300,14 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr host switch cb.OpCode { case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV: if cb.Offset < 0 { - return syserror.EINVAL + return linuxerr.EINVAL } } // Prepare the request. ctx, ok := t.MemoryManager().LookupAIOContext(t, id) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } if err := ctx.Prepare(); err != nil { return err @@ -335,7 +336,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc addr := args[2].Pointer() if nrEvents < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } for i := int32(0); i < nrEvents; i++ { diff --git a/pkg/sentry/syscalls/linux/sys_capability.go b/pkg/sentry/syscalls/linux/sys_capability.go index d3b85e11b..1e714503c 100644 --- a/pkg/sentry/syscalls/linux/sys_capability.go +++ b/pkg/sentry/syscalls/linux/sys_capability.go @@ -16,22 +16,22 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) func lookupCaps(t *kernel.Task, tid kernel.ThreadID) (permitted, inheritable, effective auth.CapabilitySet, err error) { if tid < 0 { - err = syserror.EINVAL + err = linuxerr.EINVAL return } if tid > 0 { t = t.PIDNamespace().TaskWithID(tid) } if t == nil { - err = syserror.ESRCH + err = linuxerr.ESRCH return } creds := t.Credentials() @@ -97,7 +97,7 @@ func Capget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } if dataAddr != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil } @@ -115,7 +115,7 @@ func Capset(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal switch hdr.Version { case linux.LINUX_CAPABILITY_VERSION_1: if tid := kernel.ThreadID(hdr.Pid); tid != 0 && tid != t.ThreadID() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } var data linux.CapUserData if _, err := data.CopyIn(t, dataAddr); err != nil { @@ -128,7 +128,7 @@ func Capset(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.LINUX_CAPABILITY_VERSION_2, linux.LINUX_CAPABILITY_VERSION_3: if tid := kernel.ThreadID(hdr.Pid); tid != 0 && tid != t.ThreadID() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } var data [2]linux.CapUserData if _, err := linux.CopyCapUserDataSliceIn(t, dataAddr, data[:]); err != nil { @@ -144,6 +144,6 @@ func Capset(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if _, err := hdr.CopyOut(t, hdrAddr); err != nil { return 0, nil, err } - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_clone_amd64.go b/pkg/sentry/syscalls/linux/sys_clone_amd64.go index dd43cf18d..2b2dbd9f9 100644 --- a/pkg/sentry/syscalls/linux/sys_clone_amd64.go +++ 
b/pkg/sentry/syscalls/linux/sys_clone_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_clone_arm64.go b/pkg/sentry/syscalls/linux/sys_clone_arm64.go index cf68a8949..877c86e6a 100644 --- a/pkg/sentry/syscalls/linux/sys_clone_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_clone_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 69cbc98d0..daa151bb4 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -31,7 +32,7 @@ import ( func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := args[0].Int() if flags & ^linux.EPOLL_CLOEXEC != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } closeOnExec := flags&linux.EPOLL_CLOEXEC != 0 @@ -48,7 +49,7 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S size := args[0].Int() if size <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } fd, err := syscalls.CreateEpoll(t, false) @@ -101,7 +102,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc mask |= waiter.EventHUp | waiter.EventErr return 0, nil, syscalls.UpdateEpoll(t, epfd, fd, flags, mask, data) default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go index 3b4f879e4..7ba9a755e 100644 --- a/pkg/sentry/syscalls/linux/sys_eventfd.go +++ b/pkg/sentry/syscalls/linux/sys_eventfd.go @@ -16,11 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" - "gvisor.dev/gvisor/pkg/syserror" ) // Eventfd2 implements linux syscall eventfd2(2). @@ -30,7 +30,7 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc allOps := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC) if flags & ^allOps != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } event := eventfd.New(t, uint64(initVal), flags&linux.EFD_SEMAPHORE != 0) diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 90a719ba2..3528d325f 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -18,6 +18,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -79,12 +80,12 @@ func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(ro // Need to extract the given FD. 
f = t.GetFile(dirFD) if f == nil { - return syserror.EBADF + return linuxerr.EBADF } rel = f.Dirent if !fs.IsDir(rel.Inode.StableAttr) { f.DecRef(t) - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } @@ -152,7 +153,7 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin } if fs.IsSymlink(d.Inode.StableAttr) && !resolve { - return syserror.ELOOP + return linuxerr.ELOOP } fileFlags := linuxToFlags(flags) @@ -166,11 +167,11 @@ func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uin } else { // If O_DIRECTORY is set, but the file is not a directory, then fail. if fileFlags.Directory { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // If it's a directory, then make sure. if dirPath { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } } @@ -219,7 +220,7 @@ func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Do we have the appropriate permissions on the parent? @@ -260,7 +261,7 @@ func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod // Instead of emulating this seemingly useless behaviour, we'll // indicate that the filesystem doesn't support the creation of // sockets. - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP case linux.ModeCharacterDevice: fallthrough @@ -270,12 +271,12 @@ func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod // // When we start supporting block and character devices, we'll // need to check for CAP_MKNOD here. - return syserror.EPERM + return linuxerr.EPERM default: // "EINVAL - mode requested creation of something other than a // regular file, device special file, FIFO or socket." - mknod(2) - return syserror.EINVAL + return linuxerr.EINVAL } }) } @@ -325,7 +326,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode ) for { if !fs.IsDir(parent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Start by looking up the dirent at 'name'. @@ -339,7 +340,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode // O_EXCL flag was passed, then we can immediately // return EEXIST. if flags&linux.O_EXCL != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } // If we have a non-symlink, then we can proceed. @@ -350,7 +351,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode // If O_NOFOLLOW was passed, then don't try to resolve // anything. if flags&linux.O_NOFOLLOW != 0 { - return syserror.ELOOP + return linuxerr.ELOOP } // Try to resolve the symlink directly to a Dirent. @@ -394,8 +395,8 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode } var newFile *fs.File - switch err { - case nil: + switch { + case err == nil: // Like sys_open, check for a few things about the // filesystem before trying to get a reference to the // fs.File. The same constraints on Check apply. @@ -418,7 +419,7 @@ func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode return syserror.ConvertIntr(err, syserror.ERESTARTSYS) } defer newFile.DecRef(t) - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // File does not exist. Proceed with creation. // Do we have write permissions on the parent? 
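The sys_file.go hunks follow the same two-step recipe: errnos returned to the caller move from syserror.* to the identical linuxerr.* names (EBADF, ENOTDIR, ELOOP, and so on), and lookups that previously switched on the error value (case nil / case syserror.ENOENT) become tagless switches using linuxerr.Equals, as in the createAt hunk above. A small self-contained sketch of that lookup shape, where lookup and create are hypothetical stand-ins for the real FindInode and Create calls:

package example

import "gvisor.dev/gvisor/pkg/errors/linuxerr"

// lookupThenCreate mimics the createAt flow above: an existing entry
// with O_EXCL fails with EEXIST, a missing entry falls through to
// creation, and any other lookup error is propagated unchanged.
func lookupThenCreate(lookup, create func(name string) error, name string, excl bool) error {
	err := lookup(name)
	switch {
	case err == nil:
		if excl {
			return linuxerr.EEXIST
		}
		return nil
	case linuxerr.Equals(linuxerr.ENOENT, err):
		return create(name)
	default:
		return err
	}
}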
@@ -527,7 +528,7 @@ func accessAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode uint) error // Sanity check the mode. if mode&^(rOK|wOK|xOK) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } return fileOpOn(t, dirFD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { @@ -595,7 +596,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -684,7 +685,7 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Note this is >= because we need a terminator. if uint(len(s)) >= size { - return 0, nil, syserror.ERANGE + return 0, nil, linuxerr.ERANGE } // Copy out the path name for the node. @@ -703,7 +704,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal addr := args[0].Pointer() if !t.HasCapability(linux.CAP_SYS_CHROOT) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } path, _, err := copyInPath(t, addr, false /* allowEmpty */) @@ -714,7 +715,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Does it have execute permissions? @@ -739,7 +740,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Does it have execute permissions? @@ -758,13 +759,13 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Is it a directory? if !fs.IsDir(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.ENOTDIR + return 0, nil, linuxerr.ENOTDIR } // Does it have execute permissions? @@ -789,7 +790,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // (and other reference-holding operations complete). 
file, _ := t.FDTable().Remove(t, fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -803,13 +804,13 @@ func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{}) if err != nil { - return 0, nil, syserror.EMFILE + return 0, nil, linuxerr.EMFILE } return uintptr(newFD), nil, nil } @@ -824,7 +825,7 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if oldfd == newfd { oldFile := t.GetFile(oldfd) if oldFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer oldFile.DecRef(t) @@ -843,12 +844,12 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC flags := args[2].Uint() if oldfd == newfd { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } oldFile := t.GetFile(oldfd) if oldFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer oldFile.DecRef(t) @@ -905,7 +906,7 @@ func fSetOwn(t *kernel.Task, fd int, file *fs.File, who int32) error { if who < 0 { // Check for overflow before flipping the sign. if who-1 > who { - return syserror.EINVAL + return linuxerr.EINVAL } pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(-who)) a.SetOwnerProcessGroup(t, pg) @@ -923,7 +924,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file, flags := t.FDTable().Get(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -956,7 +957,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Normally pipe and socket types lack lock operations. We diverge and use a heavy // hammer by only allowing locks on files and directories. if !fs.IsFile(file.Dirent.Inode.StableAttr) && !fs.IsDir(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Copy in the lock request. @@ -976,7 +977,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case 2: sw = fs.SeekEnd default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Compute the lock offset. @@ -995,7 +996,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } off = uattr.Size default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Compute the lock range. @@ -1009,12 +1010,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch flock.Type { case linux.F_RDLCK: if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) { - return 0, nil, syserror.EAGAIN + return 0, nil, linuxerr.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. @@ -1025,12 +1026,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, nil case linux.F_WRLCK: if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. 
if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) { - return 0, nil, syserror.EAGAIN + return 0, nil, linuxerr.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. @@ -1043,7 +1044,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file.Dirent.Inode.LockCtx.Posix.UnlockRegion(t.FDTable(), rng) return 0, nil, nil default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } case linux.F_GETOWN: return uintptr(fGetOwn(t, file)), nil, nil @@ -1066,47 +1067,47 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_OWNER_TID: task := t.PIDNamespace().TaskWithID(kernel.ThreadID(owner.PID)) if task == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } a.SetOwnerTask(t, task) return 0, nil, nil case linux.F_OWNER_PID: tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(owner.PID)) if tg == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } a.SetOwnerThreadGroup(t, tg) return 0, nil, nil case linux.F_OWNER_PGRP: pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(owner.PID)) if pg == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } a.SetOwnerProcessGroup(t, pg) return 0, nil, nil default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } case linux.F_GET_SEALS: val, err := tmpfs.GetSeals(file.Dirent.Inode) return uintptr(val), nil, err case linux.F_ADD_SEALS: if !file.Flags().Write { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } err := tmpfs.AddSeals(file.Dirent.Inode, args[2].Uint()) return 0, nil, err case linux.F_GETPIPE_SZ: sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } size, err := sz.FifoSize(t, file) return uintptr(size), nil, err case linux.F_SETPIPE_SZ: sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } n, err := sz.SetFifoSize(int64(args[2].Int())) return uintptr(n), nil, err @@ -1118,7 +1119,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, a.SetSignal(linux.Signal(args[2].Int())) default: // Everything else is not yet supported. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -1131,18 +1132,18 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Note: offset is allowed to be negative. if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // If the FD refers to a pipe or FIFO, return error. if fs.IsPipe(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } switch advice { @@ -1153,7 +1154,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys case linux.POSIX_FADV_DONTNEED: case linux.POSIX_FADV_NOREUSE: default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Sure, whatever. @@ -1172,18 +1173,18 @@ func mkdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMod return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Does this directory exist already? 
remainingTraversals := uint(linux.MaxSymlinkTraversals) f, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals) - switch err { - case nil: + switch { + case err == nil: // The directory existed. defer f.DecRef(t) - return syserror.EEXIST - case syserror.EACCES: + return linuxerr.EEXIST + case linuxerr.Equals(linuxerr.EACCES, err): // Permission denied while walking to the directory. return err default: @@ -1224,21 +1225,21 @@ func rmdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { // Special case: removing the root always returns EBUSY. if path == "/" { - return syserror.EBUSY + return linuxerr.EBUSY } return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Linux returns different ernos when the path ends in single // dot vs. double dots. switch name { case ".": - return syserror.EINVAL + return linuxerr.EINVAL case "..": - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } if err := d.MayDelete(t, root, name); err != nil { @@ -1277,7 +1278,7 @@ func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hosta return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Make sure we have write permissions on the parent directory. @@ -1329,10 +1330,10 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error { // If we are not the owner, then the file must be regular and have // Read+Write permissions. if !fs.IsRegular(target.StableAttr) { - return syserror.EPERM + return linuxerr.EPERM } if target.CheckPermission(t, fs.PermMask{Read: true, Write: true}) != nil { - return syserror.EPERM + return linuxerr.EPERM } return nil @@ -1357,7 +1358,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int3 if allowEmpty && oldPath == "" { target := t.GetFile(oldDirFD) if target == nil { - return syserror.EBADF + return linuxerr.EBADF } defer target.DecRef(t) if err := mayLinkAt(t, target.Dirent.Inode); err != nil { @@ -1367,7 +1368,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int3 // Resolve the target directory. return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Make sure we have write permissions on the parent directory. @@ -1388,7 +1389,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int3 // Next resolve newDirFD and newAddr to the parent dirent and name. return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Make sure we have write permissions on the parent directory. @@ -1431,7 +1432,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Sanity check flags. 
if flags&^(linux.AT_SYMLINK_FOLLOW|linux.AT_EMPTY_PATH) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } resolve := flags&linux.AT_SYMLINK_FOLLOW == linux.AT_SYMLINK_FOLLOW @@ -1464,8 +1465,8 @@ func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarc } s, err := d.Inode.Readlink(t) - if err == syserror.ENOLINK { - return syserror.EINVAL + if linuxerr.Equals(linuxerr.ENOLINK, err) { + return linuxerr.EINVAL } if err != nil { return err @@ -1519,7 +1520,7 @@ func unlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } if err := d.MayDelete(t, root, name); err != nil { @@ -1557,7 +1558,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc length := args[1].Int64() if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) @@ -1565,7 +1566,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } if dirPath { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { @@ -1573,7 +1574,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc Signo: int32(linux.SIGXFSZ), Code: linux.SI_USER, }) - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { @@ -1583,7 +1584,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // In contrast to open(O_TRUNC), truncate(2) is only valid for file // types. if !fs.IsFile(d.Inode.StableAttr) { - return syserror.EINVAL + return linuxerr.EINVAL } // Reject truncation if the access permissions do not allow truncation. @@ -1610,25 +1611,25 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Reject truncation if the file flags do not permit this operation. // This is different from truncate(2) above. if !file.Flags().Write { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // In contrast to open(O_TRUNC), truncate(2) is only valid for file // types. Note that this is different from truncate(2) above, where a // directory returns EISDIR. if !fs.IsFile(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { @@ -1636,7 +1637,7 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys Signo: int32(linux.SIGXFSZ), Code: linux.SI_USER, }) - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } if err := file.Dirent.Inode.Truncate(t, file.Dirent, length); err != nil { @@ -1682,7 +1683,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { kuid := c.UserNamespace.MapToKUID(uid) // Valid UID must be supplied if UID is to be changed. 
if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // "Only a privileged process (CAP_CHOWN) may change the owner @@ -1692,7 +1693,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { // explicitly not changing its UID. isNoop := uattr.Owner.UID == kuid if !(hasCap || (isOwner && isNoop)) { - return syserror.EPERM + return linuxerr.EPERM } // The setuid and setgid bits are cleared during a chown. @@ -1706,7 +1707,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { kgid := c.UserNamespace.MapToKGID(gid) // Valid GID must be supplied if GID is to be changed. if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // "The owner of a file may change the group of the file to any @@ -1715,7 +1716,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { isNoop := uattr.Owner.GID == kgid isMemberGroup := c.InGroup(kgid) if !(hasCap || (isOwner && (isNoop || isMemberGroup))) { - return syserror.EPERM + return linuxerr.EPERM } // The setuid and setgid bits are cleared during a chown. @@ -1737,7 +1738,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { if clearPrivilege && uattr.Perms.HasSetUIDOrGID() && !fs.IsDir(d.Inode.StableAttr) { uattr.Perms.DropSetUIDAndMaybeGID() if !d.Inode.SetPermissions(t, d, uattr.Perms) { - return syserror.EPERM + return linuxerr.EPERM } } @@ -1754,7 +1755,7 @@ func chownAt(t *kernel.Task, fd int32, addr hostarch.Addr, resolve, allowEmpty b // Annoying. What's wrong with fchown? file := t.GetFile(fd) if file == nil { - return syserror.EBADF + return linuxerr.EBADF } defer file.DecRef(t) @@ -1792,7 +1793,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -1808,7 +1809,7 @@ func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags := args[4].Int() if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, chownAt(t, dirFD, addr, flags&linux.AT_SYMLINK_NOFOLLOW == 0, flags&linux.AT_EMPTY_PATH != 0, uid, gid) @@ -1817,12 +1818,12 @@ func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error { // Must own file to change mode. if !d.Inode.CheckOwnership(t) { - return syserror.EPERM + return linuxerr.EPERM } p := fs.FilePermsFromMode(mode) if !d.Inode.SetPermissions(t, d, p) { - return syserror.EPERM + return linuxerr.EPERM } // File attribute changed, generate notification. @@ -1857,7 +1858,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -1888,7 +1889,7 @@ func utimes(t *kernel.Task, dirFD int32, addr hostarch.Addr, ts fs.TimeSpec, res if !d.Inode.CheckOwnership(t) { // Trying to set a specific time? Must be owner. if (ts.ATimeOmit || !ts.ATimeSetSystemTime) && (ts.MTimeOmit || !ts.MTimeSetSystemTime) { - return syserror.EPERM + return linuxerr.EPERM } // Trying to set to current system time? Must have write access. 
@@ -1913,11 +1914,11 @@ func utimes(t *kernel.Task, dirFD int32, addr hostarch.Addr, ts fs.TimeSpec, res if addr == 0 && dirFD != linux.AT_FDCWD { if !resolve { // Linux returns EINVAL in this case. See utimes.c. - return syserror.EINVAL + return linuxerr.EINVAL } f := t.GetFile(dirFD) if f == nil { - return syserror.EBADF + return linuxerr.EBADF } defer f.DecRef(t) @@ -1996,7 +1997,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } if !timespecIsValid(times[0]) || !timespecIsValid(times[1]) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // If both are UTIME_OMIT, this is a noop. @@ -2031,7 +2032,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } if times[0].Usec >= 1e6 || times[0].Usec < 0 || times[1].Usec >= 1e6 || times[1].Usec < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ts = fs.TimeSpec{ @@ -2058,26 +2059,26 @@ func renameAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD in return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string, _ uint) error { if !fs.IsDir(oldParent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Rename rejects paths that end in ".", "..", or empty (i.e. // the root) with EBUSY. switch oldName { case "", ".", "..": - return syserror.EBUSY + return linuxerr.EBUSY } return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Rename rejects paths that end in ".", "..", or empty // (i.e. the root) with EBUSY. switch newName { case "", ".", "..": - return syserror.EBUSY + return linuxerr.EBUSY } return fs.Rename(t, root, oldParent, oldName, newParent, newName) @@ -2112,39 +2113,39 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if offset < 0 || length <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if mode != 0 { t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.ENOTSUP + return 0, nil, linuxerr.ENOTSUP } if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } if fs.IsPipe(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } if fs.IsDir(file.Dirent.Inode.StableAttr) { return 0, nil, syserror.EISDIR } if !fs.IsRegular(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.ENODEV + return 0, nil, linuxerr.ENODEV } size := offset + length if size < 0 { - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { t.SendSignal(&linux.SignalInfo{ Signo: int32(linux.SIGXFSZ), Code: linux.SI_USER, }) - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } if err := file.Dirent.Inode.Allocate(t, file.Dirent, offset, length); err != nil { @@ -2165,7 +2166,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { // flock(2): EBADF fd is not an open file descriptor. 
- return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -2183,7 +2184,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) { - return 0, nil, syserror.EWOULDBLOCK + return 0, nil, linuxerr.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. @@ -2195,7 +2196,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) { - return 0, nil, syserror.EWOULDBLOCK + return 0, nil, linuxerr.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. @@ -2207,7 +2208,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file.Dirent.Inode.LockCtx.BSD.UnlockRegion(file, rng) default: // flock(2): EINVAL operation is invalid. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil @@ -2226,7 +2227,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if flags&^memfdAllFlags != 0 { // Unknown bits in flags. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } allowSeals := flags&linux.MFD_ALLOW_SEALING != 0 @@ -2237,7 +2238,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S return 0, nil, err } if len(name) > memfdMaxNameLen { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } name = memfdPrefix + name diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index eeea1613b..717cec04d 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -159,7 +160,7 @@ func tryLockPI(t *kernel.Task, addr hostarch.Addr, private bool) error { return err } if !locked { - return syserror.EWOULDBLOCK + return linuxerr.EWOULDBLOCK } return nil } @@ -210,7 +211,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // WAIT_BITSET uses an absolute timeout which is either // CLOCK_MONOTONIC or CLOCK_REALTIME. 
if mask == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, private, uint32(val), mask) return n, nil, err @@ -224,7 +225,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.FUTEX_WAKE_BITSET: if mask == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if val <= 0 { // The Linux kernel wakes one waiter even if val is @@ -295,7 +296,7 @@ func SetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel length := args[1].SizeT() if length != uint(linux.SizeOfRobustListHead) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.SetRobustList(head) return 0, nil, nil @@ -310,13 +311,13 @@ func GetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel sizeAddr := args[2].Pointer() if tid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ot := t if tid != 0 { if ot = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)); ot == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } } diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index bbba71d8f..917717e31 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -19,6 +19,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -38,7 +39,7 @@ func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc minSize := int(smallestDirent(t.Arch())) if size < minSize { // size is smaller than smallest possible dirent. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } n, err := getdents(t, fd, addr, size, (*dirent).Serialize) @@ -54,7 +55,7 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy minSize := int(smallestDirent64(t.Arch())) if size < minSize { // size is smaller than smallest possible dirent. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } n, err := getdents(t, fd, addr, size, (*dirent).Serialize64) @@ -66,7 +67,7 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy func getdents(t *kernel.Task, fd int32, addr hostarch.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { dir := t.GetFile(fd) if dir == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer dir.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/sys_identity.go b/pkg/sentry/syscalls/linux/sys_identity.go index a29d307e5..50fcadb58 100644 --- a/pkg/sentry/syscalls/linux/sys_identity.go +++ b/pkg/sentry/syscalls/linux/sys_identity.go @@ -15,10 +15,10 @@ package linux import ( + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -142,7 +142,7 @@ func Setresgid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys func Getgroups(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { size := int(args[0].Int()) if size < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } kgids := t.Credentials().ExtraKGIDs // "If size is zero, list is not modified, but the total number of @@ -151,7 +151,7 @@ func Getgroups(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return uintptr(len(kgids)), nil, nil } if size < len(kgids) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } gids := make([]auth.GID, len(kgids)) for i, kgid := range kgids { @@ -167,7 +167,7 @@ func Getgroups(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys func Setgroups(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { size := args[0].Int() if size < 0 || size > maxNGroups { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if size == 0 { return 0, nil, t.SetExtraGIDs(nil) diff --git a/pkg/sentry/syscalls/linux/sys_inotify.go b/pkg/sentry/syscalls/linux/sys_inotify.go index cf47bb9dd..b7ad1922e 100644 --- a/pkg/sentry/syscalls/linux/sys_inotify.go +++ b/pkg/sentry/syscalls/linux/sys_inotify.go @@ -16,11 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) const allFlags = int(linux.IN_NONBLOCK | linux.IN_CLOEXEC) @@ -30,7 +30,7 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. flags := int(args[0].Int()) if flags&^allFlags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } dirent := fs.NewDirent(t, anon.NewInode(t), "inotify") @@ -65,14 +65,14 @@ func fdToInotify(t *kernel.Task, fd int32) (*fs.Inotify, *fs.File, error) { file := t.GetFile(fd) if file == nil { // Invalid fd. - return nil, nil, syserror.EBADF + return nil, nil, linuxerr.EBADF } ino, ok := file.FileOperations.(*fs.Inotify) if !ok { // Not an inotify fd. file.DecRef(t) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } return ino, file, nil @@ -91,7 +91,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern // "EINVAL: The given event mask contains no valid events." 
// -- inotify_add_watch(2) if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ino, file, err := fdToInotify(t, fd) @@ -108,7 +108,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent, _ uint) error { // "IN_ONLYDIR: Only watch pathname if it is a directory." -- inotify(7) if onlyDir := mask&linux.IN_ONLYDIR != 0; onlyDir && !fs.IsDir(dirent.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Copy out to the return frame. diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go index 0046347cb..bf71a9af3 100644 --- a/pkg/sentry/syscalls/linux/sys_lseek.go +++ b/pkg/sentry/syscalls/linux/sys_lseek.go @@ -15,6 +15,7 @@ package linux import ( + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -31,7 +32,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -44,7 +45,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case 2: sw = fs.SeekEnd default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } offset, serr := file.Seek(t, sw, offset) diff --git a/pkg/sentry/syscalls/linux/sys_membarrier.go b/pkg/sentry/syscalls/linux/sys_membarrier.go index 63ee5d435..6ceedc086 100644 --- a/pkg/sentry/syscalls/linux/sys_membarrier.go +++ b/pkg/sentry/syscalls/linux/sys_membarrier.go @@ -16,9 +16,9 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Membarrier implements syscall membarrier(2). 
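Outside the handlers themselves, code that inspects these errnos is expected to match them the same way: because the constants now come from linuxerr, comparisons go through linuxerr.Equals rather than ==, as the ETIMEDOUT and ENOENT checks above do. A tiny usage sketch, with checkFlags as a hypothetical stand-in for a flag-validating handler like the membarrier cases that follow:

package example

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// checkFlags rejects any unsupported flag bits with EINVAL, the same
// way the membarrier command handling below does.
func checkFlags(flags uint32) error {
	if flags != 0 {
		return linuxerr.EINVAL
	}
	return nil
}

func main() {
	if err := checkFlags(1); linuxerr.Equals(linuxerr.EINVAL, err) {
		fmt.Println("unsupported flags rejected with EINVAL")
	}
}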
@@ -29,7 +29,7 @@ func Membarrier(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy switch cmd { case linux.MEMBARRIER_CMD_QUERY: if flags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var supportedCommands uintptr if t.Kernel().Platform.HaveGlobalMemoryBarrier() { @@ -46,58 +46,58 @@ func Membarrier(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return supportedCommands, nil, nil case linux.MEMBARRIER_CMD_GLOBAL, linux.MEMBARRIER_CMD_GLOBAL_EXPEDITED, linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED: if flags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.Kernel().Platform.HaveGlobalMemoryBarrier() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if cmd == linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED && !t.MemoryManager().IsMembarrierPrivateEnabled() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } return 0, nil, t.Kernel().Platform.GlobalMemoryBarrier() case linux.MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: if flags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.Kernel().Platform.HaveGlobalMemoryBarrier() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // no-op return 0, nil, nil case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: if flags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.Kernel().Platform.HaveGlobalMemoryBarrier() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.MemoryManager().EnableMembarrierPrivate() return 0, nil, nil case linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: if flags&^linux.MEMBARRIER_CMD_FLAG_CPU != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.RSeqAvailable() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.MemoryManager().IsMembarrierRSeqEnabled() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } // MEMBARRIER_CMD_FLAG_CPU and cpu_id are ignored since we don't have // the ability to preempt specific CPUs. return 0, nil, t.Kernel().Platform.PreemptAllCPUs() case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ: if flags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if !t.RSeqAvailable() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.MemoryManager().EnableMembarrierRSeq() return 0, nil, nil default: // Probably a command we don't implement. t.Kernel().EmitUnimplementedEvent(t) - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index 6d27f4292..6e7bcb868 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -18,10 +18,10 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -43,7 +43,7 @@ func copyInNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32) (uint64, // maxnode-1, not maxnode, as the number of bits. 
bits := maxnode - 1 if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0 - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if bits == 0 { return 0, nil @@ -58,12 +58,12 @@ func copyInNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32) (uint64, // Check that only allowed bits in the first unsigned long in the nodemask // are set. if val&^allowedNodemask != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Check that all remaining bits in the nodemask are 0. for i := 8; i < len(buf); i++ { if buf[i] != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } return val, nil @@ -74,7 +74,7 @@ func copyOutNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32, val uin // bits. bits := maxnode - 1 if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0 - return syserror.EINVAL + return linuxerr.EINVAL } if bits == 0 { return nil @@ -89,7 +89,7 @@ func copyOutNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32, val uin if bits > 64 { remAddr, ok := addr.AddLength(8) if !ok { - return syserror.EFAULT + return linuxerr.EFAULT } remUint64 := (bits - 1) / 64 if _, err := t.MemoryManager().ZeroOut(t, remAddr, int64(remUint64)*8, usermem.IOOpts{ @@ -110,7 +110,7 @@ func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. flags := args[4].Uint() if flags&^(linux.MPOL_F_NODE|linux.MPOL_F_ADDR|linux.MPOL_F_MEMS_ALLOWED) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } nodeFlag := flags&linux.MPOL_F_NODE != 0 addrFlag := flags&linux.MPOL_F_ADDR != 0 @@ -119,7 +119,7 @@ func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. // "EINVAL: The value specified by maxnode is less than the number of node // IDs supported by the system." - get_mempolicy(2) if nodemask != 0 && maxnode < maxNodes { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "If flags specifies MPOL_F_MEMS_ALLOWED [...], the mode argument is @@ -130,7 +130,7 @@ func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. // "It is not permitted to combine MPOL_F_MEMS_ALLOWED with either // MPOL_F_ADDR or MPOL_F_NODE." if nodeFlag || addrFlag { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if err := copyOutNodemask(t, nodemask, maxnode, allowedNodemask); err != nil { return 0, nil, err @@ -184,7 +184,7 @@ func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. // mm/mempolicy.c:do_get_mempolicy() doesn't special-case NULL; it will // just (usually) fail to find a VMA at address 0 and return EFAULT. if addr != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "If flags is specified as 0, then information about the calling thread's @@ -198,7 +198,7 @@ func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. policy, nodemaskVal := t.NumaPolicy() if nodeFlag { if policy&^linux.MPOL_MODE_FLAGS != linux.MPOL_INTERLEAVE { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } policy = linux.MPOL_DEFAULT // maxNodes == 1 } @@ -240,12 +240,12 @@ func Mbind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall flags := args[5].Uint() if flags&^linux.MPOL_MF_VALID != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "If MPOL_MF_MOVE_ALL is passed in flags ... [the] calling thread must be // privileged (CAP_SYS_NICE) to use this flag." 
- mbind(2) if flags&linux.MPOL_MF_MOVE_ALL != 0 && !t.HasCapability(linux.CAP_SYS_NICE) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } mode, nodemaskVal, err := copyInMempolicyNodemask(t, mode, nodemask, maxnode) @@ -264,11 +264,11 @@ func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nod mode := linux.NumaPolicy(modeWithFlags &^ linux.MPOL_MODE_FLAGS) if flags == linux.MPOL_MODE_FLAGS { // Can't specify both mode flags simultaneously. - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } if mode < 0 || mode >= linux.MPOL_MAX { // Must specify a valid mode. - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } var nodemaskVal uint64 @@ -285,22 +285,22 @@ func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nod // "nodemask must be specified as NULL." - set_mempolicy(2). This is inaccurate; // Linux allows a nodemask to be specified, as long as it is empty. if nodemaskVal != 0 { - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } case linux.MPOL_BIND, linux.MPOL_INTERLEAVE: // These require a non-empty nodemask. if nodemaskVal == 0 { - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } case linux.MPOL_PREFERRED: // This permits an empty nodemask, as long as no flags are set. if nodemaskVal == 0 && flags != 0 { - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } case linux.MPOL_LOCAL: // This requires an empty nodemask and no flags set ... if nodemaskVal != 0 || flags != 0 { - return 0, 0, syserror.EINVAL + return 0, 0, linuxerr.EINVAL } // ... and is implemented as MPOL_PREFERRED. mode = linux.MPOL_PREFERRED diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 70da0707d..cee621791 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -18,13 +18,13 @@ import ( "bytes" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Brk implements linux syscall brk(2). @@ -51,7 +51,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Require exactly one of MAP_PRIVATE and MAP_SHARED. if private == shared { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } opts := memmap.MMapOpts{ @@ -84,14 +84,14 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Convert the passed FD to a file reference. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) flags := file.Flags() // mmap unconditionally requires that the FD is readable. if !flags.Read { - return 0, nil, syserror.EACCES + return 0, nil, linuxerr.EACCES } // MAP_SHARED requires that the FD be writable for PROT_WRITE. 
if shared && !flags.Write { @@ -132,7 +132,7 @@ func Mremap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal newAddr := args[4].Pointer() if flags&^(linux.MREMAP_MAYMOVE|linux.MREMAP_FIXED) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } mayMove := flags&linux.MREMAP_MAYMOVE != 0 fixed := flags&linux.MREMAP_FIXED != 0 @@ -147,7 +147,7 @@ func Mremap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case !mayMove && fixed: // "If MREMAP_FIXED is specified, then MREMAP_MAYMOVE must also be // specified." - mremap(2) - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } rv, err := t.MemoryManager().MRemap(t, oldAddr, oldSize, newSize, mm.MRemapOpts{ @@ -178,7 +178,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // "The Linux implementation requires that the address addr be // page-aligned, and allows length to be zero." - madvise(2) if addr.RoundDown() != addr { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if length == 0 { return 0, nil, nil @@ -186,7 +186,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Not explicitly stated: length need not be page-aligned. lenAddr, ok := hostarch.Addr(length).RoundUp() if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } length = uint64(lenAddr) @@ -214,10 +214,10 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, syserror.ENOSYS case linux.MADV_HWPOISON: // Only privileged processes are allowed to poison pages. - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM default: // If adv is not a valid value tell the caller. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -228,7 +228,7 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca vec := args[2].Pointer() if addr != addr.RoundDown() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "The length argument need not be a multiple of the page size, but since // residency information is returned for whole pages, length is effectively @@ -265,11 +265,11 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // semantics that are (currently) equivalent to specifying MS_ASYNC." 
- // msync(2) if flags&^(linux.MS_ASYNC|linux.MS_SYNC|linux.MS_INVALIDATE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } sync := flags&linux.MS_SYNC != 0 if sync && flags&linux.MS_ASYNC != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } err := t.MemoryManager().MSync(t, addr, uint64(length), mm.MSyncOpts{ Sync: sync, @@ -295,7 +295,7 @@ func Mlock2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal flags := args[2].Int() if flags&^(linux.MLOCK_ONFAULT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } mode := memmap.MLockEager @@ -318,7 +318,7 @@ func Mlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags := args[0].Int() if flags&^(linux.MCL_CURRENT|linux.MCL_FUTURE|linux.MCL_ONFAULT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } mode := memmap.MLockEager diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index 864d2138c..6d26f89b9 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -16,12 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Mount implements Linux syscall mount(2). @@ -67,7 +66,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Must have CAP_SYS_ADMIN in the mount namespace's associated user // namespace. if !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().UserNamespace()) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } const unsupportedOps = linux.MS_REMOUNT | linux.MS_BIND | @@ -83,15 +82,15 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // unknown or unsupported flags are passed. Since we don't implement // everything, we fail explicitly on flags that are unimplemented. if flags&(unsupportedOps|unsupportedFlags) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } rsys, ok := fs.FindFilesystem(fsType) if !ok { - return 0, nil, syserror.ENODEV + return 0, nil, linuxerr.ENODEV } if !rsys.AllowUserMount() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } var superFlags fs.MountSourceFlags @@ -107,7 +106,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall rootInode, err := rsys.Mount(t, sourcePath, superFlags, data, nil) if err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if err := fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { @@ -130,7 +129,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE if flags&unsupported != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, _, err := copyInPath(t, addr, false /* allowEmpty */) @@ -143,7 +142,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // // Currently, this is always the init task's user namespace. 
if !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespace().UserNamespace()) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } resolve := flags&linux.UMOUNT_NOFOLLOW != linux.UMOUNT_NOFOLLOW diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index d95034347..5925c2263 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -16,13 +16,13 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -30,7 +30,7 @@ import ( // pipe2 implements the actual system call with flags. func pipe2(t *kernel.Task, addr hostarch.Addr, flags uint) (uintptr, error) { if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } r, w := pipe.NewConnectedPipe(t, pipe.DefaultPipeSize) diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index da548a14a..a80c84fcd 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -128,7 +129,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // Wait for a notification. timeout, err = t.BlockWithTimeout(ch, !forever, timeout) if err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = nil } return timeout, 0, err @@ -157,7 +158,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // CopyInPollFDs copies an array of struct pollfd unless nfds exceeds the max. func CopyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } pfd := make([]linux.PollFD, nfds) @@ -217,7 +218,7 @@ func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialB func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) { if nfds < 0 || nfds > fileCap { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Calculate the size of the fd sets (one bit per fd). @@ -264,7 +265,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad // OK. Linux is racy in the same way. file := t.GetFile(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } file.DecRef(t) @@ -404,7 +405,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) { func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) { remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout) // On an interrupt poll(2) is restarted with the remaining timeout. 
- if err == syserror.EINTR { + if linuxerr.Equals(linuxerr.EINTR, err) { t.SetSyscallRestartBlock(&pollRestartBlock{ pfdAddr: pfdAddr, nfds: nfds, @@ -463,7 +464,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // // Note that this means that if err is nil but copyErr is not, copyErr is // ignored. This is consistent with Linux. - if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err @@ -485,7 +486,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } if timeval.Sec < 0 || timeval.Usec < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } timeout = time.Duration(timeval.ToNsecCapped()) } @@ -493,7 +494,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := doSelect(t, nfds, readFDs, writeFDs, exceptFDs, timeout) copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr) // See comment in Ppoll. - if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err @@ -538,7 +539,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := doSelect(t, nfds, readFDs, writeFDs, exceptFDs, timeout) copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr) // See comment in Ppoll. - if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 9890dd946..a16b6b4d6 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" ) // Prctl implements linux syscall prctl(2). @@ -38,7 +38,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_SET_PDEATHSIG: sig := linux.Signal(args[1].Int()) if sig != 0 && !sig.IsValid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.SetParentDeathSignal(sig) return 0, nil, nil @@ -69,7 +69,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall d = mm.UserDumpable default: // N.B. Userspace may not pass SUID_DUMP_ROOT. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.MemoryManager().SetDumpability(d) return 0, nil, nil @@ -90,7 +90,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else if val == 1 { t.SetKeepCaps(true) } else { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil @@ -98,7 +98,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_SET_NAME: addr := args[1].Pointer() name, err := t.CopyInString(addr, linux.TASK_COMM_LEN-1) - if err != nil && err != syserror.ENAMETOOLONG { + if err != nil && !linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { return 0, nil, err } t.SetName(name) @@ -118,7 +118,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_SET_MM: if !t.HasCapability(linux.CAP_SYS_RESOURCE) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } switch args[1].Int() { @@ -127,13 +127,13 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // They trying to set exe to a non-file? if !fs.IsFile(file.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Set the underlying executable. @@ -155,12 +155,12 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall t.Kernel().EmitUnimplementedEvent(t) fallthrough default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } case linux.PR_SET_NO_NEW_PRIVS: if args[1].Int() != 1 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // PR_SET_NO_NEW_PRIVS is assumed to always be set. // See kernel.Task.updateCredsForExecLocked. @@ -168,7 +168,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_GET_NO_NEW_PRIVS: if args[1].Int() != 0 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 1, nil, nil @@ -184,7 +184,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall default: tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) if tracer == nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } t.SetYAMAException(tracer) return 0, nil, nil @@ -193,7 +193,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_SET_SECCOMP: if args[1].Int() != linux.SECCOMP_MODE_FILTER { // Unsupported mode. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, seccomp(t, linux.SECCOMP_SET_MODE_FILTER, 0, args[2].Pointer()) @@ -204,7 +204,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_CAPBSET_READ: cp := linux.Capability(args[1].Uint64()) if !cp.Ok() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var rv uintptr if auth.CapabilitySetOf(cp)&t.Credentials().BoundingCaps != 0 { @@ -215,7 +215,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.PR_CAPBSET_DROP: cp := linux.Capability(args[1].Uint64()) if !cp.Ok() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, t.DropBoundingCapability(cp) @@ -240,7 +240,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall t.Kernel().EmitUnimplementedEvent(t) fallthrough default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index ae545f80f..f86e87bc7 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -18,14 +18,13 @@ import ( "io" "math" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/pkg/hostarch" ) const ( @@ -47,7 +46,7 @@ func GetRandom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Flags are checked for validity but otherwise ignored. See above. if flags & ^(_GRND_NONBLOCK|_GRND_RANDOM) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if length > math.MaxInt32 { @@ -55,7 +54,7 @@ func GetRandom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } ar, ok := addr.ToRange(uint64(length)) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } // "If the urandom source has been initialized, reads of up to 256 bytes diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index 13e5e3a51..b54a3a11f 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -46,19 +47,19 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -82,29 +83,29 @@ func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is valid. 
if int(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Return EINVAL; if the underlying file type does not support readahead, // then Linux will return EINVAL to indicate as much. In the future, we // may extend this function to actually support readahead hints. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Pread64 implements linux syscall pread64(2). @@ -116,29 +117,29 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is reading at an offset supported? if !file.Flags().Pread { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -162,13 +163,13 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Read the iovecs that specify the destination of the read. @@ -193,23 +194,23 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is reading at an offset supported? if !file.Flags().Pread { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Read the iovecs that specify the destination of the read. @@ -242,30 +243,30 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is reading at an offset supported? if offset > -1 && !file.Flags().Pread { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Check that the file is readable. if !file.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check flags field. // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is // accepted as a valid flag argument for preadv2. if flags&^linux.RWF_VALID != 0 { - return 0, nil, syserror.EOPNOTSUPP + return 0, nil, linuxerr.EOPNOTSUPP } // Read the iovecs that specify the destination of the read. 
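Besides swapping the returned constants, the change also rewrites error comparisons: checks such as err == syserror.ETIMEDOUT in pollBlock, err == syserror.EINTR in poll/ppoll/select, and err == syserror.ENAMETOOLONG in prctl become linuxerr.Equals(linuxerr.ETIMEDOUT, err) and friends, presumably so that a comparison still matches when the error arrives as a different representation of the same errno. The toy helper below shows the general idea under that assumption; the real linuxerr.Equals may differ in detail, and Errno/equals here are purely illustrative.

package sketch

import "errors"

// Errno is a toy stand-in for a linuxerr-style error value carrying a
// Linux errno number and a message.
type Errno struct {
	num int
	msg string
}

func (e *Errno) Error() string { return e.msg }

// ETIMEDOUT is an illustrative sentinel, not the real linuxerr value.
var ETIMEDOUT = &Errno{num: 110, msg: "connection timed out"}

// equals reports whether err represents the same errno as want, without
// relying on the two values being the identical sentinel object.
func equals(want *Errno, err error) bool {
	var e *Errno
	if errors.As(err, &e) {
		return e.num == want.num
	}
	return false
}

Used the way the pollBlock hunk above uses linuxerr.Equals, a timeout simply clears the error:

	if equals(ETIMEDOUT, err) {
		err = nil // the timeout is reported via the remaining-time return value
	}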
@@ -331,7 +332,7 @@ func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index e64246d57..a12e1c915 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -105,7 +106,7 @@ func prlimit64(t *kernel.Task, resource limits.LimitType, newLim *limits.Limit) } if _, ok := setableLimits[resource]; !ok { - return limits.Limit{}, syserror.EPERM + return limits.Limit{}, linuxerr.EPERM } // "A privileged process (under Linux: one with the CAP_SYS_RESOURCE @@ -129,7 +130,7 @@ func Getrlimit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys resource, ok := limits.FromLinuxResource[int(args[0].Int())] if !ok { // Return err; unknown limit. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } addr := args[1].Pointer() rlim, err := newRlimit(t) @@ -150,7 +151,7 @@ func Setrlimit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys resource, ok := limits.FromLinuxResource[int(args[0].Int())] if !ok { // Return err; unknown limit. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } addr := args[1].Pointer() rlim, err := newRlimit(t) @@ -158,7 +159,7 @@ func Setrlimit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } if _, err := rlim.CopyIn(t, addr); err != nil { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } _, err = prlimit64(t, resource, rlim.toLimit()) return 0, nil, err @@ -170,7 +171,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys resource, ok := limits.FromLinuxResource[int(args[1].Int())] if !ok { // Return err; unknown limit. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } newRlimAddr := args[2].Pointer() oldRlimAddr := args[3].Pointer() @@ -179,18 +180,18 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if newRlimAddr != 0 { var nrl rlimit64 if err := nrl.copyIn(t, newRlimAddr); err != nil { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } newLim = nrl.toLimit() } if tid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ot := t if tid > 0 { if ot = t.PIDNamespace().TaskWithID(tid); ot == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } } @@ -207,7 +208,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys cred.RealKGID != tcred.RealKGID || cred.RealKGID != tcred.EffectiveKGID || cred.RealKGID != tcred.SavedKGID { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } } @@ -218,7 +219,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if oldRlimAddr != 0 { if err := makeRlimit64(oldLim).copyOut(t, oldRlimAddr); err != nil { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } } diff --git a/pkg/sentry/syscalls/linux/sys_rseq.go b/pkg/sentry/syscalls/linux/sys_rseq.go index 90db10ea6..5fe196647 100644 --- a/pkg/sentry/syscalls/linux/sys_rseq.go +++ b/pkg/sentry/syscalls/linux/sys_rseq.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -43,6 +44,6 @@ func RSeq(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return 0, nil, t.ClearRSeq(addr, length, signature) default: // Unknown flag. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_rusage.go b/pkg/sentry/syscalls/linux/sys_rusage.go index ac5c98a54..a689abcc9 100644 --- a/pkg/sentry/syscalls/linux/sys_rusage.go +++ b/pkg/sentry/syscalls/linux/sys_rusage.go @@ -16,11 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) func getrusage(t *kernel.Task, which int32) linux.Rusage { @@ -76,7 +76,7 @@ func Getrusage(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys addr := args[1].Pointer() if which != linux.RUSAGE_SELF && which != linux.RUSAGE_CHILDREN && which != linux.RUSAGE_THREAD { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ru := getrusage(t, which) diff --git a/pkg/sentry/syscalls/linux/sys_sched.go b/pkg/sentry/syscalls/linux/sys_sched.go index bfcf44b6f..59c7a4b22 100644 --- a/pkg/sentry/syscalls/linux/sys_sched.go +++ b/pkg/sentry/syscalls/linux/sys_sched.go @@ -16,9 +16,9 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -38,13 +38,13 @@ func SchedGetparam(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel pid := args[0].Int() param := args[1].Pointer() if param == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if pid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } r := SchedParam{schedPriority: onlyPriority} if _, err := r.CopyOut(t, param); err != nil { @@ -58,10 +58,10 @@ func SchedGetparam(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel func SchedGetscheduler(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { pid := args[0].Int() if pid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } return onlyScheduler, nil, nil } @@ -72,20 +72,20 @@ func SchedSetscheduler(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ke policy := args[1].Int() param := args[2].Pointer() if pid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if policy != onlyScheduler { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } var r SchedParam if _, err := r.CopyIn(t, param); err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if r.schedPriority != onlyPriority { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil } diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go index e16d6ff3f..b0dc84b8d 100644 --- a/pkg/sentry/syscalls/linux/sys_seccomp.go +++ b/pkg/sentry/syscalls/linux/sys_seccomp.go @@ -17,10 +17,10 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + 
"gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // userSockFprog is equivalent to Linux's struct sock_fprog on amd64. @@ -44,7 +44,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr hostarch.Addr) error { // We only support SECCOMP_SET_MODE_FILTER at the moment. if mode != linux.SECCOMP_SET_MODE_FILTER { // Unsupported mode. - return syserror.EINVAL + return linuxerr.EINVAL } tsync := flags&linux.SECCOMP_FILTER_FLAG_TSYNC != 0 @@ -52,7 +52,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr hostarch.Addr) error { // The only flag we support now is SECCOMP_FILTER_FLAG_TSYNC. if flags&^linux.SECCOMP_FILTER_FLAG_TSYNC != 0 { // Unsupported flag. - return syserror.EINVAL + return linuxerr.EINVAL } var fprog userSockFprog @@ -66,7 +66,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr hostarch.Addr) error { compiledFilter, err := bpf.Compile(filter) if err != nil { t.Debugf("Invalid seccomp-bpf filter: %v", err) - return syserror.EINVAL + return linuxerr.EINVAL } return t.AppendSyscallFilter(compiledFilter, tsync) diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index c84260080..30919eb2f 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -19,13 +19,13 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/syserror" ) const opsMax = 500 // SEMOPM @@ -61,10 +61,10 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy nsops := args[2].SizeT() timespecAddr := args[3].Pointer() if nsops <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if nsops > opsMax { - return 0, nil, syserror.E2BIG + return 0, nil, linuxerr.E2BIG } ops := make([]linux.Sembuf, nsops) @@ -77,12 +77,12 @@ func Semtimedop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, err } if timeout.Sec < 0 || timeout.Nsec < 0 || timeout.Nsec >= 1e9 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if err := semTimedOp(t, id, ops, true, timeout.ToDuration()); err != nil { - if err == syserror.ETIMEDOUT { - return 0, nil, syserror.EAGAIN + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { + return 0, nil, linuxerr.EAGAIN } return 0, nil, err } @@ -96,10 +96,10 @@ func Semop(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall nsops := args[2].SizeT() if nsops <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if nsops > opsMax { - return 0, nil, syserror.E2BIG + return 0, nil, linuxerr.E2BIG } ops := make([]linux.Sembuf, nsops) @@ -113,7 +113,7 @@ func semTimedOp(t *kernel.Task, id int32, ops []linux.Sembuf, haveTimeout bool, set := t.IPCNamespace().SemaphoreRegistry().FindByID(id) if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) pid := t.Kernel().GlobalInit().PIDNamespace().IDOfThreadGroup(t.ThreadGroup()) @@ -139,7 +139,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.SETVAL: val := args[3].Int() if val > math.MaxInt16 { - return 0, nil, syserror.ERANGE + return 0, nil, 
linuxerr.ERANGE } return 0, nil, setVal(t, id, num, int16(val)) @@ -232,7 +232,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return uintptr(semid), nil, err default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -246,17 +246,17 @@ func ipcSet(t *kernel.Task, id int32, uid auth.UID, gid auth.GID, perms fs.FileP r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) kuid := creds.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } owner := fs.FileOwner{UID: kuid, GID: kgid} return set.Change(t, creds, owner, perms) @@ -266,7 +266,7 @@ func ipcStat(t *kernel.Task, id int32) (*linux.SemidDS, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) return set.GetStat(creds) @@ -276,7 +276,7 @@ func semStat(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByIndex(index) if set == nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) ds, err := set.GetStat(creds) @@ -289,7 +289,7 @@ func semStat(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { func semStatAny(t *kernel.Task, index int32) (int32, *linux.SemidDS, error) { set := t.IPCNamespace().SemaphoreRegistry().FindByIndex(index) if set == nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) ds, err := set.GetStatAny(creds) @@ -303,7 +303,7 @@ func setVal(t *kernel.Task, id int32, num int32, val int16) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) pid := t.Kernel().GlobalInit().PIDNamespace().IDOfThreadGroup(t.ThreadGroup()) @@ -314,7 +314,7 @@ func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } vals := make([]uint16, set.Size()) if _, err := primitive.CopyUint16SliceIn(t, array, vals); err != nil { @@ -329,7 +329,7 @@ func getVal(t *kernel.Task, id int32, num int32) (int16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) return set.GetVal(num, creds) @@ -339,7 +339,7 @@ func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) vals, err := set.GetValAll(creds) @@ -354,7 +354,7 @@ func getPID(t *kernel.Task, id int32, num int32) (int32, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) gpid, err := set.GetPID(num, creds) @@ -373,7 +373,7 @@ func getZCnt(t *kernel.Task, id int32, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set 
:= r.FindByID(id) if set == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) return set.CountZeroWaiters(num, creds) @@ -383,7 +383,7 @@ func getNCnt(t *kernel.Task, id int32, num int32) (uint16, error) { r := t.IPCNamespace().SemaphoreRegistry() set := r.FindByID(id) if set == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } creds := auth.CredentialsFromContext(t) return set.CountNegativeWaiters(num, creds) diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index 584064143..3e3a952ce 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -16,10 +16,10 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" - "gvisor.dev/gvisor/pkg/syserror" ) // Shmget implements shmget(2). @@ -51,7 +51,7 @@ func findSegment(t *kernel.Task, id shm.ID) (*shm.Shm, error) { segment := r.FindByID(id) if segment == nil { // No segment with provided id. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } return segment, nil } @@ -64,7 +64,7 @@ func Shmat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall segment, err := findSegment(t, id) if err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } defer segment.DecRef(t) @@ -106,7 +106,7 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.IPC_STAT: segment, err := findSegment(t, id) if err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } defer segment.DecRef(t) @@ -130,7 +130,7 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Remaining commands refer to a specific segment. 
segment, err := findSegment(t, id) if err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } defer segment.DecRef(t) @@ -155,6 +155,6 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, nil default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index 27a7f7fe1..45608f3fa 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -19,6 +19,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -79,10 +80,10 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC for { target := t.PIDNamespace().TaskWithID(pid) if target == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } if !mayKill(t, target, sig) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } info := &linux.SignalInfo{ Signo: int32(sig), @@ -90,7 +91,7 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC } info.SetPID(int32(target.PIDNamespace().IDOfTask(t))) info.SetUID(int32(t.Credentials().RealKUID.In(target.UserNamespace()).OrOverflow())) - if err := target.SendGroupSignal(info); err != syserror.ESRCH { + if err := target.SendGroupSignal(info); !linuxerr.Equals(linuxerr.ESRCH, err) { return 0, nil, err } } @@ -130,7 +131,7 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC info.SetPID(int32(tg.PIDNamespace().IDOfTask(t))) info.SetUID(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow())) err := tg.SendSignal(info) - if err == syserror.ESRCH { + if linuxerr.Equals(linuxerr.ESRCH, err) { // ESRCH is ignored because it means the task // exited while we were iterating. This is a // race which would not normally exist on @@ -145,7 +146,7 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if delivered > 0 { return 0, nil, lastErr } - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH default: // "If pid equals 0, then sig is sent to every process in the process // group of the calling process." @@ -159,11 +160,11 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // If pid != -1 (i.e. signalling a process group), the returned error // is the last error from any call to group_send_sig_info. - lastErr := syserror.ESRCH + lastErr := error(linuxerr.ESRCH) for _, tg := range t.PIDNamespace().ThreadGroups() { if t.PIDNamespace().IDOfProcessGroup(tg.ProcessGroup()) == pgid { if !mayKill(t, tg.Leader(), sig) { - lastErr = syserror.EPERM + lastErr = linuxerr.EPERM continue } @@ -174,7 +175,7 @@ func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC info.SetPID(int32(tg.PIDNamespace().IDOfTask(t))) info.SetUID(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow())) // See note above regarding ESRCH race above. - if err := tg.SendSignal(info); err != syserror.ESRCH { + if err := tg.SendSignal(info); !linuxerr.Equals(linuxerr.ESRCH, err) { lastErr = err } } @@ -202,16 +203,16 @@ func Tkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // N.B. Inconsistent with man page, linux actually rejects calls with // tid <=0 by EINVAL. This isn't the same for all signal calls. 
if tid <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } target := t.PIDNamespace().TaskWithID(tid) if target == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } if !mayKill(t, target, sig) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } return 0, nil, target.SendSignal(tkillSigInfo(t, target, sig)) } @@ -225,17 +226,17 @@ func Tgkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // N.B. Inconsistent with man page, linux actually rejects calls with // tgid/tid <=0 by EINVAL. This isn't the same for all signal calls. if tgid <= 0 || tid <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } targetTG := t.PIDNamespace().ThreadGroupWithID(tgid) target := t.PIDNamespace().TaskWithID(tid) if targetTG == nil || target == nil || target.ThreadGroup() != targetTG { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } if !mayKill(t, target, sig) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } return 0, nil, target.SendSignal(tkillSigInfo(t, target, sig)) } @@ -248,7 +249,7 @@ func RtSigaction(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S sigsetsize := args[3].SizeT() if sigsetsize != linux.SignalSetSize { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var newactptr *linux.SigAction @@ -291,7 +292,7 @@ func RtSigprocmask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel sigsetsize := args[3].SizeT() if sigsetsize != linux.SignalSetSize { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } oldmask := t.SignalMask() if setaddr != 0 { @@ -308,7 +309,7 @@ func RtSigprocmask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel case linux.SIG_SETMASK: t.SetSignalMask(mask) default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } if oldaddr != 0 { @@ -338,7 +339,7 @@ func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // these semantics apply to changing the signal stack via a // ucontext during a signal handler. if !t.SetSignalStack(alt) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } } @@ -377,7 +378,7 @@ func RtSigtimedwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne return 0, nil, err } if !d.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } timeout = time.Duration(d.ToNsecCapped()) } else { @@ -420,20 +421,20 @@ func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne // Deliver to the given task's thread group. target := t.PIDNamespace().TaskWithID(pid) if target == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } // If the sender is not the receiver, it can't use si_codes used by the // kernel or SI_TKILL. if (info.Code >= 0 || info.Code == linux.SI_TKILL) && target != t { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } if !mayKill(t, target, sig) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } - if err := target.SendGroupSignal(&info); err != syserror.ESRCH { + if err := target.SendGroupSignal(&info); !linuxerr.Equals(linuxerr.ESRCH, err) { return 0, nil, err } } @@ -449,7 +450,7 @@ func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker // N.B. Inconsistent with man page, linux actually rejects calls with // tgid/tid <=0 by EINVAL. This isn't the same for all signal calls. 
if tgid <= 0 || tid <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Copy in the info. See RtSigqueueinfo above. @@ -463,17 +464,17 @@ func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker targetTG := t.PIDNamespace().ThreadGroupWithID(tgid) target := t.PIDNamespace().TaskWithID(tid) if targetTG == nil || target == nil || target.ThreadGroup() != targetTG { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } // If the sender is not the receiver, it can't use si_codes used by the // kernel or SI_TKILL. if (info.Code >= 0 || info.Code == linux.SI_TKILL) && target != t { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } if !mayKill(t, target, sig) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } return 0, nil, target.SendSignal(&info) } @@ -524,7 +525,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u // Always check for valid flags, even if not creating. if flags&^(linux.SFD_NONBLOCK|linux.SFD_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is this a change to an existing signalfd? @@ -533,7 +534,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u if fd != -1 { file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -544,7 +545,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u } // Not a signalfd. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Create a new file. diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index e07917613..06eb8f319 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -117,7 +118,7 @@ type multipleMessageHeader64 struct { // from the untrusted address space range. func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) { if addrlen > maxAddrLen { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } addrBuf := make([]byte, addrlen) @@ -139,7 +140,7 @@ func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr h } if int32(bufLen) < 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // Write the length unconditionally. @@ -173,7 +174,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Check and initialize the flags. if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Create the new socket. @@ -205,7 +206,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Check and initialize the flags. if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } fileFlags := fs.SettableFileFlags{ @@ -252,7 +253,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Get socket from the file descriptor. 
file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -277,13 +278,13 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) @@ -305,7 +306,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, if peerRequested { // NOTE(magi): Linux does not give you an error if it can't // write the data back out so neither do we. - if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syserror.EINVAL { + if err := writeAddress(t, peer, peerLen, addr, addrLen); linuxerr.Equals(linuxerr.EINVAL, err) { return 0, err } } @@ -342,7 +343,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -369,7 +370,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -407,7 +408,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -421,7 +422,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc switch how { case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR: default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, s.Shutdown(t, int(how)).ToError() @@ -438,7 +439,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -454,7 +455,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, err } if optLen < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Call syscall implementation then copy both value and value len out. @@ -519,7 +520,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -530,10 +531,10 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } if optLen < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if optLen > maxOptLen { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } buf := t.CopyScratchBuffer(int(optLen)) if _, err := t.CopyInBytes(optValAddr, buf); err != nil { @@ -557,7 +558,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Get socket from the file descriptor. 
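A pattern that repeats across Socket, SocketPair, and accept above: any flag bit outside the small supported set is rejected up front with EINVAL. A self-contained sketch of the masking idiom; the constant values mirror SOCK_NONBLOCK and SOCK_CLOEXEC but everything here is local to the example:

package main

import (
	"errors"
	"fmt"
)

const (
	sockNonblock = 0x800   // mirrors SOCK_NONBLOCK
	sockCloexec  = 0x80000 // mirrors SOCK_CLOEXEC
)

var errInvalid = errors.New("EINVAL")

// checkSocketFlags rejects any bit outside the supported set, matching the
// early-EINVAL behavior in the handlers above.
func checkSocketFlags(flags int) error {
	if flags & ^(sockNonblock|sockCloexec) != 0 {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(checkSocketFlags(sockNonblock | sockCloexec)) // <nil>
	fmt.Println(checkSocketFlags(sockNonblock | 0x1))         // EINVAL: unknown bit
}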
file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -585,7 +586,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -612,13 +613,13 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -630,7 +631,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Reject flags that we don't handle yet. if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if file.Flags().NonBlocking { @@ -660,7 +661,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if vlen > linux.UIO_MAXIOV { @@ -669,13 +670,13 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Reject flags that we don't handle yet. if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -697,7 +698,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } if !ts.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration()) haveDeadline = true @@ -717,7 +718,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := uint64(0); i < uint64(vlen); i++ { mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } var n uintptr if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil { @@ -727,7 +728,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Copy the received length to the caller. 
lp, ok := mp.AddLength(messageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { break @@ -749,7 +750,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags } if msg.IovLen > linux.UIO_MAXIOV { - return 0, syserror.EMSGSIZE + return 0, linuxerr.EMSGSIZE } dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, @@ -780,7 +781,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags } if msg.ControlLen > maxControlLen { - return 0, syserror.ENOBUFS + return 0, linuxerr.ENOBUFS } n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) if e != nil { @@ -829,18 +830,18 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags // recvfrom and recv syscall handlers. func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) { if int(bufLen) < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Reject flags that we don't handle yet. if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) @@ -907,13 +908,13 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -925,7 +926,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Reject flags that we don't handle yet. if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if file.Flags().NonBlocking { @@ -945,7 +946,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if vlen > linux.UIO_MAXIOV { @@ -955,7 +956,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -967,7 +968,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Reject flags that we don't handle yet. 
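recvSingleMsg above and sendSingleMsg below bound the per-message work before copying anything: too many iovecs is EMSGSIZE and an oversized ancillary buffer is ENOBUFS. A sketch of those limit checks; UIO_MAXIOV is the standard 1024, while the control-length cap is an assumed value for this example:

package main

import (
	"errors"
	"fmt"
)

const (
	uioMaxIov     = 1024             // UIO_MAXIOV
	maxControlLen = 10 * 1024 * 1024 // assumed cap on ancillary data for this sketch
)

var (
	errMsgSize = errors.New("EMSGSIZE")
	errNoBufs  = errors.New("ENOBUFS")
)

// checkMsghdrLimits mirrors the bounds applied before a message is read or
// written.
func checkMsghdrLimits(iovLen, controlLen uint64) error {
	if iovLen > uioMaxIov {
		return errMsgSize
	}
	if controlLen > maxControlLen {
		return errNoBufs
	}
	return nil
}

func main() {
	fmt.Println(checkMsghdrLimits(8, 64))    // <nil>
	fmt.Println(checkMsghdrLimits(2048, 64)) // EMSGSIZE
	fmt.Println(checkMsghdrLimits(8, 1<<31)) // ENOBUFS
}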
if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if file.Flags().NonBlocking { @@ -979,7 +980,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := uint64(0); i < uint64(vlen); i++ { mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } var n uintptr if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil { @@ -989,7 +990,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Copy the received length to the caller. lp, ok := mp.AddLength(messageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { break @@ -1014,7 +1015,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostar if msg.ControlLen > 0 { // Put an upper bound to prevent large allocations. if msg.ControlLen > maxControlLen { - return 0, syserror.ENOBUFS + return 0, linuxerr.ENOBUFS } controlData = make([]byte, msg.ControlLen) if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil { @@ -1034,7 +1035,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostar // Read data then call the sendmsg implementation. if msg.IovLen > linux.UIO_MAXIOV { - return 0, syserror.EMSGSIZE + return 0, linuxerr.EMSGSIZE } src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, @@ -1073,13 +1074,13 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostar func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFile(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 134051124..34d87ac1f 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -27,7 +28,7 @@ import ( // doSplice implements a blocking splice operation. func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 || (opts.SrcStart+opts.Length < 0) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if opts.Length == 0 { return 0, nil @@ -105,33 +106,33 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Get files. 
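doSplice above rejects a request whose offsets or length are negative, or whose end position overflows a signed 64-bit offset. A standalone sketch of that guard:

package main

import (
	"errors"
	"fmt"
	"math"
)

var errInvalid = errors.New("EINVAL")

// checkSpliceRange rejects negative values and a start+length sum that
// wraps past the maximum signed 64-bit file offset.
func checkSpliceRange(srcStart, length int64) error {
	if length < 0 || srcStart < 0 || srcStart+length < 0 {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(checkSpliceRange(100, 200))         // <nil>
	fmt.Println(checkSpliceRange(math.MaxInt64, 1)) // EINVAL: sum wrapped negative
}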
inFile := t.GetFile(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) if !inFile.Flags().Read { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } outFile := t.GetFile(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) if !outFile.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Verify that the outfile Append flag is not set. if outFile.Flags().Append { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Verify that we have a regular infile. This is a requirement; the // same check appears in Linux (fs/splice.c:splice_direct_to_actor). if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var ( @@ -142,7 +143,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Verify that when offset address is not null, infile must be // seekable. The fs.Splice routine itself validates basic read. if !inFile.Flags().Pread { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Copy in the offset. @@ -190,19 +191,19 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Check for invalid flags. if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get files. outFile := t.GetFile(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) inFile := t.GetFile(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) @@ -226,11 +227,11 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal switch { case fs.IsPipe(inFileAttr) && !fs.IsPipe(outFileAttr): if inOffset != 0 { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } if outOffset != 0 { if !outFile.Flags().Pwrite { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var offset int64 @@ -244,11 +245,11 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } case !fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if outOffset != 0 { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } if inOffset != 0 { if !inFile.Flags().Pread { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var offset int64 @@ -262,15 +263,15 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } case fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if inOffset != 0 || outOffset != 0 { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // We may not refer to the same pipe; otherwise it's a continuous loop. if inFileAttr.InodeID == outFileAttr.InodeID { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Splice data. @@ -298,30 +299,30 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo // Check for invalid flags. if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get files. 
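Sendfile above performs several cheap precondition checks before any data moves: both descriptors must be open in the right mode, the output must not be in append mode, the input must be a regular file, and an explicit offset requires a seekable input. A sketch of that decision ladder with stand-in flag fields (not the real file API):

package main

import (
	"errors"
	"fmt"
)

var (
	errBadf  = errors.New("EBADF")
	errInval = errors.New("EINVAL")
	errSpipe = errors.New("ESPIPE")
)

// fileFlags stands in for the per-file flag accessors used above.
type fileFlags struct{ Read, Write, Append, Pread bool }

func checkSendfile(in, out fileFlags, inIsRegular, haveOffset bool) error {
	if !in.Read || !out.Write {
		return errBadf // wrong open mode on either end
	}
	if out.Append {
		return errInval // appending output is rejected
	}
	if !inIsRegular {
		return errInval // input must be a regular file
	}
	if haveOffset && !in.Pread {
		return errSpipe // offset given but input is not seekable
	}
	return nil
}

func main() {
	in := fileFlags{Read: true, Pread: true}
	out := fileFlags{Write: true}
	fmt.Println(checkSendfile(in, out, true, true))  // <nil>
	fmt.Println(checkSendfile(in, out, false, true)) // EINVAL
}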
outFile := t.GetFile(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) inFile := t.GetFile(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) // All files must be pipes. if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // We may not refer to the same pipe; see above. if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // The operation is non-blocking if anything is non-blocking. diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 2338ba44b..3da385c66 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -16,11 +16,11 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -56,7 +56,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Annoying. What's wrong with fstat? file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -98,7 +98,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -108,7 +108,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // stat implements stat from the given *fs.Dirent. 
func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr hostarch.Addr) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } uattr, err := d.Inode.UnstableAttr(t) if err != nil { @@ -139,13 +139,13 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall statxAddr := args[4].Pointer() if mask&linux.STATX__RESERVED != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if flags&^(linux.AT_SYMLINK_NOFOLLOW|linux.AT_EMPTY_PATH|linux.AT_STATX_SYNC_TYPE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, dirPath, err := copyInPath(t, pathAddr, flags&linux.AT_EMPTY_PATH != 0) @@ -156,7 +156,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if path == "" { file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) uattr, err := file.UnstableAttr(t) @@ -170,7 +170,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } uattr, err := d.Inode.UnstableAttr(t) if err != nil { @@ -247,7 +247,7 @@ func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/sys_stat_amd64.go b/pkg/sentry/syscalls/linux/sys_stat_amd64.go index 0a04a6113..e38066ea8 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_stat_arm64.go b/pkg/sentry/syscalls/linux/sys_stat_arm64.go index 5a3b1bfad..b2ea390c5 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
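Statx above rejects reserved mask bits, unknown flags, and the case where both sync modes are requested at once (AT_STATX_SYNC_TYPE is a two-bit field). A sketch of that last check; the constants mirror the uapi values:

package main

import (
	"errors"
	"fmt"
)

const (
	atStatxForceSync = 0x2000 // AT_STATX_FORCE_SYNC
	atStatxDontSync  = 0x4000 // AT_STATX_DONT_SYNC
	atStatxSyncType  = atStatxForceSync | atStatxDontSync
)

var errInvalid = errors.New("EINVAL")

// checkStatxSync rejects a flags value that asks for both sync modes.
func checkStatxSync(flags int) error {
	if flags&atStatxSyncType == atStatxSyncType {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(checkStatxSync(atStatxForceSync))                   // <nil>
	fmt.Println(checkStatxSync(atStatxForceSync | atStatxDontSync)) // EINVAL
}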
+//go:build arm64 // +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go index 5ebd4461f..6278bef21 100644 --- a/pkg/sentry/syscalls/linux/sys_sync.go +++ b/pkg/sentry/syscalls/linux/sys_sync.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -37,7 +38,7 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -52,7 +53,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -68,7 +69,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -86,13 +87,13 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel uflags := args[3].Uint() if offset < 0 || offset+nbytes < offset { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if uflags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE| linux.SYNC_FILE_RANGE_WRITE| linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if nbytes == 0 { @@ -101,7 +102,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go index 40c8bb061..ba372f9e3 100644 --- a/pkg/sentry/syscalls/linux/sys_syslog.go +++ b/pkg/sentry/syscalls/linux/sys_syslog.go @@ -15,6 +15,7 @@ package linux import ( + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -40,7 +41,7 @@ func Syslog(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal switch command { case _SYSLOG_ACTION_READ_ALL: if size < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if size > logBufLen { size = logBufLen diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 0d5056303..46145955e 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -17,8 +17,8 @@ package linux import ( "path" - "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -112,7 +112,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host } if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } atEmptyPath := flags&linux.AT_EMPTY_PATH != 0 if !atEmptyPath && len(pathname) == 0 { @@ -135,7 +135,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host // Need to extract the given FD. 
f, fdFlags := t.FDTable().Get(dirFD) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) closeOnExec = fdFlags.CloseOnExec @@ -154,7 +154,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host wd = f.Dirent wd.IncRef() if !fs.IsDir(wd.Inode.StableAttr) { - return 0, nil, syserror.ENOTDIR + return 0, nil, linuxerr.ENOTDIR } } } @@ -187,15 +187,15 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr host // Exit implements linux syscall exit(2). func Exit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - status := int(args[0].Int()) - t.PrepareExit(kernel.ExitStatus{Code: status}) + status := args[0].Int() + t.PrepareExit(linux.WaitStatusExit(status & 0xff)) return 0, kernel.CtrlDoExit, nil } // ExitGroup implements linux syscall exit_group(2). func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - status := int(args[0].Int()) - t.PrepareGroupExit(kernel.ExitStatus{Code: status}) + status := args[0].Int() + t.PrepareGroupExit(linux.WaitStatusExit(status & 0xff)) return 0, kernel.CtrlDoExit, nil } @@ -260,7 +260,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error { wopts.NonCloneTasks = true wopts.CloneTasks = true default: - return syserror.EINVAL + return linuxerr.EINVAL } if options&linux.WCONTINUED != 0 { wopts.Events |= kernel.EventGroupContinue @@ -277,7 +277,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error { // wait4 waits for the given child process to exit. func wait4(t *kernel.Task, pid int, statusAddr hostarch.Addr, options int, rusageAddr hostarch.Addr) (uintptr, error) { if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } wopts := kernel.WaitOptions{ Events: kernel.EventExit | kernel.EventTraceeStop, @@ -315,7 +315,7 @@ func wait4(t *kernel.Task, pid int, statusAddr hostarch.Addr, options int, rusag return 0, err } if statusAddr != 0 { - if _, err := primitive.CopyUint32Out(t, statusAddr, wr.Status); err != nil { + if _, err := primitive.CopyUint32Out(t, statusAddr, uint32(wr.Status)); err != nil { return 0, err } } @@ -358,10 +358,10 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal rusageAddr := args[4].Pointer() if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } wopts := kernel.WaitOptions{ Events: kernel.EventTraceeStop, @@ -374,7 +374,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case linux.P_PGID: wopts.SpecificPGID = kernel.ProcessGroupID(id) default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if err := parseCommonWaitOptions(&wopts, options); err != nil { @@ -418,23 +418,22 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } si.SetPID(int32(wr.TID)) si.SetUID(int32(wr.UID)) - // TODO(b/73541790): convert kernel.ExitStatus to functions and make - // WaitResult.Status a linux.WaitStatus. 
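The Exit and ExitGroup hunks above now build the wait status directly, masking the user-supplied value with 0xff first: only the low byte of the argument to exit(2) is visible to a waiting parent. A small sketch of that truncation:

package main

import "fmt"

// visibleExitStatus keeps only the low 8 bits, as the exit paths above do
// before encoding the wait status.
func visibleExitStatus(status int32) int32 {
	return status & 0xff
}

func main() {
	fmt.Println(visibleExitStatus(1))   // 1
	fmt.Println(visibleExitStatus(256)) // 0: the high bits are discarded
	fmt.Println(visibleExitStatus(-1))  // 255: wraps into the low byte
}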
- s := unix.WaitStatus(wr.Status) + s := wr.Status switch { case s.Exited(): si.Code = linux.CLD_EXITED si.SetStatus(int32(s.ExitStatus())) case s.Signaled(): - si.Code = linux.CLD_KILLED - si.SetStatus(int32(s.Signal())) - case s.CoreDump(): - si.Code = linux.CLD_DUMPED - si.SetStatus(int32(s.Signal())) + if s.CoreDumped() { + si.Code = linux.CLD_DUMPED + } else { + si.Code = linux.CLD_KILLED + } + si.SetStatus(int32(s.TerminationSignal())) case s.Stopped(): if wr.Event == kernel.EventTraceeStop { si.Code = linux.CLD_TRAPPED - si.SetStatus(int32(s.TrapCause())) + si.SetStatus(int32(s.PtraceEvent())) } else { si.Code = linux.CLD_STOPPED si.SetStatus(int32(s.StopSignal())) @@ -504,7 +503,7 @@ func SchedSetaffinity(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker } else { task = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)) if task == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } } @@ -528,7 +527,7 @@ func SchedGetaffinity(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker // in an array of "unsigned long" so the buffer needs to // be a multiple of the word size. if size&(t.Arch().Width()-1) > 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var task *kernel.Task @@ -537,7 +536,7 @@ func SchedGetaffinity(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker } else { task = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)) if task == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } } @@ -545,7 +544,7 @@ func SchedGetaffinity(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker // The buffer needs to be big enough to hold a cpumask with // all possible cpus. if size < mask.Size() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } _, err := t.CopyOutBytes(maskAddr, mask) @@ -590,16 +589,16 @@ func Setpgid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if pid != 0 { ot := t.PIDNamespace().TaskWithID(pid) if ot == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } tg = ot.ThreadGroup() if tg.Leader() != ot { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Setpgid only operates on child threadgroups. if tg != t.ThreadGroup() && (tg.Leader().Parent() == nil || tg.Leader().Parent().ThreadGroup() != t.ThreadGroup()) { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } } @@ -609,7 +608,7 @@ func Setpgid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if pgid == 0 { pgid = defaultPGID } else if pgid < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // If the pgid is the same as the group, then create a new one. 
Otherwise, @@ -654,7 +653,7 @@ func Getpgid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca target := t.PIDNamespace().TaskWithID(tid) if target == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } return uintptr(t.PIDNamespace().IDOfProcessGroup(target.ThreadGroup().ProcessGroup())), nil, nil @@ -674,7 +673,7 @@ func Getsid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal target := t.PIDNamespace().TaskWithID(tid) if target == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } return uintptr(t.PIDNamespace().IDOfSession(target.ThreadGroup().Session())), nil, nil @@ -698,7 +697,7 @@ func Getpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S } if task == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } // From kernel/sys.c:getpriority: @@ -712,7 +711,7 @@ func Getpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // PRIO_USER and PRIO_PGRP have no further implementation yet. return 0, nil, nil default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -744,7 +743,7 @@ func Setpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S } if task == nil { - return 0, nil, syserror.ESRCH + return 0, nil, linuxerr.ESRCH } task.SetNiceness(niceval) @@ -754,7 +753,7 @@ func Setpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // PRIO_USER and PRIO_PGRP have no further implementation yet. return 0, nil, nil default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index 5c3b3dee2..674e74f82 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ -19,6 +19,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -75,7 +76,7 @@ func ClockGetres(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S } if _, err := getClock(t, clockID); err != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if addr == 0 { @@ -94,12 +95,12 @@ type cpuClocker interface { func getClock(t *kernel.Task, clockID int32) (ktime.Clock, error) { if clockID < 0 { if !isValidCPUClock(clockID) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } targetTask := targetTask(t, clockID) if targetTask == nil { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } var target cpuClocker @@ -116,7 +117,7 @@ func getClock(t *kernel.Task, clockID int32) (ktime.Clock, error) { // CPUCLOCK_SCHED is approximated by CPUCLOCK_PROF. return target.CPUClock(), nil default: - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } @@ -138,7 +139,7 @@ func getClock(t *kernel.Task, clockID int32) (ktime.Clock, error) { case linux.CLOCK_THREAD_CPUTIME_ID: return t.CPUClock(), nil default: - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } @@ -157,7 +158,7 @@ func ClockGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. // ClockSettime implements linux syscall clock_settime(2). func ClockSettime(*kernel.Task, arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } // Time implements linux syscall time(2). 
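In the SchedGetaffinity hunk above, the user buffer length must be a multiple of the word size; because the word size is a power of two, the check uses a mask instead of a modulo. A standalone sketch:

package main

import (
	"errors"
	"fmt"
)

var errInvalid = errors.New("EINVAL")

// checkMaskBufferSize requires size to be a multiple of wordSize, which is
// assumed to be a power of two (8 on 64-bit).
func checkMaskBufferSize(size, wordSize uint) error {
	if size&(wordSize-1) != 0 {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(checkMaskBufferSize(16, 8)) // <nil>
	fmt.Println(checkMaskBufferSize(12, 8)) // EINVAL: not word-aligned
}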
@@ -209,11 +210,11 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, end ktime.Time, rem host timer.Destroy() - switch err { - case syserror.ETIMEDOUT: + switch { + case linuxerr.Equals(linuxerr.ETIMEDOUT, err): // Slept for entire timeout. return nil - case syserror.ErrInterrupted: + case err == syserror.ErrInterrupted: // Interrupted. remaining := end.Sub(c.Now()) if remaining <= 0 { @@ -253,7 +254,7 @@ func Nanosleep(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } if !ts.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Just like linux, we cap the timeout with the max number that int64 can @@ -276,7 +277,7 @@ func ClockNanosleep(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne } if !req.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Only allow clock constants also allowed by Linux. @@ -284,7 +285,7 @@ func ClockNanosleep(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if clockID != linux.CLOCK_REALTIME && clockID != linux.CLOCK_MONOTONIC && clockID != linux.CLOCK_PROCESS_CPUTIME_ID { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go index cadd9d348..4eeb94231 100644 --- a/pkg/sentry/syscalls/linux/sys_timerfd.go +++ b/pkg/sentry/syscalls/linux/sys_timerfd.go @@ -16,12 +16,12 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) // TimerfdCreate implements Linux syscall timerfd_create(2). 
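clockNanosleepUntil above distinguishes a sleep that ran to completion (the timer's ETIMEDOUT, reported as success) from one that was interrupted, in which case the time still owed is recomputed and clamped at zero. A sketch of the remaining-time computation:

package main

import (
	"fmt"
	"time"
)

// remaining returns how much of the sleep is still owed after an
// interruption, clamped at zero if the deadline already passed.
func remaining(end, now time.Time) time.Duration {
	r := end.Sub(now)
	if r < 0 {
		return 0
	}
	return r
}

func main() {
	now := time.Now()
	fmt.Println(remaining(now.Add(500*time.Millisecond), now)) // 500ms
	fmt.Println(remaining(now.Add(-time.Second), now))         // 0s: already expired
}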
@@ -30,7 +30,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel flags := args[1].Int() if flags&^(linux.TFD_CLOEXEC|linux.TFD_NONBLOCK) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var c ktime.Clock @@ -40,7 +40,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel case linux.CLOCK_MONOTONIC, linux.CLOCK_BOOTTIME: c = t.Kernel().MonotonicClock() default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } f := timerfd.NewFile(t, c) defer f.DecRef(t) @@ -66,18 +66,18 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne oldValAddr := args[3].Pointer() if flags&^(linux.TFD_TIMER_ABSTIME) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) tf, ok := f.FileOperations.(*timerfd.TimerOperations) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var newVal linux.Itimerspec @@ -105,13 +105,13 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) tf, ok := f.FileOperations.(*timerfd.TimerOperations) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } tm, s := tf.GetTime() diff --git a/pkg/sentry/syscalls/linux/sys_tls_amd64.go b/pkg/sentry/syscalls/linux/sys_tls_amd64.go index 6ddd30d5c..8c6cd7511 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_amd64.go @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build amd64 +//go:build amd64 +// +build amd64 package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -42,13 +44,13 @@ func ArchPrctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys case linux.ARCH_SET_FS: fsbase := args[1].Uint64() if !t.Arch().SetTLS(uintptr(fsbase)) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } case linux.ARCH_GET_GS, linux.ARCH_SET_GS: t.Kernel().EmitUnimplementedEvent(t) fallthrough default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil diff --git a/pkg/sentry/syscalls/linux/sys_tls_arm64.go b/pkg/sentry/syscalls/linux/sys_tls_arm64.go index fb08a356e..ff4ac4d6d 100644 --- a/pkg/sentry/syscalls/linux/sys_tls_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_tls_arm64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build arm64 +//go:build arm64 +// +build arm64 package linux diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go index 66c5974f5..4e945d2c0 100644 --- a/pkg/sentry/syscalls/linux/sys_utsname.go +++ b/pkg/sentry/syscalls/linux/sys_utsname.go @@ -16,9 +16,9 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Uname implements linux syscall uname. 
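TimerfdSettime and TimerfdGettime above only accept a descriptor whose implementation is actually a timerfd; anything else is EINVAL. A sketch of that type-assertion gate, using stand-in types rather than the real file interfaces:

package main

import (
	"errors"
	"fmt"
)

var errInvalid = errors.New("EINVAL")

// fileOperations stands in for the per-file implementation interface.
type fileOperations interface{ Kind() string }

type timerOperations struct{}

func (*timerOperations) Kind() string { return "timerfd" }

type regularFile struct{}

func (*regularFile) Kind() string { return "regular" }

// settime fails unless the file behind the descriptor is a timerfd.
func settime(ops fileOperations) error {
	if _, ok := ops.(*timerOperations); !ok {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(settime(&timerOperations{})) // <nil>
	fmt.Println(settime(&regularFile{}))     // EINVAL
}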
@@ -57,10 +57,10 @@ func Setdomainname(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel utsns := t.UTSNamespace() if !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, utsns.UserNamespace()) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } if size < 0 || size > linux.UTSLen { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } name, err := t.CopyInString(nameAddr, int(size)) @@ -79,10 +79,10 @@ func Sethostname(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S utsns := t.UTSNamespace() if !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, utsns.UserNamespace()) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } if size < 0 || size > linux.UTSLen { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } name := make([]byte, size) diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 95bfe6606..872168606 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -46,19 +47,19 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is writable. if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. @@ -83,29 +84,29 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is writing at an offset supported? if !file.Flags().Pwrite { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Check that the file is writable. if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. @@ -129,13 +130,13 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is writable. if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Read the iovecs that specify the source of the write. @@ -160,23 +161,23 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is writing at an offset supported? if !file.Flags().Pwrite { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Check that the file is writable. 
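The Sethostname and Setdomainname hunks above share the same bounds check: the length must be non-negative and no larger than the fixed utsname field size. A sketch, assuming the 64-byte __NEW_UTS_LEN limit:

package main

import (
	"errors"
	"fmt"
)

const utsLen = 64 // assumed to match __NEW_UTS_LEN

var errInvalid = errors.New("EINVAL")

func checkUTSNameSize(size int64) error {
	if size < 0 || size > utsLen {
		return errInvalid
	}
	return nil
}

func main() {
	fmt.Println(checkUTSNameSize(12))  // <nil>
	fmt.Println(checkUTSNameSize(-1))  // EINVAL
	fmt.Println(checkUTSNameSize(200)) // EINVAL
}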
if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Read the iovecs that specify the source of the write. @@ -208,34 +209,34 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags := int(args[5].Int()) if int(args[4].Int())&0x4 == 1 { - return 0, nil, syserror.EACCES + return 0, nil, linuxerr.EACCES } file := t.GetFile(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is writing at an offset supported? if offset > -1 && !file.Flags().Pwrite { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is // accepted as a valid flag argument for pwritev2. if flags&^linux.RWF_VALID != 0 { - return uintptr(flags), nil, syserror.EOPNOTSUPP + return uintptr(flags), nil, linuxerr.EOPNOTSUPP } // Check that the file is writeable. if !file.Flags().Write { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Read the iovecs that specify the source of the write. @@ -301,7 +302,7 @@ func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 28ad6a60e..baaf31191 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -18,11 +18,11 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange @@ -47,7 +47,7 @@ func FGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) @@ -73,7 +73,7 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink n := 0 err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } n, err = getXattr(t, d, nameAddr, valueAddr, size) @@ -99,7 +99,7 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, s // TODO(b/148380782): Support xattrs in namespaces other than "user". if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } // If getxattr(2) is called with size 0, the size of the value will be @@ -116,7 +116,7 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, s } n := len(value) if uint64(n) > requestedSize { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } // Don't copy out the attribute value if size is 0. @@ -151,7 +151,7 @@ func FSetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. 
f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) @@ -172,7 +172,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } return setXattr(t, d, nameAddr, valueAddr, uint64(size), flags) @@ -182,7 +182,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink // setXattr implements setxattr(2) from the given *fs.Dirent. func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, size uint64, flags uint32) error { if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } name, err := copyInXattrName(t, nameAddr) @@ -195,7 +195,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, s } if size > linux.XATTR_SIZE_MAX { - return syserror.E2BIG + return linuxerr.E2BIG } buf := make([]byte, size) if _, err := t.CopyInBytes(valueAddr, buf); err != nil { @@ -204,7 +204,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, s value := string(buf) if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } if err := d.Inode.SetXattr(t, d, name, value, flags); err != nil { @@ -217,13 +217,13 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, s func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) { name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) if err != nil { - if err == syserror.ENAMETOOLONG { - return "", syserror.ERANGE + if linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { + return "", linuxerr.ERANGE } return "", err } if len(name) == 0 { - return "", syserror.ERANGE + return "", linuxerr.ERANGE } return name, nil } @@ -241,9 +241,9 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error // Restrict xattrs to regular files and directories. if !xattrFileTypeOk(i) { if perms.Write { - return syserror.EPERM + return linuxerr.EPERM } - return syserror.ENODATA + return linuxerr.ENODATA } return i.CheckPermission(t, perms) @@ -268,7 +268,7 @@ func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) @@ -293,7 +293,7 @@ func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlin n := 0 err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } n, err = listXattr(t, d, listAddr, size) @@ -333,10 +333,10 @@ func listXattr(t *kernel.Task, d *fs.Dirent, addr hostarch.Addr, size uint64) (i listSize := xattrListSize(xattrs) if listSize > linux.XATTR_SIZE_MAX { - return 0, syserror.E2BIG + return 0, linuxerr.E2BIG } if uint64(listSize) > requestedSize { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } // Don't copy out the attributes if size is 0. @@ -382,7 +382,7 @@ func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. 
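copyInXattrName above translates an over-long attribute name into ERANGE (the copy itself reports name-too-long) and also rejects an empty name. A sketch of that validation, assuming the standard XATTR_NAME_MAX of 255:

package main

import (
	"errors"
	"fmt"
	"strings"
)

const xattrNameMax = 255 // XATTR_NAME_MAX

var errRange = errors.New("ERANGE")

// checkXattrName rejects empty or over-long attribute names.
func checkXattrName(name string) error {
	if len(name) == 0 || len(name) > xattrNameMax {
		return errRange
	}
	return nil
}

func main() {
	fmt.Println(checkXattrName("user.mytag"))                       // <nil>
	fmt.Println(checkXattrName(""))                                 // ERANGE
	fmt.Println(checkXattrName("user." + strings.Repeat("x", 300))) // ERANGE
}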
f := t.GetFile(fd) if f == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer f.DecRef(t) @@ -400,7 +400,7 @@ func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSyml return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } return removeXattr(t, d, nameAddr) @@ -419,7 +419,7 @@ func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr hostarch.Addr) error { } if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP + return linuxerr.EOPNOTSUPP } if err := d.Inode.RemoveXattr(t, d, name); err != nil { diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index 3edc922eb..b327e27d6 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -103,7 +104,7 @@ func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time. return 0, err } if !timespec.Valid() { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } timeout = time.Duration(timespec.ToNsecCapped()) } diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index 5ce0bc714..a73f096ff 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -41,6 +41,7 @@ go_library( "//pkg/abi/linux", "//pkg/bits", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fspath", "//pkg/gohacks", "//pkg/hostarch", diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index fd1863ef3..a8fa86cdc 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -17,6 +17,8 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/eventfd" @@ -26,8 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/pkg/hostarch" ) // IoSubmit implements linux syscall io_submit(2). @@ -37,7 +37,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc addr := args[2].Pointer() if nrEvents < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } for i := int32(0); i < nrEvents; i++ { @@ -90,12 +90,12 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // submitCallback processes a single callback. func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error { if cb.Reserved2 != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } fd := t.GetFileVFS2(cb.FD) if fd == nil { - return syserror.EBADF + return linuxerr.EBADF } defer fd.DecRef(t) @@ -104,13 +104,13 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr host if cb.Flags&linux.IOCB_FLAG_RESFD != 0 { eventFD = t.GetFileVFS2(cb.ResFD) if eventFD == nil { - return syserror.EBADF + return linuxerr.EBADF } defer eventFD.DecRef(t) // Check that it is an eventfd. 
if _, ok := eventFD.Impl().(*eventfd.EventFileDescription); !ok { - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -123,14 +123,14 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr host switch cb.OpCode { case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV: if cb.Offset < 0 { - return syserror.EINVAL + return linuxerr.EINVAL } } // Prepare the request. aioCtx, ok := t.MemoryManager().LookupAIOContext(t, id) if !ok { - return syserror.EINVAL + return linuxerr.EINVAL } if err := aioCtx.Prepare(); err != nil { return err @@ -200,7 +200,7 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) bytes := int(cb.Bytes) if bytes < 0 { // Linux also requires that this field fit in ssize_t. - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } // Since this I/O will be asynchronous with respect to t's task goroutine, @@ -222,6 +222,6 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) default: // Not a supported command. - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } } diff --git a/pkg/sentry/syscalls/linux/vfs2/epoll.go b/pkg/sentry/syscalls/linux/vfs2/epoll.go index 047d955b6..84010db77 100644 --- a/pkg/sentry/syscalls/linux/vfs2/epoll.go +++ b/pkg/sentry/syscalls/linux/vfs2/epoll.go @@ -19,12 +19,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -34,7 +34,7 @@ var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes() func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := args[0].Int() if flags&^linux.EPOLL_CLOEXEC != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file, err := t.Kernel().VFS().NewEpollInstanceFD(t) @@ -59,7 +59,7 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // "Since Linux 2.6.8, the size argument is ignored, but must be greater // than zero" - epoll_create(2) if size <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file, err := t.Kernel().VFS().NewEpollInstanceFD(t) @@ -84,20 +84,20 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc epfile := t.GetFileVFS2(epfd) if epfile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer epfile.DecRef(t) ep, ok := epfile.Impl().(*vfs.EpollInstance) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if epfile == file { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var event linux.EpollEvent @@ -115,24 +115,24 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } return 0, nil, ep.ModifyInterest(file, fd, event) default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) { var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: 
fs/eventpoll.c:EP_MAX_EVENTS if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } epfile := t.GetFileVFS2(epfd) if epfile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer epfile.DecRef(t) ep, ok := epfile.Impl().(*vfs.EpollInstance) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Allocate space for a few events on the stack for the common case in @@ -174,7 +174,7 @@ func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents i haveDeadline = true } if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = nil } return 0, nil, err diff --git a/pkg/sentry/syscalls/linux/vfs2/eventfd.go b/pkg/sentry/syscalls/linux/vfs2/eventfd.go index 807f909da..0dcf1fbff 100644 --- a/pkg/sentry/syscalls/linux/vfs2/eventfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/eventfd.go @@ -16,10 +16,10 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/eventfd" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Eventfd2 implements linux syscall eventfd2(2). @@ -29,7 +29,7 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc allOps := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC) if flags & ^allOps != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } vfsObj := t.Kernel().VFS() diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go index 3315398a4..38818c175 100644 --- a/pkg/sentry/syscalls/linux/vfs2/execve.go +++ b/pkg/sentry/syscalls/linux/vfs2/execve.go @@ -16,7 +16,9 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -24,8 +26,6 @@ import ( slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Execve implements linux syscall execve(2). 
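waitEpoll above caps maxEvents at EP_MAX_EVENTS, the largest number of epoll events whose total size still fits in an int32, and rejects non-positive counts. A sketch of the bound; the 12-byte event size assumes the packed amd64 layout:

package main

import (
	"fmt"
	"math"
)

const sizeofEpollEvent = 12 // packed amd64 layout: uint32 events + uint64 data

// maxEventsValid mirrors the bound applied before allocating event storage.
func maxEventsValid(maxEvents int) bool {
	epMaxEvents := math.MaxInt32 / sizeofEpollEvent
	return maxEvents > 0 && maxEvents <= epMaxEvents
}

func main() {
	fmt.Println(maxEventsValid(64))            // true
	fmt.Println(maxEventsValid(0))             // false: must be positive
	fmt.Println(maxEventsValid(math.MaxInt32)) // false: above EP_MAX_EVENTS
}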
@@ -48,7 +48,7 @@ func Execveat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } pathname, err := t.CopyInString(pathnameAddr, linux.PATH_MAX) @@ -87,7 +87,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr host } dirfile, dirfileFlags := t.FDTable().GetVFS2(dirfd) if dirfile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } start := dirfile.VirtualDentry() start.IncRef() diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 1a31898e8..2cfb12cad 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -16,6 +16,7 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" @@ -36,7 +37,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // (and other reference-holding operations complete). _, file := t.FDTable().Remove(t, fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -50,13 +51,13 @@ func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) newFD, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{}) if err != nil { - return 0, nil, syserror.EMFILE + return 0, nil, linuxerr.EMFILE } return uintptr(newFD), nil, nil } @@ -70,7 +71,7 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // As long as oldfd is valid, dup2() does nothing and returns newfd. 
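The Close, Dup and Dup2 hunks above all repeat one lookup idiom: fetch the file for an fd, return EBADF when the descriptor table has no entry, and otherwise release the reference taken by the lookup (immediately on Dup2's no-op path, via defer everywhere else). A toy version of that shape, with a plain map standing in for the task's FD table; the types here are illustrative, not gVisor's:

package main

import (
	"fmt"
	"syscall"
)

// file is a toy stand-in for a reference-counted file description.
type file struct{ refs int }

func (f *file) DecRef() { f.refs-- }

// fdTable is a toy stand-in for the per-task descriptor table.
type fdTable map[int32]*file

// get mimics the lookup pattern from the hunks above: a nil result means
// the descriptor is not open, which the syscalls translate to EBADF.
func (t fdTable) get(fd int32) *file {
	f := t[fd]
	if f != nil {
		f.refs++ // the caller owns this reference until DecRef
	}
	return f
}

func dup(t fdTable, fd int32) (int32, error) {
	f := t.get(fd)
	if f == nil {
		return 0, syscall.EBADF
	}
	defer f.DecRef()
	// A real dup would install f at the lowest free descriptor here.
	return fd + 1, nil
}

func main() {
	tbl := fdTable{3: &file{}}
	fmt.Println(dup(tbl, 3)) // 4 <nil>
	fmt.Println(dup(tbl, 9)) // 0 bad file descriptor
}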
file := t.GetFileVFS2(oldfd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } file.DecRef(t) return uintptr(newfd), nil, nil @@ -86,7 +87,7 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC flags := args[2].Uint() if oldfd == newfd { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return dup3(t, oldfd, newfd, flags) @@ -94,12 +95,12 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC func dup3(t *kernel.Task, oldfd, newfd int32, flags uint32) (uintptr, *kernel.SyscallControl, error) { if flags&^linux.O_CLOEXEC != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(oldfd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -119,7 +120,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file, flags := t.FDTable().GetVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -128,7 +129,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL: // allowed default: - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } } @@ -169,7 +170,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if who < 0 { // Check for overflow before flipping the sign. if who-1 > who { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ownerType = linux.F_OWNER_PGRP who = -who @@ -192,7 +193,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_SETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } n, err := pipefile.SetPipeSize(int64(args[2].Int())) if err != nil { @@ -202,7 +203,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_GETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } return uintptr(pipefile.PipeSize()), nil, nil case linux.F_GET_SEALS: @@ -210,7 +211,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(val), nil, err case linux.F_ADD_SEALS: if !file.IsWritable() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } err := tmpfs.AddSeals(file, args[2].Uint()) return 0, nil, err @@ -232,7 +233,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, a.SetSignal(linux.Signal(args[2].Int())) default: // Everything else is not yet supported. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -269,7 +270,7 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP: // Acceptable type. 
default: - return syserror.EINVAL + return linuxerr.EINVAL } a := file.SetAsyncHandler(fasync.NewVFS2(fd)).(*fasync.FileAsync) @@ -282,26 +283,26 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, case linux.F_OWNER_TID: task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) if task == nil { - return syserror.ESRCH + return linuxerr.ESRCH } a.SetOwnerTask(t, task) return nil case linux.F_OWNER_PID: tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(pid)) if tg == nil { - return syserror.ESRCH + return linuxerr.ESRCH } a.SetOwnerThreadGroup(t, tg) return nil case linux.F_OWNER_PGRP: pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(pid)) if pg == nil { - return syserror.ESRCH + return linuxerr.ESRCH } a.SetOwnerProcessGroup(t, pg) return nil default: - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -319,7 +320,7 @@ func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDes case linux.F_WRLCK: typ = lock.WriteLock default: - return syserror.EINVAL + return linuxerr.EINVAL } r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) if err != nil { @@ -368,13 +369,13 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip switch flock.Type { case linux.F_RDLCK: if !file.IsReadable() { - return syserror.EBADF + return linuxerr.EBADF } return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker) case linux.F_WRLCK: if !file.IsWritable() { - return syserror.EBADF + return linuxerr.EBADF } return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker) @@ -382,7 +383,7 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip return file.UnlockPOSIX(t, t.FDTable(), r) default: - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -395,22 +396,22 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Note: offset is allowed to be negative. if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if file.StatusFlags()&linux.O_PATH != 0 { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // If the FD refers to a pipe or FIFO, return error. if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } switch advice { @@ -421,7 +422,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys case linux.POSIX_FADV_DONTNEED: case linux.POSIX_FADV_NOREUSE: default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Sure, whatever. diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index 36aa1d3ae..534355237 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -16,12 +16,12 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Link implements Linux syscall link(2). 
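In the Fcntl F_SETOWN hunk above (and again in the Ioctl handling further down), a negative who selects a process group, but the sign may only be flipped if -who is representable: for the most negative int32, negation wraps back to the same value, and the who-1 > who test catches exactly that case before the conversion. The same arithmetic in isolation:

package main

import (
	"fmt"
	"math"
	"syscall"
)

// ownerFromWho applies the F_SETOWN convention that negative values name a
// process group. The guard mirrors the `who-1 > who` check in the hunk
// above; with int32 wrap-around it is only true for math.MinInt32.
func ownerFromWho(who int32) (pgrp bool, id int32, err error) {
	if who < 0 {
		if who-1 > who { // overflow: -who would not fit in int32
			return false, 0, syscall.EINVAL
		}
		return true, -who, nil
	}
	return false, who, nil
}

func main() {
	fmt.Println(ownerFromWho(1234))          // false 1234 <nil>
	fmt.Println(ownerFromWho(-1234))         // true 1234 <nil>
	fmt.Println(ownerFromWho(math.MinInt32)) // false 0 invalid argument
}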
@@ -43,7 +43,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal func linkat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd int32, newpathAddr hostarch.Addr, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_FOLLOW) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if flags&linux.AT_EMPTY_PATH != 0 && !t.HasCapability(linux.CAP_DAC_READ_SEARCH) { return syserror.ENOENT @@ -290,7 +290,7 @@ func Unlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags := args[2].Int() if flags&^linux.AT_REMOVEDIR != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if flags&linux.AT_REMOVEDIR != 0 { diff --git a/pkg/sentry/syscalls/linux/vfs2/fscontext.go b/pkg/sentry/syscalls/linux/vfs2/fscontext.go index a7d4d2a36..1e36d9c76 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fscontext.go +++ b/pkg/sentry/syscalls/linux/vfs2/fscontext.go @@ -16,11 +16,11 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) // Getcwd implements Linux syscall getcwd(2). @@ -39,7 +39,7 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Note this is >= because we need a terminator. if uint(len(s)) >= size { - return 0, nil, syserror.ERANGE + return 0, nil, linuxerr.ERANGE } // Construct a byte slice containing a NUL terminator. @@ -106,7 +106,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal addr := args[0].Pointer() if !t.HasCapability(linux.CAP_SYS_CHROOT) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } path, err := copyInPath(t, addr) diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index b41a3056a..c2c3172bc 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -17,13 +17,12 @@ package vfs2 import ( "fmt" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Getdents implements Linux syscall getdents(2). 
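The Getcwd hunk keeps the >= comparison because the returned string still needs one extra byte for the NUL terminator; only the error constant changes to linuxerr.ERANGE. A small sketch of that length rule, independent of the sentry's copy-out machinery:

package main

import (
	"fmt"
	"syscall"
)

// copyOutCwd copies path plus a terminating NUL into a caller buffer,
// failing with ERANGE when the terminator would not fit. This mirrors the
// `len(s) >= size` check in the Getcwd hunk above.
func copyOutCwd(buf []byte, path string) (int, error) {
	if len(path) >= len(buf) { // ">=" because the NUL byte also needs room
		return 0, syscall.ERANGE
	}
	n := copy(buf, path)
	buf[n] = 0
	return n + 1, nil
}

func main() {
	buf := make([]byte, 8)
	fmt.Println(copyOutCwd(buf, "/tmp"))      // 5 <nil>
	fmt.Println(copyOutCwd(buf, "/too/long")) // 0 numerical result out of range
}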
@@ -43,7 +42,7 @@ func getdents(t *kernel.Task, args arch.SyscallArguments, isGetdents64 bool) (ui file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -100,7 +99,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { size := 8 + 8 + 2 + 1 + 1 + len(dirent.Name) size = (size + 7) &^ 7 // round up to multiple of 8 if size > cb.remaining { - return syserror.EINVAL + return linuxerr.EINVAL } buf = cb.t.CopyScratchBuffer(size) hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino) @@ -134,7 +133,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { size := 8 + 8 + 2 + 1 + 1 + len(dirent.Name) size = (size + 7) &^ 7 // round up to multiple of sizeof(long) if size > cb.remaining { - return syserror.EINVAL + return linuxerr.EINVAL } buf = cb.t.CopyScratchBuffer(size) hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino) diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go index 11753d8e5..d8d5dd7ad 100644 --- a/pkg/sentry/syscalls/linux/vfs2/inotify.go +++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go @@ -16,10 +16,10 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" ) const allFlags = linux.IN_NONBLOCK | linux.IN_CLOEXEC @@ -28,7 +28,7 @@ const allFlags = linux.IN_NONBLOCK | linux.IN_CLOEXEC func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := args[0].Int() if flags&^allFlags != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ino, err := vfs.NewInotifyFD(t, t.Kernel().VFS(), uint32(flags)) @@ -60,14 +60,14 @@ func fdToInotify(t *kernel.Task, fd int32) (*vfs.Inotify, *vfs.FileDescription, f := t.GetFileVFS2(fd) if f == nil { // Invalid fd. - return nil, nil, syserror.EBADF + return nil, nil, linuxerr.EBADF } ino, ok := f.Impl().(*vfs.Inotify) if !ok { // Not an inotify fd. f.DecRef(t) - return nil, nil, syserror.EINVAL + return nil, nil, linuxerr.EINVAL } return ino, f, nil @@ -82,7 +82,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern // "EINVAL: The given event mask contains no valid events." // -- inotify_add_watch(2) if mask&linux.ALL_INOTIFY_BITS == 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "IN_DONT_FOLLOW: Don't dereference pathname if it is a symbolic link." diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index c7c3fed57..b806120cd 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -16,10 +16,10 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Ioctl implements Linux syscall ioctl(2). @@ -28,12 +28,12 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if file.StatusFlags()&linux.O_PATH != 0 { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Handle ioctls that apply to all FDs. 
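Both getdents callbacks above size each record as a fixed header plus the name and a trailing NUL, then round up with (size + 7) &^ 7 so the next record stays 8-byte aligned; a record that no longer fits in the remaining user buffer stops the iteration with EINVAL. The rounding idiom on its own (the field widths are the ones the hunk itself spells out):

package main

import "fmt"

// direntRecordSize mirrors the size computation in the getdents hunks:
// 8 (ino) + 8 (off) + 2 (reclen) + 1 (type) + 1 (NUL) + len(name),
// rounded up to the next multiple of 8.
func direntRecordSize(name string) int {
	size := 8 + 8 + 2 + 1 + 1 + len(name)
	return (size + 7) &^ 7 // clear the low three bits after adding 7
}

func main() {
	for _, name := range []string{".", "..", "somefile.txt"} {
		fmt.Printf("%-12s -> %d bytes\n", name, direntRecordSize(name))
	}
	// "." and ".." round 21 and 22 up to 24; "somefile.txt" is already 32.
}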
@@ -99,7 +99,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if who < 0 { // Check for overflow before flipping the sign. if who-1 > who { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } ownerType = linux.F_OWNER_PGRP who = -who diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go index d1452a04d..008603173 100644 --- a/pkg/sentry/syscalls/linux/vfs2/lock.go +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -16,10 +16,10 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) // Flock implements linux syscall flock(2). @@ -30,7 +30,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { // flock(2): EBADF fd is not an open file descriptor. - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -57,7 +57,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } default: // flock(2): EINVAL operation is invalid. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, nil diff --git a/pkg/sentry/syscalls/linux/vfs2/memfd.go b/pkg/sentry/syscalls/linux/vfs2/memfd.go index c4c0f9e0a..70c2cf5a5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/memfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/memfd.go @@ -16,10 +16,10 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" ) const ( @@ -35,7 +35,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if flags&^memfdAllFlags != 0 { // Unknown bits in flags. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } allowSeals := flags&linux.MFD_ALLOW_SEALING != 0 diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go index c961545f6..c804f9fd3 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mmap.go +++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go @@ -16,13 +16,12 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Mmap implements Linux syscall mmap(2). @@ -38,7 +37,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Require exactly one of MAP_PRIVATE and MAP_SHARED. if private == shared { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } opts := memmap.MMapOpts{ @@ -71,13 +70,13 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Convert the passed FD to a file reference. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // mmap unconditionally requires that the FD is readable. if !file.IsReadable() { - return 0, nil, syserror.EACCES + return 0, nil, linuxerr.EACCES } // MAP_SHARED requires that the FD be writable for PROT_WRITE. 
if shared && !file.IsWritable() { diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go index dd93430e2..4d73d46ef 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mount.go +++ b/pkg/sentry/syscalls/linux/vfs2/mount.go @@ -16,12 +16,11 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Mount implements Linux syscall mount(2). @@ -69,7 +68,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // namespace. creds := t.Credentials() if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespaceVFS2().Owner) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } const unsupportedOps = linux.MS_REMOUNT | linux.MS_BIND | @@ -84,7 +83,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // unknown or unsupported flags are passed. Since we don't implement // everything, we fail explicitly on flags that are unimplemented. if flags&(unsupportedOps|unsupportedFlags) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var opts vfs.MountOptions @@ -125,12 +124,12 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Currently, this is always the init task's user namespace. creds := t.Credentials() if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.MountNamespaceVFS2().Owner) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } const unsupported = linux.MNT_FORCE | linux.MNT_EXPIRE if flags&unsupported != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, err := copyInPath(t, addr) diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go index 2aaf1ed74..2bb783a85 100644 --- a/pkg/sentry/syscalls/linux/vfs2/path.go +++ b/pkg/sentry/syscalls/linux/vfs2/path.go @@ -16,12 +16,12 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) func copyInPath(t *kernel.Task, addr hostarch.Addr) (fspath.Path, error) { @@ -53,7 +53,7 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { root.DecRef(t) - return taskPathOperation{}, syserror.EBADF + return taskPathOperation{}, linuxerr.EBADF } start = dirfile.VirtualDentry() start.IncRef() diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go index c6fc1954c..07a89cf4e 100644 --- a/pkg/sentry/syscalls/linux/vfs2/pipe.go +++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go @@ -16,14 +16,13 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Pipe implements Linux syscall pipe(2). 
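The Mmap hunk just above keeps the rule that exactly one of MAP_PRIVATE and MAP_SHARED must be set (private == shared is true only when both or neither are present), then layers the fd checks on top: EBADF for a missing descriptor, EACCES when the file is not readable. The exclusivity test by itself, using the host syscall constants rather than the sentry's linux package:

package main

import (
	"fmt"
	"syscall"
)

// checkMmapSharing enforces "exactly one of MAP_PRIVATE and MAP_SHARED",
// the same private == shared comparison used in the Mmap hunk above.
func checkMmapSharing(flags uintptr) error {
	private := flags&syscall.MAP_PRIVATE != 0
	shared := flags&syscall.MAP_SHARED != 0
	if private == shared { // both set, or neither set
		return syscall.EINVAL
	}
	return nil
}

func main() {
	fmt.Println(checkMmapSharing(syscall.MAP_PRIVATE))                      // <nil>
	fmt.Println(checkMmapSharing(syscall.MAP_SHARED))                       // <nil>
	fmt.Println(checkMmapSharing(0))                                        // invalid argument
	fmt.Println(checkMmapSharing(syscall.MAP_PRIVATE | syscall.MAP_SHARED)) // invalid argument
}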
@@ -41,7 +40,7 @@ func Pipe2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall func pipe2(t *kernel.Task, addr hostarch.Addr, flags int32) error { if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } r, w, err := pipefs.NewConnectedPipeFDs(t, t.Kernel().PipeMount(), uint32(flags&linux.O_NONBLOCK)) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go index a69c80edd..042aa4c97 100644 --- a/pkg/sentry/syscalls/linux/vfs2/poll.go +++ b/pkg/sentry/syscalls/linux/vfs2/poll.go @@ -19,6 +19,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -132,7 +133,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // Wait for a notification. timeout, err = t.BlockWithTimeout(ch, haveTimeout, timeout) if err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = nil } return timeout, 0, err @@ -161,7 +162,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // copyInPollFDs copies an array of struct pollfd unless nfds exceeds the max. func copyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } pfd := make([]linux.PollFD, nfds) @@ -221,7 +222,7 @@ func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialB func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) { if nfds < 0 || nfds > fileCap { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Calculate the size of the fd sets (one bit per fd). @@ -268,7 +269,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Ad // OK. Linux is racy in the same way. file := t.GetFileVFS2(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } file.DecRef(t) @@ -410,7 +411,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) { func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) { remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout) // On an interrupt poll(2) is restarted with the remaining timeout. - if err == syserror.EINTR { + if linuxerr.Equals(linuxerr.EINTR, err) { t.SetSyscallRestartBlock(&pollRestartBlock{ pfdAddr: pfdAddr, nfds: nfds, @@ -462,7 +463,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // // Note that this means that if err is nil but copyErr is not, copyErr is // ignored. This is consistent with Linux. 
- if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err @@ -484,7 +485,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } if timeval.Sec < 0 || timeval.Usec < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } timeout = time.Duration(timeval.ToNsecCapped()) } @@ -492,7 +493,7 @@ func Select(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := doSelect(t, nfds, readFDs, writeFDs, exceptFDs, timeout) copyErr := copyOutTimevalRemaining(t, startNs, timeout, timevalAddr) // See comment in Ppoll. - if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err @@ -539,7 +540,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca n, err := doSelect(t, nfds, readFDs, writeFDs, exceptFDs, timeout) copyErr := copyOutTimespecRemaining(t, startNs, timeout, timespecAddr) // See comment in Ppoll. - if err == syserror.EINTR && copyErr == nil { + if linuxerr.Equals(linuxerr.EINTR, err) && copyErr == nil { err = syserror.ERESTARTNOHAND } return n, nil, err @@ -561,7 +562,7 @@ func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time. return 0, err } if !timespec.Valid() { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } timeout = time.Duration(timespec.ToNsecCapped()) } @@ -573,7 +574,7 @@ func setTempSignalSet(t *kernel.Task, maskAddr hostarch.Addr, maskSize uint) err return nil } if maskSize != linux.SignalSetSize { - return syserror.EINVAL + return linuxerr.EINVAL } var mask linux.SignalSet if _, err := mask.CopyIn(t, maskAddr); err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index b863d7b84..fe8aa06da 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -42,14 +43,14 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -73,7 +74,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -120,7 +121,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break @@ -140,19 +141,19 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. 
if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -177,13 +178,13 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -215,13 +216,13 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the destination of the read. @@ -275,7 +276,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break @@ -293,14 +294,14 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. @@ -324,7 +325,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -371,7 +372,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break @@ -390,19 +391,19 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check that the size is legitimate. si := int(size) if si < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. @@ -427,13 +428,13 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. 
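Pread64, Pwrite64 and (further down) Readahead all guard the file offset the same way: it must be non-negative, and adding the transfer size must not wrap past int64, which the offset+int64(size) < 0 test detects because a wrapped sum of two non-negative operands comes out negative. The guard as a stand-alone function:

package main

import (
	"fmt"
	"math"
	"syscall"
)

// checkOffset mirrors the overflow guard in the Pread64/Pwrite64 hunks:
// with offset and size both non-negative, a sum that wraps past int64 is
// negative, so one comparison catches the overflow.
func checkOffset(offset, size int64) error {
	if offset < 0 || offset+size < 0 {
		return syscall.EINVAL
	}
	return nil
}

func main() {
	fmt.Println(checkOffset(4096, 512))              // <nil>
	fmt.Println(checkOffset(-1, 512))                // invalid argument
	fmt.Println(checkOffset(math.MaxInt64-100, 512)) // invalid argument
}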
@@ -465,13 +466,13 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get the source of the write. @@ -525,7 +526,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o // Wait for a notification that we should retry. if err = t.BlockWithDeadline(ch, hasDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { err = syserror.ErrWouldBlock } break @@ -560,7 +561,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -576,27 +577,27 @@ func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) // Check that the file is readable. if !file.IsReadable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Check that the size is valid. if int(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Return EINVAL; if the underlying file type does not support readahead, // then Linux will return EINVAL to indicate as much. In the future, we // may extend this function to actually support readahead hints. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 647e089d0..b5a3b92c5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -16,15 +16,15 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) const chmodMask = 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX @@ -65,7 +65,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -105,7 +105,7 @@ func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func fchownat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, owner, group, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } path, err := copyInPath(t, pathAddr) @@ -126,7 +126,7 @@ func populateSetStatOptionsForChown(t *kernel.Task, owner, group int32, opts *vf if owner != -1 { kuid := userns.MapToKUID(auth.UID(owner)) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } opts.Stat.Mask |= linux.STATX_UID opts.Stat.UID = uint32(kuid) @@ -134,7 +134,7 @@ func populateSetStatOptionsForChown(t *kernel.Task, owner, group int32, opts *vf if group != -1 { kgid := userns.MapToKGID(auth.GID(group)) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } opts.Stat.Mask |= linux.STATX_GID opts.Stat.GID = uint32(kgid) @@ -150,7 +150,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -167,7 +167,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc length := args[1].Int64() if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, err := copyInPath(t, addr) @@ -191,17 +191,17 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys length := args[1].Int64() if length < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if !file.IsWritable() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } err := file.SetStat(t, vfs.SetStatOptions{ @@ -222,23 +222,23 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if !file.IsWritable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } if mode != 0 { - return 0, nil, syserror.ENOTSUP + return 0, nil, linuxerr.ENOTSUP } if offset < 0 || length <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } size := offset + length if size < 0 { - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } limit := 
limits.FromContext(t).Get(limits.FileSize).Cur if uint64(size) >= limit { @@ -246,7 +246,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys Signo: int32(linux.SIGXFSZ), Code: linux.SI_USER, }) - return 0, nil, syserror.EFBIG + return 0, nil, linuxerr.EFBIG } return 0, nil, file.Allocate(t, mode, uint64(offset), uint64(length)) @@ -340,7 +340,7 @@ func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr hostarch.Addr, op return err } if times[0].Usec < 0 || times[0].Usec > 999999 || times[1].Usec < 0 || times[1].Usec > 999999 { - return syserror.EINVAL + return linuxerr.EINVAL } opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME opts.Stat.Atime = linux.StatxTimestamp{ @@ -372,7 +372,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } if flags&^linux.AT_SYMLINK_NOFOLLOW != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // "If filename is NULL and dfd refers to an open file, then operate on the @@ -405,7 +405,7 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr hostarch.Addr, o } if times[0].Nsec != linux.UTIME_OMIT { if times[0].Nsec != linux.UTIME_NOW && (times[0].Nsec < 0 || times[0].Nsec > 999999999) { - return syserror.EINVAL + return linuxerr.EINVAL } opts.Stat.Mask |= linux.STATX_ATIME opts.Stat.Atime = linux.StatxTimestamp{ @@ -415,7 +415,7 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr hostarch.Addr, o } if times[1].Nsec != linux.UTIME_OMIT { if times[1].Nsec != linux.UTIME_NOW && (times[1].Nsec < 0 || times[1].Nsec > 999999999) { - return syserror.EINVAL + return linuxerr.EINVAL } opts.Stat.Mask |= linux.STATX_MTIME opts.Stat.Mtime = linux.StatxTimestamp{ @@ -440,7 +440,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { - return syserror.EBADF + return linuxerr.EBADF } if !path.HasComponents() { // Use FileDescription.SetStat() instead of @@ -468,7 +468,7 @@ func handleSetSizeError(t *kernel.Task, err error) error { if err == syserror.ErrExceedsFileSizeLimit { // Convert error to EFBIG and send a SIGXFSZ per setrlimit(2). t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t)) - return syserror.EFBIG + return linuxerr.EFBIG } return err } diff --git a/pkg/sentry/syscalls/linux/vfs2/signal.go b/pkg/sentry/syscalls/linux/vfs2/signal.go index 6163da103..27fb2139b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/signal.go +++ b/pkg/sentry/syscalls/linux/vfs2/signal.go @@ -16,13 +16,12 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/signalfd" "gvisor.dev/gvisor/pkg/sentry/kernel" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // sharedSignalfd is shared between the two calls. @@ -35,7 +34,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u // Always check for valid flags, even if not creating. if flags&^(linux.SFD_NONBLOCK|linux.SFD_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Is this a change to an existing signalfd? 
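The Fallocate hunk that ends just above validates in a fixed order: an open, writable descriptor (EBADF), mode 0 only (ENOTSUP), sane offset and length (EINVAL), no int64 wrap in offset+length (EFBIG), and finally the RLIMIT_FSIZE cap, where the task is also sent SIGXFSZ before EFBIG is returned. A condensed sketch of just the size arithmetic; the limit is passed as a plain parameter here, whereas the sentry reads it from limits.FromContext(t):

package main

import (
	"fmt"
	"math"
	"syscall"
)

// checkFallocateSize mirrors the offset/length checks in the Fallocate
// hunk: negative offsets and non-positive lengths are EINVAL, an int64
// wrap of offset+length is EFBIG, and so is exceeding the file size limit.
func checkFallocateSize(offset, length int64, limit uint64) error {
	if offset < 0 || length <= 0 {
		return syscall.EINVAL
	}
	size := offset + length
	if size < 0 {
		return syscall.EFBIG
	}
	if uint64(size) >= limit {
		// The real syscall also delivers SIGXFSZ to the task here.
		return syscall.EFBIG
	}
	return nil
}

func main() {
	fmt.Println(checkFallocateSize(0, 4096, 1<<30))          // <nil>
	fmt.Println(checkFallocateSize(-1, 4096, 1<<30))         // invalid argument
	fmt.Println(checkFallocateSize(math.MaxInt64, 8, 1<<30)) // file too large
	fmt.Println(checkFallocateSize(1<<29, 1<<29, 1<<30))     // file too large
}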
@@ -44,7 +43,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u if fd != -1 { file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -55,7 +54,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize u } // Not a signalfd. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } fileFlags := uint32(linux.O_RDWR) diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index 69f69e3af..0c2e0720b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -18,6 +18,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -117,7 +118,7 @@ type multipleMessageHeader64 struct { // from the untrusted address space range. func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) { if addrlen > maxAddrLen { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } addrBuf := make([]byte, addrlen) @@ -139,7 +140,7 @@ func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr h } if int32(bufLen) < 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // Write the length unconditionally. @@ -173,7 +174,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Check and initialize the flags. if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Create the new socket. @@ -206,7 +207,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Check and initialize the flags. if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Create the socket pair. @@ -256,7 +257,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -281,13 +282,13 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) @@ -309,7 +310,7 @@ func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, if peerRequested { // NOTE(magi): Linux does not give you an error if it can't // write the data back out so neither do we. - if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syserror.EINVAL { + if err := writeAddress(t, peer, peerLen, addr, addrLen); linuxerr.Equals(linuxerr.EINVAL, err) { return 0, err } } @@ -346,7 +347,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Get socket from the file descriptor. 
file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -373,7 +374,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -411,7 +412,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -425,7 +426,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc switch how { case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR: default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } return 0, nil, s.Shutdown(t, int(how)).ToError() @@ -442,7 +443,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -458,7 +459,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, err } if optLen < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Call syscall implementation then copy both value and value len out. @@ -523,7 +524,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -534,10 +535,10 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } if optLen < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if optLen > maxOptLen { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } buf := t.CopyScratchBuffer(int(optLen)) if _, err := t.CopyInBytes(optValAddr, buf); err != nil { @@ -561,7 +562,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -589,7 +590,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -616,13 +617,13 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -634,7 +635,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Reject flags that we don't handle yet. 
if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -664,7 +665,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if vlen > linux.UIO_MAXIOV { @@ -673,13 +674,13 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Reject flags that we don't handle yet. if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -701,7 +702,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } if !ts.Valid() { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration()) haveDeadline = true @@ -721,7 +722,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := uint64(0); i < uint64(vlen); i++ { mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } var n uintptr if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil { @@ -731,7 +732,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Copy the received length to the caller. lp, ok := mp.AddLength(messageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { break @@ -753,7 +754,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl } if msg.IovLen > linux.UIO_MAXIOV { - return 0, syserror.EMSGSIZE + return 0, linuxerr.EMSGSIZE } dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, @@ -784,7 +785,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl } if msg.ControlLen > maxControlLen { - return 0, syserror.ENOBUFS + return 0, linuxerr.ENOBUFS } n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) if e != nil { @@ -833,18 +834,18 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, fl // recvfrom and recv syscall handlers. func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) { if int(bufLen) < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Reject flags that we don't handle yet. if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) @@ -911,13 +912,13 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if t.Arch().Width() != 8 { // We only handle 64-bit for now. 
- return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -929,7 +930,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Reject flags that we don't handle yet. if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -949,7 +950,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if t.Arch().Width() != 8 { // We only handle 64-bit for now. - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if vlen > linux.UIO_MAXIOV { @@ -959,7 +960,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Get socket from the file descriptor. file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -971,7 +972,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Reject flags that we don't handle yet. if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { @@ -983,7 +984,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for i := uint64(0); i < uint64(vlen); i++ { mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } var n uintptr if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil { @@ -993,7 +994,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Copy the received length to the caller. lp, ok := mp.AddLength(messageHeader64Len) if !ok { - return 0, nil, syserror.EFAULT + return 0, nil, linuxerr.EFAULT } if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { break @@ -1018,7 +1019,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio if msg.ControlLen > 0 { // Put an upper bound to prevent large allocations. if msg.ControlLen > maxControlLen { - return 0, syserror.ENOBUFS + return 0, linuxerr.ENOBUFS } controlData = make([]byte, msg.ControlLen) if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil { @@ -1038,7 +1039,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio // Read data then call the sendmsg implementation. if msg.IovLen > linux.UIO_MAXIOV { - return 0, syserror.EMSGSIZE + return 0, linuxerr.EMSGSIZE } src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ AddressSpaceActive: true, @@ -1077,13 +1078,13 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Get socket from the file descriptor. 
file := t.GetFileVFS2(fd) if file == nil { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } defer file.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 19e175203..d8009123f 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -18,6 +18,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -46,29 +47,29 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal count = int64(kernel.MAX_RW_COUNT) } if count < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check for invalid flags. if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get file descriptions. inFile := t.GetFileVFS2(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) outFile := t.GetFileVFS2(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) // Check that both files support the required directionality. if !inFile.IsReadable() || !outFile.IsWritable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // The operation is non-blocking if anything is non-blocking. @@ -82,38 +83,38 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal inPipeFD, inIsPipe := inFile.Impl().(*pipe.VFSPipeFD) outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD) if !inIsPipe && !outIsPipe { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Copy in offsets. inOffset := int64(-1) if inOffsetPtr != 0 { if inIsPipe { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } if inFile.Options().DenyPRead { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if _, err := primitive.CopyInt64In(t, inOffsetPtr, &inOffset); err != nil { return 0, nil, err } if inOffset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } outOffset := int64(-1) if outOffsetPtr != 0 { if outIsPipe { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } if outFile.Options().DenyPWrite { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if _, err := primitive.CopyInt64In(t, outOffsetPtr, &outOffset); err != nil { return 0, nil, err } if outOffset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } @@ -189,29 +190,29 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo count = int64(kernel.MAX_RW_COUNT) } if count < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Check for invalid flags. if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Get file descriptions. inFile := t.GetFileVFS2(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) outFile := t.GetFileVFS2(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) // Check that both files support the required directionality. 
if !inFile.IsReadable() || !outFile.IsWritable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // The operation is non-blocking if anything is non-blocking. @@ -225,7 +226,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo inPipeFD, inIsPipe := inFile.Impl().(*pipe.VFSPipeFD) outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD) if !inIsPipe || !outIsPipe { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Copy data. @@ -270,25 +271,25 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc inFile := t.GetFileVFS2(inFD) if inFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer inFile.DecRef(t) if !inFile.IsReadable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } outFile := t.GetFileVFS2(outFD) if outFile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer outFile.DecRef(t) if !outFile.IsWritable() { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } // Verify that the outFile Append flag is not set. if outFile.StatusFlags()&linux.O_APPEND != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Verify that inFile is a regular file or block device. This is a @@ -298,14 +299,14 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } else if stat.Mask&linux.STATX_TYPE == 0 || (stat.Mode&linux.S_IFMT != linux.S_IFREG && stat.Mode&linux.S_IFMT != linux.S_IFBLK) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Copy offset if it exists. offset := int64(-1) if offsetAddr != 0 { if inFile.Options().DenyPRead { - return 0, nil, syserror.ESPIPE + return 0, nil, linuxerr.ESPIPE } var offsetP primitive.Int64 if _, err := offsetP.CopyIn(t, offsetAddr); err != nil { @@ -314,16 +315,16 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc offset = int64(offsetP) if offset < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if offset+count < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } } // Validate count. This must come after offset checks. if count < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if count == 0 { return 0, nil, nil diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index 69e77fa99..ba1d30823 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -17,15 +17,15 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // Stat implements Linux syscall stat(2). 
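The flag validation in the sendmsg, splice, and tee handlers above all uses the same masking idiom: clear every supported bit and fail with EINVAL if anything is left over. A minimal standalone sketch of that pattern, using the standard library's Linux MSG_* constants; the checkSendFlags helper is illustrative and not part of the gVisor API:

    package main

    import (
        "fmt"
        "syscall"
    )

    // supportedSendFlags mirrors the mask used by the send handlers above:
    // any flag outside this set makes the call fail with EINVAL.
    const supportedSendFlags = syscall.MSG_DONTWAIT | syscall.MSG_EOR | syscall.MSG_MORE | syscall.MSG_NOSIGNAL

    // checkSendFlags rejects unsupported bits by masking off the supported
    // ones and testing whether anything remains.
    func checkSendFlags(flags int) error {
        if flags & ^supportedSendFlags != 0 {
            return syscall.EINVAL
        }
        return nil
    }

    func main() {
        fmt.Println(checkSendFlags(syscall.MSG_DONTWAIT))                   // <nil>
        fmt.Println(checkSendFlags(syscall.MSG_DONTWAIT | syscall.MSG_OOB)) // invalid argument
    }
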
@@ -53,7 +53,7 @@ func Newfstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flags int32) error { if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } opts := vfs.StatOptions{ @@ -78,7 +78,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flag } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { - return syserror.EBADF + return linuxerr.EBADF } if !path.HasComponents() { // Use FileDescription.Stat() instead of @@ -131,7 +131,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -156,15 +156,15 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall statxAddr := args[4].Pointer() if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW|linux.AT_STATX_SYNC_TYPE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Make sure that only one sync type option is set. syncType := uint32(flags & linux.AT_STATX_SYNC_TYPE) if syncType != 0 && !bits.IsPowerOfTwo32(syncType) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if mask&linux.STATX__RESERVED != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } opts := vfs.StatOptions{ @@ -190,7 +190,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } if !path.HasComponents() { // Use FileDescription.Stat() instead of @@ -272,7 +272,7 @@ func accessAt(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, mode uint) er // Sanity check the mode. if mode&^(rOK|wOK|xOK) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } path, err := copyInPath(t, pathAddr) @@ -315,7 +315,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr hostarch.Addr, size uint) (uintptr, *kernel.SyscallControl, error) { if int(size) <= 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } path, err := copyInPath(t, pathAddr) diff --git a/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go b/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go index 2da538fc6..122921b52 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go b/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go index 88b9c7627..d32031481 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build arm64 // +build arm64 package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 1f8a5878c..d0ffc7c32 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -16,6 +16,7 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -32,12 +33,12 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) if file.StatusFlags()&linux.O_PATH != 0 { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } return 0, nil, file.SyncFS(t) @@ -49,7 +50,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -71,15 +72,15 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel // Check for negative values and overflow. if offset < 0 || offset+nbytes < 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } if flags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|linux.SYNC_FILE_RANGE_WRITE|linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) diff --git a/pkg/sentry/syscalls/linux/vfs2/timerfd.go b/pkg/sentry/syscalls/linux/vfs2/timerfd.go index 250870c03..b8f96a757 100644 --- a/pkg/sentry/syscalls/linux/vfs2/timerfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/timerfd.go @@ -16,11 +16,11 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsimpl/timerfd" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) // TimerfdCreate implements Linux syscall timerfd_create(2). 
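The arch-specific files in this change (stat_amd64.go and stat_arm64.go above, and the time sampler files further down) gain the Go 1.17 //go:build line while keeping the legacy // +build line, and one old-style tag missing its conventional space is normalized. A sketch of what such a file header looks like after the change; the package name and constant are placeholders:

    //go:build arm64
    // +build arm64

    // Package sample is a placeholder illustrating the paired constraints:
    // the //go:build line is the one newer toolchains read, the // +build
    // line keeps older ones working, and gofmt keeps the two in sync.
    package sample

    // wordSize stands in for an arch-specific definition.
    const wordSize = 8
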
@@ -29,7 +29,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel flags := args[1].Int() if flags&^(linux.TFD_CLOEXEC|linux.TFD_NONBLOCK) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Timerfds aren't writable per se (their implementation of Write just @@ -47,7 +47,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel case linux.CLOCK_MONOTONIC, linux.CLOCK_BOOTTIME: clock = t.Kernel().MonotonicClock() default: - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } vfsObj := t.Kernel().VFS() file, err := timerfd.New(t, vfsObj, clock, fileFlags) @@ -72,18 +72,18 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne oldValAddr := args[3].Pointer() if flags&^(linux.TFD_TIMER_ABSTIME) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) tfd, ok := file.Impl().(*timerfd.TimerFileDescription) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } var newVal linux.Itimerspec @@ -111,13 +111,13 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) tfd, ok := file.Impl().(*timerfd.TimerFileDescription) if !ok { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } tm, s := tfd.GetTime() diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go index c261050c6..7b2f69c45 100644 --- a/pkg/sentry/syscalls/linux/vfs2/xattr.go +++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go @@ -18,13 +18,12 @@ import ( "bytes" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/hostarch" ) // ListXattr implements Linux syscall listxattr(2). @@ -71,7 +70,7 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -140,7 +139,7 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -178,7 +177,7 @@ func setxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli flags := args[4].Int() if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } path, err := copyInPath(t, pathAddr) @@ -216,12 +215,12 @@ func Fsetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys flags := args[4].Int() if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -280,7 +279,7 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. 
file := t.GetFileVFS2(fd) if file == nil { - return 0, nil, syserror.EBADF + return 0, nil, linuxerr.EBADF } defer file.DecRef(t) @@ -295,13 +294,13 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) { name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) if err != nil { - if err == syserror.ENAMETOOLONG { - return "", syserror.ERANGE + if linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { + return "", linuxerr.ERANGE } return "", err } if len(name) == 0 { - return "", syserror.ERANGE + return "", linuxerr.ERANGE } return name, nil } @@ -321,16 +320,16 @@ func copyOutXattrNameList(t *kernel.Task, listAddr hostarch.Addr, size uint, nam } if buf.Len() > int(size) { if size >= linux.XATTR_LIST_MAX { - return 0, syserror.E2BIG + return 0, linuxerr.E2BIG } - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return t.CopyOutBytes(listAddr, buf.Bytes()) } func copyInXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint) (string, error) { if size > linux.XATTR_SIZE_MAX { - return "", syserror.E2BIG + return "", linuxerr.E2BIG } buf := make([]byte, size) if _, err := t.CopyInBytes(valueAddr, buf); err != nil { @@ -349,9 +348,9 @@ func copyOutXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint, value } if len(value) > int(size) { if size >= linux.XATTR_SIZE_MAX { - return 0, syserror.E2BIG + return 0, linuxerr.E2BIG } - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return t.CopyOutBytes(valueAddr, gohacks.ImmutableBytesFromString(value)) } diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go index f88055676..511fb8b28 100644 --- a/pkg/sentry/syscalls/syscalls.go +++ b/pkg/sentry/syscalls/syscalls.go @@ -28,6 +28,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" @@ -99,13 +100,13 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne Name: name, Fn: func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { if !t.HasCapability(c) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } t.Kernel().EmitUnimplementedEvent(t) return 0, nil, syserror.ENOSYS }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, syserror.EPERM, c.String(), syserror.ENOSYS), + Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, linuxerr.EPERM, c.String(), syserror.ENOSYS), URLs: urls, } } diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index 202486a1e..36d999c47 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -34,6 +34,7 @@ go_library( ], visibility = ["//:sandbox"], deps = [ + "//pkg/errors/linuxerr", "//pkg/gohacks", "//pkg/log", "//pkg/metric", diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go index 39bf1e0de..eed74f6bd 100644 --- a/pkg/sentry/time/calibrated_clock.go +++ b/pkg/sentry/time/calibrated_clock.go @@ -19,10 +19,10 @@ package time import ( "time" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // CalibratedClock implements a clock that tracks a reference clock. 
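The recurring pattern in this diff is mechanical: error returns move from syserror.* to the matching linuxerr.* values, and direct == comparisons against error sentinels become linuxerr.Equals calls, as in copyInXattrName above. A rough sketch of caller code after the migration, assuming the gVisor module is on the import path; only the linuxerr values and the Equals helper shown in this diff are used, and lookupName is a made-up helper for illustration:

    package sample

    import (
        "gvisor.dev/gvisor/pkg/errors/linuxerr"
    )

    // lookupName is a hypothetical helper that fails with ENAMETOOLONG when
    // the name is longer than max bytes and with EINVAL when it is empty.
    func lookupName(name string, max int) (string, error) {
        if len(name) > max {
            return "", linuxerr.ENAMETOOLONG
        }
        if len(name) == 0 {
            return "", linuxerr.EINVAL
        }
        return name, nil
    }

    // checkName converts ENAMETOOLONG into ERANGE, mirroring copyInXattrName.
    // The errno is matched with linuxerr.Equals rather than ==, which also
    // matches other representations of the same errno during the migration.
    func checkName(name string) (string, error) {
        s, err := lookupName(name, 255)
        if err != nil {
            if linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
                return "", linuxerr.ERANGE
            }
            return "", err
        }
        return s, nil
    }
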
@@ -259,6 +259,6 @@ func (c *CalibratedClocks) GetTime(id ClockID) (int64, error) { case Realtime: return c.realtime.GetTime() default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } diff --git a/pkg/sentry/time/calibrated_clock_test.go b/pkg/sentry/time/calibrated_clock_test.go index d6622bfe2..0a4b1f1bf 100644 --- a/pkg/sentry/time/calibrated_clock_test.go +++ b/pkg/sentry/time/calibrated_clock_test.go @@ -50,6 +50,7 @@ func TestConstantFrequency(t *testing.T) { if !c.ready { c.mu.RUnlock() t.Fatalf("clock not ready") + return // For checklocks consistency. } // A bit after the last sample. now, ok := c.params.ComputeTime(750000) diff --git a/pkg/sentry/time/sampler_amd64.go b/pkg/sentry/time/sampler_amd64.go index 9f1b4b2fb..5fa1832b4 100644 --- a/pkg/sentry/time/sampler_amd64.go +++ b/pkg/sentry/time/sampler_amd64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build amd64 +//go:build amd64 +// +build amd64 package time diff --git a/pkg/sentry/time/sampler_arm64.go b/pkg/sentry/time/sampler_arm64.go index 4c8d33ae4..3560e66ae 100644 --- a/pkg/sentry/time/sampler_arm64.go +++ b/pkg/sentry/time/sampler_arm64.go @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//+build arm64 +//go:build arm64 +// +build arm64 package time diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index 581862ee2..e7073ec87 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -132,7 +132,7 @@ func Init() error { // always be the case for a newly mapped page from /dev/shm. If we obtain // the shared memory through some other means in the future, we may have to // explicitly zero the page. - mmap, err := unix.Mmap(int(file.Fd()), 0, int(RTMemoryStatsSize), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) + mmap, err := memutil.MapFile(0, RTMemoryStatsSize, unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED, file.Fd(), 0) if err != nil { return fmt.Errorf("error mapping usage file: %v", err) } diff --git a/pkg/sentry/usage/memory_unsafe.go b/pkg/sentry/usage/memory_unsafe.go index 9e0014ca0..bc1531b91 100644 --- a/pkg/sentry/usage/memory_unsafe.go +++ b/pkg/sentry/usage/memory_unsafe.go @@ -21,7 +21,7 @@ import ( // RTMemoryStatsSize is the size of the RTMemoryStats struct. var RTMemoryStatsSize = unsafe.Sizeof(RTMemoryStats{}) -// RTMemoryStatsPointer casts the address of the byte slice into a RTMemoryStats pointer. -func RTMemoryStatsPointer(b []byte) *RTMemoryStats { - return (*RTMemoryStats)(unsafe.Pointer(&b[0])) +// RTMemoryStatsPointer casts addr to a RTMemoryStats pointer. 
+func RTMemoryStatsPointer(addr uintptr) *RTMemoryStats { + return (*RTMemoryStats)(unsafe.Pointer(addr)) } diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index ac60fe8bf..a2032162d 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -95,6 +95,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/fd", "//pkg/fdnotifier", "//pkg/fspath", @@ -133,6 +134,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/contexttest", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index f48817132..255d3992e 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -19,11 +19,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserror" ) // NewAnonVirtualDentry returns a VirtualDentry with the given synthetic name, @@ -101,7 +101,7 @@ func (fs *anonFilesystem) Sync(ctx context.Context) error { // AccessAt implements vfs.Filesystem.Impl.AccessAt. func (fs *anonFilesystem) AccessAt(ctx context.Context, rp *ResolvingPath, creds *auth.Credentials, ats AccessTypes) error { if !rp.Done() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } return GenericCheckPermissions(creds, ats, anonFileMode, anonFileUID, anonFileGID) } @@ -109,10 +109,10 @@ func (fs *anonFilesystem) AccessAt(ctx context.Context, rp *ResolvingPath, creds // GetDentryAt implements FilesystemImpl.GetDentryAt. func (fs *anonFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) { if !rp.Done() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if opts.CheckSearchable { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // anonDentry no-ops refcounting. return rp.Start(), nil @@ -121,7 +121,7 @@ func (fs *anonFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, op // GetParentDentryAt implements FilesystemImpl.GetParentDentryAt. func (fs *anonFilesystem) GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error) { if !rp.Final() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } // anonDentry no-ops refcounting. return rp.Start(), nil @@ -130,63 +130,63 @@ func (fs *anonFilesystem) GetParentDentryAt(ctx context.Context, rp *ResolvingPa // LinkAt implements FilesystemImpl.LinkAt. func (fs *anonFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // MkdirAt implements FilesystemImpl.MkdirAt. func (fs *anonFilesystem) MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // MknodAt implements FilesystemImpl.MknodAt. func (fs *anonFilesystem) MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // OpenAt implements FilesystemImpl.OpenAt. 
func (fs *anonFilesystem) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) { if !rp.Done() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } - return nil, syserror.ENODEV + return nil, linuxerr.ENODEV } // ReadlinkAt implements FilesystemImpl.ReadlinkAt. func (fs *anonFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) { if !rp.Done() { - return "", syserror.ENOTDIR + return "", linuxerr.ENOTDIR } - return "", syserror.EINVAL + return "", linuxerr.EINVAL } // RenameAt implements FilesystemImpl.RenameAt. func (fs *anonFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // RmdirAt implements FilesystemImpl.RmdirAt. func (fs *anonFilesystem) RmdirAt(ctx context.Context, rp *ResolvingPath) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // SetStatAt implements FilesystemImpl.SetStatAt. func (fs *anonFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error { if !rp.Done() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Linux actually permits anon_inode_inode's metadata to be set, which is // visible to all users of anon_inode_inode. We just silently ignore @@ -197,7 +197,7 @@ func (fs *anonFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts // StatAt implements FilesystemImpl.StatAt. func (fs *anonFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) { if !rp.Done() { - return linux.Statx{}, syserror.ENOTDIR + return linux.Statx{}, linuxerr.ENOTDIR } // See fs/anon_inodes.c:anon_inode_init() => fs/libfs.c:alloc_anon_inode(). return linux.Statx{ @@ -218,7 +218,7 @@ func (fs *anonFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts St // StatFSAt implements FilesystemImpl.StatFSAt. func (fs *anonFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) { if !rp.Done() { - return linux.Statfs{}, syserror.ENOTDIR + return linux.Statfs{}, linuxerr.ENOTDIR } return linux.Statfs{ Type: linux.ANON_INODE_FS_MAGIC, @@ -229,34 +229,34 @@ func (fs *anonFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linu // SymlinkAt implements FilesystemImpl.SymlinkAt. func (fs *anonFilesystem) SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // UnlinkAt implements FilesystemImpl.UnlinkAt. func (fs *anonFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error { if !rp.Final() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error) { if !rp.Final() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } if err := GenericCheckPermissions(rp.Credentials(), MayWrite, anonFileMode, anonFileUID, anonFileGID); err != nil { return nil, err } - return nil, syserror.ECONNREFUSED + return nil, linuxerr.ECONNREFUSED } // ListXattrAt implements FilesystemImpl.ListXattrAt. 
func (fs *anonFilesystem) ListXattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) { if !rp.Done() { - return nil, syserror.ENOTDIR + return nil, linuxerr.ENOTDIR } return nil, nil } @@ -264,25 +264,25 @@ func (fs *anonFilesystem) ListXattrAt(ctx context.Context, rp *ResolvingPath, si // GetXattrAt implements FilesystemImpl.GetXattrAt. func (fs *anonFilesystem) GetXattrAt(ctx context.Context, rp *ResolvingPath, opts GetXattrOptions) (string, error) { if !rp.Done() { - return "", syserror.ENOTDIR + return "", linuxerr.ENOTDIR } - return "", syserror.ENOTSUP + return "", linuxerr.ENOTSUP } // SetXattrAt implements FilesystemImpl.SetXattrAt. func (fs *anonFilesystem) SetXattrAt(ctx context.Context, rp *ResolvingPath, opts SetXattrOptions) error { if !rp.Done() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // RemoveXattrAt implements FilesystemImpl.RemoveXattrAt. func (fs *anonFilesystem) RemoveXattrAt(ctx context.Context, rp *ResolvingPath, name string) error { if !rp.Done() { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } - return syserror.EPERM + return linuxerr.EPERM } // PrependPath implements FilesystemImpl.PrependPath. diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index e7ca24d96..cb92b6eee 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -18,8 +18,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Dentry represents a node in a Filesystem tree at which a file exists. @@ -196,11 +196,12 @@ func (d *Dentry) OnZeroWatches(ctx context.Context) { // PrepareDeleteDentry must be called before attempting to delete the file // represented by d. If PrepareDeleteDentry succeeds, the caller must call // AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome. +// +checklocksacquire:d.mu func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { vfs.mountMu.Lock() if mntns.mountpoints[d] != 0 { vfs.mountMu.Unlock() - return syserror.EBUSY + return linuxerr.EBUSY // +checklocksforce: inconsistent return. } d.mu.Lock() vfs.mountMu.Unlock() @@ -211,14 +212,14 @@ func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dent // AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion // fails. -// +checklocks:d.mu +// +checklocksrelease:d.mu func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { d.mu.Unlock() } // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion // succeeds. -// +checklocks:d.mu +// +checklocksrelease:d.mu func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) { d.dead = true d.mu.Unlock() @@ -249,16 +250,18 @@ func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) { // Preconditions: // * If to is not nil, it must be a child Dentry from the same Filesystem. // * from != to. +// +checklocksacquire:from.mu +// +checklocksacquire:to.mu func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { vfs.mountMu.Lock() if mntns.mountpoints[from] != 0 { vfs.mountMu.Unlock() - return syserror.EBUSY + return linuxerr.EBUSY // +checklocksforce: no locks acquired. } if to != nil { if mntns.mountpoints[to] != 0 { vfs.mountMu.Unlock() - return syserror.EBUSY + return linuxerr.EBUSY // +checklocksforce: no locks acquired. 
} to.mu.Lock() } @@ -267,13 +270,13 @@ func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, t // Return with from.mu and to.mu locked, which will be unlocked by // AbortRenameDentry, CommitRenameReplaceDentry, or // CommitRenameExchangeDentry. - return nil + return nil // +checklocksforce: to may not be acquired. } // AbortRenameDentry must be called after PrepareRenameDentry if the rename // fails. -// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { from.mu.Unlock() if to != nil { @@ -286,8 +289,8 @@ func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { // that was replaced by from. // // Preconditions: PrepareRenameDentry was previously called on from and to. -// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) { from.mu.Unlock() if to != nil { @@ -303,8 +306,8 @@ func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, fro // from and to are exchanged by rename(RENAME_EXCHANGE). // // Preconditions: PrepareRenameDentry was previously called on from and to. -// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { from.mu.Unlock() to.mu.Unlock() diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go index dde2ad79b..572d81afc 100644 --- a/pkg/sentry/vfs/device.go +++ b/pkg/sentry/vfs/device.go @@ -18,7 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // DeviceKind indicates whether a device is a block or character device. @@ -100,7 +100,7 @@ func (vfs *VirtualFilesystem) OpenDeviceSpecialFile(ctx context.Context, mnt *Mo defer vfs.devicesMu.RUnlock() rd, ok := vfs.devices[tup] if !ok { - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } return rd.dev.Open(ctx, mnt, d, *opts) } @@ -120,7 +120,7 @@ func (vfs *VirtualFilesystem) GetAnonBlockDevMinor() (uint32, error) { } minor++ } - return 0, syserror.EMFILE + return 0, linuxerr.EMFILE } // PutAnonBlockDevMinor deallocates a minor device number returned by a diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index ae004b371..befe3ca25 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -17,6 +17,7 @@ package vfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" @@ -174,7 +175,7 @@ func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, event lin // that cyclic polling is not introduced after the check. defer epollCycleMu.Unlock() if subep.mightPoll(ep) { - return syserror.ELOOP + return linuxerr.ELOOP } } @@ -187,7 +188,7 @@ func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, event lin num: num, } if _, ok := ep.interest[key]; ok { - return syserror.EEXIST + return linuxerr.EEXIST } // Register interest in file. 
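The dentry.go hunks above replace plain +checklocks annotations with +checklocksacquire on functions that return holding a lock and +checklocksrelease on the paths that drop it, using +checklocksforce on early-error returns the analyzer cannot follow. A minimal sketch of how the annotations read on a standalone type; the registry type and its methods are invented, only the annotation names come from this diff:

    package sample

    import (
        "errors"
        "sync"
    )

    var errBusy = errors.New("device or resource busy")

    type registry struct {
        mu   sync.Mutex
        busy bool
    }

    // prepare locks r.mu on success so the caller may mutate the registry;
    // the annotation tells the analyzer the lock is held on a nil return.
    // +checklocksacquire:r.mu
    func (r *registry) prepare() error {
        r.mu.Lock()
        if r.busy {
            r.mu.Unlock()
            return errBusy // +checklocksforce: lock already dropped on this path.
        }
        return nil
    }

    // commit releases the lock taken by a successful prepare.
    // +checklocksrelease:r.mu
    func (r *registry) commit() {
        r.busy = true
        r.mu.Unlock()
    }

    // abort releases the lock taken by a successful prepare without
    // committing.
    // +checklocksrelease:r.mu
    func (r *registry) abort() {
        r.mu.Unlock()
    }
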
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index ef8d8a813..ca3303dec 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -20,13 +20,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -252,7 +252,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede return err } if (stat.AttributesMask&linux.STATX_ATTR_APPEND != 0) && (stat.Attributes&linux.STATX_ATTR_APPEND != 0) { - return syserror.EPERM + return linuxerr.EPERM } } if (flags&linux.O_NOATIME != 0) && (oldFlags&linux.O_NOATIME == 0) { @@ -266,14 +266,14 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede return err } if stat.Mask&linux.STATX_UID == 0 { - return syserror.EPERM + return linuxerr.EPERM } if !CanActAsOwner(creds, auth.KUID(stat.UID)) { - return syserror.EPERM + return linuxerr.EPERM } } if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO { - return syserror.EINVAL + return linuxerr.EINVAL } // TODO(gvisor.dev/issue/1035): FileDescriptionImpl.SetOAsync()? const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK @@ -567,7 +567,7 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { // Allocate grows file represented by FileDescription to offset + length bytes. func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { if !fd.IsWritable() { - return syserror.EBADF + return linuxerr.EBADF } if err := fd.impl.Allocate(ctx, mode, offset, length); err != nil { return err @@ -602,10 +602,10 @@ func (fd *FileDescription) EventUnregister(e *waiter.Entry) { // partial reads with a nil error. func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { if fd.opts.DenyPRead { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } if !fd.readable { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } start := fsmetric.StartReadWait() n, err := fd.impl.PRead(ctx, dst, offset, opts) @@ -620,7 +620,7 @@ func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of // Read is similar to PRead, but does not specify an offset. func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { if !fd.readable { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } start := fsmetric.StartReadWait() n, err := fd.impl.Read(ctx, dst, opts) @@ -637,10 +637,10 @@ func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opt // return partial writes with a nil error. 
func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { if fd.opts.DenyPWrite { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } if !fd.writable { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } n, err := fd.impl.PWrite(ctx, src, offset, opts) if n > 0 { @@ -652,7 +652,7 @@ func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, o // Write is similar to PWrite, but does not specify an offset. func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { if !fd.writable { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } n, err := fd.impl.Write(ctx, src, opts) if n > 0 { @@ -708,8 +708,8 @@ func (fd *FileDescription) ListXattr(ctx context.Context, size uint64) ([]string return names, err } names, err := fd.impl.ListXattr(ctx, size) - if err == syserror.ENOTSUP { - // Linux doesn't actually return ENOTSUP in this case; instead, + if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { + // Linux doesn't actually return EOPNOTSUPP in this case; instead, // fs/xattr.c:vfs_listxattr() falls back to allowing the security // subsystem to return security extended attributes, which by default // don't exist. @@ -873,7 +873,7 @@ func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, l } off = int64(stat.Size) default: - return lock.LockRange{}, syserror.EINVAL + return lock.LockRange{}, linuxerr.EINVAL } return lock.ComputeRange(int64(start), int64(length), off) diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 2b6f47b4b..a875fdeca 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -65,7 +66,7 @@ func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, err // should technically return EISDIR. Allocate should never be called for a // directory, because it requires a writable fd. func (FileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.ENODEV + return linuxerr.ENODEV } // Readiness implements waiter.Waitable.Readiness analogously to @@ -88,81 +89,81 @@ func (FileDescriptionDefaultImpl) EventUnregister(e *waiter.Entry) { // PRead implements FileDescriptionImpl.PRead analogously to // file_operations::read == file_operations::read_iter == NULL in Linux. func (FileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Read implements FileDescriptionImpl.Read analogously to // file_operations::read == file_operations::read_iter == NULL in Linux. func (FileDescriptionDefaultImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // PWrite implements FileDescriptionImpl.PWrite analogously to // file_operations::write == file_operations::write_iter == NULL in Linux. 
func (FileDescriptionDefaultImpl) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Write implements FileDescriptionImpl.Write analogously to // file_operations::write == file_operations::write_iter == NULL in Linux. func (FileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // IterDirents implements FileDescriptionImpl.IterDirents analogously to // file_operations::iterate == file_operations::iterate_shared == NULL in // Linux. func (FileDescriptionDefaultImpl) IterDirents(ctx context.Context, cb IterDirentsCallback) error { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Seek implements FileDescriptionImpl.Seek analogously to // file_operations::llseek == NULL in Linux. func (FileDescriptionDefaultImpl) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Sync implements FileDescriptionImpl.Sync analogously to // file_operations::fsync == NULL in Linux. func (FileDescriptionDefaultImpl) Sync(ctx context.Context) error { - return syserror.EINVAL + return linuxerr.EINVAL } // ConfigureMMap implements FileDescriptionImpl.ConfigureMMap analogously to // file_operations::mmap == NULL in Linux. func (FileDescriptionDefaultImpl) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return syserror.ENODEV + return linuxerr.ENODEV } // Ioctl implements FileDescriptionImpl.Ioctl analogously to // file_operations::unlocked_ioctl == NULL in Linux. func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } // ListXattr implements FileDescriptionImpl.ListXattr analogously to // inode_operations::listxattr == NULL in Linux. func (FileDescriptionDefaultImpl) ListXattr(ctx context.Context, size uint64) ([]string, error) { // This isn't exactly accurate; see FileDescription.ListXattr. - return nil, syserror.ENOTSUP + return nil, linuxerr.ENOTSUP } // GetXattr implements FileDescriptionImpl.GetXattr analogously to // inode::i_opflags & IOP_XATTR == 0 in Linux. func (FileDescriptionDefaultImpl) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) { - return "", syserror.ENOTSUP + return "", linuxerr.ENOTSUP } // SetXattr implements FileDescriptionImpl.SetXattr analogously to // inode::i_opflags & IOP_XATTR == 0 in Linux. func (FileDescriptionDefaultImpl) SetXattr(ctx context.Context, opts SetXattrOptions) error { - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // RemoveXattr implements FileDescriptionImpl.RemoveXattr analogously to // inode::i_opflags & IOP_XATTR == 0 in Linux. func (FileDescriptionDefaultImpl) RemoveXattr(ctx context.Context, name string) error { - return syserror.ENOTSUP + return linuxerr.ENOTSUP } // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of @@ -333,10 +334,10 @@ func (fd *DynamicBytesFileDescriptionImpl) Seek(ctx context.Context, offset int6 offset += fd.off default: // fs/seq_file:seq_lseek() rejects SEEK_END etc. - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if offset != fd.lastRead { // Regenerate the file's contents immediately. 
Compare @@ -357,7 +358,7 @@ func (fd *DynamicBytesFileDescriptionImpl) Seek(ctx context.Context, offset int6 // Preconditions: fd.mu must be locked. func (fd *DynamicBytesFileDescriptionImpl) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { - return 0, syserror.EOPNOTSUPP + return 0, linuxerr.EOPNOTSUPP } limit, err := CheckLimit(ctx, offset, src.NumBytes()) if err != nil { @@ -467,27 +468,27 @@ func (NoLockFD) SupportsLocks() bool { // LockBSD implements FileDescriptionImpl.LockBSD. func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { - return syserror.ENOLCK + return linuxerr.ENOLCK } // UnlockBSD implements FileDescriptionImpl.UnlockBSD. func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { - return syserror.ENOLCK + return linuxerr.ENOLCK } // LockPOSIX implements FileDescriptionImpl.LockPOSIX. func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { - return syserror.ENOLCK + return linuxerr.ENOLCK } // UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX. func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error { - return syserror.ENOLCK + return linuxerr.ENOLCK } // TestPOSIX implements FileDescriptionImpl.TestPOSIX. func (NoLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { - return linux.Flock{}, syserror.ENOLCK + return linux.Flock{}, linuxerr.ENOLCK } // BadLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface @@ -503,25 +504,25 @@ func (BadLockFD) SupportsLocks() bool { // LockBSD implements FileDescriptionImpl.LockBSD. func (BadLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { - return syserror.EBADF + return linuxerr.EBADF } // UnlockBSD implements FileDescriptionImpl.UnlockBSD. func (BadLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { - return syserror.EBADF + return linuxerr.EBADF } // LockPOSIX implements FileDescriptionImpl.LockPOSIX. func (BadLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { - return syserror.EBADF + return linuxerr.EBADF } // UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX. func (BadLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error { - return syserror.EBADF + return linuxerr.EBADF } // TestPOSIX implements FileDescriptionImpl.TestPOSIX. 
func (BadLockFD) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { - return linux.Flock{}, syserror.EBADF + return linux.Flock{}, linuxerr.EBADF } diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 1cd607c0a..3423dede1 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -102,7 +103,7 @@ func (fd *testFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, erro // SetStat implements FileDescriptionImpl.SetStat. func (fd *testFD) SetStat(ctx context.Context, opts SetStatOptions) error { - return syserror.EPERM + return linuxerr.EPERM } func TestGenCountFD(t *testing.T) { @@ -155,10 +156,10 @@ func TestGenCountFD(t *testing.T) { } // Write and PWrite fails. - if _, err := fd.Write(ctx, ioseq, WriteOptions{}); err != syserror.EIO { + if _, err := fd.Write(ctx, ioseq, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) } - if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); err != syserror.EIO { + if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); !linuxerr.Equals(linuxerr.EIO, err) { t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) } } @@ -215,10 +216,10 @@ func TestWritable(t *testing.T) { if n, err := fd.Seek(ctx, 1, linux.SEEK_SET); n != 0 && err != nil { t.Errorf("Seek: got err (%v, %v), wanted (0, nil)", n, err) } - if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); n != 0 && err != syserror.EINVAL { + if n, err := fd.Write(ctx, writeIOSeq, WriteOptions{}); n != 0 && !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("Write: got err (%v, %v), wanted (0, EINVAL)", n, err) } - if n, err := fd.PWrite(ctx, writeIOSeq, 2, WriteOptions{}); n != 0 && err != syserror.EINVAL { + if n, err := fd.PWrite(ctx, writeIOSeq, 2, WriteOptions{}); n != 0 && !linuxerr.Equals(linuxerr.EINVAL, err) { t.Errorf("PWrite: got err (%v, %v), wanted (0, EINVAL)", n, err) } } diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 49d29e20b..088beb8e2 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/uniqueid" @@ -98,7 +99,7 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) // O_CLOEXEC affects file descriptors, so it must be handled outside of vfs. flags &^= linux.O_CLOEXEC if flags&^linux.O_NONBLOCK != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } id := uniqueid.GlobalFromContext(ctx) @@ -184,23 +185,23 @@ func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask { // PRead implements FileDescriptionImpl.PRead. func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // PWrite implements FileDescriptionImpl.PWrite. 
func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.ESPIPE + return 0, linuxerr.ESPIPE } // Write implements FileDescriptionImpl.Write. func (*Inotify) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Read implements FileDescriptionImpl.Read. func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { if dst.NumBytes() < inotifyEventBaseSize { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } i.evMu.Lock() @@ -226,7 +227,7 @@ func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOpt // write some events out. return writeLen, nil } - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // Linux always dequeues an available event as long as there's enough @@ -262,7 +263,7 @@ func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallAr return 0, err default: - return 0, syserror.ENOTTY + return 0, linuxerr.ENOTTY } } @@ -332,7 +333,7 @@ func (i *Inotify) AddWatch(target *Dentry, mask uint32) (int32, error) { if ws == nil { // While Linux supports inotify watches on all filesystem types, watches on // filesystems like kernfs are not generally useful, so we do not. - return 0, syserror.EPERM + return 0, linuxerr.EPERM } // Does the target already have a watch from this inotify instance? if existing := ws.Lookup(i.id); existing != nil { @@ -360,7 +361,7 @@ func (i *Inotify) RmWatch(ctx context.Context, wd int32) error { w, ok := i.watches[wd] if !ok { i.mu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } // Remove the watch from this instance. diff --git a/pkg/sentry/vfs/memxattr/BUILD b/pkg/sentry/vfs/memxattr/BUILD index ea82f4987..444ab42b9 100644 --- a/pkg/sentry/vfs/memxattr/BUILD +++ b/pkg/sentry/vfs/memxattr/BUILD @@ -8,9 +8,9 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/errors/linuxerr", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go index 9b7953fa3..f0f82a4d6 100644 --- a/pkg/sentry/vfs/memxattr/xattr.go +++ b/pkg/sentry/vfs/memxattr/xattr.go @@ -20,10 +20,10 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // SimpleExtendedAttributes implements extended attributes using a map of @@ -49,12 +49,12 @@ func (x *SimpleExtendedAttributes) GetXattr(creds *auth.Credentials, mode linux. value, ok := x.xattrs[opts.Name] x.mu.RUnlock() if !ok { - return "", syserror.ENODATA + return "", linuxerr.ENODATA } // Check that the size of the buffer provided in getxattr(2) is large enough // to contain the value. if opts.Size != 0 && uint64(len(value)) > opts.Size { - return "", syserror.ERANGE + return "", linuxerr.ERANGE } return value, nil } @@ -69,17 +69,17 @@ func (x *SimpleExtendedAttributes) SetXattr(creds *auth.Credentials, mode linux. 
defer x.mu.Unlock() if x.xattrs == nil { if opts.Flags&linux.XATTR_REPLACE != 0 { - return syserror.ENODATA + return linuxerr.ENODATA } x.xattrs = make(map[string]string) } _, ok := x.xattrs[opts.Name] if ok && opts.Flags&linux.XATTR_CREATE != 0 { - return syserror.EEXIST + return linuxerr.EEXIST } if !ok && opts.Flags&linux.XATTR_REPLACE != 0 { - return syserror.ENODATA + return linuxerr.ENODATA } x.xattrs[opts.Name] = opts.Value @@ -106,7 +106,7 @@ func (x *SimpleExtendedAttributes) ListXattr(creds *auth.Credentials, size uint6 } x.mu.RUnlock() if size != 0 && uint64(listSize) > size { - return nil, syserror.ERANGE + return nil, linuxerr.ERANGE } return names, nil } @@ -120,7 +120,7 @@ func (x *SimpleExtendedAttributes) RemoveXattr(creds *auth.Credentials, mode lin x.mu.Lock() defer x.mu.Unlock() if _, ok := x.xattrs[name]; !ok { - return syserror.ENODATA + return linuxerr.ENODATA } delete(x.xattrs, name) return nil diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index f93da3af1..4d6b59a26 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -159,7 +160,7 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth rft := vfs.getFilesystemType(fsTypeName) if rft == nil { ctx.Warningf("Unknown filesystem type: %s", fsTypeName) - return nil, syserror.ENODEV + return nil, linuxerr.ENODEV } fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions) if err != nil { @@ -192,10 +193,10 @@ func (vfs *VirtualFilesystem) NewDisconnectedMount(fs *Filesystem, root *Dentry, func (vfs *VirtualFilesystem) MountDisconnected(ctx context.Context, creds *auth.Credentials, source string, fsTypeName string, opts *MountOptions) (*Mount, error) { rft := vfs.getFilesystemType(fsTypeName) if rft == nil { - return nil, syserror.ENODEV + return nil, linuxerr.ENODEV } if !opts.InternalMount && !rft.opts.AllowUserMount { - return nil, syserror.ENODEV + return nil, linuxerr.ENODEV } fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions) if err != nil { @@ -284,7 +285,7 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia // UmountAt removes the Mount at the given path. func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *UmountOptions) error { if opts.Flags&^(linux.MNT_FORCE|linux.MNT_DETACH) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // MNT_FORCE is currently unimplemented except for the permission check. @@ -292,7 +293,7 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti // namespace, and not in the owner user namespace for the target mount. See // fs/namespace.c:SYSCALL_DEFINE2(umount, ...) 
if opts.Flags&linux.MNT_FORCE != 0 && creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, creds.UserNamespace.Root()) { - return syserror.EPERM + return linuxerr.EPERM } vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) @@ -301,19 +302,19 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti } defer vd.DecRef(ctx) if vd.dentry != vd.mount.root { - return syserror.EINVAL + return linuxerr.EINVAL } vfs.mountMu.Lock() if mntns := MountNamespaceFromContext(ctx); mntns != nil { defer mntns.DecRef(ctx) if mntns != vd.mount.ns { vfs.mountMu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } if vd.mount == vd.mount.ns.root { vfs.mountMu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } } @@ -326,7 +327,7 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti if len(vd.mount.children) != 0 { vfs.mounts.seq.EndWrite() vfs.mountMu.Unlock() - return syserror.EBUSY + return linuxerr.EBUSY } // We are holding a reference on vd.mount. expectedRefs := int64(1) @@ -336,7 +337,7 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti if atomic.LoadInt64(&vd.mount.refs)&^math.MinInt64 != expectedRefs { // mask out MSB vfs.mounts.seq.EndWrite() vfs.mountMu.Unlock() - return syserror.EBUSY + return linuxerr.EBUSY } } vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(vd.mount, &umountRecursiveOptions{ @@ -710,7 +711,7 @@ func (vfs *VirtualFilesystem) SetMountReadOnly(mnt *Mount, ro bool) error { func (mnt *Mount) CheckBeginWrite() error { if atomic.AddInt64(&mnt.writers, 1) < 0 { atomic.AddInt64(&mnt.writers, -1) - return syserror.EROFS + return linuxerr.EROFS } return nil } @@ -728,7 +729,7 @@ func (mnt *Mount) setReadOnlyLocked(ro bool) error { } if ro { if !atomic.CompareAndSwapInt64(&mnt.writers, 0, math.MinInt64) { - return syserror.EBUSY + return linuxerr.EBUSY } return nil } diff --git a/pkg/sentry/vfs/opath.go b/pkg/sentry/vfs/opath.go index e9651b631..da0b33b79 100644 --- a/pkg/sentry/vfs/opath.go +++ b/pkg/sentry/vfs/opath.go @@ -17,10 +17,10 @@ package vfs import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -40,77 +40,77 @@ func (fd *opathFD) Release(context.Context) { // Allocate implements FileDescriptionImpl.Allocate. func (fd *opathFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.EBADF + return linuxerr.EBADF } // PRead implements FileDescriptionImpl.PRead. func (fd *opathFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Read implements FileDescriptionImpl.Read. func (fd *opathFD) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // PWrite implements FileDescriptionImpl.PWrite. func (fd *opathFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Write implements FileDescriptionImpl.Write. 
func (fd *opathFD) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // Ioctl implements FileDescriptionImpl.Ioctl. func (fd *opathFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // IterDirents implements FileDescriptionImpl.IterDirents. func (fd *opathFD) IterDirents(ctx context.Context, cb IterDirentsCallback) error { - return syserror.EBADF + return linuxerr.EBADF } // Seek implements FileDescriptionImpl.Seek. func (fd *opathFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.EBADF + return 0, linuxerr.EBADF } // ConfigureMMap implements FileDescriptionImpl.ConfigureMMap. func (fd *opathFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return syserror.EBADF + return linuxerr.EBADF } // ListXattr implements FileDescriptionImpl.ListXattr. func (fd *opathFD) ListXattr(ctx context.Context, size uint64) ([]string, error) { - return nil, syserror.EBADF + return nil, linuxerr.EBADF } // GetXattr implements FileDescriptionImpl.GetXattr. func (fd *opathFD) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) { - return "", syserror.EBADF + return "", linuxerr.EBADF } // SetXattr implements FileDescriptionImpl.SetXattr. func (fd *opathFD) SetXattr(ctx context.Context, opts SetXattrOptions) error { - return syserror.EBADF + return linuxerr.EBADF } // RemoveXattr implements FileDescriptionImpl.RemoveXattr. func (fd *opathFD) RemoveXattr(ctx context.Context, name string) error { - return syserror.EBADF + return linuxerr.EBADF } // Sync implements FileDescriptionImpl.Sync. func (fd *opathFD) Sync(ctx context.Context) error { - return syserror.EBADF + return linuxerr.EBADF } // SetStat implements FileDescriptionImpl.SetStat. func (fd *opathFD) SetStat(ctx context.Context, opts SetStatOptions) error { - return syserror.EBADF + return linuxerr.EBADF } // Stat implements FileDescriptionImpl.Stat. diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index b7704874f..4744514bd 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/syserror" @@ -77,7 +78,7 @@ func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, mode linu // the caller's user namespace; compare // kernel/capability.c:privileged_wrt_inode_uidgid(). if !kuid.In(creds.UserNamespace).Ok() || !kgid.In(creds.UserNamespace).Ok() { - return syserror.EACCES + return linuxerr.EACCES } // CAP_DAC_READ_SEARCH allows the caller to read and search arbitrary // directories, and read arbitrary non-directory files. @@ -94,7 +95,7 @@ func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, mode linu return nil } } - return syserror.EACCES + return linuxerr.EACCES } // MayLink determines whether creating a hard link to a file with the given @@ -110,12 +111,12 @@ func MayLink(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid // Only regular files can be hard linked. if mode.FileType() != linux.S_IFREG { - return syserror.EPERM + return linuxerr.EPERM } // Setuid files should not get pinned to the filesystem. 
if mode&linux.S_ISUID != 0 { - return syserror.EPERM + return linuxerr.EPERM } // Executable setgid files should not get pinned to the filesystem, but we @@ -123,7 +124,7 @@ func MayLink(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid // Hardlinking to unreadable or unwritable sources is dangerous. if err := GenericCheckPermissions(creds, MayRead|MayWrite, mode, kuid, kgid); err != nil { - return syserror.EPERM + return linuxerr.EPERM } return nil } @@ -199,7 +200,7 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt } if stat.Mask&linux.STATX_MODE != 0 { if !CanActAsOwner(creds, kuid) { - return syserror.EPERM + return linuxerr.EPERM } // TODO(b/30815691): "If the calling process is not privileged (Linux: // does not have the CAP_FSETID capability), and the group of the file @@ -210,13 +211,13 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt if stat.Mask&linux.STATX_UID != 0 { if !((creds.EffectiveKUID == kuid && auth.KUID(stat.UID) == kuid) || HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) { - return syserror.EPERM + return linuxerr.EPERM } } if stat.Mask&linux.STATX_GID != 0 { if !((creds.EffectiveKUID == kuid && creds.InGroup(auth.KGID(stat.GID))) || HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) { - return syserror.EPERM + return linuxerr.EPERM } } if opts.NeedWritePerm && !creds.HasCapability(linux.CAP_DAC_OVERRIDE) { @@ -229,7 +230,7 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt if (stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW) || (stat.Mask&linux.STATX_MTIME != 0 && stat.Mtime.Nsec != linux.UTIME_NOW) || (stat.Mask&linux.STATX_CTIME != 0 && stat.Ctime.Nsec != linux.UTIME_NOW) { - return syserror.EPERM + return linuxerr.EPERM } if err := GenericCheckPermissions(creds, MayWrite, mode, kuid, kgid); err != nil { return err @@ -252,7 +253,7 @@ func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, paren HasCapabilityOnFile(creds, linux.CAP_FOWNER, childKUID, childKGID) { return nil } - return syserror.EPERM + return linuxerr.EPERM } // CanActAsOwner returns true if creds can act as the owner of a file with the @@ -306,9 +307,9 @@ func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux. return nil } if ats.MayWrite() { - return syserror.EPERM + return linuxerr.EPERM } - return syserror.ENODATA + return linuxerr.ENODATA case strings.HasPrefix(name, linux.XATTR_USER_PREFIX): // In the user.* namespace, only regular files and directories can have // extended attributes. For sticky directories, only the owner and @@ -316,12 +317,12 @@ func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux. 
filetype := mode.FileType() if filetype != linux.ModeRegular && filetype != linux.ModeDirectory { if ats.MayWrite() { - return syserror.EPERM + return linuxerr.EPERM } - return syserror.ENODATA + return linuxerr.ENODATA } if filetype == linux.ModeDirectory && mode&linux.ModeSticky != 0 && ats.MayWrite() && !CanActAsOwner(creds, kuid) { - return syserror.EPERM + return linuxerr.EPERM } } return nil diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index 97b898aba..6f58f33ce 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" @@ -327,7 +328,7 @@ func (rp *ResolvingPath) ShouldFollowSymlink() bool { // Postconditions: If HandleSymlink returns a nil error, then !rp.Done(). func (rp *ResolvingPath) HandleSymlink(target string) error { if rp.symlinks >= linux.MaxSymlinkTraversals { - return syserror.ELOOP + return linuxerr.ELOOP } if len(target) == 0 { return syserror.ENOENT @@ -377,7 +378,7 @@ func (rp *ResolvingPath) relpathPrepend(path fspath.Path) { // Preconditions: !rp.Done(). func (rp *ResolvingPath) HandleJump(target VirtualDentry) error { if rp.symlinks >= linux.MaxSymlinkTraversals { - return syserror.ELOOP + return linuxerr.ELOOP } rp.symlinks++ // Consume the path component that represented the magic link. diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 87fdcf403..eb3c60610 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -42,6 +42,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -278,14 +279,14 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential if !newpop.Path.Begin.Ok() { oldVD.DecRef(ctx) if newpop.Path.Absolute { - return syserror.EEXIST + return linuxerr.EEXIST } return syserror.ENOENT } if newpop.FollowFinalSymlink { oldVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, newpop) @@ -315,13 +316,13 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia // pop.Path should not be empty in operations that create/delete files. // This is consistent with mkdirat(dirfd, "", mode). if pop.Path.Absolute { - return syserror.EEXIST + return linuxerr.EEXIST } return syserror.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } // "Under Linux, apart from the permission bits, the S_ISVTX mode bit is // also honored." - mkdir(2) @@ -353,13 +354,13 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia // pop.Path should not be empty in operations that create/delete files. // This is consistent with mknodat(dirfd, "", mode, dev). 
if pop.Path.Absolute { - return syserror.EEXIST + return linuxerr.EEXIST } return syserror.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) @@ -402,13 +403,13 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential // filesystem implementations that do not support it). if opts.Flags&linux.O_TMPFILE != 0 { if opts.Flags&linux.O_DIRECTORY == 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if opts.Flags&linux.O_CREAT != 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if opts.Flags&linux.O_ACCMODE == linux.O_RDONLY { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } // O_PATH causes most other flags to be ignored. @@ -426,9 +427,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential if opts.Flags&linux.O_DIRECTORY != 0 { rp.mustBeDir = true } - // Ignore O_PATH for verity, as verity performs extra operations on the fd for verification. - // The underlying filesystem that verity wraps opens the fd with O_PATH. - if opts.Flags&linux.O_PATH != 0 && rp.mount.fs.FilesystemType().Name() != "verity" { + if opts.Flags&linux.O_PATH != 0 { vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) if err != nil { return nil, err @@ -448,7 +447,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential if opts.FileExec { if fd.Mount().Flags.NoExec { fd.DecRef(ctx) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } // Only a regular file can be executed. @@ -459,7 +458,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential } if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.S_IFMT != linux.S_IFREG { fd.DecRef(ctx) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } } @@ -493,13 +492,13 @@ func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Creden func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation, opts *RenameOptions) error { if !oldpop.Path.Begin.Ok() { if oldpop.Path.Absolute { - return syserror.EBUSY + return linuxerr.EBUSY } return syserror.ENOENT } if oldpop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } oldParentVD, oldName, err := vfs.getParentDirAndName(ctx, creds, oldpop) @@ -508,20 +507,20 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti } if oldName == "." || oldName == ".." { oldParentVD.DecRef(ctx) - return syserror.EBUSY + return linuxerr.EBUSY } if !newpop.Path.Begin.Ok() { oldParentVD.DecRef(ctx) if newpop.Path.Absolute { - return syserror.EBUSY + return linuxerr.EBUSY } return syserror.ENOENT } if newpop.FollowFinalSymlink { oldParentVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, newpop) @@ -555,13 +554,13 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia // pop.Path should not be empty in operations that create/delete files. // This is consistent with unlinkat(dirfd, "", AT_REMOVEDIR). 
if pop.Path.Absolute { - return syserror.EBUSY + return linuxerr.EBUSY } return syserror.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) @@ -638,13 +637,13 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent // pop.Path should not be empty in operations that create/delete files. // This is consistent with symlinkat(oldpath, newdirfd, ""). if pop.Path.Absolute { - return syserror.EEXIST + return linuxerr.EEXIST } return syserror.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) @@ -672,13 +671,13 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti // pop.Path should not be empty in operations that create/delete files. // This is consistent with unlinkat(dirfd, "", 0). if pop.Path.Absolute { - return syserror.EBUSY + return linuxerr.EBUSY } return syserror.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink") - return syserror.EINVAL + return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) @@ -731,8 +730,8 @@ func (vfs *VirtualFilesystem) ListXattrAt(ctx context.Context, creds *auth.Crede rp.Release(ctx) return names, nil } - if err == syserror.ENOTSUP { - // Linux doesn't actually return ENOTSUP in this case; instead, + if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { + // Linux doesn't actually return EOPNOTSUPP in this case; instead, // fs/xattr.c:vfs_listxattr() falls back to allowing the security // subsystem to return security extended attributes, which by // default don't exist. @@ -830,14 +829,14 @@ func (vfs *VirtualFilesystem) MkdirAllAt(ctx context.Context, currentPath string Path: fspath.Parse(currentPath), } stat, err := vfs.StatAt(ctx, creds, pop, &StatOptions{Mask: linux.STATX_TYPE}) - switch err { - case nil: + switch { + case err == nil: if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.FileTypeMask != linux.ModeDirectory { - return syserror.ENOTDIR + return linuxerr.ENOTDIR } // Directory already exists. return nil - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // Expected, we will create the dir. default: return fmt.Errorf("stat failed for %q during directory creation: %w", currentPath, err) @@ -871,7 +870,7 @@ func (vfs *VirtualFilesystem) MakeSyntheticMountpoint(ctx context.Context, targe Root: root, Start: root, Path: fspath.Parse(target), - }, mkdirOpts); err != nil && err != syserror.EEXIST { + }, mkdirOpts); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { return fmt.Errorf("failed to create mountpoint %q: %w", target, err) } return nil diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index 8d563d53a..e8f7d1f01 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -77,11 +77,6 @@ var DefaultOpts = Opts{ // trigger it. 
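The vfs changes above replace sentinel comparisons against syserror values with linuxerr values compared through linuxerr.Equals (see MkdirAllAt and MakeSyntheticMountpoint). A minimal, self-contained sketch of the comparison pattern; lookup stands in for any VFS operation:

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/errors/linuxerr"
    )

    // lookup stands in for a VFS operation that fails with ENOENT.
    func lookup() error {
        return linuxerr.ENOENT
    }

    func main() {
        err := lookup()
        // linuxerr.Equals is used instead of ==, so errors produced by layers
        // that still return other error types compare consistently.
        switch {
        case err == nil:
            fmt.Println("found")
        case linuxerr.Equals(linuxerr.ENOENT, err):
            fmt.Println("not found, create it")
        default:
            fmt.Println("unexpected error:", err)
        }
    }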
const descheduleThreshold = 1 * time.Second -var ( - stuckStartup = metric.MustCreateNewUint64Metric("/watchdog/stuck_startup_detected", true /* sync */, "Incremented once on startup watchdog timeout") - stuckTasks = metric.MustCreateNewUint64Metric("/watchdog/stuck_tasks_detected", true /* sync */, "Cumulative count of stuck tasks detected") -) - // Amount of time to wait before dumping the stack to the log again when the same task(s) remains stuck. var stackDumpSameTaskPeriod = time.Minute @@ -242,7 +237,6 @@ func (w *Watchdog) waitForStart() { return } - stuckStartup.Increment() metric.WeirdnessMetric.Increment("watchdog_stuck_startup") var buf bytes.Buffer @@ -316,7 +310,6 @@ func (w *Watchdog) runTurn() { // unless they are surrounded by // Task.UninterruptibleSleepStart/Finish. tc = &offender{lastUpdateTime: lastUpdateTime} - stuckTasks.Increment() metric.WeirdnessMetric.Increment("watchdog_stuck_tasks") newTaskFound = true } diff --git a/pkg/shim/epoll.go b/pkg/shim/epoll.go index 737d2b781..463e11a84 100644 --- a/pkg/shim/epoll.go +++ b/pkg/shim/epoll.go @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package shim diff --git a/pkg/shim/proc/BUILD b/pkg/shim/proc/BUILD index 544bdc170..c8527a6d9 100644 --- a/pkg/shim/proc/BUILD +++ b/pkg/shim/proc/BUILD @@ -20,7 +20,9 @@ go_library( "//shim:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/shim/runsc", + "//pkg/shim/utils", "@com_github_containerd_console//:go_default_library", "@com_github_containerd_containerd//errdefs:go_default_library", "@com_github_containerd_containerd//log:go_default_library", diff --git a/pkg/shim/proc/exec.go b/pkg/shim/proc/exec.go index 14df3a778..da2e21598 100644 --- a/pkg/shim/proc/exec.go +++ b/pkg/shim/proc/exec.go @@ -26,11 +26,13 @@ import ( "github.com/containerd/console" "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" "github.com/containerd/containerd/pkg/stdio" "github.com/containerd/fifo" runc "github.com/containerd/go-runc" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/shim/runsc" ) @@ -92,6 +94,12 @@ func (e *execProcess) SetExited(status int) { } func (e *execProcess) setExited(status int) { + if !e.exited.IsZero() { + log.L.Debugf("Exec: status already set to %d, ignoring status: %d", e.status, status) + return + } + + log.L.Debugf("Exec: setting status: %d", status) e.status = status e.exited = time.Now() e.parent.Platform.ShutdownConsole(context.Background(), e.console) @@ -105,7 +113,7 @@ func (e *execProcess) Delete(ctx context.Context) error { return e.execState.Delete(ctx) } -func (e *execProcess) delete(ctx context.Context) error { +func (e *execProcess) delete() { e.wg.Wait() if e.io != nil { for _, c := range e.closers { @@ -113,13 +121,6 @@ func (e *execProcess) delete(ctx context.Context) error { } e.io.Close() } - pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - // silently ignore error - os.Remove(pidfile) - internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - // silently ignore error - os.Remove(internalPidfile) - return nil } func (e *execProcess) Resize(ws console.WinSize) error { @@ -171,42 +172,53 @@ func (e *execProcess) Start(ctx context.Context) error { return e.execState.Start(ctx) } -func (e *execProcess) start(ctx context.Context) (err error) { - var ( - socket *runc.Socket - pidfile = 
filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - ) - if e.stdio.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { +func (e *execProcess) start(ctx context.Context) error { + var socket *runc.Socket + + switch { + case e.stdio.Terminal: + s, err := runc.NewTempConsoleSocket() + if err != nil { return fmt.Errorf("failed to create runc console socket: %w", err) } - defer socket.Close() - } else if e.stdio.IsNull() { - if e.io, err = runc.NewNullIO(); err != nil { + defer s.Close() + socket = s + + case e.stdio.IsNull(): + io, err := runc.NewNullIO() + if err != nil { return fmt.Errorf("creating new NULL IO: %w", err) } - } else { - if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { + e.io = io + + default: + io, err := runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)) + if err != nil { return fmt.Errorf("failed to create runc io pipes: %w", err) } + e.io = io } + opts := &runsc.ExecOpts{ - PidFile: pidfile, - InternalPidFile: internalPidfile, + PidFile: filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)), + InternalPidFile: filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)), IO: e.io, Detach: true, } + defer func() { + _ = os.Remove(opts.PidFile) + _ = os.Remove(opts.InternalPidFile) + }() if socket != nil { opts.ConsoleSocket = socket } + eventCh := e.parent.Monitor.Subscribe() - defer func() { - // Unsubscribe if an error is returned. - if err != nil { - e.parent.Monitor.Unsubscribe(eventCh) - } - }() + cu := cleanup.Make(func() { + e.parent.Monitor.Unsubscribe(eventCh) + }) + defer cu.Clean() + if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { close(e.waitBlock) return e.parent.runtimeError(err, "OCI runtime exec failed") @@ -234,6 +246,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { return fmt.Errorf("failed to start io pipe copy: %w", err) } } + pid, err := runc.ReadPidFile(opts.PidFile) if err != nil { return fmt.Errorf("failed to retrieve OCI runtime exec pid: %w", err) @@ -244,6 +257,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { return fmt.Errorf("failed to retrieve OCI runtime exec internal pid: %w", err) } e.internalPid = internalPid + go func() { defer e.parent.Monitor.Unsubscribe(eventCh) for event := range eventCh { @@ -257,21 +271,25 @@ func (e *execProcess) start(ctx context.Context) (err error) { } } }() + + cu.Release() // cancel cleanup on success. return nil } -func (e *execProcess) Status(ctx context.Context) (string, error) { +func (e *execProcess) Status(context.Context) (string, error) { e.mu.Lock() defer e.mu.Unlock() // if we don't have a pid then the exec process has just been created if e.pid == 0 { return "created", nil } - // if we have a pid and it can be signaled, the process is running - // TODO(random-liu): Use `runsc kill --pid`. - if err := unix.Kill(e.pid, 0); err == nil { - return "running", nil + // This checks that `runsc exec` process is still running. This process has + // the same lifetime as the process executing inside the container. So instead + // of calling `runsc kill --pid`, just do a quick check that `runsc exec` is + // still running. + if err := unix.Kill(e.pid, 0); err != nil { + // Can't signal the process, it must have exited. 
+ return "stopped", nil } - // else if we have a pid but it can nolonger be signaled, it has stopped - return "stopped", nil + return "running", nil } diff --git a/pkg/shim/proc/exec_state.go b/pkg/shim/proc/exec_state.go index 04a5d19b4..03ecb401a 100644 --- a/pkg/shim/proc/exec_state.go +++ b/pkg/shim/proc/exec_state.go @@ -63,10 +63,8 @@ func (s *execCreatedState) Start(ctx context.Context) error { return nil } -func (s *execCreatedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } +func (s *execCreatedState) Delete(context.Context) error { + s.p.delete() s.transition(deleted) return nil } @@ -143,16 +141,14 @@ func (s *execStoppedState) Start(context.Context) error { return fmt.Errorf("cannot start a stopped process") } -func (s *execStoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } +func (s *execStoppedState) Delete(context.Context) error { + s.p.delete() s.transition(deleted) return nil } -func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) +func (s *execStoppedState) Kill(_ context.Context, sig uint32, _ bool) error { + return handleStoppedKill(sig) } func (s *execStoppedState) SetExited(int) { diff --git a/pkg/shim/proc/init_state.go b/pkg/shim/proc/init_state.go index d65020e76..5347ddefe 100644 --- a/pkg/shim/proc/init_state.go +++ b/pkg/shim/proc/init_state.go @@ -23,6 +23,7 @@ import ( "github.com/containerd/containerd/pkg/process" runc "github.com/containerd/go-runc" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/shim/utils" ) type stateTransition int @@ -235,6 +236,6 @@ func handleStoppedKill(signal uint32) error { // already been killed. return nil default: - return errdefs.ToGRPCf(errdefs.ErrNotFound, "process not found") + return utils.ErrToGRPCf(errdefs.ErrNotFound, "process not found") } } diff --git a/pkg/shim/runtimeoptions/runtimeoptions_cri.go b/pkg/shim/runtimeoptions/runtimeoptions_cri.go index e6102b4cf..23bbd82be 100644 --- a/pkg/shim/runtimeoptions/runtimeoptions_cri.go +++ b/pkg/shim/runtimeoptions/runtimeoptions_cri.go @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package runtimeoptions import ( diff --git a/pkg/shim/service.go b/pkg/shim/service.go index ea9a1ae10..24e3b7a82 100644 --- a/pkg/shim/service.go +++ b/pkg/shim/service.go @@ -452,10 +452,10 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*ta } process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } // Set up OOM notification on the sandbox's cgroup. 
This is done on @@ -530,10 +530,10 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*typ p := s.processes[r.ExecID] s.mu.Unlock() if p != nil { - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) + return nil, utils.ErrToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) } if s.task == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{ ID: r.ExecID, @@ -544,7 +544,7 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*typ Spec: r.Spec, }) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } s.mu.Lock() s.processes[r.ExecID] = process @@ -565,7 +565,7 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (* Height: uint16(r.Height), } if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } return empty, nil } @@ -615,7 +615,7 @@ func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Em log.L.Debugf("Pause, id: %s", r.ID) if s.task == nil { log.L.Debugf("Pause error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } err := s.task.Runtime().Pause(ctx, r.ID) if err != nil { @@ -629,7 +629,7 @@ func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types. log.L.Debugf("Resume, id: %s", r.ID) if s.task == nil { log.L.Debugf("Resume error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } err := s.task.Runtime().Resume(ctx, r.ID) if err != nil { @@ -648,7 +648,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empt } if err := p.Kill(ctx, r.Signal, r.All); err != nil { log.L.Debugf("Kill failed: %v", err) - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } log.L.Debugf("Kill succeeded") return empty, nil @@ -660,7 +660,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi pids, err := s.getContainerPids(ctx, r.ID) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } var processes []*task.ProcessInfo for _, pid := range pids { @@ -706,7 +706,7 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*type // Checkpoint checkpoints the container. func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) { log.L.Debugf("Checkpoint, id: %s", r.ID) - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) + return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented) } // Connect returns shim information such as the shim's pid. @@ -737,7 +737,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. 
log.L.Debugf("Stats, id: %s", r.ID) if s.task == nil { log.L.Debugf("Stats error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } stats, err := s.task.Stats(ctx, s.id) if err != nil { @@ -811,7 +811,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. // Update updates a running container. func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) + return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented) } // Wait waits for a process to exit. @@ -908,14 +908,14 @@ func (s *service) getProcess(execID string) (process.Process, error) { if execID == "" { if s.task == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } return s.task, nil } p := s.processes[execID] if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) + return nil, utils.ErrToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) } return p, nil } diff --git a/pkg/shim/service_linux.go b/pkg/shim/service_linux.go index 829f69282..fb2f8b062 100644 --- a/pkg/shim/service_linux.go +++ b/pkg/shim/service_linux.go @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package shim diff --git a/pkg/shim/utils/BUILD b/pkg/shim/utils/BUILD index 54a0aabb7..2eb82f63c 100644 --- a/pkg/shim/utils/BUILD +++ b/pkg/shim/utils/BUILD @@ -6,6 +6,7 @@ go_library( name = "utils", srcs = [ "annotations.go", + "errors.go", "utils.go", "volumes.go", ], @@ -14,14 +15,23 @@ go_library( "//shim:__subpackages__", ], deps = [ + "@com_github_containerd_containerd//errdefs:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_google_grpc//codes:go_default_library", + "@org_golang_google_grpc//status:go_default_library", ], ) go_test( name = "utils_test", size = "small", - srcs = ["volumes_test.go"], + srcs = [ + "errors_test.go", + "volumes_test.go", + ], library = ":utils", - deps = ["@com_github_opencontainers_runtime_spec//specs-go:go_default_library"], + deps = [ + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], ) diff --git a/pkg/shim/utils/errors.go b/pkg/shim/utils/errors.go new file mode 100644 index 000000000..971d68c36 --- /dev/null +++ b/pkg/shim/utils/errors.go @@ -0,0 +1,74 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import ( + "context" + "errors" + "fmt" + + "github.com/containerd/containerd/errdefs" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// ErrToGRPC wraps containerd's ToGRPC error mapper which depends on +// github.com/pkg/errors to work correctly. Once we upgrade to containerd v1.4, +// this function can go away and we can use errdefs.ToGRPC directly instead. +// +// TODO(gvisor.dev/issue/6232): Remove after upgrading to containerd v1.4 +func ErrToGRPC(err error) error { + return errToGRPCMsg(err, err.Error()) +} + +// ErrToGRPCf maps the error to grpc error codes, assembling the formatting +// string and combining it with the target error string. +// +// TODO(gvisor.dev/issue/6232): Remove after upgrading to containerd v1.4 +func ErrToGRPCf(err error, format string, args ...interface{}) error { + formatted := fmt.Sprintf(format, args...) + msg := fmt.Sprintf("%s: %s", formatted, err.Error()) + return errToGRPCMsg(err, msg) +} + +func errToGRPCMsg(err error, msg string) error { + if err == nil { + return nil + } + if _, ok := status.FromError(err); ok { + return err + } + + switch { + case errors.Is(err, errdefs.ErrInvalidArgument): + return status.Errorf(codes.InvalidArgument, msg) + case errors.Is(err, errdefs.ErrNotFound): + return status.Errorf(codes.NotFound, msg) + case errors.Is(err, errdefs.ErrAlreadyExists): + return status.Errorf(codes.AlreadyExists, msg) + case errors.Is(err, errdefs.ErrFailedPrecondition): + return status.Errorf(codes.FailedPrecondition, msg) + case errors.Is(err, errdefs.ErrUnavailable): + return status.Errorf(codes.Unavailable, msg) + case errors.Is(err, errdefs.ErrNotImplemented): + return status.Errorf(codes.Unimplemented, msg) + case errors.Is(err, context.Canceled): + return status.Errorf(codes.Canceled, msg) + case errors.Is(err, context.DeadlineExceeded): + return status.Errorf(codes.DeadlineExceeded, msg) + } + + return errdefs.ToGRPC(err) +} diff --git a/pkg/shim/utils/errors_test.go b/pkg/shim/utils/errors_test.go new file mode 100644 index 000000000..0a8fe34c8 --- /dev/null +++ b/pkg/shim/utils/errors_test.go @@ -0,0 +1,50 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
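The ErrToGRPC and ErrToGRPCf helpers above map containerd errdefs errors onto gRPC status codes via errors.Is, so wrapped errors survive the trip. A short sketch of a call site and the client-side round trip (the exec ID is illustrative):

    package main

    import (
        "fmt"

        "github.com/containerd/containerd/errdefs"
        "google.golang.org/grpc/status"
        "gvisor.dev/gvisor/pkg/shim/utils"
    )

    func main() {
        // As a shim handler might produce it when an exec ID is unknown.
        err := utils.ErrToGRPCf(errdefs.ErrNotFound, "process does not exist %s", "exec-1")

        // On the wire this is a gRPC status carrying codes.NotFound.
        if s, ok := status.FromError(err); ok {
            fmt.Println(s.Code(), s.Message())
        }

        // Clients converting back through errdefs recover the original class.
        fmt.Println(errdefs.IsNotFound(errdefs.FromGRPC(err)))
    }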
+ +package utils + +import ( + "fmt" + "testing" + + "github.com/containerd/containerd/errdefs" +) + +func TestGRPCRoundTripsErrors(t *testing.T) { + for _, tc := range []struct { + name string + err error + test func(err error) bool + }{ + { + name: "passthrough", + err: errdefs.ErrNotFound, + test: errdefs.IsNotFound, + }, + { + name: "wrapped", + err: fmt.Errorf("oh no: %w", errdefs.ErrNotFound), + test: errdefs.IsNotFound, + }, + } { + t.Run(tc.name, func(t *testing.T) { + if err := errdefs.FromGRPC(ErrToGRPC(tc.err)); !tc.test(err) { + t.Errorf("errToGRPC got %+v", err) + } + if err := errdefs.FromGRPC(ErrToGRPCf(tc.err, "testing %s", "123")); !tc.test(err) { + t.Errorf("errToGRPCf got %+v", err) + } + }) + } +} diff --git a/pkg/state/state_norace.go b/pkg/state/state_norace.go index 4281aed6d..be09d6141 100644 --- a/pkg/state/state_norace.go +++ b/pkg/state/state_norace.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !race // +build !race package state diff --git a/pkg/state/state_race.go b/pkg/state/state_race.go index 8232981ce..c9f4fd5cf 100644 --- a/pkg/state/state_race.go +++ b/pkg/state/state_race.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build race // +build race package state diff --git a/pkg/state/tests/register_test.go b/pkg/state/tests/register_test.go index 75bdbfc6e..2199d6b01 100644 --- a/pkg/state/tests/register_test.go +++ b/pkg/state/tests/register_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build race // +build race package tests diff --git a/pkg/sync/checklocks_off_unsafe.go b/pkg/sync/checklocks_off_unsafe.go index 62c81b149..87c56dd12 100644 --- a/pkg/sync/checklocks_off_unsafe.go +++ b/pkg/sync/checklocks_off_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !checklocks // +build !checklocks package sync diff --git a/pkg/sync/checklocks_on_unsafe.go b/pkg/sync/checklocks_on_unsafe.go index 24f933ed1..f2bfde083 100644 --- a/pkg/sync/checklocks_on_unsafe.go +++ b/pkg/sync/checklocks_on_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build checklocks // +build checklocks package sync diff --git a/pkg/sync/goyield_go113_unsafe.go b/pkg/sync/goyield_go113_unsafe.go index 8aee0d455..c4b03e9aa 100644 --- a/pkg/sync/goyield_go113_unsafe.go +++ b/pkg/sync/goyield_go113_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.13 -// +build !go1.14 +//go:build go1.13 && !go1.14 +// +build go1.13,!go1.14 package sync diff --git a/pkg/sync/goyield_unsafe.go b/pkg/sync/goyield_unsafe.go index f3cc12163..8639bb64e 100644 --- a/pkg/sync/goyield_unsafe.go +++ b/pkg/sync/goyield_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.14 -// +build !go1.18 +//go:build go1.14 && !go1.18 +// +build go1.14,!go1.18 // Check go:linkname function signatures when updating Go version. 
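Several files above gain the Go 1.17 //go:build constraint while keeping the legacy // +build line for older toolchains; the two lines must express the same condition (gofmt keeps them in sync from Go 1.17 on). A minimal sketch of such a version-bounded file header; the package name is illustrative:

    //go:build go1.13 && !go1.18
    // +build go1.13,!go1.18

    // Package sketch compiles only for Go versions in [1.13, 1.18).
    package sketch

In the legacy form a comma means AND and a space means OR, which is why go1.13,!go1.18 pairs with go1.13 && !go1.18.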
diff --git a/pkg/sync/mutex_test.go b/pkg/sync/mutex_test.go index 4fb51a8ab..9e4e3f0b2 100644 --- a/pkg/sync/mutex_test.go +++ b/pkg/sync/mutex_test.go @@ -64,7 +64,7 @@ func TestTryLockUnlock(t *testing.T) { if !m.TryLock() { t.Fatal("failed to aquire lock") } - m.Unlock() + m.Unlock() // +checklocksforce if !m.TryLock() { t.Fatal("failed to aquire lock after unlock") } diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go index 411a80a8a..e00d9467d 100644 --- a/pkg/sync/mutex_unsafe.go +++ b/pkg/sync/mutex_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // When updating the build constraint (above), check that syncMutex matches the // standard library sync.Mutex definition. @@ -32,6 +32,18 @@ func (m *CrossGoroutineMutex) state() *int32 { return &(*syncMutex)(unsafe.Pointer(&m.Mutex)).state } +// Lock locks the underlying Mutex. +// +checklocksignore +func (m *CrossGoroutineMutex) Lock() { + m.Mutex.Lock() +} + +// Unlock unlocks the underlying Mutex. +// +checklocksignore +func (m *CrossGoroutineMutex) Unlock() { + m.Mutex.Unlock() +} + const ( mutexUnlocked = 0 mutexLocked = 1 @@ -62,6 +74,7 @@ type Mutex struct { // Lock locks m. If the lock is already in use, the calling goroutine blocks // until the mutex is available. +// +checklocksignore func (m *Mutex) Lock() { noteLock(unsafe.Pointer(m)) m.m.Lock() @@ -80,6 +93,7 @@ func (m *Mutex) Unlock() { // TryLock tries to acquire the mutex. It returns true if it succeeds and false // otherwise. TryLock does not block. +// +checklocksignore func (m *Mutex) TryLock() bool { // Note lock first to enforce proper locking even if unsuccessful. noteLock(unsafe.Pointer(m)) diff --git a/pkg/sync/norace_unsafe.go b/pkg/sync/norace_unsafe.go index 70b5f3a5e..8eca99134 100644 --- a/pkg/sync/norace_unsafe.go +++ b/pkg/sync/norace_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !race // +build !race package sync diff --git a/pkg/sync/race_amd64.s b/pkg/sync/race_amd64.s index 57bc0ec79..199602387 100644 --- a/pkg/sync/race_amd64.s +++ b/pkg/sync/race_amd64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build race -// +build amd64 +//go:build race && amd64 +// +build race,amd64 #include "textflag.h" diff --git a/pkg/sync/race_arm64.s b/pkg/sync/race_arm64.s index 88f091fda..c4192e870 100644 --- a/pkg/sync/race_arm64.s +++ b/pkg/sync/race_arm64.s @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build race -// +build arm64 +//go:build race && arm64 +// +build race,arm64 #include "textflag.h" diff --git a/pkg/sync/race_unsafe.go b/pkg/sync/race_unsafe.go index 59985c270..381163cac 100644 --- a/pkg/sync/race_unsafe.go +++ b/pkg/sync/race_unsafe.go @@ -3,6 +3,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build race // +build race package sync diff --git a/pkg/sync/runtime_unsafe.go b/pkg/sync/runtime_unsafe.go index 39c766331..1d9cf304e 100644 --- a/pkg/sync/runtime_unsafe.go +++ b/pkg/sync/runtime_unsafe.go @@ -3,8 +3,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build go1.13 -// +build !go1.18 +//go:build go1.13 && !go1.18 +// +build go1.13,!go1.18 // Check go:linkname function signatures, type definitions, and constants when // updating Go version. diff --git a/pkg/sync/rwmutex_test.go b/pkg/sync/rwmutex_test.go index 5ca96d12b..56a88e712 100644 --- a/pkg/sync/rwmutex_test.go +++ b/pkg/sync/rwmutex_test.go @@ -172,7 +172,7 @@ func TestRWTryLockUnlock(t *testing.T) { if !rwm.TryLock() { t.Fatal("failed to aquire lock") } - rwm.Unlock() + rwm.Unlock() // +checklocksforce if !rwm.TryLock() { t.Fatal("failed to aquire lock after unlock") } diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go index 892d3e641..7829b06db 100644 --- a/pkg/sync/rwmutex_unsafe.go +++ b/pkg/sync/rwmutex_unsafe.go @@ -37,6 +37,7 @@ const rwmutexMaxReaders = 1 << 30 // TryRLock locks rw for reading. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *CrossGoroutineRWMutex) TryRLock() bool { if RaceEnabled { RaceDisable() @@ -65,6 +66,7 @@ func (rw *CrossGoroutineRWMutex) TryRLock() bool { // It should not be used for recursive read locking; a blocked Lock call // excludes new readers from acquiring the lock. See the documentation on the // RWMutex type. +// +checklocksignore func (rw *CrossGoroutineRWMutex) RLock() { if RaceEnabled { RaceDisable() @@ -83,6 +85,7 @@ func (rw *CrossGoroutineRWMutex) RLock() { // // Preconditions: // * rw is locked for reading. +// +checklocksignore func (rw *CrossGoroutineRWMutex) RUnlock() { if RaceEnabled { RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) @@ -134,6 +137,7 @@ func (rw *CrossGoroutineRWMutex) TryLock() bool { // Lock locks rw for writing. If the lock is already locked for reading or // writing, Lock blocks until the lock is available. +// +checklocksignore func (rw *CrossGoroutineRWMutex) Lock() { if RaceEnabled { RaceDisable() @@ -228,6 +232,7 @@ type RWMutex struct { // TryRLock locks rw for reading. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *RWMutex) TryRLock() bool { // Note lock first to enforce proper locking even if unsuccessful. noteLock(unsafe.Pointer(rw)) @@ -243,6 +248,7 @@ func (rw *RWMutex) TryRLock() bool { // It should not be used for recursive read locking; a blocked Lock call // excludes new readers from acquiring the lock. See the documentation on the // RWMutex type. +// +checklocksignore func (rw *RWMutex) RLock() { noteLock(unsafe.Pointer(rw)) rw.m.RLock() @@ -261,6 +267,7 @@ func (rw *RWMutex) RUnlock() { // TryLock locks rw for writing. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *RWMutex) TryLock() bool { // Note lock first to enforce proper locking even if unsuccessful. noteLock(unsafe.Pointer(rw)) @@ -273,6 +280,7 @@ func (rw *RWMutex) TryLock() bool { // Lock locks rw for writing. If the lock is already locked for reading or // writing, Lock blocks until the lock is available. 
+// +checklocksignore func (rw *RWMutex) Lock() { noteLock(unsafe.Pointer(rw)) rw.m.Lock() diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD index 7b3160309..ceee494fc 100644 --- a/pkg/syserr/BUILD +++ b/pkg/syserr/BUILD @@ -12,6 +12,8 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux/errno", + "//pkg/errors", + "//pkg/errors/linuxerr", "//pkg/syserror", "//pkg/tcpip", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/syserr/host_linux.go b/pkg/syserr/host_linux.go index c8c10f48b..fb92738af 100644 --- a/pkg/syserr/host_linux.go +++ b/pkg/syserr/host_linux.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package syserr diff --git a/pkg/syserr/syserr.go b/pkg/syserr/syserr.go index fb77ac8bd..558240008 100644 --- a/pkg/syserr/syserr.go +++ b/pkg/syserr/syserr.go @@ -22,6 +22,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux/errno" + "gvisor.dev/gvisor/pkg/errors" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/syserror" ) @@ -50,11 +52,11 @@ func New(message string, linuxTranslation errno.Errno) *Error { } e := error(unix.Errno(err.errno)) - // syserror.ErrWouldBlock gets translated to syserror.EWOULDBLOCK and + // syserror.ErrWouldBlock gets translated to linuxerr.EWOULDBLOCK and // enables proper blocking semantics. This should temporary address the // class of blocking bugs that keep popping up with the current state of // the error space. - if e == syserror.EWOULDBLOCK { + if err.errno == linuxerr.EWOULDBLOCK.Errno() { e = syserror.ErrWouldBlock } linuxBackwardsTranslations[err.errno] = linuxBackwardsTranslation{err: e, ok: true} @@ -280,6 +282,11 @@ func FromError(err error) *Error { if errno, ok := err.(unix.Errno); ok { return FromHost(errno) } + + if linuxErr, ok := err.(*errors.Error); ok { + return FromHost(unix.Errno(linuxErr.Errno())) + } + if errno, ok := syserror.TranslateError(err); ok { return FromHost(errno) } diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go index 56b621357..b24edb364 100644 --- a/pkg/syserror/syserror.go +++ b/pkg/syserror/syserror.go @@ -26,63 +26,16 @@ import ( // The following variables have the same meaning as their syscall equivalent. 
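The +checklocksignore and +checklocksforce comments added in pkg/sync above are directives for gVisor's checklocks analyzer: the former excludes a function from lock analysis, the latter overrides the analyzer on one statement it cannot prove safe, as with unlocking after a successful TryLock. A rough sketch of how the directives read (the package and functions are illustrative):

    package sketch

    import "gvisor.dev/gvisor/pkg/sync"

    var mu sync.Mutex

    // unlockThenRelock assumes mu is held by the caller, a contract the
    // analyzer cannot see across the call boundary, so it opts out entirely.
    // +checklocksignore
    func unlockThenRelock() {
        mu.Unlock()
        mu.Lock()
    }

    func tryOnce() bool {
        if !mu.TryLock() {
            return false
        }
        // Held via TryLock just above, which the analyzer cannot track.
        mu.Unlock() // +checklocksforce
        return true
    }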
var ( - E2BIG = error(unix.E2BIG) - EACCES = error(unix.EACCES) - EADDRINUSE = error(unix.EADDRINUSE) - EAGAIN = error(unix.EAGAIN) - EBADF = error(unix.EBADF) - EBADFD = error(unix.EBADFD) - EBUSY = error(unix.EBUSY) - ECHILD = error(unix.ECHILD) - ECONNABORTED = error(unix.ECONNABORTED) - ECONNREFUSED = error(unix.ECONNREFUSED) - ECONNRESET = error(unix.ECONNRESET) - EDEADLK = error(unix.EDEADLK) - EEXIST = error(unix.EEXIST) - EFAULT = error(unix.EFAULT) - EFBIG = error(unix.EFBIG) - EIDRM = error(unix.EIDRM) - EINTR = error(unix.EINTR) - EINVAL = error(unix.EINVAL) - EIO = error(unix.EIO) - EISDIR = error(unix.EISDIR) - ELIBBAD = error(unix.ELIBBAD) - ELOOP = error(unix.ELOOP) - EMFILE = error(unix.EMFILE) - EMLINK = error(unix.EMLINK) - EMSGSIZE = error(unix.EMSGSIZE) - ENAMETOOLONG = error(unix.ENAMETOOLONG) - ENOATTR = ENODATA - ENOBUFS = error(unix.ENOBUFS) - ENODATA = error(unix.ENODATA) - ENODEV = error(unix.ENODEV) - ENOENT = error(unix.ENOENT) - ENOEXEC = error(unix.ENOEXEC) - ENOLCK = error(unix.ENOLCK) - ENOLINK = error(unix.ENOLINK) - ENOMEM = error(unix.ENOMEM) - ENOSPC = error(unix.ENOSPC) - ENOSYS = error(unix.ENOSYS) - ENOTCONN = error(unix.ENOTCONN) - ENOTDIR = error(unix.ENOTDIR) - ENOTEMPTY = error(unix.ENOTEMPTY) - ENOTSOCK = error(unix.ENOTSOCK) - ENOTSUP = error(unix.ENOTSUP) - ENOTTY = error(unix.ENOTTY) - ENXIO = error(unix.ENXIO) - EOPNOTSUPP = error(unix.EOPNOTSUPP) - EOVERFLOW = error(unix.EOVERFLOW) - EPERM = error(unix.EPERM) - EPIPE = error(unix.EPIPE) - ERANGE = error(unix.ERANGE) - EREMOTE = error(unix.EREMOTE) - EROFS = error(unix.EROFS) - ESPIPE = error(unix.ESPIPE) - ESRCH = error(unix.ESRCH) - ETIMEDOUT = error(unix.ETIMEDOUT) - EUSERS = error(unix.EUSERS) - EWOULDBLOCK = error(unix.EWOULDBLOCK) - EXDEV = error(unix.EXDEV) + EIDRM = error(unix.EIDRM) + EINTR = error(unix.EINTR) + EIO = error(unix.EIO) + EISDIR = error(unix.EISDIR) + ENOENT = error(unix.ENOENT) + ENOEXEC = error(unix.ENOEXEC) + ENOMEM = error(unix.ENOMEM) + ENOTSOCK = error(unix.ENOTSOCK) + ENOSPC = error(unix.ENOSPC) + ENOSYS = error(unix.ENOSYS) ) var ( diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index ed4d7e958..f00cfd0f5 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -46,7 +46,6 @@ deps_test( "//pkg/gohacks", "//pkg/goid", "//pkg/ilist", - "//pkg/iovec", "//pkg/linewriter", "//pkg/log", "//pkg/rand", diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index 6aa9acfa8..e2c85e220 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -18,6 +18,7 @@ package header import ( "encoding/binary" + "fmt" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -234,3 +235,64 @@ func PseudoHeaderChecksum(protocol tcpip.TransportProtocolNumber, srcAddr tcpip. return Checksum([]byte{0, uint8(protocol)}, xsum) } + +// checksumUpdate2ByteAlignedUint16 updates a uint16 value in a calculated +// checksum. +// +// The value MUST begin at a 2-byte boundary in the original buffer. +func checksumUpdate2ByteAlignedUint16(xsum, old, new uint16) uint16 { + // As per RFC 1071 page 4, + // (4) Incremental Update + // + // ... + // + // To update the checksum, simply add the differences of the + // sixteen bit integers that have been changed. To see why this + // works, observe that every 16-bit integer has an additive inverse + // and that addition is associative. 
From this it follows that + // given the original value m, the new value m', and the old + // checksum C, the new checksum C' is: + // + // C' = C + (-m) + m' = C + (m' - m) + return ChecksumCombine(xsum, ChecksumCombine(new, ^old)) +} + +// checksumUpdate2ByteAlignedAddress updates an address in a calculated +// checksum. +// +// The addresses must have the same length and must contain an even number +// of bytes. The address MUST begin at a 2-byte boundary in the original buffer. +func checksumUpdate2ByteAlignedAddress(xsum uint16, old, new tcpip.Address) uint16 { + const uint16Bytes = 2 + + if len(old) != len(new) { + panic(fmt.Sprintf("buffer lengths are different; old = %d, new = %d", len(old), len(new))) + } + + if len(old)%uint16Bytes != 0 { + panic(fmt.Sprintf("buffer has an odd number of bytes; got = %d", len(old))) + } + + // As per RFC 1071 page 4, + // (4) Incremental Update + // + // ... + // + // To update the checksum, simply add the differences of the + // sixteen bit integers that have been changed. To see why this + // works, observe that every 16-bit integer has an additive inverse + // and that addition is associative. From this it follows that + // given the original value m, the new value m', and the old + // checksum C, the new checksum C' is: + // + // C' = C + (-m) + m' = C + (m' - m) + for len(old) != 0 { + // Convert the 2 byte sequences to uint16 values then apply the increment + // update. + xsum = checksumUpdate2ByteAlignedUint16(xsum, (uint16(old[0])<<8)+uint16(old[1]), (uint16(new[0])<<8)+uint16(new[1])) + old = old[uint16Bytes:] + new = new[uint16Bytes:] + } + + return xsum +} diff --git a/pkg/tcpip/header/checksum_test.go b/pkg/tcpip/header/checksum_test.go index d267dabd0..3445511f4 100644 --- a/pkg/tcpip/header/checksum_test.go +++ b/pkg/tcpip/header/checksum_test.go @@ -23,6 +23,7 @@ import ( "sync" "testing" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -256,3 +257,205 @@ func TestICMPv6Checksum(t *testing.T) { }) }, want, fmt.Sprintf("header: {% x} data {% x}", h, vv.ToView())) } + +func randomAddress(size int) tcpip.Address { + s := make([]byte, size) + for i := 0; i < size; i++ { + s[i] = byte(rand.Uint32()) + } + return tcpip.Address(s) +} + +func TestChecksummableNetworkUpdateAddress(t *testing.T) { + tests := []struct { + name string + update func(header.IPv4, tcpip.Address) + }{ + { + name: "SetSourceAddressWithChecksumUpdate", + update: header.IPv4.SetSourceAddressWithChecksumUpdate, + }, + { + name: "SetDestinationAddressWithChecksumUpdate", + update: header.IPv4.SetDestinationAddressWithChecksumUpdate, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + for i := 0; i < 1000; i++ { + var origBytes [header.IPv4MinimumSize]byte + header.IPv4(origBytes[:]).Encode(&header.IPv4Fields{ + TOS: 1, + TotalLength: header.IPv4MinimumSize, + ID: 2, + Flags: 3, + FragmentOffset: 4, + TTL: 5, + Protocol: 6, + Checksum: 0, + SrcAddr: randomAddress(header.IPv4AddressSize), + DstAddr: randomAddress(header.IPv4AddressSize), + }) + + addr := randomAddress(header.IPv4AddressSize) + + bytesCopy := origBytes + h := header.IPv4(bytesCopy[:]) + origXSum := h.CalculateChecksum() + h.SetChecksum(^origXSum) + + test.update(h, addr) + got := ^h.Checksum() + h.SetChecksum(0) + want := h.CalculateChecksum() + if got != want { + t.Errorf("got h.Checksum() = 0x%x, want = 0x%x; originalBytes = 0x%x, new addr = %s", got, want, origBytes, addr) + } + } + }) + } +} + +func 
TestChecksummableTransportUpdatePort(t *testing.T) { + // The fields in the pseudo header is not tested here so we just use 0. + const pseudoHeaderXSum = 0 + + tests := []struct { + name string + transportHdr func(_, _ uint16) (header.ChecksummableTransport, func(uint16) uint16) + proto tcpip.TransportProtocolNumber + }{ + { + name: "TCP", + transportHdr: func(src, dst uint16) (header.ChecksummableTransport, func(uint16) uint16) { + h := header.TCP(make([]byte, header.TCPMinimumSize)) + h.Encode(&header.TCPFields{ + SrcPort: src, + DstPort: dst, + SeqNum: 1, + AckNum: 2, + DataOffset: header.TCPMinimumSize, + Flags: 3, + WindowSize: 4, + Checksum: 0, + UrgentPointer: 5, + }) + h.SetChecksum(^h.CalculateChecksum(pseudoHeaderXSum)) + return h, h.CalculateChecksum + }, + proto: header.TCPProtocolNumber, + }, + { + name: "UDP", + transportHdr: func(src, dst uint16) (header.ChecksummableTransport, func(uint16) uint16) { + h := header.UDP(make([]byte, header.UDPMinimumSize)) + h.Encode(&header.UDPFields{ + SrcPort: src, + DstPort: dst, + Length: 0, + Checksum: 0, + }) + h.SetChecksum(^h.CalculateChecksum(pseudoHeaderXSum)) + return h, h.CalculateChecksum + }, + proto: header.UDPProtocolNumber, + }, + } + + for i := 0; i < 1000; i++ { + origSrcPort := uint16(rand.Uint32()) + origDstPort := uint16(rand.Uint32()) + newPort := uint16(rand.Uint32()) + + t.Run(fmt.Sprintf("OrigSrcPort=%d,OrigDstPort=%d,NewPort=%d", origSrcPort, origDstPort, newPort), func(*testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + for _, subTest := range []struct { + name string + update func(header.ChecksummableTransport) + }{ + { + name: "Source port", + update: func(h header.ChecksummableTransport) { h.SetSourcePortWithChecksumUpdate(newPort) }, + }, + { + name: "Destination port", + update: func(h header.ChecksummableTransport) { h.SetDestinationPortWithChecksumUpdate(newPort) }, + }, + } { + t.Run(subTest.name, func(t *testing.T) { + h, calcXSum := test.transportHdr(origSrcPort, origDstPort) + subTest.update(h) + // TCP and UDP hold the 1s complement of the fully calculated + // checksum. 
+ got := ^h.Checksum() + h.SetChecksum(0) + + if want := calcXSum(pseudoHeaderXSum); got != want { + h, _ := test.transportHdr(origSrcPort, origDstPort) + t.Errorf("got Checksum() = 0x%x, want = 0x%x; originalBytes = %#v, new port = %d", got, want, h, newPort) + } + }) + } + }) + } + }) + } +} + +func TestChecksummableTransportUpdatePseudoHeaderAddress(t *testing.T) { + const addressSize = 6 + + tests := []struct { + name string + transportHdr func() header.ChecksummableTransport + proto tcpip.TransportProtocolNumber + }{ + { + name: "TCP", + transportHdr: func() header.ChecksummableTransport { return header.TCP(make([]byte, header.TCPMinimumSize)) }, + proto: header.TCPProtocolNumber, + }, + { + name: "UDP", + transportHdr: func() header.ChecksummableTransport { return header.UDP(make([]byte, header.UDPMinimumSize)) }, + proto: header.UDPProtocolNumber, + }, + } + + for i := 0; i < 1000; i++ { + permanent := randomAddress(addressSize) + old := randomAddress(addressSize) + new := randomAddress(addressSize) + + t.Run(fmt.Sprintf("Permanent=%q,Old=%q,New=%q", permanent, old, new), func(t *testing.T) { + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + for _, fullChecksum := range []bool{true, false} { + t.Run(fmt.Sprintf("FullChecksum=%t", fullChecksum), func(t *testing.T) { + initialXSum := header.PseudoHeaderChecksum(test.proto, permanent, old, 0) + if fullChecksum { + // TCP and UDP hold the 1s complement of the fully calculated + // checksum. + initialXSum = ^initialXSum + } + + h := test.transportHdr() + h.SetChecksum(initialXSum) + h.UpdateChecksumPseudoHeaderAddress(old, new, fullChecksum) + + got := h.Checksum() + if fullChecksum { + got = ^got + } + if want := header.PseudoHeaderChecksum(test.proto, permanent, new, 0); got != want { + t.Errorf("got Checksum() = 0x%x, want = 0x%x; h = %#v", got, want, h) + } + }) + } + }) + } + }) + } +} diff --git a/pkg/tcpip/header/interfaces.go b/pkg/tcpip/header/interfaces.go index 861cbbb70..3a41adfc4 100644 --- a/pkg/tcpip/header/interfaces.go +++ b/pkg/tcpip/header/interfaces.go @@ -53,6 +53,31 @@ type Transport interface { Payload() []byte } +// ChecksummableTransport is a Transport that supports checksumming. +type ChecksummableTransport interface { + Transport + + // SetSourcePortWithChecksumUpdate sets the source port and updates + // the checksum. + // + // The receiver's checksum must be a fully calculated checksum. + SetSourcePortWithChecksumUpdate(port uint16) + + // SetDestinationPortWithChecksumUpdate sets the destination port and updates + // the checksum. + // + // The receiver's checksum must be a fully calculated checksum. + SetDestinationPortWithChecksumUpdate(port uint16) + + // UpdateChecksumPseudoHeaderAddress updates the checksum to reflect an + // updated address in the pseudo header. + // + // If fullChecksum is true, the receiver's checksum field is assumed to hold a + // fully calculated checksum. Otherwise, it is assumed to hold a partially + // calculated checksum which only reflects the pseudo header. + UpdateChecksumPseudoHeaderAddress(old, new tcpip.Address, fullChecksum bool) +} + // Network offers generic methods to query and/or update the fields of the // header of a network protocol buffer. type Network interface { @@ -90,3 +115,16 @@ type Network interface { // SetTOS sets the values of the "type of service" and "flow label" fields. SetTOS(t uint8, l uint32) } + +// ChecksummableNetwork is a Network that supports checksumming. 
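The checksumUpdate2ByteAlignedUint16 helper and the *WithChecksumUpdate methods above rely on the RFC 1071 incremental-update identity: adding the new word and the one's complement of the old word to the existing sum gives the same result as recomputing from scratch. A standalone sketch that checks the identity numerically, independent of the gVisor helpers:

    package main

    import "fmt"

    // onesComplementAdd adds two 16-bit values with end-around carry, the
    // arithmetic underlying Internet checksums (RFC 1071).
    func onesComplementAdd(a, b uint16) uint16 {
        sum := uint32(a) + uint32(b)
        for sum > 0xffff {
            sum = (sum & 0xffff) + (sum >> 16)
        }
        return uint16(sum)
    }

    // checksum computes the one's complement sum of 16-bit big-endian words.
    func checksum(buf []byte) uint16 {
        var sum uint16
        for i := 0; i < len(buf); i += 2 {
            sum = onesComplementAdd(sum, uint16(buf[i])<<8|uint16(buf[i+1]))
        }
        return sum
    }

    func main() {
        buf := []byte{0x12, 0x34, 0xab, 0xcd, 0x00, 0x01}
        old := checksum(buf)

        // Replace the second word (0xabcd -> 0x1111) and update incrementally:
        // C' = C + (m' - m), i.e. add the new word plus ^oldWord.
        oldWord, newWord := uint16(0xabcd), uint16(0x1111)
        incremental := onesComplementAdd(old, onesComplementAdd(newWord, ^oldWord))

        buf[2], buf[3] = 0x11, 0x11
        full := checksum(buf)

        fmt.Printf("incremental=%#04x full=%#04x match=%t\n", incremental, full, incremental == full)
    }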
+type ChecksummableNetwork interface { + Network + + // SetSourceAddressAndChecksum sets the source address and updates the + // checksum to reflect the new address. + SetSourceAddressWithChecksumUpdate(tcpip.Address) + + // SetDestinationAddressAndChecksum sets the destination address and + // updates the checksum to reflect the new address. + SetDestinationAddressWithChecksumUpdate(tcpip.Address) +} diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index e9abbb709..dcc549c7b 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -305,6 +305,18 @@ func (b IPv4) DestinationAddress() tcpip.Address { return tcpip.Address(b[dstAddr : dstAddr+IPv4AddressSize]) } +// SetSourceAddressWithChecksumUpdate implements ChecksummableNetwork. +func (b IPv4) SetSourceAddressWithChecksumUpdate(new tcpip.Address) { + b.SetChecksum(^checksumUpdate2ByteAlignedAddress(^b.Checksum(), b.SourceAddress(), new)) + b.SetSourceAddress(new) +} + +// SetDestinationAddressWithChecksumUpdate implements ChecksummableNetwork. +func (b IPv4) SetDestinationAddressWithChecksumUpdate(new tcpip.Address) { + b.SetChecksum(^checksumUpdate2ByteAlignedAddress(^b.Checksum(), b.DestinationAddress(), new)) + b.SetDestinationAddress(new) +} + // padIPv4OptionsLength returns the total length for IPv4 options of length l // after applying padding according to RFC 791: // The internet header padding is used to ensure that the internet diff --git a/pkg/tcpip/header/ndp_options.go b/pkg/tcpip/header/ndp_options.go index b1f39e6e6..a647ea968 100644 --- a/pkg/tcpip/header/ndp_options.go +++ b/pkg/tcpip/header/ndp_options.go @@ -233,6 +233,17 @@ func (i *NDPOptionIterator) Next() (NDPOption, bool, error) { case ndpNonceOptionType: return NDPNonceOption(body), false, nil + case ndpRouteInformationType: + if numBodyBytes > ndpRouteInformationMaxLength { + return nil, true, fmt.Errorf("got %d bytes for NDP Route Information option's body, expected at max %d bytes: %w", numBodyBytes, ndpRouteInformationMaxLength, ErrNDPOptMalformedBody) + } + opt := NDPRouteInformation(body) + if err := opt.hasError(); err != nil { + return nil, true, err + } + + return opt, false, nil + case ndpPrefixInformationType: // Make sure the length of a Prefix Information option // body is ndpPrefixInformationLength, as per RFC 4861 @@ -930,3 +941,137 @@ func isUpperLetter(b byte) bool { func isDigit(b byte) bool { return b >= '0' && b <= '9' } + +// As per RFC 4191 section 2.3, +// +// 2.3. Route Information Option +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | Type | Length | Prefix Length |Resvd|Prf|Resvd| +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | Route Lifetime | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | Prefix (Variable Length) | +// . . +// . . +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// Fields: +// +// Type 24 +// +// +// Length 8-bit unsigned integer. The length of the option +// (including the Type and Length fields) in units of 8 +// octets. The Length field is 1, 2, or 3 depending on the +// Prefix Length. If Prefix Length is greater than 64, then +// Length must be 3. If Prefix Length is greater than 0, +// then Length must be 2 or 3. If Prefix Length is zero, +// then Length must be 1, 2, or 3. 
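The Length rules quoted above determine how many 8-octet units a Route Information option needs for a given prefix length. A small hypothetical helper (not part of this change) that computes the minimal valid Length field:

// routeInfoMinLength returns the smallest valid Length field, in units of 8
// octets, for an NDP Route Information option with the given prefix length,
// per RFC 4191 section 2.3: a zero-bit prefix fits in 1 unit, up to 64 bits
// fit in 2 units, and up to 128 bits need 3 units.
func routeInfoMinLength(prefixLength uint8) (uint8, error) {
	switch {
	case prefixLength == 0:
		return 1, nil
	case prefixLength <= 64:
		return 2, nil
	case prefixLength <= 128:
		return 3, nil
	default:
		return 0, fmt.Errorf("prefix length %d exceeds 128 bits", prefixLength)
	}
}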
+const ( + ndpRouteInformationType = ndpOptionIdentifier(24) + ndpRouteInformationMaxLength = 22 + + ndpRouteInformationPrefixLengthIdx = 0 + ndpRouteInformationFlagsIdx = 1 + ndpRouteInformationPrfShift = 3 + ndpRouteInformationPrfMask = 3 << ndpRouteInformationPrfShift + ndpRouteInformationRouteLifetimeIdx = 2 + ndpRouteInformationRoutePrefixIdx = 6 +) + +// NDPRouteInformation is the NDP Router Information option, as defined by +// RFC 4191 section 2.3. +type NDPRouteInformation []byte + +func (NDPRouteInformation) kind() ndpOptionIdentifier { + return ndpRouteInformationType +} + +func (o NDPRouteInformation) length() int { + return len(o) +} + +func (o NDPRouteInformation) serializeInto(b []byte) int { + return copy(b, o) +} + +// String implements fmt.Stringer. +func (o NDPRouteInformation) String() string { + return fmt.Sprintf("%T", o) +} + +// PrefixLength returns the length of the prefix. +func (o NDPRouteInformation) PrefixLength() uint8 { + return o[ndpRouteInformationPrefixLengthIdx] +} + +// RoutePreference returns the preference of the route over other routes to the +// same destination but through a different router. +func (o NDPRouteInformation) RoutePreference() NDPRoutePreference { + return NDPRoutePreference((o[ndpRouteInformationFlagsIdx] & ndpRouteInformationPrfMask) >> ndpRouteInformationPrfShift) +} + +// RouteLifetime returns the lifetime of the route. +// +// Note, a value of 0 implies the route is now invalid and a value of +// infinity/forever is represented by NDPInfiniteLifetime. +func (o NDPRouteInformation) RouteLifetime() time.Duration { + return time.Second * time.Duration(binary.BigEndian.Uint32(o[ndpRouteInformationRouteLifetimeIdx:])) +} + +// Prefix returns the prefix of the destination subnet this route is for. +func (o NDPRouteInformation) Prefix() (tcpip.Subnet, error) { + prefixLength := int(o.PrefixLength()) + if max := IPv6AddressSize * 8; prefixLength > max { + return tcpip.Subnet{}, fmt.Errorf("got prefix length = %d, want <= %d", prefixLength, max) + } + + prefix := o[ndpRouteInformationRoutePrefixIdx:] + var addrBytes [IPv6AddressSize]byte + if n := copy(addrBytes[:], prefix); n != len(prefix) { + panic(fmt.Sprintf("got copy(addrBytes, prefix) = %d, want = %d", n, len(prefix))) + } + + return tcpip.AddressWithPrefix{ + Address: tcpip.Address(addrBytes[:]), + PrefixLen: prefixLength, + }.Subnet(), nil +} + +func (o NDPRouteInformation) hasError() error { + l := len(o) + if l < ndpRouteInformationRoutePrefixIdx { + return fmt.Errorf("%T too small, got = %d bytes: %w", o, l, ErrNDPOptMalformedBody) + } + + prefixLength := int(o.PrefixLength()) + if max := IPv6AddressSize * 8; prefixLength > max { + return fmt.Errorf("got prefix length = %d, want <= %d: %w", prefixLength, max, ErrNDPOptMalformedBody) + } + + // Length 8-bit unsigned integer. The length of the option + // (including the Type and Length fields) in units of 8 + // octets. The Length field is 1, 2, or 3 depending on the + // Prefix Length. If Prefix Length is greater than 64, then + // Length must be 3. If Prefix Length is greater than 0, + // then Length must be 2 or 3. If Prefix Length is zero, + // then Length must be 1, 2, or 3. + l += 2 // Add 2 bytes for the type and length bytes. 
+ lengthField := l / lengthByteUnits + if prefixLength > 64 { + if lengthField != 3 { + return fmt.Errorf("Length field must be 3 when Prefix Length (%d) is > 64 (got = %d): %w", prefixLength, lengthField, ErrNDPOptMalformedBody) + } + } else if prefixLength > 0 { + if lengthField != 2 && lengthField != 3 { + return fmt.Errorf("Length field must be 2 or 3 when Prefix Length (%d) is between 0 and 64 (got = %d): %w", prefixLength, lengthField, ErrNDPOptMalformedBody) + } + } else if lengthField == 0 || lengthField > 3 { + return fmt.Errorf("Length field must be 1, 2, or 3 when Prefix Length is zero (got = %d): %w", lengthField, ErrNDPOptMalformedBody) + } + + return nil +} diff --git a/pkg/tcpip/header/ndp_router_advert.go b/pkg/tcpip/header/ndp_router_advert.go index 7e2f0c797..7d6efa083 100644 --- a/pkg/tcpip/header/ndp_router_advert.go +++ b/pkg/tcpip/header/ndp_router_advert.go @@ -16,9 +16,12 @@ package header import ( "encoding/binary" + "fmt" "time" ) +var _ fmt.Stringer = NDPRoutePreference(0) + // NDPRoutePreference is the preference values for default routers or // more-specific routes. // @@ -64,6 +67,22 @@ const ( ReservedRoutePreference = 0b10 ) +// String implements fmt.Stringer. +func (p NDPRoutePreference) String() string { + switch p { + case HighRoutePreference: + return "HighRoutePreference" + case MediumRoutePreference: + return "MediumRoutePreference" + case LowRoutePreference: + return "LowRoutePreference" + case ReservedRoutePreference: + return "ReservedRoutePreference" + default: + return fmt.Sprintf("NDPRoutePreference(%d)", p) + } +} + // NDPRouterAdvert is an NDP Router Advertisement message. It will only contain // the body of an ICMPv6 packet. // diff --git a/pkg/tcpip/header/ndp_test.go b/pkg/tcpip/header/ndp_test.go index 8fd1f7d13..2a897e938 100644 --- a/pkg/tcpip/header/ndp_test.go +++ b/pkg/tcpip/header/ndp_test.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "regexp" + "strings" "testing" "time" @@ -58,6 +59,224 @@ func TestNDPNeighborSolicit(t *testing.T) { } } +func TestNDPRouteInformationOption(t *testing.T) { + tests := []struct { + name string + + length uint8 + prefixLength uint8 + prf NDPRoutePreference + lifetimeS uint32 + prefixBytes []byte + expectedPrefix tcpip.Subnet + + expectedErr error + }{ + { + name: "Length=1 with Prefix Length = 0", + length: 1, + prefixLength: 0, + prf: MediumRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: IPv6EmptySubnet, + }, + { + name: "Length=1 but Prefix Length > 0", + length: 1, + prefixLength: 1, + prf: MediumRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedErr: ErrNDPOptMalformedBody, + }, + { + name: "Length=2 with Prefix Length = 0", + length: 2, + prefixLength: 0, + prf: MediumRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: IPv6EmptySubnet, + }, + { + name: "Length=2 with Prefix Length in [1, 64] (1)", + length: 2, + prefixLength: 1, + prf: LowRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 1, + }.Subnet(), + }, + { + name: "Length=2 with Prefix Length in [1, 64] (64)", + length: 2, + prefixLength: 64, + prf: HighRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 64, + }.Subnet(), + }, + { + name: "Length=2 with Prefix Length > 64", + length: 2, + prefixLength: 65, + prf: HighRoutePreference, + lifetimeS: 1, 
+ prefixBytes: nil, + expectedErr: ErrNDPOptMalformedBody, + }, + { + name: "Length=3 with Prefix Length = 0", + length: 3, + prefixLength: 0, + prf: MediumRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: IPv6EmptySubnet, + }, + { + name: "Length=3 with Prefix Length in [1, 64] (1)", + length: 3, + prefixLength: 1, + prf: LowRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 1, + }.Subnet(), + }, + { + name: "Length=3 with Prefix Length in [1, 64] (64)", + length: 3, + prefixLength: 64, + prf: HighRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 64, + }.Subnet(), + }, + { + name: "Length=3 with Prefix Length in [65, 128] (65)", + length: 3, + prefixLength: 65, + prf: HighRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 65, + }.Subnet(), + }, + { + name: "Length=3 with Prefix Length in [65, 128] (128)", + length: 3, + prefixLength: 128, + prf: HighRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(strings.Repeat("\x00", IPv6AddressSize)), + PrefixLen: 128, + }.Subnet(), + }, + { + name: "Length=3 with (invalid) Prefix Length > 128", + length: 3, + prefixLength: 129, + prf: HighRoutePreference, + lifetimeS: 1, + prefixBytes: nil, + expectedErr: ErrNDPOptMalformedBody, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + expectedRouteInformationBytes := [...]byte{ + // Type, Length + 24, test.length, + + // Prefix Length, Prf + uint8(test.prefixLength), uint8(test.prf) << 3, + + // Route Lifetime + 0, 0, 0, 0, + + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + } + binary.BigEndian.PutUint32(expectedRouteInformationBytes[4:], test.lifetimeS) + _ = copy(expectedRouteInformationBytes[8:], test.prefixBytes) + + opts := NDPOptions(expectedRouteInformationBytes[:test.length*lengthByteUnits]) + it, err := opts.Iter(false) + if err != nil { + t.Fatalf("got Iter(false) = (_, %s), want = (_, nil)", err) + } + opt, done, err := it.Next() + if !errors.Is(err, test.expectedErr) { + t.Fatalf("got Next() = (_, _, %s), want = (_, _, %s)", err, test.expectedErr) + } + if want := test.expectedErr != nil; done != want { + t.Fatalf("got Next() = (_, %t, _), want = (_, %t, _)", done, want) + } + if test.expectedErr != nil { + return + } + + if got := opt.kind(); got != ndpRouteInformationType { + t.Errorf("got kind() = %d, want = %d", got, ndpRouteInformationType) + } + + ri, ok := opt.(NDPRouteInformation) + if !ok { + t.Fatalf("got opt = %T, want = NDPRouteInformation", opt) + } + + if got := ri.PrefixLength(); got != test.prefixLength { + t.Errorf("got PrefixLength() = %d, want = %d", got, test.prefixLength) + } + if got := ri.RoutePreference(); got != test.prf { + t.Errorf("got RoutePreference() = %d, want = %d", got, test.prf) + } + if got, want := ri.RouteLifetime(), time.Duration(test.lifetimeS)*time.Second; got != want { + t.Errorf("got RouteLifetime() = %s, want = %s", got, want) + } + if got, err := ri.Prefix(); err != nil { + t.Errorf("Prefix(): %s", err) + } else if got != test.expectedPrefix { + t.Errorf("got Prefix() = %s, want = %s", got, test.expectedPrefix) + } + + // Iterator should not return 
anything else. + { + next, done, err := it.Next() + if err != nil { + t.Errorf("got Next() = (_, _, %s), want = (_, _, nil)", err) + } + if !done { + t.Error("got Next() = (_, false, _), want = (_, true, _)") + } + if next != nil { + t.Errorf("got Next() = (%x, _, _), want = (nil, _, _)", next) + } + } + }) + } +} + // TestNDPNeighborAdvert tests the functions of NDPNeighborAdvert. func TestNDPNeighborAdvert(t *testing.T) { b := []byte{ @@ -1498,3 +1717,32 @@ func TestNDPOptionsIter(t *testing.T) { t.Errorf("got Next = (%x, _, _), want = (nil, _, _)", next) } } + +func TestNDPRoutePreferenceStringer(t *testing.T) { + p := NDPRoutePreference(0) + for { + var wantStr string + switch p { + case 0b01: + wantStr = "HighRoutePreference" + case 0b00: + wantStr = "MediumRoutePreference" + case 0b11: + wantStr = "LowRoutePreference" + case 0b10: + wantStr = "ReservedRoutePreference" + default: + wantStr = fmt.Sprintf("NDPRoutePreference(%d)", p) + } + + if gotStr := p.String(); gotStr != wantStr { + t.Errorf("got NDPRoutePreference(%d).String() = %s, want = %s", p, gotStr, wantStr) + } + + p++ + if p == 0 { + // Overflowed, we hit all values. + break + } + } +} diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 8dabe3354..a75e51a28 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -390,6 +390,35 @@ func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32 b.SetChecksum(^checksum) } +// SetSourcePortWithChecksumUpdate implements ChecksummableTransport. +func (b TCP) SetSourcePortWithChecksumUpdate(new uint16) { + old := b.SourcePort() + b.SetSourcePort(new) + b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new)) +} + +// SetDestinationPortWithChecksumUpdate implements ChecksummableTransport. +func (b TCP) SetDestinationPortWithChecksumUpdate(new uint16) { + old := b.DestinationPort() + b.SetDestinationPort(new) + b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new)) +} + +// UpdateChecksumPseudoHeaderAddress implements ChecksummableTransport. +func (b TCP) UpdateChecksumPseudoHeaderAddress(old, new tcpip.Address, fullChecksum bool) { + xsum := b.Checksum() + if fullChecksum { + xsum = ^xsum + } + + xsum = checksumUpdate2ByteAlignedAddress(xsum, old, new) + if fullChecksum { + xsum = ^xsum + } + + b.SetChecksum(xsum) +} + // ParseSynOptions parses the options received in a SYN segment and returns the // relevant ones. opts should point to the option part of the TCP header. func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions { diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go index ae9d167ff..f69d53314 100644 --- a/pkg/tcpip/header/udp.go +++ b/pkg/tcpip/header/udp.go @@ -130,3 +130,32 @@ func (b UDP) Encode(u *UDPFields) { binary.BigEndian.PutUint16(b[udpLength:], u.Length) binary.BigEndian.PutUint16(b[udpChecksum:], u.Checksum) } + +// SetSourcePortWithChecksumUpdate implements ChecksummableTransport. +func (b UDP) SetSourcePortWithChecksumUpdate(new uint16) { + old := b.SourcePort() + b.SetSourcePort(new) + b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new)) +} + +// SetDestinationPortWithChecksumUpdate implements ChecksummableTransport. +func (b UDP) SetDestinationPortWithChecksumUpdate(new uint16) { + old := b.DestinationPort() + b.SetDestinationPort(new) + b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new)) +} + +// UpdateChecksumPseudoHeaderAddress implements ChecksummableTransport. 
+func (b UDP) UpdateChecksumPseudoHeaderAddress(old, new tcpip.Address, fullChecksum bool) { + xsum := b.Checksum() + if fullChecksum { + xsum = ^xsum + } + + xsum = checksumUpdate2ByteAlignedAddress(xsum, old, new) + if fullChecksum { + xsum = ^xsum + } + + b.SetChecksum(xsum) +} diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index d971194e6..1d0163823 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -14,7 +14,6 @@ go_library( ], visibility = ["//visibility:public"], deps = [ - "//pkg/iovec", "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 735c28da1..e8e716db0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package fdbased provides the implemention of data-link layer endpoints @@ -44,7 +45,6 @@ import ( "sync/atomic" "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/iovec" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -138,6 +138,20 @@ type endpoint struct { // gsoKind is the supported kind of GSO. gsoKind stack.SupportedGSO + + // maxSyscallHeaderBytes has the same meaning as + // Options.MaxSyscallHeaderBytes. + maxSyscallHeaderBytes uintptr + + // writevMaxIovs is the maximum number of iovecs that may be passed to + // rawfile.NonBlockingWriteIovec, as possibly limited by + // maxSyscallHeaderBytes. (No analogous limit is defined for + // rawfile.NonBlockingSendMMsg, since in that case the maximum number of + // iovecs also depends on the number of mmsghdrs. Instead, if sendBatch + // encounters a packet whose iovec count is limited by + // maxSyscallHeaderBytes, it falls back to writing the packet using writev + // via WritePacket.) + writevMaxIovs int } // Options specify the details about the fd-based endpoint to be created. @@ -186,6 +200,11 @@ type Options struct { // RXChecksumOffload if true, indicates that this endpoints capability // set should include CapabilityRXChecksumOffload. RXChecksumOffload bool + + // If MaxSyscallHeaderBytes is non-zero, it is the maximum number of bytes + // of struct iovec, msghdr, and mmsghdr that may be passed by each host + // system call. 
+ MaxSyscallHeaderBytes int } // fanoutID is used for AF_PACKET based endpoints to enable PACKET_FANOUT @@ -235,14 +254,25 @@ func New(opts *Options) (stack.LinkEndpoint, error) { return nil, fmt.Errorf("opts.FD is empty, at least one FD must be specified") } + if opts.MaxSyscallHeaderBytes < 0 { + return nil, fmt.Errorf("opts.MaxSyscallHeaderBytes is negative") + } + e := &endpoint{ - fds: opts.FDs, - mtu: opts.MTU, - caps: caps, - closed: opts.ClosedFunc, - addr: opts.Address, - hdrSize: hdrSize, - packetDispatchMode: opts.PacketDispatchMode, + fds: opts.FDs, + mtu: opts.MTU, + caps: caps, + closed: opts.ClosedFunc, + addr: opts.Address, + hdrSize: hdrSize, + packetDispatchMode: opts.PacketDispatchMode, + maxSyscallHeaderBytes: uintptr(opts.MaxSyscallHeaderBytes), + writevMaxIovs: rawfile.MaxIovs, + } + if e.maxSyscallHeaderBytes != 0 { + if max := int(e.maxSyscallHeaderBytes / rawfile.SizeofIovec); max < e.writevMaxIovs { + e.writevMaxIovs = max + } } // Increment fanoutID to ensure that we don't re-use the same fanoutID for @@ -470,9 +500,8 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocol e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) } - var builder iovec.Builder - fd := e.fds[pkt.Hash%uint32(len(e.fds))] + var vnetHdrBuf []byte if e.gsoKind == stack.HWGSOSupported { vnetHdr := virtioNetHdr{} if pkt.GSOOptions.Type != stack.GSONone { @@ -494,71 +523,123 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocol vnetHdr.gsoSize = pkt.GSOOptions.MSS } } + vnetHdrBuf = vnetHdr.marshal() + } - vnetHdrBuf := vnetHdr.marshal() - builder.Add(vnetHdrBuf) + views := pkt.Views() + numIovecs := len(views) + if len(vnetHdrBuf) != 0 { + numIovecs++ + } + if numIovecs > e.writevMaxIovs { + numIovecs = e.writevMaxIovs } - for _, v := range pkt.Views() { - builder.Add(v) + // Allocate small iovec arrays on the stack. + var iovecsArr [8]unix.Iovec + iovecs := iovecsArr[:0] + if numIovecs > len(iovecsArr) { + iovecs = make([]unix.Iovec, 0, numIovecs) + } + iovecs = rawfile.AppendIovecFromBytes(iovecs, vnetHdrBuf, numIovecs) + for _, v := range views { + iovecs = rawfile.AppendIovecFromBytes(iovecs, v, numIovecs) } - return rawfile.NonBlockingWriteIovec(fd, builder.Build()) + return rawfile.NonBlockingWriteIovec(fd, iovecs) } -func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, tcpip.Error) { +func (e *endpoint) sendBatch(batchFD int, pkts []*stack.PacketBuffer) (int, tcpip.Error) { // Send a batch of packets through batchFD. 
- mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch)) - for _, pkt := range batch { - if e.hdrSize > 0 { - e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress, pkt.NetworkProtocolNumber, pkt) - } + mmsgHdrsStorage := make([]rawfile.MMsgHdr, 0, len(pkts)) + packets := 0 + for packets < len(pkts) { + mmsgHdrs := mmsgHdrsStorage + batch := pkts[packets:] + syscallHeaderBytes := uintptr(0) + for _, pkt := range batch { + if e.hdrSize > 0 { + e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress, pkt.NetworkProtocolNumber, pkt) + } - var vnetHdrBuf []byte - if e.gsoKind == stack.HWGSOSupported { - vnetHdr := virtioNetHdr{} - if pkt.GSOOptions.Type != stack.GSONone { - vnetHdr.hdrLen = uint16(pkt.HeaderSize()) - if pkt.GSOOptions.NeedsCsum { - vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM - vnetHdr.csumStart = header.EthernetMinimumSize + pkt.GSOOptions.L3HdrLen - vnetHdr.csumOffset = pkt.GSOOptions.CsumOffset - } - if pkt.GSOOptions.Type != stack.GSONone && uint16(pkt.Data().Size()) > pkt.GSOOptions.MSS { - switch pkt.GSOOptions.Type { - case stack.GSOTCPv4: - vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4 - case stack.GSOTCPv6: - vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6 - default: - panic(fmt.Sprintf("Unknown gso type: %v", pkt.GSOOptions.Type)) + var vnetHdrBuf []byte + if e.gsoKind == stack.HWGSOSupported { + vnetHdr := virtioNetHdr{} + if pkt.GSOOptions.Type != stack.GSONone { + vnetHdr.hdrLen = uint16(pkt.HeaderSize()) + if pkt.GSOOptions.NeedsCsum { + vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM + vnetHdr.csumStart = header.EthernetMinimumSize + pkt.GSOOptions.L3HdrLen + vnetHdr.csumOffset = pkt.GSOOptions.CsumOffset + } + if pkt.GSOOptions.Type != stack.GSONone && uint16(pkt.Data().Size()) > pkt.GSOOptions.MSS { + switch pkt.GSOOptions.Type { + case stack.GSOTCPv4: + vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4 + case stack.GSOTCPv6: + vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6 + default: + panic(fmt.Sprintf("Unknown gso type: %v", pkt.GSOOptions.Type)) + } + vnetHdr.gsoSize = pkt.GSOOptions.MSS } - vnetHdr.gsoSize = pkt.GSOOptions.MSS } + vnetHdrBuf = vnetHdr.marshal() } - vnetHdrBuf = vnetHdr.marshal() - } - var builder iovec.Builder - builder.Add(vnetHdrBuf) - for _, v := range pkt.Views() { - builder.Add(v) - } - iovecs := builder.Build() + views := pkt.Views() + numIovecs := len(views) + if len(vnetHdrBuf) != 0 { + numIovecs++ + } + if numIovecs > rawfile.MaxIovs { + numIovecs = rawfile.MaxIovs + } + if e.maxSyscallHeaderBytes != 0 { + syscallHeaderBytes += rawfile.SizeofMMsgHdr + uintptr(numIovecs)*rawfile.SizeofIovec + if syscallHeaderBytes > e.maxSyscallHeaderBytes { + // We can't fit this packet into this call to sendmmsg(). + // We could potentially do so if we reduced numIovecs + // further, but this might incur considerable extra + // copying. Leave it to the next batch instead. + break + } + } - var mmsgHdr rawfile.MMsgHdr - mmsgHdr.Msg.Iov = &iovecs[0] - mmsgHdr.Msg.SetIovlen((len(iovecs))) - mmsgHdrs = append(mmsgHdrs, mmsgHdr) - } + // We can't easily allocate iovec arrays on the stack here since + // they will escape this loop iteration via mmsgHdrs. 
+ iovecs := make([]unix.Iovec, 0, numIovecs) + iovecs = rawfile.AppendIovecFromBytes(iovecs, vnetHdrBuf, numIovecs) + for _, v := range views { + iovecs = rawfile.AppendIovecFromBytes(iovecs, v, numIovecs) + } - packets := 0 - for len(mmsgHdrs) > 0 { - sent, err := rawfile.NonBlockingSendMMsg(batchFD, mmsgHdrs) - if err != nil { - return packets, err + var mmsgHdr rawfile.MMsgHdr + mmsgHdr.Msg.Iov = &iovecs[0] + mmsgHdr.Msg.SetIovlen(len(iovecs)) + mmsgHdrs = append(mmsgHdrs, mmsgHdr) + } + + if len(mmsgHdrs) == 0 { + // We can't fit batch[0] into a mmsghdr while staying under + // e.maxSyscallHeaderBytes. Use WritePacket, which will avoid the + // mmsghdr (by using writev) and re-buffer iovecs more aggressively + // if necessary (by using e.writevMaxIovs instead of + // rawfile.MaxIovs). + pkt := batch[0] + if err := e.WritePacket(pkt.EgressRoute, pkt.NetworkProtocolNumber, pkt); err != nil { + return packets, err + } + packets++ + } else { + for len(mmsgHdrs) > 0 { + sent, err := rawfile.NonBlockingSendMMsg(batchFD, mmsgHdrs) + if err != nil { + return packets, err + } + packets += sent + mmsgHdrs = mmsgHdrs[sent:] + } } - packets += sent - mmsgHdrs = mmsgHdrs[sent:] } return packets, nil @@ -676,8 +757,9 @@ func NewInjectable(fd int, mtu uint32, capabilities stack.LinkEndpointCapabiliti unix.SetNonblock(fd, true) return &InjectableEndpoint{endpoint: endpoint{ - fds: []int{fd}, - mtu: mtu, - caps: capabilities, + fds: []int{fd}, + mtu: mtu, + caps: capabilities, + writevMaxIovs: rawfile.MaxIovs, }} } diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index 8aad338b6..eccd21579 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go index df14eaad1..904393faa 100644 --- a/pkg/tcpip/link/fdbased/endpoint_unsafe.go +++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 5d698a5e9..bfae34ab9 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_stub.go b/pkg/tcpip/link/fdbased/mmap_stub.go index 67be52d67..9d8679502 100644 --- a/pkg/tcpip/link/fdbased/mmap_stub.go +++ b/pkg/tcpip/link/fdbased/mmap_stub.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !linux || (!amd64 && !arm64) // +build !linux !amd64,!arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_unsafe.go b/pkg/tcpip/link/fdbased/mmap_unsafe.go index 1293f68a2..58d5dfeef 100644 --- a/pkg/tcpip/link/fdbased/mmap_unsafe.go +++ b/pkg/tcpip/link/fdbased/mmap_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
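The batching loop in sendBatch above charges each packet one mmsghdr plus one iovec per view against Options.MaxSyscallHeaderBytes before adding it to the current sendmmsg() call. As a rough worked example (the sizes below assume linux/amd64, where an iovec is 16 bytes and the MMsgHdr defined in rawfile is 64 bytes; treat these as assumptions, the real code uses rawfile.SizeofIovec and rawfile.SizeofMMsgHdr):

// packetsPerSendMMsg estimates how many packets fit into a single sendmmsg()
// call under a header-bytes budget when every packet needs the same number of
// iovecs. Purely illustrative.
func packetsPerSendMMsg(maxSyscallHeaderBytes, iovecsPerPacket, sizeofIovec, sizeofMMsgHdr int) int {
	perPacket := sizeofMMsgHdr + iovecsPerPacket*sizeofIovec
	return maxSyscallHeaderBytes / perPacket
}

// With a 4096-byte budget and 3 iovecs per packet: 4096 / (64 + 3*16) = 36
// packets per sendmmsg() call; the 37th packet starts the next batch.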
+//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 4b7ef3aac..ab2855a63 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go index 2206fe0e6..c1438da21 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux && !amd64 && !arm64 // +build linux,!amd64,!arm64 package rawfile diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go index 5002245a1..da900c24b 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ((linux && amd64) || (linux && arm64)) && go1.12 && !go1.18 // +build linux,amd64 linux,arm64 // +build go1.12 // +build !go1.18 diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 9743e70ea..7e21a78d4 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/errors_test.go b/pkg/tcpip/link/rawfile/errors_test.go index 8f4bd60da..1b88c309b 100644 --- a/pkg/tcpip/link/rawfile/errors_test.go +++ b/pkg/tcpip/link/rawfile/errors_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index ba92aedbc..53448a641 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package rawfile contains utilities for using the netstack with raw host @@ -19,12 +20,66 @@ package rawfile import ( + "reflect" "unsafe" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip" ) +// SizeofIovec is the size of a unix.Iovec in bytes. +const SizeofIovec = unsafe.Sizeof(unix.Iovec{}) + +// MaxIovs is UIO_MAXIOV, the maximum number of iovecs that may be passed to a +// host system call in a single array. +const MaxIovs = 1024 + +// IovecFromBytes returns a unix.Iovec representing bs. +// +// Preconditions: len(bs) > 0. 
+func IovecFromBytes(bs []byte) unix.Iovec { + iov := unix.Iovec{ + Base: &bs[0], + } + iov.SetLen(len(bs)) + return iov +} + +func bytesFromIovec(iov unix.Iovec) (bs []byte) { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&bs)) + sh.Data = uintptr(unsafe.Pointer(iov.Base)) + sh.Len = int(iov.Len) + sh.Cap = int(iov.Len) + return +} + +// AppendIovecFromBytes returns append(iovs, IovecFromBytes(bs)). If len(bs) == +// 0, AppendIovecFromBytes returns iovs without modification. If len(iovs) >= +// max, AppendIovecFromBytes replaces the final iovec in iovs with one that +// also includes the contents of bs. Note that this implies that +// AppendIovecFromBytes is only usable when the returned iovec slice is used as +// the source of a write. +func AppendIovecFromBytes(iovs []unix.Iovec, bs []byte, max int) []unix.Iovec { + if len(bs) == 0 { + return iovs + } + if len(iovs) < max { + return append(iovs, IovecFromBytes(bs)) + } + iovs[len(iovs)-1] = IovecFromBytes(append(bytesFromIovec(iovs[len(iovs)-1]), bs...)) + return iovs +} + +// MMsgHdr represents the mmsg_hdr structure required by recvmmsg() on linux. +type MMsgHdr struct { + Msg unix.Msghdr + Len uint32 + _ [4]byte +} + +// SizeofMMsgHdr is the size of a MMsgHdr in bytes. +const SizeofMMsgHdr = unsafe.Sizeof(MMsgHdr{}) + // GetMTU determines the MTU of a network interface device. func GetMTU(name string) (uint32, error) { fd, err := unix.Socket(unix.AF_UNIX, unix.SOCK_DGRAM, 0) @@ -137,13 +192,6 @@ func BlockingReadv(fd int, iovecs []unix.Iovec) (int, tcpip.Error) { } } -// MMsgHdr represents the mmsg_hdr structure required by recvmmsg() on linux. -type MMsgHdr struct { - Msg unix.Msghdr - Len uint32 - _ [4]byte -} - // BlockingRecvMMsg reads from a file descriptor that is set up as non-blocking // and stores the received messages in a slice of MMsgHdr structures. If no data // is available, it will block in a poll() syscall until the file descriptor diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go index 8e6f3e5e3..e882a128c 100644 --- a/pkg/tcpip/link/sharedmem/rx.go +++ b/pkg/tcpip/link/sharedmem/rx.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index df9a0b90a..30cf659b8 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package sharedmem provides the implemention of data-link layer endpoints diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 0f72d4e95..d6d953085 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/sniffer/pcap.go b/pkg/tcpip/link/sniffer/pcap.go index c16c19647..3bb864ed2 100644 --- a/pkg/tcpip/link/sniffer/pcap.go +++ b/pkg/tcpip/link/sniffer/pcap.go @@ -39,8 +39,6 @@ type pcapHeader struct { Network uint32 } -const pcapPacketHeaderLen = 16 - type pcapPacketHeader struct { // Seconds is the timestamp seconds. 
Seconds uint32 @@ -55,8 +53,7 @@ type pcapPacketHeader struct { OriginalLength uint32 } -func newPCAPPacketHeader(incLen, orgLen uint32) pcapPacketHeader { - now := time.Now() +func newPCAPPacketHeader(now time.Time, incLen, orgLen uint32) pcapPacketHeader { return pcapPacketHeader{ Seconds: uint32(now.Unix()), Microseconds: uint32(now.Nanosecond() / 1000), diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 2d6a3a833..3df826f3c 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -87,11 +87,7 @@ func NewWithPrefix(lower stack.LinkEndpoint, logPrefix string) stack.LinkEndpoin } func zoneOffset() (int32, error) { - loc, err := time.LoadLocation("Local") - if err != nil { - return 0, err - } - date := time.Date(0, 0, 0, 0, 0, 0, 0, loc) + date := time.Date(0, 0, 0, 0, 0, 0, 0, time.Local) _, offset := date.Zone() return int32(offset), nil } @@ -117,8 +113,9 @@ func writePCAPHeader(w io.Writer, maxLen uint32) error { // NewWithWriter creates a new sniffer link-layer endpoint. It wraps around // another endpoint and logs packets as they traverse the endpoint. // -// Packets are logged to writer in the pcap format. A sniffer created with this -// function will not emit packets using the standard log package. +// Each packet is written to writer in the pcap format in a single Write call +// without synchronization. A sniffer created with this function will not emit +// packets using the standard log package. // // snapLen is the maximum amount of a packet to be saved. Packets with a length // less than or equal to snapLen will be saved in their entirety. Longer @@ -159,27 +156,29 @@ func (e *endpoint) dumpPacket(dir direction, protocol tcpip.NetworkProtocolNumbe if max := int(e.maxPCAPLen); length > max { length = max } - if err := binary.Write(writer, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(totalLength))); err != nil { - panic(err) - } - write := func(b []byte) { - if len(b) > length { - b = b[:length] + packetHeader := newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength)) + packet := make([]byte, binary.Size(packetHeader)+length) + { + writer := tcpip.SliceWriter(packet) + if err := binary.Write(&writer, binary.BigEndian, packetHeader); err != nil { + panic(err) } - for len(b) != 0 { + for _, b := range pkt.Views() { + if length == 0 { + break + } + if len(b) > length { + b = b[:length] + } n, err := writer.Write(b) if err != nil { panic(err) } - b = b[n:] length -= n } } - for _, v := range pkt.Views() { - if length == 0 { - break - } - write(v) + if _, err := writer.Write(packet); err != nil { + panic(err) } } } diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index 7656cca6a..4758a99ad 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -26,6 +26,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/refs", "//pkg/refsvfs2", diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 36af2a029..d23210503 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -88,12 +89,12 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags Flags) error { defer d.mu.Unlock() if d.endpoint != nil { - return syserror.EINVAL + return linuxerr.EINVAL } // Input 
validation. if flags.TAP && flags.TUN || !flags.TAP && !flags.TUN { - return syserror.EINVAL + return linuxerr.EINVAL } prefix := "tun" @@ -108,7 +109,7 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags Flags) error { endpoint, err := attachOrCreateNIC(s, name, prefix, linkCaps) if err != nil { - return syserror.EINVAL + return linuxerr.EINVAL } d.endpoint = endpoint @@ -125,7 +126,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE endpoint, ok := linkEP.(*tunEndpoint) if !ok { // Not a NIC created by tun device. - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } if !endpoint.TryIncRef() { // Race detected: NIC got deleted in between. @@ -159,7 +160,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE // Race detected: A NIC has been created in between. continue default: - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } } @@ -170,7 +171,7 @@ func (d *Device) Write(data []byte) (int64, error) { endpoint := d.endpoint d.mu.RUnlock() if endpoint == nil { - return 0, syserror.EBADFD + return 0, linuxerr.EBADFD } if !endpoint.IsAttached() { return 0, syserror.EIO @@ -207,6 +208,15 @@ func (d *Device) Write(data []byte) (int64, error) { protocol = pktInfoHdr.Protocol() case ethHdr != nil: protocol = ethHdr.Type() + case d.flags.TUN: + // TUN interface with IFF_NO_PI enabled, thus + // we need to determine protocol from version field + version := data[0] >> 4 + if version == 4 { + protocol = header.IPv4ProtocolNumber + } else if version == 6 { + protocol = header.IPv6ProtocolNumber + } } // Try to determine remote link address, default zero. @@ -233,7 +243,7 @@ func (d *Device) Read() ([]byte, error) { endpoint := d.endpoint d.mu.RUnlock() if endpoint == nil { - return nil, syserror.EBADFD + return nil, linuxerr.EBADFD } for { @@ -264,13 +274,6 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { vv.AppendView(buffer.View(hdr)) } - // If the packet does not already have link layer header, and the route - // does not exist, we can't compute it. This is possibly a raw packet, tun - // device doesn't support this at the moment. - if info.Pkt.LinkHeader().View().IsEmpty() && len(info.Route.RemoteLinkAddress) == 0 { - return nil, false - } - // Ethernet header (TAP only). if d.flags.TAP { // Add ethernet header if not provided. diff --git a/pkg/tcpip/link/tun/tun_unsafe.go b/pkg/tcpip/link/tun/tun_unsafe.go index 0591fbd63..db4338e79 100644 --- a/pkg/tcpip/link/tun/tun_unsafe.go +++ b/pkg/tcpip/link/tun/tun_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package tun contains methods to open TAP and TUN devices. diff --git a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go index 0b51563cd..1261ad414 100644 --- a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go +++ b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go @@ -126,7 +126,7 @@ func (m *mockMulticastGroupProtocol) sendQueuedReports() { // Precondition: m.mu must be read locked. func (m *mockMulticastGroupProtocol) Enabled() bool { if m.mu.TryLock() { - m.mu.Unlock() + m.mu.Unlock() // +checklocksforce: TryLock. 
m.t.Fatal("got write lock, expected to not take the lock; generic multicast protocol must take the read or write lock before calling Enabled") } @@ -138,11 +138,11 @@ func (m *mockMulticastGroupProtocol) Enabled() bool { // Precondition: m.mu must be locked. func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) { if m.mu.TryLock() { - m.mu.Unlock() + m.mu.Unlock() // +checklocksforce: TryLock. m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending report for %s", groupAddress) } if m.mu.TryRLock() { - m.mu.RUnlock() + m.mu.RUnlock() // +checklocksforce: TryLock. m.t.Fatalf("got read lock, expected to not take the lock; generic multicast protocol must take the write lock before sending report for %s", groupAddress) } @@ -155,11 +155,11 @@ func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (boo // Precondition: m.mu must be locked. func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) tcpip.Error { if m.mu.TryLock() { - m.mu.Unlock() + m.mu.Unlock() // +checklocksforce: TryLock. m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending leave for %s", groupAddress) } if m.mu.TryRLock() { - m.mu.RUnlock() + m.mu.RUnlock() // +checklocksforce: TryLock. m.t.Fatalf("got read lock, expected to not take the lock; generic multicast protocol must take the write lock before sending leave for %s", groupAddress) } diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index f5693defe..b1aec5312 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -344,7 +344,10 @@ func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) { func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) { e.mu.Lock() defer e.mu.Unlock() - e.mu.ndp.invalidateDefaultRouter(rtr) + + // We represent default routers with a default (off-link) route through the + // router. + e.mu.ndp.invalidateOffLinkRoute(offLinkRoute{dest: header.IPv6EmptySubnet, router: rtr}) } // SetNDPConfigurations implements NDPEndpoint. diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go index c44e4ac4e..8837d66d8 100644 --- a/pkg/tcpip/network/ipv6/ndp.go +++ b/pkg/tcpip/network/ipv6/ndp.go @@ -54,6 +54,11 @@ const ( // Advertisements, as a host. defaultDiscoverDefaultRouters = true + // defaultDiscoverMoreSpecificRoutes is the default configuration for + // whether or not to discover more-specific routes from incoming Router + // Advertisements, as a host. + defaultDiscoverMoreSpecificRoutes = true + // defaultDiscoverOnLinkPrefixes is the default configuration for // whether or not to discover on-link prefixes from incoming Router // Advertisements' Prefix Information option, as a host. @@ -78,13 +83,13 @@ const ( // we cannot have a negative delay. minimumMaxRtrSolicitationDelay = 0 - // MaxDiscoveredDefaultRouters is the maximum number of discovered - // default routers. The stack should stop discovering new routers after - // discovering MaxDiscoveredDefaultRouters routers. + // MaxDiscoveredOffLinkRoutes is the maximum number of discovered off-link + // routes. The stack should stop discovering new off-link routes after + // this limit is reached. // // This value MUST be at minimum 2 as per RFC 4861 section 6.3.4, and // SHOULD be more. 
- MaxDiscoveredDefaultRouters = 10 + MaxDiscoveredOffLinkRoutes = 10 // MaxDiscoveredOnLinkPrefixes is the maximum number of discovered // on-link prefixes. The stack should stop discovering new on-link @@ -352,12 +357,18 @@ type NDPConfigurations struct { // DiscoverDefaultRouters determines whether or not default routers are // discovered from Router Advertisements, as per RFC 4861 section 6. This - // configuration is ignored if HandleRAs is false. + // configuration is ignored if RAs will not be processed (see HandleRAs). DiscoverDefaultRouters bool + // DiscoverMoreSpecificRoutes determines whether or not more specific routes + // are discovered from Router Advertisements, as per RFC 4191. This + // configuration is ignored if RAs will not be processed (see HandleRAs). + DiscoverMoreSpecificRoutes bool + // DiscoverOnLinkPrefixes determines whether or not on-link prefixes are // discovered from Router Advertisements' Prefix Information option, as per - // RFC 4861 section 6. This configuration is ignored if HandleRAs is false. + // RFC 4861 section 6. This configuration is ignored if RAs will not be + // processed (see HandleRAs). DiscoverOnLinkPrefixes bool // AutoGenGlobalAddresses determines whether or not an IPv6 endpoint performs @@ -408,6 +419,7 @@ func DefaultNDPConfigurations() NDPConfigurations { MaxRtrSolicitationDelay: defaultMaxRtrSolicitationDelay, HandleRAs: defaultHandleRAs, DiscoverDefaultRouters: defaultDiscoverDefaultRouters, + DiscoverMoreSpecificRoutes: defaultDiscoverMoreSpecificRoutes, DiscoverOnLinkPrefixes: defaultDiscoverOnLinkPrefixes, AutoGenGlobalAddresses: defaultAutoGenGlobalAddresses, AutoGenTempGlobalAddresses: defaultAutoGenTempGlobalAddresses, @@ -448,6 +460,11 @@ type timer struct { timer tcpip.Timer } +type offLinkRoute struct { + dest tcpip.Subnet + router tcpip.Address +} + // ndpState is the per-Interface NDP state. type ndpState struct { // Do not allow overwriting this state. @@ -462,8 +479,8 @@ type ndpState struct { // The DAD timers to send the next NS message, or resolve the address. dad ip.DAD - // The default routers discovered through Router Advertisements. - defaultRouters map[tcpip.Address]defaultRouterState + // The off-link routes discovered through Router Advertisements. + offLinkRoutes map[offLinkRoute]offLinkRouteState // rtrSolicitTimer is the timer used to send the next router solicitation // message. @@ -491,12 +508,12 @@ type ndpState struct { temporaryAddressDesyncFactor time.Duration } -// defaultRouterState holds data associated with a default router discovered by +// offLinkRouteState holds data associated with an off-link route discovered by // a Router Advertisement (RA). -type defaultRouterState struct { +type offLinkRouteState struct { prf header.NDPRoutePreference - // Job to invalidate the default router. + // Job to invalidate the route. // // Must not be nil. invalidationJob *tcpip.Job @@ -727,38 +744,9 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) { prf = header.MediumRoutePreference } - rtr, ok := ndp.defaultRouters[ip] - rl := ra.RouterLifetime() - switch { - case !ok && rl != 0: - // This is a new default router we are discovering. - // - // Only remember it if we currently know about less than - // MaxDiscoveredDefaultRouters routers. - if len(ndp.defaultRouters) < MaxDiscoveredDefaultRouters { - ndp.rememberDefaultRouter(ip, rl, prf) - } - - case ok && rl != 0: - // This is an already discovered default router. Update - // the invalidation job. 
- rtr.invalidationJob.Cancel() - rtr.invalidationJob.Schedule(rl) - - if prf != rtr.prf { - rtr.prf = prf - - // Inform the integrator about router preference updates. - ndp.ep.protocol.options.NDPDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), header.IPv6EmptySubnet, ip, prf) - } - - ndp.defaultRouters[ip] = rtr - - case ok && rl == 0: - // We know about the router but it is no longer to be - // used as a default router so invalidate it. - ndp.invalidateDefaultRouter(ip) - } + // We represent default routers with a default (off-link) route through the + // router. + ndp.handleOffLinkRouteDiscovery(offLinkRoute{dest: header.IPv6EmptySubnet, router: ip}, ra.RouterLifetime(), prf) } // TODO(b/141556115): Do (RetransTimer, ReachableTime)) Parameter @@ -810,58 +798,107 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) { if opt.AutonomousAddressConfigurationFlag() { ndp.handleAutonomousPrefixInformation(opt) } + + case header.NDPRouteInformation: + if !ndp.configs.DiscoverMoreSpecificRoutes { + continue + } + + dest, err := opt.Prefix() + if err != nil { + panic(fmt.Sprintf("%T.Prefix(): %s", opt, err)) + } + + prf := opt.RoutePreference() + if prf == header.ReservedRoutePreference { + // As per RFC 4191 section 2.3, + // + // Prf (Route Preference) + // 2-bit signed integer. The Route Preference indicates + // whether to prefer the router associated with this prefix + // over others, when multiple identical prefixes (for + // different routers) have been received. If the Reserved + // (10) value is received, the Route Information Option MUST + // be ignored. + continue + } + + ndp.handleOffLinkRouteDiscovery(offLinkRoute{dest: dest, router: ip}, opt.RouteLifetime(), prf) } // TODO(b/141556115): Do (MTU) Parameter Discovery. } } -// invalidateDefaultRouter invalidates a discovered default router. +// invalidateOffLinkRoute invalidates a discovered off-link route. // // The IPv6 endpoint that ndp belongs to MUST be locked. -func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) { - rtr, ok := ndp.defaultRouters[ip] - - // Is the router still discovered? +func (ndp *ndpState) invalidateOffLinkRoute(route offLinkRoute) { + state, ok := ndp.offLinkRoutes[route] if !ok { - // ...Nope, do nothing further. return } - rtr.invalidationJob.Cancel() - delete(ndp.defaultRouters, ip) + state.invalidationJob.Cancel() + delete(ndp.offLinkRoutes, route) - // Let the integrator know a discovered default router is invalidated. + // Let the integrator know a discovered off-link route is invalidated. if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { - ndpDisp.OnOffLinkRouteInvalidated(ndp.ep.nic.ID(), header.IPv6EmptySubnet, ip) + ndpDisp.OnOffLinkRouteInvalidated(ndp.ep.nic.ID(), route.dest, route.router) } } -// rememberDefaultRouter remembers a newly discovered default router with IPv6 -// link-local address ip with lifetime rl. -// -// The router identified by ip MUST NOT already be known by the IPv6 endpoint. +// handleOffLinkRouteDiscovery handles the discovery of an off-link route. // -// The IPv6 endpoint that ndp belongs to MUST be locked. -func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration, prf header.NDPRoutePreference) { +// Precondition: ndp.ep.mu must be locked. +func (ndp *ndpState) handleOffLinkRouteDiscovery(route offLinkRoute, lifetime time.Duration, prf header.NDPRoutePreference) { ndpDisp := ndp.ep.protocol.options.NDPDisp if ndpDisp == nil { return } - // Inform the integrator when we discovered a default router. 
- ndpDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), header.IPv6EmptySubnet, ip, prf) + state, ok := ndp.offLinkRoutes[route] + switch { + case !ok && lifetime != 0: + // This is a new route we are discovering. + // + // Only remember it if we currently know about less than + // MaxDiscoveredOffLinkRoutes routers. + if len(ndp.offLinkRoutes) < MaxDiscoveredOffLinkRoutes { + // Inform the integrator when we discovered an off-link route. + ndpDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), route.dest, route.router, prf) + + state := offLinkRouteState{ + prf: prf, + invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + ndp.invalidateOffLinkRoute(route) + }), + } - state := defaultRouterState{ - prf: prf, - invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { - ndp.invalidateDefaultRouter(ip) - }), - } + state.invalidationJob.Schedule(lifetime) + + ndp.offLinkRoutes[route] = state + } - state.invalidationJob.Schedule(rl) + case ok && lifetime != 0: + // This is an already discovered off-link route. Update the lifetime. + state.invalidationJob.Cancel() + state.invalidationJob.Schedule(lifetime) - ndp.defaultRouters[ip] = state + if prf != state.prf { + state.prf = prf + + // Inform the integrator about route preference updates. + ndpDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), route.dest, route.router, prf) + } + + ndp.offLinkRoutes[route] = state + + case ok && lifetime == 0: + // The already discovered off-link route is no longer considered valid so we + // invalidate it immediately. + ndp.invalidateOffLinkRoute(route) + } } // rememberOnLinkPrefix remembers a newly discovered on-link prefix with IPv6 @@ -1677,12 +1714,12 @@ func (ndp *ndpState) cleanupState() { panic(fmt.Sprintf("ndp: still have discovered on-link prefixes after cleaning up; found = %d", got)) } - for router := range ndp.defaultRouters { - ndp.invalidateDefaultRouter(router) + for route := range ndp.offLinkRoutes { + ndp.invalidateOffLinkRoute(route) } - if got := len(ndp.defaultRouters); got != 0 { - panic(fmt.Sprintf("ndp: still have discovered default routers after cleaning up; found = %d", got)) + if got := len(ndp.offLinkRoutes); got != 0 { + panic(fmt.Sprintf("ndp: still have discovered off-link routes after cleaning up; found = %d", got)) } ndp.dhcpv6Configuration = 0 @@ -1845,14 +1882,14 @@ func (ndp *ndpState) stopSolicitingRouters() { } func (ndp *ndpState) init(ep *endpoint, dadOptions ip.DADOptions) { - if ndp.defaultRouters != nil { + if ndp.offLinkRoutes != nil { panic("attempted to initialize NDP state twice") } ndp.ep = ep ndp.configs = ep.protocol.options.NDPConfigs ndp.dad.Init(&ndp.ep.mu, ep.protocol.options.DADConfigs, dadOptions) - ndp.defaultRouters = make(map[tcpip.Address]defaultRouterState) + ndp.offLinkRoutes = make(map[offLinkRoute]offLinkRouteState) ndp.onLinkPrefixes = make(map[tcpip.Subnet]onLinkPrefixState) ndp.slaacPrefixes = make(map[tcpip.Subnet]slaacPrefixState) diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go index 130438f3b..f0186c64e 100644 --- a/pkg/tcpip/network/ipv6/ndp_test.go +++ b/pkg/tcpip/network/ipv6/ndp_test.go @@ -93,7 +93,7 @@ func TestStackNDPEndpointInvalidateDefaultRouter(t *testing.T) { ipv6EP := ep.(*endpoint) ipv6EP.mu.Lock() - ipv6EP.mu.ndp.rememberDefaultRouter(lladdr1, time.Hour, header.MediumRoutePreference) + ipv6EP.mu.ndp.handleOffLinkRouteDiscovery(offLinkRoute{dest: header.IPv6EmptySubnet, router: lladdr1}, time.Hour, header.MediumRoutePreference) ipv6EP.mu.Unlock() if ndpDisp.addr != lladdr1 { diff --git 
a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index b7f6d52ae..fe98a52af 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/sync", "//pkg/tcpip", + "//pkg/tcpip/header", ], ) diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index 854d6a6ba..fb8ef1ee2 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" ) const ( @@ -122,7 +123,7 @@ type deviceToDest map[tcpip.NICID]destToCounter // If either of the port reuse flags is enabled on any of the nodes, all nodes // sharing a port must share at least one reuse flag. This matches Linux's // behavior. -func (dd deviceToDest) isAvailable(res Reservation) bool { +func (dd deviceToDest) isAvailable(res Reservation, portSpecified bool) bool { flagBits := res.Flags.Bits() if res.BindToDevice == 0 { intersection := FlagMask @@ -138,6 +139,9 @@ func (dd deviceToDest) isAvailable(res Reservation) bool { return false } } + if !portSpecified && res.Transport == header.TCPProtocolNumber { + return false + } return true } @@ -146,16 +150,26 @@ func (dd deviceToDest) isAvailable(res Reservation) bool { if dests, ok := dd[0]; ok { var count int intersection, count = dests.intersectionFlags(res) - if count > 0 && intersection&flagBits == 0 { - return false + if count > 0 { + if intersection&flagBits == 0 { + return false + } + if !portSpecified && res.Transport == header.TCPProtocolNumber { + return false + } } } if dests, ok := dd[res.BindToDevice]; ok { flags, count := dests.intersectionFlags(res) intersection &= flags - if count > 0 && intersection&flagBits == 0 { - return false + if count > 0 { + if intersection&flagBits == 0 { + return false + } + if !portSpecified && res.Transport == header.TCPProtocolNumber { + return false + } } } @@ -168,12 +182,12 @@ type addrToDevice map[tcpip.Address]deviceToDest // isAvailable checks whether an IP address is available to bind to. If the // address is the "any" address, check all other addresses. Otherwise, just // check against the "any" address and the provided address. -func (ad addrToDevice) isAvailable(res Reservation) bool { +func (ad addrToDevice) isAvailable(res Reservation, portSpecified bool) bool { if res.Addr == anyIPAddress { // If binding to the "any" address then check that there are no // conflicts with all addresses. for _, devices := range ad { - if !devices.isAvailable(res) { + if !devices.isAvailable(res, portSpecified) { return false } } @@ -182,14 +196,14 @@ func (ad addrToDevice) isAvailable(res Reservation) bool { // Check that there is no conflict with the "any" address. if devices, ok := ad[anyIPAddress]; ok { - if !devices.isAvailable(res) { + if !devices.isAvailable(res, portSpecified) { return false } } // Check that this is no conflict with the provided address. if devices, ok := ad[res.Addr]; ok { - if !devices.isAvailable(res) { + if !devices.isAvailable(res, portSpecified) { return false } } @@ -310,7 +324,7 @@ func (pm *PortManager) ReservePort(rng *rand.Rand, res Reservation, testPort Por // If a port is specified, just try to reserve it for all network // protocols. 
if res.Port != 0 { - if !pm.reserveSpecificPortLocked(res) { + if !pm.reserveSpecificPortLocked(res, true /* portSpecified */) { return 0, &tcpip.ErrPortInUse{} } if testPort != nil { @@ -330,7 +344,7 @@ func (pm *PortManager) ReservePort(rng *rand.Rand, res Reservation, testPort Por // A port wasn't specified, so try to find one. return pm.PickEphemeralPort(rng, func(p uint16) (bool, tcpip.Error) { res.Port = p - if !pm.reserveSpecificPortLocked(res) { + if !pm.reserveSpecificPortLocked(res, false /* portSpecified */) { return false, nil } if testPort != nil { @@ -350,12 +364,12 @@ func (pm *PortManager) ReservePort(rng *rand.Rand, res Reservation, testPort Por // reserveSpecificPortLocked tries to reserve the given port on all given // protocols. -func (pm *PortManager) reserveSpecificPortLocked(res Reservation) bool { +func (pm *PortManager) reserveSpecificPortLocked(res Reservation, portSpecified bool) bool { // Make sure the port is available. for _, network := range res.Networks { desc := portDescriptor{network, res.Transport, res.Port} if addrs, ok := pm.allocatedPorts[desc]; ok { - if !addrs.isAvailable(res) { + if !addrs.isAvailable(res, portSpecified) { return false } } diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index b9a24ff56..009cab643 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index ef1bfc186..c10b19aa0 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go index 0ea85f9ed..5642c86f8 100644 --- a/pkg/tcpip/socketops.go +++ b/pkg/tcpip/socketops.go @@ -15,17 +15,11 @@ package tcpip import ( - "math" "sync/atomic" - "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sync" ) -// PacketOverheadFactor is used to multiply the value provided by the user on a -// SetSockOpt for setting the send/receive buffer sizes sockets. -const PacketOverheadFactor = 2 - // SocketOptionsHandler holds methods that help define endpoint specific // behavior for socket level socket options. These must be implemented by // endpoints to get notified when socket level options are set. @@ -60,7 +54,7 @@ type SocketOptionsHandler interface { // buffer size. It also returns the newly set value. OnSetSendBufferSize(v int64) (newSz int64) - // OnSetReceiveBufferSize is invoked to set the SO_RCVBUFSIZE. + // OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE. OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) } @@ -213,16 +207,24 @@ type SocketOptions struct { // will not change. getSendBufferLimits GetSendBufferLimits `state:"manual"` + // sendBufSizeMu protects sendBufferSize and calls to + // handler.OnSetSendBufferSize. + sendBufSizeMu sync.Mutex `state:"nosave"` + // sendBufferSize determines the send buffer size for this socket. 
- sendBufferSize atomicbitops.AlignedAtomicInt64 + sendBufferSize int64 // getReceiveBufferLimits provides the handler to get the min, default and // max size for receive buffer. It is initialized at the creation time and // will not change. getReceiveBufferLimits GetReceiveBufferLimits `state:"manual"` + // receiveBufSizeMu protects receiveBufferSize and calls to + // handler.OnSetReceiveBufferSize. + receiveBufSizeMu sync.Mutex `state:"nosave"` + // receiveBufferSize determines the receive buffer size for this socket. - receiveBufferSize atomicbitops.AlignedAtomicInt64 + receiveBufferSize int64 // mu protects the access to the below fields. mu sync.Mutex `state:"nosave"` @@ -612,81 +614,52 @@ func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error { return nil } +// SendBufferLimits returns the [min, max) range of allowable send buffer +// sizes. +func (so *SocketOptions) SendBufferLimits() (min, max int64) { + limits := so.getSendBufferLimits(so.stackHandler) + return int64(limits.Min), int64(limits.Max) +} + // GetSendBufferSize gets value for SO_SNDBUF option. func (so *SocketOptions) GetSendBufferSize() int64 { - return so.sendBufferSize.Load() + so.sendBufSizeMu.Lock() + defer so.sendBufSizeMu.Unlock() + return so.sendBufferSize } // SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the // stack handler should be invoked to set the send buffer size. func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) { - v := sendBufferSize - - if !notify { - so.sendBufferSize.Store(v) - return - } - - // Make sure the send buffer size is within the min and max - // allowed. - ss := so.getSendBufferLimits(so.stackHandler) - min := int64(ss.Min) - max := int64(ss.Max) - // Validate the send buffer size with min and max values. - // Multiply it by factor of 2. - if v > max { - v = max - } - - if v < math.MaxInt32/PacketOverheadFactor { - v *= PacketOverheadFactor - if v < min { - v = min - } - } else { - v = math.MaxInt32 + so.sendBufSizeMu.Lock() + defer so.sendBufSizeMu.Unlock() + if notify { + sendBufferSize = so.handler.OnSetSendBufferSize(sendBufferSize) } + so.sendBufferSize = sendBufferSize +} - // Notify endpoint about change in buffer size. - newSz := so.handler.OnSetSendBufferSize(v) - so.sendBufferSize.Store(newSz) +// ReceiveBufferLimits returns the [min, max) range of allowable receive buffer +// sizes. +func (so *SocketOptions) ReceiveBufferLimits() (min, max int64) { + limits := so.getReceiveBufferLimits(so.stackHandler) + return int64(limits.Min), int64(limits.Max) } // GetReceiveBufferSize gets value for SO_RCVBUF option. func (so *SocketOptions) GetReceiveBufferSize() int64 { - return so.receiveBufferSize.Load() + so.receiveBufSizeMu.Lock() + defer so.receiveBufSizeMu.Unlock() + return so.receiveBufferSize } -// SetReceiveBufferSize sets value for SO_RCVBUF option. +// SetReceiveBufferSize sets the value of the SO_RCVBUF option, optionally +// notifying the owning endpoint. func (so *SocketOptions) SetReceiveBufferSize(receiveBufferSize int64, notify bool) { - if !notify { - so.receiveBufferSize.Store(receiveBufferSize) - return - } - - // Make sure the send buffer size is within the min and max - // allowed. - v := receiveBufferSize - ss := so.getReceiveBufferLimits(so.stackHandler) - min := int64(ss.Min) - max := int64(ss.Max) - // Validate the send buffer size with min and max values. - if v > max { - v = max - } - - // Multiply it by factor of 2. 
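With SocketOptions reduced to storing the value under a mutex, the clamping removed here (cap at the maximum, multiply by the overhead factor, raise to the minimum) is expected to live in the endpoints' OnSetSendBufferSize/OnSetReceiveBufferSize handlers instead, which also explains why the updated tests further down pass doubled values. A minimal sketch of such a handler-side clamp; the function name and limits are illustrative, not the actual gVisor implementation:

package main

import (
	"fmt"
	"math"
)

// clampBufferSize sketches the handler-side logic: double the requested size
// to account for packet overhead, then keep the result within [minSize, maxSize].
func clampBufferSize(requested, minSize, maxSize int64) int64 {
	const packetOverheadFactor = 2
	v := requested
	if v > maxSize {
		v = maxSize
	}
	if v < math.MaxInt32/packetOverheadFactor {
		v *= packetOverheadFactor
		if v < minSize {
			v = minSize
		}
	} else {
		v = math.MaxInt32
	}
	return v
}

func main() {
	fmt.Println(clampBufferSize(99, 200, 1<<20))    // below min: 200
	fmt.Println(clampBufferSize(4096, 200, 1<<20))  // doubled: 8192
	fmt.Println(clampBufferSize(1<<21, 200, 1<<20)) // capped at max, then doubled: 2097152
}
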
- if v < math.MaxInt32/PacketOverheadFactor { - v *= PacketOverheadFactor - if v < min { - v = min - } - } else { - v = math.MaxInt32 + so.receiveBufSizeMu.Lock() + defer so.receiveBufSizeMu.Unlock() + if notify { + receiveBufferSize = so.handler.OnSetReceiveBufferSize(receiveBufferSize, so.receiveBufferSize) } - - oldSz := so.receiveBufferSize.Load() - // Notify endpoint about change in buffer size. - newSz := so.handler.OnSetReceiveBufferSize(v, oldSz) - so.receiveBufferSize.Store(newSz) + so.receiveBufferSize = receiveBufferSize } diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go index ce9cebdaa..ae0bb4ace 100644 --- a/pkg/tcpip/stack/addressable_endpoint_state.go +++ b/pkg/tcpip/stack/addressable_endpoint_state.go @@ -249,7 +249,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address // or we are adding a new temporary or permanent address. // // The address MUST be write locked at this point. - defer addrState.mu.Unlock() + defer addrState.mu.Unlock() // +checklocksforce if permanent { if addrState.mu.kind.IsPermanent() { diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go index 18e0d4374..068dab7ce 100644 --- a/pkg/tcpip/stack/conntrack.go +++ b/pkg/tcpip/stack/conntrack.go @@ -363,7 +363,7 @@ func (ct *ConnTrack) insertConn(conn *conn) { // Unlocking can happen in any order. ct.buckets[tupleBucket].mu.Unlock() if tupleBucket != replyBucket { - ct.buckets[replyBucket].mu.Unlock() + ct.buckets[replyBucket].mu.Unlock() // +checklocksforce } } @@ -405,16 +405,23 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) bool { // validated if checksum offloading is off. It may require IP defrag if the // packets are fragmented. + var newAddr tcpip.Address + var newPort uint16 + + updateSRCFields := false + switch hook { case Prerouting, Output: if conn.manip == manipDestination { switch dir { case dirOriginal: - tcpHeader.SetDestinationPort(conn.reply.srcPort) - netHeader.SetDestinationAddress(conn.reply.srcAddr) + newPort = conn.reply.srcPort + newAddr = conn.reply.srcAddr case dirReply: - tcpHeader.SetSourcePort(conn.original.dstPort) - netHeader.SetSourceAddress(conn.original.dstAddr) + newPort = conn.original.dstPort + newAddr = conn.original.dstAddr + + updateSRCFields = true } pkt.NatDone = true } @@ -422,11 +429,13 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) bool { if conn.manip == manipSource { switch dir { case dirOriginal: - tcpHeader.SetSourcePort(conn.reply.dstPort) - netHeader.SetSourceAddress(conn.reply.dstAddr) + newPort = conn.reply.dstPort + newAddr = conn.reply.dstAddr + + updateSRCFields = true case dirReply: - tcpHeader.SetDestinationPort(conn.original.srcPort) - netHeader.SetDestinationAddress(conn.original.srcAddr) + newPort = conn.original.srcPort + newAddr = conn.original.srcAddr } pkt.NatDone = true } @@ -437,29 +446,31 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) bool { return false } + fullChecksum := false + updatePseudoHeader := false switch hook { case Prerouting, Input: case Output, Postrouting: // Calculate the TCP checksum and set it. 
- tcpHeader.SetChecksum(0) - length := uint16(len(tcpHeader) + pkt.Data().Size()) - xsum := header.PseudoHeaderChecksum(header.TCPProtocolNumber, netHeader.SourceAddress(), netHeader.DestinationAddress(), length) if pkt.GSOOptions.Type != GSONone && pkt.GSOOptions.NeedsCsum { - tcpHeader.SetChecksum(xsum) + updatePseudoHeader = true } else if r.RequiresTXTransportChecksum() { - xsum = header.ChecksumCombine(xsum, pkt.Data().AsRange().Checksum()) - tcpHeader.SetChecksum(^tcpHeader.CalculateChecksum(xsum)) + fullChecksum = true + updatePseudoHeader = true } default: panic(fmt.Sprintf("unrecognized hook = %s", hook)) } - // After modification, IPv4 packets need a valid checksum. - if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber { - netHeader := header.IPv4(pkt.NetworkHeader().View()) - netHeader.SetChecksum(0) - netHeader.SetChecksum(^netHeader.CalculateChecksum()) - } + rewritePacket( + netHeader, + tcpHeader, + updateSRCFields, + fullChecksum, + updatePseudoHeader, + newPort, + newAddr, + ) // Update the state of tcb. conn.mu.Lock() @@ -615,7 +626,7 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo // Don't re-unlock if both tuples are in the same bucket. if differentBuckets { - ct.buckets[replyBucket].mu.Unlock() + ct.buckets[replyBucket].mu.Unlock() // +checklocksforce } return true diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go index 91e266de8..96cc899bb 100644 --- a/pkg/tcpip/stack/iptables_targets.go +++ b/pkg/tcpip/stack/iptables_targets.go @@ -133,29 +133,23 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r switch protocol := pkt.TransportProtocolNumber; protocol { case header.UDPProtocolNumber: udpHeader := header.UDP(pkt.TransportHeader().View()) - udpHeader.SetDestinationPort(rt.Port) - // Calculate UDP checksum and set it. if hook == Output { - udpHeader.SetChecksum(0) - netHeader := pkt.Network() - netHeader.SetDestinationAddress(address) - // Only calculate the checksum if offloading isn't supported. - if r.RequiresTXTransportChecksum() { - length := uint16(pkt.Size()) - uint16(len(pkt.NetworkHeader().View())) - xsum := header.PseudoHeaderChecksum(protocol, netHeader.SourceAddress(), netHeader.DestinationAddress(), length) - xsum = header.ChecksumCombine(xsum, pkt.Data().AsRange().Checksum()) - udpHeader.SetChecksum(^udpHeader.CalculateChecksum(xsum)) - } + requiresChecksum := r.RequiresTXTransportChecksum() + rewritePacket( + pkt.Network(), + udpHeader, + false, /* updateSRCFields */ + requiresChecksum, + requiresChecksum, + rt.Port, + address, + ) + } else { + udpHeader.SetDestinationPort(rt.Port) } - // After modification, IPv4 packets need a valid checksum. - if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber { - netHeader := header.IPv4(pkt.NetworkHeader().View()) - netHeader.SetChecksum(0) - netHeader.SetChecksum(^netHeader.CalculateChecksum()) - } pkt.NatDone = true case header.TCPProtocolNumber: if ct == nil { @@ -214,26 +208,18 @@ func (st *SNATTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Rou switch protocol := pkt.TransportProtocolNumber; protocol { case header.UDPProtocolNumber: - udpHeader := header.UDP(pkt.TransportHeader().View()) - udpHeader.SetChecksum(0) - udpHeader.SetSourcePort(st.Port) - netHeader := pkt.Network() - netHeader.SetSourceAddress(st.Addr) - // Only calculate the checksum if offloading isn't supported. 
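The rewrite paths in this change lean on helpers such as SetSourcePortWithChecksumUpdate and UpdateChecksumPseudoHeaderAddress instead of zeroing and recomputing checksums. Helpers like these are typically built on incremental checksum maintenance per RFC 1624; a small self-contained sketch of the arithmetic for one changed 16-bit field (the function name is illustrative, not the gVisor API):

package main

import "fmt"

// checksumUpdate2ByteField applies RFC 1624 (Eqn. 3): given an existing one's
// complement checksum and a single aligned 16-bit field changing from oldVal
// to newVal, the new checksum is ^(^csum + ^oldVal + newVal), with end-around
// carry folding.
func checksumUpdate2ByteField(csum, oldVal, newVal uint16) uint16 {
	sum := uint32(^csum) + uint32(^oldVal) + uint32(newVal)
	for sum>>16 != 0 {
		sum = (sum & 0xffff) + (sum >> 16)
	}
	return ^uint16(sum)
}

func main() {
	// Rewriting a destination port from 8080 to 80 adjusts the checksum
	// without touching the rest of the segment.
	fmt.Printf("%#04x\n", checksumUpdate2ByteField(0x1c46, 8080, 80))
}
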
- if r.RequiresTXTransportChecksum() { - length := uint16(pkt.Size()) - uint16(len(pkt.NetworkHeader().View())) - xsum := header.PseudoHeaderChecksum(protocol, netHeader.SourceAddress(), netHeader.DestinationAddress(), length) - xsum = header.ChecksumCombine(xsum, pkt.Data().AsRange().Checksum()) - udpHeader.SetChecksum(^udpHeader.CalculateChecksum(xsum)) - } + requiresChecksum := r.RequiresTXTransportChecksum() + rewritePacket( + pkt.Network(), + header.UDP(pkt.TransportHeader().View()), + true, /* updateSRCFields */ + requiresChecksum, + requiresChecksum, + st.Port, + st.Addr, + ) - // After modification, IPv4 packets need a valid checksum. - if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber { - netHeader := header.IPv4(pkt.NetworkHeader().View()) - netHeader.SetChecksum(0) - netHeader.SetChecksum(^netHeader.CalculateChecksum()) - } pkt.NatDone = true case header.TCPProtocolNumber: if ct == nil { @@ -252,3 +238,42 @@ func (st *SNATTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Rou return RuleAccept, 0 } + +func rewritePacket(n header.Network, t header.ChecksummableTransport, updateSRCFields, fullChecksum, updatePseudoHeader bool, newPort uint16, newAddr tcpip.Address) { + if updateSRCFields { + if fullChecksum { + t.SetSourcePortWithChecksumUpdate(newPort) + } else { + t.SetSourcePort(newPort) + } + } else { + if fullChecksum { + t.SetDestinationPortWithChecksumUpdate(newPort) + } else { + t.SetDestinationPort(newPort) + } + } + + if updatePseudoHeader { + var oldAddr tcpip.Address + if updateSRCFields { + oldAddr = n.SourceAddress() + } else { + oldAddr = n.DestinationAddress() + } + + t.UpdateChecksumPseudoHeaderAddress(oldAddr, newAddr, fullChecksum) + } + + if checksummableNetHeader, ok := n.(header.ChecksummableNetwork); ok { + if updateSRCFields { + checksummableNetHeader.SetSourceAddressWithChecksumUpdate(newAddr) + } else { + checksummableNetHeader.SetDestinationAddressWithChecksumUpdate(newAddr) + } + } else if updateSRCFields { + n.SetSourceAddress(newAddr) + } else { + n.SetDestinationAddress(newAddr) + } +} diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index bd512d312..4d5431da1 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -1152,6 +1152,39 @@ func raBufWithPI(ip tcpip.Address, rl uint16, prefix tcpip.AddressWithPrefix, on }) } +// raBufWithRIO returns a valid NDP Router Advertisement with a single Route +// Information option. +// +// All fields in the RA will be zero except the RIO option. +func raBufWithRIO(t *testing.T, ip tcpip.Address, prefix tcpip.AddressWithPrefix, lifetimeSeconds uint32, prf header.NDPRoutePreference) *stack.PacketBuffer { + // buf will hold the route information option after the Type and Length + // fields. + // + // 2.3. Route Information Option + // + // 0 1 2 3 + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Type | Length | Prefix Length |Resvd|Prf|Resvd| + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Route Lifetime | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Prefix (Variable Length) | + // . . + // . . + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + var buf [22]byte + buf[0] = uint8(prefix.PrefixLen) + buf[1] = byte(prf) << 3 + binary.BigEndian.PutUint32(buf[2:], lifetimeSeconds) + if n := copy(buf[6:], prefix.Address); n != len(prefix.Address) { + t.Fatalf("got copy(...) 
= %d, want = %d", n, len(prefix.Address)) + } + return raBufWithOpts(ip, 0 /* router lifetime */, header.NDPOptionsSerializer{ + header.NDPRouteInformation(buf[:]), + }) +} + func TestDynamicConfigurationsDisabled(t *testing.T) { const ( nicID = 1 @@ -1308,8 +1341,8 @@ func boolToUint64(v bool) uint64 { return 0 } -func checkOffLinkRouteEvent(e ndpOffLinkRouteEvent, nicID tcpip.NICID, router tcpip.Address, prf header.NDPRoutePreference, updated bool) string { - return cmp.Diff(ndpOffLinkRouteEvent{nicID: nicID, subnet: header.IPv6EmptySubnet, router: router, prf: prf, updated: updated}, e, cmp.AllowUnexported(e)) +func checkOffLinkRouteEvent(e ndpOffLinkRouteEvent, nicID tcpip.NICID, subnet tcpip.Subnet, router tcpip.Address, prf header.NDPRoutePreference, updated bool) string { + return cmp.Diff(ndpOffLinkRouteEvent{nicID: nicID, subnet: subnet, router: router, prf: prf, updated: updated}, e, cmp.AllowUnexported(e)) } func testWithRAs(t *testing.T, f func(*testing.T, ipv6.HandleRAsConfiguration, bool)) { @@ -1342,126 +1375,171 @@ func testWithRAs(t *testing.T, f func(*testing.T, ipv6.HandleRAsConfiguration, b } } -func TestRouterDiscovery(t *testing.T) { +func TestOffLinkRouteDiscovery(t *testing.T) { const nicID = 1 - testWithRAs(t, func(t *testing.T, handleRAs ipv6.HandleRAsConfiguration, forwarding bool) { - ndpDisp := ndpDispatcher{ - offLinkRouteC: make(chan ndpOffLinkRouteEvent, 1), - } - e := channel.New(0, 1280, linkAddr1) - clock := faketime.NewManualClock() - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{ - NDPConfigs: ipv6.NDPConfigurations{ - HandleRAs: handleRAs, - DiscoverDefaultRouters: true, - }, - NDPDisp: &ndpDisp, - })}, - Clock: clock, - }) + moreSpecificPrefix := tcpip.AddressWithPrefix{Address: testutil.MustParse6("a00::"), PrefixLen: 16} + tests := []struct { + name string - expectOffLinkRouteEvent := func(addr tcpip.Address, prf header.NDPRoutePreference, updated bool) { - t.Helper() + discoverDefaultRouters bool + discoverMoreSpecificRoutes bool - select { - case e := <-ndpDisp.offLinkRouteC: - if diff := checkOffLinkRouteEvent(e, nicID, addr, prf, updated); diff != "" { - t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) + dest tcpip.Subnet + ra func(*testing.T, tcpip.Address, uint16, header.NDPRoutePreference) *stack.PacketBuffer + }{ + { + name: "Default router discovery", + discoverDefaultRouters: true, + discoverMoreSpecificRoutes: false, + dest: header.IPv6EmptySubnet, + ra: func(_ *testing.T, router tcpip.Address, lifetimeSeconds uint16, prf header.NDPRoutePreference) *stack.PacketBuffer { + return raBufWithPrf(router, lifetimeSeconds, prf) + }, + }, + { + name: "More-specific route discovery", + discoverDefaultRouters: false, + discoverMoreSpecificRoutes: true, + dest: moreSpecificPrefix.Subnet(), + ra: func(t *testing.T, router tcpip.Address, lifetimeSeconds uint16, prf header.NDPRoutePreference) *stack.PacketBuffer { + return raBufWithRIO(t, router, moreSpecificPrefix, uint32(lifetimeSeconds), prf) + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + testWithRAs(t, func(t *testing.T, handleRAs ipv6.HandleRAsConfiguration, forwarding bool) { + ndpDisp := ndpDispatcher{ + offLinkRouteC: make(chan ndpOffLinkRouteEvent, 1), } - default: - t.Fatal("expected router discovery event") - } - } + e := channel.New(0, 1280, linkAddr1) + clock := faketime.NewManualClock() + s := stack.New(stack.Options{ + NetworkProtocols: 
[]stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{ + NDPConfigs: ipv6.NDPConfigurations{ + HandleRAs: handleRAs, + DiscoverDefaultRouters: test.discoverDefaultRouters, + DiscoverMoreSpecificRoutes: test.discoverMoreSpecificRoutes, + }, + NDPDisp: &ndpDisp, + })}, + Clock: clock, + }) - expectAsyncOffLinkRouteInvalidationEvent := func(addr tcpip.Address, timeout time.Duration) { - t.Helper() + expectOffLinkRouteEvent := func(addr tcpip.Address, prf header.NDPRoutePreference, updated bool) { + t.Helper() - clock.Advance(timeout) - select { - case e := <-ndpDisp.offLinkRouteC: - var prf header.NDPRoutePreference - if diff := checkOffLinkRouteEvent(e, nicID, addr, prf, false); diff != "" { - t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) + select { + case e := <-ndpDisp.offLinkRouteC: + if diff := checkOffLinkRouteEvent(e, nicID, test.dest, addr, prf, updated); diff != "" { + t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) + } + default: + t.Fatal("expected router discovery event") + } } - default: - t.Fatal("timed out waiting for router discovery event") - } - } - if err := s.SetForwardingDefaultAndAllNICs(ipv6.ProtocolNumber, forwarding); err != nil { - t.Fatalf("SetForwardingDefaultAndAllNICs(%d, %t): %s", ipv6.ProtocolNumber, forwarding, err) - } + expectAsyncOffLinkRouteInvalidationEvent := func(addr tcpip.Address, timeout time.Duration) { + t.Helper() - if err := s.CreateNIC(nicID, e); err != nil { - t.Fatalf("CreateNIC(%d, _): %s", nicID, err) - } + clock.Advance(timeout) + select { + case e := <-ndpDisp.offLinkRouteC: + var prf header.NDPRoutePreference + if diff := checkOffLinkRouteEvent(e, nicID, test.dest, addr, prf, false); diff != "" { + t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) + } + default: + t.Fatal("timed out waiting for router discovery event") + } + } - // Rx an RA from lladdr2 with zero lifetime. It should not be - // remembered. - e.InjectInbound(header.IPv6ProtocolNumber, raBufSimple(llAddr2, 0)) - select { - case <-ndpDisp.offLinkRouteC: - t.Fatal("unexpectedly updated an off-link route with 0 lifetime") - default: - } + if err := s.SetForwardingDefaultAndAllNICs(ipv6.ProtocolNumber, forwarding); err != nil { + t.Fatalf("SetForwardingDefaultAndAllNICs(%d, %t): %s", ipv6.ProtocolNumber, forwarding, err) + } - // Rx an RA from lladdr2 with a huge lifetime and reserved preference value - // (which should be interpreted as the default (medium) preference value). - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPrf(llAddr2, 1000, header.ReservedRoutePreference)) - expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, true) - - // Rx an RA from another router (lladdr3) with non-zero lifetime and - // non-default preference value. - const l3LifetimeSeconds = 6 - e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPrf(llAddr3, l3LifetimeSeconds, header.HighRoutePreference)) - expectOffLinkRouteEvent(llAddr3, header.HighRoutePreference, true) - - // Rx an RA from lladdr2 with lesser lifetime and default (medium) - // preference value. - const l2LifetimeSeconds = 2 - e.InjectInbound(header.IPv6ProtocolNumber, raBufSimple(llAddr2, l2LifetimeSeconds)) - select { - case <-ndpDisp.offLinkRouteC: - t.Fatal("should not receive a off-link route event when updating lifetimes for known routers") - default: - } + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _): %s", nicID, err) + } - // Rx an RA from lladdr2 with a different preference. 
- e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPrf(llAddr2, l2LifetimeSeconds, header.LowRoutePreference)) - expectOffLinkRouteEvent(llAddr2, header.LowRoutePreference, true) - - // Wait for lladdr2's router invalidation job to execute. The lifetime - // of the router should have been updated to the most recent (smaller) - // lifetime. - // - // Wait for the normal lifetime plus an extra bit for the - // router to get invalidated. If we don't get an invalidation - // event after this time, then something is wrong. - expectAsyncOffLinkRouteInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second) - - // Rx an RA from lladdr2 with huge lifetime. - e.InjectInbound(header.IPv6ProtocolNumber, raBufSimple(llAddr2, 1000)) - expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, true) - - // Rx an RA from lladdr2 with zero lifetime. It should be invalidated. - e.InjectInbound(header.IPv6ProtocolNumber, raBufSimple(llAddr2, 0)) - expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, false) - - // Wait for lladdr3's router invalidation job to execute. The lifetime - // of the router should have been updated to the most recent (smaller) - // lifetime. - // - // Wait for the normal lifetime plus an extra bit for the - // router to get invalidated. If we don't get an invalidation - // event after this time, then something is wrong. - expectAsyncOffLinkRouteInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second) - }) + // Rx an RA from lladdr2 with zero lifetime. It should not be + // remembered. + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, 0, header.MediumRoutePreference)) + select { + case <-ndpDisp.offLinkRouteC: + t.Fatal("unexpectedly updated an off-link route with 0 lifetime") + default: + } + + // Discover an off-link route through llAddr2. + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, 1000, header.ReservedRoutePreference)) + if test.discoverMoreSpecificRoutes { + // The reserved value is considered invalid with more-specific route + // discovery so we inject the same packet but with the default + // (medium) preference value. + select { + case <-ndpDisp.offLinkRouteC: + t.Fatal("unexpectedly updated an off-link route with a reserved preference value") + default: + } + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, 1000, header.MediumRoutePreference)) + } + expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, true) + + // Rx an RA from another router (lladdr3) with non-zero lifetime and + // non-default preference value. + const l3LifetimeSeconds = 6 + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr3, l3LifetimeSeconds, header.HighRoutePreference)) + expectOffLinkRouteEvent(llAddr3, header.HighRoutePreference, true) + + // Rx an RA from lladdr2 with lesser lifetime and default (medium) + // preference value. + const l2LifetimeSeconds = 2 + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, l2LifetimeSeconds, header.MediumRoutePreference)) + select { + case <-ndpDisp.offLinkRouteC: + t.Fatal("should not receive a off-link route event when updating lifetimes for known routers") + default: + } + + // Rx an RA from lladdr2 with a different preference. + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, l2LifetimeSeconds, header.LowRoutePreference)) + expectOffLinkRouteEvent(llAddr2, header.LowRoutePreference, true) + + // Wait for lladdr2's router invalidation job to execute. 
The lifetime + // of the router should have been updated to the most recent (smaller) + // lifetime. + // + // Wait for the normal lifetime plus an extra bit for the + // router to get invalidated. If we don't get an invalidation + // event after this time, then something is wrong. + expectAsyncOffLinkRouteInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second) + + // Rx an RA from lladdr2 with huge lifetime. + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, 1000, header.MediumRoutePreference)) + expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, true) + + // Rx an RA from lladdr2 with zero lifetime. It should be invalidated. + e.InjectInbound(header.IPv6ProtocolNumber, test.ra(t, llAddr2, 0, header.MediumRoutePreference)) + expectOffLinkRouteEvent(llAddr2, header.MediumRoutePreference, false) + + // Wait for lladdr3's router invalidation job to execute. The lifetime + // of the router should have been updated to the most recent (smaller) + // lifetime. + // + // Wait for the normal lifetime plus an extra bit for the + // router to get invalidated. If we don't get an invalidation + // event after this time, then something is wrong. + expectAsyncOffLinkRouteInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second) + }) + }) + } } // TestRouterDiscoveryMaxRouters tests that only -// ipv6.MaxDiscoveredDefaultRouters discovered routers are remembered. +// ipv6.MaxDiscoveredOffLinkRoutes discovered routers are remembered. func TestRouterDiscoveryMaxRouters(t *testing.T) { const nicID = 1 @@ -1484,17 +1562,17 @@ func TestRouterDiscoveryMaxRouters(t *testing.T) { } // Receive an RA from 2 more than the max number of discovered routers. - for i := 1; i <= ipv6.MaxDiscoveredDefaultRouters+2; i++ { + for i := 1; i <= ipv6.MaxDiscoveredOffLinkRoutes+2; i++ { linkAddr := []byte{2, 2, 3, 4, 5, 0} linkAddr[5] = byte(i) llAddr := header.LinkLocalAddr(tcpip.LinkAddress(linkAddr)) e.InjectInbound(header.IPv6ProtocolNumber, raBufSimple(llAddr, 5)) - if i <= ipv6.MaxDiscoveredDefaultRouters { + if i <= ipv6.MaxDiscoveredOffLinkRoutes { select { case e := <-ndpDisp.offLinkRouteC: - if diff := checkOffLinkRouteEvent(e, nicID, llAddr, header.MediumRoutePreference, true); diff != "" { + if diff := checkOffLinkRouteEvent(e, nicID, header.IPv6EmptySubnet, llAddr, header.MediumRoutePreference, true); diff != "" { t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) } default: @@ -4583,7 +4661,7 @@ func TestNoCleanupNDPStateWhenForwardingEnabled(t *testing.T) { ) select { case e := <-ndpDisp.offLinkRouteC: - if diff := checkOffLinkRouteEvent(e, nicID, llAddr3, header.MediumRoutePreference, true /* discovered */); diff != "" { + if diff := checkOffLinkRouteEvent(e, nicID, header.IPv6EmptySubnet, llAddr3, header.MediumRoutePreference, true /* discovered */); diff != "" { t.Errorf("off-link route event mismatch (-want +got):\n%s", diff) } default: @@ -5278,8 +5356,9 @@ func TestRouterSolicitation(t *testing.T) { RandSource: &randSource, }) - if err := s.CreateNIC(nicID, &e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + opts := stack.NICOptions{Disabled: true} + if err := s.CreateNICWithOptions(nicID, &e, opts); err != nil { + t.Fatalf("CreateNICWithOptions(%d, _, %#v) = %s", nicID, opts, err) } if addr := test.nicAddr; addr != "" { @@ -5288,6 +5367,10 @@ func TestRouterSolicitation(t *testing.T) { } } + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("EnableNIC(%d): %s", nicID, err) + } + // Make sure each RS is sent at the right time. 
remaining := test.maxRtrSolicit if remaining != 0 { diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go index e90c1a770..90a8ba6cf 100644 --- a/pkg/tcpip/stack/tcp.go +++ b/pkg/tcpip/stack/tcp.go @@ -380,9 +380,6 @@ type TCPSndBufState struct { // SndClosed indicates that the endpoint has been closed for sends. SndClosed bool - // SndBufInQueue is the number of bytes in the send queue. - SndBufInQueue seqnum.Size - // PacketTooBigCount is used to notify the main protocol routine how // many times a "packet too big" control packet is received. PacketTooBigCount int diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index cb316d27a..f9a15efb2 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -213,6 +213,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. // // Returns true for retry if preparation should be retried. +// +checklocks:e.mu func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) { switch e.state { case stateInitial: @@ -229,10 +230,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip } e.mu.RUnlock() - defer e.mu.RLock() - e.mu.Lock() - defer e.mu.Unlock() + defer e.mu.DowngradeLock() // The state changed when we released the shared locked and re-acquired // it in exclusive mode. Try again. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index b6687911a..ab5da987a 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -132,7 +132,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt // headers included. Because they're write-only, We don't need to // register with the stack. if !associated { - e.ops.SetReceiveBufferSize(0, false) + e.ops.SetReceiveBufferSize(0, false /* notify */) e.waiterQueue = nil return e, nil } diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index d807b13b7..aa413ad05 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -330,7 +330,9 @@ func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions, } ep := h.ep - if err := h.complete(); err != nil { + // N.B. the endpoint is generated above by startHandshake, and will be + // returned locked. This first call is forced. + if err := h.complete(); err != nil { // +checklocksforce ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() ep.stats.FailedConnectionAttempts.Increment() l.cleanupFailedHandshake(h) @@ -364,6 +366,7 @@ func (l *listenContext) closeAllPendingEndpoints() { } // Precondition: h.ep.mu must be held. +// +checklocks:h.ep.mu func (l *listenContext) cleanupFailedHandshake(h *handshake) { e := h.ep e.mu.Unlock() @@ -504,7 +507,9 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header } go func() { - if err := h.complete(); err != nil { + // Note that startHandshake returns a locked endpoint. The + // force call here just makes it so. 
+ if err := h.complete(); err != nil { // +checklocksforce e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() ctx.cleanupFailedHandshake(h) diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index f86450298..93ed161f9 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -511,6 +511,7 @@ func (h *handshake) start() { } // complete completes the TCP 3-way handshake initiated by h.start(). +// +checklocks:h.ep.mu func (h *handshake) complete() tcpip.Error { // Set up the wakers. var s sleep.Sleeper @@ -909,30 +910,13 @@ func (e *endpoint) sendRaw(data buffer.VectorisedView, flags header.TCPFlags, se return err } -func (e *endpoint) handleWrite() { - e.sndQueueInfo.sndQueueMu.Lock() - next := e.drainSendQueueLocked() - e.sndQueueInfo.sndQueueMu.Unlock() - - e.sendData(next) -} - -// Move packets from send queue to send list. -// -// Precondition: e.sndBufMu must be locked. -func (e *endpoint) drainSendQueueLocked() *segment { - first := e.sndQueueInfo.sndQueue.Front() - if first != nil { - e.snd.writeList.PushBackList(&e.sndQueueInfo.sndQueue) - e.sndQueueInfo.SndBufInQueue = 0 - } - return first -} - // Precondition: e.mu must be locked. func (e *endpoint) sendData(next *segment) { // Initialize the next segment to write if it's currently nil. if e.snd.writeNext == nil { + if next == nil { + return + } e.snd.writeNext = next } @@ -940,17 +924,6 @@ func (e *endpoint) sendData(next *segment) { e.snd.sendData() } -func (e *endpoint) handleClose() { - if !e.EndpointState().connected() { - return - } - // Drain the send queue. - e.handleWrite() - - // Mark send side as closed. - e.snd.Closed = true -} - // resetConnectionLocked puts the endpoint in an error state with the given // error code and sends a RST if and only if the error is not ErrConnectionReset // indicating that the connection is being reset due to receiving a RST. This @@ -1311,42 +1284,45 @@ func (e *endpoint) disableKeepaliveTimer() { e.keepalive.Unlock() } -// protocolMainLoop is the main loop of the TCP protocol. It runs in its own -// goroutine and is responsible for sending segments and handling received -// segments. -func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error { - e.mu.Lock() - var closeTimer tcpip.Timer - var closeWaker sleep.Waker - - epilogue := func() { - // e.mu is expected to be hold upon entering this section. - if e.snd != nil { - e.snd.resendTimer.cleanup() - e.snd.probeTimer.cleanup() - e.snd.reorderTimer.cleanup() - } +// protocolMainLoopDone is called at the end of protocolMainLoop. +// +checklocksrelease:e.mu +func (e *endpoint) protocolMainLoopDone(closeTimer tcpip.Timer, closeWaker *sleep.Waker) { + if e.snd != nil { + e.snd.resendTimer.cleanup() + e.snd.probeTimer.cleanup() + e.snd.reorderTimer.cleanup() + } - if closeTimer != nil { - closeTimer.Stop() - } + if closeTimer != nil { + closeTimer.Stop() + } - e.completeWorkerLocked() + e.completeWorkerLocked() - if e.drainDone != nil { - close(e.drainDone) - } + if e.drainDone != nil { + close(e.drainDone) + } - e.mu.Unlock() + e.mu.Unlock() - e.drainClosingSegmentQueue() + e.drainClosingSegmentQueue() - // When the protocol loop exits we should wake up our waiters. - e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) - } + // When the protocol loop exits we should wake up our waiters. 
+ e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) +} + +// protocolMainLoop is the main loop of the TCP protocol. It runs in its own +// goroutine and is responsible for sending segments and handling received +// segments. +func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error { + var ( + closeTimer tcpip.Timer + closeWaker sleep.Waker + ) + e.mu.Lock() if handshake { - if err := e.h.complete(); err != nil { + if err := e.h.complete(); err != nil { // +checklocksforce e.lastErrorMu.Lock() e.lastError = err e.lastErrorMu.Unlock() @@ -1355,8 +1331,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ e.hardError = err e.workerCleanup = true - // Lock released below. - epilogue() + e.protocolMainLoopDone(closeTimer, &closeWaker) return err } } @@ -1402,14 +1377,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ { w: &e.sndQueueInfo.sndWaker, f: func() tcpip.Error { - e.handleWrite() - return nil - }, - }, - { - w: &e.sndQueueInfo.sndCloseWaker, - f: func() tcpip.Error { - e.handleClose() + e.sendData(nil /* next */) return nil }, }, @@ -1507,7 +1475,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ // Only block the worker if the endpoint // is not in closed state or error state. close(e.drainDone) - e.mu.Unlock() + e.mu.Unlock() // +checklocksforce <-e.undrain e.mu.Lock() } @@ -1568,8 +1536,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ if err != nil { e.resetConnectionLocked(err) } - // Lock released below. - epilogue() } loop: @@ -1593,6 +1559,7 @@ loop: // just want to terminate the loop and cleanup the // endpoint. cleanupOnError(nil) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil case StateTimeWait: fallthrough @@ -1601,6 +1568,7 @@ loop: default: if err := funcs[v].f(); err != nil { cleanupOnError(err) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil } } @@ -1624,13 +1592,13 @@ loop: // Handle any StateError transition from StateTimeWait. if e.EndpointState() == StateError { cleanupOnError(nil) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil } e.transitionToStateCloseLocked() - // Lock released below. - epilogue() + e.protocolMainLoopDone(closeTimer, &closeWaker) // A new SYN was received during TIME_WAIT and we need to abort // the timewait and redirect the segment to the listener queue @@ -1700,6 +1668,7 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func() // should be executed after releasing the endpoint registrations. This is // done in cases where a new SYN is received during TIME_WAIT that carries // a sequence number larger than one see on the connection. +// +checklocks:e.mu func (e *endpoint) doTimeWait() (twReuse func()) { // Trigger a 2 * MSL time wait state. During this period // we will drop all incoming segments. 
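Many hunks in this change add +checklocks directives for gVisor's checklocks analyzer rather than altering behavior: +checklocks:e.mu marks a function that must be called with the lock held, +checklocksacquire and +checklocksrelease mark functions that hand a lock to or take it from the caller, and a trailing +checklocksforce comment makes the analyzer accept a lock state it cannot prove (the "forced" call sites above). A minimal, purely illustrative sketch of how these annotations are written; details of the analyzer's behavior should be taken as an assumption here, not a specification:

package counter

import "sync"

// Counter exists only to show where the directives go.
type Counter struct {
	mu sync.Mutex
	n  int // guarded by mu
}

// incLocked must only be called with mu held; the analyzer verifies callers.
// +checklocks:c.mu
func (c *Counter) incLocked() {
	c.n++
}

// Lock returns with mu held by the caller.
// +checklocksacquire:c.mu
func (c *Counter) Lock() {
	c.mu.Lock()
}

// Unlock releases a lock the caller is known to hold.
// +checklocksrelease:c.mu
func (c *Counter) Unlock() {
	c.mu.Unlock()
}

// Inc takes and releases the lock itself, so the annotated helpers compose.
func (c *Counter) Inc() {
	c.Lock()
	defer c.Unlock()
	c.incLocked()
}
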
diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go index dff7cb89c..7d110516b 100644 --- a/pkg/tcpip/transport/tcp/dispatcher.go +++ b/pkg/tcpip/transport/tcp/dispatcher.go @@ -127,7 +127,7 @@ func (p *processor) start(wg *sync.WaitGroup) { case !ep.segmentQueue.empty(): p.epQ.enqueue(ep) } - ep.mu.Unlock() + ep.mu.Unlock() // +checklocksforce } else { ep.newSegmentWaker.Assert() } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index a27e2110b..ebc88d6c3 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -293,16 +293,9 @@ type sndQueueInfo struct { sndQueueMu sync.Mutex `state:"nosave"` stack.TCPSndBufState - // sndQueue holds segments that are ready to be sent. - sndQueue segmentList `state:"wait"` - - // sndWaker is used to signal the protocol goroutine when segments are - // added to the `sndQueue`. + // sndWaker is used to signal the protocol goroutine when there may be + // segments that need to be sent. sndWaker sleep.Waker `state:"manual"` - - // sndCloseWaker is used to notify the protocol goroutine when the send - // side is closed. - sndCloseWaker sleep.Waker `state:"manual"` } // rcvQueueInfo contains the endpoint's rcvQueue and associated metadata. @@ -671,6 +664,7 @@ func calculateAdvertisedMSS(userMSS uint16, r *stack.Route) uint16 { // The assumption behind spinning here being that background packet processing // should not be holding the lock for long and spinning reduces latency as we // avoid an expensive sleep/wakeup of of the syscall goroutine). +// +checklocksacquire:e.mu func (e *endpoint) LockUser() { for { // Try first if the sock is locked then check if it's owned @@ -690,7 +684,7 @@ func (e *endpoint) LockUser() { continue } atomic.StoreUint32(&e.ownedByUser, 1) - return + return // +checklocksforce } } @@ -707,7 +701,7 @@ func (e *endpoint) LockUser() { // protocol goroutine altogether. // // Precondition: e.LockUser() must have been called before calling e.UnlockUser() -// +checklocks:e.mu +// +checklocksrelease:e.mu func (e *endpoint) UnlockUser() { // Lock segment queue before checking so that we avoid a race where // segments can be queued between the time we check if queue is empty @@ -743,12 +737,13 @@ func (e *endpoint) UnlockUser() { } // StopWork halts packet processing. Only to be used in tests. +// +checklocksacquire:e.mu func (e *endpoint) StopWork() { e.mu.Lock() } // ResumeWork resumes packet processing. Only to be used in tests. -// +checklocks:e.mu +// +checklocksrelease:e.mu func (e *endpoint) ResumeWork() { e.mu.Unlock() } @@ -759,7 +754,7 @@ func (e *endpoint) ResumeWork() { // // Precondition: e.mu must be held to call this method. func (e *endpoint) setEndpointState(state EndpointState) { - oldstate := EndpointState(atomic.LoadUint32(&e.state)) + oldstate := EndpointState(atomic.SwapUint32(&e.state, uint32(state))) switch state { case StateEstablished: e.stack.Stats().TCP.CurrentEstablished.Increment() @@ -776,7 +771,6 @@ func (e *endpoint) setEndpointState(state EndpointState) { e.stack.Stats().TCP.CurrentEstablished.Decrement() } } - atomic.StoreUint32(&e.state, uint32(state)) } // EndpointState returns the current state of the endpoint. @@ -1487,87 +1481,101 @@ func (e *endpoint) isEndpointWritableLocked() (int, tcpip.Error) { return avail, nil } -// Write writes data to the endpoint's peer. 
-func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { - // Linux completely ignores any address passed to sendto(2) for TCP sockets - // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More - // and opts.EndOfRecord are also ignored. +// readFromPayloader reads a slice from the Payloader. +// +checklocks:e.mu +// +checklocks:e.sndQueueInfo.sndQueueMu +func (e *endpoint) readFromPayloader(p tcpip.Payloader, opts tcpip.WriteOptions, avail int) ([]byte, tcpip.Error) { + // We can release locks while copying data. + // + // This is not possible if atomic is set, because we can't allow the + // available buffer space to be consumed by some other caller while we + // are copying data in. + if !opts.Atomic { + e.sndQueueInfo.sndQueueMu.Unlock() + defer e.sndQueueInfo.sndQueueMu.Lock() - e.LockUser() - defer e.UnlockUser() + e.UnlockUser() + defer e.LockUser() + } - nextSeg, n, err := func() (*segment, int, tcpip.Error) { - e.sndQueueInfo.sndQueueMu.Lock() - defer e.sndQueueInfo.sndQueueMu.Unlock() + // Fetch data. + if l := p.Len(); l < avail { + avail = l + } + if avail == 0 { + return nil, nil + } + v := make([]byte, avail) + n, err := p.Read(v) + if err != nil && err != io.EOF { + return nil, &tcpip.ErrBadBuffer{} + } + return v[:n], nil +} + +// queueSegment reads data from the payloader and returns a segment to be sent. +// +checklocks:e.mu +func (e *endpoint) queueSegment(p tcpip.Payloader, opts tcpip.WriteOptions) (*segment, int, tcpip.Error) { + e.sndQueueInfo.sndQueueMu.Lock() + defer e.sndQueueInfo.sndQueueMu.Unlock() + + avail, err := e.isEndpointWritableLocked() + if err != nil { + e.stats.WriteErrors.WriteClosed.Increment() + return nil, 0, err + } + + v, err := e.readFromPayloader(p, opts, avail) + if err != nil { + return nil, 0, err + } + + // Do not queue zero length segments. + if len(v) == 0 { + return nil, 0, nil + } + if !opts.Atomic { + // Since we released locks in between it's possible that the + // endpoint transitioned to a CLOSED/ERROR states so make + // sure endpoint is still writable before trying to write. avail, err := e.isEndpointWritableLocked() if err != nil { e.stats.WriteErrors.WriteClosed.Increment() return nil, 0, err } - v, err := func() ([]byte, tcpip.Error) { - // We can release locks while copying data. - // - // This is not possible if atomic is set, because we can't allow the - // available buffer space to be consumed by some other caller while we - // are copying data in. - if !opts.Atomic { - e.sndQueueInfo.sndQueueMu.Unlock() - defer e.sndQueueInfo.sndQueueMu.Lock() - - e.UnlockUser() - defer e.LockUser() - } - - // Fetch data. - if l := p.Len(); l < avail { - avail = l - } - if avail == 0 { - return nil, nil - } - v := make([]byte, avail) - n, err := p.Read(v) - if err != nil && err != io.EOF { - return nil, &tcpip.ErrBadBuffer{} - } - return v[:n], nil - }() - if len(v) == 0 || err != nil { - return nil, 0, err + // Discard any excess data copied in due to avail being reduced due + // to a simultaneous write call to the socket. + if avail < len(v) { + v = v[:avail] } + } - if !opts.Atomic { - // Since we released locks in between it's possible that the - // endpoint transitioned to a CLOSED/ERROR states so make - // sure endpoint is still writable before trying to write. - avail, err := e.isEndpointWritableLocked() - if err != nil { - e.stats.WriteErrors.WriteClosed.Increment() - return nil, 0, err - } + // Add data to the send queue. 
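The restructured write path around this point (readFromPayloader plus queueSegment) drops the endpoint and send-queue locks while copying user data for non-atomic writes, then re-validates writability and trims any excess before committing the segment. A generic sketch of that copy-outside-the-lock pattern with a hypothetical queue type, not the gVisor endpoint:

package sendqueue

import (
	"errors"
	"io"
	"sync"
)

// queue is a hypothetical stand-in for the endpoint's send-queue state.
type queue struct {
	mu     sync.Mutex
	closed bool
	avail  int
	data   [][]byte
}

// write copies up to avail bytes from r into the queue. When atomic is false
// the lock is released across the (possibly blocking) copy, so capacity is
// re-checked afterwards and any excess copied in the meantime is discarded.
func (q *queue) write(r io.Reader, atomic bool) (int, error) {
	q.mu.Lock()
	defer q.mu.Unlock()

	if q.closed {
		return 0, errors.New("queue closed")
	}
	avail := q.avail

	if !atomic {
		// Copying from r may block; don't hold the lock across it.
		q.mu.Unlock()
	}
	buf := make([]byte, avail)
	n, err := r.Read(buf)
	if !atomic {
		q.mu.Lock()
	}
	if err != nil && err != io.EOF {
		return 0, err
	}
	buf = buf[:n]

	if !atomic {
		// State may have changed while unlocked: re-validate and trim.
		if q.closed {
			return 0, errors.New("queue closed")
		}
		if q.avail < len(buf) {
			buf = buf[:q.avail]
		}
	}
	if len(buf) == 0 {
		return 0, nil
	}
	q.data = append(q.data, buf)
	q.avail -= len(buf)
	return len(buf), nil
}
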
+ s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v) + e.sndQueueInfo.SndBufUsed += len(v) + e.snd.writeList.PushBack(s) - // Discard any excess data copied in due to avail being reduced due - // to a simultaneous write call to the socket. - if avail < len(v) { - v = v[:avail] - } - } + return s, len(v), nil +} - // Add data to the send queue. - s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v) - e.sndQueueInfo.SndBufUsed += len(v) - e.sndQueueInfo.SndBufInQueue += seqnum.Size(len(v)) - e.sndQueueInfo.sndQueue.PushBack(s) +// Write writes data to the endpoint's peer. +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { + // Linux completely ignores any address passed to sendto(2) for TCP sockets + // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More + // and opts.EndOfRecord are also ignored. + + e.LockUser() + defer e.UnlockUser() - return e.drainSendQueueLocked(), len(v), nil - }() // Return if either we didn't queue anything or if an error occurred while // attempting to queue data. + nextSeg, n, err := e.queueSegment(p, opts) if n == 0 || err != nil { return 0, err } + e.sendData(nextSeg) return int64(n), nil } @@ -2314,7 +2322,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) tcp // connection setting here. if !handshake { e.segmentQueue.mu.Lock() - for _, l := range []segmentList{e.segmentQueue.list, e.sndQueueInfo.sndQueue, e.snd.writeList} { + for _, l := range []segmentList{e.segmentQueue.list, e.snd.writeList} { for s := l.Front(); s != nil; s = s.Next() { s.id = e.TransportEndpointInfo.ID e.sndQueueInfo.sndWaker.Assert() @@ -2372,6 +2380,9 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) tcpip.Error { e.notifyProtocolGoroutine(notifyTickleWorker) return nil } + // Wake up any readers that maybe waiting for the stream to become + // readable. + e.waiterQueue.Notify(waiter.ReadableEvents) } // Close for write. @@ -2388,12 +2399,20 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) tcpip.Error { // Queue fin segment. s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), nil) - e.sndQueueInfo.sndQueue.PushBack(s) - e.sndQueueInfo.SndBufInQueue++ + e.snd.writeList.PushBack(s) // Mark endpoint as closed. e.sndQueueInfo.SndClosed = true e.sndQueueInfo.sndQueueMu.Unlock() - e.handleClose() + + // Drain the send queue. + e.sendData(s) + + // Mark send side as closed. + e.snd.Closed = true + + // Wake up any writers that maybe waiting for the stream to become + // writable. + e.waiterQueue.Notify(waiter.WritableEvents) } return nil @@ -2501,6 +2520,7 @@ func (e *endpoint) listen(backlog int) tcpip.Error { // startAcceptedLoop sets up required state and starts a goroutine with the // main loop for accepted connections. +// +checklocksrelease:e.mu func (e *endpoint) startAcceptedLoop() { e.workerRunning = true e.mu.Unlock() diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 65c86823a..2e709ed78 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -164,8 +164,9 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, return nil, err } - // Start the protocol goroutine. - ep.startAcceptedLoop() + // Start the protocol goroutine. Note that the endpoint is returned + // from performHandshake locked. 
+ ep.startAcceptedLoop() // +checklocksforce return ep, nil } diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index ced3a9c58..84fb1c416 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -16,6 +16,7 @@ // iterations taking long enough that the retransmit timer can kick in causing // the congestion window measurements to fail due to extra packets etc. // +//go:build !race // +build !race package tcp_test diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 9bbe9bc3e..031f01357 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -2147,7 +2147,7 @@ func TestSmallSegReceiveWindowAdvertisement(t *testing.T) { // Bump up the receive buffer size such that, when the receive window grows, // the scaled window exceeds maxUint16. - c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max), true) + c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max)*2, true /* notify */) // Keep the payload size < segment overhead and such that it is a multiple // of the window scaled value. This enables the test to perform equality @@ -2267,7 +2267,7 @@ func TestNoWindowShrinking(t *testing.T) { initialWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize() << c.RcvdWindowScale initialLastAcceptableSeq := iss.Add(seqnum.Size(initialWnd)) // Now shrink the receive buffer to half its original size. - c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize/2), true) + c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize), true /* notify */) data := generateRandomPayload(t, rcvBufSize) // Send a payload of half the size of rcvBufSize. @@ -2523,7 +2523,7 @@ func TestScaledWindowAccept(t *testing.T) { defer ep.Close() // Set the window size greater than the maximum non-scaled window. - ep.SocketOptions().SetReceiveBufferSize(65535*3, true) + ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -2595,7 +2595,7 @@ func TestNonScaledWindowAccept(t *testing.T) { defer ep.Close() // Set the window size greater than the maximum non-scaled window. - ep.SocketOptions().SetReceiveBufferSize(65535*3, true) + ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -3188,7 +3188,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { // Set the buffer size to a deterministic size so that we can check the // window scaling option. const rcvBufferSize = 0x20000 - ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true) + ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -3327,7 +3327,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) { // window scaling option. const rcvBufferSize = 0x20000 const wndScale = 3 - c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true) + c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */) // Start connection attempt. 
we, ch := waiter.NewChannelEntry(nil) @@ -3451,17 +3451,13 @@ loop: for { switch _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}); err.(type) { case *tcpip.ErrWouldBlock: - select { - case <-ch: - // Expect the state to be StateError and subsequent Reads to fail with HardError. - _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}) - if d := cmp.Diff(&tcpip.ErrConnectionReset{}, err); d != "" { - t.Fatalf("c.EP.Read() mismatch (-want +got):\n%s", d) - } - break loop - case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for reset to arrive") + <-ch + // Expect the state to be StateError and subsequent Reads to fail with HardError. + _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}) + if d := cmp.Diff(&tcpip.ErrConnectionReset{}, err); d != "" { + t.Fatalf("c.EP.Read() mismatch (-want +got):\n%s", d) } + break loop case *tcpip.ErrConnectionReset: break loop default: @@ -3472,14 +3468,27 @@ loop: if tcp.EndpointState(c.EP.State()) != tcp.StateError { t.Fatalf("got EP state is not StateError") } - if got := c.Stack().Stats().TCP.EstablishedResets.Value(); got != 1 { - t.Errorf("got stats.TCP.EstablishedResets.Value() = %d, want = 1", got) + + checkValid := func() []error { + var errors []error + if got := c.Stack().Stats().TCP.EstablishedResets.Value(); got != 1 { + errors = append(errors, fmt.Errorf("got stats.TCP.EstablishedResets.Value() = %d, want = 1", got)) + } + if got := c.Stack().Stats().TCP.CurrentEstablished.Value(); got != 0 { + errors = append(errors, fmt.Errorf("got stats.TCP.CurrentEstablished.Value() = %d, want = 0", got)) + } + if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 { + errors = append(errors, fmt.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got)) + } + return errors } - if got := c.Stack().Stats().TCP.CurrentEstablished.Value(); got != 0 { - t.Errorf("got stats.TCP.CurrentEstablished.Value() = %d, want = 0", got) + + start := time.Now() + for time.Since(start) < time.Minute && len(checkValid()) > 0 { + time.Sleep(50 * time.Millisecond) } - if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 { - t.Errorf("got stats.TCP.CurrentConnected.Value() = %d, want = 0", got) + for _, err := range checkValid() { + t.Error(err) } } @@ -3615,6 +3624,38 @@ func TestMaxRTO(t *testing.T) { } } +// TestZeroSizedWriteRetransmit tests that a zero sized write should not +// result in a panic on an RTO as no segment should have been queued for +// a zero sized write. +func TestZeroSizedWriteRetransmit(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */) + + var r bytes.Reader + _, err := c.EP.Write(&r, tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %s", err) + } + // Now do a non-zero sized write to trigger actual sending of data. + r.Reset(make([]byte, 1)) + _, err = c.EP.Write(&r, tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %s", err) + } + // Do not ACK the packet and expect an original transmit and a + // retransmit. This should not cause a panic. + for i := 0; i < 2; i++ { + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^header.TCPFlagPsh), + ), + ) + } +} + // TestRetransmitIPv4IDUniqueness tests that the IPv4 Identification field is // unique on retransmits. 
func TestRetransmitIPv4IDUniqueness(t *testing.T) { @@ -4628,52 +4669,6 @@ func TestDefaultBufferSizes(t *testing.T) { checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3) } -func TestMinMaxBufferSizes(t *testing.T) { - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, - TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol}, - }) - - // Check the default values. - ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) - if err != nil { - t.Fatalf("NewEndpoint failed; %s", err) - } - defer ep.Close() - - // Change the min/max values for send/receive - { - opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20} - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err) - } - } - - { - opt := tcpip.TCPSendBufferSizeRangeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30} - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err) - } - } - - // Set values below the min/2. - ep.SocketOptions().SetReceiveBufferSize(99, true) - checkRecvBufferSize(t, ep, 200) - - ep.SocketOptions().SetSendBufferSize(149, true) - - checkSendBufferSize(t, ep, 300) - - // Set values above the max. - ep.SocketOptions().SetReceiveBufferSize(1+tcp.DefaultReceiveBufferSize*20, true) - // Values above max are capped at max and then doubled. - checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20*2) - - ep.SocketOptions().SetSendBufferSize(1+tcp.DefaultSendBufferSize*30, true) - // Values above max are capped at max and then doubled. - checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30*2) -} - func TestBindToDeviceOption(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, @@ -6068,6 +6063,11 @@ func TestSynRcvdBadSeqNumber(t *testing.T) { // complete the connection to test that the large SEQ num // did not change the state from SYN-RCVD. + // Get setup to be notified about connection establishment. + we, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&we, waiter.ReadableEvents) + defer c.WQ.EventUnregister(&we) + // Send ACK to move to ESTABLISHED state. c.SendPacket(nil, &context.Headers{ SrcPort: context.TestPort, @@ -6078,32 +6078,12 @@ func TestSynRcvdBadSeqNumber(t *testing.T) { RcvWnd: 30000, }) + <-ch newEP, _, err := c.EP.Accept(nil) - switch err.(type) { - case nil, *tcpip.ErrWouldBlock: - default: + if err != nil { t.Fatalf("Accept failed: %s", err) } - if cmp.Equal(&tcpip.ErrWouldBlock{}, err) { - // Try to accept the connections in the backlog. - we, ch := waiter.NewChannelEntry(nil) - c.WQ.EventRegister(&we, waiter.ReadableEvents) - defer c.WQ.EventUnregister(&we) - - // Wait for connection to be established. - select { - case <-ch: - newEP, _, err = c.EP.Accept(nil) - if err != nil { - t.Fatalf("Accept failed: %s", err) - } - - case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") - } - } - // Now verify that the TCP socket is usable and in a connected state. 
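In the TestSynRcvdBadSeqNumber hunk above, the waiter entry is now registered before the final ACK is injected, so the readable event cannot be missed, and the unconditional <-ch receive leans on the test binary's overall timeout instead of a local one-second timer. A condensed sketch of that ordering; the helper and its name are illustrative, only the waiter and endpoint calls mirror the diff:

    package example

    import (
        "gvisor.dev/gvisor/pkg/tcpip"
        "gvisor.dev/gvisor/pkg/waiter"
    )

    // acceptAfterEvent registers for readability before trigger runs, so the
    // notification cannot be lost, then blocks until the listener is readable
    // and accepts the queued connection.
    func acceptAfterEvent(wq *waiter.Queue, ep tcpip.Endpoint, trigger func()) (tcpip.Endpoint, tcpip.Error) {
        we, ch := waiter.NewChannelEntry(nil)
        wq.EventRegister(&we, waiter.ReadableEvents)
        defer wq.EventUnregister(&we)

        trigger() // e.g. inject the ACK that completes the handshake
        <-ch      // wait for the readable event

        newEP, _, err := ep.Accept(nil)
        return newEP, err
    }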
data := "Don't panic" var r strings.Reader @@ -6209,12 +6189,26 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { RcvWnd: 30000, }) - time.Sleep(50 * time.Millisecond) - if got := stats.TCP.ListenOverflowSynDrop.Value(); got != want { - t.Errorf("got stats.TCP.ListenOverflowSynDrop.Value() = %d, want = %d", got, want) + checkValid := func() []error { + var errors []error + if got := stats.TCP.ListenOverflowSynDrop.Value(); got != want { + errors = append(errors, fmt.Errorf("got stats.TCP.ListenOverflowSynDrop.Value() = %d, want = %d", got, want)) + } + if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.ListenOverflowSynDrop.Value(); got != want { + errors = append(errors, fmt.Errorf("got EP stats Stats.ReceiveErrors.ListenOverflowSynDrop = %d, want = %d", got, want)) + } + return errors } - if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.ListenOverflowSynDrop.Value(); got != want { - t.Errorf("got EP stats Stats.ReceiveErrors.ListenOverflowSynDrop = %d, want = %d", got, want) + + start := time.Now() + for time.Since(start) < time.Minute && len(checkValid()) > 0 { + time.Sleep(50 * time.Millisecond) + } + for _, err := range checkValid() { + t.Error(err) + } + if t.Failed() { + t.FailNow() } we, ch := waiter.NewChannelEntry(nil) @@ -6225,15 +6219,10 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { _, _, err = c.EP.Accept(nil) if cmp.Equal(&tcpip.ErrWouldBlock{}, err) { // Wait for connection to be established. - select { - case <-ch: - _, _, err = c.EP.Accept(nil) - if err != nil { - t.Fatalf("Accept failed: %s", err) - } - - case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") + <-ch + _, _, err = c.EP.Accept(nil) + if err != nil { + t.Fatalf("Accept failed: %s", err) } } } @@ -7483,7 +7472,7 @@ func TestTCPUserTimeout(t *testing.T) { select { case <-notifyCh: case <-time.After(2 * initRTO): - t.Fatalf("connection still alive after %s, should have been closed after :%s", 2*initRTO, userTimeout) + t.Fatalf("connection still alive after %s, should have been closed after %s", 2*initRTO, userTimeout) } // No packet should be received as the connection should be silently @@ -7717,7 +7706,7 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) { // Increasing the buffer from should generate an ACK, // since window grew from small value to larger equal MSS - c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*2, true) + c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*4, true /* notify */) checker.IPv4(t, c.GetPacket(), checker.PayloadLen(header.TCPMinimumSize), checker.TCP( diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 53efecc5a..96e4849d2 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -757,7 +757,7 @@ func (c *Context) Create(epRcvBuf int) { } if epRcvBuf != -1 { - c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf), true /* notify */) + c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf)*2, true /* notify */) } } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index def9d7186..82a3f2287 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -364,6 +364,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. // // Returns true for retry if preparation should be retried. 
+// +checklocks:e.mu func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) { switch e.EndpointState() { case StateInitial: @@ -380,10 +381,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip } e.mu.RUnlock() - defer e.mu.RLock() - e.mu.Lock() - defer e.mu.Unlock() + defer e.mu.DowngradeLock() // The state changed when we released the shared locked and re-acquired // it in exclusive mode. Try again. @@ -449,37 +448,20 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp return n, err } -func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { - if err := e.LastError(); err != nil { - return 0, err - } - - // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) - if opts.More { - return 0, &tcpip.ErrInvalidOptionValue{} - } - - to := opts.To - +func (e *endpoint) buildUDPPacketInfo(p tcpip.Payloader, opts tcpip.WriteOptions) (udpPacketInfo, tcpip.Error) { e.mu.RLock() - lockReleased := false - defer func() { - if lockReleased { - return - } - e.mu.RUnlock() - }() + defer e.mu.RUnlock() // If we've shutdown with SHUT_WR we are in an invalid state for sending. if e.shutdownFlags&tcpip.ShutdownWrite != 0 { - return 0, &tcpip.ErrClosedForSend{} + return udpPacketInfo{}, &tcpip.ErrClosedForSend{} } // Prepare for write. for { - retry, err := e.prepareForWrite(to) + retry, err := e.prepareForWrite(opts.To) if err != nil { - return 0, err + return udpPacketInfo{}, err } if !retry { @@ -489,34 +471,34 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp route := e.route dstPort := e.dstPort - if to != nil { + if opts.To != nil { // Reject destination address if it goes through a different // NIC than the endpoint was bound to. - nicID := to.NIC + nicID := opts.To.NIC if nicID == 0 { nicID = tcpip.NICID(e.ops.GetBindToDevice()) } if e.BindNICID != 0 { if nicID != 0 && nicID != e.BindNICID { - return 0, &tcpip.ErrNoRoute{} + return udpPacketInfo{}, &tcpip.ErrNoRoute{} } nicID = e.BindNICID } - if to.Port == 0 { + if opts.To.Port == 0 { // Port 0 is an invalid port to send to. - return 0, &tcpip.ErrInvalidEndpointState{} + return udpPacketInfo{}, &tcpip.ErrInvalidEndpointState{} } - dst, netProto, err := e.checkV4MappedLocked(*to) + dst, netProto, err := e.checkV4MappedLocked(*opts.To) if err != nil { - return 0, err + return udpPacketInfo{}, err } r, _, err := e.connectRoute(nicID, dst, netProto) if err != nil { - return 0, err + return udpPacketInfo{}, err } defer r.Release() @@ -525,12 +507,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp } if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() { - return 0, &tcpip.ErrBroadcastDisabled{} + return udpPacketInfo{}, &tcpip.ErrBroadcastDisabled{} } v := make([]byte, p.Len()) if _, err := io.ReadFull(p, v); err != nil { - return 0, &tcpip.ErrBadBuffer{} + return udpPacketInfo{}, &tcpip.ErrBadBuffer{} } if len(v) > header.UDPMaximumPacketSize { // Payload can't possibly fit in a packet. @@ -548,24 +530,39 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp v, ) } - return 0, &tcpip.ErrMessageTooLong{} + return udpPacketInfo{}, &tcpip.ErrMessageTooLong{} } ttl := e.ttl useDefaultTTL := ttl == 0 - if header.IsV4MulticastAddress(route.RemoteAddress()) || header.IsV6MulticastAddress(route.RemoteAddress()) { ttl = e.multicastTTL // Multicast allows a 0 TTL. 
useDefaultTTL = false } - localPort := e.ID.LocalPort - sendTOS := e.sendTOS - owner := e.owner - noChecksum := e.SocketOptions().GetNoChecksum() - lockReleased = true - e.mu.RUnlock() + return udpPacketInfo{ + route: route, + data: buffer.View(v), + localPort: e.ID.LocalPort, + remotePort: dstPort, + ttl: ttl, + useDefaultTTL: useDefaultTTL, + tos: e.sendTOS, + owner: e.owner, + noChecksum: e.SocketOptions().GetNoChecksum(), + }, nil +} + +func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { + if err := e.LastError(); err != nil { + return 0, err + } + + // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) + if opts.More { + return 0, &tcpip.ErrInvalidOptionValue{} + } // Do not hold lock when sending as loopback is synchronous and if the UDP // datagram ends up generating an ICMP response then it can result in a @@ -577,10 +574,15 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp // // See: https://golang.org/pkg/sync/#RWMutex for details on why recursive read // locking is prohibited. - if err := sendUDP(route, buffer.View(v).ToVectorisedView(), localPort, dstPort, ttl, useDefaultTTL, sendTOS, owner, noChecksum); err != nil { + u, err := e.buildUDPPacketInfo(p, opts) + if err != nil { return 0, err } - return int64(len(v)), nil + n, err := u.send() + if err != nil { + return 0, err + } + return int64(n), nil } // OnReuseAddressSet implements tcpip.SocketOptionsHandler. @@ -817,14 +819,30 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { return nil } -// sendUDP sends a UDP segment via the provided network endpoint and under the -// provided identity. -func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) tcpip.Error { +// udpPacketInfo contains all information required to send a UDP packet. +// +// This should be used as a value-only type, which exists in order to simplify +// return value syntax. It should not be exported or extended. +type udpPacketInfo struct { + route *stack.Route + data buffer.View + localPort uint16 + remotePort uint16 + ttl uint8 + useDefaultTTL bool + tos uint8 + owner tcpip.PacketOwner + noChecksum bool +} + +// send sends the given packet. +func (u *udpPacketInfo) send() (int, tcpip.Error) { + vv := u.data.ToVectorisedView() pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - ReserveHeaderBytes: header.UDPMinimumSize + int(r.MaxHeaderLength()), - Data: data, + ReserveHeaderBytes: header.UDPMinimumSize + int(u.route.MaxHeaderLength()), + Data: vv, }) - pkt.Owner = owner + pkt.Owner = u.owner // Initialize the UDP header. udp := header.UDP(pkt.TransportHeader().Push(header.UDPMinimumSize)) @@ -832,8 +850,8 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u length := uint16(pkt.Size()) udp.Encode(&header.UDPFields{ - SrcPort: localPort, - DstPort: remotePort, + SrcPort: u.localPort, + DstPort: u.remotePort, Length: length, }) @@ -841,30 +859,30 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u // On IPv4, UDP checksum is optional, and a zero value indicates the // transmitter skipped the checksum generation (RFC768). // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1). 
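The UDP write refactor above follows a snapshot pattern: everything derived from endpoint state is captured into the value-only udpPacketInfo while e.mu is held, and the actual transmission runs with no endpoint lock held, avoiding the recursive read-lock hazard the retained comment describes. The same shape in isolation, with hypothetical names and no gVisor dependencies:

    package example

    import "sync"

    type sendParams struct {
        payload []byte
        dst     string
    }

    type endpoint struct {
        mu  sync.RWMutex
        dst string
    }

    // snapshot gathers everything needed to transmit while holding only the
    // read lock; the returned value carries no reference to the locked state.
    func (e *endpoint) snapshot(payload []byte) sendParams {
        e.mu.RLock()
        defer e.mu.RUnlock()
        return sendParams{payload: append([]byte(nil), payload...), dst: e.dst}
    }

    // write transmits without holding e.mu, so a synchronous loopback path
    // that re-enters the endpoint cannot self-deadlock on the lock.
    func (e *endpoint) write(payload []byte, transmit func(sendParams) error) error {
        p := e.snapshot(payload)
        return transmit(p)
    }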
- if r.RequiresTXTransportChecksum() && - (!noChecksum || r.NetProto() == header.IPv6ProtocolNumber) { - xsum := r.PseudoHeaderChecksum(ProtocolNumber, length) - for _, v := range data.Views() { + if u.route.RequiresTXTransportChecksum() && + (!u.noChecksum || u.route.NetProto() == header.IPv6ProtocolNumber) { + xsum := u.route.PseudoHeaderChecksum(ProtocolNumber, length) + for _, v := range vv.Views() { xsum = header.Checksum(v, xsum) } udp.SetChecksum(^udp.CalculateChecksum(xsum)) } - if useDefaultTTL { - ttl = r.DefaultTTL() + if u.useDefaultTTL { + u.ttl = u.route.DefaultTTL() } - if err := r.WritePacket(stack.NetworkHeaderParams{ + if err := u.route.WritePacket(stack.NetworkHeaderParams{ Protocol: ProtocolNumber, - TTL: ttl, - TOS: tos, + TTL: u.ttl, + TOS: u.tos, }, pkt); err != nil { - r.Stats().UDP.PacketSendErrors.Increment() - return err + u.route.Stats().UDP.PacketSendErrors.Increment() + return 0, err } // Track count of packets sent. - r.Stats().UDP.PacketsSent.Increment() - return nil + u.route.Stats().UDP.PacketsSent.Increment() + return len(u.data), nil } // checkV4MappedLocked determines the effective network protocol and converts diff --git a/pkg/test/testutil/testutil_runfiles.go b/pkg/test/testutil/testutil_runfiles.go index ece9ea9a1..1dbd48a47 100644 --- a/pkg/test/testutil/testutil_runfiles.go +++ b/pkg/test/testutil/testutil_runfiles.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package testutil import ( diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go index 0e9a829f6..0ef635a2f 100644 --- a/pkg/urpc/urpc.go +++ b/pkg/urpc/urpc.go @@ -27,6 +27,7 @@ import ( "os" "reflect" "runtime" + "time" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" @@ -458,29 +459,45 @@ func (s *Server) StartHandling(client *unet.Socket) { // No new requests should be initiated after calling Stop. Existing clients // will be closed after completing any pending RPCs. This method will block // until all clients have disconnected. -func (s *Server) Stop() { - // Wait for all outstanding requests. - defer s.wg.Wait() - +// +// timeout is the time for clients to complete ongoing RPCs. +func (s *Server) Stop(timeout time.Duration) { // Call any Stop callbacks. for _, stopper := range s.stoppers { stopper.Stop() } - // Close all known clients. - s.mu.Lock() - defer s.mu.Unlock() - for client, state := range s.clients { - switch state { - case idle: - // Close connection now. - client.Close() - s.clients[client] = closed - case processing: - // Request close when done. - s.clients[client] = closeRequested + done := make(chan bool, 1) + go func() { + if timeout != 0 { + timer := time.NewTicker(timeout) + defer timer.Stop() + select { + case <-done: + return + case <-timer.C: + } } - } + + // Close all known clients. + s.mu.Lock() + defer s.mu.Unlock() + for client, state := range s.clients { + switch state { + case idle: + // Close connection now. + client.Close() + s.clients[client] = closed + case processing: + // Request close when done. + s.clients[client] = closeRequested + } + } + }() + + // Wait for all outstanding requests. + s.wg.Wait() + done <- true } // Client is a urpc client. 
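With the urpc change above, Server.Stop takes a timeout, so callers that previously wrote s.Stop() must now pick a grace period. Reading the new body, a zero timeout closes idle clients immediately, while a non-zero timeout gives in-flight RPCs that long before connections are closed; Stop still blocks on the wait group until handlers return. A hedged usage sketch, with the server value and duration chosen only for illustration:

    package example

    import (
        "time"

        "gvisor.dev/gvisor/pkg/urpc"
    )

    // shutdown gives pending RPCs up to ten seconds to drain, then closes the
    // remaining client connections. Stop blocks until all handlers finish.
    func shutdown(s *urpc.Server) {
        s.Stop(10 * time.Second)
    }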
diff --git a/pkg/usermem/BUILD b/pkg/usermem/BUILD index d7decd78a..9c37a9626 100644 --- a/pkg/usermem/BUILD +++ b/pkg/usermem/BUILD @@ -14,10 +14,10 @@ go_library( deps = [ "//pkg/atomicbitops", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/gohacks", "//pkg/hostarch", "//pkg/safemem", - "//pkg/syserror", ], ) @@ -30,8 +30,8 @@ go_test( library = ":usermem", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/safemem", - "//pkg/syserror", ], ) diff --git a/pkg/usermem/bytes_io.go b/pkg/usermem/bytes_io.go index 3da3c0294..777ac59a6 100644 --- a/pkg/usermem/bytes_io.go +++ b/pkg/usermem/bytes_io.go @@ -16,9 +16,9 @@ package usermem import ( "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/syserror" ) const maxInt = int(^uint(0) >> 1) @@ -51,7 +51,7 @@ func (b *BytesIO) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, op // ZeroOut implements IO.ZeroOut. func (b *BytesIO) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts IOOpts) (int64, error) { if toZero > int64(maxInt) { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } rngN, rngErr := b.rangeCheck(addr, int(toZero)) if rngN == 0 { @@ -89,15 +89,15 @@ func (b *BytesIO) rangeCheck(addr hostarch.Addr, length int) (int, error) { return 0, nil } if length < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } max := hostarch.Addr(len(b.Bytes)) if addr >= max { - return 0, syserror.EFAULT + return 0, linuxerr.EFAULT } end, ok := addr.AddLength(uint64(length)) if !ok || end > max { - return int(max - addr), syserror.EFAULT + return int(max - addr), linuxerr.EFAULT } return length, nil } diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index 0d6d25e50..cde1038ed 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -22,11 +22,10 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/gohacks" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/safemem" ) // IO provides access to the contents of a virtual memory space. @@ -163,7 +162,7 @@ func (rw *IOReadWriter) Read(dst []byte) (int, error) { // Disallow wraparound. rw.Addr = ^hostarch.Addr(0) if err != nil { - err = syserror.EFAULT + err = linuxerr.EFAULT } } return n, err @@ -179,7 +178,7 @@ func (rw *IOReadWriter) Write(src []byte) (int, error) { // Disallow wraparound. rw.Addr = ^hostarch.Addr(0) if err != nil { - err = syserror.EFAULT + err = linuxerr.EFAULT } } return n, err @@ -214,7 +213,7 @@ func CopyStringIn(ctx context.Context, uio IO, addr hostarch.Addr, maxlen int, o } end, ok := addr.AddLength(uint64(readlen)) if !ok { - return gohacks.StringFromImmutableBytes(buf[:done]), syserror.EFAULT + return gohacks.StringFromImmutableBytes(buf[:done]), linuxerr.EFAULT } // Shorten the read to avoid crossing page boundaries, since faulting // in a page unnecessarily is expensive. This also ensures that partial @@ -244,7 +243,7 @@ func CopyStringIn(ctx context.Context, uio IO, addr hostarch.Addr, maxlen int, o } addr = end } - return gohacks.StringFromImmutableBytes(buf), syserror.ENAMETOOLONG + return gohacks.StringFromImmutableBytes(buf), linuxerr.ENAMETOOLONG } // CopyOutVec copies bytes from src to the memory mapped at ars in uio. 
The @@ -382,7 +381,7 @@ func CopyInt32StringsInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSe // Parse a single value. val, err := strconv.ParseInt(string(buf[i:nextI]), 10, 32) if err != nil { - return int64(i), syserror.EINVAL + return int64(i), linuxerr.EINVAL } dsts[j] = int32(val) @@ -398,7 +397,7 @@ func CopyInt32StringsInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSe return int64(i), cperr } if j == 0 { - return int64(i), syserror.EINVAL + return int64(i), linuxerr.EINVAL } return int64(i), nil } diff --git a/pkg/usermem/usermem_test.go b/pkg/usermem/usermem_test.go index 9b697b593..a5e2fe69e 100644 --- a/pkg/usermem/usermem_test.go +++ b/pkg/usermem/usermem_test.go @@ -22,9 +22,9 @@ import ( "testing" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/syserror" ) // newContext returns a context.Context that we can use in these tests (we @@ -51,7 +51,7 @@ func TestBytesIOCopyOutSuccess(t *testing.T) { func TestBytesIOCopyOutFailure(t *testing.T) { b := newBytesIOString("ABC") n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { + if wantN, wantErr := 2, linuxerr.EFAULT; n != wantN || err != wantErr { t.Errorf("CopyOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) } if got, want := b.Bytes, []byte("Afo"); !bytes.Equal(got, want) { @@ -75,7 +75,7 @@ func TestBytesIOCopyInFailure(t *testing.T) { b := newBytesIOString("Afo") var dst [3]byte n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { + if wantN, wantErr := 2, linuxerr.EFAULT; n != wantN || err != wantErr { t.Errorf("CopyIn: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) } if got, want := dst[:], []byte("fo\x00"); !bytes.Equal(got, want) { @@ -97,7 +97,7 @@ func TestBytesIOZeroOutSuccess(t *testing.T) { func TestBytesIOZeroOutFailure(t *testing.T) { b := newBytesIOString("ABC") n, err := b.ZeroOut(newContext(), 1, 3, IOOpts{}) - if wantN, wantErr := int64(2), syserror.EFAULT; n != wantN || err != wantErr { + if wantN, wantErr := int64(2), linuxerr.EFAULT; n != wantN || err != wantErr { t.Errorf("ZeroOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) } if got, want := b.Bytes, []byte("A\x00\x00"); !bytes.Equal(got, want) { @@ -125,7 +125,7 @@ func TestBytesIOCopyOutFromFailure(t *testing.T) { {Start: 1, End: 4}, {Start: 4, End: 7}, }), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { + if wantN, wantErr := int64(4), linuxerr.EFAULT; n != wantN || err != wantErr { t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) } if got, want := b.Bytes, []byte("Afoob"); !bytes.Equal(got, want) { @@ -155,7 +155,7 @@ func TestBytesIOCopyInToFailure(t *testing.T) { {Start: 1, End: 4}, {Start: 4, End: 7}, }), safemem.FromIOWriter{&dst}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { + if wantN, wantErr := int64(4), linuxerr.EFAULT; n != wantN || err != wantErr { t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) } if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { @@ -206,14 +206,14 @@ func TestCopyStringInVeryLong(t *testing.T) { func TestCopyStringInNoTerminatingZeroByte(t *testing.T) { want := strings.Repeat("A", 
copyStringIncrement-1) got, err := CopyStringIn(newContext(), newBytesIOString(want), 0, 2*copyStringIncrement, IOOpts{}) - if wantErr := syserror.EFAULT; got != want || err != wantErr { + if wantErr := linuxerr.EFAULT; got != want || err != wantErr { t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) } } func TestCopyStringInTruncatedByMaxlen(t *testing.T) { got, err := CopyStringIn(newContext(), newBytesIOString(strings.Repeat("A", 10)), 0, 5, IOOpts{}) - if want, wantErr := strings.Repeat("A", 5), syserror.ENAMETOOLONG; got != want || err != wantErr { + if want, wantErr := strings.Repeat("A", 5), linuxerr.ENAMETOOLONG; got != want || err != wantErr { t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) } } @@ -272,8 +272,8 @@ func TestCopyInt32StringsInVecRequiresOneValidValue(t *testing.T) { src := BytesIOSequence([]byte(s)) initial := []int32{1, 2} dsts := append([]int32(nil), initial...) - if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); err != syserror.EINVAL { - t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (_, %v)", n, err, syserror.EINVAL) + if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); !linuxerr.Equals(linuxerr.EINVAL, err) { + t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (_, %v)", n, err, linuxerr.EINVAL) } if !reflect.DeepEqual(dsts, initial) { t.Errorf("dsts: got %v, wanted %v", dsts, initial) |
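The usermem hunks above swap syserror sentinels for the errors/linuxerr package; most comparisons stay as direct equality checks, but TestCopyInt32StringsInVecRequiresOneValidValue switches to linuxerr.Equals, which, judging from its use here, compares by errno value rather than requiring the identical sentinel object. A small illustrative wrapper around that comparison; the helper name is not from the diff:

    package example

    import "gvisor.dev/gvisor/pkg/errors/linuxerr"

    // isInvalidArgument reports whether err represents EINVAL, without
    // depending on which concrete error type produced it.
    func isInvalidArgument(err error) bool {
        return linuxerr.Equals(linuxerr.EINVAL, err)
    }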