diff options
Diffstat (limited to 'pkg/sentry/kernel')
52 files changed, 363 insertions, 366 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index a82d641da..26614b029 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -226,6 +226,8 @@ go_library( "//pkg/context", "//pkg/coverage", "//pkg/cpuid", + "//pkg/errors", + "//pkg/errors/linuxerr", "//pkg/eventchannel", "//pkg/fspath", "//pkg/goid", @@ -299,6 +301,7 @@ go_test( deps = [ "//pkg/abi", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sentry/arch", "//pkg/sentry/contexttest", @@ -310,6 +313,5 @@ go_test( "//pkg/sentry/time", "//pkg/sentry/usage", "//pkg/sync", - "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go index d100e58d7..5d86a04f3 100644 --- a/pkg/sentry/kernel/abstract_socket_namespace.go +++ b/pkg/sentry/kernel/abstract_socket_namespace.go @@ -27,7 +27,7 @@ import ( // +stateify savable type abstractEndpoint struct { ep transport.BoundEndpoint - socket refsvfs2.RefCounter + socket refsvfs2.TryRefCounter name string ns *AbstractSocketNamespace } @@ -57,7 +57,7 @@ func NewAbstractSocketNamespace() *AbstractSocketNamespace { // its backing socket. type boundEndpoint struct { transport.BoundEndpoint - socket refsvfs2.RefCounter + socket refsvfs2.TryRefCounter } // Release implements transport.BoundEndpoint.Release. @@ -89,7 +89,7 @@ func (a *AbstractSocketNamespace) BoundEndpoint(name string) transport.BoundEndp // // When the last reference managed by socket is dropped, ep may be removed from the // namespace. -func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.RefCounter) error { +func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.TryRefCounter) error { a.mu.Lock() defer a.mu.Unlock() @@ -109,7 +109,7 @@ func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep tran // Remove removes the specified socket at name from the abstract socket // namespace, if it has not yet been replaced. -func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.RefCounter) { +func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.TryRefCounter) { a.mu.Lock() defer a.mu.Unlock() diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 12180351d..7a1a36454 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -63,6 +63,7 @@ go_library( "//pkg/abi/linux", "//pkg/bits", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go index 3325fedcb..fc245c54b 100644 --- a/pkg/sentry/kernel/auth/credentials.go +++ b/pkg/sentry/kernel/auth/credentials.go @@ -16,7 +16,7 @@ package auth import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // Credentials contains information required to authorize privileged operations @@ -203,7 +203,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { // uid must be mapped. kuid := c.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return NoID, syserror.EINVAL + return NoID, linuxerr.EINVAL } // If c has CAP_SETUID, then it can use any UID in its user namespace. if c.HasCapability(linux.CAP_SETUID) { @@ -214,7 +214,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { if kuid == c.RealKUID || kuid == c.EffectiveKUID || kuid == c.SavedKUID { return kuid, nil } - return NoID, syserror.EPERM + return NoID, linuxerr.EPERM } // UseGID checks that c can use gid in its user namespace, then translates it @@ -222,7 +222,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) { func (c *Credentials) UseGID(gid GID) (KGID, error) { kgid := c.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return NoID, syserror.EINVAL + return NoID, linuxerr.EINVAL } if c.HasCapability(linux.CAP_SETGID) { return kgid, nil @@ -230,7 +230,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) { if kgid == c.RealKGID || kgid == c.EffectiveKGID || kgid == c.SavedKGID { return kgid, nil } - return NoID, syserror.EPERM + return NoID, linuxerr.EPERM } // SetUID translates the provided uid to the root user namespace and updates c's @@ -239,7 +239,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) { func (c *Credentials) SetUID(uid UID) error { kuid := c.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } c.RealKUID = kuid c.EffectiveKUID = kuid @@ -253,7 +253,7 @@ func (c *Credentials) SetUID(uid UID) error { func (c *Credentials) SetGID(gid GID) error { kgid := c.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } c.RealKGID = kgid c.EffectiveKGID = kgid diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go index 28cbe159d..f06a374a0 100644 --- a/pkg/sentry/kernel/auth/id_map.go +++ b/pkg/sentry/kernel/auth/id_map.go @@ -17,7 +17,7 @@ package auth import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns. @@ -106,11 +106,11 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // than once to a uid_map file in a user namespace fails with the error // EPERM. Similar rules apply for gid_map files." - user_namespaces(7) if !ns.uidMapFromParent.IsEmpty() { - return syserror.EPERM + return linuxerr.EPERM } // "At least one line must be written to the file." if len(entries) == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // """ // In order for a process to write to the /proc/[pid]/uid_map @@ -121,12 +121,12 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // in the user namespace of the process pid. // """ if !c.HasCapabilityIn(linux.CAP_SETUID, ns) { - return syserror.EPERM + return linuxerr.EPERM } // "2. The writing process must either be in the user namespace of the process // pid or be in the parent user namespace of the process pid." if c.UserNamespace != ns && c.UserNamespace != ns.parent { - return syserror.EPERM + return linuxerr.EPERM } // """ // 3. (see trySetUIDMap) @@ -145,14 +145,14 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er // parent user namespace to a user ID (group ID) in the user namespace. // """ if len(entries) != 1 || ns.parent.MapToKUID(UID(entries[0].FirstParentID)) != c.EffectiveKUID || entries[0].Length != 1 { - return syserror.EPERM + return linuxerr.EPERM } // """ // + The writing process must have the same effective user ID as the // process that created the user namespace. // """ if c.EffectiveKUID != ns.owner { - return syserror.EPERM + return linuxerr.EPERM } } // trySetUIDMap leaves data in maps if it fails. @@ -170,11 +170,11 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error { // checks for NoID. lastID := e.FirstID + e.Length if lastID <= e.FirstID { - return syserror.EINVAL + return linuxerr.EINVAL } lastParentID := e.FirstParentID + e.Length if lastParentID <= e.FirstParentID { - return syserror.EINVAL + return linuxerr.EINVAL } // "3. The mapped user IDs (group IDs) must in turn have a mapping in // the parent user namespace." @@ -182,14 +182,14 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error { // mappings when it's created, so SetUIDMap would have returned EPERM // without reaching this point if ns is root. if !ns.parent.allIDsMapped(&ns.parent.uidMapToParent, e.FirstParentID, lastParentID) { - return syserror.EPERM + return linuxerr.EPERM } // If either of these Adds fail, we have an overlapping range. if !ns.uidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.uidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { - return syserror.EINVAL + return linuxerr.EINVAL } } return nil @@ -202,24 +202,24 @@ func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) er ns.mu.Lock() defer ns.mu.Unlock() if !ns.gidMapFromParent.IsEmpty() { - return syserror.EPERM + return linuxerr.EPERM } if len(entries) == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if !c.HasCapabilityIn(linux.CAP_SETGID, ns) { - return syserror.EPERM + return linuxerr.EPERM } if c.UserNamespace != ns && c.UserNamespace != ns.parent { - return syserror.EPERM + return linuxerr.EPERM } if !c.HasCapabilityIn(linux.CAP_SETGID, ns.parent) { if len(entries) != 1 || ns.parent.MapToKGID(GID(entries[0].FirstParentID)) != c.EffectiveKGID || entries[0].Length != 1 { - return syserror.EPERM + return linuxerr.EPERM } // It's correct for this to still be UID. if c.EffectiveKUID != ns.owner { - return syserror.EPERM + return linuxerr.EPERM } // "In the case of gid_map, use of the setgroups(2) system call must // first be denied by writing "deny" to the /proc/[pid]/setgroups file @@ -239,20 +239,20 @@ func (ns *UserNamespace) trySetGIDMap(entries []IDMapEntry) error { for _, e := range entries { lastID := e.FirstID + e.Length if lastID <= e.FirstID { - return syserror.EINVAL + return linuxerr.EINVAL } lastParentID := e.FirstParentID + e.Length if lastParentID <= e.FirstParentID { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.parent.allIDsMapped(&ns.parent.gidMapToParent, e.FirstParentID, lastParentID) { - return syserror.EPERM + return linuxerr.EPERM } if !ns.gidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { - return syserror.EINVAL + return linuxerr.EINVAL } if !ns.gidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { - return syserror.EINVAL + return linuxerr.EINVAL } } return nil diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index 9dd52c860..40a406f9d 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -17,8 +17,8 @@ package auth import ( "math" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // A UserNamespace represents a user namespace. See user_namespaces(7) for @@ -105,7 +105,7 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { if c.UserNamespace.depth() >= maxUserNamespaceDepth { // "... Calls to unshare(2) or clone(2) that would cause this limit to // be exceeded fail with the error EUSERS." - user_namespaces(7) - return nil, syserror.EUSERS + return nil, linuxerr.EUSERS } // "EPERM: CLONE_NEWUSER was specified in flags, but either the effective // user ID or the effective group ID of the caller does not have a mapping @@ -114,10 +114,10 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { // process are mapped to user IDs and group IDs in the user namespace of // the calling process at the time of the call." - unshare(2) if !c.EffectiveKUID.In(c.UserNamespace).Ok() { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } if !c.EffectiveKGID.In(c.UserNamespace).Ok() { - return nil, syserror.EPERM + return nil, linuxerr.EPERM } return &UserNamespace{ parent: c.UserNamespace, diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 6224a0cbd..6b2dd09da 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -8,12 +8,12 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/errors/linuxerr", "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/sync", - "//pkg/syserror", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 5d584dc45..473987a79 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -17,12 +17,12 @@ package fasync import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -248,7 +248,7 @@ func (a *FileAsync) Signal() linux.Signal { // to send SIGIO. func (a *FileAsync) SetSignal(signal linux.Signal) error { if signal != 0 && !signal.IsValid() { - return syserror.EINVAL + return linuxerr.EINVAL } a.mu.Lock() defer a.mu.Unlock() diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 62777faa8..8786a70b5 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -23,12 +23,12 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // FDFlags define flags for an individual descriptor. @@ -156,7 +156,7 @@ func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) { // Release any POSIX lock possibly held by the FDTable. if file.SupportsLocks() { err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF}) - if err != nil && err != syserror.ENOLCK { + if err != nil && !linuxerr.Equals(linuxerr.ENOLCK, err) { panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) } } diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 6c31e082c..cfdea5cf7 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -37,6 +37,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/sentry/memmap", @@ -53,8 +54,8 @@ go_test( library = ":futex", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/sync", - "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index 0427cf3f4..f5c364c96 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -20,6 +20,7 @@ package futex import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" @@ -122,7 +123,7 @@ func check(t Target, addr hostarch.Addr, val uint32) error { return err } if cur != val { - return syserror.EAGAIN + return linuxerr.EAGAIN } return nil } @@ -332,7 +333,7 @@ func getKey(t Target, addr hostarch.Addr, private bool) (Key, error) { // Ensure the address is aligned. // It must be a DWORD boundary. if addr&0x3 != 0 { - return Key{}, syserror.EINVAL + return Key{}, linuxerr.EINVAL } if private { return Key{Kind: KindPrivate, Offset: uint64(addr)}, nil @@ -397,8 +398,8 @@ func (m *Manager) Fork() *Manager { } // lockBucket returns a locked bucket for the given key. -func (m *Manager) lockBucket(k *Key) *bucket { - var b *bucket +// +checklocksacquire:b.mu +func (m *Manager) lockBucket(k *Key) (b *bucket) { if k.Kind == KindSharedMappable { b = m.sharedBucket } else { @@ -409,7 +410,9 @@ func (m *Manager) lockBucket(k *Key) *bucket { } // lockBuckets returns locked buckets for the given keys. -func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { +// +checklocksacquire:b1.mu +// +checklocksacquire:b2.mu +func (m *Manager) lockBuckets(k1, k2 *Key) (b1 *bucket, b2 *bucket) { // Buckets must be consistently ordered to avoid circular lock // dependencies. We order buckets in m.privateBuckets by index (lowest // index first), and all buckets in m.privateBuckets precede @@ -419,8 +422,8 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { if k1.Kind != KindSharedMappable && k2.Kind != KindSharedMappable { i1 := bucketIndexForAddr(k1.addr()) i2 := bucketIndexForAddr(k2.addr()) - b1 := &m.privateBuckets[i1] - b2 := &m.privateBuckets[i2] + b1 = &m.privateBuckets[i1] + b2 = &m.privateBuckets[i2] switch { case i1 < i2: b1.mu.Lock() @@ -431,19 +434,30 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { default: b1.mu.Lock() } - return b1, b2 + return b1, b2 // +checklocksforce } // At least one of b1 or b2 should be m.sharedBucket. - b1 := m.sharedBucket - b2 := m.sharedBucket + b1 = m.sharedBucket + b2 = m.sharedBucket if k1.Kind != KindSharedMappable { b1 = m.lockBucket(k1) } else if k2.Kind != KindSharedMappable { b2 = m.lockBucket(k2) } m.sharedBucket.mu.Lock() - return b1, b2 + return b1, b2 // +checklocksforce +} + +// unlockBuckets unlocks two buckets. +// +checklocksrelease:b1.mu +// +checklocksrelease:b2.mu +func (m *Manager) unlockBuckets(b1, b2 *bucket) { + b1.mu.Unlock() + if b1 != b2 { + b2.mu.Unlock() + } + return // +checklocksforce } // Wake wakes up to n waiters matching the bitmask on the given addr. @@ -476,10 +490,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, c defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) if checkval { if err := check(t, addr, val); err != nil { @@ -526,10 +537,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwa defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) done := 0 cond, err := atomicOp(t, addr2, op) @@ -670,7 +678,7 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr hostarch.Addr, tid uint return false, err } if (cur & linux.FUTEX_TID_MASK) == tid { - return false, syserror.EDEADLK + return false, linuxerr.EDEADLK } if (cur & linux.FUTEX_TID_MASK) == 0 { @@ -745,7 +753,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu } if (cur & linux.FUTEX_TID_MASK) != tid { - return syserror.EPERM + return linuxerr.EPERM } var next *Waiter // Who's the next owner? @@ -773,7 +781,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu if prev != cur { // Let user mode handle CAS races. This is different than lock, which // retries when CAS fails. - return syserror.EAGAIN + return linuxerr.EAGAIN } return nil } @@ -790,7 +798,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu return err } if prev != cur { - return syserror.EINVAL + return linuxerr.EINVAL } b.wakeWaiterLocked(next) diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index deba44e5c..04c136f87 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -21,8 +21,8 @@ import ( "testing" "unsafe" - "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sync" ) @@ -488,7 +488,7 @@ func (t *testMutex) Lock() { // Wait for it to be "not locked". w := NewWaiter() err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0)) - if err == unix.EAGAIN { + if linuxerr.Equals(linuxerr.EAGAIN, err) { continue } if err != nil { diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go index 4b943106b..e8a71bec1 100644 --- a/pkg/sentry/kernel/kcov.go +++ b/pkg/sentry/kernel/kcov.go @@ -22,13 +22,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov @@ -125,19 +125,19 @@ func (kcov *Kcov) InitTrace(size uint64) error { defer kcov.mu.Unlock() if kcov.mode != linux.KCOV_MODE_DISABLED { - return syserror.EBUSY + return linuxerr.EBUSY } // To simplify all the logic around mapping, we require that the length of the // shared region is a multiple of the system page size. if (8*size)&(hostarch.PageSize-1) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } // We need space for at least two uint64s to hold current position and a // single PC. if size < 2 || size > kcovAreaSizeMax { - return syserror.EINVAL + return linuxerr.EINVAL } kcov.size = size @@ -157,7 +157,7 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error { // KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call. if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil { - return syserror.EINVAL + return linuxerr.EINVAL } switch traceKind { @@ -165,13 +165,13 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error { kcov.mode = linux.KCOV_MODE_TRACE_PC case linux.KCOV_TRACE_CMP: // We do not support KCOV_MODE_TRACE_CMP. - return syserror.ENOTSUP + return linuxerr.ENOTSUP default: - return syserror.EINVAL + return linuxerr.EINVAL } if kcov.owningTask != nil && kcov.owningTask != t { - return syserror.EBUSY + return linuxerr.EBUSY } kcov.owningTask = t @@ -195,7 +195,7 @@ func (kcov *Kcov) DisableTrace(ctx context.Context) error { } if t != kcov.owningTask { - return syserror.EINVAL + return linuxerr.EINVAL } kcov.mode = linux.KCOV_MODE_INIT kcov.owningTask = nil @@ -237,7 +237,7 @@ func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro defer kcov.mu.Unlock() if kcov.mode != linux.KCOV_MODE_INIT { - return syserror.EINVAL + return linuxerr.EINVAL } if kcov.mappable == nil { diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 352c36ba9..df5160b67 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -1299,11 +1299,11 @@ func (k *Kernel) WaitExited() { } // Kill requests that all tasks in k immediately exit as if group exiting with -// status es. Kill does not wait for tasks to exit. -func (k *Kernel) Kill(es ExitStatus) { +// status ws. Kill does not wait for tasks to exit. +func (k *Kernel) Kill(ws linux.WaitStatus) { k.extMu.Lock() defer k.extMu.Unlock() - k.tasks.Kill(es) + k.tasks.Kill(ws) } // Pause requests that all tasks in k temporarily stop executing, and blocks diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go index 2e66ec587..5ffafb0d1 100644 --- a/pkg/sentry/kernel/kernel_opts.go +++ b/pkg/sentry/kernel/kernel_opts.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package kernel // SpecialOpts contains non-standard options for the kernel. diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 34c617b08..94ebac7c5 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/abi/linux", "//pkg/amutex", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/marshal/primitive", "//pkg/safemem", @@ -47,6 +48,7 @@ go_test( library = ":pipe", deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/syserror", diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 6497dc4ba..08786d704 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -17,6 +17,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" @@ -112,7 +113,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi // read side isn't open yet. if flags.NonBlocking { w.DecRef(ctx) - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } if !waitFor(&i.mu, &i.rWakeup, ctx) { @@ -130,10 +131,10 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi return rw, nil default: - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } } func (*inodeOperations) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { - return syserror.EPIPE + return linuxerr.EPIPE } diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index d6fb0fdb8..d25cf658e 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -19,6 +19,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" @@ -258,7 +259,7 @@ func TestNonblockingWriteOpenFileNoReaders(t *testing.T) { ctx := newSleeperContext(t) f := NewInodeOperations(ctx, perms, newNamedPipe(t)) - if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); err != syserror.ENXIO { + if _, err := testOpen(ctx, t, f, fs.FileFlags{Write: true, NonBlocking: true}, nil); !linuxerr.Equals(linuxerr.ENXIO, err) { t.Fatalf("Nonblocking open for write failed unexpected error %v.", err) } } diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 06769931a..85e3ce9f4 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -22,6 +22,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -428,18 +429,18 @@ func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) { // SetFifoSize implements fs.FifoSizer.SetFifoSize. func (p *Pipe) SetFifoSize(size int64) (int64, error) { if size < 0 { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if size < MinimumPipeSize { size = MinimumPipeSize // Per spec. } if size > MaximumPipeSize { - return 0, syserror.EPERM + return 0, linuxerr.EPERM } p.mu.Lock() defer p.mu.Unlock() if size < p.size { - return 0, syserror.EBUSY + return 0, linuxerr.EBUSY } p.max = size return size, nil diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go index dd60cba24..077c5d596 100644 --- a/pkg/sentry/kernel/pipe/pipe_unsafe.go +++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go @@ -23,6 +23,8 @@ import ( // concurrent calls cannot deadlock. // // Preconditions: x != y. +// +checklocksacquire:x.mu +// +checklocksacquire:y.mu func lockTwoPipes(x, y *Pipe) { // Lock the two pipes in order of increasing address. if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) { diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 3fa5d1d2f..c883a9014 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -86,7 +87,7 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error) if n > 0 { p.Notify(waiter.ReadableEvents) } - if err == unix.EPIPE { + if linuxerr.Equals(linuxerr.EPIPE, err) { // If we are returning EPIPE send SIGPIPE to the task. if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil { sendSig(linux.SIGPIPE) @@ -156,6 +157,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume // // mu must be held by the caller. waitFor returns with mu held, but it will // drop mu before blocking for any reader/writers. +// +checklocks:mu func waitFor(mu *sync.Mutex, wakeupChan *chan struct{}, sleeper amutex.Sleeper) bool { // Ideally this function would simply use a condition variable. However, the // wait needs to be interruptible via 'sleeper', so we must sychronize via a diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 95b948edb..077d5fd7f 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -17,6 +17,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -79,7 +80,7 @@ func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *v // Allocate implements vfs.FileDescriptionImpl.Allocate. func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error { - return syserror.ESPIPE + return linuxerr.ESPIPE } // Open opens the pipe represented by vp. @@ -90,7 +91,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s readable := vfs.MayReadFileWithOpenFlags(statusFlags) writable := vfs.MayWriteFileWithOpenFlags(statusFlags) if !readable && !writable { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } fd, err := vp.newFD(mnt, vfsd, statusFlags, locks) @@ -131,7 +132,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // side isn't open yet. if statusFlags&linux.O_NONBLOCK != 0 { fd.DecRef(ctx) - return nil, syserror.ENXIO + return nil, linuxerr.ENXIO } // Wait for a reader to open the other end. if !waitFor(&vp.mu, &vp.rWakeup, ctx) { @@ -224,7 +225,7 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { // Allocate implements vfs.FileDescriptionImpl.Allocate. func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - return syserror.ESPIPE + return linuxerr.ESPIPE } // EventRegister implements waiter.Waitable.EventRegister. @@ -415,7 +416,7 @@ func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { // Preconditions: count > 0. func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { if dst.pipe == src.pipe { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } lockTwoPipes(dst.pipe, src.pipe) diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go index d801a3d83..319754a42 100644 --- a/pkg/sentry/kernel/posixtimer.go +++ b/pkg/sentry/kernel/posixtimer.go @@ -18,8 +18,8 @@ import ( "math" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/syserror" ) // IntervalTimer represents a POSIX interval timer as described by @@ -175,7 +175,7 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux. break } if t.tg.nextTimerID == end { - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } @@ -214,16 +214,16 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux. target, ok := t.tg.pidns.tasks[ThreadID(sigev.Tid)] t.tg.pidns.owner.mu.RUnlock() if !ok || target.tg != t.tg { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } it.target = target default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } if sigev.Notify != linux.SIGEV_NONE { it.signo = linux.Signal(sigev.Signo) if !it.signo.IsValid() { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } it.timer = ktime.NewTimer(c, it) @@ -238,7 +238,7 @@ func (t *Task) IntervalTimerDelete(id linux.TimerID) error { defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return syserror.EINVAL + return linuxerr.EINVAL } delete(t.tg.timers, id) it.DestroyTimer() @@ -251,7 +251,7 @@ func (t *Task) IntervalTimerSettime(id linux.TimerID, its linux.Itimerspec, abs defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return linux.Itimerspec{}, syserror.EINVAL + return linux.Itimerspec{}, linuxerr.EINVAL } newS, err := ktime.SettingFromItimerspec(its, abs, it.timer.Clock()) @@ -269,7 +269,7 @@ func (t *Task) IntervalTimerGettime(id linux.TimerID) (linux.Itimerspec, error) defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return linux.Itimerspec{}, syserror.EINVAL + return linux.Itimerspec{}, linuxerr.EINVAL } tm, s := it.timer.Get() @@ -285,7 +285,7 @@ func (t *Task) IntervalTimerGetoverrun(id linux.TimerID) (int32, error) { defer t.tg.timerMu.Unlock() it := t.tg.timers[id] if it == nil { - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } // By timer_create(2) invariant, either it.target == nil (in which case // it.overrunLast is immutably 0) or t.tg == it.target.tg; and the fact diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index a6287fd6a..21358ec92 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -19,6 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/mm" @@ -294,7 +295,7 @@ func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { // Precondition: the TaskSet mutex must be locked (for reading or writing). func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { - allowed, ok := t.k.ptraceExceptions[t] + allowed, ok := t.k.ptraceExceptions[t.tg.leader] if !ok { return false } @@ -481,7 +482,7 @@ func (t *Task) ptraceTraceme() error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if t.hasTracer() { - return syserror.EPERM + return linuxerr.EPERM } if t.parent == nil { // In Linux, only init can not have a parent, and init is assumed never @@ -497,7 +498,7 @@ func (t *Task) ptraceTraceme() error { return nil } if !t.parent.canTraceLocked(t, true) { - return syserror.EPERM + return linuxerr.EPERM } if t.parent.exitState != TaskExitNone { // Fail silently, as if we were successfully attached but then @@ -513,21 +514,21 @@ func (t *Task) ptraceTraceme() error { // ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller. func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { - return syserror.EPERM + return linuxerr.EPERM } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if !t.canTraceLocked(target, true) { - return syserror.EPERM + return linuxerr.EPERM } if target.hasTracer() { - return syserror.EPERM + return linuxerr.EPERM } // Attaching to zombies and dead tasks is not permitted; the exit // notification logic relies on this. Linux allows attaching to PF_EXITING // tasks, though. if target.exitState >= TaskExitZombie { - return syserror.EPERM + return linuxerr.EPERM } if seize { if err := target.ptraceSetOptionsLocked(opts); err != nil { @@ -651,6 +652,7 @@ func (t *Task) forgetTracerLocked() { // Preconditions: // * The signal mutex must be locked. // * The caller must be running on the task goroutine. +// +checklocks:t.tg.signalHandlers.mu func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool { if linux.Signal(info.Signo) == linux.SIGKILL { return false @@ -910,7 +912,7 @@ func (t *Task) ptraceExit() { return } t.tg.signalHandlers.mu.Lock() - status := t.exitStatus.Status() + status := t.exitStatus t.tg.signalHandlers.mu.Unlock() t.Debugf("Entering PTRACE_EVENT_EXIT stop") t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status)) @@ -938,7 +940,7 @@ func (t *Task) ptraceKill(target *Task) error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if target.Tracer() != t { - return syserror.ESRCH + return linuxerr.ESRCH } target.tg.signalHandlers.mu.Lock() defer target.tg.signalHandlers.mu.Unlock() @@ -962,7 +964,7 @@ func (t *Task) ptraceInterrupt(target *Task) error { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() if target.Tracer() != t { - return syserror.ESRCH + return linuxerr.ESRCH } if !target.ptraceSeized { return syserror.EIO @@ -994,7 +996,7 @@ func (t *Task) ptraceSetOptionsLocked(opts uintptr) error { linux.PTRACE_O_TRACEVFORK | linux.PTRACE_O_TRACEVFORKDONE) if opts&^valid != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } t.ptraceOpts = ptraceOptions{ ExitKill: opts&linux.PTRACE_O_EXITKILL != 0, @@ -1020,7 +1022,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { // specified by pid. target := t.tg.pidns.TaskWithID(pid) if target == nil { - return syserror.ESRCH + return linuxerr.ESRCH } // PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already @@ -1045,7 +1047,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() if target.Tracer() != t { t.tg.pidns.owner.mu.RUnlock() - return syserror.ESRCH + return linuxerr.ESRCH } if !target.ptraceFreeze() { t.tg.pidns.owner.mu.RUnlock() @@ -1053,7 +1055,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { // PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the // tracee to be in a ptrace-stop, otherwise they fail with ESRCH." - // ptrace(2) - return syserror.ESRCH + return linuxerr.ESRCH } t.tg.pidns.owner.mu.RUnlock() // Even if the target has a ptrace-stop active, the tracee's task goroutine @@ -1221,7 +1223,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() if target.ptraceSiginfo == nil { - return syserror.EINVAL + return linuxerr.EINVAL } _, err := target.ptraceSiginfo.CopyOut(t, data) return err @@ -1234,14 +1236,14 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() if target.ptraceSiginfo == nil { - return syserror.EINVAL + return linuxerr.EINVAL } target.ptraceSiginfo = &info return nil case linux.PTRACE_GETSIGMASK: if addr != linux.SignalSetSize { - return syserror.EINVAL + return linuxerr.EINVAL } mask := target.SignalMask() _, err := mask.CopyOut(t, data) @@ -1249,7 +1251,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error { case linux.PTRACE_SETSIGMASK: if addr != linux.SignalSetSize { - return syserror.EINVAL + return linuxerr.EINVAL } var mask linux.SignalSet if _, err := mask.CopyIn(t, data); err != nil { diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 5ae05b5c3..63422e155 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package kernel diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 46dd84cbc..27514d67b 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package kernel diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go index 4bc5bca44..de352f4f2 100644 --- a/pkg/sentry/kernel/rseq.go +++ b/pkg/sentry/kernel/rseq.go @@ -18,9 +18,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/hostcpu" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -59,23 +59,23 @@ func (t *Task) RSeqAvailable() bool { func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr != 0 { if t.rseqAddr != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqSignature != signature { - return syserror.EINVAL + return linuxerr.EINVAL } - return syserror.EBUSY + return linuxerr.EBUSY } // rseq must be aligned and correctly sized. if addr&(linux.AlignOfRSeq-1) != 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if length != linux.SizeOfRSeq { - return syserror.EINVAL + return linuxerr.EINVAL } if _, ok := t.MemoryManager().CheckIORange(addr, linux.SizeOfRSeq); !ok { - return syserror.EFAULT + return linuxerr.EFAULT } t.rseqAddr = addr @@ -92,7 +92,7 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err) t.forceSignal(linux.SIGSEGV, false /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) - return syserror.EFAULT + return linuxerr.EFAULT } return nil @@ -103,16 +103,16 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error { // Preconditions: The caller must be running on the task goroutine. func (t *Task) ClearRSeq(addr hostarch.Addr, length, signature uint32) error { if t.rseqAddr == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqAddr != addr { - return syserror.EINVAL + return linuxerr.EINVAL } if length != linux.SizeOfRSeq { - return syserror.EINVAL + return linuxerr.EINVAL } if t.rseqSignature != signature { - return syserror.EPERM + return linuxerr.EPERM } if err := t.rseqClearCPU(); err != nil { @@ -152,10 +152,10 @@ func (t *Task) SetOldRSeqCriticalRegion(r OldRSeqCriticalRegion) error { return nil } if r.CriticalSection.Start >= r.CriticalSection.End { - return syserror.EINVAL + return linuxerr.EINVAL } if r.CriticalSection.Contains(r.Restart) { - return syserror.EINVAL + return linuxerr.EINVAL } // TODO(jamieliu): check that r.CriticalSection and r.Restart are in // the application address range, for consistency with Linux. @@ -187,7 +187,7 @@ func (t *Task) SetOldRSeqCPUAddr(addr hostarch.Addr) error { // unfortunate, but unlikely in a correct program. if err := t.rseqUpdateCPU(); err != nil { t.oldRSeqCPUAddr = 0 - return syserror.EINVAL // yes, EINVAL, not err or EFAULT + return linuxerr.EINVAL // yes, EINVAL, not err or EFAULT } return nil } diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 65e5427c1..a787c00a8 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -25,6 +25,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 47bb66b42..485c3a788 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -127,7 +128,7 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry { // exists. func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { if nsems < 0 || nsems > semsMax { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } r.mu.Lock() @@ -142,15 +143,15 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Check that caller can access semaphore set. creds := auth.CredentialsFromContext(ctx) if !set.checkPerms(creds, fs.PermsFromMode(mode)) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } // Validate parameters. if nsems > int32(set.Size()) { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if create && exclusive { - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } return set, nil } @@ -163,7 +164,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu // Zero is only valid if an existing set is found. if nsems == 0 { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // Apply system limits. @@ -238,7 +239,7 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { set := r.semaphores[id] if set == nil { - return syserror.EINVAL + return linuxerr.EINVAL } index, found := r.findIndexByID(id) if !found { @@ -252,7 +253,7 @@ func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error { // "The effective user ID of the calling process must match the creator or // owner of the semaphore set, or the caller must be privileged." if !set.checkCredentials(creds) && !set.checkCapability(creds) { - return syserror.EACCES + return linuxerr.EACCES } delete(r.semaphores, set.ID) @@ -370,7 +371,7 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File // "The effective UID of the calling process must match the owner or creator // of the semaphore set, or the caller must be privileged." if !s.checkCredentials(creds) && !s.checkCapability(creds) { - return syserror.EACCES + return linuxerr.EACCES } s.owner = owner @@ -395,7 +396,7 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem defer s.mu.Unlock() if !s.checkPerms(creds, permMask) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } return &linux.SemidDS{ @@ -417,7 +418,7 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem // SetVal overrides a semaphore value, waking up waiters as needed. func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Credentials, pid int32) error { if val < 0 || val > valueMax { - return syserror.ERANGE + return linuxerr.ERANGE } s.mu.Lock() @@ -425,12 +426,12 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred // "The calling process must have alter permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Write: true}) { - return syserror.EACCES + return linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return syserror.ERANGE + return linuxerr.ERANGE } // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. @@ -452,7 +453,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti for _, val := range vals { if val > valueMax { - return syserror.ERANGE + return linuxerr.ERANGE } } @@ -461,7 +462,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti // "The calling process must have alter permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Write: true}) { - return syserror.EACCES + return linuxerr.EACCES } for i, val := range vals { @@ -483,12 +484,12 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) { // "The calling process must have read permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return sem.value, nil } @@ -500,7 +501,7 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) { // "The calling process must have read permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return nil, syserror.EACCES + return nil, linuxerr.EACCES } vals := make([]uint16, s.Size()) @@ -517,12 +518,12 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) { // "The calling process must have read permission on the semaphore set." if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } return sem.pid, nil } @@ -533,12 +534,12 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait // The calling process must have read permission on the semaphore set. if !s.checkPerms(creds, fs.PermMask{Read: true}) { - return 0, syserror.EACCES + return 0, linuxerr.EACCES } sem := s.findSem(num) if sem == nil { - return 0, syserror.ERANGE + return 0, linuxerr.ERANGE } var cnt uint16 for w := sem.waiters.Front(); w != nil; w = w.Next() { @@ -581,7 +582,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr readOnly := true for _, op := range ops { if s.findSem(int32(op.SemNum)) == nil { - return nil, 0, syserror.EFBIG + return nil, 0, linuxerr.EFBIG } if op.SemOp != 0 { readOnly = false @@ -589,7 +590,7 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr } if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) { - return nil, 0, syserror.EACCES + return nil, 0, linuxerr.EACCES } ch, num, err := s.executeOps(ctx, ops, pid) @@ -624,7 +625,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch if op.SemOp < 0 { // Handle 'wait' operation. if -op.SemOp > valueMax { - return nil, 0, syserror.ERANGE + return nil, 0, linuxerr.ERANGE } if -op.SemOp > tmpVals[op.SemNum] { // Not enough resources, must wait. @@ -639,7 +640,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch } else { // op.SemOp > 0: Handle 'signal' operation. if tmpVals[op.SemNum] > valueMax-op.SemOp { - return nil, 0, syserror.ERANGE + return nil, 0, linuxerr.ERANGE } } diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index ca9076406..f9f872522 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -16,7 +16,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ) // SessionID is the public identifier. @@ -120,8 +120,9 @@ func (pg *ProcessGroup) Originator() *ThreadGroup { // IsOrphan returns true if this process group is an orphan. func (pg *ProcessGroup) IsOrphan() bool { - pg.originator.TaskSet().mu.RLock() - defer pg.originator.TaskSet().mu.RUnlock() + ts := pg.originator.TaskSet() + ts.mu.RLock() + defer ts.mu.RUnlock() return pg.ancestors == 0 } @@ -277,14 +278,14 @@ func (tg *ThreadGroup) createSession() error { continue } if s.leader == tg { - return syserror.EPERM + return linuxerr.EPERM } if s.id == SessionID(id) { - return syserror.EPERM + return linuxerr.EPERM } for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { if pg.id == ProcessGroupID(id) { - return syserror.EPERM + return linuxerr.EPERM } } } @@ -369,17 +370,22 @@ func (tg *ThreadGroup) CreateProcessGroup() error { // Get the ID for this thread in the current namespace. id := tg.pidns.tgids[tg] + // Check whether a process still exists or not. + if id == 0 { + return linuxerr.ESRCH + } + // Per above, check for a Session leader or existing group. for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() { if s.leader.pidns != tg.pidns { continue } if s.leader == tg { - return syserror.EPERM + return linuxerr.EPERM } for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() { if pg.id == ProcessGroupID(id) { - return syserror.EPERM + return linuxerr.EPERM } } } @@ -437,17 +443,17 @@ func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID // Lookup the ProcessGroup. pg := pidns.processGroups[pgid] if pg == nil { - return syserror.EPERM + return linuxerr.EPERM } // Disallow the join if an execve has performed, per POSIX. if checkExec && tg.execed { - return syserror.EACCES + return linuxerr.EACCES } // See if it's in the same session as ours. if pg.session != tg.processGroup.session { - return syserror.EPERM + return linuxerr.EPERM } // Join the group; adjust children. diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 1c3c0794f..5b69333fe 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -28,6 +28,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/refs", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index a73f1bdca..f7ac4c2b2 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -38,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -145,7 +146,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui // // Note that 'private' always implies the creation of a new segment // whether IPC_CREAT is specified or not. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } r.mu.Lock() @@ -169,20 +170,20 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui // memory segment, and does not have the CAP_IPC_OWNER // capability in the user namespace that governs its IPC // namespace." - man shmget(2) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } if size > shm.size { // "A segment for the given key exists, but size is greater than // the size of that segment." - man shmget(2) - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if create && exclusive { // "IPC_CREAT and IPC_EXCL were specified in shmflg, but a // shared memory segment already exists for key." // - man shmget(2) - return nil, syserror.EEXIST + return nil, linuxerr.EEXIST } shm.IncRef() @@ -200,7 +201,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui if val, ok := hostarch.Addr(size).RoundUp(); ok { sizeAligned = uint64(val) } else { - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } if numPages := sizeAligned / hostarch.PageSize; r.totalPages+numPages > linux.SHMALL { @@ -511,7 +512,7 @@ func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { var err error if required.End > s.fr.Length() { - err = &memmap.BusError{syserror.EFAULT} + err = &memmap.BusError{linuxerr.EFAULT} } if source := optional.Intersect(memmap.MappableRange{0, s.fr.Length()}); source.Length() != 0 { return []memmap.Translation{ @@ -558,7 +559,7 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta // "The calling process does not have the required permissions for the // requested attach type, and does not have the CAP_IPC_OWNER capability // in the user namespace that governs its IPC namespace." - man shmat(2) - return memmap.MMapOpts{}, syserror.EACCES + return memmap.MMapOpts{}, linuxerr.EACCES } return memmap.MMapOpts{ Length: s.size, @@ -595,7 +596,7 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) { // read access for shmid, and the calling process does not have the // CAP_IPC_OWNER capability in the user namespace that governs its IPC // namespace." - man shmctl(2) - return nil, syserror.EACCES + return nil, linuxerr.EACCES } var mode uint16 @@ -645,14 +646,14 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { defer s.mu.Unlock() if !s.checkOwnership(ctx) { - return syserror.EPERM + return linuxerr.EPERM } creds := auth.CredentialsFromContext(ctx) uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID)) gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID)) if !uid.Ok() || !gid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // User may only modify the lower 9 bits of the mode. All the other bits are diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 76d472292..1110ecca5 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index f58ec4194..47958e2d4 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -18,6 +18,7 @@ package signalfd import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -64,7 +65,7 @@ func New(ctx context.Context, mask linux.SignalSet) (*fs.File, error) { t := kernel.TaskFromContext(ctx) if t == nil { // No task context? Not valid. - return nil, syserror.EINVAL + return nil, linuxerr.EINVAL } // name matches fs/signalfd.c:signalfd4. dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[signalfd]") diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 2e3b4488a..59eeb253d 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -32,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -232,7 +232,7 @@ type Task struct { // exitStatus is the task's exit status. // // exitStatus is protected by the signal mutex. - exitStatus ExitStatus + exitStatus linux.WaitStatus // syscallRestartBlock represents a custom restart function to run in // restart_syscall(2) to resume an interrupted syscall. @@ -846,7 +846,7 @@ func (t *Task) OOMScoreAdj() int32 { // value should be between -1000 and 1000 inclusive. func (t *Task) SetOOMScoreAdj(adj int32) error { if adj > 1000 || adj < -1000 { - return syserror.EINVAL + return linuxerr.EINVAL } atomic.StoreInt32(&t.tg.oomScoreAdj, adj) return nil diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go index e574997f7..dd364ae50 100644 --- a/pkg/sentry/kernel/task_acct.go +++ b/pkg/sentry/kernel/task_acct.go @@ -18,10 +18,10 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // Getitimer implements getitimer(2). @@ -44,7 +44,7 @@ func (t *Task) Getitimer(id int32) (linux.ItimerVal, error) { s, _ = t.tg.itimerProfSetting.At(tm) t.tg.signalHandlers.mu.Unlock() default: - return linux.ItimerVal{}, syserror.EINVAL + return linux.ItimerVal{}, linuxerr.EINVAL } val, iv := ktime.SpecFromSetting(tm, s) return linux.ItimerVal{ @@ -105,7 +105,7 @@ func (t *Task) Setitimer(id int32, newitv linux.ItimerVal) (linux.ItimerVal, err return linux.ItimerVal{}, err } default: - return linux.ItimerVal{}, syserror.EINVAL + return linux.ItimerVal{}, linuxerr.EINVAL } oldval, oldiv := ktime.SpecFromSetting(tm, olds) return linux.ItimerVal{ diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go index ecbe8f920..b2520eecf 100644 --- a/pkg/sentry/kernel/task_block.go +++ b/pkg/sentry/kernel/task_block.go @@ -19,6 +19,7 @@ import ( "runtime/trace" "time" + "gvisor.dev/gvisor/pkg/errors/linuxerr" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -45,7 +46,7 @@ func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time. err := t.BlockWithDeadline(C, true, deadline) // Timeout, explicitly return a remaining duration of 0. - if err == syserror.ETIMEDOUT { + if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { return 0, err } @@ -162,7 +163,7 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error { region.End() t.SleepFinish(true) // We've timed out. - return syserror.ETIMEDOUT + return linuxerr.ETIMEDOUT } } diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go index 7c138e80f..828b90014 100644 --- a/pkg/sentry/kernel/task_cgroup.go +++ b/pkg/sentry/kernel/task_cgroup.go @@ -20,15 +20,13 @@ import ( "sort" "strings" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/syserror" ) // EnterInitialCgroups moves t into an initial set of cgroups. // // Precondition: t isn't in any cgroups yet, t.cgs is empty. -// -// +checklocksignore parent.mu is conditionally acquired. func (t *Task) EnterInitialCgroups(parent *Task) { var inherit map[Cgroup]struct{} if parent != nil { @@ -67,7 +65,7 @@ func (t *Task) EnterCgroup(c Cgroup) error { // // TODO(b/183137098): Implement cgroup migration. log.Warningf("Cgroup migration is not implemented") - return syserror.EBUSY + return linuxerr.EBUSY } } } diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 405771f3f..7e1347aa6 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -20,9 +20,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/cleanup" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -142,25 +142,25 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // address, any set of signal handlers must refer to the same address // space. if !opts.NewSignalHandlers && opts.NewAddressSpace { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // In order for the behavior of thread-group-directed signals to be sane, // all tasks in a thread group must share signal handlers. if !opts.NewThreadGroup && opts.NewSignalHandlers { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // All tasks in a thread group must be in the same PID namespace. if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // The two different ways of specifying a new PID namespace are // incompatible. if opts.NewPIDNamespace && t.childPIDNamespace != nil { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Thread groups and FS contexts cannot span user namespaces. if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) { - return 0, nil, syserror.EINVAL + return 0, nil, linuxerr.EINVAL } // Pull task registers and FPU state, a cloned task will inherit the @@ -182,7 +182,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // in which it resides)." - clone(2). Neither chroot(2) nor // user_namespaces(7) document this. if t.IsChrooted() { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } userns, err = creds.NewChildUserNamespace() if err != nil { @@ -190,7 +190,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } } if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } utsns := t.UTSNamespace() @@ -241,7 +241,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } if opts.SetTLS { if !image.Arch.SetTLS(uintptr(opts.TLS)) { - return 0, nil, syserror.EPERM + return 0, nil, linuxerr.EPERM } } @@ -463,14 +463,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { // sense that clone(2) allows a task to share signal handlers and address // spaces with tasks in other thread groups. if opts.NewAddressSpace || opts.NewSignalHandlers { - return syserror.EINVAL + return linuxerr.EINVAL } creds := t.Credentials() if opts.NewThreadGroup { t.tg.signalHandlers.mu.Lock() if t.tg.tasksCount != 1 { t.tg.signalHandlers.mu.Unlock() - return syserror.EINVAL + return linuxerr.EINVAL } t.tg.signalHandlers.mu.Unlock() // This isn't racy because we're the only living task, and therefore @@ -478,7 +478,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { } if opts.NewUserNamespace { if t.IsChrooted() { - return syserror.EPERM + return linuxerr.EPERM } newUserNS, err := creds.NewChildUserNamespace() if err != nil { @@ -494,7 +494,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN) if opts.NewPIDNamespace { if !haveCapSysAdmin { - return syserror.EPERM + return linuxerr.EPERM } t.childPIDNamespace = t.tg.pidns.NewChild(t.UserNamespace()) } @@ -503,14 +503,14 @@ func (t *Task) Unshare(opts *SharingOptions) error { if opts.NewNetworkNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } t.netns = inet.NewNamespace(t.netns) } if opts.NewUTSNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } // Note that this must happen after NewUserNamespace, so the // new user namespace is used if there is one. @@ -519,7 +519,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { if opts.NewIPCNamespace { if !haveCapSysAdmin { t.mu.Unlock() - return syserror.EPERM + return linuxerr.EPERM } // Note that "If CLONE_NEWIPC is set, then create the process in a new IPC // namespace" diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index d115b8783..fbfcc19e5 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -28,66 +28,14 @@ import ( "errors" "fmt" "strconv" - "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) -// An ExitStatus is a value communicated from an exiting task or thread group -// to the party that reaps it. -// -// +stateify savable -type ExitStatus struct { - // Code is the numeric value passed to the call to exit or exit_group that - // caused the exit. If the exit was not caused by such a call, Code is 0. - Code int - - // Signo is the signal that caused the exit. If the exit was not caused by - // a signal, Signo is 0. - Signo int -} - -func (es ExitStatus) String() string { - var b strings.Builder - if code := es.Code; code != 0 { - if b.Len() != 0 { - b.WriteByte(' ') - } - _, _ = fmt.Fprintf(&b, "Code=%d", code) - } - if signal := es.Signo; signal != 0 { - if b.Len() != 0 { - b.WriteByte(' ') - } - _, _ = fmt.Fprintf(&b, "Signal=%d", signal) - } - return b.String() -} - -// Signaled returns true if the ExitStatus indicates that the exiting task or -// thread group was killed by a signal. -func (es ExitStatus) Signaled() bool { - return es.Signo != 0 -} - -// Status returns the numeric representation of the ExitStatus returned by e.g. -// the wait4() system call. -func (es ExitStatus) Status() uint32 { - return ((uint32(es.Code) & 0xff) << 8) | (uint32(es.Signo) & 0xff) -} - -// ShellExitCode returns the numeric exit code that Bash would return for an -// exit status of es. -func (es ExitStatus) ShellExitCode() int { - if es.Signaled() { - return 128 + es.Signo - } - return es.Code -} - // TaskExitState represents a step in the task exit path. // // "Exiting" and "exited" are often ambiguous; prefer to name specific states. @@ -163,13 +111,13 @@ func (t *Task) killedLocked() bool { return t.pendingSignals.pendingSet&linux.SignalSetOf(linux.SIGKILL) != 0 } -// PrepareExit indicates an exit with status es. +// PrepareExit indicates an exit with the given status. // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) PrepareExit(es ExitStatus) { +func (t *Task) PrepareExit(ws linux.WaitStatus) { t.tg.signalHandlers.mu.Lock() defer t.tg.signalHandlers.mu.Unlock() - t.exitStatus = es + t.exitStatus = ws } // PrepareGroupExit indicates a group exit with status es to t's thread group. @@ -180,7 +128,7 @@ func (t *Task) PrepareExit(es ExitStatus) { // ptrace.) // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) PrepareGroupExit(es ExitStatus) { +func (t *Task) PrepareGroupExit(ws linux.WaitStatus) { t.tg.signalHandlers.mu.Lock() defer t.tg.signalHandlers.mu.Unlock() if t.tg.exiting || t.tg.execing != nil { @@ -198,8 +146,8 @@ func (t *Task) PrepareGroupExit(es ExitStatus) { return } t.tg.exiting = true - t.tg.exitStatus = es - t.exitStatus = es + t.tg.exitStatus = ws + t.exitStatus = ws for sibling := t.tg.tasks.Front(); sibling != nil; sibling = sibling.Next() { if sibling != t { sibling.killLocked() @@ -207,11 +155,11 @@ func (t *Task) PrepareGroupExit(es ExitStatus) { } } -// Kill requests that all tasks in ts exit as if group exiting with status es. +// Kill requests that all tasks in ts exit as if group exiting with status ws. // Kill does not wait for tasks to exit. // // Kill has no analogue in Linux; it's provided for save/restore only. -func (ts *TaskSet) Kill(es ExitStatus) { +func (ts *TaskSet) Kill(ws linux.WaitStatus) { ts.mu.Lock() defer ts.mu.Unlock() ts.Root.exiting = true @@ -219,7 +167,7 @@ func (ts *TaskSet) Kill(es ExitStatus) { t.tg.signalHandlers.mu.Lock() if !t.tg.exiting { t.tg.exiting = true - t.tg.exitStatus = es + t.tg.exitStatus = ws } t.killLocked() t.tg.signalHandlers.mu.Unlock() @@ -730,10 +678,10 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow())) if t.exitStatus.Signaled() { info.Code = linux.CLD_KILLED - info.SetStatus(int32(t.exitStatus.Signo)) + info.SetStatus(int32(t.exitStatus.TerminationSignal())) } else { info.Code = linux.CLD_EXITED - info.SetStatus(int32(t.exitStatus.Code)) + info.SetStatus(int32(t.exitStatus.ExitStatus())) } // TODO(b/72102453): Set utime, stime. return info @@ -741,7 +689,7 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S // ExitStatus returns t's exit status, which is only guaranteed to be // meaningful if t.ExitState() != TaskExitNone. -func (t *Task) ExitStatus() ExitStatus { +func (t *Task) ExitStatus() linux.WaitStatus { t.tg.pidns.owner.mu.RLock() defer t.tg.pidns.owner.mu.RUnlock() t.tg.signalHandlers.mu.Lock() @@ -751,7 +699,7 @@ func (t *Task) ExitStatus() ExitStatus { // ExitStatus returns the exit status that would be returned by a consuming // wait*() on tg. -func (tg *ThreadGroup) ExitStatus() ExitStatus { +func (tg *ThreadGroup) ExitStatus() linux.WaitStatus { tg.pidns.owner.mu.RLock() defer tg.pidns.owner.mu.RUnlock() tg.signalHandlers.mu.Lock() @@ -762,7 +710,9 @@ func (tg *ThreadGroup) ExitStatus() ExitStatus { return tg.leader.exitStatus } -// TerminationSignal returns the thread group's termination signal. +// TerminationSignal returns the thread group's termination signal, which is +// the signal that will be sent to its leader's parent when all threads have +// exited. func (tg *ThreadGroup) TerminationSignal() linux.Signal { tg.pidns.owner.mu.RLock() defer tg.pidns.owner.mu.RUnlock() @@ -888,8 +838,8 @@ type WaitResult struct { // Event is exactly one of the events defined above. Event waiter.EventMask - // Status is the numeric status associated with the event. - Status uint32 + // Status is the wait status associated with the event. + Status linux.WaitStatus } // Wait waits for an event from a thread group that is a child of t's thread @@ -942,7 +892,7 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) { if anyWaitableTasks { return nil, ErrNoWaitableEvent } - return nil, syserror.ECHILD + return nil, linuxerr.ECHILD } // Preconditions: The TaskSet mutex must be locked for writing. @@ -1042,7 +992,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace } pid := t.tg.pidns.tids[target] uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow() - status := target.exitStatus.Status() + status := target.exitStatus if !opts.ConsumeEvent { return &WaitResult{ Task: target, @@ -1056,7 +1006,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace // differ from that reported by a consuming wait; the latter will return // the group exit code if one is available. if target.tg.exiting { - status = target.tg.exitStatus.Status() + status = target.tg.exitStatus } // t may be (in the thread group of) target's parent, tracer, or both. We // don't need to check for !exitTracerAcked because tracees are detached @@ -1122,12 +1072,11 @@ func (t *Task) waitCollectChildGroupStopLocked(target *Task, opts *WaitOptions) target.tg.groupStopWaitable = false } return &WaitResult{ - Task: target, - TID: pid, - UID: uid, - Event: EventChildGroupStop, - // There is no name for these status constants. - Status: (uint32(sig)&0xff)<<8 | 0x7f, + Task: target, + TID: pid, + UID: uid, + Event: EventChildGroupStop, + Status: linux.WaitStatusStopped(uint32(sig)), } } @@ -1148,7 +1097,7 @@ func (t *Task) waitCollectGroupContinueLocked(target *Task, opts *WaitOptions) * TID: pid, UID: uid, Event: EventGroupContinue, - Status: 0xffff, + Status: linux.WaitStatusContinued(), } } @@ -1176,7 +1125,7 @@ func (t *Task) waitCollectTraceeStopLocked(target *Task, opts *WaitOptions) *Wai TID: pid, UID: uid, Event: EventTraceeStop, - Status: uint32(code)<<8 | 0x7f, + Status: linux.WaitStatusStopped(uint32(code)), } } diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index 0325967e4..a9067b682 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -16,9 +16,9 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/syserror" ) // Credentials returns t's credentials. @@ -47,7 +47,7 @@ func (t *Task) HasCapability(cp linux.Capability) bool { func (t *Task) SetUID(uid auth.UID) error { // setuid considers -1 to be invalid. if !uid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } t.mu.Lock() @@ -56,7 +56,7 @@ func (t *Task) SetUID(uid auth.UID) error { creds := t.Credentials() kuid := creds.UserNamespace.MapToKUID(uid) if !kuid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } // "setuid() sets the effective user ID of the calling process. If the // effective UID of the caller is root (more precisely: if the caller has @@ -70,7 +70,7 @@ func (t *Task) SetUID(uid auth.UID) error { // capability) and uid does not match the real UID or saved set-user-ID of // the calling process." if kuid != creds.RealKUID && kuid != creds.SavedKUID { - return syserror.EPERM + return linuxerr.EPERM } t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID) return nil @@ -87,26 +87,26 @@ func (t *Task) SetREUID(r, e auth.UID) error { if r.Ok() { newR = creds.UserNamespace.MapToKUID(r) if !newR.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } newE := creds.EffectiveKUID if e.Ok() { newE = creds.UserNamespace.MapToKUID(e) if !newE.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } if !creds.HasCapability(linux.CAP_SETUID) { // "Unprivileged processes may only set the effective user ID to the // real user ID, the effective user ID, or the saved set-user-ID." if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID { - return syserror.EPERM + return linuxerr.EPERM } // "Unprivileged users may only set the real user ID to the real user // ID or the effective user ID." if newR != creds.RealKUID && newR != creds.EffectiveKUID { - return syserror.EPERM + return linuxerr.EPERM } } // "If the real user ID is set (i.e., ruid is not -1) or the effective user @@ -223,7 +223,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // SetGID implements the semantics of setgid(2). func (t *Task) SetGID(gid auth.GID) error { if !gid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } t.mu.Lock() @@ -232,14 +232,14 @@ func (t *Task) SetGID(gid auth.GID) error { creds := t.Credentials() kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } if creds.HasCapability(linux.CAP_SETGID) { t.setKGIDsUncheckedLocked(kgid, kgid, kgid) return nil } if kgid != creds.RealKGID && kgid != creds.SavedKGID { - return syserror.EPERM + return linuxerr.EPERM } t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID) return nil @@ -255,22 +255,22 @@ func (t *Task) SetREGID(r, e auth.GID) error { if r.Ok() { newR = creds.UserNamespace.MapToKGID(r) if !newR.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } newE := creds.EffectiveKGID if e.Ok() { newE = creds.UserNamespace.MapToKGID(e) if !newE.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } } if !creds.HasCapability(linux.CAP_SETGID) { if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID { - return syserror.EPERM + return linuxerr.EPERM } if newR != creds.RealKGID && newR != creds.EffectiveKGID { - return syserror.EPERM + return linuxerr.EPERM } } newS := creds.SavedKGID @@ -343,13 +343,13 @@ func (t *Task) SetExtraGIDs(gids []auth.GID) error { defer t.mu.Unlock() creds := t.Credentials() if !creds.HasCapability(linux.CAP_SETGID) { - return syserror.EPERM + return linuxerr.EPERM } kgids := make([]auth.KGID, len(gids)) for i, gid := range gids { kgid := creds.UserNamespace.MapToKGID(gid) if !kgid.Ok() { - return syserror.EINVAL + return linuxerr.EINVAL } kgids[i] = kgid } @@ -367,25 +367,25 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili // "Permitted: This is a limiting superset for the effective capabilities // that the thread may assume." - capabilities(7) if effective & ^permitted != 0 { - return syserror.EPERM + return linuxerr.EPERM } creds := t.Credentials() // "It is also a limiting superset for the capabilities that may be added // to the inheritable set by a thread that does not have the CAP_SETPCAP // capability in its effective set." if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) { - return syserror.EPERM + return linuxerr.EPERM } // "If a thread drops a capability from its permitted set, it can never // reacquire that capability (unless it execve(2)s ..." if permitted & ^creds.PermittedCaps != 0 { - return syserror.EPERM + return linuxerr.EPERM } // "... if a capability is not in the bounding set, then a thread can't add // this capability to its inheritable set, even if it was in its permitted // capabilities ..." if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. creds.PermittedCaps = permitted @@ -402,7 +402,7 @@ func (t *Task) DropBoundingCapability(cp linux.Capability) error { defer t.mu.Unlock() creds := t.Credentials() if !creds.HasCapability(linux.CAP_SETPCAP) { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. creds.BoundingCaps &^= auth.CapabilitySetOf(cp) @@ -422,7 +422,7 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error { // If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN // in ns (by rule 3 in auth.Credentials.HasCapability). if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { - return syserror.EPERM + return linuxerr.EPERM } creds = creds.Fork() // The credentials object is immutable. See doc for creds. diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index 72b9a0384..8de08151a 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -235,7 +235,7 @@ func (t *Task) traceExitEvent() { if !trace.IsEnabled() { return } - trace.Logf(t.traceContext, traceCategory, "exit status: 0x%x", t.exitStatus.Status()) + trace.Logf(t.traceContext, traceCategory, "exit status: %s", t.exitStatus) } // traceExecEvent is called when a task calls exec. diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 068f25af1..054ff212f 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -377,7 +377,7 @@ func (app *runApp) execute(t *Task) taskRunState { default: // What happened? Can't continue. t.Warningf("Unexpected SwitchToApp error: %v", err) - t.PrepareExit(ExitStatus{Code: ExtractErrno(err, -1)}) + t.PrepareExit(linux.WaitStatusExit(int32(ExtractErrno(err, -1)))) return (*runExit)(nil) } } diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go index f142feab4..9d9fa76a6 100644 --- a/pkg/sentry/kernel/task_sched.go +++ b/pkg/sentry/kernel/task_sched.go @@ -23,12 +23,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // TaskGoroutineState is a coarse representation of the current execution @@ -601,7 +601,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error { // Ensure that at least 1 CPU is still allowed. if mask.NumCPUs() == 0 { - return syserror.EINVAL + return linuxerr.EINVAL } if t.k.useHostCores { diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 8ca61ed48..7065ac79c 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -22,6 +22,7 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -156,7 +157,8 @@ func (t *Task) PendingSignals() linux.SignalSet { // deliverSignal delivers the given signal and returns the following run state. func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState { - sigact := computeAction(linux.Signal(info.Signo), act) + sig := linux.Signal(info.Signo) + sigact := computeAction(sig, act) if t.haveSyscallReturn { if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok { @@ -197,14 +199,14 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu } // Attach an fault address if appropriate. - switch linux.Signal(info.Signo) { + switch sig { case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS: ucs.FaultAddr = info.Addr() } eventchannel.Emit(ucs) - t.PrepareGroupExit(ExitStatus{Signo: int(info.Signo)}) + t.PrepareGroupExit(linux.WaitStatusTerminationSignal(sig)) return (*runExit)(nil) case SignalActionStop: @@ -224,12 +226,12 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu // Send a forced SIGSEGV. If the signal that couldn't be delivered // was a SIGSEGV, force the handler to SIG_DFL. - t.forceSignal(linux.SIGSEGV, linux.Signal(info.Signo) == linux.SIGSEGV /* unconditional */) + t.forceSignal(linux.SIGSEGV, sig == linux.SIGSEGV /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) } default: - panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(linux.Signal(info.Signo), act))) + panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(sig, act))) } return (*runInterrupt)(nil) } @@ -338,7 +340,7 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. } if timeout == 0 { - return nil, syserror.EAGAIN + return nil, linuxerr.EAGAIN } // Unblock signals we're waiting for. Remember the original signal mask so @@ -359,8 +361,8 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. if info := t.dequeueSignalLocked(mask); info != nil { return info, nil } - if err == syserror.ETIMEDOUT { - return nil, syserror.EAGAIN + if err == linuxerr.ETIMEDOUT { + return nil, linuxerr.EAGAIN } return nil, err } @@ -369,9 +371,9 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux. // // The following errors may be returned: // -// syserror.ESRCH - The task has exited. -// syserror.EINVAL - The signal is not valid. -// syserror.EAGAIN - THe signal is realtime, and cannot be queued. +// linuxerr.ESRCH - The task has exited. +// linuxerr.EINVAL - The signal is not valid. +// linuxerr.EAGAIN - THe signal is realtime, and cannot be queued. // func (t *Task) SendSignal(info *linux.SignalInfo) error { t.tg.pidns.owner.mu.RLock() @@ -406,14 +408,14 @@ func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error { func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error { if t.exitState == TaskExitDead { - return syserror.ESRCH + return linuxerr.ESRCH } sig := linux.Signal(info.Signo) if sig == 0 { return nil } if !sig.IsValid() { - return syserror.EINVAL + return linuxerr.EINVAL } // Signal side effects apply even if the signal is ultimately discarded. @@ -450,7 +452,7 @@ func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer * } if !q.enqueue(info, timer) { if sig.IsRealtime() { - return syserror.EAGAIN + return linuxerr.EAGAIN } t.Debugf("Discarding duplicate signal %d", sig) if timer != nil { @@ -505,7 +507,7 @@ func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) { // ignores tg.execing. if !tg.exiting { tg.exiting = true - tg.exitStatus = ExitStatus{Signo: int(linux.SIGKILL)} + tg.exitStatus = linux.WaitStatusTerminationSignal(linux.SIGKILL) } for t := tg.tasks.Front(); t != nil; t = t.Next() { t.killLocked() @@ -684,7 +686,7 @@ func (t *Task) SetSignalStack(alt linux.SignalStack) bool { // to *actptr (if actptr is not nil) and returns the old signal action. func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) { if !sig.IsValid() { - return linux.SigAction{}, syserror.EINVAL + return linux.SigAction{}, linuxerr.EINVAL } tg.pidns.owner.mu.RLock() @@ -695,7 +697,7 @@ func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) ( oldact := sh.actions[sig] if actptr != nil { if sig == linux.SIGKILL || sig == linux.SIGSTOP { - return oldact, syserror.EINVAL + return oldact, linuxerr.EINVAL } act := *actptr diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 41fd2d471..0565059c1 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -299,7 +300,7 @@ func (ns *PIDNamespace) allocateTID() (ThreadID, error) { // Did we do a full cycle? if tid == ns.last { // No tid available. - return 0, syserror.EAGAIN + return 0, linuxerr.EAGAIN } } } diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 601fc0d3a..0586c9def 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -22,6 +22,8 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" + "gvisor.dev/gvisor/pkg/errors" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/metric" @@ -159,7 +161,7 @@ func (t *Task) doSyscall() taskRunState { // ok case linux.SECCOMP_RET_KILL_THREAD: t.Debugf("Syscall %d: killed by seccomp", sysno) - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) case linux.SECCOMP_RET_TRACE: t.Debugf("Syscall %d: stopping for PTRACE_EVENT_SECCOMP", sysno) @@ -309,7 +311,7 @@ func (t *Task) doVsyscall(addr hostarch.Addr, sysno uintptr) taskRunState { return &runVsyscallAfterPtraceEventSeccomp{addr, sysno, caller} case linux.SECCOMP_RET_KILL_THREAD: t.Debugf("vsyscall %d: killed by seccomp", sysno) - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) default: panic(fmt.Sprintf("Unknown seccomp result %d", r)) @@ -336,7 +338,7 @@ func (r *runVsyscallAfterPtraceEventSeccomp) execute(t *Task) taskRunState { // Documentation/prctl/seccomp_filter.txt. On Linux, changing orig_ax or ip // causes do_exit(SIGSYS), and changing sp is ignored. if (sysno != ^uintptr(0) && sysno != r.sysno) || hostarch.Addr(t.Arch().IP()) != r.addr { - t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS)) return (*runExit)(nil) } if sysno == ^uintptr(0) { @@ -357,7 +359,7 @@ func (t *Task) doVsyscallInvoke(sysno uintptr, args arch.SyscallArguments, calle t.Arch().SetReturn(uintptr(rval)) } else { t.Debugf("vsyscall %d, caller %x: emulated syscall returned error: %v", sysno, t.Arch().Value(caller), err) - if err == syserror.EFAULT { + if linuxerr.Equals(linuxerr.EFAULT, err) { t.forceSignal(linux.SIGSEGV, false /* unconditional */) t.SendSignal(SignalInfoPriv(linux.SIGSEGV)) // A return is not emulated in this case. @@ -379,6 +381,8 @@ func ExtractErrno(err error, sysno int) int { return 0 case unix.Errno: return int(err) + case *errors.Error: + return int(err.Errno()) case syserror.SyscallRestartErrno: return int(err) case *memmap.BusError: diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index fc6d9438a..8e2c36598 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/syserror" @@ -132,7 +133,7 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro case 8: const itemLen = 16 if _, ok := addr.AddLength(uint64(src.NumRanges()) * itemLen); !ok { - return syserror.EFAULT + return linuxerr.EFAULT } b := t.CopyScratchBuffer(itemLen) @@ -190,7 +191,7 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan case 8: const itemLen = 16 if _, ok := addr.AddLength(uint64(numIovecs) * itemLen); !ok { - return hostarch.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, linuxerr.EFAULT } b := t.CopyScratchBuffer(itemLen) @@ -202,11 +203,11 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8])) length := hostarch.ByteOrder.Uint64(b[8:16]) if length > math.MaxInt64 { - return hostarch.AddrRangeSeq{}, syserror.EINVAL + return hostarch.AddrRangeSeq{}, linuxerr.EINVAL } ar, ok := t.MemoryManager().CheckIORange(base, int64(length)) if !ok { - return hostarch.AddrRangeSeq{}, syserror.EFAULT + return hostarch.AddrRangeSeq{}, linuxerr.EFAULT } if numIovecs == 1 { @@ -252,7 +253,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO } ar, ok := t.MemoryManager().CheckIORange(addr, int64(length)) if !ok { - return usermem.IOSequence{}, syserror.EFAULT + return usermem.IOSequence{}, linuxerr.EFAULT } return usermem.IOSequence{ IO: t.MemoryManager(), @@ -270,7 +271,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO // Preconditions: Same as Task.CopyInIovecs. func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) { if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV { - return usermem.IOSequence{}, syserror.EINVAL + return usermem.IOSequence{}, linuxerr.EINVAL } ars, err := t.CopyInIovecs(addr, iovcnt) if err != nil { @@ -312,7 +313,7 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) { tmm := cc.t.MemoryManager() cc.t.mu.Unlock() if !tmm.IncUsers() { - return nil, syserror.EFAULT + return nil, linuxerr.EFAULT } return tmm, nil } diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 4566e4c7c..2eda15303 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -19,13 +19,13 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // A ThreadGroup is a logical grouping of tasks that has widespread @@ -143,7 +143,7 @@ type ThreadGroup struct { // // While exiting is false, exitStatus is protected by the signal mutex. // When exiting becomes true, exitStatus becomes immutable. - exitStatus ExitStatus + exitStatus linux.WaitStatus // terminationSignal is the signal that this thread group's leader will // send to its parent when it exits. @@ -357,7 +357,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) // "The calling process must be a session leader and not have a // controlling terminal already." - tty_ioctl(4) if tg.processGroup.session.leader != tg || tg.tty != nil { - return syserror.EINVAL + return linuxerr.EINVAL } creds := auth.CredentialsFromContext(tg.leader) @@ -371,7 +371,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) if tty.tg != nil && tg.processGroup.session != tty.tg.processGroup.session { // Stealing requires CAP_SYS_ADMIN in the root user namespace. if !hasAdmin || !steal { - return syserror.EPERM + return linuxerr.EPERM } // Steal the TTY away. Unlike TIOCNOTTY, don't send signals. for othertg := range tg.pidns.owner.Root.tgids { @@ -391,7 +391,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool) } if !isReadable && !hasAdmin { - return syserror.EPERM + return linuxerr.EPERM } // Set the controlling terminal and foreground process group. @@ -419,7 +419,7 @@ func (tg *ThreadGroup) ReleaseControllingTTY(tty *TTY) error { if tg.tty == nil || tg.tty != tty { tg.signalHandlers.mu.Unlock() - return syserror.ENOTTY + return linuxerr.ENOTTY } // "If the process was session leader, then send SIGHUP and SIGCONT to @@ -473,7 +473,7 @@ func (tg *ThreadGroup) ForegroundProcessGroup(tty *TTY) (int32, error) { // "When fd does not refer to the controlling terminal of the calling // process, -1 is returned" - tcgetpgrp(3) if tg.tty != tty { - return -1, syserror.ENOTTY + return -1, linuxerr.ENOTTY } return int32(tg.processGroup.session.foreground.id), nil @@ -496,24 +496,24 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID) // tty must be the controlling terminal. if tg.tty != tty { - return -1, syserror.ENOTTY + return -1, linuxerr.ENOTTY } // pgid must be positive. if pgid < 0 { - return -1, syserror.EINVAL + return -1, linuxerr.EINVAL } // pg must not be empty. Empty process groups are removed from their // pid namespaces. pg, ok := tg.pidns.processGroups[pgid] if !ok { - return -1, syserror.ESRCH + return -1, linuxerr.ESRCH } // pg must be part of this process's session. if tg.processGroup.session != pg.session { - return -1, syserror.EPERM + return -1, linuxerr.EPERM } tg.processGroup.session.foreground.id = pgid diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 2817aa3ba..e293d9a0f 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -13,8 +13,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/sync", - "//pkg/syserror", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 26aa34aa6..191b92811 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -22,8 +22,8 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) @@ -322,7 +322,7 @@ func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Sett // interpreted as a time relative to now. func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (Setting, error) { if value < 0 { - return Setting{}, syserror.EINVAL + return Setting{}, linuxerr.EINVAL } if value == 0 { return Setting{Period: interval}, nil @@ -338,7 +338,7 @@ func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (S // interpreted as an absolute time. func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) { if value.Before(ZeroTime) { - return Setting{}, syserror.EINVAL + return Setting{}, linuxerr.EINVAL } if value.IsZero() { return Setting{Period: interval}, nil diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go index dfc3c0719..b6039505a 100644 --- a/pkg/sentry/kernel/timekeeper_test.go +++ b/pkg/sentry/kernel/timekeeper_test.go @@ -17,12 +17,12 @@ package kernel import ( "testing" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/pgalloc" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/syserror" ) // mockClocks is a sentrytime.Clocks that simply returns the times in the @@ -45,7 +45,7 @@ func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) { case sentrytime.Realtime: return c.realtime, nil default: - return 0, syserror.EINVAL + return 0, linuxerr.EINVAL } } |