diff options
author | Kevin Krakauer <krakauer@google.com> | 2019-06-12 15:21:22 -0700 |
---|---|---|
committer | Kevin Krakauer <krakauer@google.com> | 2019-06-12 15:21:22 -0700 |
commit | 0bbbcafd68154e7c7b46692b84a39fb6bb5f1568 (patch) | |
tree | d8fba01ad76900715665b0418a786de2d77e2a05 /pkg/sentry/fs | |
parent | 06a83df533244dc2b3b8adfc1bf0608d3753c1d9 (diff) | |
parent | 70578806e8d3e01fae2249b3e602cd5b05d378a0 (diff) |
Merge branch 'master' into iptables-1-pkg
Change-Id: I7457a11de4725e1bf3811420c505d225b1cb6943
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- | pkg/sentry/fs/dirent.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/file.go | 24 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/socket.go | 16 | ||||
-rw-r--r-- | pkg/sentry/fs/host/socket.go | 73 | ||||
-rw-r--r-- | pkg/sentry/fs/host/socket_test.go | 156 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_overlay.go | 12 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/inode.go | 40 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/net.go | 34 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/task.go | 17 | ||||
-rw-r--r-- | pkg/sentry/fs/timerfd/timerfd.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/tmpfs/fs.go | 25 |
13 files changed, 162 insertions, 244 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index c0bc261a2..a0a35c242 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -805,7 +805,7 @@ func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data trans var childDir *Dirent err := d.genericCreate(ctx, root, name, func() error { var e error - childDir, e = d.Inode.Bind(ctx, name, data, perms) + childDir, e = d.Inode.Bind(ctx, d, name, data, perms) if e != nil { return e } diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 8c1307235..f64954457 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -545,12 +545,28 @@ type lockedWriter struct { // Write implements io.Writer.Write. func (w *lockedWriter) Write(buf []byte) (int, error) { - n, err := w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf), w.File.offset) - return int(n), err + return w.WriteAt(buf, w.File.offset) } // WriteAt implements io.Writer.WriteAt. func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { - n, err := w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf), offset) - return int(n), err + var ( + written int + err error + ) + // The io.Writer contract requires that Write writes all available + // bytes and does not return short writes. This causes errors with + // io.Copy, since our own Write interface does not have this same + // contract. Enforce that here. + for written < len(buf) { + var n int64 + n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written)) + if n > 0 { + written += int(n) + } + if err != nil { + break + } + } + return written, err } diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index cbd5b9a84..7ac0a421f 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -15,6 +15,7 @@ package gofer import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/p9" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" @@ -61,13 +62,13 @@ type endpoint struct { path string } -func unixSockToP9(t transport.SockType) (p9.ConnectFlags, bool) { +func sockTypeToP9(t linux.SockType) (p9.ConnectFlags, bool) { switch t { - case transport.SockStream: + case linux.SOCK_STREAM: return p9.StreamSocket, true - case transport.SockSeqpacket: + case linux.SOCK_SEQPACKET: return p9.SeqpacketSocket, true - case transport.SockDgram: + case linux.SOCK_DGRAM: return p9.DgramSocket, true } return 0, false @@ -75,7 +76,7 @@ func unixSockToP9(t transport.SockType) (p9.ConnectFlags, bool) { // BidirectionalConnect implements ConnectableEndpoint.BidirectionalConnect. func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error { - cf, ok := unixSockToP9(ce.Type()) + cf, ok := sockTypeToP9(ce.Type()) if !ok { return syserr.ErrConnectionRefused } @@ -139,3 +140,8 @@ func (e *endpoint) UnidirectionalConnect() (transport.ConnectedEndpoint, *syserr func (e *endpoint) Release() { e.inode.DecRef() } + +// Passcred implements transport.BoundEndpoint.Passcred. +func (e *endpoint) Passcred() bool { + return false +} diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 3ed137006..305eea718 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -15,9 +15,11 @@ package host import ( + "fmt" "sync" "syscall" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/fd" "gvisor.googlesource.com/gvisor/pkg/fdnotifier" "gvisor.googlesource.com/gvisor/pkg/log" @@ -51,25 +53,11 @@ type ConnectedEndpoint struct { // ref keeps track of references to a connectedEndpoint. ref refs.AtomicRefCount - // mu protects fd, readClosed and writeClosed. - mu sync.RWMutex `state:"nosave"` - - // file is an *fd.FD containing the FD backing this endpoint. It must be - // set to nil if it has been closed. - file *fd.FD `state:"nosave"` - - // readClosed is true if the FD has read shutdown or if it has been closed. - readClosed bool - - // writeClosed is true if the FD has write shutdown or if it has been - // closed. - writeClosed bool - // If srfd >= 0, it is the host FD that file was imported from. srfd int `state:"wait"` // stype is the type of Unix socket. - stype transport.SockType + stype linux.SockType // sndbuf is the size of the send buffer. // @@ -78,6 +66,13 @@ type ConnectedEndpoint struct { // prevent lots of small messages from filling the real send buffer // size on the host. sndbuf int `state:"nosave"` + + // mu protects the fields below. + mu sync.RWMutex `state:"nosave"` + + // file is an *fd.FD containing the FD backing this endpoint. It must be + // set to nil if it has been closed. + file *fd.FD `state:"nosave"` } // init performs initialization required for creating new ConnectedEndpoints and @@ -111,7 +106,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error { return syserr.ErrInvalidEndpointState } - c.stype = transport.SockType(stype) + c.stype = linux.SockType(stype) c.sndbuf = sndbuf return nil @@ -169,7 +164,7 @@ func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.F ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) - return unixsocket.NewWithDirent(ctx, d, ep, e.stype != transport.SockStream, flags), nil + return unixsocket.NewWithDirent(ctx, d, ep, e.stype, flags), nil } // newSocket allocates a new unix socket with host endpoint. @@ -201,16 +196,13 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) - return unixsocket.New(ctx, ep, e.stype != transport.SockStream), nil + return unixsocket.New(ctx, ep, e.stype), nil } // Send implements transport.ConnectedEndpoint.Send. func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() - if c.writeClosed { - return 0, false, syserr.ErrClosedForSend - } if !controlMessages.Empty() { return 0, false, syserr.ErrInvalidEndpointState @@ -218,7 +210,7 @@ func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.Contro // Since stream sockets don't preserve message boundaries, we can write // only as much of the message as fits in the send buffer. - truncate := c.stype == transport.SockStream + truncate := c.stype == linux.SOCK_STREAM n, totalLen, err := fdWriteVec(c.file.FD(), data, c.sndbuf, truncate) if n < totalLen && err == nil { @@ -244,8 +236,13 @@ func (c *ConnectedEndpoint) SendNotify() {} // CloseSend implements transport.ConnectedEndpoint.CloseSend. func (c *ConnectedEndpoint) CloseSend() { c.mu.Lock() - c.writeClosed = true - c.mu.Unlock() + defer c.mu.Unlock() + + if err := syscall.Shutdown(c.file.FD(), syscall.SHUT_WR); err != nil { + // A well-formed UDS shutdown can't fail. See + // net/unix/af_unix.c:unix_shutdown. + panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err)) + } } // CloseNotify implements transport.ConnectedEndpoint.CloseNotify. @@ -255,9 +252,7 @@ func (c *ConnectedEndpoint) CloseNotify() {} func (c *ConnectedEndpoint) Writable() bool { c.mu.RLock() defer c.mu.RUnlock() - if c.writeClosed { - return true - } + return fdnotifier.NonBlockingPoll(int32(c.file.FD()), waiter.EventOut)&waiter.EventOut != 0 } @@ -285,9 +280,6 @@ func (c *ConnectedEndpoint) EventUpdate() { func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() - if c.readClosed { - return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.ErrClosedForReceive - } var cm unet.ControlMessage if numRights > 0 { @@ -344,31 +336,34 @@ func (c *ConnectedEndpoint) RecvNotify() {} // CloseRecv implements transport.Receiver.CloseRecv. func (c *ConnectedEndpoint) CloseRecv() { c.mu.Lock() - c.readClosed = true - c.mu.Unlock() + defer c.mu.Unlock() + + if err := syscall.Shutdown(c.file.FD(), syscall.SHUT_RD); err != nil { + // A well-formed UDS shutdown can't fail. See + // net/unix/af_unix.c:unix_shutdown. + panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err)) + } } // Readable implements transport.Receiver.Readable. func (c *ConnectedEndpoint) Readable() bool { c.mu.RLock() defer c.mu.RUnlock() - if c.readClosed { - return true - } + return fdnotifier.NonBlockingPoll(int32(c.file.FD()), waiter.EventIn)&waiter.EventIn != 0 } // SendQueuedSize implements transport.Receiver.SendQueuedSize. func (c *ConnectedEndpoint) SendQueuedSize() int64 { - // SendQueuedSize isn't supported for host sockets because we don't allow the - // sentry to call ioctl(2). + // TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host + // sockets because we don't allow the sentry to call ioctl(2). return -1 } // RecvQueuedSize implements transport.Receiver.RecvQueuedSize. func (c *ConnectedEndpoint) RecvQueuedSize() int64 { - // RecvQueuedSize isn't supported for host sockets because we don't allow the - // sentry to call ioctl(2). + // TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host + // sockets because we don't allow the sentry to call ioctl(2). return -1 } diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index 06392a65a..bc3ce5627 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -198,20 +198,6 @@ func TestListen(t *testing.T) { } } -func TestSend(t *testing.T) { - e := ConnectedEndpoint{writeClosed: true} - if _, _, err := e.Send(nil, transport.ControlMessages{}, tcpip.FullAddress{}); err != syserr.ErrClosedForSend { - t.Errorf("Got %#v.Send() = %v, want = %v", e, err, syserr.ErrClosedForSend) - } -} - -func TestRecv(t *testing.T) { - e := ConnectedEndpoint{readClosed: true} - if _, _, _, _, _, _, err := e.Recv(nil, false, 0, false); err != syserr.ErrClosedForReceive { - t.Errorf("Got %#v.Recv() = %v, want = %v", e, err, syserr.ErrClosedForReceive) - } -} - func TestPasscred(t *testing.T) { e := ConnectedEndpoint{} if got, want := e.Passcred(), false; got != want { @@ -244,20 +230,6 @@ func TestQueuedSize(t *testing.T) { } } -func TestReadable(t *testing.T) { - e := ConnectedEndpoint{readClosed: true} - if got, want := e.Readable(), true; got != want { - t.Errorf("Got %#v.Readable() = %t, want = %t", e, got, want) - } -} - -func TestWritable(t *testing.T) { - e := ConnectedEndpoint{writeClosed: true} - if got, want := e.Writable(), true; got != want { - t.Errorf("Got %#v.Writable() = %t, want = %t", e, got, want) - } -} - func TestRelease(t *testing.T) { f, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) if err != nil { @@ -272,131 +244,3 @@ func TestRelease(t *testing.T) { t.Errorf("got = %#v, want = %#v", c, want) } } - -func TestClose(t *testing.T) { - type testCase struct { - name string - cep *ConnectedEndpoint - addFD bool - f func() - want *ConnectedEndpoint - } - - var tests []testCase - - // nil is the value used by ConnectedEndpoint to indicate a closed file. - // Non-nil files are used to check if the file gets closed. - - f, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} - tests = append(tests, testCase{ - name: "First CloseRecv", - cep: c, - addFD: false, - f: c.CloseRecv, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), readClosed: true} - tests = append(tests, testCase{ - name: "Second CloseRecv", - cep: c, - addFD: false, - f: c.CloseRecv, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} - tests = append(tests, testCase{ - name: "First CloseSend", - cep: c, - addFD: false, - f: c.CloseSend, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, writeClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), writeClosed: true} - tests = append(tests, testCase{ - name: "Second CloseSend", - cep: c, - addFD: false, - f: c.CloseSend, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, writeClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), writeClosed: true} - tests = append(tests, testCase{ - name: "CloseSend then CloseRecv", - cep: c, - addFD: true, - f: c.CloseRecv, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true, writeClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), readClosed: true} - tests = append(tests, testCase{ - name: "CloseRecv then CloseSend", - cep: c, - addFD: true, - f: c.CloseSend, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true, writeClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), readClosed: true, writeClosed: true} - tests = append(tests, testCase{ - name: "Full close then CloseRecv", - cep: c, - addFD: false, - f: c.CloseRecv, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true, writeClosed: true}, - }) - - f, err = syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c = &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f), readClosed: true, writeClosed: true} - tests = append(tests, testCase{ - name: "Full close then CloseSend", - cep: c, - addFD: false, - f: c.CloseSend, - want: &ConnectedEndpoint{queue: c.queue, file: c.file, readClosed: true, writeClosed: true}, - }) - - for _, test := range tests { - if test.addFD { - fdnotifier.AddFD(int32(test.cep.file.FD()), nil) - } - if test.f(); !reflect.DeepEqual(test.cep, test.want) { - t.Errorf("%s: got = %#v, want = %#v", test.name, test.cep, test.want) - } - } -} diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index aef1a1cb9..0b54c2e77 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -220,9 +220,9 @@ func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, } // Bind calls i.InodeOperations.Bind with i as the directory. -func (i *Inode) Bind(ctx context.Context, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { +func (i *Inode) Bind(ctx context.Context, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { if i.overlay != nil { - return overlayBind(ctx, i.overlay, name, data, perm) + return overlayBind(ctx, i.overlay, parent, name, data, perm) } return i.InodeOperations.Bind(ctx, i, name, data, perm) } diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index cdffe173b..06506fb20 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -398,14 +398,14 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena return nil } -func overlayBind(ctx context.Context, o *overlayEntry, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { +func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { + if err := copyUp(ctx, parent); err != nil { + return nil, err + } + o.copyMu.RLock() defer o.copyMu.RUnlock() - // We do not support doing anything exciting with sockets unless there - // is already a directory in the upper filesystem. - if o.upper == nil { - return nil, syserror.EOPNOTSUPP - } + d, err := o.upper.InodeOperations.Bind(ctx, o.upper, name, data, perm) if err != nil { return nil, err diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index d19c360e0..1728fe0b5 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -45,6 +45,7 @@ go_library( "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/mm", + "//pkg/sentry/socket", "//pkg/sentry/socket/rpcinet", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index 379569823..986bc0a45 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -21,11 +21,14 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) // taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr -// method to return the task as the owner. +// method to return either the task or root as the owner, depending on the +// task's dumpability. // // +stateify savable type taskOwnedInodeOps struct { @@ -41,9 +44,42 @@ func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) ( if err != nil { return fs.UnstableAttr{}, err } - // Set the task owner as the file owner. + + // By default, set the task owner as the file owner. creds := i.t.Credentials() uattr.Owner = fs.FileOwner{creds.EffectiveKUID, creds.EffectiveKGID} + + // Linux doesn't apply dumpability adjustments to world + // readable/executable directories so that applications can stat + // /proc/PID to determine the effective UID of a process. See + // fs/proc/base.c:task_dump_owner. + if fs.IsDir(inode.StableAttr) && uattr.Perms == fs.FilePermsFromMode(0555) { + return uattr, nil + } + + // If the task is not dumpable, then root (in the namespace preferred) + // owns the file. + var m *mm.MemoryManager + i.t.WithMuLocked(func(t *kernel.Task) { + m = t.MemoryManager() + }) + + if m == nil { + uattr.Owner.UID = auth.RootKUID + uattr.Owner.GID = auth.RootKGID + } else if m.Dumpability() != mm.UserDumpable { + if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() { + uattr.Owner.UID = kuid + } else { + uattr.Owner.UID = auth.RootKUID + } + if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() { + uattr.Owner.GID = kgid + } else { + uattr.Owner.GID = auth.RootKGID + } + } + return uattr, nil } diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 4a107c739..034950158 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -27,6 +27,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" ) @@ -213,17 +214,18 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n") // Entries - for _, sref := range n.k.ListSockets(linux.AF_UNIX) { - s := sref.Get() + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() if s == nil { - log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", sref) + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) continue } sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(*unix.SocketOperations) - if !ok { - panic(fmt.Sprintf("Found non-unix socket file in unix socket table: %+v", sfile)) + if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + // Not a unix socket. + continue } + sops := sfile.FileOperations.(*unix.SocketOperations) addr, err := sops.Endpoint().GetLocalAddress() if err != nil { @@ -240,24 +242,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } } - var sockState int - switch sops.Endpoint().Type() { - case linux.SOCK_DGRAM: - sockState = linux.SS_CONNECTING - // Unlike Linux, we don't have unbound connection-less sockets, - // so no SS_DISCONNECTING. - - case linux.SOCK_SEQPACKET: - fallthrough - case linux.SOCK_STREAM: - // Connectioned. - if sops.Endpoint().(transport.ConnectingEndpoint).Connected() { - sockState = linux.SS_CONNECTED - } else { - sockState = linux.SS_UNCONNECTED - } - } - // In the socket entry below, the value for the 'Num' field requires // some consideration. Linux prints the address to the struct // unix_sock representing a socket in the kernel, but may redact the @@ -282,7 +266,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s 0, // Protocol, always 0 for UDS. sockFlags, // Flags. sops.Endpoint().Type(), // Type. - sockState, // State. + sops.State(), // State. sfile.InodeID(), // Inode. ) diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 77e03d349..21a965f90 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -96,7 +96,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks boo contents["cgroup"] = newCGroupInode(t, msrc, p.cgroupControllers) } - // TODO(b/31916171): Set EUID/EGID based on dumpability. + // N.B. taskOwnedInodeOps enforces dumpability-based ownership. d := &taskDir{ Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)), t: t, @@ -667,6 +667,21 @@ func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { return newProcInode(c, msrc, fs.SpecialFile, t) } +// Check implements fs.InodeOperations.Check. +func (c *comm) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool { + // This file can always be read or written by members of the same + // thread group. See fs/proc/base.c:proc_tid_comm_permission. + // + // N.B. This check is currently a no-op as we don't yet support writing + // and this file is world-readable anyways. + t := kernel.TaskFromContext(ctx) + if t != nil && t.ThreadGroup() == c.t.ThreadGroup() && !p.Execute { + return true + } + + return fs.ContextCanAccessFile(ctx, inode, p) +} + // GetFile implements fs.InodeOperations.GetFile. func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index bce5f091d..c1721f434 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -54,6 +54,8 @@ type TimerOperations struct { // NewFile returns a timerfd File that receives time from c. func NewFile(ctx context.Context, c ktime.Clock) *fs.File { dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[timerfd]") + // Release the initial dirent reference after NewFile takes a reference. + defer dirent.DecRef() tops := &TimerOperations{} tops.timer = ktime.NewTimer(c, tops) // Timerfds reject writes, but the Write flag must be set in order to diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index b7c29a4d1..83e1bf247 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -34,6 +34,16 @@ const ( // GID for the root directory. rootGIDKey = "gid" + // cacheKey sets the caching policy for the mount. + cacheKey = "cache" + + // cacheAll uses the virtual file system cache for everything (default). + cacheAll = "cache" + + // cacheRevalidate allows dirents to be cached, but revalidates them on each + // lookup. + cacheRevalidate = "revalidate" + // TODO(edahlgren/mpratt): support a tmpfs size limit. // size = "size" @@ -122,15 +132,24 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou delete(options, rootGIDKey) } + // Construct a mount which will follow the cache options provided. + var msrc *fs.MountSource + switch options[cacheKey] { + case "", cacheAll: + msrc = fs.NewCachingMountSource(f, flags) + case cacheRevalidate: + msrc = fs.NewRevalidatingMountSource(f, flags) + default: + return nil, fmt.Errorf("invalid cache policy option %q", options[cacheKey]) + } + delete(options, cacheKey) + // Fail if the caller passed us more options than we can parse. They may be // expecting us to set something we can't set. if len(options) > 0 { return nil, fmt.Errorf("unsupported mount options: %v", options) } - // Construct a mount which will cache dirents. - msrc := fs.NewCachingMountSource(f, flags) - // Construct the tmpfs root. return NewDir(ctx, nil, owner, perms, msrc), nil } |