diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/control/BUILD | 3 | ||||
-rw-r--r-- | pkg/sentry/control/proc.go | 98 | ||||
-rw-r--r-- | pkg/sentry/fdimport/BUILD | 19 | ||||
-rw-r--r-- | pkg/sentry/fdimport/fdimport.go | 129 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/tty.go | 26 | ||||
-rw-r--r-- | pkg/sentry/kernel/timekeeper.go | 19 | ||||
-rw-r--r-- | pkg/sentry/vfs/context.go | 24 | ||||
-rw-r--r-- | pkg/sentry/vfs/epoll.go | 1 |
9 files changed, 246 insertions, 75 deletions
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index d16d78aa5..e74275d2d 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -20,9 +20,11 @@ go_library( "//pkg/fd", "//pkg/fspath", "//pkg/log", + "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/host", "//pkg/sentry/fsbridge", + "//pkg/sentry/fsimpl/host", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", @@ -36,6 +38,7 @@ go_library( "//pkg/syserror", "//pkg/tcpip/link/sniffer", "//pkg/urpc", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index b51fb3959..2ed17ee09 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -24,13 +24,16 @@ import ( "text/tabwriter" "time" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/fsbridge" + hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -84,15 +87,13 @@ type ExecArgs struct { // the root group if not set explicitly. KGID auth.KGID - // ExtraKGIDs is the list of additional groups to which the user - // belongs. + // ExtraKGIDs is the list of additional groups to which the user belongs. ExtraKGIDs []auth.KGID // Capabilities is the list of capabilities to give to the process. Capabilities *auth.TaskCapabilities - // StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host - // pty FD. + // StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host pty FD. StdioIsPty bool // FilePayload determines the files to give to the new process. @@ -117,7 +118,7 @@ func (args ExecArgs) String() string { // Exec runs a new task. func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { - newTG, _, _, err := proc.execAsync(args) + newTG, _, _, _, err := proc.execAsync(args) if err != nil { return err } @@ -130,26 +131,18 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { // ExecAsync runs a new task, but doesn't wait for it to finish. It is defined // as a function rather than a method to avoid exposing execAsync as an RPC. -func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) { +func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { return proc.execAsync(args) } // execAsync runs a new task, but doesn't wait for it to finish. It returns the // newly created thread group and its PID. If the stdio FDs are TTYs, then a // TTYFileOperations that wraps the TTY is also returned. -func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) { +func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { // Import file descriptors. fdTable := proc.Kernel.NewFDTable() defer fdTable.DecRef() - // No matter what happens, we should close all files in the FilePayload - // before returning. Any files that are imported will be duped. - defer func() { - for _, f := range args.FilePayload.Files { - f.Close() - } - }() - creds := auth.NewUserCredentials( args.KUID, args.KGID, @@ -202,7 +195,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI vfsObj := proc.Kernel.VFS() file, err := ResolveExecutablePath(ctx, vfsObj, initArgs.WorkingDirectory, initArgs.Argv[0], paths) if err != nil { - return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) } initArgs.File = fsbridge.NewVFSFile(file) } else { @@ -214,74 +207,57 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI // initArgs must hold a reference on MountNamespace, which will // be donated to the new process in CreateProcess. - initArgs.MountNamespaceVFS2.IncRef() + initArgs.MountNamespace.IncRef() } f, err := initArgs.MountNamespace.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) if err != nil { - return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) } initArgs.Filename = f } } - // TODO(gvisor.dev/issue/1623): Use host FD when supported in VFS2. - var ttyFile *fs.File - for appFD, hostFile := range args.FilePayload.Files { - var appFile *fs.File - - if args.StdioIsPty && appFD < 3 { - // Import the file as a host TTY file. - if ttyFile == nil { - var err error - appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), true /* isTTY */) - if err != nil { - return nil, 0, nil, err - } - defer appFile.DecRef() - - // Remember this in the TTY file, as we will - // use it for the other stdio FDs. - ttyFile = appFile - } else { - // Re-use the existing TTY file, as all three - // stdio FDs must point to the same fs.File in - // order to share TTY state, specifically the - // foreground process group id. - appFile = ttyFile - } - } else { - // Import the file as a regular host file. - var err error - appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), false /* isTTY */) + fds := make([]int, len(args.FilePayload.Files)) + for i, file := range args.FilePayload.Files { + if kernel.VFS2Enabled { + // Need to dup to remove ownership from os.File. + dup, err := unix.Dup(int(file.Fd())) if err != nil { - return nil, 0, nil, err + return nil, 0, nil, nil, fmt.Errorf("duplicating payload files: %w", err) } - defer appFile.DecRef() + fds[i] = dup + } else { + // VFS1 dups the file on import. + fds[i] = int(file.Fd()) } - - // Add the file to the FD map. - if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { - return nil, 0, nil, err + } + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds) + if err != nil { + if kernel.VFS2Enabled { + for _, fd := range fds { + unix.Close(fd) + } } + return nil, 0, nil, nil, err } tg, tid, err := proc.Kernel.CreateProcess(initArgs) if err != nil { - return nil, 0, nil, err + return nil, 0, nil, nil, err } - var ttyFileOps *host.TTYFileOperations - if ttyFile != nil { - // Set the foreground process group on the TTY before starting - // the process. - ttyFileOps = ttyFile.FileOperations.(*host.TTYFileOperations) - ttyFileOps.InitForegroundProcessGroup(tg.ProcessGroup()) + // Set the foreground process group on the TTY before starting the process. + switch { + case ttyFile != nil: + ttyFile.InitForegroundProcessGroup(tg.ProcessGroup()) + case ttyFileVFS2 != nil: + ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup()) } // Start the newly created process. proc.Kernel.StartProcess(tg) - return tg, tid, ttyFileOps, nil + return tg, tid, ttyFile, ttyFileVFS2, nil } // PsArgs is the set of arguments to ps. diff --git a/pkg/sentry/fdimport/BUILD b/pkg/sentry/fdimport/BUILD new file mode 100644 index 000000000..5e41ceb4e --- /dev/null +++ b/pkg/sentry/fdimport/BUILD @@ -0,0 +1,19 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "fdimport", + srcs = [ + "fdimport.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/context", + "//pkg/sentry/fs", + "//pkg/sentry/fs/host", + "//pkg/sentry/fsimpl/host", + "//pkg/sentry/kernel", + "//pkg/sentry/vfs", + ], +) diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go new file mode 100644 index 000000000..a4199f9e9 --- /dev/null +++ b/pkg/sentry/fdimport/fdimport.go @@ -0,0 +1,129 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fdimport + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// Import imports a slice of FDs into the given FDTable. If console is true, +// sets up TTY for the first 3 FDs in the slice representing stdin, stdout, +// stderr. Upon success, Import takes ownership of all FDs. +func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []int) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + if kernel.VFS2Enabled { + ttyFile, err := importVFS2(ctx, fdTable, console, fds) + return nil, ttyFile, err + } + ttyFile, err := importFS(ctx, fdTable, console, fds) + return ttyFile, nil, err +} + +func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []int) (*host.TTYFileOperations, error) { + var ttyFile *fs.File + for appFD, hostFD := range fds { + var appFile *fs.File + + if console && appFD < 3 { + // Import the file as a host TTY file. + if ttyFile == nil { + var err error + appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */) + if err != nil { + return nil, err + } + defer appFile.DecRef() + + // Remember this in the TTY file, as we will + // use it for the other stdio FDs. + ttyFile = appFile + } else { + // Re-use the existing TTY file, as all three + // stdio FDs must point to the same fs.File in + // order to share TTY state, specifically the + // foreground process group id. + appFile = ttyFile + } + } else { + // Import the file as a regular host file. + var err error + appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */) + if err != nil { + return nil, err + } + defer appFile.DecRef() + } + + // Add the file to the FD map. + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { + return nil, err + } + } + + if ttyFile == nil { + return nil, nil + } + return ttyFile.FileOperations.(*host.TTYFileOperations), nil +} + +func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []int) (*hostvfs2.TTYFileDescription, error) { + k := kernel.KernelFromContext(ctx) + + var ttyFile *vfs.FileDescription + for appFD, hostFD := range stdioFDs { + var appFile *vfs.FileDescription + + if console && appFD < 3 { + // Import the file as a host TTY file. + if ttyFile == nil { + var err error + appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD, true /* isTTY */) + if err != nil { + return nil, err + } + defer appFile.DecRef() + + // Remember this in the TTY file, as we will use it for the other stdio + // FDs. + ttyFile = appFile + } else { + // Re-use the existing TTY file, as all three stdio FDs must point to + // the same fs.File in order to share TTY state, specifically the + // foreground process group id. + appFile = ttyFile + } + } else { + var err error + appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD, false /* isTTY */) + if err != nil { + return nil, err + } + defer appFile.DecRef() + } + + if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { + return nil, err + } + } + + if ttyFile == nil { + return nil, nil + } + return ttyFile.Impl().(*hostvfs2.TTYFileDescription), nil +} diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 34fbc69af..2be498afc 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -422,7 +422,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.F // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that // we don't allow importing arbitrary file types without proper support. if i.isTTY { - fd := &ttyFD{ + fd := &TTYFileDescription{ fileDescription: fileDescription{inode: i}, termios: linux.DefaultSlaveTermios, } diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 8936afb06..68af6e5af 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -26,15 +26,15 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// ttyFD implements vfs.FileDescriptionImpl for a host file descriptor -// that wraps a TTY FD. -type ttyFD struct { +// TTYFileDescription implements vfs.FileDescriptionImpl for a host file +// descriptor that wraps a TTY FD. +type TTYFileDescription struct { fileDescription // mu protects the fields below. mu sync.Mutex `state:"nosave"` - // session is the session attached to this ttyFD. + // session is the session attached to this TTYFileDescription. session *kernel.Session // fgProcessGroup is the foreground process group that is currently @@ -48,7 +48,7 @@ type ttyFD struct { // InitForegroundProcessGroup sets the foreground process group and session for // the TTY. This should only be called once, after the foreground process group // has been created, but before it has started running. -func (t *ttyFD) InitForegroundProcessGroup(pg *kernel.ProcessGroup) { +func (t *TTYFileDescription) InitForegroundProcessGroup(pg *kernel.ProcessGroup) { t.mu.Lock() defer t.mu.Unlock() if t.fgProcessGroup != nil { @@ -59,14 +59,14 @@ func (t *ttyFD) InitForegroundProcessGroup(pg *kernel.ProcessGroup) { } // ForegroundProcessGroup returns the foreground process for the TTY. -func (t *ttyFD) ForegroundProcessGroup() *kernel.ProcessGroup { +func (t *TTYFileDescription) ForegroundProcessGroup() *kernel.ProcessGroup { t.mu.Lock() defer t.mu.Unlock() return t.fgProcessGroup } // Release implements fs.FileOperations.Release. -func (t *ttyFD) Release() { +func (t *TTYFileDescription) Release() { t.mu.Lock() t.fgProcessGroup = nil t.mu.Unlock() @@ -78,7 +78,7 @@ func (t *ttyFD) Release() { // // Reading from a TTY is only allowed for foreground process groups. Background // process groups will either get EIO or a SIGTTIN. -func (t *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { +func (t *TTYFileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { t.mu.Lock() defer t.mu.Unlock() @@ -96,7 +96,7 @@ func (t *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, // // Reading from a TTY is only allowed for foreground process groups. Background // process groups will either get EIO or a SIGTTIN. -func (t *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { +func (t *TTYFileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { t.mu.Lock() defer t.mu.Unlock() @@ -111,7 +111,7 @@ func (t *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadO } // PWrite implements vfs.FileDescriptionImpl. -func (t *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { +func (t *TTYFileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { t.mu.Lock() defer t.mu.Unlock() @@ -126,7 +126,7 @@ func (t *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64 } // Write implements vfs.FileDescriptionImpl. -func (t *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { +func (t *TTYFileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { t.mu.Lock() defer t.mu.Unlock() @@ -141,7 +141,7 @@ func (t *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.Writ } // Ioctl implements vfs.FileDescriptionImpl. -func (t *ttyFD) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { // Ignore arg[0]. This is the real FD: fd := t.inode.hostFD ioctl := args[1].Uint64() @@ -321,7 +321,7 @@ func (t *ttyFD) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgum // is a bit convoluted, but documented inline. // // Preconditions: t.mu must be held. -func (t *ttyFD) checkChange(ctx context.Context, sig linux.Signal) error { +func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) error { task := kernel.TaskFromContext(ctx) if task == nil { // No task? Linux does not have an analog for this case, but diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go index dc99301de..da0ea7bb5 100644 --- a/pkg/sentry/kernel/timekeeper.go +++ b/pkg/sentry/kernel/timekeeper.go @@ -16,6 +16,7 @@ package kernel import ( "fmt" + "sync/atomic" "time" "gvisor.dev/gvisor/pkg/log" @@ -48,6 +49,9 @@ type Timekeeper struct { // It is set only once, by SetClocks. monotonicOffset int64 `state:"nosave"` + // monotonicLowerBound is the lowerBound for monotonic time. + monotonicLowerBound int64 `state:"nosave"` + // restored, if non-nil, indicates that this Timekeeper was restored // from a state file. The clocks are not set until restored is closed. restored chan struct{} `state:"nosave"` @@ -271,6 +275,21 @@ func (t *Timekeeper) GetTime(c sentrytime.ClockID) (int64, error) { now, err := t.clocks.GetTime(c) if err == nil && c == sentrytime.Monotonic { now += t.monotonicOffset + for { + // It's possible that the clock is shaky. This may be due to + // platform issues, e.g. the KVM platform relies on the guest + // TSC and host TSC, which may not be perfectly in sync. To + // work around this issue, ensure that the monotonic time is + // always bounded by the last time read. + oldLowerBound := atomic.LoadInt64(&t.monotonicLowerBound) + if now < oldLowerBound { + now = oldLowerBound + break + } + if atomic.CompareAndSwapInt64(&t.monotonicLowerBound, oldLowerBound, now) { + break + } + } } return now, err } diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go index 82781e6d3..c9e724fef 100644 --- a/pkg/sentry/vfs/context.go +++ b/pkg/sentry/vfs/context.go @@ -49,3 +49,27 @@ func RootFromContext(ctx context.Context) VirtualDentry { } return VirtualDentry{} } + +type rootContext struct { + context.Context + root VirtualDentry +} + +// WithRoot returns a copy of ctx with the given root. +func WithRoot(ctx context.Context, root VirtualDentry) context.Context { + return &rootContext{ + Context: ctx, + root: root, + } +} + +// Value implements Context.Value. +func (rc rootContext) Value(key interface{}) interface{} { + switch key { + case CtxRoot: + rc.root.IncRef() + return rc.root + default: + return rc.Context.Value(key) + } +} diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index 8e0b40841..8297f964b 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -192,6 +192,7 @@ func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, event lin mask: mask, userData: event.Data, } + epi.waiter.Callback = epi ep.interest[key] = epi wmask := waiter.EventMaskFromLinux(mask) file.EventRegister(&epi.waiter, wmask) |