diff options
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- | pkg/sentry/fs/g3doc/fuse.md | 99 | ||||
-rw-r--r-- | pkg/sentry/fs/host/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/host/socket_unsafe.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/host/tty.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_overlay.go | 11 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/sys_net.go | 120 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/sys_net_state.go | 15 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/sys_net_test.go | 73 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/task.go | 46 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/BUILD | 3 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/dir.go | 46 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/fs.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/line_discipline.go | 55 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/master.go | 37 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/queue.go | 14 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/replica.go (renamed from pkg/sentry/fs/tty/slave.go) | 88 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/terminal.go | 39 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/tty_test.go | 4 |
20 files changed, 509 insertions, 158 deletions
diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md index 2ca84dd74..05e043583 100644 --- a/pkg/sentry/fs/g3doc/fuse.md +++ b/pkg/sentry/fs/g3doc/fuse.md @@ -79,7 +79,7 @@ ops can be implemented in parallel. - Implement `/dev/fuse` - a character device used to establish an FD for communication between the sentry and the server daemon. -- Implement basic FUSE ops like `FUSE_INIT`, `FUSE_DESTROY`. +- Implement basic FUSE ops like `FUSE_INIT`. #### Read-only mount with basic file operations @@ -95,6 +95,103 @@ ops can be implemented in parallel. - Implement the remaining FUSE ops and decide if we can omit rarely used operations like ioctl. +### Design Details + +#### Lifecycle for a FUSE Request + +- User invokes a syscall +- Sentry prepares corresponding request + - If FUSE device is available + - Write the request in binary + - If FUSE device is full + - Kernel task blocked until available +- Sentry notifies the readers of fuse device that it's ready for read +- FUSE daemon reads the request and processes it +- Sentry waits until a reply is written to the FUSE device + - but returns directly for async requests +- FUSE daemon writes to the fuse device +- Sentry processes the reply + - For sync requests, unblock blocked kernel task + - For async requests, execute pre-specified callback if any +- Sentry returns the syscall to the user + +#### Channels and Queues for Requests in Different Stages + +`connection.initializedChan` + +- a channel that the requests issued before connection initialization blocks + on. + +`fd.queue` + +- a queue of requests that haven’t been read by the FUSE daemon yet. + +`fd.completions` + +- a map of the requests that have been prepared but not yet received a + response, including the ones on the `fd.queue`. + +`fd.waitQueue` + +- a queue of waiters that is waiting for the fuse device fd to be available, + such as the FUSE daemon. + +`fd.fullQueueCh` + +- a channel that the kernel task will be blocked on when the fd is not + available. + +#### Basic I/O Implementation + +Currently we have implemented basic functionalities of read and write for our +FUSE. We describe the design and ways to improve it here: + +##### Basic FUSE Read + +The vfs2 expects implementations of `vfs.FileDescriptionImpl.Read()` and +`vfs.FileDescriptionImpl.PRead()`. When a syscall is made, it will eventually +reach our implementation of those interface functions located at +`pkg/sentry/fsimpl/fuse/regular_file.go` for regular files. + +After validation checks of the input, sentry sends `FUSE_READ` requests to the +FUSE daemon. The FUSE daemon returns data after the `fuse_out_header` as the +responses. For the first version, we create a copy in kernel memory of those +data. They are represented as a byte slice in the marshalled struct. This +happens as a common process for all the FUSE responses at this moment at +`pkg/sentry/fsimpl/fuse/dev.go:writeLocked()`. We then directly copy from this +intermediate buffer to the input buffer provided by the read syscall. + +There is an extra requirement for FUSE: When mounting the FUSE fs, the mounter +or the FUSE daemon can specify a `max_read` or a `max_pages` parameter. They are +the upperbound of the bytes to read in each `FUSE_READ` request. We implemented +the code to handle the fragmented reads. + +To improve the performance: ideally we should have buffer cache to copy those +data from the responses of FUSE daemon into, as is also the design of several +other existing file system implementations for sentry, instead of a single-use +temporary buffer. Directly mapping the memory of one process to another could +also boost the performance, but to keep them isolated, we did not choose to do +so. + +##### Basic FUSE Write + +The vfs2 invokes implementations of `vfs.FileDescriptionImpl.Write()` and +`vfs.FileDescriptionImpl.PWrite()` on the regular file descriptor of FUSE when a +user makes write(2) and pwrite(2) syscall. + +For valid writes, sentry sends the bytes to write after a `FUSE_WRITE` header +(can be regarded as a request with 2 payloads) to the FUSE daemon. For the first +version, we allocate a buffer inside kernel memory to store the bytes from the +user, and copy directly from that buffer to the memory of FUSE daemon. This +happens at `pkg/sentry/fsimpl/fuse/dev.go:readLocked()` + +The parameters `max_write` and `max_pages` restrict the number of bytes in one +`FUSE_WRITE`. There are code handling fragmented writes in current +implementation. + +To have better performance: the extra copy created to store the bytes to write +can be replaced by the buffer cache as well. + # Appendix ## FUSE Protocol diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 42a6c41c2..1368014c4 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/fdnotifier", "//pkg/iovec", "//pkg/log", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/secio", @@ -55,7 +56,6 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go index 5d4f312cf..c8231e0aa 100644 --- a/pkg/sentry/fs/host/socket_unsafe.go +++ b/pkg/sentry/fs/host/socket_unsafe.go @@ -65,10 +65,10 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) ( controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC if n > length { - return length, n, msg.Controllen, controlTrunc, err + return length, n, msg.Controllen, controlTrunc, nil } - return n, n, msg.Controllen, controlTrunc, err + return n, n, msg.Controllen, controlTrunc, nil } // fdWriteVec sends from bufs to fd. diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 67a807f9d..1183727ab 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -17,6 +17,7 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -54,7 +54,7 @@ type TTYFileOperations struct { func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File { return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{ fileOperations: fileOperations{iops: iops}, - termios: linux.DefaultSlaveTermios, + termios: linux.DefaultReplicaTermios, }) } diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index b79cd9877..004910453 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -270,7 +270,7 @@ func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string, // SetXattr calls i.InodeOperations.SetXattr with i as the Inode. func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, flags uint32) error { if i.overlay != nil { - return overlaySetxattr(ctx, i.overlay, d, name, value, flags) + return overlaySetXattr(ctx, i.overlay, d, name, value, flags) } return i.InodeOperations.SetXattr(ctx, i, name, value, flags) } diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index dc2e353d9..b16ab08ba 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -16,7 +16,6 @@ package fs import ( "fmt" - "strings" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -539,7 +538,7 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin // Don't forward the value of the extended attribute if it would // unexpectedly change the behavior of a wrapping overlay layer. - if strings.HasPrefix(XattrOverlayPrefix, name) { + if isXattrOverlay(name) { return "", syserror.ENODATA } @@ -553,9 +552,9 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin return s, err } -func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error { +func overlaySetXattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error { // Don't allow changes to overlay xattrs through a setxattr syscall. - if strings.HasPrefix(XattrOverlayPrefix, name) { + if isXattrOverlay(name) { return syserror.EPERM } @@ -578,7 +577,7 @@ func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[st for name := range names { // Same as overlayGetXattr, we shouldn't forward along // overlay attributes. - if strings.HasPrefix(XattrOverlayPrefix, name) { + if isXattrOverlay(name) { delete(names, name) } } @@ -587,7 +586,7 @@ func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[st func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error { // Don't allow changes to overlay xattrs through a removexattr syscall. - if strings.HasPrefix(XattrOverlayPrefix, name) { + if isXattrOverlay(name) { return syserror.EPERM } diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 77c2c5c0e..b8b2281a8 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -50,6 +50,7 @@ go_library( "//pkg/sync", "//pkg/syserror", "//pkg/tcpip/header", + "//pkg/tcpip/network/ipv4", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 8615b60f0..e555672ad 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -54,7 +55,7 @@ type tcpMemInode struct { // size stores the tcp buffer size during save, and sets the buffer // size in netstack in restore. We must save/restore this here, since - // netstack itself is stateless. + // a netstack instance is created on restore. size inet.TCPBufferSize // mu protects against concurrent reads/writes to files based on this @@ -258,6 +259,9 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque if src.NumBytes() == 0 { return 0, nil } + + // Only consider size of one memory page for input for performance reasons. + // We are only reading if it's zero or not anyway. src = src.TakeFirst(usermem.PageSize - 1) var v int32 @@ -383,11 +387,125 @@ func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.S return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } +// ipForwarding implements fs.InodeOperations. +// +// ipForwarding is used to enable/disable packet forwarding of netstack. +// +// +stateify savable +type ipForwarding struct { + fsutil.SimpleFileInode + + stack inet.Stack `state:"wait"` + + // enabled stores the IPv4 forwarding state on save. + // We must save/restore this here, since a netstack instance + // is created on restore. + enabled *bool +} + +func newIPForwardingInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ipf := &ipForwarding{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ctx, ipf, msrc, sattr) +} + +// Truncate implements fs.InodeOperations.Truncate. Truncate is called when +// O_TRUNC is specified for any kind of existing Dirent but is not called via +// (f)truncate for proc files. +func (*ipForwarding) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// +stateify savable +type ipForwardingFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + ipf *ipForwarding + + stack inet.Stack `state:"wait"` +} + +// GetFile implements fs.InodeOperations.GetFile. +func (ipf *ipForwarding) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &ipForwardingFile{ + stack: ipf.stack, + ipf: ipf, + }), nil +} + +// Read implements fs.FileOperations.Read. +func (f *ipForwardingFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + if f.ipf.enabled == nil { + enabled := f.stack.Forwarding(ipv4.ProtocolNumber) + f.ipf.enabled = &enabled + } + + val := "0\n" + if *f.ipf.enabled { + // Technically, this is not quite compatible with Linux. Linux + // stores these as an integer, so if you write "2" into + // ip_forward, you should get 2 back. + val = "1\n" + } + n, err := dst.CopyOut(ctx, []byte(val)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +// +// Offset is ignored, multiple writes are not supported. +func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Only consider size of one memory page for input for performance reasons. + // We are only reading if it's zero or not anyway. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return n, err + } + if f.ipf.enabled == nil { + f.ipf.enabled = new(bool) + } + *f.ipf.enabled = v != 0 + return n, f.stack.SetForwarding(ipv4.ProtocolNumber, *f.ipf.enabled) +} + func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { contents := map[string]*fs.Inode{ // Add tcp_sack. "tcp_sack": newTCPSackInode(ctx, msrc, s), + // Add ip_forward. + "ip_forward": newIPForwardingInode(ctx, msrc, s), + // The following files are simple stubs until they are // implemented in netstack, most of these files are // configuration related. We use the value closest to the diff --git a/pkg/sentry/fs/proc/sys_net_state.go b/pkg/sentry/fs/proc/sys_net_state.go index 6eba709c6..4cb4741af 100644 --- a/pkg/sentry/fs/proc/sys_net_state.go +++ b/pkg/sentry/fs/proc/sys_net_state.go @@ -14,7 +14,11 @@ package proc -import "fmt" +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" +) // beforeSave is invoked by stateify. func (t *tcpMemInode) beforeSave() { @@ -40,3 +44,12 @@ func (s *tcpSack) afterLoad() { } } } + +// afterLoad is invoked by stateify. +func (ipf *ipForwarding) afterLoad() { + if ipf.enabled != nil { + if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, *ipf.enabled); err != nil { + panic(fmt.Sprintf("failed to set IPv4 forwarding [%v]: %v", *ipf.enabled, err)) + } + } +} diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go index 355e83d47..6ef5738e7 100644 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ b/pkg/sentry/fs/proc/sys_net_test.go @@ -123,3 +123,76 @@ func TestConfigureRecvBufferSize(t *testing.T) { } } } + +// TestIPForwarding tests the implementation of +// /proc/sys/net/ipv4/ip_forwarding +func TestIPForwarding(t *testing.T) { + ctx := context.Background() + s := inet.NewTestStack() + + var cases = []struct { + comment string + initial bool + str string + final bool + }{ + { + comment: `Forwarding is disabled; write 1 and enable forwarding`, + initial: false, + str: "1", + final: true, + }, + { + comment: `Forwarding is disabled; write 0 and disable forwarding`, + initial: false, + str: "0", + final: false, + }, + { + comment: `Forwarding is enabled; write 1 and enable forwarding`, + initial: true, + str: "1", + final: true, + }, + { + comment: `Forwarding is enabled; write 0 and disable forwarding`, + initial: true, + str: "0", + final: false, + }, + { + comment: `Forwarding is disabled; write 2404 and enable forwarding`, + initial: false, + str: "2404", + final: true, + }, + { + comment: `Forwarding is enabled; write 2404 and enable forwarding`, + initial: true, + str: "2404", + final: true, + }, + } + for _, c := range cases { + t.Run(c.comment, func(t *testing.T) { + s.IPForwarding = c.initial + ipf := &ipForwarding{stack: s} + file := &ipForwardingFile{ + stack: s, + ipf: ipf, + } + + // Write the values. + src := usermem.BytesIOSequence([]byte(c.str)) + if n, err := file.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { + t.Errorf("file.Write(ctx, nil, %q, 0) = (%d, %v); want (%d, nil)", c.str, n, err, len(c.str)) + } + + // Read the values from the stack and check them. + if got, want := s.IPForwarding, c.final; got != want { + t.Errorf("s.IPForwarding incorrect; got: %v, want: %v", got, want) + } + + }) + } +} diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 9cf7f2a62..22d658acf 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -84,6 +84,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bo "auxv": newAuxvec(t, msrc), "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), "comm": newComm(t, msrc), + "cwd": newCwd(t, msrc), "environ": newExecArgInode(t, msrc, environExecArg), "exe": newExe(t, msrc), "fd": newFdDir(t, msrc), @@ -300,6 +301,49 @@ func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { return exec.PathnameWithDeleted(ctx), nil } +// cwd is an fs.InodeOperations symlink for the /proc/PID/cwd file. +// +// +stateify savable +type cwd struct { + ramfs.Symlink + + t *kernel.Task +} + +func newCwd(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + cwdSymlink := &cwd{ + Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), + t: t, + } + return newProcInode(t, cwdSymlink, msrc, fs.Symlink, t) +} + +// Readlink implements fs.InodeOperations. +func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if !kernel.ContextCanTrace(ctx, e.t, false) { + return "", syserror.EACCES + } + if err := checkTaskState(e.t); err != nil { + return "", err + } + cwd := e.t.FSContext().WorkingDirectory() + if cwd == nil { + // It could have raced with process deletion. + return "", syserror.ESRCH + } + defer cwd.DecRef(ctx) + + root := fs.RootFromContext(ctx) + if root == nil { + // It could have raced with process deletion. + return "", syserror.ESRCH + } + defer root.DecRef(ctx) + + name, _ := cwd.FullName(root) + return name, nil +} + // namespaceSymlink represents a symlink in the namespacefs, such as the files // in /proc/<pid>/ns. // @@ -604,7 +648,7 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ( var vss, rss, data uint64 s.t.WithMuLocked(func(t *kernel.Task) { if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.Size() + fds = fdTable.CurrentMaxFDs() } if mm := t.MemoryManager(); mm != nil { vss = mm.VirtualMemorySize() diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 5cb0e0417..e6d0eb359 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -10,13 +10,14 @@ go_library( "line_discipline.go", "master.go", "queue.go", - "slave.go", + "replica.go", "terminal.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 463f6189e..c2da80bc2 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -37,14 +37,14 @@ import ( // This indirectly manages all terminals within the mount. // // New Terminals are created by masterInodeOperations.GetFile, which registers -// the slave Inode in the this directory for discovery via Lookup/Readdir. The -// slave inode is unregistered when the master file is Released, as the slave +// the replica Inode in the this directory for discovery via Lookup/Readdir. The +// replica inode is unregistered when the master file is Released, as the replica // is no longer discoverable at that point. // // References on the underlying Terminal are held by masterFileOperations and -// slaveInodeOperations. +// replicaInodeOperations. // -// masterInodeOperations and slaveInodeOperations hold a pointer to +// masterInodeOperations and replicaInodeOperations hold a pointer to // dirInodeOperations, which is reference counted by the refcount their // corresponding Dirents hold on their parent (this directory). // @@ -76,16 +76,16 @@ type dirInodeOperations struct { // master is the master PTY inode. master *fs.Inode - // slaves contains the slave inodes reachable from the directory. + // replicas contains the replica inodes reachable from the directory. // - // A new slave is added by allocateTerminal and is removed by + // A new replica is added by allocateTerminal and is removed by // masterFileOperations.Release. // - // A reference is held on every slave in the map. - slaves map[uint32]*fs.Inode + // A reference is held on every replica in the map. + replicas map[uint32]*fs.Inode // dentryMap is a SortedDentryMap used to implement Readdir containing - // the master and all entries in slaves. + // the master and all entries in replicas. dentryMap *fs.SortedDentryMap // next is the next pty index to use. @@ -101,7 +101,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode { d := &dirInodeOperations{ InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0555), linux.DEVPTS_SUPER_MAGIC), msrc: m, - slaves: make(map[uint32]*fs.Inode), + replicas: make(map[uint32]*fs.Inode), dentryMap: fs.NewSortedDentryMap(nil), } // Linux devpts uses a default mode of 0000 for ptmx which can be @@ -133,7 +133,7 @@ func (d *dirInodeOperations) Release(ctx context.Context) { defer d.mu.Unlock() d.master.DecRef(ctx) - if len(d.slaves) != 0 { + if len(d.replicas) != 0 { panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d)) } } @@ -149,14 +149,14 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str return fs.NewDirent(ctx, d.master, name), nil } - // Slave number? + // Replica number? n, err := strconv.ParseUint(name, 10, 32) if err != nil { // Not found. return nil, syserror.ENOENT } - s, ok := d.slaves[uint32(n)] + s, ok := d.replicas[uint32(n)] if !ok { return nil, syserror.ENOENT } @@ -236,7 +236,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e return nil, syserror.ENOMEM } - if _, ok := d.slaves[n]; ok { + if _, ok := d.replicas[n]; ok { panic(fmt.Sprintf("pty index collision; index %d already exists", n)) } @@ -244,19 +244,19 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e d.next++ // The reference returned by newTerminal is returned to the caller. - // Take another for the slave inode. + // Take another for the replica inode. t.IncRef() // Create a pts node. The owner is based on the context that opens // ptmx. creds := auth.CredentialsFromContext(ctx) uid, gid := creds.EffectiveKUID, creds.EffectiveKGID - slave := newSlaveInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666)) + replica := newReplicaInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666)) - d.slaves[n] = slave + d.replicas[n] = replica d.dentryMap.Add(strconv.FormatUint(uint64(n), 10), fs.DentAttr{ - Type: slave.StableAttr.Type, - InodeID: slave.StableAttr.InodeID, + Type: replica.StableAttr.Type, + InodeID: replica.StableAttr.InodeID, }) return t, nil @@ -267,18 +267,18 @@ func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) { d.mu.Lock() defer d.mu.Unlock() - // The slave end disappears from the directory when the master end is - // closed, even if the slave end is open elsewhere. + // The replica end disappears from the directory when the master end is + // closed, even if the replica end is open elsewhere. // // N.B. since we're using a backdoor method to remove a directory entry // we won't properly fire inotify events like Linux would. - s, ok := d.slaves[t.n] + s, ok := d.replicas[t.n] if !ok { panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d)) } s.DecRef(ctx) - delete(d.slaves, t.n) + delete(d.replicas, t.n) d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10)) } diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index 2d4d44bf3..13f4901db 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -79,8 +79,8 @@ type superOperations struct{} // // It always returns true, forcing a Lookup for all entries. // -// Slave entries are dropped from dir when their master is closed, so an -// existing slave Dirent in the tree is not sufficient to guarantee that it +// Replica entries are dropped from dir when their master is closed, so an +// existing replica Dirent in the tree is not sufficient to guarantee that it // still exists on the filesystem. func (superOperations) Revalidate(context.Context, string, *fs.Inode, *fs.Inode) bool { return true diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 2e9dd2d55..b34f4a0eb 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -43,7 +44,7 @@ const ( ) // lineDiscipline dictates how input and output are handled between the -// pseudoterminal (pty) master and slave. It can be configured to alter I/O, +// pseudoterminal (pty) master and replica. It can be configured to alter I/O, // modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man // pages are good resources for how to affect the line discipline: // @@ -54,8 +55,8 @@ const ( // // lineDiscipline has a simple structure but supports a multitude of options // (see the above man pages). It consists of two queues of bytes: one from the -// terminal master to slave (the input queue) and one from slave to master (the -// output queue). When bytes are written to one end of the pty, the line +// terminal master to replica (the input queue) and one from replica to master +// (the output queue). When bytes are written to one end of the pty, the line // discipline reads the bytes, modifies them or takes special action if // required, and enqueues them to be read by the other end of the pty: // @@ -64,7 +65,7 @@ const ( // | (inputQueueWrite) +-------------+ (inputQueueRead) | // | | // | v -// masterFD slaveFD +// masterFD replicaFD // ^ | // | | // | output to terminal +--------------+ output from process | @@ -103,8 +104,8 @@ type lineDiscipline struct { // masterWaiter is used to wait on the master end of the TTY. masterWaiter waiter.Queue `state:"zerovalue"` - // slaveWaiter is used to wait on the slave end of the TTY. - slaveWaiter waiter.Queue `state:"zerovalue"` + // replicaWaiter is used to wait on the replica end of the TTY. + replicaWaiter waiter.Queue `state:"zerovalue"` } func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { @@ -115,27 +116,23 @@ func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { } // getTermios gets the linux.Termios for the tty. -func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.RLock() defer l.termiosMu.RUnlock() // We must copy a Termios struct, not KernelTermios. t := l.termios.ToTermios() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyOut(task, args[2].Pointer()) return 0, err } // setTermios sets a linux.Termios for the tty. -func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.Lock() defer l.termiosMu.Unlock() oldCanonEnabled := l.termios.LEnabled(linux.ICANON) // We must copy a Termios struct, not KernelTermios. var t linux.Termios - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyIn(task, args[2].Pointer()) l.termios.FromTermios(t) // If canonical mode is turned off, move bytes from inQueue's wait @@ -146,27 +143,23 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc l.inQueue.pushWaitBufLocked(l) l.inQueue.readable = true l.inQueue.mu.Unlock() - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return 0, err } -func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) windowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyOut(t, args[2].Pointer()) return err } -func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) setWindowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyIn(t, args[2].Pointer()) return err } @@ -176,14 +169,14 @@ func (l *lineDiscipline) masterReadiness() waiter.EventMask { return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios) } -func (l *lineDiscipline) slaveReadiness() waiter.EventMask { +func (l *lineDiscipline) replicaReadiness() waiter.EventMask { l.termiosMu.RLock() defer l.termiosMu.RUnlock() return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) } -func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.inQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error { + return l.inQueue.readableSize(t, args) } func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { @@ -196,7 +189,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque if n > 0 { l.masterWaiter.Notify(waiter.EventOut) if pushed { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return n, nil } @@ -211,14 +204,14 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) return n, nil } return 0, syserror.ErrWouldBlock } -func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.outQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error { + return l.outQueue.readableSize(t, args) } func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { @@ -229,7 +222,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventOut) + l.replicaWaiter.Notify(waiter.EventOut) if pushed { l.masterWaiter.Notify(waiter.EventIn) } diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index e00746017..b91184b1b 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -17,9 +17,11 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -152,46 +154,51 @@ func (mf *masterFileOperations) Write(ctx context.Context, _ *fs.File, src userm // Ioctl implements fs.FileOperations.Ioctl. func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the output queue read buffer. - return 0, mf.t.ld.outputQueueReadSize(ctx, io, args) + return 0, mf.t.ld.outputQueueReadSize(t, args) case linux.TCGETS: // N.B. TCGETS on the master actually returns the configuration - // of the slave end. - return mf.t.ld.getTermios(ctx, io, args) + // of the replica end. + return mf.t.ld.getTermios(t, args) case linux.TCSETS: // N.B. TCSETS on the master actually affects the configuration - // of the slave end. - return mf.t.ld.setTermios(ctx, io, args) + // of the replica end. + return mf.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return mf.t.ld.setTermios(ctx, io, args) + return mf.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mf.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(mf.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCSPTLCK: // TODO(b/29356795): Implement pty locking. For now just pretend we do. return 0, nil case linux.TIOCGWINSZ: - return 0, mf.t.ld.windowSize(ctx, io, args) + return 0, mf.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, mf.t.ld.setWindowSize(ctx, io, args) + return 0, mf.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, mf.t.setControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mf.t.setControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, mf.t.releaseControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mf.t.releaseControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return mf.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */) + return mf.t.foregroundProcessGroup(ctx, args, true /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return mf.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */) + return mf.t.setForegroundProcessGroup(ctx, args, true /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index c5d7ec717..79975d812 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -17,8 +17,10 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -32,7 +34,7 @@ import ( const waitBufMaxBytes = 131072 // queue represents one of the input or output queues between a pty master and -// slave. Bytes written to a queue are added to the read buffer until it is +// replica. Bytes written to a queue are added to the read buffer until it is // full, at which point they are written to the wait buffer. Bytes are // processed (i.e. undergo termios transformations) as they are added to the // read buffer. The read buffer is readable when its length is nonzero and @@ -85,17 +87,15 @@ func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask { } // readableSize writes the number of readable bytes to userspace. -func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (q *queue) readableSize(t *kernel.Task, args arch.SyscallArguments) error { q.mu.Lock() defer q.mu.Unlock() - var size int32 + size := primitive.Int32(0) if q.readable { - size = int32(len(q.readBuf)) + size = primitive.Int32(len(q.readBuf)) } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := size.CopyOut(t, args[2].Pointer()) return err } diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/replica.go index 7c7292687..385d230fb 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/replica.go @@ -17,9 +17,11 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -27,11 +29,11 @@ import ( // LINT.IfChange -// slaveInodeOperations are the fs.InodeOperations for the slave end of the +// replicaInodeOperations are the fs.InodeOperations for the replica end of the // Terminal (pts file). // // +stateify savable -type slaveInodeOperations struct { +type replicaInodeOperations struct { fsutil.SimpleFileInode // d is the containing dir. @@ -41,13 +43,13 @@ type slaveInodeOperations struct { t *Terminal } -var _ fs.InodeOperations = (*slaveInodeOperations)(nil) +var _ fs.InodeOperations = (*replicaInodeOperations)(nil) -// newSlaveInode creates an fs.Inode for the slave end of a terminal. +// newReplicaInode creates an fs.Inode for the replica end of a terminal. // -// newSlaveInode takes ownership of t. -func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode { - iops := &slaveInodeOperations{ +// newReplicaInode takes ownership of t. +func newReplicaInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode { + iops := &replicaInodeOperations{ SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, owner, p, linux.DEVPTS_SUPER_MAGIC), d: d, t: t, @@ -64,18 +66,18 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne Type: fs.CharacterDevice, // See fs/devpts/inode.c:devpts_fill_super. BlockSize: 1024, - DeviceFileMajor: linux.UNIX98_PTY_SLAVE_MAJOR, + DeviceFileMajor: linux.UNIX98_PTY_REPLICA_MAJOR, DeviceFileMinor: t.n, }) } // Release implements fs.InodeOperations.Release. -func (si *slaveInodeOperations) Release(ctx context.Context) { +func (si *replicaInodeOperations) Release(ctx context.Context) { si.t.DecRef(ctx) } // Truncate implements fs.InodeOperations.Truncate. -func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { +func (*replicaInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { return nil } @@ -83,14 +85,15 @@ func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { // // This may race with destruction of the terminal. If the terminal is gone, it // returns ENOENT. -func (si *slaveInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return fs.NewFile(ctx, d, flags, &slaveFileOperations{si: si}), nil +func (si *replicaInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, d, flags, &replicaFileOperations{si: si}), nil } -// slaveFileOperations are the fs.FileOperations for the slave end of a terminal. +// replicaFileOperations are the fs.FileOperations for the replica end of a +// terminal. // // +stateify savable -type slaveFileOperations struct { +type replicaFileOperations struct { fsutil.FilePipeSeek `state:"nosave"` fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileNoFsync `state:"nosave"` @@ -100,79 +103,84 @@ type slaveFileOperations struct { fsutil.FileUseInodeUnstableAttr `state:"nosave"` // si is the inode operations. - si *slaveInodeOperations + si *replicaInodeOperations } -var _ fs.FileOperations = (*slaveFileOperations)(nil) +var _ fs.FileOperations = (*replicaFileOperations)(nil) // Release implements fs.FileOperations.Release. -func (sf *slaveFileOperations) Release(context.Context) { +func (sf *replicaFileOperations) Release(context.Context) { } // EventRegister implements waiter.Waitable.EventRegister. -func (sf *slaveFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - sf.si.t.ld.slaveWaiter.EventRegister(e, mask) +func (sf *replicaFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + sf.si.t.ld.replicaWaiter.EventRegister(e, mask) } // EventUnregister implements waiter.Waitable.EventUnregister. -func (sf *slaveFileOperations) EventUnregister(e *waiter.Entry) { - sf.si.t.ld.slaveWaiter.EventUnregister(e) +func (sf *replicaFileOperations) EventUnregister(e *waiter.Entry) { + sf.si.t.ld.replicaWaiter.EventUnregister(e) } // Readiness implements waiter.Waitable.Readiness. -func (sf *slaveFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - return sf.si.t.ld.slaveReadiness() +func (sf *replicaFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + return sf.si.t.ld.replicaReadiness() } // Read implements fs.FileOperations.Read. -func (sf *slaveFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { +func (sf *replicaFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { return sf.si.t.ld.inputQueueRead(ctx, dst) } // Write implements fs.FileOperations.Write. -func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { +func (sf *replicaFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { return sf.si.t.ld.outputQueueWrite(ctx, src) } // Ioctl implements fs.FileOperations.Ioctl. -func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (sf *replicaFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the input queue read buffer. - return 0, sf.si.t.ld.inputQueueReadSize(ctx, io, args) + return 0, sf.si.t.ld.inputQueueReadSize(t, args) case linux.TCGETS: - return sf.si.t.ld.getTermios(ctx, io, args) + return sf.si.t.ld.getTermios(t, args) case linux.TCSETS: - return sf.si.t.ld.setTermios(ctx, io, args) + return sf.si.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return sf.si.t.ld.setTermios(ctx, io, args) + return sf.si.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sf.si.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(sf.si.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCGWINSZ: - return 0, sf.si.t.ld.windowSize(ctx, io, args) + return 0, sf.si.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, sf.si.t.ld.setWindowSize(ctx, io, args) + return 0, sf.si.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, sf.si.t.setControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sf.si.t.setControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, sf.si.t.releaseControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sf.si.t.releaseControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return sf.si.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */) + return sf.si.t.foregroundProcessGroup(ctx, args, false /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return sf.si.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */) + return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY } } -// LINT.ThenChange(../../fsimpl/devpts/slave.go) +// LINT.ThenChange(../../fsimpl/devpts/replica.go) diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index ddcccf4da..4f431d74d 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -17,10 +17,10 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange @@ -44,19 +44,19 @@ type Terminal struct { // this terminal. This field is immutable. masterKTTY *kernel.TTY - // slaveKTTY contains the controlling process of the slave end of this + // replicaKTTY contains the controlling process of the replica end of this // terminal. This field is immutable. - slaveKTTY *kernel.TTY + replicaKTTY *kernel.TTY } func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal { - termios := linux.DefaultSlaveTermios + termios := linux.DefaultReplicaTermios t := Terminal{ - d: d, - n: n, - ld: newLineDiscipline(termios), - masterKTTY: &kernel.TTY{Index: n}, - slaveKTTY: &kernel.TTY{Index: n}, + d: d, + n: n, + ld: newLineDiscipline(termios), + masterKTTY: &kernel.TTY{Index: n}, + replicaKTTY: &kernel.TTY{Index: n}, } t.EnableLeakCheck("tty.Terminal") return &t @@ -64,7 +64,7 @@ func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal // setControllingTTY makes tm the controlling terminal of the calling thread // group. -func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) setControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("setControllingTTY must be called from a task context") @@ -75,7 +75,7 @@ func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args a // releaseControllingTTY removes tm as the controlling terminal of the calling // thread group. -func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) releaseControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("releaseControllingTTY must be called from a task context") @@ -85,7 +85,7 @@ func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, ar } // foregroundProcessGroup gets the process group ID of tm's foreground process. -func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) foregroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("foregroundProcessGroup must be called from a task context") @@ -97,24 +97,21 @@ func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, a } // Write it out to *arg. - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{ - AddressSpaceActive: true, - }) + retP := primitive.Int32(ret) + _, err = retP.CopyOut(task, args[2].Pointer()) return 0, err } // foregroundProcessGroup sets tm's foreground process. -func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("setForegroundProcessGroup must be called from a task context") } // Read in the process group ID. - var pgid int32 - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + var pgid primitive.Int32 + if _, err := pgid.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } @@ -126,7 +123,7 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY { if isMaster { return tm.masterKTTY } - return tm.slaveKTTY + return tm.replicaKTTY } // LINT.ThenChange(../../fsimpl/devpts/terminal.go) diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go index 2cbc05678..49edee83d 100644 --- a/pkg/sentry/fs/tty/tty_test.go +++ b/pkg/sentry/fs/tty/tty_test.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func TestSimpleMasterToSlave(t *testing.T) { - ld := newLineDiscipline(linux.DefaultSlaveTermios) +func TestSimpleMasterToReplica(t *testing.T) { + ld := newLineDiscipline(linux.DefaultReplicaTermios) ctx := contexttest.Context(t) inBytes := []byte("hello, tty\n") src := usermem.BytesIOSequence(inBytes) |