From e91164893d6bbaf42639b7e4bb948e9165587130 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 25 Aug 2020 15:26:54 -0700 Subject: [go-marshal] Enable auto-marshalling for host tty. PiperOrigin-RevId: 328415633 --- pkg/abi/linux/tty.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go index 8ac02aee8..e640969a6 100644 --- a/pkg/abi/linux/tty.go +++ b/pkg/abi/linux/tty.go @@ -23,6 +23,8 @@ const ( ) // Winsize is struct winsize, defined in uapi/asm-generic/termios.h. +// +// +marshal type Winsize struct { Row uint16 Col uint16 @@ -31,6 +33,8 @@ type Winsize struct { } // Termios is struct termios, defined in uapi/asm-generic/termbits.h. +// +// +marshal type Termios struct { InputFlags uint32 OutputFlags uint32 -- cgit v1.2.3 From 5683a8568adc9c13c1cf9d360dae105dc60b145d Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 25 Aug 2020 16:26:53 -0700 Subject: Expose basic coverage information to userspace through kcov interface. In Linux, a kernel configuration is set that compiles the kernel with a custom function that is called at the beginning of every basic block, which updates the memory-mapped coverage information. The Go coverage tool does not allow us to inject arbitrary instructions into basic blocks, but it does provide data that we can convert to a kcov-like format and transfer them to userspace through a memory mapping. Note that this is not a strict implementation of kcov, which is especially tricky to do because we do not have the same coverage tools available in Go that that are available for the actual Linux kernel. In Linux, a kernel configuration is set that compiles the kernel with a custom function that is called at the beginning of every basic block to write program counters to the kcov memory mapping. In Go, however, coverage tools only give us a count of basic blocks as they are executed. Every time we return to userspace, we collect the coverage information and write out PCs for each block that was executed, providing userspace with the illusion that the kcov data is always up to date. For convenience, we also generate a unique synthetic PC for each block instead of using actual PCs. Finally, we do not provide thread-specific coverage data (each kcov instance only contains PCs executed by the thread owning it); instead, we will supply data for any file specified by -- instrumentation_filter. Also, fix issue in nogo that was causing pkg/coverage:coverage_nogo compilation to fail. PiperOrigin-RevId: 328426526 --- pkg/abi/linux/ioctl.go | 21 +++ pkg/coverage/BUILD | 14 ++ pkg/coverage/coverage.go | 175 +++++++++++++++++++++ pkg/sentry/fsimpl/sys/BUILD | 5 + pkg/sentry/fsimpl/sys/kcov.go | 116 ++++++++++++++ pkg/sentry/fsimpl/sys/sys.go | 18 ++- pkg/sentry/kernel/BUILD | 3 + pkg/sentry/kernel/kcov.go | 321 +++++++++++++++++++++++++++++++++++++++ pkg/sentry/kernel/kcov_unsafe.go | 28 ++++ pkg/sentry/kernel/kernel.go | 2 +- pkg/sentry/kernel/task.go | 18 +++ pkg/sentry/kernel/task_exit.go | 2 + runsc/config/config.go | 6 +- test/syscalls/BUILD | 4 + test/syscalls/linux/BUILD | 14 ++ test/syscalls/linux/kcov.cc | 70 +++++++++ 16 files changed, 813 insertions(+), 4 deletions(-) create mode 100644 pkg/coverage/BUILD create mode 100644 pkg/coverage/coverage.go create mode 100644 pkg/sentry/fsimpl/sys/kcov.go create mode 100644 pkg/sentry/kernel/kcov.go create mode 100644 pkg/sentry/kernel/kcov_unsafe.go create mode 100644 test/syscalls/linux/kcov.cc (limited to 'pkg/abi') diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 2c5e56ae5..d6dbedc3e 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -117,3 +117,24 @@ const ( func IOC(dir, typ, nr, size uint32) uint32 { return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT } + +// Kcov ioctls from kernel/kcov.h. +var ( + KCOV_INIT_TRACE = IOC(_IOC_READ, 'c', 1, 8) + KCOV_ENABLE = IOC(_IOC_NONE, 'c', 100, 0) + KCOV_DISABLE = IOC(_IOC_NONE, 'c', 101, 0) +) + +// Kcov trace types from kernel/kcov.h. +const ( + KCOV_TRACE_PC = 0 + KCOV_TRACE_CMP = 1 +) + +// Kcov state constants from kernel/kcov.h. +const ( + KCOV_MODE_DISABLED = 0 + KCOV_MODE_INIT = 1 + KCOV_MODE_TRACE_PC = 2 + KCOV_MODE_TRACE_CMP = 3 +) diff --git a/pkg/coverage/BUILD b/pkg/coverage/BUILD new file mode 100644 index 000000000..a198e8028 --- /dev/null +++ b/pkg/coverage/BUILD @@ -0,0 +1,14 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "coverage", + srcs = ["coverage.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/sync", + "//pkg/usermem", + "@io_bazel_rules_go//go/tools/coverdata", + ], +) diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go new file mode 100644 index 000000000..6831adcce --- /dev/null +++ b/pkg/coverage/coverage.go @@ -0,0 +1,175 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package coverage provides an interface through which Go coverage data can +// be collected, converted to kcov format, and exposed to userspace. +// +// Coverage can be enabled by calling bazel {build,test} with +// --collect_coverage_data and --instrumentation_filter with the desired +// coverage surface. This causes bazel to use the Go cover tool manually to +// generate instrumented files. It injects a hook that registers all coverage +// data with the coverdata package. +package coverage + +import ( + "fmt" + "io" + "sort" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" + + "github.com/bazelbuild/rules_go/go/tools/coverdata" +) + +// KcovAvailable returns whether the kcov coverage interface is available. It is +// available as long as coverage is enabled for some files. +func KcovAvailable() bool { + return len(coverdata.Cover.Blocks) > 0 +} + +// coverageMu must be held while accessing coverdata.Cover. This prevents +// concurrent reads/writes from multiple threads collecting coverage data. +var coverageMu sync.RWMutex + +// once ensures that globalData is only initialized once. +var once sync.Once + +var globalData struct { + // files is the set of covered files sorted by filename. It is calculated at + // startup. + files []string + + // syntheticPCs are a set of PCs calculated at startup, where the PC + // at syntheticPCs[i][j] corresponds to file i, block j. + syntheticPCs [][]uint64 +} + +// ClearCoverageData clears existing coverage data. +func ClearCoverageData() { + coverageMu.Lock() + defer coverageMu.Unlock() + for _, counters := range coverdata.Cover.Counters { + for index := 0; index < len(counters); index++ { + atomic.StoreUint32(&counters[index], 0) + } + } +} + +var coveragePool = sync.Pool{ + New: func() interface{} { + return make([]byte, 0) + }, +} + +// ConsumeCoverageData builds and writes the collection of covered PCs. It +// returns the number of bytes written. +// +// In Linux, a kernel configuration is set that compiles the kernel with a +// custom function that is called at the beginning of every basic block, which +// updates the memory-mapped coverage information. The Go coverage tool does not +// allow us to inject arbitrary instructions into basic blocks, but it does +// provide data that we can convert to a kcov-like format and transfer them to +// userspace through a memory mapping. +// +// Note that this is not a strict implementation of kcov, which is especially +// tricky to do because we do not have the same coverage tools available in Go +// that that are available for the actual Linux kernel. In Linux, a kernel +// configuration is set that compiles the kernel with a custom function that is +// called at the beginning of every basic block to write program counters to the +// kcov memory mapping. In Go, however, coverage tools only give us a count of +// basic blocks as they are executed. Every time we return to userspace, we +// collect the coverage information and write out PCs for each block that was +// executed, providing userspace with the illusion that the kcov data is always +// up to date. For convenience, we also generate a unique synthetic PC for each +// block instead of using actual PCs. Finally, we do not provide thread-specific +// coverage data (each kcov instance only contains PCs executed by the thread +// owning it); instead, we will supply data for any file specified by -- +// instrumentation_filter. +// +// Note that we "consume", i.e. clear, coverdata when this function is run, to +// ensure that each event is only reported once. +// +// TODO(b/160639712): evaluate whether it is ok to reset the global coverage +// data every time this function is run. We could technically have each thread +// store a local snapshot against which we compare the most recent coverdata so +// that separate threads do not affect each other's view of the data. +func ConsumeCoverageData(w io.Writer) int { + once.Do(initCoverageData) + + coverageMu.Lock() + defer coverageMu.Unlock() + + total := 0 + var pcBuffer [8]byte + for fileIndex, file := range globalData.files { + counters := coverdata.Cover.Counters[file] + for index := 0; index < len(counters); index++ { + val := atomic.SwapUint32(&counters[index], 0) + if val != 0 { + // Calculate the synthetic PC. + pc := globalData.syntheticPCs[fileIndex][index] + + usermem.ByteOrder.PutUint64(pcBuffer[:], pc) + n, err := w.Write(pcBuffer[:]) + if err != nil { + if err == io.EOF { + // Simply stop writing if we encounter EOF; it's ok if we attempted to + // write more than we can hold. + return total + n + } + panic(fmt.Sprintf("Internal error writing PCs to kcov area: %v", err)) + } + total += n + } + } + } + + if total == 0 { + // An empty profile indicates that coverage is not enabled, in which case + // there shouldn't be any task work registered. + panic("kcov task work is registered, but no coverage data was found") + } + return total +} + +// initCoverageData initializes globalData. It should only be called once, +// before any kcov data is written. +func initCoverageData() { + // First, order all files. Then calculate synthetic PCs for every block + // (using the well-defined ordering for files as well). + for file := range coverdata.Cover.Blocks { + globalData.files = append(globalData.files, file) + } + sort.Strings(globalData.files) + + // nextSyntheticPC is the first PC that we generate for a block. + // + // This uses a standard-looking kernel range for simplicity. + // + // FIXME(b/160639712): This is only necessary because syzkaller requires + // addresses in the kernel range. If we can remove this constraint, then we + // should be able to use the actual addresses. + var nextSyntheticPC uint64 = 0xffffffff80000000 + for _, file := range globalData.files { + blocks := coverdata.Cover.Blocks[file] + thisFile := make([]uint64, 0, len(blocks)) + for range blocks { + thisFile = append(thisFile, nextSyntheticPC) + nextSyntheticPC++ // Advance. + } + globalData.syntheticPCs = append(globalData.syntheticPCs, thisFile) + } +} diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 1b548ccd4..f9b232da6 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -5,17 +5,22 @@ licenses(["notice"]) go_library( name = "sys", srcs = [ + "kcov.go", "sys.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/coverage", + "//pkg/sentry/arch", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go new file mode 100644 index 000000000..92710d877 --- /dev/null +++ b/pkg/sentry/fsimpl/sys/kcov.go @@ -0,0 +1,116 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) *kernfs.Dentry { + k := &kcovInode{} + k.InodeAttrs.Init(creds, 0, 0, fs.NextIno(), linux.S_IFREG|0600) + d := &kernfs.Dentry{} + d.Init(k) + return d +} + +// kcovInode implements kernfs.Inode. +type kcovInode struct { + kernfs.InodeAttrs + kernfs.InodeNoopRefCount + kernfs.InodeNotSymlink + kernfs.InodeNotDirectory +} + +func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + k := kernel.KernelFromContext(ctx) + if k == nil { + panic("KernelFromContext returned nil") + } + fd := &kcovFD{ + inode: i, + kcov: k.NewKcov(), + } + + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: true, + DenyPWrite: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +type kcovFD struct { + vfs.FileDescriptionDefaultImpl + vfs.NoLockFD + + vfsfd vfs.FileDescription + inode *kcovInode + kcov *kernel.Kcov +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *kcovFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + cmd := uint32(args[1].Int()) + arg := args[2].Uint64() + switch uint32(cmd) { + case linux.KCOV_INIT_TRACE: + return 0, fd.kcov.InitTrace(arg) + case linux.KCOV_ENABLE: + return 0, fd.kcov.EnableTrace(ctx, uint8(arg)) + case linux.KCOV_DISABLE: + if arg != 0 { + // This arg is unused; it should be 0. + return 0, syserror.EINVAL + } + return 0, fd.kcov.DisableTrace(ctx) + default: + return 0, syserror.ENOTTY + } +} + +// ConfigureMmap implements vfs.FileDescriptionImpl.ConfigureMmap. +func (fd *kcovFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + return fd.kcov.ConfigureMMap(ctx, opts) +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *kcovFD) Release(ctx context.Context) { + // kcov instances have reference counts in Linux, but this seems sufficient + // for our purposes. + fd.kcov.Reset() +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *kcovFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + creds := auth.CredentialsFromContext(ctx) + fs := fd.vfsfd.VirtualDentry().Mount().Filesystem() + return fd.inode.SetStat(ctx, fs, creds, opts) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *kcovFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + return fd.inode.Stat(ctx, fd.vfsfd.Mount().Filesystem(), opts) +} diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 393feb802..1f042d9f7 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/coverage" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -73,7 +74,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt }), "firmware": fs.newDir(creds, defaultSysDirMode, nil), "fs": fs.newDir(creds, defaultSysDirMode, nil), - "kernel": fs.newDir(creds, defaultSysDirMode, nil), + "kernel": kernelDir(ctx, fs, creds), "module": fs.newDir(creds, defaultSysDirMode, nil), "power": fs.newDir(creds, defaultSysDirMode, nil), }) @@ -94,6 +95,21 @@ func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernf return fs.newDir(creds, defaultSysDirMode, children) } +func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry { + // If kcov is available, set up /sys/kernel/debug/kcov. Technically, debugfs + // should be mounted at debug/, but for our purposes, it is sufficient to + // keep it in sys. + var children map[string]*kernfs.Dentry + if coverage.KcovAvailable() { + children = map[string]*kernfs.Dentry{ + "debug": fs.newDir(creds, linux.FileMode(0700), map[string]*kernfs.Dentry{ + "kcov": fs.newKcovFile(ctx, creds), + }), + } + } + return fs.newDir(creds, defaultSysDirMode, children) +} + // Release implements vfs.FilesystemImpl.Release. func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 5416a310d..d1ecceba3 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -91,6 +91,8 @@ go_library( "fd_table_unsafe.go", "fs_context.go", "ipc_namespace.go", + "kcov.go", + "kcov_unsafe.go", "kernel.go", "kernel_opts.go", "kernel_state.go", @@ -157,6 +159,7 @@ go_library( "//pkg/bits", "//pkg/bpf", "//pkg/context", + "//pkg/coverage", "//pkg/cpuid", "//pkg/eventchannel", "//pkg/fspath", diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go new file mode 100644 index 000000000..aad63aa99 --- /dev/null +++ b/pkg/sentry/kernel/kcov.go @@ -0,0 +1,321 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "fmt" + "io" + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov +// area. On Linux, the maximum is INT_MAX / 8. +const kcovAreaSizeMax = 10 * 1024 * 1024 + +// Kcov provides kernel coverage data to userspace through a memory-mapped +// region, as kcov does in Linux. +// +// To give the illusion that the data is always up to date, we update the shared +// memory every time before we return to userspace. +type Kcov struct { + // mfp provides application memory. It is immutable after creation. + mfp pgalloc.MemoryFileProvider + + // mu protects all of the fields below. + mu sync.RWMutex + + // mode is the current kcov mode. + mode uint8 + + // size is the size of the mapping through which the kernel conveys coverage + // information to userspace. + size uint64 + + // owningTask is the task that currently owns coverage data on the system. The + // interface for kcov essentially requires that coverage is only going to a + // single task. Note that kcov should only generate coverage data for the + // owning task, but we currently generate global coverage. + owningTask *Task + + // count is a locally cached version of the first uint64 in the kcov data, + // which is the number of subsequent entries representing PCs. + // + // It is used with kcovInode.countBlock(), to copy in/out the first element of + // the actual data in an efficient manner, avoid boilerplate, and prevent + // accidental garbage escapes by the temporary counts. + count uint64 + + mappable *mm.SpecialMappable +} + +// NewKcov creates and returns a Kcov instance. +func (k *Kernel) NewKcov() *Kcov { + return &Kcov{ + mfp: k, + } +} + +var coveragePool = sync.Pool{ + New: func() interface{} { + return make([]byte, 0) + }, +} + +// TaskWork implements TaskWorker.TaskWork. +func (kcov *Kcov) TaskWork(t *Task) { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + rw := &kcovReadWriter{ + mf: kcov.mfp.MemoryFile(), + fr: kcov.mappable.FileRange(), + } + + // Read in the PC count. + if _, err := safemem.ReadFullToBlocks(rw, kcov.countBlock()); err != nil { + panic(fmt.Sprintf("Internal error reading count from kcov area: %v", err)) + } + + rw.off = 8 * (1 + kcov.count) + n := coverage.ConsumeCoverageData(&kcovIOWriter{rw}) + + // Update the pc count, based on the number of entries written. Note that if + // we reached the end of the kcov area, we may not have written everything in + // output. + kcov.count += uint64(n / 8) + rw.off = 0 + if _, err := safemem.WriteFullFromBlocks(rw, kcov.countBlock()); err != nil { + panic(fmt.Sprintf("Internal error writing count to kcov area: %v", err)) + } + + // Re-register for future work. + t.RegisterWork(kcov) +} + +// InitTrace performs the KCOV_INIT_TRACE ioctl. +func (kcov *Kcov) InitTrace(size uint64) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + if kcov.mode != linux.KCOV_MODE_DISABLED { + return syserror.EBUSY + } + + // To simplify all the logic around mapping, we require that the length of the + // shared region is a multiple of the system page size. + if (8*size)&(usermem.PageSize-1) != 0 { + return syserror.EINVAL + } + + // We need space for at least two uint64s to hold current position and a + // single PC. + if size < 2 || size > kcovAreaSizeMax { + return syserror.EINVAL + } + + kcov.size = size + kcov.mode = linux.KCOV_MODE_INIT + return nil +} + +// EnableTrace performs the KCOV_ENABLE_TRACE ioctl. +func (kcov *Kcov) EnableTrace(ctx context.Context, traceMode uint8) error { + t := TaskFromContext(ctx) + if t == nil { + panic("kcovInode.EnableTrace() cannot be used outside of a task goroutine") + } + + kcov.mu.Lock() + defer kcov.mu.Unlock() + + // KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call. + if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil { + return syserror.EINVAL + } + + switch traceMode { + case linux.KCOV_TRACE_PC: + kcov.mode = traceMode + case linux.KCOV_TRACE_CMP: + // We do not support KCOV_MODE_TRACE_CMP. + return syserror.ENOTSUP + default: + return syserror.EINVAL + } + + if kcov.owningTask != nil && kcov.owningTask != t { + return syserror.EBUSY + } + + kcov.owningTask = t + t.RegisterWork(kcov) + + // Clear existing coverage data; the task expects to read only coverage data + // from the time it is activated. + coverage.ClearCoverageData() + return nil +} + +// DisableTrace performs the KCOV_DISABLE_TRACE ioctl. +func (kcov *Kcov) DisableTrace(ctx context.Context) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + t := TaskFromContext(ctx) + if t == nil { + panic("kcovInode.EnableTrace() cannot be used outside of a task goroutine") + } + + if t != kcov.owningTask { + return syserror.EINVAL + } + kcov.owningTask = nil + kcov.mode = linux.KCOV_MODE_INIT + kcov.resetLocked() + return nil +} + +// Reset is called when the owning task exits. +func (kcov *Kcov) Reset() { + kcov.mu.Lock() + kcov.resetLocked() + kcov.mu.Unlock() +} + +// The kcov instance is reset when the owning task exits or when tracing is +// disabled. +func (kcov *Kcov) resetLocked() { + kcov.owningTask = nil + if kcov.mappable != nil { + kcov.mappable = nil + } +} + +// ConfigureMMap is called by the vfs.FileDescription for this kcov instance to +// implement vfs.FileDescription.ConfigureMMap. +func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + kcov.mu.Lock() + defer kcov.mu.Unlock() + + if kcov.mode != linux.KCOV_MODE_INIT { + return syserror.EINVAL + } + + if kcov.mappable == nil { + // Set up the kcov area. + fr, err := kcov.mfp.MemoryFile().Allocate(kcov.size*8, usage.Anonymous) + if err != nil { + return err + } + + // Get the thread id for the mmap name. + t := TaskFromContext(ctx) + if t == nil { + panic("ThreadFromContext returned nil") + } + // For convenience, a special mappable is used here. Note that these mappings + // will look different under /proc/[pid]/maps than they do on Linux. + kcov.mappable = mm.NewSpecialMappable(fmt.Sprintf("[kcov:%d]", t.ThreadID()), kcov.mfp, fr) + } + opts.Mappable = kcov.mappable + opts.MappingIdentity = kcov.mappable + return nil +} + +// kcovReadWriter implements safemem.Reader and safemem.Writer. +type kcovReadWriter struct { + off uint64 + mf *pgalloc.MemoryFile + fr memmap.FileRange +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. +func (rw *kcovReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + + // Limit the read to the kcov range and check for overflow. + if rw.fr.Length() <= rw.off { + return 0, io.EOF + } + start := rw.fr.Start + rw.off + end := rw.fr.Start + rw.fr.Length() + if rend := start + dsts.NumBytes(); rend < end { + end = rend + } + + // Get internal mappings. + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Read) + if err != nil { + return 0, err + } + + // Copy from internal mappings. + n, err := safemem.CopySeq(dsts, bs) + rw.off += n + return n, err +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. +func (rw *kcovReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + + // Limit the write to the kcov area and check for overflow. + if rw.fr.Length() <= rw.off { + return 0, io.EOF + } + start := rw.fr.Start + rw.off + end := rw.fr.Start + rw.fr.Length() + if wend := start + srcs.NumBytes(); wend < end { + end = wend + } + + // Get internal mapping. + bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Write) + if err != nil { + return 0, err + } + + // Copy to internal mapping. + n, err := safemem.CopySeq(bs, srcs) + rw.off += n + return n, err +} + +// kcovIOWriter implements io.Writer as a basic wrapper over kcovReadWriter. +type kcovIOWriter struct { + rw *kcovReadWriter +} + +// Write implements io.Writer.Write. +func (w *kcovIOWriter) Write(p []byte) (int, error) { + bs := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p)) + n, err := safemem.WriteFullFromBlocks(w.rw, bs) + return int(n), err +} diff --git a/pkg/sentry/kernel/kcov_unsafe.go b/pkg/sentry/kernel/kcov_unsafe.go new file mode 100644 index 000000000..6f64022eb --- /dev/null +++ b/pkg/sentry/kernel/kcov_unsafe.go @@ -0,0 +1,28 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import ( + "unsafe" + + "gvisor.dev/gvisor/pkg/safemem" +) + +// countBlock provides a safemem.BlockSeq for k.count. +// +// Like k.count, the block returned is protected by k.mu. +func (k *Kcov) countBlock() safemem.BlockSeq { + return safemem.BlockSeqOf(safemem.BlockFromSafePointer(unsafe.Pointer(&k.count), int(unsafe.Sizeof(k.count)))) +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 2e0175e36..402aa1718 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -248,7 +248,7 @@ type Kernel struct { // SpecialOpts contains special kernel options. SpecialOpts - // VFS keeps the filesystem state used across the kernel. + // vfs keeps the filesystem state used across the kernel. vfs vfs.VirtualFilesystem // hostMount is the Mount used for file descriptors that were imported diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 5aee699e7..a436610c9 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -574,6 +574,11 @@ type Task struct { // // startTime is protected by mu. startTime ktime.Time + + // kcov is the kcov instance providing code coverage owned by this task. + // + // kcov is exclusive to the task goroutine. + kcov *Kcov } func (t *Task) savePtraceTracer() *Task { @@ -903,3 +908,16 @@ func (t *Task) UID() uint32 { func (t *Task) GID() uint32 { return uint32(t.Credentials().EffectiveKGID) } + +// SetKcov sets the kcov instance associated with t. +func (t *Task) SetKcov(k *Kcov) { + t.kcov = k +} + +// ResetKcov clears the kcov instance associated with t. +func (t *Task) ResetKcov() { + if t.kcov != nil { + t.kcov.Reset() + t.kcov = nil + } +} diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index c165d6cb1..b76f7f503 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -239,6 +239,8 @@ func (*runExitMain) execute(t *Task) taskRunState { t.traceExitEvent() lastExiter := t.exitThreadGroup() + t.ResetKcov() + // If the task has a cleartid, and the thread group wasn't killed by a // signal, handle that before releasing the MM. if t.cleartid != 0 { diff --git a/runsc/config/config.go b/runsc/config/config.go index ca85cef51..8cf0378d5 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -300,10 +300,10 @@ type Config struct { // E.g. 0.2 CPU quota will result in 1, and 1.9 in 2. CPUNumFromQuota bool - // Enables VFS2 (not plumbled through yet). + // Enables VFS2 (not plumbed through yet). VFS2 bool - // Enables FUSE usage (not plumbled through yet). + // Enables FUSE usage (not plumbed through yet). FUSE bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in @@ -353,6 +353,8 @@ func (c *Config) ToFlags() []string { "--tx-checksum-offload=" + strconv.FormatBool(c.TXChecksumOffload), "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead), "--qdisc=" + c.QDisc.String(), + "--vfs2=" + strconv.FormatBool(c.VFS2), + "--fuse=" + strconv.FormatBool(c.FUSE), } if c.CPUNumFromQuota { f = append(f, "--cpu-num-from-quota") diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index eea1401ac..65e8299c3 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -262,6 +262,10 @@ syscall_test( test = "//test/syscalls/linux:itimer_test", ) +syscall_test( + test = "//test/syscalls/linux:kcov_test", +) + syscall_test( test = "//test/syscalls/linux:kill_test", ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ed0b6ecf4..5a323d331 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1067,6 +1067,20 @@ cc_binary( ], ) +cc_binary( + name = "kcov_test", + testonly = 1, + srcs = ["kcov.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_binary( name = "kill_test", testonly = 1, diff --git a/test/syscalls/linux/kcov.cc b/test/syscalls/linux/kcov.cc new file mode 100644 index 000000000..f3c30444e --- /dev/null +++ b/test/syscalls/linux/kcov.cc @@ -0,0 +1,70 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// For this test to work properly, it must be run with coverage enabled. On +// native Linux, this involves compiling the kernel with kcov enabled. For +// gVisor, we need to enable the Go coverage tool, e.g. +// bazel test --collect_coverage_data --instrumentation_filter=//pkg/... . +TEST(KcovTest, Kcov) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE)))); + + constexpr int kSize = 4096; + constexpr int KCOV_INIT_TRACE = 0x80086301; + constexpr int KCOV_ENABLE = 0x6364; + + int fd; + ASSERT_THAT(fd = open("/sys/kernel/debug/kcov", O_RDWR), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT))); + + // Kcov not enabled. + SKIP_IF(errno == ENOENT); + + ASSERT_THAT(ioctl(fd, KCOV_INIT_TRACE, kSize), SyscallSucceeds()); + uint64_t* area = (uint64_t*)mmap(nullptr, kSize * sizeof(uint64_t), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + ASSERT_TRUE(area != MAP_FAILED); + ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallSucceeds()); + + for (int i = 0; i < 10; i++) { + // Make some syscalls to generate coverage data. + ASSERT_THAT(ioctl(fd, KCOV_ENABLE, 0), SyscallFailsWithErrno(EINVAL)); + } + + uint64_t num_pcs = *(uint64_t*)(area); + EXPECT_GT(num_pcs, 0); + for (uint64_t i = 1; i <= num_pcs; i++) { + // Verify that PCs are in the standard kernel range. + EXPECT_GT(area[i], 0xffffffff7fffffffL); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From e382f99346f54122276a38561f42556b600a9454 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 25 Aug 2020 16:38:07 -0700 Subject: Return non-zero size for tmpfs statfs(2). This does not implement accepting or enforcing any size limit, which will be more complex and has performance implications; it just returns a fixed non-zero size. Updates #1936 PiperOrigin-RevId: 328428588 --- pkg/abi/linux/fs.go | 11 ++++++----- pkg/sentry/fs/tmpfs/tmpfs.go | 12 ++++++++++-- pkg/sentry/fsimpl/tmpfs/filesystem.go | 12 +----------- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 24 ++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 18 deletions(-) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go index 158d2db5b..2b1ef0d4e 100644 --- a/pkg/abi/linux/fs.go +++ b/pkg/abi/linux/fs.go @@ -44,17 +44,18 @@ type Statfs struct { // Type is one of the filesystem magic values, defined above. Type uint64 - // BlockSize is the data block size. + // BlockSize is the optimal transfer block size in bytes. BlockSize int64 - // Blocks is the number of data blocks in use. + // Blocks is the maximum number of data blocks the filesystem may store, in + // units of BlockSize. Blocks uint64 - // BlocksFree is the number of free blocks. + // BlocksFree is the number of free data blocks, in units of BlockSize. BlocksFree uint64 - // BlocksAvailable is the number of blocks free for use by - // unprivileged users. + // BlocksAvailable is the number of data blocks free for use by + // unprivileged users, in units of BlockSize. BlocksAvailable uint64 // Files is the number of used file nodes on the filesystem. diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index b095312fe..998b697ca 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -16,6 +16,8 @@ package tmpfs import ( + "math" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -32,9 +34,15 @@ import ( var fsInfo = fs.Info{ Type: linux.TMPFS_MAGIC, + // tmpfs currently does not support configurable size limits. In Linux, + // such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from + // statfs(2). However, many applications treat this as having a size limit + // of 0. To work around this, claim to have a very large but non-zero size, + // chosen to ensure that BlockSize * Blocks does not overflow int64 (which + // applications may also handle incorrectly). // TODO(b/29637826): allow configuring a tmpfs size and enforce it. - TotalBlocks: 0, - FreeBlocks: 0, + TotalBlocks: math.MaxInt64 / usermem.PageSize, + FreeBlocks: math.MaxInt64 / usermem.PageSize, } // rename implements fs.InodeOperations.Rename for tmpfs nodes. diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 7924a0911..eddfeab76 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -706,16 +705,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if _, err := resolveLocked(ctx, rp); err != nil { return linux.Statfs{}, err } - statfs := linux.Statfs{ - Type: linux.TMPFS_MAGIC, - BlockSize: usermem.PageSize, - FragmentSize: usermem.PageSize, - NameLength: linux.NAME_MAX, - // TODO(b/29637826): Allow configuring a tmpfs size and enforce it. - Blocks: 0, - BlocksFree: 0, - } - return statfs, nil + return globalStatfs, nil } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 428f62aaa..a7fdf19ca 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -201,6 +201,25 @@ func (fs *filesystem) Release(ctx context.Context) { fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) } +// immutable +var globalStatfs = linux.Statfs{ + Type: linux.TMPFS_MAGIC, + BlockSize: usermem.PageSize, + FragmentSize: usermem.PageSize, + NameLength: linux.NAME_MAX, + + // tmpfs currently does not support configurable size limits. In Linux, + // such a tmpfs mount will return f_blocks == f_bfree == f_bavail == 0 from + // statfs(2). However, many applications treat this as having a size limit + // of 0. To work around this, claim to have a very large but non-zero size, + // chosen to ensure that BlockSize * Blocks does not overflow int64 (which + // applications may also handle incorrectly). + // TODO(b/29637826): allow configuring a tmpfs size and enforce it. + Blocks: math.MaxInt64 / usermem.PageSize, + BlocksFree: math.MaxInt64 / usermem.PageSize, + BlocksAvailable: math.MaxInt64 / usermem.PageSize, +} + // dentry implements vfs.DentryImpl. type dentry struct { vfsd vfs.Dentry @@ -698,6 +717,11 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return nil } +// StatFS implements vfs.FileDescriptionImpl.StatFS. +func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { + return globalStatfs, nil +} + // Listxattr implements vfs.FileDescriptionImpl.Listxattr. func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { return fd.inode().listxattr(size) -- cgit v1.2.3 From 2a322c451e0a04df55d8fa4ea6e055da39231efa Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 26 Aug 2020 10:03:46 -0700 Subject: tmpfs: Allow xattrs in the trusted namespace if creds has CAP_SYS_ADMIN. This is needed to support the overlay opaque attribute. PiperOrigin-RevId: 328552985 --- pkg/abi/linux/xattr.go | 3 ++ pkg/sentry/fsimpl/overlay/filesystem.go | 2 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 68 ++++++++++++++++++------------ test/syscalls/linux/memfd.cc | 17 +------- test/syscalls/linux/xattr.cc | 74 +++++++++++++++++++++++++++++++++ test/util/fs_util.cc | 20 +++++++++ test/util/fs_util.h | 6 +++ 7 files changed, 147 insertions(+), 43 deletions(-) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go index 99180b208..8ef837f27 100644 --- a/pkg/abi/linux/xattr.go +++ b/pkg/abi/linux/xattr.go @@ -23,6 +23,9 @@ const ( XATTR_CREATE = 1 XATTR_REPLACE = 2 + XATTR_TRUSTED_PREFIX = "trusted." + XATTR_TRUSTED_PREFIX_LEN = len(XATTR_TRUSTED_PREFIX) + XATTR_USER_PREFIX = "user." XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX) ) diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index a3cee4047..e720bfb0b 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -30,7 +30,7 @@ import ( // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for // opaque directories. // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE -const _OVL_XATTR_OPAQUE = "trusted.overlay.opaque" +const _OVL_XATTR_OPAQUE = linux.XATTR_TRUSTED_PREFIX + "overlay.opaque" func isWhiteout(stat *linux.Statx) bool { return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0 diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a7fdf19ca..c4cec4130 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -631,49 +631,65 @@ func (i *inode) listxattr(size uint64) ([]string, error) { } func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { - if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { return "", err } - if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { - return "", syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return "", syserror.ENODATA - } return i.xattrs.Getxattr(opts) } func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error { - if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + if err := i.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { return err } - if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return syserror.EPERM - } return i.xattrs.Setxattr(opts) } func (i *inode) removexattr(creds *auth.Credentials, name string) error { - if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + if err := i.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { return err } - if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return syserror.EOPNOTSUPP - } - if !i.userXattrSupported() { - return syserror.EPERM - } return i.xattrs.Removexattr(name) } -// Extended attributes in the user.* namespace are only supported for regular -// files and directories. -func (i *inode) userXattrSupported() bool { - filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) - return filetype == linux.S_IFREG || filetype == linux.S_IFDIR +func (i *inode) checkXattrPermissions(creds *auth.Credentials, name string, ats vfs.AccessTypes) error { + switch { + case ats&vfs.MayRead == vfs.MayRead: + if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + return err + } + case ats&vfs.MayWrite == vfs.MayWrite: + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + default: + panic(fmt.Sprintf("checkXattrPermissions called with impossible AccessTypes: %v", ats)) + } + + switch { + case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX): + // The trusted.* namespace can only be accessed by privileged + // users. + if creds.HasCapability(linux.CAP_SYS_ADMIN) { + return nil + } + if ats&vfs.MayWrite == vfs.MayWrite { + return syserror.EPERM + } + return syserror.ENODATA + case strings.HasPrefix(name, linux.XATTR_USER_PREFIX): + // Extended attributes in the user.* namespace are only + // supported for regular files and directories. + filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) + if filetype == linux.S_IFREG || filetype == linux.S_IFDIR { + return nil + } + if ats&vfs.MayWrite == vfs.MayWrite { + return syserror.EPERM + } + return syserror.ENODATA + + } + return syserror.EOPNOTSUPP } // fileDescription is embedded by tmpfs implementations of diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index f8b7f7938..4a450742b 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -14,12 +14,10 @@ #include #include -#include #include #include #include #include -#include #include #include @@ -53,6 +51,7 @@ namespace { #define F_SEAL_GROW 0x0004 #define F_SEAL_WRITE 0x0008 +using ::gvisor::testing::IsTmpfs; using ::testing::StartsWith; const std::string kMemfdName = "some-memfd"; @@ -444,20 +443,6 @@ TEST(MemfdTest, SealsAreInodeLevelProperties) { EXPECT_THAT(ftruncate(memfd3.get(), kPageSize), SyscallFailsWithErrno(EPERM)); } -PosixErrorOr IsTmpfs(const std::string& path) { - struct statfs stat; - if (statfs(path.c_str(), &stat)) { - if (errno == ENOENT) { - // Nothing at path, don't raise this as an error. Instead, just report no - // tmpfs at path. - return false; - } - return PosixError(errno, - absl::StrFormat("statfs(\"%s\", %#p)", path, &stat)); - } - return stat.f_type == TMPFS_MAGIC; -} - // Tmpfs files also support seals, but are created with F_SEAL_SEAL. TEST(MemfdTest, TmpfsFilesHaveSealSeal) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp"))); diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index cbcf08451..5510a87a0 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -28,6 +28,7 @@ #include "test/syscalls/linux/file_base.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -37,6 +38,8 @@ namespace testing { namespace { +using ::gvisor::testing::IsTmpfs; + class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNonexistentFile) { @@ -604,6 +607,77 @@ TEST_F(XattrTest, XattrWithFD) { EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); } +TEST_F(XattrTest, TrustedNamespaceWithCapSysAdmin) { + // Trusted namespace not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace. + SKIP_IF(IsRunningOnGvisor() && + !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_))); + + // Setting/Getting in the trusted namespace requires CAP_SYS_ADMIN. + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.test"; + + // Set. + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // Get. + char got = '\0'; + EXPECT_THAT(getxattr(path, name, &got, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(val, got); + + // List. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + // Remove. + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); + + // Get should now return ENODATA. + EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, TrustedNamespaceWithoutCapSysAdmin) { + // Trusted namespace not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + // TODO(b/66162845): Only gVisor tmpfs currently supports trusted namespace. + SKIP_IF(IsRunningOnGvisor() && + !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_))); + + // Drop CAP_SYS_ADMIN if we have it. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.test"; + + // Set fails. + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + + // Get fails. + char got = '\0'; + EXPECT_THAT(getxattr(path, name, &got, size), SyscallFailsWithErrno(ENODATA)); + + // List still works, but returns no items. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), SyscallSucceedsWithValue(0)); + + // Remove fails. + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); +} + } // namespace } // namespace testing diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 5418948fe..dffa16183 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -15,7 +15,11 @@ #include "test/util/fs_util.h" #include +#ifndef __fuchsia__ +#include +#endif // __fuchsia__ #include +#include #include #include @@ -629,5 +633,21 @@ PosixErrorOr ProcessExePath(int pid) { return ReadLink(absl::StrCat("/proc/", pid, "/exe")); } +#ifndef __fuchsia__ +PosixErrorOr IsTmpfs(const std::string& path) { + struct statfs stat; + if (statfs(path.c_str(), &stat)) { + if (errno == ENOENT) { + // Nothing at path, don't raise this as an error. Instead, just report no + // tmpfs at path. + return false; + } + return PosixError(errno, + absl::StrFormat("statfs(\"%s\", %#p)", path, &stat)); + } + return stat.f_type == TMPFS_MAGIC; +} +#endif // __fuchsia__ + } // namespace testing } // namespace gvisor diff --git a/test/util/fs_util.h b/test/util/fs_util.h index 8cdac23a1..044190657 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -178,6 +179,11 @@ std::string CleanPath(absl::string_view path); // Returns the full path to the executable of the given pid or a PosixError. PosixErrorOr ProcessExePath(int pid); +#ifndef __fuchsia__ +// IsTmpfs returns true if the file at path is backed by tmpfs. +PosixErrorOr IsTmpfs(const std::string& path); +#endif // __fucshia__ + namespace internal { // Not part of the public API. std::string JoinPathImpl(std::initializer_list paths); -- cgit v1.2.3 From 97d6398d435025c7ab361c36994feab2c7e2d84f Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 27 Aug 2020 10:51:59 -0700 Subject: ip6tables: (de)serialize ip6tables structs More implementation+testing to follow. #3549. PiperOrigin-RevId: 328770160 --- pkg/abi/linux/netfilter_ipv6.go | 13 ++ pkg/sentry/socket/netfilter/BUILD | 1 + pkg/sentry/socket/netfilter/ipv4.go | 33 +++- pkg/sentry/socket/netfilter/ipv6.go | 265 +++++++++++++++++++++++++++++++ pkg/sentry/socket/netfilter/netfilter.go | 77 +++++---- pkg/sentry/socket/netfilter/targets.go | 10 +- pkg/sentry/socket/netstack/netstack.go | 75 ++++++++- pkg/tcpip/stack/iptables.go | 12 +- pkg/tcpip/stack/iptables_types.go | 5 + test/syscalls/linux/ip6tables.cc | 48 ++++++ 10 files changed, 489 insertions(+), 50 deletions(-) create mode 100644 pkg/sentry/socket/netfilter/ipv6.go (limited to 'pkg/abi') diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go index 9bb9efb10..f6117024c 100644 --- a/pkg/abi/linux/netfilter_ipv6.go +++ b/pkg/abi/linux/netfilter_ipv6.go @@ -290,6 +290,19 @@ type IP6TIP struct { const SizeOfIP6TIP = 136 +// Flags in IP6TIP.Flags. Corresponding constants are in +// include/uapi/linux/netfilter_ipv6/ip6_tables.h. +const ( + // Whether to check the Protocol field. + IP6T_F_PROTO = 0x01 + // Whether to match the TOS field. + IP6T_F_TOS = 0x02 + // Indicates that the jump target is an aboslute GOTO, not an offset. + IP6T_F_GOTO = 0x04 + // Enables all flags. + IP6T_F_MASK = 0x07 +) + // Flags in IP6TIP.InverseFlags. Corresponding constants are in // include/uapi/linux/netfilter_ipv6/ip6_tables.h. const ( diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index 795620589..8aea0200f 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "extensions.go", "ipv4.go", + "ipv6.go", "netfilter.go", "owner_matcher.go", "targets.go", diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go index 4fb887e49..e4c55a100 100644 --- a/pkg/sentry/socket/netfilter/ipv4.go +++ b/pkg/sentry/socket/netfilter/ipv4.go @@ -36,14 +36,37 @@ var emptyIPv4Filter = stack.IPHeaderFilter{ SrcMask: "\x00\x00\x00\x00", } -func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEntries { +// convertNetstackToBinary4 converts the iptables as stored in netstack to the +// format expected by the iptables tool. Linux stores each table as a binary +// blob that can only be traversed by parsing a little data, reading some +// offsets, jumping to those offsets, parsing again, etc. +func convertNetstackToBinary4(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) { + // The table name has to fit in the struct. + if linux.XT_TABLE_MAXNAMELEN < len(tablename) { + return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) + } + + table, ok := stk.IPTables().GetTable(tablename.String(), false) + if !ok { + return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + } + + // Setup the info struct. + entries, info := getEntries4(table, tablename) + return entries, info, nil +} + +func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo) { + var info linux.IPTGetinfo var entries linux.KernelIPTGetEntries + copy(info.Name[:], tablename[:]) copy(entries.Name[:], info.Name[:]) + info.ValidHooks = table.ValidHooks() for ruleIdx, rule := range table.Rules { nflog("convert to binary: current offset: %d", entries.Size) - setHooksAndUnderflow(info, table, entries.Size, ruleIdx) + setHooksAndUnderflow(&info, table, entries.Size, ruleIdx) // Each rule corresponds to an entry. entry := linux.KernelIPTEntry{ Entry: linux.IPTEntry{ @@ -100,7 +123,7 @@ func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEn info.Size = entries.Size nflog("convert to binary: finished with an marshalled size of %d", info.Size) - return entries + return entries, info } func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) { @@ -205,7 +228,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) { ifnameMask := string(iptip.OutputInterfaceMask[:n]) return stack.IPHeaderFilter{ - Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + // A Protocol value of 0 indicates all protocols match. + CheckProtocol: iptip.Protocol != 0, Dst: tcpip.Address(iptip.Dst[:]), DstMask: tcpip.Address(iptip.DstMask[:]), DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go new file mode 100644 index 000000000..3b2c1becd --- /dev/null +++ b/pkg/sentry/socket/netfilter/ipv6.go @@ -0,0 +1,265 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netfilter + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/usermem" +) + +// emptyIPv6Filter is for comparison with a rule's filters to determine whether +// it is also empty. It is immutable. +var emptyIPv6Filter = stack.IPHeaderFilter{ + Dst: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + DstMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + Src: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + SrcMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", +} + +// convertNetstackToBinary6 converts the ip6tables as stored in netstack to the +// format expected by the iptables tool. Linux stores each table as a binary +// blob that can only be traversed by parsing a little data, reading some +// offsets, jumping to those offsets, parsing again, etc. +func convertNetstackToBinary6(stk *stack.Stack, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo, error) { + // The table name has to fit in the struct. + if linux.XT_TABLE_MAXNAMELEN < len(tablename) { + return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) + } + + table, ok := stk.IPTables().GetTable(tablename.String(), true) + if !ok { + return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + } + + // Setup the info struct, which is the same in IPv4 and IPv6. + entries, info := getEntries6(table, tablename) + return entries, info, nil +} + +func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo) { + var info linux.IPTGetinfo + var entries linux.KernelIP6TGetEntries + copy(info.Name[:], tablename[:]) + copy(entries.Name[:], info.Name[:]) + info.ValidHooks = table.ValidHooks() + + for ruleIdx, rule := range table.Rules { + nflog("convert to binary: current offset: %d", entries.Size) + + setHooksAndUnderflow(&info, table, entries.Size, ruleIdx) + // Each rule corresponds to an entry. + entry := linux.KernelIP6TEntry{ + Entry: linux.IP6TEntry{ + IPv6: linux.IP6TIP{ + Protocol: uint16(rule.Filter.Protocol), + }, + NextOffset: linux.SizeOfIP6TEntry, + TargetOffset: linux.SizeOfIP6TEntry, + }, + } + copy(entry.Entry.IPv6.Dst[:], rule.Filter.Dst) + copy(entry.Entry.IPv6.DstMask[:], rule.Filter.DstMask) + copy(entry.Entry.IPv6.Src[:], rule.Filter.Src) + copy(entry.Entry.IPv6.SrcMask[:], rule.Filter.SrcMask) + copy(entry.Entry.IPv6.OutputInterface[:], rule.Filter.OutputInterface) + copy(entry.Entry.IPv6.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) + if rule.Filter.DstInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_DSTIP + } + if rule.Filter.SrcInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_SRCIP + } + if rule.Filter.OutputInterfaceInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_VIA_OUT + } + if rule.Filter.CheckProtocol { + entry.Entry.IPv6.Flags |= linux.IP6T_F_PROTO + } + + for _, matcher := range rule.Matchers { + // Serialize the matcher and add it to the + // entry. + serialized := marshalMatcher(matcher) + nflog("convert to binary: matcher serialized as: %v", serialized) + if len(serialized)%8 != 0 { + panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher)) + } + entry.Elems = append(entry.Elems, serialized...) + entry.Entry.NextOffset += uint16(len(serialized)) + entry.Entry.TargetOffset += uint16(len(serialized)) + } + + // Serialize and append the target. + serialized := marshalTarget(rule.Target) + if len(serialized)%8 != 0 { + panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target)) + } + entry.Elems = append(entry.Elems, serialized...) + entry.Entry.NextOffset += uint16(len(serialized)) + + nflog("convert to binary: adding entry: %+v", entry) + + entries.Size += uint32(entry.Entry.NextOffset) + entries.Entrytable = append(entries.Entrytable, entry) + info.NumEntries++ + } + + info.Size = entries.Size + nflog("convert to binary: finished with an marshalled size of %d", info.Size) + return entries, info +} + +func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) { + nflog("set entries: setting entries in table %q", replace.Name.String()) + + // Convert input into a list of rules and their offsets. + var offset uint32 + // offsets maps rule byte offsets to their position in table.Rules. + offsets := map[uint32]int{} + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + nflog("set entries: processing entry at offset %d", offset) + + // Get the struct ipt_entry. + if len(optVal) < linux.SizeOfIP6TEntry { + nflog("optVal has insufficient size for entry %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + var entry linux.IP6TEntry + buf := optVal[:linux.SizeOfIP6TEntry] + binary.Unmarshal(buf, usermem.ByteOrder, &entry) + initialOptValLen := len(optVal) + optVal = optVal[linux.SizeOfIP6TEntry:] + + if entry.TargetOffset < linux.SizeOfIP6TEntry { + nflog("entry has too-small target offset %d", entry.TargetOffset) + return nil, syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): We should support more IPTIP + // filtering fields. + filter, err := filterFromIP6TIP(entry.IPv6) + if err != nil { + nflog("bad iptip: %v", err) + return nil, syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): Matchers and targets can specify + // that they only work for certain protocols, hooks, tables. + // Get matchers. + matchersSize := entry.TargetOffset - linux.SizeOfIP6TEntry + if len(optVal) < int(matchersSize) { + nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + matchers, err := parseMatchers(filter, optVal[:matchersSize]) + if err != nil { + nflog("failed to parse matchers: %v", err) + return nil, syserr.ErrInvalidArgument + } + optVal = optVal[matchersSize:] + + // Get the target of the rule. + targetSize := entry.NextOffset - entry.TargetOffset + if len(optVal) < int(targetSize) { + nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + target, err := parseTarget(filter, optVal[:targetSize]) + if err != nil { + nflog("failed to parse target: %v", err) + return nil, syserr.ErrInvalidArgument + } + optVal = optVal[targetSize:] + + table.Rules = append(table.Rules, stack.Rule{ + Filter: filter, + Target: target, + Matchers: matchers, + }) + offsets[offset] = int(entryIdx) + offset += uint32(entry.NextOffset) + + if initialOptValLen-len(optVal) != int(entry.NextOffset) { + nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal)) + return nil, syserr.ErrInvalidArgument + } + } + return offsets, nil +} + +func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) { + if containsUnsupportedFields6(iptip) { + return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip) + } + if len(iptip.Dst) != header.IPv6AddressSize || len(iptip.DstMask) != header.IPv6AddressSize { + return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask)) + } + if len(iptip.Src) != header.IPv6AddressSize || len(iptip.SrcMask) != header.IPv6AddressSize { + return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask)) + } + + n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0) + if n == -1 { + n = len(iptip.OutputInterface) + } + ifname := string(iptip.OutputInterface[:n]) + + n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0) + if n == -1 { + n = len(iptip.OutputInterfaceMask) + } + ifnameMask := string(iptip.OutputInterfaceMask[:n]) + + return stack.IPHeaderFilter{ + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + // In ip6tables a flag controls whether to check the protocol. + CheckProtocol: iptip.Flags&linux.IP6T_F_PROTO != 0, + Dst: tcpip.Address(iptip.Dst[:]), + DstMask: tcpip.Address(iptip.DstMask[:]), + DstInvert: iptip.InverseFlags&linux.IP6T_INV_DSTIP != 0, + Src: tcpip.Address(iptip.Src[:]), + SrcMask: tcpip.Address(iptip.SrcMask[:]), + SrcInvert: iptip.InverseFlags&linux.IP6T_INV_SRCIP != 0, + OutputInterface: ifname, + OutputInterfaceMask: ifnameMask, + OutputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_OUT != 0, + }, nil +} + +func containsUnsupportedFields6(iptip linux.IP6TIP) bool { + // The following features are supported: + // - Protocol + // - Dst and DstMask + // - Src and SrcMask + // - The inverse destination IP check flag + // - OutputInterface, OutputInterfaceMask and its inverse. + var emptyInterface = [linux.IFNAMSIZ]byte{} + flagMask := uint8(linux.IP6T_F_PROTO) + // Disable any supported inverse flags. + inverseMask := uint8(linux.IP6T_INV_DSTIP) | uint8(linux.IP6T_INV_SRCIP) | uint8(linux.IP6T_INV_VIA_OUT) + return iptip.InputInterface != emptyInterface || + iptip.InputInterfaceMask != emptyInterface || + iptip.Flags&^flagMask != 0 || + iptip.InverseFlags&^inverseMask != 0 || + iptip.TOS != 0 +} diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index df256676f..3e1735079 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -42,14 +42,19 @@ func nflog(format string, args ...interface{}) { } // GetInfo returns information about iptables. -func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) { +func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) { // Read in the struct and table name. var info linux.IPTGetinfo if _, err := info.CopyIn(t, outPtr); err != nil { return linux.IPTGetinfo{}, syserr.FromError(err) } - _, info, err := convertNetstackToBinary(stack, info.Name) + var err error + if ipv6 { + _, info, err = convertNetstackToBinary6(stack, info.Name) + } else { + _, info, err = convertNetstackToBinary4(stack, info.Name) + } if err != nil { nflog("couldn't convert iptables: %v", err) return linux.IPTGetinfo{}, syserr.ErrInvalidArgument @@ -59,9 +64,9 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT return info, nil } -// GetEntries4 returns netstack's iptables rules encoded for the iptables tool. +// GetEntries4 returns netstack's iptables rules. func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) { - // Read in the ABI struct. + // Read in the struct and table name. var userEntries linux.IPTGetEntries if _, err := userEntries.CopyIn(t, outPtr); err != nil { nflog("couldn't copy in entries %q", userEntries.Name) @@ -70,7 +75,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen // Convert netstack's iptables rules to something that the iptables // tool can understand. - entries, _, err := convertNetstackToBinary(stack, userEntries.Name) + entries, _, err := convertNetstackToBinary4(stack, userEntries.Name) if err != nil { nflog("couldn't read entries: %v", err) return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument @@ -83,28 +88,29 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen return entries, nil } -// convertNetstackToBinary converts the iptables as stored in netstack to the -// format expected by the iptables tool. Linux stores each table as a binary -// blob that can only be traversed by parsing a bit, reading some offsets, -// jumping to those offsets, parsing again, etc. -func convertNetstackToBinary(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) { - // The table name has to fit in the struct. - if linux.XT_TABLE_MAXNAMELEN < len(tablename) { - return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) +// GetEntries6 returns netstack's ip6tables rules. +func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) { + // Read in the struct and table name. IPv4 and IPv6 utilize structs + // with the same layout. + var userEntries linux.IPTGetEntries + if _, err := userEntries.CopyIn(t, outPtr); err != nil { + nflog("couldn't copy in entries %q", userEntries.Name) + return linux.KernelIP6TGetEntries{}, syserr.FromError(err) } - table, ok := stk.IPTables().GetTable(tablename.String()) - if !ok { - return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + // Convert netstack's iptables rules to something that the iptables + // tool can understand. + entries, _, err := convertNetstackToBinary6(stack, userEntries.Name) + if err != nil { + nflog("couldn't read entries: %v", err) + return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument + } + if binary.Size(entries) > uintptr(outLen) { + nflog("insufficient GetEntries output size: %d", uintptr(outLen)) + return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument } - // Setup the info struct. - var info linux.IPTGetinfo - info.ValidHooks = table.ValidHooks() - copy(info.Name[:], tablename[:]) - - entries := getEntries4(table, &info) - return entries, info, nil + return entries, nil } // setHooksAndUnderflow checks whether the rule at ruleIdx is a hook entrypoint @@ -128,7 +134,7 @@ func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint // SetEntries sets iptables rules for a single table. See // net/ipv4/netfilter/ip_tables.c:translate_table for reference. -func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { +func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { var replace linux.IPTReplace replaceBuf := optVal[:linux.SizeOfIPTReplace] optVal = optVal[linux.SizeOfIPTReplace:] @@ -146,7 +152,13 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { return syserr.ErrInvalidArgument } - offsets, err := modifyEntries4(stk, optVal, &replace, &table) + var err *syserr.Error + var offsets map[uint32]int + if ipv6 { + offsets, err = modifyEntries6(stk, optVal, &replace, &table) + } else { + offsets, err = modifyEntries4(stk, optVal, &replace, &table) + } if err != nil { return err } @@ -163,7 +175,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { table.BuiltinChains[hk] = ruleIdx } if offset == replace.Underflow[hook] { - if !validUnderflow(table.Rules[ruleIdx]) { + if !validUnderflow(table.Rules[ruleIdx], ipv6) { nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx) return syserr.ErrInvalidArgument } @@ -228,7 +240,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { if ruleIdx == stack.HookUnset { continue } - if !isUnconditionalAccept(table.Rules[ruleIdx]) { + if !isUnconditionalAccept(table.Rules[ruleIdx], ipv6) { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument } @@ -240,7 +252,8 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { // - There are no chains without an unconditional final rule. // - There are no chains without an unconditional underflow rule. - return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table)) + return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table, ipv6)) + } // parseMatchers parses 0 or more matchers from optVal. optVal should contain @@ -286,11 +299,11 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher, return matchers, nil } -func validUnderflow(rule stack.Rule) bool { +func validUnderflow(rule stack.Rule, ipv6 bool) bool { if len(rule.Matchers) != 0 { return false } - if rule.Filter != emptyIPv4Filter { + if (ipv6 && rule.Filter != emptyIPv6Filter) || (!ipv6 && rule.Filter != emptyIPv4Filter) { return false } switch rule.Target.(type) { @@ -301,8 +314,8 @@ func validUnderflow(rule stack.Rule) bool { } } -func isUnconditionalAccept(rule stack.Rule) bool { - if !validUnderflow(rule) { +func isUnconditionalAccept(rule stack.Rule, ipv6 bool) bool { + if !validUnderflow(rule, ipv6) { return false } _, ok := rule.Target.(stack.AcceptTarget) diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go index 8ebdaff18..87e41abd8 100644 --- a/pkg/sentry/socket/netfilter/targets.go +++ b/pkg/sentry/socket/netfilter/targets.go @@ -218,8 +218,8 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal)) } - if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + if p := filter.Protocol; p != header.TCPProtocolNumber && p != header.UDPProtocolNumber { + return nil, fmt.Errorf("netfilter.SetEntries: bad proto %d", p) } var redirectTarget linux.XTRedirectTarget @@ -232,7 +232,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // RangeSize should be 1. if nfRange.RangeSize != 1 { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: bad rangesize %d", nfRange.RangeSize) } // TODO(gvisor.dev/issue/170): Check if the flags are valid. @@ -240,7 +240,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // For now, redirect target only supports destination port change. // Port range and IP range are not supported yet. if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: invalid range flags %d", nfRange.RangeIPV4.Flags) } target.RangeProtoSpecified = true @@ -249,7 +249,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // TODO(gvisor.dev/issue/170): Port range is not supported yet. if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: minport != maxport (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort) } // Convert port from big endian to little endian. diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9e2ebc7d4..2af2d8252 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -997,7 +997,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in return getSockOptTCP(t, ep, name, outLen) case linux.SOL_IPV6: - return getSockOptIPv6(t, ep, name, outLen) + return getSockOptIPv6(t, s, ep, name, outPtr, outLen) case linux.SOL_IP: return getSockOptIP(t, s, ep, name, outPtr, outLen, family) @@ -1455,7 +1455,7 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal } // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6. -func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) { +func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { switch name { case linux.IPV6_V6ONLY: if outLen < sizeOfInt32 { @@ -1508,10 +1508,50 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha vP := primitive.Int32(boolToInt32(v)) return &vP, nil - case linux.SO_ORIGINAL_DST: + case linux.IP6T_ORIGINAL_DST: // TODO(gvisor.dev/issue/170): ip6tables. return nil, syserr.ErrInvalidArgument + case linux.IP6T_SO_GET_INFO: + if outLen < linux.SizeOfIPTGetinfo { + return nil, syserr.ErrInvalidArgument + } + + // Only valid for raw IPv6 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true) + if err != nil { + return nil, err + } + return &info, nil + + case linux.IP6T_SO_GET_ENTRIES: + // IPTGetEntries is reused for IPv6. + if outLen < linux.SizeOfIPTGetEntries { + return nil, syserr.ErrInvalidArgument + } + // Only valid for raw IPv6 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen) + if err != nil { + return nil, err + } + return &entries, nil + default: emitUnimplementedEventIPv6(t, name) } @@ -1649,7 +1689,7 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in if stack == nil { return nil, syserr.ErrNoDevice } - info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr) + info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false) if err != nil { return nil, err } @@ -1722,7 +1762,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int return setSockOptTCP(t, ep, name, optVal) case linux.SOL_IPV6: - return setSockOptIPv6(t, ep, name, optVal) + return setSockOptIPv6(t, s, ep, name, optVal) case linux.SOL_IP: return setSockOptIP(t, s, ep, name, optVal) @@ -2027,7 +2067,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } // setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6. -func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error { +func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { switch name { case linux.IPV6_V6ONLY: if len(optVal) < sizeOfInt32 { @@ -2076,6 +2116,27 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0)) + case linux.IP6T_SO_SET_REPLACE: + if len(optVal) < linux.SizeOfIP6TReplace { + return syserr.ErrInvalidArgument + } + + // Only valid for raw IPv6 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return syserr.ErrNoDevice + } + // Stack must be a netstack stack. + return netfilter.SetEntries(stack.(*Stack).Stack, optVal, true) + + case linux.IP6T_SO_SET_ADD_COUNTERS: + // TODO(gvisor.dev/issue/170): Counter support. + return nil + default: emitUnimplementedEventIPv6(t, name) } @@ -2271,7 +2332,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in return syserr.ErrNoDevice } // Stack must be a netstack stack. - return netfilter.SetEntries(stack.(*Stack).Stack, optVal) + return netfilter.SetEntries(stack.(*Stack).Stack, optVal, false) case linux.IPT_SO_SET_ADD_COUNTERS: // TODO(gvisor.dev/issue/170): Counter support. diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 41ef4236b..30aa41db2 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -165,7 +165,11 @@ func EmptyNATTable() Table { } // GetTable returns a table by name. -func (it *IPTables) GetTable(name string) (Table, bool) { +func (it *IPTables) GetTable(name string, ipv6 bool) (Table, bool) { + // TODO(gvisor.dev/issue/3549): Enable IPv6. + if ipv6 { + return Table{}, false + } id, ok := nameToID[name] if !ok { return Table{}, false @@ -176,7 +180,11 @@ func (it *IPTables) GetTable(name string) (Table, bool) { } // ReplaceTable replaces or inserts table by name. -func (it *IPTables) ReplaceTable(name string, table Table) *tcpip.Error { +func (it *IPTables) ReplaceTable(name string, table Table, ipv6 bool) *tcpip.Error { + // TODO(gvisor.dev/issue/3549): Enable IPv6. + if ipv6 { + return tcpip.ErrInvalidOptionValue + } id, ok := nameToID[name] if !ok { return tcpip.ErrInvalidOptionValue diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go index 73274ada9..fbbd2f50f 100644 --- a/pkg/tcpip/stack/iptables_types.go +++ b/pkg/tcpip/stack/iptables_types.go @@ -155,6 +155,11 @@ type IPHeaderFilter struct { // Protocol matches the transport protocol. Protocol tcpip.TransportProtocolNumber + // CheckProtocol determines whether the Protocol field should be + // checked during matching. + // TODO(gvisor.dev/issue/3549): Check this field during matching. + CheckProtocol bool + // Dst matches the destination IP address. Dst tcpip.Address diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc index 685e513f8..78e1fa09d 100644 --- a/test/syscalls/linux/ip6tables.cc +++ b/test/syscalls/linux/ip6tables.cc @@ -34,6 +34,54 @@ constexpr size_t kEmptyStandardEntrySize = constexpr size_t kEmptyErrorEntrySize = sizeof(struct ip6t_entry) + sizeof(struct xt_error_target); +TEST(IP6TablesBasic, FailSockoptNonRaw) { + // Even if the user has CAP_NET_RAW, they shouldn't be able to use the + // ip6tables sockopts with a non-raw socket. + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info); + EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(ENOPROTOOPT)); + + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + +TEST(IP6TablesBasic, GetInfoErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info) - 1; + EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(IP6TablesBasic, GetEntriesErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. + struct ip6t_get_entries entries = {}; + socklen_t entries_size = sizeof(struct ip6t_get_entries) - 1; + snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + EXPECT_THAT( + getsockopt(sock, SOL_IPV6, IP6T_SO_GET_ENTRIES, &entries, &entries_size), + SyscallFailsWithErrno(EINVAL)); +} + // This tests the initial state of a machine with empty ip6tables via // getsockopt(IP6T_SO_GET_INFO). We don't have a guarantee that the iptables are // empty when running in native, but we can test that gVisor has the same -- cgit v1.2.3 From 8d75fc4883ca8c10fb615203993d56d33a9e36b6 Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Fri, 28 Aug 2020 14:29:16 -0700 Subject: Implement StatFS for various VFS2 filesystems. This mainly involved enabling kernfs' client filesystems to provide a StatFS implementation. Fixes #3411, #3515. PiperOrigin-RevId: 329009864 --- pkg/abi/linux/fs.go | 1 + pkg/sentry/fsimpl/devpts/devpts.go | 10 +++++++++- pkg/sentry/fsimpl/devpts/master.go | 1 + pkg/sentry/fsimpl/devpts/slave.go | 1 + pkg/sentry/fsimpl/fuse/fusefs.go | 8 +++++++- pkg/sentry/fsimpl/host/host.go | 1 + pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 1 + pkg/sentry/fsimpl/kernfs/filesystem.go | 5 ++--- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 16 +++++++++++++--- pkg/sentry/fsimpl/kernfs/kernfs.go | 5 +++++ pkg/sentry/fsimpl/kernfs/kernfs_test.go | 8 +++++--- pkg/sentry/fsimpl/kernfs/symlink.go | 1 + pkg/sentry/fsimpl/pipefs/pipefs.go | 8 +++++--- pkg/sentry/fsimpl/proc/filesystem.go | 7 +++++++ pkg/sentry/fsimpl/proc/subtasks.go | 9 +++++---- pkg/sentry/fsimpl/proc/task.go | 7 ++++--- pkg/sentry/fsimpl/proc/task_fds.go | 19 +++++++++++-------- pkg/sentry/fsimpl/proc/task_files.go | 2 ++ pkg/sentry/fsimpl/proc/tasks.go | 9 +++++---- pkg/sentry/fsimpl/proc/tasks_files.go | 2 ++ pkg/sentry/fsimpl/sockfs/sockfs.go | 9 +++++++-- pkg/sentry/fsimpl/sys/kcov.go | 3 ++- pkg/sentry/fsimpl/sys/sys.go | 14 ++++++++++++++ pkg/sentry/vfs/filesystem_impl_util.go | 13 +++++++++++++ test/syscalls/BUILD | 1 + test/syscalls/linux/pipe.cc | 12 ++++++++++++ test/syscalls/linux/proc.cc | 14 ++++++++++++++ test/syscalls/linux/socket.cc | 17 +++++++++++++++++ test/syscalls/linux/statfs.cc | 16 ++++++++++------ 29 files changed, 178 insertions(+), 42 deletions(-) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go index 2b1ef0d4e..0d921ed6f 100644 --- a/pkg/abi/linux/fs.go +++ b/pkg/abi/linux/fs.go @@ -29,6 +29,7 @@ const ( SYSFS_MAGIC = 0x62656572 TMPFS_MAGIC = 0x01021994 V9FS_MAGIC = 0x01021997 + FUSE_SUPER_MAGIC = 0x65735546 ) // Filesystem path limits, from uapi/linux/limits.h. diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 0eaff9087..57580f4d4 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -111,12 +111,13 @@ func (fs *filesystem) Release(ctx context.Context) { // rootInode is the root directory inode for the devpts mounts. type rootInode struct { - rootInodeRefs + implStatFS kernfs.AlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink kernfs.OrderedChildren + rootInodeRefs locks vfs.FileLocks @@ -240,3 +241,10 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, func (i *rootInode) DecRef(context.Context) { i.rootInodeRefs.DecRef(i.Destroy) } + +type implStatFS struct{} + +// StatFS implements kernfs.Inode.StatFS. +func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.DEVPTS_SUPER_MAGIC), nil +} diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 3bb397f71..60feb1993 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -30,6 +30,7 @@ import ( // masterInode is the inode for the master end of the Terminal. type masterInode struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeNotDirectory diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index 32e4e1908..a9da7af64 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -29,6 +29,7 @@ import ( // slaveInode is the inode for the slave end of the Terminal. type slaveInode struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeNotDirectory diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 9717c0e15..810819ae4 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -200,9 +200,9 @@ func (fs *filesystem) Release(ctx context.Context) { type inode struct { inodeRefs kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren kernfs.OrderedChildren locks vfs.FileLocks @@ -331,3 +331,9 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio func (i *inode) DecRef(context.Context) { i.inodeRefs.DecRef(i.Destroy) } + +// StatFS implements kernfs.Inode.StatFS. +func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { + // TODO(gvisor.dev/issues/3413): Complete the implementation of statfs. + return vfs.GenericStatFS(linux.FUSE_SUPER_MAGIC), nil +} diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 2d3821f33..7561f821c 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -186,6 +186,7 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe // inode implements kernfs.Inode. type inode struct { + kernfs.InodeNoStatFS kernfs.InodeNotDirectory kernfs.InodeNotSymlink diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 12adf727a..1ee089620 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -35,6 +35,7 @@ import ( // +stateify savable type DynamicBytesFile struct { InodeAttrs + InodeNoStatFS InodeNoopRefCount InodeNotDirectory InodeNotSymlink diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index e5d6b5c35..0e3011689 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -721,14 +721,13 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // StatFSAt implements vfs.FilesystemImpl.StatFSAt. func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) + _, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statfs{}, err } - // TODO(gvisor.dev/issue/1193): actually implement statfs. - return linux.Statfs{}, syserror.ENOSYS + return inode.StatFS(ctx, fs.VFSFilesystem()) } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index f442a5606..c0b863ba4 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -546,12 +546,13 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.D // // +stateify savable type StaticDirectory struct { - StaticDirectoryRefs - InodeNotSymlink - InodeDirectoryNoNewChildren InodeAttrs + InodeDirectoryNoNewChildren InodeNoDynamicLookup + InodeNoStatFS + InodeNotSymlink OrderedChildren + StaticDirectoryRefs locks vfs.FileLocks fdOpts GenericDirectoryFDOptions @@ -609,3 +610,12 @@ type AlwaysValid struct{} func (*AlwaysValid) Valid(context.Context) bool { return true } + +// InodeNoStatFS partially implements the Inode interface, where the client +// filesystem doesn't support statfs(2). +type InodeNoStatFS struct{} + +// StatFS implements Inode.StatFS. +func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return linux.Statfs{}, syserror.ENOSYS +} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index ca3685800..88fcd54aa 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -320,6 +320,11 @@ type Inode interface { // Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing // the inode on which Open() is being called. Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) + + // StatFS returns filesystem statistics for the client filesystem. This + // corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem + // doesn't support statfs(2), this should return ENOSYS. + StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) } type inodeRefs interface { diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index e376d1736..675587c6b 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -98,9 +98,10 @@ func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.S type readonlyDir struct { readonlyDirRefs attrs - kernfs.InodeNotSymlink - kernfs.InodeNoDynamicLookup kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNoDynamicLookup + kernfs.InodeNoStatFS + kernfs.InodeNotSymlink kernfs.OrderedChildren locks vfs.FileLocks @@ -137,9 +138,10 @@ func (d *readonlyDir) DecRef(context.Context) { type dir struct { dirRefs attrs - kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup + kernfs.InodeNotSymlink kernfs.OrderedChildren + kernfs.InodeNoStatFS locks vfs.FileLocks diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go index 2ab3f53fd..64731a3e4 100644 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ b/pkg/sentry/fsimpl/kernfs/symlink.go @@ -28,6 +28,7 @@ type StaticSymlink struct { InodeAttrs InodeNoopRefCount InodeSymlink + InodeNoStatFS target string } diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 2ca793db9..7053ad6db 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -143,14 +143,16 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth. return syserror.EPERM } -// TODO(gvisor.dev/issue/1193): kernfs does not provide a way to implement -// statfs, from which we should indicate PIPEFS_MAGIC. - // Open implements kernfs.Inode.Open. func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags, &i.locks) } +// StatFS implements kernfs.Inode.StatFS. +func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.PIPEFS_MAGIC), nil +} + // NewConnectedPipeFDs returns a pair of FileDescriptions representing the read // and write ends of a newly-created pipe, as for pipe(2) and pipe2(2). // diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index c350ec127..03b5941b9 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -121,3 +121,10 @@ func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64 type InternalData struct { Cgroups map[string]string } + +type implStatFS struct{} + +// StatFS implements kernfs.Inode.StatFS. +func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.PROC_SUPER_MAGIC), nil +} diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index 01c0efb3a..d57d94dbc 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -31,12 +31,13 @@ import ( // // +stateify savable type subtasksInode struct { - subtasksInodeRefs - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren + implStatFS + kernfs.AlwaysValid kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink kernfs.OrderedChildren - kernfs.AlwaysValid + subtasksInodeRefs locks vfs.FileLocks diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 66b557abd..dbdb5d929 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -32,12 +32,13 @@ import ( // // +stateify savable type taskInode struct { - taskInodeRefs - kernfs.InodeNotSymlink + implStatFS + kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNoDynamicLookup - kernfs.InodeAttrs + kernfs.InodeNotSymlink kernfs.OrderedChildren + taskInodeRefs locks vfs.FileLocks diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index 0527b2de8..3f0d78461 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -100,13 +100,14 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off // // +stateify savable type fdDirInode struct { + fdDir fdDirInodeRefs - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren + implStatFS + kernfs.AlwaysValid kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink kernfs.OrderedChildren - kernfs.AlwaysValid - fdDir } var _ kernfs.Inode = (*fdDirInode)(nil) @@ -185,6 +186,7 @@ func (i *fdDirInode) DecRef(context.Context) { // // +stateify savable type fdSymlink struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeSymlink @@ -233,13 +235,14 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen // // +stateify savable type fdInfoDirInode struct { + fdDir fdInfoDirInodeRefs - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren + implStatFS + kernfs.AlwaysValid kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink kernfs.OrderedChildren - kernfs.AlwaysValid - fdDir } var _ kernfs.Inode = (*fdInfoDirInode)(nil) diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 830b78949..356036b9b 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -648,6 +648,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset // // +stateify savable type exeSymlink struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeSymlink @@ -832,6 +833,7 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir // namespaceInode is a synthetic inode created to represent a namespace in // /proc/[pid]/ns/*. type namespaceInode struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeNotDirectory diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index 863c4467e..3ea00ab87 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -37,12 +37,13 @@ const ( // // +stateify savable type tasksInode struct { - tasksInodeRefs - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren + implStatFS + kernfs.AlwaysValid kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink kernfs.OrderedChildren - kernfs.AlwaysValid + tasksInodeRefs locks vfs.FileLocks diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 7d8983aa5..8c41729e4 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -32,6 +32,7 @@ import ( ) type selfSymlink struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeSymlink @@ -74,6 +75,7 @@ func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, } type threadSelfSymlink struct { + implStatFS kernfs.InodeAttrs kernfs.InodeNoopRefCount kernfs.InodeSymlink diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index c61818ff6..94a998568 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -81,10 +81,10 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe // inode implements kernfs.Inode. type inode struct { - kernfs.InodeNotDirectory - kernfs.InodeNotSymlink kernfs.InodeAttrs kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink } // Open implements kernfs.Inode.Open. @@ -92,6 +92,11 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr return nil, syserror.ENXIO } +// StatFS implements kernfs.Inode.StatFS. +func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.SOCKFS_MAGIC), nil +} + // NewDentry constructs and returns a sockfs dentry. // // Preconditions: mnt.Filesystem() must have been returned by NewFilesystem(). diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go index 92710d877..73f3d3309 100644 --- a/pkg/sentry/fsimpl/sys/kcov.go +++ b/pkg/sentry/fsimpl/sys/kcov.go @@ -39,8 +39,9 @@ func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) type kcovInode struct { kernfs.InodeAttrs kernfs.InodeNoopRefCount - kernfs.InodeNotSymlink kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + implStatFS } func (i *kcovInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index ea30a4ec2..39952d2d0 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -163,9 +163,16 @@ func (d *dir) DecRef(context.Context) { d.dirRefs.DecRef(d.Destroy) } +// StatFS implements kernfs.Inode.StatFS. +func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil +} + // cpuFile implements kernfs.Inode. type cpuFile struct { + implStatFS kernfs.DynamicBytesFile + maxCores uint } @@ -182,3 +189,10 @@ func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode li d.Init(c) return d } + +type implStatFS struct{} + +// StatFS implements kernfs.Inode.StatFS. +func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { + return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil +} diff --git a/pkg/sentry/vfs/filesystem_impl_util.go b/pkg/sentry/vfs/filesystem_impl_util.go index 465e610e0..2620cf975 100644 --- a/pkg/sentry/vfs/filesystem_impl_util.go +++ b/pkg/sentry/vfs/filesystem_impl_util.go @@ -16,6 +16,9 @@ package vfs import ( "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/usermem" ) // GenericParseMountOptions parses a comma-separated list of options of the @@ -41,3 +44,13 @@ func GenericParseMountOptions(str string) map[string]string { } return m } + +// GenericStatFS returns a statfs struct filled with the common fields for a +// general filesystem. This is analogous to Linux's fs/libfs.cs:simple_statfs(). +func GenericStatFS(fsMagic uint64) linux.Statfs { + return linux.Statfs{ + Type: fsMagic, + BlockSize: usermem.PageSize, + NameLength: linux.NAME_MAX, + } +} diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 65e8299c3..f949bc0e3 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -803,6 +803,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:statfs_test", + use_tmpfs = True, # Test specifically relies on TEST_TMPDIR to be tmpfs. ) syscall_test( diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index 34291850d..c097c9187 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -13,7 +13,9 @@ // limitations under the License. #include /* Obtain O_* constant definitions */ +#include #include +#include #include #include @@ -198,6 +200,16 @@ TEST_P(PipeTest, NonBlocking) { SyscallFailsWithErrno(EWOULDBLOCK)); } +TEST(PipeTest, StatFS) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + struct statfs st; + EXPECT_THAT(fstatfs(fds[0], &st), SyscallSucceeds()); + EXPECT_EQ(st.f_type, PIPEFS_MAGIC); + EXPECT_EQ(st.f_bsize, getpagesize()); + EXPECT_EQ(st.f_namelen, NAME_MAX); +} + TEST(Pipe2Test, CloExec) { int fds[2]; ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds()); diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index d6b875dbf..b73189e55 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -2159,6 +2161,18 @@ TEST(Proc, PidTidIOAccounting) { noop.Join(); } +TEST(Proc, Statfs) { + struct statfs st; + EXPECT_THAT(statfs("/proc", &st), SyscallSucceeds()); + if (IsRunningWithVFS1()) { + EXPECT_EQ(st.f_type, ANON_INODE_FS_MAGIC); + } else { + EXPECT_EQ(st.f_type, PROC_SUPER_MAGIC); + } + EXPECT_EQ(st.f_bsize, getpagesize()); + EXPECT_EQ(st.f_namelen, NAME_MAX); +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index c20cd3fcc..e680d3dd7 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -26,6 +27,9 @@ namespace gvisor { namespace testing { +// From linux/magic.h, but we can't depend on linux headers here. +#define SOCKFS_MAGIC 0x534F434B + TEST(SocketTest, UnixSocketPairProtocol) { int socks[2]; ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks), @@ -94,6 +98,19 @@ TEST(SocketTest, UnixSocketStat) { } } +TEST(SocketTest, UnixSocketStatFS) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + struct statfs st; + EXPECT_THAT(fstatfs(bound.get(), &st), SyscallSucceeds()); + EXPECT_EQ(st.f_type, SOCKFS_MAGIC); + EXPECT_EQ(st.f_bsize, getpagesize()); + EXPECT_EQ(st.f_namelen, NAME_MAX); +} + using SocketOpenTest = ::testing::TestWithParam; // UDS cannot be opened. diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc index aca51d30f..49f2f156c 100644 --- a/test/syscalls/linux/statfs.cc +++ b/test/syscalls/linux/statfs.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include @@ -26,6 +27,10 @@ namespace testing { namespace { +// From linux/magic.h. For some reason, not defined in the headers for some +// build environments. +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 + TEST(StatfsTest, CannotStatBadPath) { auto temp_file = NewTempAbsPathInDir("/tmp"); @@ -38,19 +43,18 @@ TEST(StatfsTest, InternalTmpfs) { struct statfs st; EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds()); + // Note: We could be an overlay or goferfs on some configurations. + EXPECT_TRUE(st.f_type == TMPFS_MAGIC || st.f_type == OVERLAYFS_SUPER_MAGIC || + st.f_type == V9FS_MAGIC); } TEST(StatfsTest, InternalDevShm) { struct statfs st; EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); -} - -TEST(StatfsTest, NameLen) { - struct statfs st; - EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); // This assumes that /dev/shm is tmpfs. - EXPECT_EQ(st.f_namelen, NAME_MAX); + // Note: We could be an overlay on some configurations. + EXPECT_TRUE(st.f_type == TMPFS_MAGIC || st.f_type == OVERLAYFS_SUPER_MAGIC); } TEST(FstatfsTest, CannotStatBadFd) { -- cgit v1.2.3 From 114d1268b26e71c99fabb9d5b13d7c95c7c5c34b Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 1 Sep 2020 12:59:49 -0700 Subject: [go-marshal] Enable auto-marshalling for fs/tty. PiperOrigin-RevId: 329564614 --- pkg/abi/linux/tty.go | 1 + pkg/sentry/fs/tty/BUILD | 1 + pkg/sentry/fs/tty/line_discipline.go | 33 ++++++++++++----------------- pkg/sentry/fs/tty/master.go | 33 +++++++++++++++++------------ pkg/sentry/fs/tty/queue.go | 12 +++++------ pkg/sentry/fs/tty/slave.go | 33 +++++++++++++++++------------ pkg/sentry/fs/tty/terminal.go | 21 ++++++++---------- pkg/sentry/fsimpl/devpts/BUILD | 2 ++ pkg/sentry/fsimpl/devpts/line_discipline.go | 33 ++++++++++++----------------- pkg/sentry/fsimpl/devpts/master.go | 33 +++++++++++++++++------------ pkg/sentry/fsimpl/devpts/queue.go | 12 +++++------ pkg/sentry/fsimpl/devpts/slave.go | 33 +++++++++++++++++------------ pkg/sentry/fsimpl/devpts/terminal.go | 21 ++++++++---------- 13 files changed, 140 insertions(+), 128 deletions(-) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go index e640969a6..5a5ff0aa2 100644 --- a/pkg/abi/linux/tty.go +++ b/pkg/abi/linux/tty.go @@ -341,6 +341,7 @@ var DefaultSlaveTermios = KernelTermios{ // include/uapi/asm-generic/termios.h. // // +stateify savable +// +marshal type WindowSize struct { Rows uint16 Cols uint16 diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 5cb0e0417..b3f5a8244 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", + "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 2e9dd2d55..b6bc011a9 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -115,27 +116,23 @@ func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { } // getTermios gets the linux.Termios for the tty. -func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.RLock() defer l.termiosMu.RUnlock() // We must copy a Termios struct, not KernelTermios. t := l.termios.ToTermios() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyOut(task, args[2].Pointer()) return 0, err } // setTermios sets a linux.Termios for the tty. -func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.Lock() defer l.termiosMu.Unlock() oldCanonEnabled := l.termios.LEnabled(linux.ICANON) // We must copy a Termios struct, not KernelTermios. var t linux.Termios - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyIn(task, args[2].Pointer()) l.termios.FromTermios(t) // If canonical mode is turned off, move bytes from inQueue's wait @@ -152,21 +149,17 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc return 0, err } -func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) windowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyOut(t, args[2].Pointer()) return err } -func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) setWindowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyIn(t, args[2].Pointer()) return err } @@ -182,8 +175,8 @@ func (l *lineDiscipline) slaveReadiness() waiter.EventMask { return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) } -func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.inQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error { + return l.inQueue.readableSize(t, args) } func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { @@ -217,8 +210,8 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ return 0, syserror.ErrWouldBlock } -func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.outQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, args arch.SyscallArguments) error { + return l.outQueue.readableSize(t, args) } func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index e00746017..175457297 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -20,10 +20,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -152,46 +154,51 @@ func (mf *masterFileOperations) Write(ctx context.Context, _ *fs.File, src userm // Ioctl implements fs.FileOperations.Ioctl. func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the output queue read buffer. - return 0, mf.t.ld.outputQueueReadSize(ctx, io, args) + return 0, mf.t.ld.outputQueueReadSize(t, args) case linux.TCGETS: // N.B. TCGETS on the master actually returns the configuration // of the slave end. - return mf.t.ld.getTermios(ctx, io, args) + return mf.t.ld.getTermios(t, args) case linux.TCSETS: // N.B. TCSETS on the master actually affects the configuration // of the slave end. - return mf.t.ld.setTermios(ctx, io, args) + return mf.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return mf.t.ld.setTermios(ctx, io, args) + return mf.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mf.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(mf.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCSPTLCK: // TODO(b/29356795): Implement pty locking. For now just pretend we do. return 0, nil case linux.TIOCGWINSZ: - return 0, mf.t.ld.windowSize(ctx, io, args) + return 0, mf.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, mf.t.ld.setWindowSize(ctx, io, args) + return 0, mf.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, mf.t.setControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mf.t.setControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, mf.t.releaseControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mf.t.releaseControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return mf.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */) + return mf.t.foregroundProcessGroup(ctx, args, true /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return mf.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */) + return mf.t.setForegroundProcessGroup(ctx, args, true /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index c5d7ec717..f0a18c75a 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -19,10 +19,12 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -85,17 +87,15 @@ func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask { } // readableSize writes the number of readable bytes to userspace. -func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (q *queue) readableSize(t *kernel.Task, args arch.SyscallArguments) error { q.mu.Lock() defer q.mu.Unlock() - var size int32 + size := primitive.Int32(0) if q.readable { - size = int32(len(q.readBuf)) + size = primitive.Int32(len(q.readBuf)) } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := size.CopyOut(t, args[2].Pointer()) return err } diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 7c7292687..933d2c3ff 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -20,9 +20,11 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -136,39 +138,44 @@ func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src userme // Ioctl implements fs.FileOperations.Ioctl. func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the input queue read buffer. - return 0, sf.si.t.ld.inputQueueReadSize(ctx, io, args) + return 0, sf.si.t.ld.inputQueueReadSize(t, args) case linux.TCGETS: - return sf.si.t.ld.getTermios(ctx, io, args) + return sf.si.t.ld.getTermios(t, args) case linux.TCSETS: - return sf.si.t.ld.setTermios(ctx, io, args) + return sf.si.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return sf.si.t.ld.setTermios(ctx, io, args) + return sf.si.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sf.si.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(sf.si.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCGWINSZ: - return 0, sf.si.t.ld.windowSize(ctx, io, args) + return 0, sf.si.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, sf.si.t.ld.setWindowSize(ctx, io, args) + return 0, sf.si.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, sf.si.t.setControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sf.si.t.setControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, sf.si.t.releaseControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sf.si.t.releaseControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return sf.si.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */) + return sf.si.t.foregroundProcessGroup(ctx, args, false /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return sf.si.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */) + return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index ddcccf4da..56b59632d 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange @@ -64,7 +64,7 @@ func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal // setControllingTTY makes tm the controlling terminal of the calling thread // group. -func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) setControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("setControllingTTY must be called from a task context") @@ -75,7 +75,7 @@ func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args a // releaseControllingTTY removes tm as the controlling terminal of the calling // thread group. -func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) releaseControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("releaseControllingTTY must be called from a task context") @@ -85,7 +85,7 @@ func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, ar } // foregroundProcessGroup gets the process group ID of tm's foreground process. -func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) foregroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("foregroundProcessGroup must be called from a task context") @@ -97,24 +97,21 @@ func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, a } // Write it out to *arg. - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{ - AddressSpaceActive: true, - }) + retP := primitive.Int32(ret) + _, err = retP.CopyOut(task, args[2].Pointer()) return 0, err } // foregroundProcessGroup sets tm's foreground process. -func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("setForegroundProcessGroup must be called from a task context") } // Read in the process group ID. - var pgid int32 - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + var pgid primitive.Int32 + if _, err := pgid.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 3f64fab3a..3e8c5e3fd 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -43,6 +43,8 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", + "//tools/go_marshal/marshal", + "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go index f7bc325d1..b954c1ba1 100644 --- a/pkg/sentry/fsimpl/devpts/line_discipline.go +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -113,27 +114,23 @@ func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { } // getTermios gets the linux.Termios for the tty. -func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.RLock() defer l.termiosMu.RUnlock() // We must copy a Termios struct, not KernelTermios. t := l.termios.ToTermios() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyOut(task, args[2].Pointer()) return 0, err } // setTermios sets a linux.Termios for the tty. -func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { +func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) { l.termiosMu.Lock() defer l.termiosMu.Unlock() oldCanonEnabled := l.termios.LEnabled(linux.ICANON) // We must copy a Termios struct, not KernelTermios. var t linux.Termios - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := t.CopyIn(task, args[2].Pointer()) l.termios.FromTermios(t) // If canonical mode is turned off, move bytes from inQueue's wait @@ -150,21 +147,17 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc return 0, err } -func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) windowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyOut(t, args[2].Pointer()) return err } -func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (l *lineDiscipline) setWindowSize(t *kernel.Task, args arch.SyscallArguments) error { l.sizeMu.Lock() defer l.sizeMu.Unlock() - _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := l.size.CopyIn(t, args[2].Pointer()) return err } @@ -180,8 +173,8 @@ func (l *lineDiscipline) slaveReadiness() waiter.EventMask { return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) } -func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.inQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error { + return l.inQueue.readableSize(t, io, args) } func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { @@ -215,8 +208,8 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ return 0, syserror.ErrWouldBlock } -func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { - return l.outQueue.readableSize(ctx, io, args) +func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error { + return l.outQueue.readableSize(t, io, args) } func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 60feb1993..3422db6a4 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -20,12 +20,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // masterInode is the inode for the master end of the Terminal. @@ -131,46 +133,51 @@ func (mfd *masterFileDescription) Write(ctx context.Context, src usermem.IOSeque // Ioctl implements vfs.FileDescriptionImpl.Ioctl. func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the output queue read buffer. - return 0, mfd.t.ld.outputQueueReadSize(ctx, io, args) + return 0, mfd.t.ld.outputQueueReadSize(t, io, args) case linux.TCGETS: // N.B. TCGETS on the master actually returns the configuration // of the slave end. - return mfd.t.ld.getTermios(ctx, io, args) + return mfd.t.ld.getTermios(t, args) case linux.TCSETS: // N.B. TCSETS on the master actually affects the configuration // of the slave end. - return mfd.t.ld.setTermios(ctx, io, args) + return mfd.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return mfd.t.ld.setTermios(ctx, io, args) + return mfd.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mfd.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(mfd.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCSPTLCK: // TODO(b/29356795): Implement pty locking. For now just pretend we do. return 0, nil case linux.TIOCGWINSZ: - return 0, mfd.t.ld.windowSize(ctx, io, args) + return 0, mfd.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, mfd.t.ld.setWindowSize(ctx, io, args) + return 0, mfd.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, mfd.t.setControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mfd.t.setControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, mfd.t.releaseControllingTTY(ctx, io, args, true /* isMaster */) + return 0, mfd.t.releaseControllingTTY(ctx, args, true /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return mfd.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */) + return mfd.t.foregroundProcessGroup(ctx, args, true /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return mfd.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */) + return mfd.t.setForegroundProcessGroup(ctx, args, true /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index 331c13997..08eca2589 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -19,10 +19,12 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // waitBufMaxBytes is the maximum size of a wait buffer. It is based on @@ -83,17 +85,15 @@ func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask { } // readableSize writes the number of readable bytes to userspace. -func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { +func (q *queue) readableSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error { q.mu.Lock() defer q.mu.Unlock() - var size int32 + size := primitive.Int32(0) if q.readable { - size = int32(len(q.readBuf)) + size = primitive.Int32(len(q.readBuf)) } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := size.CopyOut(t, args[2].Pointer()) return err } diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index a9da7af64..5f4b474b3 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -20,11 +20,13 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // slaveInode is the inode for the slave end of the Terminal. @@ -135,39 +137,44 @@ func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequen // Ioctl implements vfs.FileDescriptionImpl.Ioctl. func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + switch cmd := args[1].Uint(); cmd { case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ // Get the number of bytes in the input queue read buffer. - return 0, sfd.inode.t.ld.inputQueueReadSize(ctx, io, args) + return 0, sfd.inode.t.ld.inputQueueReadSize(t, io, args) case linux.TCGETS: - return sfd.inode.t.ld.getTermios(ctx, io, args) + return sfd.inode.t.ld.getTermios(t, args) case linux.TCSETS: - return sfd.inode.t.ld.setTermios(ctx, io, args) + return sfd.inode.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. - return sfd.inode.t.ld.setTermios(ctx, io, args) + return sfd.inode.t.ld.setTermios(t, args) case linux.TIOCGPTN: - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sfd.inode.t.n), usermem.IOOpts{ - AddressSpaceActive: true, - }) + nP := primitive.Uint32(sfd.inode.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCGWINSZ: - return 0, sfd.inode.t.ld.windowSize(ctx, io, args) + return 0, sfd.inode.t.ld.windowSize(t, args) case linux.TIOCSWINSZ: - return 0, sfd.inode.t.ld.setWindowSize(ctx, io, args) + return 0, sfd.inode.t.ld.setWindowSize(t, args) case linux.TIOCSCTTY: // Make the given terminal the controlling terminal of the // calling process. - return 0, sfd.inode.t.setControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCNOTTY: // Release this process's controlling terminal. - return 0, sfd.inode.t.releaseControllingTTY(ctx, io, args, false /* isMaster */) + return 0, sfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */) case linux.TIOCGPGRP: // Get the foreground process group. - return sfd.inode.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */) + return sfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */) case linux.TIOCSPGRP: // Set the foreground process group. - return sfd.inode.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */) + return sfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) default: maybeEmitUnimplementedEvent(ctx, cmd) return 0, syserror.ENOTTY diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go index 7d2781c54..e88eb6360 100644 --- a/pkg/sentry/fsimpl/devpts/terminal.go +++ b/pkg/sentry/fsimpl/devpts/terminal.go @@ -19,7 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // Terminal is a pseudoterminal. @@ -54,7 +54,7 @@ func newTerminal(n uint32) *Terminal { // setControllingTTY makes tm the controlling terminal of the calling thread // group. -func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) setControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("setControllingTTY must be called from a task context") @@ -65,7 +65,7 @@ func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args a // releaseControllingTTY removes tm as the controlling terminal of the calling // thread group. -func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { +func (tm *Terminal) releaseControllingTTY(ctx context.Context, args arch.SyscallArguments, isMaster bool) error { task := kernel.TaskFromContext(ctx) if task == nil { panic("releaseControllingTTY must be called from a task context") @@ -75,7 +75,7 @@ func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, ar } // foregroundProcessGroup gets the process group ID of tm's foreground process. -func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) foregroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("foregroundProcessGroup must be called from a task context") @@ -87,24 +87,21 @@ func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, a } // Write it out to *arg. - _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{ - AddressSpaceActive: true, - }) + retP := primitive.Int32(ret) + _, err = retP.CopyOut(task, args[2].Pointer()) return 0, err } // foregroundProcessGroup sets tm's foreground process. -func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { +func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, args arch.SyscallArguments, isMaster bool) (uintptr, error) { task := kernel.TaskFromContext(ctx) if task == nil { panic("setForegroundProcessGroup must be called from a task context") } // Read in the process group ID. - var pgid int32 - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + var pgid primitive.Int32 + if _, err := pgid.CopyIn(task, args[2].Pointer()); err != nil { return 0, err } -- cgit v1.2.3 From 4332affa833c6a11326aa5db366419ba7445cdaf Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 1 Sep 2020 14:41:54 -0700 Subject: Refactor tty codebase to use master-replica terminology. Updates #2972 PiperOrigin-RevId: 329584905 --- pkg/abi/linux/dev.go | 6 +- pkg/abi/linux/tty.go | 6 +- pkg/sentry/fs/host/tty.go | 2 +- pkg/sentry/fs/tty/BUILD | 2 +- pkg/sentry/fs/tty/dir.go | 46 +++--- pkg/sentry/fs/tty/fs.go | 4 +- pkg/sentry/fs/tty/line_discipline.go | 22 +-- pkg/sentry/fs/tty/master.go | 4 +- pkg/sentry/fs/tty/queue.go | 2 +- pkg/sentry/fs/tty/replica.go | 186 +++++++++++++++++++++ pkg/sentry/fs/tty/slave.go | 185 --------------------- pkg/sentry/fs/tty/terminal.go | 18 +-- pkg/sentry/fs/tty/tty_test.go | 4 +- pkg/sentry/fsimpl/devpts/BUILD | 2 +- pkg/sentry/fsimpl/devpts/devpts.go | 34 ++-- pkg/sentry/fsimpl/devpts/devpts_test.go | 4 +- pkg/sentry/fsimpl/devpts/line_discipline.go | 22 +-- pkg/sentry/fsimpl/devpts/master.go | 4 +- pkg/sentry/fsimpl/devpts/queue.go | 2 +- pkg/sentry/fsimpl/devpts/replica.go | 205 +++++++++++++++++++++++ pkg/sentry/fsimpl/devpts/slave.go | 205 ----------------------- pkg/sentry/fsimpl/devpts/terminal.go | 16 +- pkg/sentry/fsimpl/host/host.go | 2 +- runsc/boot/fs.go | 12 +- runsc/cmd/exec.go | 4 +- runsc/console/console.go | 18 +-- runsc/container/console_test.go | 10 +- runsc/container/multi_container_test.go | 2 +- runsc/sandbox/sandbox.go | 4 +- test/syscalls/linux/pty.cc | 243 ++++++++++++++-------------- test/syscalls/linux/pty_root.cc | 4 +- test/util/pty_util.cc | 10 +- test/util/pty_util.h | 8 +- 33 files changed, 651 insertions(+), 647 deletions(-) create mode 100644 pkg/sentry/fs/tty/replica.go delete mode 100644 pkg/sentry/fs/tty/slave.go create mode 100644 pkg/sentry/fsimpl/devpts/replica.go delete mode 100644 pkg/sentry/fsimpl/devpts/slave.go (limited to 'pkg/abi') diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go index 192e2093b..7771650b3 100644 --- a/pkg/abi/linux/dev.go +++ b/pkg/abi/linux/dev.go @@ -54,9 +54,9 @@ const ( // Unix98 PTY masters. UNIX98_PTY_MASTER_MAJOR = 128 - // UNIX98_PTY_SLAVE_MAJOR is the initial major device number for - // Unix98 PTY slaves. - UNIX98_PTY_SLAVE_MAJOR = 136 + // UNIX98_PTY_REPLICA_MAJOR is the initial major device number for + // Unix98 PTY replicas. + UNIX98_PTY_REPLICA_MAJOR = 136 ) // Minor device numbers for TTYAUX_MAJOR. diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go index 5a5ff0aa2..47e65d9fb 100644 --- a/pkg/abi/linux/tty.go +++ b/pkg/abi/linux/tty.go @@ -325,9 +325,9 @@ var MasterTermios = KernelTermios{ OutputSpeed: 38400, } -// DefaultSlaveTermios is the default terminal configuration of the slave end -// of a Unix98 pseudoterminal. -var DefaultSlaveTermios = KernelTermios{ +// DefaultReplicaTermios is the default terminal configuration of the replica +// end of a Unix98 pseudoterminal. +var DefaultReplicaTermios = KernelTermios{ InputFlags: ICRNL | IXON, OutputFlags: OPOST | ONLCR, ControlFlags: B38400 | CS8 | CREAD, diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 67a807f9d..87d56a51d 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -54,7 +54,7 @@ type TTYFileOperations struct { func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File { return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{ fileOperations: fileOperations{iops: iops}, - termios: linux.DefaultSlaveTermios, + termios: linux.DefaultReplicaTermios, }) } diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index b3f5a8244..fdd5a40d5 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -10,7 +10,7 @@ go_library( "line_discipline.go", "master.go", "queue.go", - "slave.go", + "replica.go", "terminal.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 463f6189e..c2da80bc2 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -37,14 +37,14 @@ import ( // This indirectly manages all terminals within the mount. // // New Terminals are created by masterInodeOperations.GetFile, which registers -// the slave Inode in the this directory for discovery via Lookup/Readdir. The -// slave inode is unregistered when the master file is Released, as the slave +// the replica Inode in the this directory for discovery via Lookup/Readdir. The +// replica inode is unregistered when the master file is Released, as the replica // is no longer discoverable at that point. // // References on the underlying Terminal are held by masterFileOperations and -// slaveInodeOperations. +// replicaInodeOperations. // -// masterInodeOperations and slaveInodeOperations hold a pointer to +// masterInodeOperations and replicaInodeOperations hold a pointer to // dirInodeOperations, which is reference counted by the refcount their // corresponding Dirents hold on their parent (this directory). // @@ -76,16 +76,16 @@ type dirInodeOperations struct { // master is the master PTY inode. master *fs.Inode - // slaves contains the slave inodes reachable from the directory. + // replicas contains the replica inodes reachable from the directory. // - // A new slave is added by allocateTerminal and is removed by + // A new replica is added by allocateTerminal and is removed by // masterFileOperations.Release. // - // A reference is held on every slave in the map. - slaves map[uint32]*fs.Inode + // A reference is held on every replica in the map. + replicas map[uint32]*fs.Inode // dentryMap is a SortedDentryMap used to implement Readdir containing - // the master and all entries in slaves. + // the master and all entries in replicas. dentryMap *fs.SortedDentryMap // next is the next pty index to use. @@ -101,7 +101,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode { d := &dirInodeOperations{ InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0555), linux.DEVPTS_SUPER_MAGIC), msrc: m, - slaves: make(map[uint32]*fs.Inode), + replicas: make(map[uint32]*fs.Inode), dentryMap: fs.NewSortedDentryMap(nil), } // Linux devpts uses a default mode of 0000 for ptmx which can be @@ -133,7 +133,7 @@ func (d *dirInodeOperations) Release(ctx context.Context) { defer d.mu.Unlock() d.master.DecRef(ctx) - if len(d.slaves) != 0 { + if len(d.replicas) != 0 { panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d)) } } @@ -149,14 +149,14 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str return fs.NewDirent(ctx, d.master, name), nil } - // Slave number? + // Replica number? n, err := strconv.ParseUint(name, 10, 32) if err != nil { // Not found. return nil, syserror.ENOENT } - s, ok := d.slaves[uint32(n)] + s, ok := d.replicas[uint32(n)] if !ok { return nil, syserror.ENOENT } @@ -236,7 +236,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e return nil, syserror.ENOMEM } - if _, ok := d.slaves[n]; ok { + if _, ok := d.replicas[n]; ok { panic(fmt.Sprintf("pty index collision; index %d already exists", n)) } @@ -244,19 +244,19 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e d.next++ // The reference returned by newTerminal is returned to the caller. - // Take another for the slave inode. + // Take another for the replica inode. t.IncRef() // Create a pts node. The owner is based on the context that opens // ptmx. creds := auth.CredentialsFromContext(ctx) uid, gid := creds.EffectiveKUID, creds.EffectiveKGID - slave := newSlaveInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666)) + replica := newReplicaInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666)) - d.slaves[n] = slave + d.replicas[n] = replica d.dentryMap.Add(strconv.FormatUint(uint64(n), 10), fs.DentAttr{ - Type: slave.StableAttr.Type, - InodeID: slave.StableAttr.InodeID, + Type: replica.StableAttr.Type, + InodeID: replica.StableAttr.InodeID, }) return t, nil @@ -267,18 +267,18 @@ func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) { d.mu.Lock() defer d.mu.Unlock() - // The slave end disappears from the directory when the master end is - // closed, even if the slave end is open elsewhere. + // The replica end disappears from the directory when the master end is + // closed, even if the replica end is open elsewhere. // // N.B. since we're using a backdoor method to remove a directory entry // we won't properly fire inotify events like Linux would. - s, ok := d.slaves[t.n] + s, ok := d.replicas[t.n] if !ok { panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d)) } s.DecRef(ctx) - delete(d.slaves, t.n) + delete(d.replicas, t.n) d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10)) } diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index 2d4d44bf3..13f4901db 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -79,8 +79,8 @@ type superOperations struct{} // // It always returns true, forcing a Lookup for all entries. // -// Slave entries are dropped from dir when their master is closed, so an -// existing slave Dirent in the tree is not sufficient to guarantee that it +// Replica entries are dropped from dir when their master is closed, so an +// existing replica Dirent in the tree is not sufficient to guarantee that it // still exists on the filesystem. func (superOperations) Revalidate(context.Context, string, *fs.Inode, *fs.Inode) bool { return true diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index b6bc011a9..b34f4a0eb 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -44,7 +44,7 @@ const ( ) // lineDiscipline dictates how input and output are handled between the -// pseudoterminal (pty) master and slave. It can be configured to alter I/O, +// pseudoterminal (pty) master and replica. It can be configured to alter I/O, // modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man // pages are good resources for how to affect the line discipline: // @@ -55,8 +55,8 @@ const ( // // lineDiscipline has a simple structure but supports a multitude of options // (see the above man pages). It consists of two queues of bytes: one from the -// terminal master to slave (the input queue) and one from slave to master (the -// output queue). When bytes are written to one end of the pty, the line +// terminal master to replica (the input queue) and one from replica to master +// (the output queue). When bytes are written to one end of the pty, the line // discipline reads the bytes, modifies them or takes special action if // required, and enqueues them to be read by the other end of the pty: // @@ -65,7 +65,7 @@ const ( // | (inputQueueWrite) +-------------+ (inputQueueRead) | // | | // | v -// masterFD slaveFD +// masterFD replicaFD // ^ | // | | // | output to terminal +--------------+ output from process | @@ -104,8 +104,8 @@ type lineDiscipline struct { // masterWaiter is used to wait on the master end of the TTY. masterWaiter waiter.Queue `state:"zerovalue"` - // slaveWaiter is used to wait on the slave end of the TTY. - slaveWaiter waiter.Queue `state:"zerovalue"` + // replicaWaiter is used to wait on the replica end of the TTY. + replicaWaiter waiter.Queue `state:"zerovalue"` } func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { @@ -143,7 +143,7 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument l.inQueue.pushWaitBufLocked(l) l.inQueue.readable = true l.inQueue.mu.Unlock() - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return 0, err @@ -169,7 +169,7 @@ func (l *lineDiscipline) masterReadiness() waiter.EventMask { return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios) } -func (l *lineDiscipline) slaveReadiness() waiter.EventMask { +func (l *lineDiscipline) replicaReadiness() waiter.EventMask { l.termiosMu.RLock() defer l.termiosMu.RUnlock() return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) @@ -189,7 +189,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque if n > 0 { l.masterWaiter.Notify(waiter.EventOut) if pushed { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return n, nil } @@ -204,7 +204,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) return n, nil } return 0, syserror.ErrWouldBlock @@ -222,7 +222,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventOut) + l.replicaWaiter.Notify(waiter.EventOut) if pushed { l.masterWaiter.Notify(waiter.EventIn) } diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 175457297..bebf90ffa 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -166,11 +166,11 @@ func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io userme return 0, mf.t.ld.outputQueueReadSize(t, args) case linux.TCGETS: // N.B. TCGETS on the master actually returns the configuration - // of the slave end. + // of the replica end. return mf.t.ld.getTermios(t, args) case linux.TCSETS: // N.B. TCSETS on the master actually affects the configuration - // of the slave end. + // of the replica end. return mf.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index f0a18c75a..e070a1b71 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -34,7 +34,7 @@ import ( const waitBufMaxBytes = 131072 // queue represents one of the input or output queues between a pty master and -// slave. Bytes written to a queue are added to the read buffer until it is +// replica. Bytes written to a queue are added to the read buffer until it is // full, at which point they are written to the wait buffer. Bytes are // processed (i.e. undergo termios transformations) as they are added to the // read buffer. The read buffer is readable when its length is nonzero and diff --git a/pkg/sentry/fs/tty/replica.go b/pkg/sentry/fs/tty/replica.go new file mode 100644 index 000000000..cb6cd6864 --- /dev/null +++ b/pkg/sentry/fs/tty/replica.go @@ -0,0 +1,186 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tty + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" +) + +// LINT.IfChange + +// replicaInodeOperations are the fs.InodeOperations for the replica end of the +// Terminal (pts file). +// +// +stateify savable +type replicaInodeOperations struct { + fsutil.SimpleFileInode + + // d is the containing dir. + d *dirInodeOperations + + // t is the connected Terminal. + t *Terminal +} + +var _ fs.InodeOperations = (*replicaInodeOperations)(nil) + +// newReplicaInode creates an fs.Inode for the replica end of a terminal. +// +// newReplicaInode takes ownership of t. +func newReplicaInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode { + iops := &replicaInodeOperations{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, owner, p, linux.DEVPTS_SUPER_MAGIC), + d: d, + t: t, + } + + return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{ + DeviceID: ptsDevice.DeviceID(), + // N.B. Linux always uses inode id = tty index + 3. See + // fs/devpts/inode.c:devpts_pty_new. + // + // TODO(b/75267214): Since ptsDevice must be shared between + // different mounts, we must not assign fixed numbers. + InodeID: ptsDevice.NextIno(), + Type: fs.CharacterDevice, + // See fs/devpts/inode.c:devpts_fill_super. + BlockSize: 1024, + DeviceFileMajor: linux.UNIX98_PTY_REPLICA_MAJOR, + DeviceFileMinor: t.n, + }) +} + +// Release implements fs.InodeOperations.Release. +func (si *replicaInodeOperations) Release(ctx context.Context) { + si.t.DecRef(ctx) +} + +// Truncate implements fs.InodeOperations.Truncate. +func (*replicaInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// GetFile implements fs.InodeOperations.GetFile. +// +// This may race with destruction of the terminal. If the terminal is gone, it +// returns ENOENT. +func (si *replicaInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, d, flags, &replicaFileOperations{si: si}), nil +} + +// replicaFileOperations are the fs.FileOperations for the replica end of a +// terminal. +// +// +stateify savable +type replicaFileOperations struct { + fsutil.FilePipeSeek `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileNoFsync `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + // si is the inode operations. + si *replicaInodeOperations +} + +var _ fs.FileOperations = (*replicaFileOperations)(nil) + +// Release implements fs.FileOperations.Release. +func (sf *replicaFileOperations) Release(context.Context) { +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (sf *replicaFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + sf.si.t.ld.replicaWaiter.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (sf *replicaFileOperations) EventUnregister(e *waiter.Entry) { + sf.si.t.ld.replicaWaiter.EventUnregister(e) +} + +// Readiness implements waiter.Waitable.Readiness. +func (sf *replicaFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + return sf.si.t.ld.replicaReadiness() +} + +// Read implements fs.FileOperations.Read. +func (sf *replicaFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { + return sf.si.t.ld.inputQueueRead(ctx, dst) +} + +// Write implements fs.FileOperations.Write. +func (sf *replicaFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { + return sf.si.t.ld.outputQueueWrite(ctx, src) +} + +// Ioctl implements fs.FileOperations.Ioctl. +func (sf *replicaFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + + switch cmd := args[1].Uint(); cmd { + case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ + // Get the number of bytes in the input queue read buffer. + return 0, sf.si.t.ld.inputQueueReadSize(t, args) + case linux.TCGETS: + return sf.si.t.ld.getTermios(t, args) + case linux.TCSETS: + return sf.si.t.ld.setTermios(t, args) + case linux.TCSETSW: + // TODO(b/29356795): This should drain the output queue first. + return sf.si.t.ld.setTermios(t, args) + case linux.TIOCGPTN: + nP := primitive.Uint32(sf.si.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) + return 0, err + case linux.TIOCGWINSZ: + return 0, sf.si.t.ld.windowSize(t, args) + case linux.TIOCSWINSZ: + return 0, sf.si.t.ld.setWindowSize(t, args) + case linux.TIOCSCTTY: + // Make the given terminal the controlling terminal of the + // calling process. + return 0, sf.si.t.setControllingTTY(ctx, args, false /* isMaster */) + case linux.TIOCNOTTY: + // Release this process's controlling terminal. + return 0, sf.si.t.releaseControllingTTY(ctx, args, false /* isMaster */) + case linux.TIOCGPGRP: + // Get the foreground process group. + return sf.si.t.foregroundProcessGroup(ctx, args, false /* isMaster */) + case linux.TIOCSPGRP: + // Set the foreground process group. + return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) + default: + maybeEmitUnimplementedEvent(ctx, cmd) + return 0, syserror.ENOTTY + } +} + +// LINT.ThenChange(../../fsimpl/devpts/replica.go) diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go deleted file mode 100644 index 933d2c3ff..000000000 --- a/pkg/sentry/fs/tty/slave.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tty - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" -) - -// LINT.IfChange - -// slaveInodeOperations are the fs.InodeOperations for the slave end of the -// Terminal (pts file). -// -// +stateify savable -type slaveInodeOperations struct { - fsutil.SimpleFileInode - - // d is the containing dir. - d *dirInodeOperations - - // t is the connected Terminal. - t *Terminal -} - -var _ fs.InodeOperations = (*slaveInodeOperations)(nil) - -// newSlaveInode creates an fs.Inode for the slave end of a terminal. -// -// newSlaveInode takes ownership of t. -func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owner fs.FileOwner, p fs.FilePermissions) *fs.Inode { - iops := &slaveInodeOperations{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, owner, p, linux.DEVPTS_SUPER_MAGIC), - d: d, - t: t, - } - - return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{ - DeviceID: ptsDevice.DeviceID(), - // N.B. Linux always uses inode id = tty index + 3. See - // fs/devpts/inode.c:devpts_pty_new. - // - // TODO(b/75267214): Since ptsDevice must be shared between - // different mounts, we must not assign fixed numbers. - InodeID: ptsDevice.NextIno(), - Type: fs.CharacterDevice, - // See fs/devpts/inode.c:devpts_fill_super. - BlockSize: 1024, - DeviceFileMajor: linux.UNIX98_PTY_SLAVE_MAJOR, - DeviceFileMinor: t.n, - }) -} - -// Release implements fs.InodeOperations.Release. -func (si *slaveInodeOperations) Release(ctx context.Context) { - si.t.DecRef(ctx) -} - -// Truncate implements fs.InodeOperations.Truncate. -func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { - return nil -} - -// GetFile implements fs.InodeOperations.GetFile. -// -// This may race with destruction of the terminal. If the terminal is gone, it -// returns ENOENT. -func (si *slaveInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - return fs.NewFile(ctx, d, flags, &slaveFileOperations{si: si}), nil -} - -// slaveFileOperations are the fs.FileOperations for the slave end of a terminal. -// -// +stateify savable -type slaveFileOperations struct { - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - - // si is the inode operations. - si *slaveInodeOperations -} - -var _ fs.FileOperations = (*slaveFileOperations)(nil) - -// Release implements fs.FileOperations.Release. -func (sf *slaveFileOperations) Release(context.Context) { -} - -// EventRegister implements waiter.Waitable.EventRegister. -func (sf *slaveFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - sf.si.t.ld.slaveWaiter.EventRegister(e, mask) -} - -// EventUnregister implements waiter.Waitable.EventUnregister. -func (sf *slaveFileOperations) EventUnregister(e *waiter.Entry) { - sf.si.t.ld.slaveWaiter.EventUnregister(e) -} - -// Readiness implements waiter.Waitable.Readiness. -func (sf *slaveFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - return sf.si.t.ld.slaveReadiness() -} - -// Read implements fs.FileOperations.Read. -func (sf *slaveFileOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { - return sf.si.t.ld.inputQueueRead(ctx, dst) -} - -// Write implements fs.FileOperations.Write. -func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { - return sf.si.t.ld.outputQueueWrite(ctx, src) -} - -// Ioctl implements fs.FileOperations.Ioctl. -func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - t := kernel.TaskFromContext(ctx) - if t == nil { - // ioctl(2) may only be called from a task goroutine. - return 0, syserror.ENOTTY - } - - switch cmd := args[1].Uint(); cmd { - case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ - // Get the number of bytes in the input queue read buffer. - return 0, sf.si.t.ld.inputQueueReadSize(t, args) - case linux.TCGETS: - return sf.si.t.ld.getTermios(t, args) - case linux.TCSETS: - return sf.si.t.ld.setTermios(t, args) - case linux.TCSETSW: - // TODO(b/29356795): This should drain the output queue first. - return sf.si.t.ld.setTermios(t, args) - case linux.TIOCGPTN: - nP := primitive.Uint32(sf.si.t.n) - _, err := nP.CopyOut(t, args[2].Pointer()) - return 0, err - case linux.TIOCGWINSZ: - return 0, sf.si.t.ld.windowSize(t, args) - case linux.TIOCSWINSZ: - return 0, sf.si.t.ld.setWindowSize(t, args) - case linux.TIOCSCTTY: - // Make the given terminal the controlling terminal of the - // calling process. - return 0, sf.si.t.setControllingTTY(ctx, args, false /* isMaster */) - case linux.TIOCNOTTY: - // Release this process's controlling terminal. - return 0, sf.si.t.releaseControllingTTY(ctx, args, false /* isMaster */) - case linux.TIOCGPGRP: - // Get the foreground process group. - return sf.si.t.foregroundProcessGroup(ctx, args, false /* isMaster */) - case linux.TIOCSPGRP: - // Set the foreground process group. - return sf.si.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) - default: - maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY - } -} - -// LINT.ThenChange(../../fsimpl/devpts/slave.go) diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index 56b59632d..c9dbf1f3b 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -44,19 +44,19 @@ type Terminal struct { // this terminal. This field is immutable. masterKTTY *kernel.TTY - // slaveKTTY contains the controlling process of the slave end of this + // replicaKTTY contains the controlling process of the replica end of this // terminal. This field is immutable. - slaveKTTY *kernel.TTY + replicaKTTY *kernel.TTY } func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal { - termios := linux.DefaultSlaveTermios + termios := linux.DefaultReplicaTermios t := Terminal{ - d: d, - n: n, - ld: newLineDiscipline(termios), - masterKTTY: &kernel.TTY{Index: n}, - slaveKTTY: &kernel.TTY{Index: n}, + d: d, + n: n, + ld: newLineDiscipline(termios), + masterKTTY: &kernel.TTY{Index: n}, + replicaKTTY: &kernel.TTY{Index: n}, } t.EnableLeakCheck("tty.Terminal") return &t @@ -123,7 +123,7 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY { if isMaster { return tm.masterKTTY } - return tm.slaveKTTY + return tm.replicaKTTY } // LINT.ThenChange(../../fsimpl/devpts/terminal.go) diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go index 2cbc05678..49edee83d 100644 --- a/pkg/sentry/fs/tty/tty_test.go +++ b/pkg/sentry/fs/tty/tty_test.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func TestSimpleMasterToSlave(t *testing.T) { - ld := newLineDiscipline(linux.DefaultSlaveTermios) +func TestSimpleMasterToReplica(t *testing.T) { + ld := newLineDiscipline(linux.DefaultReplicaTermios) ctx := contexttest.Context(t) inBytes := []byte("hello, tty\n") src := usermem.BytesIOSequence(inBytes) diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 3e8c5e3fd..ac48ab34b 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -21,8 +21,8 @@ go_library( "line_discipline.go", "master.go", "queue.go", + "replica.go", "root_inode_refs.go", - "slave.go", "terminal.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 57580f4d4..dcf1ee25b 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -79,7 +79,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds // Construct the root directory. This is always inode id 1. root := &rootInode{ - slaves: make(map[uint32]*slaveInode), + replicas: make(map[uint32]*replicaInode), } root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555) root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) @@ -133,8 +133,8 @@ type rootInode struct { // mu protects the fields below. mu sync.Mutex - // slaves maps pty ids to slave inodes. - slaves map[uint32]*slaveInode + // replicas maps pty ids to replica inodes. + replicas map[uint32]*replicaInode // nextIdx is the next pty index to use. Must be accessed atomically. // @@ -154,22 +154,22 @@ func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error) idx := i.nextIdx i.nextIdx++ - // Sanity check that slave with idx does not exist. - if _, ok := i.slaves[idx]; ok { + // Sanity check that replica with idx does not exist. + if _, ok := i.replicas[idx]; ok { panic(fmt.Sprintf("pty index collision; index %d already exists", idx)) } - // Create the new terminal and slave. + // Create the new terminal and replica. t := newTerminal(idx) - slave := &slaveInode{ + replica := &replicaInode{ root: i, t: t, } // Linux always uses pty index + 3 as the inode id. See // fs/devpts/inode.c:devpts_pty_new(). - slave.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600) - slave.dentry.Init(slave) - i.slaves[idx] = slave + replica.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600) + replica.dentry.Init(replica) + i.replicas[idx] = replica return t, nil } @@ -179,11 +179,11 @@ func (i *rootInode) masterClose(t *Terminal) { i.mu.Lock() defer i.mu.Unlock() - // Sanity check that slave with idx exists. - if _, ok := i.slaves[t.n]; !ok { + // Sanity check that replica with idx exists. + if _, ok := i.replicas[t.n]; !ok { panic(fmt.Sprintf("pty with index %d does not exist", t.n)) } - delete(i.slaves, t.n) + delete(i.replicas, t.n) } // Open implements kernfs.Inode.Open. @@ -205,7 +205,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error } i.mu.Lock() defer i.mu.Unlock() - if si, ok := i.slaves[uint32(idx)]; ok { + if si, ok := i.replicas[uint32(idx)]; ok { si.dentry.IncRef() return si.dentry.VFSDentry(), nil @@ -217,8 +217,8 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { i.mu.Lock() defer i.mu.Unlock() - ids := make([]int, 0, len(i.slaves)) - for id := range i.slaves { + ids := make([]int, 0, len(i.replicas)) + for id := range i.replicas { ids = append(ids, int(id)) } sort.Ints(ids) @@ -226,7 +226,7 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, dirent := vfs.Dirent{ Name: strconv.FormatUint(uint64(id), 10), Type: linux.DT_CHR, - Ino: i.slaves[uint32(id)].InodeAttrs.Ino(), + Ino: i.replicas[uint32(id)].InodeAttrs.Ino(), NextOff: offset + 1, } if err := cb.Handle(dirent); err != nil { diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go index b7c149047..448390cfe 100644 --- a/pkg/sentry/fsimpl/devpts/devpts_test.go +++ b/pkg/sentry/fsimpl/devpts/devpts_test.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func TestSimpleMasterToSlave(t *testing.T) { - ld := newLineDiscipline(linux.DefaultSlaveTermios) +func TestSimpleMasterToReplica(t *testing.T) { + ld := newLineDiscipline(linux.DefaultReplicaTermios) ctx := contexttest.Context(t) inBytes := []byte("hello, tty\n") src := usermem.BytesIOSequence(inBytes) diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go index b954c1ba1..e6b0e81cf 100644 --- a/pkg/sentry/fsimpl/devpts/line_discipline.go +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -42,7 +42,7 @@ const ( ) // lineDiscipline dictates how input and output are handled between the -// pseudoterminal (pty) master and slave. It can be configured to alter I/O, +// pseudoterminal (pty) master and replica. It can be configured to alter I/O, // modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man // pages are good resources for how to affect the line discipline: // @@ -53,8 +53,8 @@ const ( // // lineDiscipline has a simple structure but supports a multitude of options // (see the above man pages). It consists of two queues of bytes: one from the -// terminal master to slave (the input queue) and one from slave to master (the -// output queue). When bytes are written to one end of the pty, the line +// terminal master to replica (the input queue) and one from replica to master +// (the output queue). When bytes are written to one end of the pty, the line // discipline reads the bytes, modifies them or takes special action if // required, and enqueues them to be read by the other end of the pty: // @@ -63,7 +63,7 @@ const ( // | (inputQueueWrite) +-------------+ (inputQueueRead) | // | | // | v -// masterFD slaveFD +// masterFD replicaFD // ^ | // | | // | output to terminal +--------------+ output from process | @@ -102,8 +102,8 @@ type lineDiscipline struct { // masterWaiter is used to wait on the master end of the TTY. masterWaiter waiter.Queue `state:"zerovalue"` - // slaveWaiter is used to wait on the slave end of the TTY. - slaveWaiter waiter.Queue `state:"zerovalue"` + // replicaWaiter is used to wait on the replica end of the TTY. + replicaWaiter waiter.Queue `state:"zerovalue"` } func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { @@ -141,7 +141,7 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument l.inQueue.pushWaitBufLocked(l) l.inQueue.readable = true l.inQueue.mu.Unlock() - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return 0, err @@ -167,7 +167,7 @@ func (l *lineDiscipline) masterReadiness() waiter.EventMask { return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios) } -func (l *lineDiscipline) slaveReadiness() waiter.EventMask { +func (l *lineDiscipline) replicaReadiness() waiter.EventMask { l.termiosMu.RLock() defer l.termiosMu.RUnlock() return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) @@ -187,7 +187,7 @@ func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSeque if n > 0 { l.masterWaiter.Notify(waiter.EventOut) if pushed { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) } return n, nil } @@ -202,7 +202,7 @@ func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventIn) + l.replicaWaiter.Notify(waiter.EventIn) return n, nil } return 0, syserror.ErrWouldBlock @@ -220,7 +220,7 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ return 0, err } if n > 0 { - l.slaveWaiter.Notify(waiter.EventOut) + l.replicaWaiter.Notify(waiter.EventOut) if pushed { l.masterWaiter.Notify(waiter.EventIn) } diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 3422db6a4..d07e1ded8 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -145,11 +145,11 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args return 0, mfd.t.ld.outputQueueReadSize(t, io, args) case linux.TCGETS: // N.B. TCGETS on the master actually returns the configuration - // of the slave end. + // of the replica end. return mfd.t.ld.getTermios(t, args) case linux.TCSETS: // N.B. TCSETS on the master actually affects the configuration - // of the slave end. + // of the replica end. return mfd.t.ld.setTermios(t, args) case linux.TCSETSW: // TODO(b/29356795): This should drain the output queue first. diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index 08eca2589..ca36b66e9 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -32,7 +32,7 @@ import ( const waitBufMaxBytes = 131072 // queue represents one of the input or output queues between a pty master and -// slave. Bytes written to a queue are added to the read buffer until it is +// replica. Bytes written to a queue are added to the read buffer until it is // full, at which point they are written to the wait buffer. Bytes are // processed (i.e. undergo termios transformations) as they are added to the // read buffer. The read buffer is readable when its length is nonzero and diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go new file mode 100644 index 000000000..1f99f4b4d --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/replica.go @@ -0,0 +1,205 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" + "gvisor.dev/gvisor/tools/go_marshal/primitive" +) + +// replicaInode is the inode for the replica end of the Terminal. +type replicaInode struct { + implStatFS + kernfs.InodeAttrs + kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + + locks vfs.FileLocks + + // Keep a reference to this inode's dentry. + dentry kernfs.Dentry + + // root is the devpts root inode. + root *rootInode + + // t is the connected Terminal. + t *Terminal +} + +var _ kernfs.Inode = (*replicaInode)(nil) + +// Open implements kernfs.Inode.Open. +func (si *replicaInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + si.IncRef() + fd := &replicaFileDescription{ + inode: si, + } + fd.LockFD.Init(&si.locks) + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { + si.DecRef(ctx) + return nil, err + } + return &fd.vfsfd, nil + +} + +// Valid implements kernfs.Inode.Valid. +func (si *replicaInode) Valid(context.Context) bool { + // Return valid if the replica still exists. + si.root.mu.Lock() + defer si.root.mu.Unlock() + _, ok := si.root.replicas[si.t.n] + return ok +} + +// Stat implements kernfs.Inode.Stat. +func (si *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + statx, err := si.InodeAttrs.Stat(ctx, vfsfs, opts) + if err != nil { + return linux.Statx{}, err + } + statx.Blksize = 1024 + statx.RdevMajor = linux.UNIX98_PTY_REPLICA_MAJOR + statx.RdevMinor = si.t.n + return statx, nil +} + +// SetStat implements kernfs.Inode.SetStat +func (si *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + if opts.Stat.Mask&linux.STATX_SIZE != 0 { + return syserror.EINVAL + } + return si.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) +} + +type replicaFileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.LockFD + + inode *replicaInode +} + +var _ vfs.FileDescriptionImpl = (*replicaFileDescription)(nil) + +// Release implements fs.FileOperations.Release. +func (sfd *replicaFileDescription) Release(ctx context.Context) { + sfd.inode.DecRef(ctx) +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (sfd *replicaFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + sfd.inode.t.ld.replicaWaiter.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (sfd *replicaFileDescription) EventUnregister(e *waiter.Entry) { + sfd.inode.t.ld.replicaWaiter.EventUnregister(e) +} + +// Readiness implements waiter.Waitable.Readiness. +func (sfd *replicaFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + return sfd.inode.t.ld.replicaReadiness() +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (sfd *replicaFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { + return sfd.inode.t.ld.inputQueueRead(ctx, dst) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (sfd *replicaFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { + return sfd.inode.t.ld.outputQueueWrite(ctx, src) +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (sfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + // ioctl(2) may only be called from a task goroutine. + return 0, syserror.ENOTTY + } + + switch cmd := args[1].Uint(); cmd { + case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ + // Get the number of bytes in the input queue read buffer. + return 0, sfd.inode.t.ld.inputQueueReadSize(t, io, args) + case linux.TCGETS: + return sfd.inode.t.ld.getTermios(t, args) + case linux.TCSETS: + return sfd.inode.t.ld.setTermios(t, args) + case linux.TCSETSW: + // TODO(b/29356795): This should drain the output queue first. + return sfd.inode.t.ld.setTermios(t, args) + case linux.TIOCGPTN: + nP := primitive.Uint32(sfd.inode.t.n) + _, err := nP.CopyOut(t, args[2].Pointer()) + return 0, err + case linux.TIOCGWINSZ: + return 0, sfd.inode.t.ld.windowSize(t, args) + case linux.TIOCSWINSZ: + return 0, sfd.inode.t.ld.setWindowSize(t, args) + case linux.TIOCSCTTY: + // Make the given terminal the controlling terminal of the + // calling process. + return 0, sfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */) + case linux.TIOCNOTTY: + // Release this process's controlling terminal. + return 0, sfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */) + case linux.TIOCGPGRP: + // Get the foreground process group. + return sfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */) + case linux.TIOCSPGRP: + // Set the foreground process group. + return sfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) + default: + maybeEmitUnimplementedEvent(ctx, cmd) + return 0, syserror.ENOTTY + } +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (sfd *replicaFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + creds := auth.CredentialsFromContext(ctx) + fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() + return sfd.inode.SetStat(ctx, fs, creds, opts) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (sfd *replicaFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() + return sfd.inode.Stat(ctx, fs, opts) +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (sfd *replicaFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (sfd *replicaFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go deleted file mode 100644 index 5f4b474b3..000000000 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package devpts - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/arch" - fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" -) - -// slaveInode is the inode for the slave end of the Terminal. -type slaveInode struct { - implStatFS - kernfs.InodeAttrs - kernfs.InodeNoopRefCount - kernfs.InodeNotDirectory - kernfs.InodeNotSymlink - - locks vfs.FileLocks - - // Keep a reference to this inode's dentry. - dentry kernfs.Dentry - - // root is the devpts root inode. - root *rootInode - - // t is the connected Terminal. - t *Terminal -} - -var _ kernfs.Inode = (*slaveInode)(nil) - -// Open implements kernfs.Inode.Open. -func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - si.IncRef() - fd := &slaveFileDescription{ - inode: si, - } - fd.LockFD.Init(&si.locks) - if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { - si.DecRef(ctx) - return nil, err - } - return &fd.vfsfd, nil - -} - -// Valid implements kernfs.Inode.Valid. -func (si *slaveInode) Valid(context.Context) bool { - // Return valid if the slave still exists. - si.root.mu.Lock() - defer si.root.mu.Unlock() - _, ok := si.root.slaves[si.t.n] - return ok -} - -// Stat implements kernfs.Inode.Stat. -func (si *slaveInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { - statx, err := si.InodeAttrs.Stat(ctx, vfsfs, opts) - if err != nil { - return linux.Statx{}, err - } - statx.Blksize = 1024 - statx.RdevMajor = linux.UNIX98_PTY_SLAVE_MAJOR - statx.RdevMinor = si.t.n - return statx, nil -} - -// SetStat implements kernfs.Inode.SetStat -func (si *slaveInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { - if opts.Stat.Mask&linux.STATX_SIZE != 0 { - return syserror.EINVAL - } - return si.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) -} - -type slaveFileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl - vfs.LockFD - - inode *slaveInode -} - -var _ vfs.FileDescriptionImpl = (*slaveFileDescription)(nil) - -// Release implements fs.FileOperations.Release. -func (sfd *slaveFileDescription) Release(ctx context.Context) { - sfd.inode.DecRef(ctx) -} - -// EventRegister implements waiter.Waitable.EventRegister. -func (sfd *slaveFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - sfd.inode.t.ld.slaveWaiter.EventRegister(e, mask) -} - -// EventUnregister implements waiter.Waitable.EventUnregister. -func (sfd *slaveFileDescription) EventUnregister(e *waiter.Entry) { - sfd.inode.t.ld.slaveWaiter.EventUnregister(e) -} - -// Readiness implements waiter.Waitable.Readiness. -func (sfd *slaveFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { - return sfd.inode.t.ld.slaveReadiness() -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (sfd *slaveFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { - return sfd.inode.t.ld.inputQueueRead(ctx, dst) -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { - return sfd.inode.t.ld.outputQueueWrite(ctx, src) -} - -// Ioctl implements vfs.FileDescriptionImpl.Ioctl. -func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - t := kernel.TaskFromContext(ctx) - if t == nil { - // ioctl(2) may only be called from a task goroutine. - return 0, syserror.ENOTTY - } - - switch cmd := args[1].Uint(); cmd { - case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ - // Get the number of bytes in the input queue read buffer. - return 0, sfd.inode.t.ld.inputQueueReadSize(t, io, args) - case linux.TCGETS: - return sfd.inode.t.ld.getTermios(t, args) - case linux.TCSETS: - return sfd.inode.t.ld.setTermios(t, args) - case linux.TCSETSW: - // TODO(b/29356795): This should drain the output queue first. - return sfd.inode.t.ld.setTermios(t, args) - case linux.TIOCGPTN: - nP := primitive.Uint32(sfd.inode.t.n) - _, err := nP.CopyOut(t, args[2].Pointer()) - return 0, err - case linux.TIOCGWINSZ: - return 0, sfd.inode.t.ld.windowSize(t, args) - case linux.TIOCSWINSZ: - return 0, sfd.inode.t.ld.setWindowSize(t, args) - case linux.TIOCSCTTY: - // Make the given terminal the controlling terminal of the - // calling process. - return 0, sfd.inode.t.setControllingTTY(ctx, args, false /* isMaster */) - case linux.TIOCNOTTY: - // Release this process's controlling terminal. - return 0, sfd.inode.t.releaseControllingTTY(ctx, args, false /* isMaster */) - case linux.TIOCGPGRP: - // Get the foreground process group. - return sfd.inode.t.foregroundProcessGroup(ctx, args, false /* isMaster */) - case linux.TIOCSPGRP: - // Set the foreground process group. - return sfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */) - default: - maybeEmitUnimplementedEvent(ctx, cmd) - return 0, syserror.ENOTTY - } -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (sfd *slaveFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - creds := auth.CredentialsFromContext(ctx) - fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() - return sfd.inode.SetStat(ctx, fs, creds, opts) -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() - return sfd.inode.Stat(ctx, fs, opts) -} - -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (sfd *slaveFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { - return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. -func (sfd *slaveFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { - return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence) -} diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go index e88eb6360..731955d62 100644 --- a/pkg/sentry/fsimpl/devpts/terminal.go +++ b/pkg/sentry/fsimpl/devpts/terminal.go @@ -36,18 +36,18 @@ type Terminal struct { // this terminal. This field is immutable. masterKTTY *kernel.TTY - // slaveKTTY contains the controlling process of the slave end of this + // replicaKTTY contains the controlling process of the replica end of this // terminal. This field is immutable. - slaveKTTY *kernel.TTY + replicaKTTY *kernel.TTY } func newTerminal(n uint32) *Terminal { - termios := linux.DefaultSlaveTermios + termios := linux.DefaultReplicaTermios t := Terminal{ - n: n, - ld: newLineDiscipline(termios), - masterKTTY: &kernel.TTY{Index: n}, - slaveKTTY: &kernel.TTY{Index: n}, + n: n, + ld: newLineDiscipline(termios), + masterKTTY: &kernel.TTY{Index: n}, + replicaKTTY: &kernel.TTY{Index: n}, } return &t } @@ -113,5 +113,5 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY { if isMaster { return tm.masterKTTY } - return tm.slaveKTTY + return tm.replicaKTTY } diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 7561f821c..1bd0e4ee8 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -496,7 +496,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u if i.isTTY { fd := &TTYFileDescription{ fileDescription: fileDescription{inode: i}, - termios: linux.DefaultSlaveTermios, + termios: linux.DefaultReplicaTermios, } fd.LockFD.Init(&i.locks) vfsfd := &fd.vfsfd diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 163265afe..ea0461a3d 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -453,17 +453,17 @@ func (m *mountHint) isSupported() bool { func (m *mountHint) checkCompatible(mount specs.Mount) error { // Remove options that don't affect to mount's behavior. masterOpts := filterUnsupportedOptions(m.mount) - slaveOpts := filterUnsupportedOptions(mount) + replicaOpts := filterUnsupportedOptions(mount) - if len(masterOpts) != len(slaveOpts) { - return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts) + if len(masterOpts) != len(replicaOpts) { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts) } sort.Strings(masterOpts) - sort.Strings(slaveOpts) + sort.Strings(replicaOpts) for i, opt := range masterOpts { - if opt != slaveOpts[i] { - return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts) + if opt != replicaOpts[i] { + return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, replicaOpts) } } return nil diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 600876a27..775ed4b43 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -220,7 +220,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi cmd.Stderr = os.Stderr // If the console control socket file is provided, then create a new - // pty master/slave pair and set the TTY on the sandbox process. + // pty master/replica pair and set the TTY on the sandbox process. if ex.consoleSocket != "" { // Create a new TTY pair and send the master on the provided socket. tty, err := console.NewWithSocket(ex.consoleSocket) @@ -229,7 +229,7 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi } defer tty.Close() - // Set stdio to the new TTY slave. + // Set stdio to the new TTY replica. cmd.Stdin = tty cmd.Stdout = tty cmd.Stderr = tty diff --git a/runsc/console/console.go b/runsc/console/console.go index 64b23639a..dbb88e117 100644 --- a/runsc/console/console.go +++ b/runsc/console/console.go @@ -24,11 +24,11 @@ import ( "golang.org/x/sys/unix" ) -// NewWithSocket creates pty master/slave pair, sends the master FD over the given -// socket, and returns the slave. +// NewWithSocket creates pty master/replica pair, sends the master FD over the given +// socket, and returns the replica. func NewWithSocket(socketPath string) (*os.File, error) { - // Create a new pty master and slave. - ptyMaster, ptySlave, err := pty.Open() + // Create a new pty master and replica. + ptyMaster, ptyReplica, err := pty.Open() if err != nil { return nil, fmt.Errorf("opening pty: %v", err) } @@ -37,18 +37,18 @@ func NewWithSocket(socketPath string) (*os.File, error) { // Get a connection to the socket path. conn, err := net.Dial("unix", socketPath) if err != nil { - ptySlave.Close() + ptyReplica.Close() return nil, fmt.Errorf("dialing socket %q: %v", socketPath, err) } defer conn.Close() uc, ok := conn.(*net.UnixConn) if !ok { - ptySlave.Close() + ptyReplica.Close() return nil, fmt.Errorf("connection is not a UnixConn: %T", conn) } socket, err := uc.File() if err != nil { - ptySlave.Close() + ptyReplica.Close() return nil, fmt.Errorf("getting file for unix socket %v: %v", uc, err) } defer socket.Close() @@ -56,8 +56,8 @@ func NewWithSocket(socketPath string) (*os.File, error) { // Send the master FD over the connection. msg := unix.UnixRights(int(ptyMaster.Fd())) if err := unix.Sendmsg(int(socket.Fd()), []byte("pty-master"), msg, nil, 0); err != nil { - ptySlave.Close() + ptyReplica.Close() return nil, fmt.Errorf("sending console over unix socket %q: %v", socketPath, err) } - return ptySlave, nil + return ptyReplica, nil } diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 995d4e267..4228399b8 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -185,14 +185,14 @@ func TestJobControlSignalExec(t *testing.T) { t.Fatalf("error starting container: %v", err) } - // Create a pty master/slave. The slave will be passed to the exec + // Create a pty master/replica. The replica will be passed to the exec // process. - ptyMaster, ptySlave, err := pty.Open() + ptyMaster, ptyReplica, err := pty.Open() if err != nil { t.Fatalf("error opening pty: %v", err) } defer ptyMaster.Close() - defer ptySlave.Close() + defer ptyReplica.Close() // Exec bash and attach a terminal. Note that occasionally /bin/sh // may be a different shell or have a different configuration (such @@ -203,9 +203,9 @@ func TestJobControlSignalExec(t *testing.T) { // Don't let bash execute from profile or rc files, otherwise // our PID counts get messed up. Argv: []string{"/bin/bash", "--noprofile", "--norc"}, - // Pass the pty slave as FD 0, 1, and 2. + // Pass the pty replica as FD 0, 1, and 2. FilePayload: urpc.FilePayload{ - Files: []*os.File{ptySlave, ptySlave, ptySlave}, + Files: []*os.File{ptyReplica, ptyReplica, ptyReplica}, }, StdioIsPty: true, } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 1beea123f..da1694280 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -1360,7 +1360,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { } // Test that unsupported pod mounts options are ignored when matching master and -// slave mounts. +// replica mounts. func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index a339937fb..a8f4f64a5 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -478,10 +478,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.Stderr = nil // If the console control socket file is provided, then create a new - // pty master/slave pair and set the TTY on the sandbox process. + // pty master/replica pair and set the TTY on the sandbox process. if args.Spec.Process.Terminal && args.ConsoleSocket != "" { // console.NewWithSocket will send the master on the given - // socket, and return the slave. + // socket, and return the replica. tty, err := console.NewWithSocket(args.ConsoleSocket) if err != nil { return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err) diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index 2e4ab6ca8..0b174e2be 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -71,7 +71,7 @@ constexpr absl::Duration kTimeout = absl::Seconds(20); // The maximum line size in bytes returned per read from a pty file. constexpr int kMaxLineSize = 4096; -constexpr char kMainPath[] = "/dev/ptmx"; +constexpr char kMasterPath[] = "/dev/ptmx"; // glibc defines its own, different, version of struct termios. We care about // what the kernel does, not glibc. @@ -388,22 +388,22 @@ PosixErrorOr PollAndReadFd(int fd, void* buf, size_t count, TEST(PtyTrunc, Truncate) { // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to // (f)truncate should. - FileDescriptor main = - ASSERT_NO_ERRNO_AND_VALUE(Open(kMainPath, O_RDWR | O_TRUNC)); - int n = ASSERT_NO_ERRNO_AND_VALUE(ReplicaID(main)); + FileDescriptor master = + ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC)); + int n = ASSERT_NO_ERRNO_AND_VALUE(ReplicaID(master)); std::string spath = absl::StrCat("/dev/pts/", n); FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC)); - EXPECT_THAT(truncate(kMainPath, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL)); EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL)); - EXPECT_THAT(ftruncate(main.get(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL)); EXPECT_THAT(ftruncate(replica.get(), 0), SyscallFailsWithErrno(EINVAL)); } -TEST(BasicPtyTest, StatUnopenedMain) { +TEST(BasicPtyTest, StatUnopenedMaster) { struct stat s; - ASSERT_THAT(stat(kMainPath, &s), SyscallSucceeds()); + ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds()); EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor)); EXPECT_EQ(s.st_size, 0); @@ -454,41 +454,41 @@ void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) { EXPECT_EQ(expected, n); } -TEST(BasicPtyTest, OpenMainReplica) { - FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); - FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main)); +TEST(BasicPtyTest, OpenMasterReplica) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master)); } -// The replica entry in /dev/pts/ disappears when the main is closed, even if +// The replica entry in /dev/pts/ disappears when the master is closed, even if // the replica is still open. -TEST(BasicPtyTest, ReplicaEntryGoneAfterMainClose) { - FileDescriptor main = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); - FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main)); +TEST(BasicPtyTest, ReplicaEntryGoneAfterMasterClose) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master)); // Get pty index. int index = -1; - ASSERT_THAT(ioctl(main.get(), TIOCGPTN, &index), SyscallSucceeds()); + ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds()); std::string path = absl::StrCat("/dev/pts/", index); struct stat st; EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds()); - main.reset(); + master.reset(); EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT)); } TEST(BasicPtyTest, Getdents) { - FileDescriptor main1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); int index1 = -1; - ASSERT_THAT(ioctl(main1.get(), TIOCGPTN, &index1), SyscallSucceeds()); - FileDescriptor replica1 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main1)); + ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds()); + FileDescriptor replica1 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master1)); - FileDescriptor main2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); int index2 = -1; - ASSERT_THAT(ioctl(main2.get(), TIOCGPTN, &index2), SyscallSucceeds()); - FileDescriptor replica2 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main2)); + ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds()); + FileDescriptor replica2 = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master2)); // The directory contains ptmx, index1, and index2. (Plus any additional PTYs // unrelated to this test.) @@ -498,9 +498,9 @@ TEST(BasicPtyTest, Getdents) { EXPECT_THAT(contents, Contains(absl::StrCat(index1))); EXPECT_THAT(contents, Contains(absl::StrCat(index2))); - main2.reset(); + master2.reset(); - // The directory contains ptmx and index1, but not index2 since the main is + // The directory contains ptmx and index1, but not index2 since the master is // closed. (Plus any additional PTYs unrelated to this test.) contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true)); @@ -519,8 +519,8 @@ TEST(BasicPtyTest, Getdents) { class PtyTest : public ::testing::Test { protected: void SetUp() override { - main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); - replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_)); + master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master_)); } void DisableCanonical() { @@ -537,21 +537,22 @@ class PtyTest : public ::testing::Test { EXPECT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds()); } - // Main and replica ends of the PTY. Non-blocking. - FileDescriptor main_; + // Master and replica ends of the PTY. Non-blocking. + FileDescriptor master_; FileDescriptor replica_; }; -// Main to replica sanity test. -TEST_F(PtyTest, WriteMainToReplica) { - // N.B. by default, the replica reads nothing until the main writes a newline. +// Master to replica sanity test. +TEST_F(PtyTest, WriteMasterToReplica) { + // N.B. by default, the replica reads nothing until the master writes a + // newline. constexpr char kBuf[] = "hello\n"; - EXPECT_THAT(WriteFd(main_.get(), kBuf, sizeof(kBuf) - 1), + EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1), SyscallSucceedsWithValue(sizeof(kBuf) - 1)); - // Linux moves data from the main to the replica via async work scheduled via - // tty_flip_buffer_push. Since it is asynchronous, the data may not be + // Linux moves data from the master to the replica via async work scheduled + // via tty_flip_buffer_push. Since it is asynchronous, the data may not be // available for reading immediately. Instead we must poll and assert that it // becomes available "soon". @@ -561,63 +562,63 @@ TEST_F(PtyTest, WriteMainToReplica) { EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0); } -// Replica to main sanity test. -TEST_F(PtyTest, WriteReplicaToMain) { - // N.B. by default, the main reads nothing until the replica writes a newline, - // and the main gets a carriage return. +// Replica to master sanity test. +TEST_F(PtyTest, WriteReplicaToMaster) { + // N.B. by default, the master reads nothing until the replica writes a + // newline, and the master gets a carriage return. constexpr char kInput[] = "hello\n"; constexpr char kExpected[] = "hello\r\n"; EXPECT_THAT(WriteFd(replica_.get(), kInput, sizeof(kInput) - 1), SyscallSucceedsWithValue(sizeof(kInput) - 1)); - // Linux moves data from the main to the replica via async work scheduled via - // tty_flip_buffer_push. Since it is asynchronous, the data may not be + // Linux moves data from the master to the replica via async work scheduled + // via tty_flip_buffer_push. Since it is asynchronous, the data may not be // available for reading immediately. Instead we must poll and assert that it // becomes available "soon". char buf[sizeof(kExpected)] = {}; - ExpectReadable(main_, sizeof(buf) - 1, buf); + ExpectReadable(master_, sizeof(buf) - 1, buf); EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0); } TEST_F(PtyTest, WriteInvalidUTF8) { char c = 0xff; - ASSERT_THAT(syscall(__NR_write, main_.get(), &c, sizeof(c)), + ASSERT_THAT(syscall(__NR_write, master_.get(), &c, sizeof(c)), SyscallSucceedsWithValue(sizeof(c))); } -// Both the main and replica report the standard default termios settings. +// Both the master and replica report the standard default termios settings. // -// Note that TCGETS on the main actually redirects to the replica (see comment -// on MainTermiosUnchangable). +// Note that TCGETS on the master actually redirects to the replica (see comment +// on MasterTermiosUnchangable). TEST_F(PtyTest, DefaultTermios) { struct kernel_termios t = {}; EXPECT_THAT(ioctl(replica_.get(), TCGETS, &t), SyscallSucceeds()); EXPECT_EQ(t, DefaultTermios()); - EXPECT_THAT(ioctl(main_.get(), TCGETS, &t), SyscallSucceeds()); + EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds()); EXPECT_EQ(t, DefaultTermios()); } -// Changing termios from the main actually affects the replica. +// Changing termios from the master actually affects the replica. // -// TCSETS on the main actually redirects to the replica (see comment on -// MainTermiosUnchangable). +// TCSETS on the master actually redirects to the replica (see comment on +// MasterTermiosUnchangable). TEST_F(PtyTest, TermiosAffectsReplica) { - struct kernel_termios main_termios = {}; - EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds()); - main_termios.c_lflag ^= ICANON; - EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds()); + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag ^= ICANON; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); struct kernel_termios replica_termios = {}; EXPECT_THAT(ioctl(replica_.get(), TCGETS, &replica_termios), SyscallSucceeds()); - EXPECT_EQ(main_termios, replica_termios); + EXPECT_EQ(master_termios, replica_termios); } -// The main end of the pty has termios: +// The master end of the pty has termios: // // struct kernel_termios t = { // .c_iflag = 0; @@ -629,25 +630,25 @@ TEST_F(PtyTest, TermiosAffectsReplica) { // // (From drivers/tty/pty.c:unix98_pty_init) // -// All termios control ioctls on the main actually redirect to the replica +// All termios control ioctls on the master actually redirect to the replica // (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the -// main termios. +// master termios. // // Verify this by setting ICRNL (which rewrites input \r to \n) and verify that -// it has no effect on the main. -TEST_F(PtyTest, MainTermiosUnchangable) { - struct kernel_termios main_termios = {}; - EXPECT_THAT(ioctl(main_.get(), TCGETS, &main_termios), SyscallSucceeds()); - main_termios.c_lflag |= ICRNL; - EXPECT_THAT(ioctl(main_.get(), TCSETS, &main_termios), SyscallSucceeds()); +// it has no effect on the master. +TEST_F(PtyTest, MasterTermiosUnchangable) { + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag |= ICRNL; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); char c = '\r'; ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1)); - ExpectReadable(main_, 1, &c); + ExpectReadable(master_, 1, &c); EXPECT_EQ(c, '\r'); // ICRNL had no effect! - ExpectFinished(main_); + ExpectFinished(master_); } // ICRNL rewrites input \r to \n. @@ -658,7 +659,7 @@ TEST_F(PtyTest, TermiosICRNL) { ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds()); char c = '\r'; - ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); ExpectReadable(replica_, 1, &c); EXPECT_EQ(c, '\n'); @@ -678,7 +679,7 @@ TEST_F(PtyTest, TermiosONLCR) { // Extra byte for NUL for EXPECT_STREQ. char buf[3] = {}; - ExpectReadable(main_, 2, buf); + ExpectReadable(master_, 2, buf); EXPECT_STREQ(buf, "\r\n"); ExpectFinished(replica_); @@ -691,7 +692,7 @@ TEST_F(PtyTest, TermiosIGNCR) { ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds()); char c = '\r'; - ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); // Nothing to read. ASSERT_THAT(PollAndReadFd(replica_.get(), &c, 1, kTimeout), @@ -725,18 +726,18 @@ TEST_F(PtyTest, TermiosPollReplica) { absl::SleepFor(absl::Seconds(1)); char s[] = "foo\n"; - ASSERT_THAT(WriteFd(main_.get(), s, strlen(s) + 1), SyscallSucceeds()); + ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds()); } -// Test that we can successfully poll for readable data from the main. -TEST_F(PtyTest, TermiosPollMain) { +// Test that we can successfully poll for readable data from the master. +TEST_F(PtyTest, TermiosPollMaster) { struct kernel_termios t = DefaultTermios(); t.c_iflag |= IGNCR; t.c_lflag &= ~ICANON; // for byte-by-byte reading. - ASSERT_THAT(ioctl(main_.get(), TCSETS, &t), SyscallSucceeds()); + ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds()); absl::Notification notify; - int mfd = main_.get(); + int mfd = master_.get(); ScopedThread th([mfd, ¬ify]() { notify.Notify(); @@ -765,7 +766,7 @@ TEST_F(PtyTest, TermiosINLCR) { ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds()); char c = '\n'; - ASSERT_THAT(WriteFd(main_.get(), &c, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); ExpectReadable(replica_, 1, &c); EXPECT_EQ(c, '\r'); @@ -784,7 +785,7 @@ TEST_F(PtyTest, TermiosONOCR) { ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1)); // Nothing to read. - ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout), + ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout), PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); // This time the column is greater than 0, so we should be able to read the CR @@ -795,17 +796,17 @@ TEST_F(PtyTest, TermiosONOCR) { SyscallSucceedsWithValue(kInputSize)); char buf[kInputSize] = {}; - ExpectReadable(main_, kInputSize, buf); + ExpectReadable(master_, kInputSize, buf); EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0); - ExpectFinished(main_); + ExpectFinished(master_); // Terminal should be at column 0 again, so no CR can be read. ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1)); // Nothing to read. - ASSERT_THAT(PollAndReadFd(main_.get(), &c, 1, kTimeout), + ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout), PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); } @@ -819,10 +820,10 @@ TEST_F(PtyTest, TermiosOCRNL) { char c = '\r'; ASSERT_THAT(WriteFd(replica_.get(), &c, 1), SyscallSucceedsWithValue(1)); - ExpectReadable(main_, 1, &c); + ExpectReadable(master_, 1, &c); EXPECT_EQ(c, '\n'); - ExpectFinished(main_); + ExpectFinished(master_); } // Tests that VEOL is disabled when we start, and that we can set it to enable @@ -830,7 +831,7 @@ TEST_F(PtyTest, TermiosOCRNL) { TEST_F(PtyTest, VEOLTermination) { // Write a few bytes ending with '\0', and confirm that we can't read. constexpr char kInput[] = "hello"; - ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)), + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), SyscallSucceedsWithValue(sizeof(kInput))); char buf[sizeof(kInput)] = {}; ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(kInput), kTimeout), @@ -841,7 +842,7 @@ TEST_F(PtyTest, VEOLTermination) { struct kernel_termios t = DefaultTermios(); t.c_cc[VEOL] = delim; ASSERT_THAT(ioctl(replica_.get(), TCSETS, &t), SyscallSucceeds()); - ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); // Now we can read, as sending EOL caused the line to become available. ExpectReadable(replica_, sizeof(kInput), buf); @@ -861,7 +862,7 @@ TEST_F(PtyTest, CanonBigWrite) { char input[kWriteLen]; memset(input, 'M', kWriteLen - 1); input[kWriteLen - 1] = '\n'; - ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen), + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), SyscallSucceedsWithValue(kWriteLen)); // We can read the line. @@ -877,7 +878,7 @@ TEST_F(PtyTest, SwitchCanonToNoncanon) { // Write a few bytes without a terminating character, switch to noncanonical // mode, and read them. constexpr char kInput[] = "hello"; - ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)), + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), SyscallSucceedsWithValue(sizeof(kInput))); // Nothing available yet. @@ -896,7 +897,7 @@ TEST_F(PtyTest, SwitchCanonToNoncanon) { TEST_F(PtyTest, SwitchCanonToNonCanonNewline) { // Write a few bytes with a terminating character. constexpr char kInput[] = "hello\n"; - ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput)), + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), SyscallSucceedsWithValue(sizeof(kInput))); DisableCanonical(); @@ -916,12 +917,12 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) { constexpr int kWriteLen = 4100; char input[kWriteLen]; memset(input, 'M', kWriteLen); - ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen), + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), SyscallSucceedsWithValue(kWriteLen)); // Wait for the input queue to fill. ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1)); constexpr char delim = '\n'; - ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); EnableCanonical(); @@ -941,7 +942,7 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) { // Write a few bytes without a terminating character. // mode, and read them. constexpr char kInput[] = "hello"; - ASSERT_THAT(WriteFd(main_.get(), kInput, sizeof(kInput) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1), SyscallSucceedsWithValue(sizeof(kInput) - 1)); ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput) - 1)); @@ -963,7 +964,7 @@ TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) { constexpr int kWriteLen = 4100; char input[kWriteLen]; memset(input, 'M', kWriteLen); - ASSERT_THAT(WriteFd(main_.get(), input, kWriteLen), + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), SyscallSucceedsWithValue(kWriteLen)); ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1)); @@ -987,12 +988,12 @@ TEST_F(PtyTest, NoncanonBigWrite) { for (int i = 0; i < kInputSize; i++) { // This makes too many syscalls for save/restore. const DisableSave ds; - ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)), + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), SyscallSucceedsWithValue(sizeof(kInput))); } // We should be able to read out everything. Sleep a bit so that Linux has a - // chance to move data from the main to the replica. + // chance to move data from the master to the replica. ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), kMaxLineSize - 1)); for (int i = 0; i < kInputSize; i++) { // This makes too many syscalls for save/restore. @@ -1010,7 +1011,7 @@ TEST_F(PtyTest, NoncanonBigWrite) { // Test newline. TEST_F(PtyTest, TermiosICANONNewline) { char input[3] = {'a', 'b', 'c'}; - ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)), + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), SyscallSucceedsWithValue(sizeof(input))); // Extra bytes for newline (written later) and NUL for EXPECT_STREQ. @@ -1021,7 +1022,7 @@ TEST_F(PtyTest, TermiosICANONNewline) { PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); char delim = '\n'; - ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); // Now it is available. ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(input) + 1)); @@ -1036,7 +1037,7 @@ TEST_F(PtyTest, TermiosICANONNewline) { // Test EOF (^D). TEST_F(PtyTest, TermiosICANONEOF) { char input[3] = {'a', 'b', 'c'}; - ASSERT_THAT(WriteFd(main_.get(), input, sizeof(input)), + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), SyscallSucceedsWithValue(sizeof(input))); // Extra byte for NUL for EXPECT_STREQ. @@ -1046,7 +1047,7 @@ TEST_F(PtyTest, TermiosICANONEOF) { ASSERT_THAT(PollAndReadFd(replica_.get(), buf, sizeof(input), kTimeout), PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); char delim = ControlCharacter('D'); - ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); // Now it is available. Note that ^D is not included. ExpectReadable(replica_, sizeof(input), buf); @@ -1069,10 +1070,10 @@ TEST_F(PtyTest, CanonDiscard) { // This makes too many syscalls for save/restore. const DisableSave ds; for (int i = 0; i < kInputSize; i++) { - ASSERT_THAT(WriteFd(main_.get(), &kInput, sizeof(kInput)), + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), SyscallSucceedsWithValue(sizeof(kInput))); } - ASSERT_THAT(WriteFd(main_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); } // There should be multiple truncated lines available to read. @@ -1091,9 +1092,9 @@ TEST_F(PtyTest, CanonMultiline) { constexpr char kInput2[] = "BLUE\n"; // Write both lines. - ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), SyscallSucceedsWithValue(sizeof(kInput1) - 1)); - ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), SyscallSucceedsWithValue(sizeof(kInput2) - 1)); // Get the first line. @@ -1117,9 +1118,9 @@ TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) { constexpr char kExpected[] = "GO\nBLUE\n"; // Write both lines. - ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), SyscallSucceedsWithValue(sizeof(kInput1) - 1)); - ASSERT_THAT(WriteFd(main_.get(), kInput2, sizeof(kInput2) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), SyscallSucceedsWithValue(sizeof(kInput2) - 1)); ASSERT_NO_ERRNO( @@ -1140,7 +1141,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) { // Write each line. for (const std::string& input : kInputs) { - ASSERT_THAT(WriteFd(main_.get(), input.c_str(), input.size()), + ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()), SyscallSucceedsWithValue(input.size())); } @@ -1162,7 +1163,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) { TEST_F(PtyTest, QueueSize) { // Write the line. constexpr char kInput1[] = "GO\n"; - ASSERT_THAT(WriteFd(main_.get(), kInput1, sizeof(kInput1) - 1), + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), SyscallSucceedsWithValue(sizeof(kInput1) - 1)); ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1)); @@ -1170,7 +1171,7 @@ TEST_F(PtyTest, QueueSize) { // readable size. char input[kMaxLineSize]; memset(input, 'M', kMaxLineSize); - ASSERT_THAT(WriteFd(main_.get(), input, kMaxLineSize), + ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize), SyscallSucceedsWithValue(kMaxLineSize)); int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE( WaitUntilReceived(replica_.get(), sizeof(kInput1) - 1)); @@ -1192,10 +1193,11 @@ TEST_F(PtyTest, PartialBadBuffer) { // Leave only one free byte in the buffer. char* bad_buffer = buf + kPageSize - 1; - // Write to the main. + // Write to the master. constexpr char kBuf[] = "hello\n"; constexpr size_t size = sizeof(kBuf) - 1; - EXPECT_THAT(WriteFd(main_.get(), kBuf, size), SyscallSucceedsWithValue(size)); + EXPECT_THAT(WriteFd(master_.get(), kBuf, size), + SyscallSucceedsWithValue(size)); // Read from the replica into bad_buffer. ASSERT_NO_ERRNO(WaitUntilReceived(replica_.get(), size)); @@ -1207,14 +1209,14 @@ TEST_F(PtyTest, PartialBadBuffer) { TEST_F(PtyTest, SimpleEcho) { constexpr char kInput[] = "Mr. Eko"; - EXPECT_THAT(WriteFd(main_.get(), kInput, strlen(kInput)), + EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)), SyscallSucceedsWithValue(strlen(kInput))); char buf[100] = {}; - ExpectReadable(main_, strlen(kInput), buf); + ExpectReadable(master_, strlen(kInput), buf); EXPECT_STREQ(buf, kInput); - ExpectFinished(main_); + ExpectFinished(master_); } TEST_F(PtyTest, GetWindowSize) { @@ -1231,16 +1233,17 @@ TEST_F(PtyTest, SetReplicaWindowSize) { ASSERT_THAT(ioctl(replica_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); struct winsize retrieved_ws = {}; - ASSERT_THAT(ioctl(main_.get(), TIOCGWINSZ, &retrieved_ws), SyscallSucceeds()); + ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws), + SyscallSucceeds()); EXPECT_EQ(retrieved_ws.ws_row, kRows); EXPECT_EQ(retrieved_ws.ws_col, kCols); } -TEST_F(PtyTest, SetMainWindowSize) { +TEST_F(PtyTest, SetMasterWindowSize) { constexpr uint16_t kRows = 343; constexpr uint16_t kCols = 2401; struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; - ASSERT_THAT(ioctl(main_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); + ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); struct winsize retrieved_ws = {}; ASSERT_THAT(ioctl(replica_.get(), TIOCGWINSZ, &retrieved_ws), @@ -1252,8 +1255,8 @@ TEST_F(PtyTest, SetMainWindowSize) { class JobControlTest : public ::testing::Test { protected: void SetUp() override { - main_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); - replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main_)); + master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + replica_ = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master_)); // Make this a session leader, which also drops the controlling terminal. // In the gVisor test environment, this test will be run as the session @@ -1277,15 +1280,15 @@ class JobControlTest : public ::testing::Test { return PosixError(wstatus, "process returned"); } - // Main and replica ends of the PTY. Non-blocking. - FileDescriptor main_; + // Master and replica ends of the PTY. Non-blocking. + FileDescriptor master_; FileDescriptor replica_; }; -TEST_F(JobControlTest, SetTTYMain) { +TEST_F(JobControlTest, SetTTYMaster) { auto res = RunInChild([=]() { TEST_PCHECK(setsid() >= 0); - TEST_PCHECK(!ioctl(main_.get(), TIOCSCTTY, 0)); + TEST_PCHECK(!ioctl(master_.get(), TIOCSCTTY, 0)); }); ASSERT_NO_ERRNO(res); } @@ -1360,7 +1363,7 @@ TEST_F(JobControlTest, ReleaseWrongTTY) { auto res = RunInChild([=]() { TEST_PCHECK(setsid() >= 0); TEST_PCHECK(!ioctl(replica_.get(), TIOCSCTTY, 0)); - TEST_PCHECK(ioctl(main_.get(), TIOCNOTTY) < 0 && errno == ENOTTY); + TEST_PCHECK(ioctl(master_.get(), TIOCNOTTY) < 0 && errno == ENOTTY); }); ASSERT_NO_ERRNO(res); } diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc index a534cf0bb..4ac648729 100644 --- a/test/syscalls/linux/pty_root.cc +++ b/test/syscalls/linux/pty_root.cc @@ -48,9 +48,9 @@ TEST(JobControlRootTest, StealTTY) { ASSERT_THAT(setsid(), SyscallSucceeds()); } - FileDescriptor main = + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); - FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(main)); + FileDescriptor replica = ASSERT_NO_ERRNO_AND_VALUE(OpenReplica(master)); // Make replica the controlling terminal. ASSERT_THAT(ioctl(replica.get(), TIOCSCTTY, 0), SyscallSucceeds()); diff --git a/test/util/pty_util.cc b/test/util/pty_util.cc index 5fa622922..2cf0bea74 100644 --- a/test/util/pty_util.cc +++ b/test/util/pty_util.cc @@ -23,25 +23,25 @@ namespace gvisor { namespace testing { -PosixErrorOr OpenReplica(const FileDescriptor& main) { - PosixErrorOr n = ReplicaID(main); +PosixErrorOr OpenReplica(const FileDescriptor& master) { + PosixErrorOr n = ReplicaID(master); if (!n.ok()) { return PosixErrorOr(n.error()); } return Open(absl::StrCat("/dev/pts/", n.ValueOrDie()), O_RDWR | O_NONBLOCK); } -PosixErrorOr ReplicaID(const FileDescriptor& main) { +PosixErrorOr ReplicaID(const FileDescriptor& master) { // Get pty index. int n; - int ret = ioctl(main.get(), TIOCGPTN, &n); + int ret = ioctl(master.get(), TIOCGPTN, &n); if (ret < 0) { return PosixError(errno, "ioctl(TIOCGPTN) failed"); } // Unlock pts. int unlock = 0; - ret = ioctl(main.get(), TIOCSPTLCK, &unlock); + ret = ioctl(master.get(), TIOCSPTLCK, &unlock); if (ret < 0) { return PosixError(errno, "ioctl(TIOSPTLCK) failed"); } diff --git a/test/util/pty_util.h b/test/util/pty_util.h index dff6adab5..ed7658868 100644 --- a/test/util/pty_util.h +++ b/test/util/pty_util.h @@ -21,11 +21,11 @@ namespace gvisor { namespace testing { -// Opens the replica end of the passed main as R/W and nonblocking. -PosixErrorOr OpenReplica(const FileDescriptor& main); +// Opens the replica end of the passed master as R/W and nonblocking. +PosixErrorOr OpenReplica(const FileDescriptor& master); -// Get the number of the replica end of the main. -PosixErrorOr ReplicaID(const FileDescriptor& main); +// Get the number of the replica end of the master. +PosixErrorOr ReplicaID(const FileDescriptor& master); } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 0170be90f67b4dc075710d1cd82ccd8d3ac9156a Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 8 Sep 2020 15:52:01 -0700 Subject: Implement ioctl with enable verity ioctl with FS_IOC_ENABLE_VERITY is added to verity file system to enable a file as verity file. For a file, a Merkle tree is built with its data. For a directory, a Merkle tree is built with the root hashes of its children. PiperOrigin-RevId: 330604368 --- pkg/abi/linux/ioctl.go | 5 ++ pkg/sentry/fsimpl/verity/BUILD | 2 + pkg/sentry/fsimpl/verity/verity.go | 128 +++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) (limited to 'pkg/abi') diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index d6dbedc3e..a4fe7501d 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -113,6 +113,11 @@ const ( _IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS ) +// Constants from uapi/linux/fsverity.h. +const ( + FS_IOC_ENABLE_VERITY = 1082156677 +) + // IOC outputs the result of _IOC macro in asm-generic/ioctl.h. func IOC(dir, typ, nr, size uint32) uint32 { return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD index 326c4ed90..d28450e53 100644 --- a/pkg/sentry/fsimpl/verity/BUILD +++ b/pkg/sentry/fsimpl/verity/BUILD @@ -14,11 +14,13 @@ go_library( "//pkg/context", "//pkg/fspath", "//pkg/merkletree", + "//pkg/sentry/arch", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index eedb5f484..0bac8e938 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -22,16 +22,21 @@ package verity import ( + "strconv" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + + "gvisor.dev/gvisor/pkg/merkletree" + "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Name is the default filesystem name. @@ -471,6 +476,129 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return syserror.EPERM } +// generateMerkle generates a Merkle tree file for fd. If fd points to a file +// /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The root +// hash of the generated Merkle tree and the data size is returned. +// If fd points to a regular file, the data is the content of the file. If fd +// points to a directory, the data is all root hahes of its children, written +// to the Merkle tree file. +func (fd *fileDescription) generateMerkle(ctx context.Context) ([]byte, uint64, error) { + fdReader := vfs.FileReadWriteSeeker{ + FD: fd.lowerFD, + Ctx: ctx, + } + merkleReader := vfs.FileReadWriteSeeker{ + FD: fd.merkleReader, + Ctx: ctx, + } + merkleWriter := vfs.FileReadWriteSeeker{ + FD: fd.merkleWriter, + Ctx: ctx, + } + var rootHash []byte + var dataSize uint64 + + switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT { + case linux.S_IFREG: + // For a regular file, generate a Merkle tree based on its + // content. + var err error + stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{}) + if err != nil { + return nil, 0, err + } + dataSize = stat.Size + + rootHash, err = merkletree.Generate(&fdReader, int64(dataSize), &merkleReader, &merkleWriter, false /* dataAndTreeInSameFile */) + if err != nil { + return nil, 0, err + } + case linux.S_IFDIR: + // For a directory, generate a Merkle tree based on the root + // hashes of its children that has already been written to the + // Merkle tree file. + merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{}) + if err != nil { + return nil, 0, err + } + dataSize = merkleStat.Size + + rootHash, err = merkletree.Generate(&merkleReader, int64(dataSize), &merkleReader, &merkleWriter, true /* dataAndTreeInSameFile */) + if err != nil { + return nil, 0, err + } + default: + // TODO(b/167728857): Investigate whether and how we should + // enable other types of file. + return nil, 0, syserror.EINVAL + } + return rootHash, dataSize, nil +} + +// enableVerity enables verity features on fd by generating a Merkle tree file +// and stores its root hash in its parent directory's Merkle tree. +func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + if !fd.d.fs.allowRuntimeEnable { + return 0, syserror.EPERM + } + + // Lock to prevent other threads performing enable or access the file + // while it's being enabled. + verityMu.Lock() + defer verityMu.Unlock() + + if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || fd.parentMerkleWriter == nil { + panic("Unexpected verity fd: missing expected underlying fds") + } + + rootHash, dataSize, err := fd.generateMerkle(ctx) + if err != nil { + return 0, err + } + + stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{}) + if err != nil { + return 0, err + } + + // Write the root hash of fd to the parent directory's Merkle tree + // file, as it should be part of the parent Merkle tree data. + // parentMerkleWriter is open with O_APPEND, so it should write + // directly to the end of the file. + if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(rootHash), vfs.WriteOptions{}); err != nil { + return 0, err + } + + // Record the offset of the root hash of fd in parent directory's + // Merkle tree file. + if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ + Name: merkleOffsetInParentXattr, + Value: strconv.Itoa(int(stat.Size)), + }); err != nil { + return 0, err + } + + // Record the size of the data being hashed for fd. + if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ + Name: merkleSizeXattr, + Value: strconv.Itoa(int(dataSize)), + }); err != nil { + return 0, err + } + fd.d.rootHash = append(fd.d.rootHash, rootHash...) + return 0, nil +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + switch cmd := args[1].Uint(); cmd { + case linux.FS_IOC_ENABLE_VERITY: + return fd.enableVerity(ctx, uio, args) + default: + return fd.lowerFD.Ioctl(ctx, uio, args) + } +} + // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) -- cgit v1.2.3