diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/abi/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/abi/linux/fcntl.go | 29 | ||||
-rw-r--r-- | pkg/abi/linux/ioctl.go | 5 | ||||
-rw-r--r-- | pkg/abi/linux/signal.go | 34 | ||||
-rw-r--r-- | pkg/sentry/fs/file.go | 62 | ||||
-rw-r--r-- | pkg/sentry/fs/file_state.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fs/flags.go | 7 | ||||
-rw-r--r-- | pkg/sentry/kernel/fasync/BUILD | 18 | ||||
-rw-r--r-- | pkg/sentry/kernel/fasync/fasync.go | 145 | ||||
-rw-r--r-- | pkg/sentry/kernel/sessions.go | 5 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/flags.go | 5 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 78 |
13 files changed, 376 insertions, 24 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 693ce0fdd..5d00b66cc 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -31,6 +31,7 @@ go_library(
         "elf.go",
         "errors.go",
         "exec.go",
+        "fcntl.go",
         "file.go",
         "fs.go",
         "futex.go",
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go
new file mode 100644
index 000000000..f5dbe5199
--- /dev/null
+++ b/pkg/abi/linux/fcntl.go
@@ -0,0 +1,29 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Commands from linux/fcntl.h.
+const (
+	F_DUPFD         = 0
+	F_DUPFD_CLOEXEC = 1030
+	F_GETFD         = 1
+	F_GETFL         = 3
+	F_GETOWN        = 9
+	F_SETFD         = 2
+	F_SETFL         = 4
+	F_SETLK         = 6
+	F_SETLKW        = 7
+	F_SETOWN        = 8
+)
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 35cefbdfc..3ef046562 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -29,6 +29,11 @@ const (
 	TIOCSPTLCK = 0x40045431
 	FIONCLEX   = 0x00005450
 	FIOCLEX    = 0x00005451
+	FIOASYNC   = 0x00005452
+	FIOSETOWN  = 0x00008901
+	SIOCSPGRP  = 0x00008902
+	FIOGETOWN  = 0x00008903
+	SIOCGPGRP  = 0x00008904
 )
 
 // ioctl(2) requests provided by uapi/linux/android/binder.h
diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go
index cd09008b5..fed2a159f 100644
--- a/pkg/abi/linux/signal.go
+++ b/pkg/abi/linux/signal.go
@@ -175,3 +175,37 @@ const (
 	SA_NOMASK  = SA_NODEFER
 	SA_ONESHOT = SA_RESTARTHAND
 )
+
+// Signal info types.
+const ( + SI_MASK = 0xffff0000 + SI_KILL = 0 << 16 + SI_TIMER = 1 << 16 + SI_POLL = 2 << 16 + SI_FAULT = 3 << 16 + SI_CHLD = 4 << 16 + SI_RT = 5 << 16 + SI_MESGQ = 6 << 16 + SI_SYS = 7 << 16 +) + +// SIGPOLL si_codes. +const ( + // POLL_IN indicates that data input available. + POLL_IN = SI_POLL | 1 + + // POLL_OUT indicates that output buffers available. + POLL_OUT = SI_POLL | 2 + + // POLL_MSG indicates that an input message available. + POLL_MSG = SI_POLL | 3 + + // POLL_ERR indicates that there was an i/o error. + POLL_ERR = SI_POLL | 4 + + // POLL_PRI indicates that a high priority input available. + POLL_PRI = SI_POLL | 5 + + // POLL_HUP indicates that a device disconnected. + POLL_HUP = SI_POLL | 6 +) diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index f2683bbd2..6d93ef760 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -16,6 +16,7 @@ package fs import ( "math" + "sync" "sync/atomic" "gvisor.googlesource.com/gvisor/pkg/amutex" @@ -72,9 +73,15 @@ type File struct { // other files via the Dirent cache. Dirent *Dirent + // flagsMu protects flags and async below. + flagsMu sync.Mutex `state:"nosave"` + // flags are the File's flags. Setting or getting flags is fully atomic // and is not protected by mu (below). - flags atomic.Value `state:".(FileFlags)"` + flags FileFlags + + // async handles O_ASYNC notifications. + async FileAsync // mu is dual-purpose: first, to make read(2) and write(2) thread-safe // in conformity with POSIX, and second, to cancel operations before they @@ -99,8 +106,8 @@ func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOper UniqueID: uniqueid.GlobalFromContext(ctx), Dirent: dirent, FileOperations: fops, + flags: flags, } - f.flags.Store(flags) f.mu.Init() return f } @@ -117,22 +124,40 @@ func (f *File) DecRef() { // Release a reference on the Dirent. 
f.Dirent.DecRef() + + f.flagsMu.Lock() + if f.flags.Async && f.async != nil { + f.async.Unregister(f) + } + f.flagsMu.Unlock() }) } // Flags atomically loads the File's flags. func (f *File) Flags() FileFlags { - return f.flags.Load().(FileFlags) + f.flagsMu.Lock() + flags := f.flags + f.flagsMu.Unlock() + return flags } // SetFlags atomically changes the File's flags to the values contained // in newFlags. See SettableFileFlags for values that can be set. func (f *File) SetFlags(newFlags SettableFileFlags) { - flags := f.flags.Load().(FileFlags) - flags.Direct = newFlags.Direct - flags.NonBlocking = newFlags.NonBlocking - flags.Append = newFlags.Append - f.flags.Store(flags) + f.flagsMu.Lock() + f.flags.Direct = newFlags.Direct + f.flags.NonBlocking = newFlags.NonBlocking + f.flags.Append = newFlags.Append + if f.async != nil { + if newFlags.Async && !f.flags.Async { + f.async.Register(f) + } + if !newFlags.Async && f.flags.Async { + f.async.Unregister(f) + } + } + f.flags.Async = newFlags.Async + f.flagsMu.Unlock() } // Offset atomically loads the File's offset. @@ -361,6 +386,27 @@ func (f *File) Msync(ctx context.Context, mr memmap.MappableRange) error { return f.Fsync(ctx, int64(mr.Start), int64(mr.End-1), SyncData) } +// A FileAsync sends signals to its owner when w is ready for IO. +type FileAsync interface { + Register(w waiter.Waitable) + Unregister(w waiter.Waitable) +} + +// Async gets the stored FileAsync or creates a new one with the supplied +// function. If the supplied function is nil, no FileAsync is created and the +// current value is returned. +func (f *File) Async(newAsync func() FileAsync) FileAsync { + f.flagsMu.Lock() + defer f.flagsMu.Unlock() + if f.async == nil && newAsync != nil { + f.async = newAsync() + if f.flags.Async { + f.async.Register(f) + } + } + return f.async +} + // FileReader implements io.Reader and io.ReaderAt. type FileReader struct { // Ctx is the context for the file reader. 
diff --git a/pkg/sentry/fs/file_state.go b/pkg/sentry/fs/file_state.go index 341cbda0b..3384737ab 100644 --- a/pkg/sentry/fs/file_state.go +++ b/pkg/sentry/fs/file_state.go @@ -18,13 +18,3 @@ package fs func (f *File) afterLoad() { f.mu.Init() } - -// saveFlags is invoked by stateify. -func (f *File) saveFlags() FileFlags { - return f.flags.Load().(FileFlags) -} - -// loadFlags is invoked by stateify. -func (f *File) loadFlags(flags FileFlags) { - f.flags.Store(flags) -} diff --git a/pkg/sentry/fs/flags.go b/pkg/sentry/fs/flags.go index dfa6a3d62..7a8eefd02 100644 --- a/pkg/sentry/fs/flags.go +++ b/pkg/sentry/fs/flags.go @@ -42,6 +42,9 @@ type FileFlags struct { // Directory indicates that this file must be a directory. Directory bool + + // Async indicates that this file sends signals on IO events. + Async bool } // SettableFileFlags is a subset of FileFlags above that can be changed @@ -55,6 +58,9 @@ type SettableFileFlags struct { // Append indicates this file is append only. Append bool + + // Async indicates that this file sends signals on IO events. + Async bool } // Settable returns the subset of f that are settable. 
@@ -63,5 +69,6 @@ func (f FileFlags) Settable() SettableFileFlags { Direct: f.Direct, NonBlocking: f.NonBlocking, Append: f.Append, + Async: f.Async, } } diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD new file mode 100644 index 000000000..8d06e1182 --- /dev/null +++ b/pkg/sentry/kernel/fasync/BUILD @@ -0,0 +1,18 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "fasync", + srcs = ["fasync.go"], + importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync", + visibility = ["//:sandbox"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/arch", + "//pkg/sentry/fs", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go new file mode 100644 index 000000000..028d6766f --- /dev/null +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -0,0 +1,145 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fasync provides FIOASYNC related functionality. 
+package fasync
+
+import (
+	"sync"
+
+	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// New creates a new FileAsync.
+func New() fs.FileAsync {
+	return &FileAsync{}
+}
+
+// FileAsync sends signals when the registered file is ready for IO.
+type FileAsync struct {
+	// mu protects all fields below.
+	mu        sync.Mutex
+	e         waiter.Entry
+	requester auth.Credentials
+
+	// Only one of the following is allowed to be non-nil.
+	recipientPG *kernel.ProcessGroup
+	recipientTG *kernel.ThreadGroup
+	recipientT  *kernel.Task
+}
+
+// Callback sends SIGIO to the current owner. It is the callback that Register
+// installs on a.e.
+//
+// Callback does nothing if no file is registered, if no recipient is set, or
+// if the requester's credentials do not permit signaling the recipient.
+func (a *FileAsync) Callback(e *waiter.Entry) {
+	a.mu.Lock()
+	// Deferred so that every return path below releases the lock.
+	defer a.mu.Unlock()
+
+	if a.e.Callback == nil {
+		return
+	}
+
+	// Resolve the recipient to a single task: a process group resolves to
+	// its originator thread group, and a thread group to its leader.
+	t := a.recipientT
+	tg := a.recipientTG
+	if a.recipientPG != nil {
+		tg = a.recipientPG.Originator()
+	}
+	if tg != nil {
+		t = tg.Leader()
+	}
+	if t == nil {
+		// No recipient is set (all three recipient fields are nil, e.g.
+		// the owner was unset), so there is no one to signal.
+		return
+	}
+
+	c := t.Credentials()
+	// Logic from sigio_perm in fs/fcntl.c.
+	if a.requester.EffectiveKUID == 0 ||
+		a.requester.EffectiveKUID == c.SavedKUID ||
+		a.requester.EffectiveKUID == c.RealKUID ||
+		a.requester.RealKUID == c.SavedKUID ||
+		a.requester.RealKUID == c.RealKUID {
+		t.SendSignal(&arch.SignalInfo{
+			Signo: int32(linux.SIGIO),
+			// SEND_SIG_PRIV
+			Code: arch.SignalInfoKernel,
+		})
+	}
+}
+
+// Register sets the file which will be monitored for IO events.
+//
+// The file must not be currently registered.
+func (a *FileAsync) Register(w waiter.Waitable) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	if a.e.Callback != nil {
+		panic("registering already registered file")
+	}
+
+	a.e.Callback = a
+	w.EventRegister(&a.e, waiter.EventIn|waiter.EventOut|waiter.EventErr|waiter.EventHUp)
+}
+
+// Unregister stops monitoring a file.
+//
+// The file must be currently registered.
+func (a *FileAsync) Unregister(w waiter.Waitable) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	if a.e.Callback == nil {
+		panic("unregistering unregistered file")
+	}
+
+	w.EventUnregister(&a.e)
+	a.e.Callback = nil
+}
+
+// Owner returns who is currently getting signals. All return values will be
+// nil if no one is set to receive signals.
+func (a *FileAsync) Owner() (*kernel.Task, *kernel.ThreadGroup, *kernel.ProcessGroup) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	return a.recipientT, a.recipientTG, a.recipientPG
+}
+
+// SetOwnerTask sets the owner (who will receive signals) to a specified task.
+// Only this owner will receive signals.
+func (a *FileAsync) SetOwnerTask(requester *kernel.Task, recipient *kernel.Task) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.requester = requester.Credentials()
+	a.recipientT = recipient
+	a.recipientTG = nil
+	a.recipientPG = nil
+}
+
+// SetOwnerThreadGroup sets the owner (who will receive signals) to a specified
+// thread group. Only this owner will receive signals.
+func (a *FileAsync) SetOwnerThreadGroup(requester *kernel.Task, recipient *kernel.ThreadGroup) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.requester = requester.Credentials()
+	a.recipientT = nil
+	a.recipientTG = recipient
+	a.recipientPG = nil
+}
+
+// SetOwnerProcessGroup sets the owner (who will receive signals) to a
+// specified process group. Only this owner will receive signals.
+func (a *FileAsync) SetOwnerProcessGroup(requester *kernel.Task, recipient *kernel.ProcessGroup) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.requester = requester.Credentials()
+	a.recipientT = nil
+	a.recipientTG = nil
+	a.recipientPG = recipient
+}
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 53d8fb844..fa4c7b8f6 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -110,6 +110,11 @@ type ProcessGroup struct {
 	processGroupEntry
 }
 
+// Originator returns the originator of the process group.
+func (pg *ProcessGroup) Originator() *ThreadGroup { + return pg.originator +} + // incRefWithParent grabs a reference. // // This function is called when this ProcessGroup is being associated with some diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 7cfd37fb1..d3f3cc459 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -82,6 +82,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", + "//pkg/sentry/kernel/fasync", "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go index 82bfd7c2a..3d39a20f4 100644 --- a/pkg/sentry/syscalls/linux/flags.go +++ b/pkg/sentry/syscalls/linux/flags.go @@ -61,6 +61,9 @@ func flagsToLinux(flags fs.FileFlags) (mask uint) { if flags.Directory { mask |= syscall.O_DIRECTORY } + if flags.Async { + mask |= syscall.O_ASYNC + } switch { case flags.Read && flags.Write: mask |= syscall.O_RDWR @@ -82,6 +85,7 @@ func linuxToFlags(mask uint) (flags fs.FileFlags) { Write: (mask & syscall.O_ACCMODE) != syscall.O_RDONLY, Append: mask&syscall.O_APPEND != 0, Directory: mask&syscall.O_DIRECTORY != 0, + Async: mask&syscall.O_ASYNC != 0, } } @@ -91,5 +95,6 @@ func linuxToSettableFlags(mask uint) fs.SettableFileFlags { Direct: mask&syscall.O_DIRECT != 0, NonBlocking: mask&syscall.O_NONBLOCK != 0, Append: mask&syscall.O_APPEND != 0, + Async: mask&syscall.O_ASYNC != 0, } } diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index e2980842f..490649f87 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -25,6 +25,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync" 
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/limits" @@ -528,6 +529,33 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file.SetFlags(flags.Settable()) return 0, nil, nil + case linux.FIOASYNC: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + flags := file.Flags() + if set != 0 { + flags.Async = true + } else { + flags.Async = false + } + file.SetFlags(flags.Settable()) + return 0, nil, nil + + case linux.FIOSETOWN, linux.SIOCSPGRP: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + fSetOwn(t, file, set) + return 0, nil, nil + + case linux.FIOGETOWN, linux.SIOCGPGRP: + who := fGetOwn(t, file) + _, err := t.CopyOut(args[2].Pointer(), &who) + return 0, nil, err + default: ret, err := file.FileOperations.Ioctl(t, t.MemoryManager(), args) if err != nil { @@ -725,6 +753,39 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return uintptr(newfd), nil, nil } +func fGetOwn(t *kernel.Task, file *fs.File) int32 { + ma := file.Async(nil) + if ma == nil { + return 0 + } + a := ma.(*fasync.FileAsync) + ot, otg, opg := a.Owner() + switch { + case ot != nil: + return int32(t.PIDNamespace().IDOfTask(ot)) + case otg != nil: + return int32(t.PIDNamespace().IDOfThreadGroup(otg)) + case opg != nil: + return int32(-t.PIDNamespace().IDOfProcessGroup(opg)) + default: + return 0 + } +} + +// fSetOwn sets the file's owner with the semantics of F_SETOWN in Linux. +// +// If who is positive, it represents a PID. If negative, it represents a PGID. +// If the PID or PGID is invalid, the owner is silently unset. 
+func fSetOwn(t *kernel.Task, file *fs.File, who int32) {
+	a := file.Async(fasync.New).(*fasync.FileAsync)
+	if who < 0 {
+		// A negative who names a process group. Return here so that the
+		// thread group lookup below cannot overwrite the owner just set.
+		pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(-who))
+		a.SetOwnerProcessGroup(t, pg)
+		return
+	}
+	tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(who))
+	a.SetOwnerThreadGroup(t, tg)
+}
+
 // Fcntl implements linux syscall fcntl(2).
 func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	fd := kdefs.FD(args[0].Int())
@@ -737,7 +798,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	defer file.DecRef()
 
 	switch cmd {
-	case syscall.F_DUPFD, syscall.F_DUPFD_CLOEXEC:
+	case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC:
 		from := kdefs.FD(args[2].Int())
 		fdFlags := kernel.FDFlags{CloseOnExec: cmd == syscall.F_DUPFD_CLOEXEC}
 		fd, err := t.FDMap().NewFDFrom(from, file, fdFlags, t.ThreadGroup().Limits())
@@ -745,19 +806,19 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 			return 0, nil, err
 		}
 		return uintptr(fd), nil, nil
-	case syscall.F_GETFD:
+	case linux.F_GETFD:
 		return uintptr(fdFlagsToLinux(flags)), nil, nil
-	case syscall.F_SETFD:
+	case linux.F_SETFD:
 		flags := args[2].Uint()
 		t.FDMap().SetFlags(fd, kernel.FDFlags{
 			CloseOnExec: flags&syscall.FD_CLOEXEC != 0,
 		})
-	case syscall.F_GETFL:
+	case linux.F_GETFL:
 		return uintptr(flagsToLinux(file.Flags())), nil, nil
-	case syscall.F_SETFL:
+	case linux.F_SETFL:
 		flags := uint(args[2].Uint())
 		file.SetFlags(linuxToSettableFlags(flags))
-	case syscall.F_SETLK, syscall.F_SETLKW:
+	case linux.F_SETLK, linux.F_SETLKW:
 		// In Linux the file system can choose to provide lock operations for an inode.
 		// Normally pipe and socket types lack lock operations. We diverge and use a heavy
 		// hammer by only allowing locks on files and directories.
@@ -854,6 +915,11 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall default: return 0, nil, syserror.EINVAL } + case linux.F_GETOWN: + return uintptr(fGetOwn(t, file)), nil, nil + case linux.F_SETOWN: + fSetOwn(t, file, args[2].Int()) + return 0, nil, nil default: // Everything else is not yet supported. return 0, nil, syserror.EINVAL |