diff options
author | Ian Gudger <igudger@google.com> | 2018-06-21 10:52:33 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-06-21 10:53:21 -0700 |
commit | d571a4359cebbcf8a9b201bb125f1cdc9fb126e4 (patch) | |
tree | 53d993db5ab045897c4ad50bb73670e3f018ffea | |
parent | f2a687001ded18a4343c1aa3bfba18b08c6a816a (diff) |
Implement ioctl(FIOASYNC)
FIOASYNC and friends are used to send signals when a file is ready for IO.
This may or may not be needed by Nginx. While Nginx does use it, it is unclear
if the code that uses it has any effect.
PiperOrigin-RevId: 201550828
Change-Id: I7ba05a7db4eb2dfffde11e9bd9a35b65b98d7f50
-rw-r--r-- | pkg/abi/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/abi/linux/fcntl.go | 29 | ||||
-rw-r--r-- | pkg/abi/linux/ioctl.go | 5 | ||||
-rw-r--r-- | pkg/abi/linux/signal.go | 34 | ||||
-rw-r--r-- | pkg/sentry/fs/file.go | 62 | ||||
-rw-r--r-- | pkg/sentry/fs/file_state.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fs/flags.go | 7 | ||||
-rw-r--r-- | pkg/sentry/kernel/fasync/BUILD | 18 | ||||
-rw-r--r-- | pkg/sentry/kernel/fasync/fasync.go | 145 | ||||
-rw-r--r-- | pkg/sentry/kernel/sessions.go | 5 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/flags.go | 5 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 78 |
13 files changed, 376 insertions, 24 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 693ce0fdd..5d00b66cc 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -31,6 +31,7 @@ go_library( "elf.go", "errors.go", "exec.go", + "fcntl.go", "file.go", "fs.go", "futex.go", diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go new file mode 100644 index 000000000..f5dbe5199 --- /dev/null +++ b/pkg/abi/linux/fcntl.go @@ -0,0 +1,29 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Commands from linux/fcntl.h. +const ( + F_DUPFD = 0 + F_DUPFD_CLOEXEC = 1030 + F_GETFD = 1 + F_GETFL = 3 + F_GETOWN = 9 + F_SETFD = 2 + F_SETFL = 4 + F_SETLK = 6 + F_SETLKW = 7 + F_SETOWN = 8 +) diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 35cefbdfc..3ef046562 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -29,6 +29,11 @@ const ( TIOCSPTLCK = 0x40045431 FIONCLEX = 0x00005450 FIOCLEX = 0x00005451 + FIOASYNC = 0x00005452 + FIOSETOWN = 0x00008901 + SIOCSPGRP = 0x00008902 + FIOGETOWN = 0x00008903 + SIOCGPGRP = 0x00008904 ) // ioctl(2) requests provided by uapi/linux/android/binder.h diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go index cd09008b5..fed2a159f 100644 --- a/pkg/abi/linux/signal.go +++ b/pkg/abi/linux/signal.go @@ -175,3 +175,37 @@ const ( SA_NOMASK = SA_NODEFER SA_ONESHOT = SA_RESTARTHAND ) + +// Signal info types. 
+const ( + SI_MASK = 0xffff0000 + SI_KILL = 0 << 16 + SI_TIMER = 1 << 16 + SI_POLL = 2 << 16 + SI_FAULT = 3 << 16 + SI_CHLD = 4 << 16 + SI_RT = 5 << 16 + SI_MESGQ = 6 << 16 + SI_SYS = 7 << 16 +) + +// SIGPOLL si_codes. +const ( + // POLL_IN indicates that data input available. + POLL_IN = SI_POLL | 1 + + // POLL_OUT indicates that output buffers available. + POLL_OUT = SI_POLL | 2 + + // POLL_MSG indicates that an input message available. + POLL_MSG = SI_POLL | 3 + + // POLL_ERR indicates that there was an i/o error. + POLL_ERR = SI_POLL | 4 + + // POLL_PRI indicates that a high priority input available. + POLL_PRI = SI_POLL | 5 + + // POLL_HUP indicates that a device disconnected. + POLL_HUP = SI_POLL | 6 +) diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index f2683bbd2..6d93ef760 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -16,6 +16,7 @@ package fs import ( "math" + "sync" "sync/atomic" "gvisor.googlesource.com/gvisor/pkg/amutex" @@ -72,9 +73,15 @@ type File struct { // other files via the Dirent cache. Dirent *Dirent + // flagsMu protects flags and async below. + flagsMu sync.Mutex `state:"nosave"` + // flags are the File's flags. Setting or getting flags is fully atomic // and is not protected by mu (below). - flags atomic.Value `state:".(FileFlags)"` + flags FileFlags + + // async handles O_ASYNC notifications. + async FileAsync // mu is dual-purpose: first, to make read(2) and write(2) thread-safe // in conformity with POSIX, and second, to cancel operations before they @@ -99,8 +106,8 @@ func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOper UniqueID: uniqueid.GlobalFromContext(ctx), Dirent: dirent, FileOperations: fops, + flags: flags, } - f.flags.Store(flags) f.mu.Init() return f } @@ -117,22 +124,40 @@ func (f *File) DecRef() { // Release a reference on the Dirent. 
f.Dirent.DecRef() + + f.flagsMu.Lock() + if f.flags.Async && f.async != nil { + f.async.Unregister(f) + } + f.flagsMu.Unlock() }) } // Flags atomically loads the File's flags. func (f *File) Flags() FileFlags { - return f.flags.Load().(FileFlags) + f.flagsMu.Lock() + flags := f.flags + f.flagsMu.Unlock() + return flags } // SetFlags atomically changes the File's flags to the values contained // in newFlags. See SettableFileFlags for values that can be set. func (f *File) SetFlags(newFlags SettableFileFlags) { - flags := f.flags.Load().(FileFlags) - flags.Direct = newFlags.Direct - flags.NonBlocking = newFlags.NonBlocking - flags.Append = newFlags.Append - f.flags.Store(flags) + f.flagsMu.Lock() + f.flags.Direct = newFlags.Direct + f.flags.NonBlocking = newFlags.NonBlocking + f.flags.Append = newFlags.Append + if f.async != nil { + if newFlags.Async && !f.flags.Async { + f.async.Register(f) + } + if !newFlags.Async && f.flags.Async { + f.async.Unregister(f) + } + } + f.flags.Async = newFlags.Async + f.flagsMu.Unlock() } // Offset atomically loads the File's offset. @@ -361,6 +386,27 @@ func (f *File) Msync(ctx context.Context, mr memmap.MappableRange) error { return f.Fsync(ctx, int64(mr.Start), int64(mr.End-1), SyncData) } +// A FileAsync sends signals to its owner when w is ready for IO. +type FileAsync interface { + Register(w waiter.Waitable) + Unregister(w waiter.Waitable) +} + +// Async gets the stored FileAsync or creates a new one with the supplied +// function. If the supplied function is nil, no FileAsync is created and the +// current value is returned. +func (f *File) Async(newAsync func() FileAsync) FileAsync { + f.flagsMu.Lock() + defer f.flagsMu.Unlock() + if f.async == nil && newAsync != nil { + f.async = newAsync() + if f.flags.Async { + f.async.Register(f) + } + } + return f.async +} + // FileReader implements io.Reader and io.ReaderAt. type FileReader struct { // Ctx is the context for the file reader. 
diff --git a/pkg/sentry/fs/file_state.go b/pkg/sentry/fs/file_state.go index 341cbda0b..3384737ab 100644 --- a/pkg/sentry/fs/file_state.go +++ b/pkg/sentry/fs/file_state.go @@ -18,13 +18,3 @@ package fs func (f *File) afterLoad() { f.mu.Init() } - -// saveFlags is invoked by stateify. -func (f *File) saveFlags() FileFlags { - return f.flags.Load().(FileFlags) -} - -// loadFlags is invoked by stateify. -func (f *File) loadFlags(flags FileFlags) { - f.flags.Store(flags) -} diff --git a/pkg/sentry/fs/flags.go b/pkg/sentry/fs/flags.go index dfa6a3d62..7a8eefd02 100644 --- a/pkg/sentry/fs/flags.go +++ b/pkg/sentry/fs/flags.go @@ -42,6 +42,9 @@ type FileFlags struct { // Directory indicates that this file must be a directory. Directory bool + + // Async indicates that this file sends signals on IO events. + Async bool } // SettableFileFlags is a subset of FileFlags above that can be changed @@ -55,6 +58,9 @@ type SettableFileFlags struct { // Append indicates this file is append only. Append bool + + // Async indicates that this file sends signals on IO events. + Async bool } // Settable returns the subset of f that are settable. 
@@ -63,5 +69,6 @@ func (f FileFlags) Settable() SettableFileFlags { Direct: f.Direct, NonBlocking: f.NonBlocking, Append: f.Append, + Async: f.Async, } } diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD new file mode 100644 index 000000000..8d06e1182 --- /dev/null +++ b/pkg/sentry/kernel/fasync/BUILD @@ -0,0 +1,18 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "fasync", + srcs = ["fasync.go"], + importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync", + visibility = ["//:sandbox"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/arch", + "//pkg/sentry/fs", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go new file mode 100644 index 000000000..028d6766f --- /dev/null +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -0,0 +1,145 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fasync provides FIOASYNC related functionality. 
+package fasync + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// New creates a new FileAsync. +func New() fs.FileAsync { + return &FileAsync{} +} + +// FileAsync sends signals when the registered file is ready for IO. +type FileAsync struct { + mu sync.Mutex + e waiter.Entry + requester auth.Credentials + + // Only one of the following is allowed to be non-nil. + recipientPG *kernel.ProcessGroup + recipientTG *kernel.ThreadGroup + recipientT *kernel.Task +} + +// Callback sends a signal. +func (a *FileAsync) Callback(e *waiter.Entry) { + a.mu.Lock() + if a.e.Callback == nil { + return + } + t := a.recipientT + tg := a.recipientTG + if a.recipientPG != nil { + tg = a.recipientPG.Originator() + } + if tg != nil { + t = tg.Leader() + } + c := t.Credentials() + // Logic from sigio_perm in fs/fcntl.c. + if a.requester.EffectiveKUID == 0 || + a.requester.EffectiveKUID == c.SavedKUID || + a.requester.EffectiveKUID == c.RealKUID || + a.requester.RealKUID == c.SavedKUID || + a.requester.RealKUID == c.RealKUID { + t.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGIO), + // SEND_SIG_PRIV + Code: arch.SignalInfoKernel, + }) + } + a.mu.Unlock() +} + +// Register sets the file which will be monitored for IO events. +// +// The file must not be currently registered. +func (a *FileAsync) Register(w waiter.Waitable) { + a.mu.Lock() + defer a.mu.Unlock() + + if a.e.Callback != nil { + panic("registering already registered file") + } + + a.e.Callback = a + w.EventRegister(&a.e, waiter.EventIn|waiter.EventOut|waiter.EventErr|waiter.EventHUp) +} + +// Unregister stops monitoring a file. +// +// The file must be currently registered. 
+func (a *FileAsync) Unregister(w waiter.Waitable) { + a.mu.Lock() + defer a.mu.Unlock() + + if a.e.Callback == nil { + panic("unregistering unregistered file") + } + + w.EventUnregister(&a.e) + a.e.Callback = nil +} + +// Owner returns who is currently getting signals. All return values will be +// nil if no one is set to receive signals. +func (a *FileAsync) Owner() (*kernel.Task, *kernel.ThreadGroup, *kernel.ProcessGroup) { + a.mu.Lock() + defer a.mu.Unlock() + return a.recipientT, a.recipientTG, a.recipientPG +} + +// SetOwnerTask sets the owner (who will receive signals) to a specified task. +// Only this owner will receive signals. +func (a *FileAsync) SetOwnerTask(requester *kernel.Task, recipient *kernel.Task) { + a.mu.Lock() + defer a.mu.Unlock() + a.requester = requester.Credentials() + a.recipientT = recipient + a.recipientTG = nil + a.recipientPG = nil +} + +// SetOwnerThreadGroup sets the owner (who will receive signals) to a specified +// thread group. Only this owner will receive signals. +func (a *FileAsync) SetOwnerThreadGroup(requester *kernel.Task, recipient *kernel.ThreadGroup) { + a.mu.Lock() + defer a.mu.Unlock() + a.requester = requester.Credentials() + a.recipientT = nil + a.recipientTG = recipient + a.recipientPG = nil +} + +// SetOwnerProcessGroup sets the owner (who will receive signals) to a +// specified process group. Only this owner will receive signals. +func (a *FileAsync) SetOwnerProcessGroup(requester *kernel.Task, recipient *kernel.ProcessGroup) { + a.mu.Lock() + defer a.mu.Unlock() + a.requester = requester.Credentials() + a.recipientT = nil + a.recipientTG = nil + a.recipientPG = recipient +} diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index 53d8fb844..fa4c7b8f6 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -110,6 +110,11 @@ type ProcessGroup struct { processGroupEntry } +// Originator returns the originator of the process group. 
+func (pg *ProcessGroup) Originator() *ThreadGroup { + return pg.originator +} + // incRefWithParent grabs a reference. // // This function is called when this ProcessGroup is being associated with some diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 7cfd37fb1..d3f3cc459 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -82,6 +82,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", + "//pkg/sentry/kernel/fasync", "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go index 82bfd7c2a..3d39a20f4 100644 --- a/pkg/sentry/syscalls/linux/flags.go +++ b/pkg/sentry/syscalls/linux/flags.go @@ -61,6 +61,9 @@ func flagsToLinux(flags fs.FileFlags) (mask uint) { if flags.Directory { mask |= syscall.O_DIRECTORY } + if flags.Async { + mask |= syscall.O_ASYNC + } switch { case flags.Read && flags.Write: mask |= syscall.O_RDWR @@ -82,6 +85,7 @@ func linuxToFlags(mask uint) (flags fs.FileFlags) { Write: (mask & syscall.O_ACCMODE) != syscall.O_RDONLY, Append: mask&syscall.O_APPEND != 0, Directory: mask&syscall.O_DIRECTORY != 0, + Async: mask&syscall.O_ASYNC != 0, } } @@ -91,5 +95,6 @@ func linuxToSettableFlags(mask uint) fs.SettableFileFlags { Direct: mask&syscall.O_DIRECT != 0, NonBlocking: mask&syscall.O_NONBLOCK != 0, Append: mask&syscall.O_APPEND != 0, + Async: mask&syscall.O_ASYNC != 0, } } diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index e2980842f..490649f87 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -25,6 +25,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync" 
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/limits" @@ -528,6 +529,33 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall file.SetFlags(flags.Settable()) return 0, nil, nil + case linux.FIOASYNC: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + flags := file.Flags() + if set != 0 { + flags.Async = true + } else { + flags.Async = false + } + file.SetFlags(flags.Settable()) + return 0, nil, nil + + case linux.FIOSETOWN, linux.SIOCSPGRP: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + fSetOwn(t, file, set) + return 0, nil, nil + + case linux.FIOGETOWN, linux.SIOCGPGRP: + who := fGetOwn(t, file) + _, err := t.CopyOut(args[2].Pointer(), &who) + return 0, nil, err + default: ret, err := file.FileOperations.Ioctl(t, t.MemoryManager(), args) if err != nil { @@ -725,6 +753,39 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return uintptr(newfd), nil, nil } +func fGetOwn(t *kernel.Task, file *fs.File) int32 { + ma := file.Async(nil) + if ma == nil { + return 0 + } + a := ma.(*fasync.FileAsync) + ot, otg, opg := a.Owner() + switch { + case ot != nil: + return int32(t.PIDNamespace().IDOfTask(ot)) + case otg != nil: + return int32(t.PIDNamespace().IDOfThreadGroup(otg)) + case opg != nil: + return int32(-t.PIDNamespace().IDOfProcessGroup(opg)) + default: + return 0 + } +} + +// fSetOwn sets the file's owner with the semantics of F_SETOWN in Linux. +// +// If who is positive, it represents a PID. If negative, it represents a PGID. +// If the PID or PGID is invalid, the owner is silently unset. 
+func fSetOwn(t *kernel.Task, file *fs.File, who int32) { + a := file.Async(fasync.New).(*fasync.FileAsync) + if who < 0 { + pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(-who)) + a.SetOwnerProcessGroup(t, pg) + } + tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(who)) + a.SetOwnerThreadGroup(t, tg) +} + // Fcntl implements linux syscall fcntl(2). func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { fd := kdefs.FD(args[0].Int()) @@ -737,7 +798,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall defer file.DecRef() switch cmd { - case syscall.F_DUPFD, syscall.F_DUPFD_CLOEXEC: + case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: from := kdefs.FD(args[2].Int()) fdFlags := kernel.FDFlags{CloseOnExec: cmd == syscall.F_DUPFD_CLOEXEC} fd, err := t.FDMap().NewFDFrom(from, file, fdFlags, t.ThreadGroup().Limits()) @@ -745,19 +806,19 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } return uintptr(fd), nil, nil - case syscall.F_GETFD: + case linux.F_GETFD: return uintptr(fdFlagsToLinux(flags)), nil, nil - case syscall.F_SETFD: + case linux.F_SETFD: flags := args[2].Uint() t.FDMap().SetFlags(fd, kernel.FDFlags{ CloseOnExec: flags&syscall.FD_CLOEXEC != 0, }) - case syscall.F_GETFL: + case linux.F_GETFL: return uintptr(flagsToLinux(file.Flags())), nil, nil - case syscall.F_SETFL: + case linux.F_SETFL: flags := uint(args[2].Uint()) file.SetFlags(linuxToSettableFlags(flags)) - case syscall.F_SETLK, syscall.F_SETLKW: + case linux.F_SETLK, linux.F_SETLKW: // In Linux the file system can choose to provide lock operations for an inode. // Normally pipe and socket types lack lock operations. We diverge and use a heavy // hammer by only allowing locks on files and directories. 
@@ -854,6 +915,11 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall default: return 0, nil, syserror.EINVAL } + case linux.F_GETOWN: + return uintptr(fGetOwn(t, file)), nil, nil + case linux.F_SETOWN: + fSetOwn(t, file, args[2].Int()) + return 0, nil, nil default: // Everything else is not yet supported. return 0, nil, syserror.EINVAL |