summary refs log tree commit diff homepage
path: root/pkg/sentry/kernel/fd_map.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel/fd_map.go')
-rw-r--r-- pkg/sentry/kernel/fd_map.go 364
1 files changed, 364 insertions, 0 deletions
diff --git a/pkg/sentry/kernel/fd_map.go b/pkg/sentry/kernel/fd_map.go
new file mode 100644
index 000000000..c5636d233
--- /dev/null
+++ b/pkg/sentry/kernel/fd_map.go
@@ -0,0 +1,364 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "sync"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/refs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+)
+
+// FDs is a slice of file descriptor numbers. It implements sort.Interface,
+// ordering descriptors by ascending numeric value.
+type FDs []kdefs.FD
+
+// Len reports how many descriptors the slice holds.
+func (f FDs) Len() int { return len(f) }
+
+// Swap exchanges the descriptors at positions i and j.
+func (f FDs) Swap(i, j int) {
+	a, b := f[i], f[j]
+	f[i], f[j] = b, a
+}
+
+// Less reports whether the descriptor at position i is numerically smaller
+// than the descriptor at position j.
+func (f FDs) Less(i, j int) bool { return f[j] > f[i] }
+
+// FDFlags define flags for an individual descriptor. They are stored
+// alongside the file in each descriptor entry, so two descriptors referring
+// to the same file may carry different flags.
+//
+// +stateify savable
+type FDFlags struct {
+ // CloseOnExec indicates the descriptor should be closed on exec. It is
+ // reported as linux.O_CLOEXEC by ToLinuxFileFlags and as linux.FD_CLOEXEC
+ // by ToLinuxFDFlags.
+ CloseOnExec bool
+}
+
+// ToLinuxFileFlags returns the Linux file flags encoding of f:
+// linux.O_CLOEXEC when CloseOnExec is set, and zero otherwise.
+func (f FDFlags) ToLinuxFileFlags() (mask uint) {
+	if !f.CloseOnExec {
+		return 0
+	}
+	return linux.O_CLOEXEC
+}
+
+// ToLinuxFDFlags returns the Linux descriptor flags encoding of f:
+// linux.FD_CLOEXEC when CloseOnExec is set, and zero otherwise.
+func (f FDFlags) ToLinuxFDFlags() (mask uint) {
+	if !f.CloseOnExec {
+		return 0
+	}
+	return linux.FD_CLOEXEC
+}
+
+// descriptor holds the details about a file descriptor, namely a pointer to
+// the file itself and the descriptor flags.
+//
+// +stateify savable
+type descriptor struct {
+ // file is the open file this descriptor refers to. The FDMap methods
+ // that install a descriptor take a reference on it.
+ file *fs.File
+ // flags are the per-descriptor flags (see FDFlags).
+ flags FDFlags
+}
+
+// FDMap is used to manage File references and flags. It maps descriptor
+// numbers to descriptor entries and is itself reference counted; DecRef
+// removes every descriptor once the last reference is dropped.
+//
+// +stateify savable
+type FDMap struct {
+ refs.AtomicRefCount
+ // k is the kernel this map was allocated from (see Kernel.NewFDMap);
+ // Fork uses it to allocate the clone.
+ k *Kernel
+ // files maps each allocated descriptor number to its file and flags.
+ files map[kdefs.FD]descriptor
+ // mu is taken by the exported methods around accesses to files. It is
+ // excluded from saved state.
+ mu sync.RWMutex `state:"nosave"`
+ // uid is the identifier returned by ID. It is assigned from the kernel's
+ // fdMapUids counter in NewFDMap and is used as the owner ID when
+ // releasing file locks in unlock.
+ uid uint64
+}
+
+// ID returns the identifier assigned to this FDMap when it was created by
+// Kernel.NewFDMap.
+func (f *FDMap) ID() uint64 { return f.uid }
+
+// NewFDMap allocates a new, empty FDMap that may be used by tasks in k. The
+// map's ID is obtained by atomically incrementing k's fdMapUids counter.
+func (k *Kernel) NewFDMap() *FDMap {
+	m := new(FDMap)
+	m.k = k
+	m.files = make(map[kdefs.FD]descriptor)
+	m.uid = atomic.AddUint64(&k.fdMapUids, 1)
+	return m
+}
+
+// destroy empties the map by removing every file descriptor through
+// RemoveIf with a predicate that always matches.
+func (f *FDMap) destroy() {
+	always := func(*fs.File, FDFlags) bool { return true }
+	f.RemoveIf(always)
+}
+
+// DecRef implements RefCounter.DecRef, passing f.destroy as the destructor
+// to DecRefWithDestructor.
+func (f *FDMap) DecRef() {
+	destructor := f.destroy
+	f.DecRefWithDestructor(destructor)
+}
+
+// Size returns the number of file descriptors currently present in the map.
+func (f *FDMap) Size() int {
+	f.mu.RLock()
+	n := len(f.files)
+	f.mu.RUnlock()
+	return n
+}
+
+// String is a stringer for FDMap. It returns one line per descriptor, of the
+// form "\tfd:<number> => name <full name>\n", in ascending descriptor order
+// so that repeated calls on an unchanged map produce identical output.
+func (f *FDMap) String() string {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+
+	var b bytes.Buffer
+	// Walk the sorted descriptor list rather than ranging over the map:
+	// Go randomizes map iteration order, which would shuffle the lines on
+	// every call.
+	for _, fd := range f.fds() {
+		// Only the name is printed; FullName's second result is discarded.
+		n, _ := f.files[fd].file.Dirent.FullName(nil /* root */)
+		fmt.Fprintf(&b, "\tfd:%d => name %s\n", fd, n)
+	}
+	return b.String()
+}
+
+// NewFDFrom allocates a new FD guaranteed to be the lowest number available
+// greater than or equal to fd, and installs file there with the given flags,
+// taking a new reference on file. This property is important as Unix
+// programs tend to count on this allocation order.
+//
+// The search stops at the NumberOfFiles limit in limitSet unless that limit
+// is limits.Infinity.
+//
+// It returns the allocated descriptor on success. It returns syscall.EINVAL
+// if fd is negative, and syscall.EMFILE if no descriptor in the permitted
+// range is free.
+func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) {
+	if fd < 0 {
+		// Don't accept negative FDs.
+		return 0, syscall.EINVAL
+	}
+
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	// Find the lowest fd not in the files map.
+	//
+	// The bound is compared in the limit's own unsigned width: converting
+	// lim.Cur to kdefs.FD would truncate a large finite limit and could
+	// make it look small or negative, failing the allocation spuriously.
+	// The i >= 0 guard ends the scan if i wraps around after passing the
+	// largest representable descriptor, so a negative FD is never handed
+	// out.
+	lim := limitSet.Get(limits.NumberOfFiles)
+	for i := fd; i >= 0 && (lim.Cur == limits.Infinity || uint64(i) < lim.Cur); i++ {
+		if _, ok := f.files[i]; !ok {
+			file.IncRef()
+			f.files[i] = descriptor{file, flags}
+			return i, nil
+		}
+	}
+
+	return -1, syscall.EMFILE
+}
+
+// NewFDAt sets the file reference for the given FD, taking a new reference
+// on file. If there is an active reference for that FD, the ref count for
+// that existing reference is decremented before NewFDAt returns.
+//
+// It returns syscall.EBADF if fd is negative, and syscall.EMFILE if fd is at
+// or beyond the NumberOfFiles limit in limitSet. When fd is already in use,
+// the limit is treated as one higher, since the old entry is being replaced.
+func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error {
+	if fd < 0 {
+		// Don't accept negative FDs.
+		return syscall.EBADF
+	}
+
+	// In this one case we do not defer the Unlock. The reason is that all
+	// of the work needed to discard any old open file must be done before
+	// we return to the caller. In other words, the DecRef() below must
+	// have completed by the time we return, to ensure its side effects
+	// have taken effect. A classic example is dup2(fd1, fd2): if fd2 was
+	// already open, it must be closed, and we don't want to resume the
+	// caller until it is; we have to block on the DecRef(). Hence we
+	// cannot just do a 'go oldfile.DecRef()', since there would be no
+	// guarantee that it would finish before the caller resumed. Since we
+	// must wait for the DecRef() to finish, and that could take time, it
+	// is best to call f.mu.Unlock() first so that we are not blocking
+	// other users of this FDMap on the DecRef() call.
+	f.mu.Lock()
+	oldDesc, oldExists := f.files[fd]
+	lim := limitSet.Get(limits.NumberOfFiles).Cur
+	if lim != limits.Infinity {
+		// If we're closing one then the effective limit is one more than
+		// the actual limit.
+		if oldExists {
+			lim++
+		}
+		// Compare in the limit's unsigned width. fd is known to be
+		// non-negative here, whereas converting lim to kdefs.FD would
+		// truncate large limits and could reject a valid descriptor.
+		if uint64(fd) >= lim {
+			f.mu.Unlock()
+			return syscall.EMFILE
+		}
+	}
+
+	file.IncRef()
+	f.files[fd] = descriptor{file, flags}
+	f.mu.Unlock()
+
+	if oldExists {
+		oldDesc.file.DecRef()
+	}
+	return nil
+}
+
+// SetFlags replaces the flags stored for fd, leaving its file untouched. It
+// does nothing if fd is not present in the map.
+func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if existing, found := f.files[fd]; found {
+		// Map values are copies, so the updated entry has to be stored back.
+		existing.flags = flags
+		f.files[fd] = existing
+	}
+}
+
+// GetDescriptor looks up fd and returns its file together with its flags,
+// taking an extra reference on the file. If fd is not in the map it returns
+// a nil file and zero-valued flags. The caller must DecRef a non-nil file
+// when done with it.
+func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+
+	desc, ok := f.files[fd]
+	if !ok {
+		return nil, FDFlags{}
+	}
+	desc.file.IncRef()
+	return desc.file, desc.flags
+}
+
+// GetFile looks up fd and returns its File with an extra reference taken.
+// It returns nil if fd is not in the map, i.e. if the FD is invalid. The
+// caller must DecRef a non-nil result when done with it.
+func (f *FDMap) GetFile(fd kdefs.FD) *fs.File {
+	f.mu.RLock()
+	desc, ok := f.files[fd]
+	if ok {
+		// Take the reference while still holding the lock.
+		desc.file.IncRef()
+	}
+	f.mu.RUnlock()
+
+	if !ok {
+		return nil
+	}
+	return desc.file
+}
+
+// fds returns every descriptor number in the map, sorted in ascending order.
+// It does not take f.mu itself; GetFDs and GetRefs hold it while calling.
+func (f *FDMap) fds() FDs {
+	keys := make(FDs, len(f.files))
+	next := 0
+	for fd := range f.files {
+		keys[next] = fd
+		next++
+	}
+	sort.Sort(keys)
+	return keys
+}
+
+// GetFDs returns the descriptor numbers currently in the map, sorted in
+// ascending order.
+func (f *FDMap) GetFDs() FDs {
+	f.mu.RLock()
+	sorted := f.fds()
+	f.mu.RUnlock()
+	return sorted
+}
+
+// GetRefs returns the files for all descriptors in the map, ordered by
+// ascending descriptor number, with an extra reference taken on each. The
+// caller must DecRef every element when done with the slice.
+func (f *FDMap) GetRefs() []*fs.File {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+
+	ordered := f.fds()
+	files := make([]*fs.File, len(ordered))
+	for i, fd := range ordered {
+		file := f.files[fd].file
+		file.IncRef()
+		files[i] = file
+	}
+	return files
+}
+
+// Fork returns a new FDMap, allocated from the same kernel, that contains
+// the same descriptor numbers, files and flags as f. An extra reference is
+// taken on every file for the new map.
+func (f *FDMap) Fork() *FDMap {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+
+	child := f.k.NewFDMap()
+	for fd := range f.files {
+		d := f.files[fd]
+		// The child holds its own reference to each file.
+		d.file.IncRef()
+		child.files[fd] = d
+	}
+	return child
+}
+
+// unlock asks file's POSIX lock set to unlock the region from 0 to
+// lock.LockEOF for the owner identified by this FDMap's ID. Must only be
+// called on a non-nil *fs.File.
+func (f *FDMap) unlock(file *fs.File) {
+	whole := lock.LockRange{0, lock.LockEOF}
+	file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lock.UniqueID(f.ID()), whole)
+}
+
+// inotifyFileClose delivers an inotify close event on f's Dirent. The event
+// is IN_CLOSE_WRITE if f's flags have Write set and IN_CLOSE_NOWRITE
+// otherwise, with IN_ISDIR added when the Dirent's inode is a directory.
+func inotifyFileClose(f *fs.File) {
+	d := f.Dirent
+
+	var ev uint32
+	if fs.IsDir(d.Inode.StableAttr) {
+		ev |= linux.IN_ISDIR
+	}
+
+	switch {
+	case f.Flags().Write:
+		ev |= linux.IN_CLOSE_WRITE
+	default:
+		ev |= linux.IN_CLOSE_NOWRITE
+	}
+
+	d.InotifyEvent(ev, 0)
+}
+
+// Remove deletes fd from the FDMap. If a File was installed there, Remove
+// releases this map's file locks on it, generates the inotify close event
+// and returns (File, true); callers are expected to decrement the reference
+// count on the File. Otherwise it returns (nil, false).
+func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) {
+	f.mu.Lock()
+	file := f.files[fd].file
+	delete(f.files, fd)
+	f.mu.Unlock()
+
+	if file == nil {
+		return nil, false
+	}
+
+	// The lock release and inotify notification run after f.mu is dropped.
+	f.unlock(file)
+	inotifyFileClose(file)
+	return file, true
+}
+
+// RemoveIf removes every FD whose file and flags satisfy cond. For each
+// removed file it releases this map's file locks, generates the inotify
+// close event and drops the map's reference. cond is invoked while f.mu is
+// held; the per-file cleanup runs after the lock is released.
+func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) {
+	f.mu.Lock()
+	var victims []*fs.File
+	for fd, desc := range f.files {
+		if desc.file == nil || !cond(desc.file, desc.flags) {
+			continue
+		}
+		delete(f.files, fd)
+		victims = append(victims, desc.file)
+	}
+	f.mu.Unlock()
+
+	for _, file := range victims {
+		f.unlock(file)
+		inotifyFileClose(file)
+		file.DecRef()
+	}
+}