diff options
Diffstat (limited to 'pkg/sentry/fs/inode_inotify.go')
-rw-r--r-- | pkg/sentry/fs/inode_inotify.go | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/pkg/sentry/fs/inode_inotify.go b/pkg/sentry/fs/inode_inotify.go new file mode 100644 index 000000000..0f2a66a79 --- /dev/null +++ b/pkg/sentry/fs/inode_inotify.go @@ -0,0 +1,169 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "fmt" + "sync" +) + +// Watches is the collection of inotify watches on an inode. +// +// +stateify savable +type Watches struct { + // mu protects the fields below. + mu sync.RWMutex `state:"nosave"` + + // ws is the map of active watches in this collection, keyed by the inotify + // instance id of the owner. + ws map[uint64]*Watch + + // unlinked indicates whether the target inode was ever unlinked. This is a + // hack to figure out if we should queue a IN_DELETE_SELF event when this + // watches collection is being destroyed, since otherwise we have no way of + // knowing if the target inode is going down due to a deletion or + // revalidation. + unlinked bool +} + +func newWatches() *Watches { + return &Watches{} +} + +// MarkUnlinked indicates the target for this set of watches to be unlinked. +// This has implications for the IN_EXCL_UNLINK flag. +func (w *Watches) MarkUnlinked() { + w.mu.Lock() + defer w.mu.Unlock() + w.unlinked = true +} + +// Lookup returns a matching watch with the given id. Returns nil if no such +// watch exists. Note that the result returned by this method only remains valid +// if the inotify instance owning the watch is locked, preventing modification +// of the returned watch and preventing the replacement of the watch by another +// one from the same instance (since there may be at most one watch per +// instance, per target). +func (w *Watches) Lookup(id uint64) *Watch { + w.mu.Lock() + defer w.mu.Unlock() + return w.ws[id] +} + +// Add adds watch into this set of watches. The watch being added must be unique +// - its ID() should not collide with any existing watches. +func (w *Watches) Add(watch *Watch) { + w.mu.Lock() + defer w.mu.Unlock() + + // Sanity check, the new watch shouldn't collide with an existing + // watch. Silently replacing an existing watch would result in a ref leak on + // this inode. We could handle this collision by calling Unpin() on the + // existing watch, but then we end up leaking watch descriptor ids at the + // inotify level. + if _, exists := w.ws[watch.ID()]; exists { + panic(fmt.Sprintf("Watch collision with ID %+v", watch.ID())) + } + if w.ws == nil { + w.ws = make(map[uint64]*Watch) + } + w.ws[watch.ID()] = watch +} + +// Remove removes a watch with the given id from this set of watches. The caller +// is responsible for generating any watch removal event, as appropriate. The +// provided id must match an existing watch in this collection. +func (w *Watches) Remove(id uint64) { + w.mu.Lock() + defer w.mu.Unlock() + + if w.ws == nil { + // This watch set is being destroyed. The thread executing the + // destructor is already in the process of deleting all our watches. We + // got here with no refs on the inode because we raced with the + // destructor notifying all the watch owners of the inode's destruction. + // See the comment in Watches.TargetDestroyed for why this race exists. + return + } + + watch, ok := w.ws[id] + if !ok { + // While there's technically no problem with silently ignoring a missing + // watch, this is almost certainly a bug. + panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id)) + } + delete(w.ws, watch.ID()) +} + +// Notify queues a new event with all watches in this set. +func (w *Watches) Notify(name string, events, cookie uint32) { + // N.B. We don't defer the unlocks because Notify is in the hot path of + // all IO operations, and the defer costs too much for small IO + // operations. + w.mu.RLock() + for _, watch := range w.ws { + if name != "" && w.unlinked && !watch.NotifyParentAfterUnlink() { + // IN_EXCL_UNLINK - By default, when watching events on the children + // of a directory, events are generated for children even after they + // have been unlinked from the directory. This can result in large + // numbers of uninteresting events for some applications (e.g., if + // watching /tmp, in which many applications create temporary files + // whose names are immediately unlinked). Specifying IN_EXCL_UNLINK + // changes the default behavior, so that events are not generated + // for children after they have been unlinked from the watched + // directory. -- inotify(7) + // + // We know we're dealing with events for a parent when the name + // isn't empty. + continue + } + watch.Notify(name, events, cookie) + } + w.mu.RUnlock() +} + +// Unpin unpins dirent from all watches in this set. +func (w *Watches) Unpin(d *Dirent) { + w.mu.RLock() + defer w.mu.RUnlock() + for _, watch := range w.ws { + watch.Unpin(d) + } +} + +// targetDestroyed is called by the inode destructor to notify the watch owners +// of the impending destruction of the watch target. +func (w *Watches) targetDestroyed() { + var ws map[uint64]*Watch + + // We can't hold w.mu while calling watch.TargetDestroyed to preserve lock + // ordering w.r.t to the owner inotify instances. Instead, atomically move + // the watches map into a local variable so we can iterate over it safely. + // + // Because of this however, it is possible for the watches' owners to reach + // this inode while the inode has no refs. This is still safe because the + // owners can only reach the inode until this function finishes calling + // watch.TargetDestroyed() below and the inode is guaranteed to exist in the + // meanwhile. But we still have to be very careful not to rely on inode + // state that may have been already destroyed. + w.mu.Lock() + ws = w.ws + w.ws = nil + w.mu.Unlock() + + for _, watch := range ws { + watch.TargetDestroyed() + } +} |