diff options
-rw-r--r-- | pkg/p9/handlers.go | 6 | ||||
-rw-r--r-- | pkg/p9/path_tree.go | 211 | ||||
-rw-r--r-- | pkg/p9/server.go | 55 | ||||
-rwxr-xr-x | pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo.go | 2 | ||||
-rwxr-xr-x | pkg/sentry/time/seqatomic_parameters.go | 2 |
5 files changed, 193 insertions, 83 deletions
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index 9e622227b..999b4f684 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -224,7 +224,7 @@ func (t *Tattach) handle(cs *connState) message { file: sf, refs: 1, mode: attr.Mode.FileType(), - pathNode: &cs.server.pathTree, + pathNode: cs.server.pathTree, } defer root.DecRef() @@ -552,8 +552,8 @@ func (t *Tunlinkat) handle(cs *connState) message { // since we always acquire deeper in the hierarchy, we know // that we are free of lock cycles. childPathNode := ref.pathNode.pathNodeFor(t.Name) - childPathNode.mu.Lock() - defer childPathNode.mu.Unlock() + childPathNode.opMu.Lock() + defer childPathNode.opMu.Unlock() // Do the unlink. err = ref.file.UnlinkAt(t.Name, t.Flags) diff --git a/pkg/p9/path_tree.go b/pkg/p9/path_tree.go index 5a209b291..865459411 100644 --- a/pkg/p9/path_tree.go +++ b/pkg/p9/path_tree.go @@ -22,93 +22,200 @@ import ( // pathNode is a single node in a path traversal. // // These are shared by all fidRefs that point to the same path. +// +// Lock ordering: +// opMu +// childMu +// +// Two different pathNodes may only be locked if Server.renameMu is held for +// write, in which case they can be acquired in any order. type pathNode struct { - // fidRefName is a map[*fidRef]string mapping child fidRefs to their - // path component name. - fidRefNames sync.Map + // opMu synchronizes high-level, sematic operations, such as the + // simultaneous creation and deletion of a file. + // + // opMu does not directly protect any fields in pathNode. + opMu sync.RWMutex - // childNodes is a map[string]*pathNode mapping child path component - // names to their pathNode. - childNodes sync.Map + // childMu protects the fields below. + childMu sync.RWMutex - // mu does *not* protect the fields above. - // - // They are not synchronized because we allow certain operations (file - // walk) to proceed without having to acquire a write lock. mu exists - // to synchronize high-level, semantic operations, such as the - // simultaneous creation and deletion of a file. - mu sync.RWMutex + // childNodes maps child path component names to their pathNode. + childNodes map[string]*pathNode + + // childRefs maps child path component names to all of the their + // references. + childRefs map[string]map[*fidRef]struct{} + + // childRefNames maps child references back to their path component + // name. + childRefNames map[*fidRef]string +} + +func newPathNode() *pathNode { + return &pathNode{ + childNodes: make(map[string]*pathNode), + childRefs: make(map[string]map[*fidRef]struct{}), + childRefNames: make(map[*fidRef]string), + } +} + +// forEachChildRef calls fn for each child reference. +func (p *pathNode) forEachChildRef(fn func(ref *fidRef, name string)) { + p.childMu.RLock() + defer p.childMu.RUnlock() + + for name, m := range p.childRefs { + for ref := range m { + fn(ref, name) + } + } +} + +// forEachChildNode calls fn for each child pathNode. +func (p *pathNode) forEachChildNode(fn func(pn *pathNode)) { + p.childMu.RLock() + defer p.childMu.RUnlock() + + for _, pn := range p.childNodes { + fn(pn) + } } // pathNodeFor returns the path node for the given name, or a new one. -// -// Precondition: This call is synchronized w.r.t. other adding or removing of -// children. func (p *pathNode) pathNodeFor(name string) *pathNode { - // Load the existing path node. - if pn, ok := p.childNodes.Load(name); ok { - return pn.(*pathNode) + p.childMu.RLock() + // Fast path, node already exists. + if pn, ok := p.childNodes[name]; ok { + p.childMu.RUnlock() + return pn + } + p.childMu.RUnlock() + + // Slow path, create a new pathNode for shared use. + p.childMu.Lock() + + // Re-check after re-lock. + if pn, ok := p.childNodes[name]; ok { + p.childMu.Unlock() + return pn } - // Create a new pathNode for shared use. - pn, _ := p.childNodes.LoadOrStore(name, new(pathNode)) - return pn.(*pathNode) + pn := newPathNode() + p.childNodes[name] = pn + p.childMu.Unlock() + return pn } // nameFor returns the name for the given fidRef. // // Precondition: addChild is called for ref before nameFor. func (p *pathNode) nameFor(ref *fidRef) string { - if s, ok := p.fidRefNames.Load(ref); ok { - return s.(string) + p.childMu.RLock() + n, ok := p.childRefNames[ref] + p.childMu.RUnlock() + + if !ok { + // This should not happen, don't proceed. + panic(fmt.Sprintf("expected name for %+v, none found", ref)) } - // This should not happen, don't proceed. - panic(fmt.Sprintf("expected name for %+v, none found", ref)) + return n } -// addChild adds a child to p. +// addChildLocked adds a child reference to p. // -// This applies only to an individual fidRef. -// -// Precondition: ref is added only once unless it is removed before adding with -// a new name. -func (p *pathNode) addChild(ref *fidRef, name string) { - if s, ok := p.fidRefNames.Load(ref); ok { +// Precondition: As addChild, plus childMu is locked for write. +func (p *pathNode) addChildLocked(ref *fidRef, name string) { + if n, ok := p.childRefNames[ref]; ok { // This should not happen, don't proceed. - panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, s, name)) + panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, n, name)) } - p.fidRefNames.Store(ref, name) + p.childRefNames[ref] = name + + m, ok := p.childRefs[name] + if !ok { + m = make(map[*fidRef]struct{}) + p.childRefs[name] = m + } + + m[ref] = struct{}{} +} + +// addChild adds a child reference to p. +// +// Precondition: ref may only be added once at a time. +func (p *pathNode) addChild(ref *fidRef, name string) { + p.childMu.Lock() + p.addChildLocked(ref, name) + p.childMu.Unlock() } // removeChild removes the given child. // -// This applies only to an individual fidRef. +// This applies only to an individual fidRef, which is not required to exist. func (p *pathNode) removeChild(ref *fidRef) { - p.fidRefNames.Delete(ref) + p.childMu.Lock() + + // This ref may not exist anymore. This can occur, e.g., in unlink, + // where a removeWithName removes the ref, and then a DecRef on the ref + // attempts to remove again. + if name, ok := p.childRefNames[ref]; ok { + m, ok := p.childRefs[name] + if !ok { + // This should not happen, don't proceed. + p.childMu.Unlock() + panic(fmt.Sprintf("name %s missing from childfidRefs", name)) + } + + delete(m, ref) + if len(m) == 0 { + delete(p.childRefs, name) + } + } + + delete(p.childRefNames, ref) + + p.childMu.Unlock() } -// removeWithName removes all references with the given name. +// addPathNodeFor adds an existing pathNode as the node for name. // -// The original pathNode is returned by this function, and removed from this -// pathNode. Any operations on the removed tree must use this value. +// Preconditions: newName does not exist. +func (p *pathNode) addPathNodeFor(name string, pn *pathNode) { + p.childMu.Lock() + + if opn, ok := p.childNodes[name]; ok { + p.childMu.Unlock() + panic(fmt.Sprintf("unexpected pathNode %+v with path %q", opn, name)) + } + + p.childNodes[name] = pn + p.childMu.Unlock() +} + +// removeWithName removes all references with the given name. // -// The provided function is executed after removal. +// The provided function is executed after reference removal. The only method +// it may (transitively) call on this pathNode is addChildLocked. // -// Precondition: This call is synchronized w.r.t. other adding or removing of -// children. +// If a child pathNode for name exists, it is removed from this pathNode and +// returned by this function. Any operations on the removed tree must use this +// value. func (p *pathNode) removeWithName(name string, fn func(ref *fidRef)) *pathNode { - p.fidRefNames.Range(func(key, value interface{}) bool { - if value.(string) == name { - p.fidRefNames.Delete(key) - fn(key.(*fidRef)) + p.childMu.Lock() + defer p.childMu.Unlock() + + if m, ok := p.childRefs[name]; ok { + for ref := range m { + delete(m, ref) + delete(p.childRefNames, ref) + fn(ref) } - return true - }) + } - // Return the original path node. - origPathNode := p.pathNodeFor(name) - p.childNodes.Delete(name) + // Return the original path node, if it exists. + origPathNode := p.childNodes[name] + delete(p.childNodes, name) return origPathNode } diff --git a/pkg/p9/server.go b/pkg/p9/server.go index c56b8e439..b294efbb0 100644 --- a/pkg/p9/server.go +++ b/pkg/p9/server.go @@ -35,7 +35,7 @@ type Server struct { // These may be across different connections, but rename operations // must be serialized globally for safely. There is a single pathTree // for the entire server, and not per connection. - pathTree pathNode + pathTree *pathNode // renameMu is a global lock protecting rename operations. With this // lock, we can be certain that any given rename operation can safely @@ -49,6 +49,7 @@ type Server struct { func NewServer(attacher Attacher) *Server { return &Server{ attacher: attacher, + pathTree: newPathNode(), } } @@ -204,16 +205,13 @@ func (f *fidRef) maybeParent() *fidRef { // Precondition: this must be called via safelyWrite or safelyGlobal. func notifyDelete(pn *pathNode) { // Call on all local references. - pn.fidRefNames.Range(func(key, _ interface{}) bool { - ref := key.(*fidRef) + pn.forEachChildRef(func(ref *fidRef, _ string) { atomic.StoreUint32(&ref.deleted, 1) - return true }) // Call on all subtrees. - pn.childNodes.Range(func(_, value interface{}) bool { - notifyDelete(value.(*pathNode)) - return true + pn.forEachChildNode(func(pn *pathNode) { + notifyDelete(pn) }) } @@ -225,8 +223,10 @@ func (f *fidRef) markChildDeleted(name string) { atomic.StoreUint32(&ref.deleted, 1) }) - // Mark everything below as deleted. - notifyDelete(origPathNode) + if origPathNode != nil { + // Mark all children as deleted. + notifyDelete(origPathNode) + } } // notifyNameChange calls the relevant Renamed method on all nodes in the path, @@ -236,17 +236,13 @@ func (f *fidRef) markChildDeleted(name string) { // Precondition: this must be called via safelyGlobal. func notifyNameChange(pn *pathNode) { // Call on all local references. - pn.fidRefNames.Range(func(key, value interface{}) bool { - ref := key.(*fidRef) - name := value.(string) + pn.forEachChildRef(func(ref *fidRef, name string) { ref.file.Renamed(ref.parent.file, name) - return true }) // Call on all subtrees. - pn.childNodes.Range(func(name, value interface{}) bool { - notifyNameChange(value.(*pathNode)) - return true + pn.forEachChildNode(func(pn *pathNode) { + notifyNameChange(pn) }) } @@ -256,18 +252,25 @@ func notifyNameChange(pn *pathNode) { func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) { target.markChildDeleted(newName) origPathNode := f.pathNode.removeWithName(oldName, func(ref *fidRef) { + // N.B. DecRef can take f.pathNode's parent's childMu. This is + // allowed because renameMu is held for write via safelyGlobal. ref.parent.DecRef() // Drop original reference. ref.parent = target // Change parent. ref.parent.IncRef() // Acquire new one. - target.pathNode.addChild(ref, newName) + if f.pathNode == target.pathNode { + target.pathNode.addChildLocked(ref, newName) + } else { + target.pathNode.addChild(ref, newName) + } ref.file.Renamed(target.file, newName) }) - // Replace the previous (now deleted) path node. - target.pathNode.childNodes.Store(newName, origPathNode) - - // Call Renamed on everything above. - notifyNameChange(origPathNode) + if origPathNode != nil { + // Replace the previous (now deleted) path node. + target.pathNode.addPathNodeFor(newName, origPathNode) + // Call Renamed on all children. + notifyNameChange(origPathNode) + } } // safelyRead executes the given operation with the local path node locked. @@ -275,8 +278,8 @@ func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) { func (f *fidRef) safelyRead(fn func() error) (err error) { f.server.renameMu.RLock() defer f.server.renameMu.RUnlock() - f.pathNode.mu.RLock() - defer f.pathNode.mu.RUnlock() + f.pathNode.opMu.RLock() + defer f.pathNode.opMu.RUnlock() return fn() } @@ -285,8 +288,8 @@ func (f *fidRef) safelyRead(fn func() error) (err error) { func (f *fidRef) safelyWrite(fn func() error) (err error) { f.server.renameMu.RLock() defer f.server.renameMu.RUnlock() - f.pathNode.mu.Lock() - defer f.pathNode.mu.Unlock() + f.pathNode.opMu.Lock() + defer f.pathNode.opMu.Unlock() return fn() } diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo.go index 24528b66a..895abb129 100755 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo.go @@ -1,12 +1,12 @@ package kernel import ( - "strings" "unsafe" "fmt" "gvisor.dev/gvisor/third_party/gvsync" "reflect" + "strings" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race diff --git a/pkg/sentry/time/seqatomic_parameters.go b/pkg/sentry/time/seqatomic_parameters.go index fb30a22c6..f6560d0bb 100755 --- a/pkg/sentry/time/seqatomic_parameters.go +++ b/pkg/sentry/time/seqatomic_parameters.go @@ -1,12 +1,12 @@ package time import ( - "strings" "unsafe" "fmt" "gvisor.dev/gvisor/third_party/gvsync" "reflect" + "strings" ) // SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race |