summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/fspath/BUILD2
-rw-r--r--pkg/fspath/fspath.go24
-rw-r--r--pkg/fspath/fspath_test.go25
-rw-r--r--pkg/sentry/fsimpl/ext/BUILD1
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/BUILD1
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go11
-rw-r--r--pkg/sentry/fsimpl/ext/ext_test.go9
-rw-r--r--pkg/sentry/fsimpl/ext/filesystem.go12
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go138
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go12
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go7
-rw-r--r--pkg/sentry/fsimpl/memfs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/memfs/benchmark_test.go27
-rw-r--r--pkg/sentry/fsimpl/memfs/filesystem.go667
-rw-r--r--pkg/sentry/fsimpl/memfs/memfs.go29
-rw-r--r--pkg/sentry/fsimpl/memfs/pipe_test.go18
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.s4
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.s7
-rw-r--r--pkg/sentry/vfs/dentry.go29
-rw-r--r--pkg/sentry/vfs/file_description.go19
-rw-r--r--pkg/sentry/vfs/filesystem.go251
-rw-r--r--pkg/sentry/vfs/options.go3
-rw-r--r--pkg/sentry/vfs/resolving_path.go46
-rw-r--r--pkg/sentry/vfs/testutil.go7
-rw-r--r--pkg/sentry/vfs/vfs.go259
-rw-r--r--pkg/syserror/syserror.go1
-rw-r--r--pkg/tcpip/stack/ndp.go35
-rw-r--r--pkg/tcpip/stack/ndp_test.go283
-rw-r--r--pkg/tcpip/stack/nic.go12
-rw-r--r--pkg/tcpip/stack/stack.go22
31 files changed, 1408 insertions, 556 deletions
diff --git a/pkg/fspath/BUILD b/pkg/fspath/BUILD
index 0c5f50397..ca540363c 100644
--- a/pkg/fspath/BUILD
+++ b/pkg/fspath/BUILD
@@ -14,7 +14,6 @@ go_library(
"fspath.go",
],
importpath = "gvisor.dev/gvisor/pkg/fspath",
- deps = ["//pkg/syserror"],
)
go_test(
@@ -25,5 +24,4 @@ go_test(
"fspath_test.go",
],
embed = [":fspath"],
- deps = ["//pkg/syserror"],
)
diff --git a/pkg/fspath/fspath.go b/pkg/fspath/fspath.go
index f68752560..9fb3fee24 100644
--- a/pkg/fspath/fspath.go
+++ b/pkg/fspath/fspath.go
@@ -18,19 +18,17 @@ package fspath
import (
"strings"
-
- "gvisor.dev/gvisor/pkg/syserror"
)
const pathSep = '/'
-// Parse parses a pathname as described by path_resolution(7).
-func Parse(pathname string) (Path, error) {
+// Parse parses a pathname as described by path_resolution(7), except that
+// empty pathnames will be parsed successfully to a Path for which
+// Path.Absolute == Path.Dir == Path.HasComponents() == false. (This is
+// necessary to support AT_EMPTY_PATH.)
+func Parse(pathname string) Path {
if len(pathname) == 0 {
- // "... POSIX decrees that an empty pathname must not be resolved
- // successfully. Linux returns ENOENT in this case." -
- // path_resolution(7)
- return Path{}, syserror.ENOENT
+ return Path{}
}
// Skip leading path separators.
i := 0
@@ -41,7 +39,7 @@ func Parse(pathname string) (Path, error) {
return Path{
Absolute: true,
Dir: true,
- }, nil
+ }
}
}
// Skip trailing path separators. This is required by Iterator.Next. This
@@ -64,7 +62,7 @@ func Parse(pathname string) (Path, error) {
},
Absolute: i != 0,
Dir: j != len(pathname)-1,
- }, nil
+ }
}
// Path contains the information contained in a pathname string.
@@ -111,6 +109,12 @@ func (p Path) String() string {
return b.String()
}
+// HasComponents returns true if p contains a non-zero number of path
+// components.
+func (p Path) HasComponents() bool {
+ return p.Begin.Ok()
+}
+
// An Iterator represents either a path component in a Path or a terminal
// iterator indicating that the end of the path has been reached.
//
diff --git a/pkg/fspath/fspath_test.go b/pkg/fspath/fspath_test.go
index 215b35622..d5e9a549a 100644
--- a/pkg/fspath/fspath_test.go
+++ b/pkg/fspath/fspath_test.go
@@ -18,15 +18,10 @@ import (
"reflect"
"strings"
"testing"
-
- "gvisor.dev/gvisor/pkg/syserror"
)
func TestParseIteratorPartialPathnames(t *testing.T) {
- path, err := Parse("/foo//bar///baz////")
- if err != nil {
- t.Fatalf("Parse failed: %v", err)
- }
+ path := Parse("/foo//bar///baz////")
// Parse strips leading slashes, and records their presence as
// Path.Absolute.
if !path.Absolute {
@@ -71,6 +66,12 @@ func TestParse(t *testing.T) {
}
tests := []testCase{
{
+ pathname: "",
+ relpath: []string{},
+ abs: false,
+ dir: false,
+ },
+ {
pathname: "/",
relpath: []string{},
abs: true,
@@ -113,10 +114,7 @@ func TestParse(t *testing.T) {
for _, test := range tests {
t.Run(test.pathname, func(t *testing.T) {
- p, err := Parse(test.pathname)
- if err != nil {
- t.Fatalf("failed to parse pathname %q: %v", test.pathname, err)
- }
+ p := Parse(test.pathname)
t.Logf("pathname %q => path %q", test.pathname, p)
if p.Absolute != test.abs {
t.Errorf("path absoluteness: got %v, wanted %v", p.Absolute, test.abs)
@@ -134,10 +132,3 @@ func TestParse(t *testing.T) {
})
}
}
-
-func TestParseEmptyPathname(t *testing.T) {
- p, err := Parse("")
- if err != syserror.ENOENT {
- t.Errorf("parsing empty pathname: got (%v, %v), wanted (<unspecified>, ENOENT)", p, err)
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
index 880b7bcd3..bc90330bc 100644
--- a/pkg/sentry/fsimpl/ext/BUILD
+++ b/pkg/sentry/fsimpl/ext/BUILD
@@ -74,6 +74,7 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/fspath",
"//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
"//pkg/sentry/fsimpl/ext/disklayout",
diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD
index bfc46dfa6..4fc8296ef 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/BUILD
+++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD
@@ -7,6 +7,7 @@ go_test(
size = "small",
srcs = ["benchmark_test.go"],
deps = [
+ "//pkg/fspath",
"//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
"//pkg/sentry/fsimpl/ext",
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index 177ce2cb9..2f46d2d13 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -24,6 +24,7 @@ import (
"strings"
"testing"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext"
@@ -121,7 +122,7 @@ func BenchmarkVFS2Ext4fsStat(b *testing.B) {
stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{
Root: *root,
Start: *root,
- Pathname: filePath,
+ Path: fspath.Parse(filePath),
FollowFinalSymlink: true,
}, &vfs.StatOptions{})
if err != nil {
@@ -150,9 +151,9 @@ func BenchmarkVFS2ExtfsMountStat(b *testing.B) {
creds := auth.CredentialsFromContext(ctx)
mountPointName := "/1/"
pop := vfs.PathOperation{
- Root: *root,
- Start: *root,
- Pathname: mountPointName,
+ Root: *root,
+ Start: *root,
+ Path: fspath.Parse(mountPointName),
}
// Save the mount point for later use.
@@ -181,7 +182,7 @@ func BenchmarkVFS2ExtfsMountStat(b *testing.B) {
stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{
Root: *root,
Start: *root,
- Pathname: filePath,
+ Path: fspath.Parse(filePath),
FollowFinalSymlink: true,
}, &vfs.StatOptions{})
if err != nil {
diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go
index e9f756732..5d6c999bd 100644
--- a/pkg/sentry/fsimpl/ext/ext_test.go
+++ b/pkg/sentry/fsimpl/ext/ext_test.go
@@ -25,6 +25,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
@@ -140,7 +141,7 @@ func TestSeek(t *testing.T) {
fd, err := vfsfs.OpenAt(
ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path},
+ &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
&vfs.OpenOptions{},
)
if err != nil {
@@ -359,7 +360,7 @@ func TestStatAt(t *testing.T) {
got, err := vfsfs.StatAt(ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path},
+ &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
&vfs.StatOptions{},
)
if err != nil {
@@ -429,7 +430,7 @@ func TestRead(t *testing.T) {
fd, err := vfsfs.OpenAt(
ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.absPath},
+ &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.absPath)},
&vfs.OpenOptions{},
)
if err != nil {
@@ -565,7 +566,7 @@ func TestIterDirents(t *testing.T) {
fd, err := vfsfs.OpenAt(
ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path},
+ &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
&vfs.OpenOptions{},
)
if err != nil {
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index d7e87979a..616fc002a 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -275,6 +275,16 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
return vfsd, nil
}
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+ vfsd, inode, err := fs.walk(rp, true)
+ if err != nil {
+ return nil, err
+ }
+ inode.incRef()
+ return vfsd, nil
+}
+
// OpenAt implements vfs.FilesystemImpl.OpenAt.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
vfsd, inode, err := fs.walk(rp, false)
@@ -378,7 +388,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}
// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error {
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
if rp.Done() {
return syserror.ENOENT
}
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 52596c090..59f7f39e2 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -49,6 +49,7 @@ go_test(
deps = [
":kernfs",
"//pkg/abi/linux",
+ "//pkg/fspath",
"//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 3cbbe4b20..79759e0fc 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -44,39 +44,37 @@ func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingP
return nil, err
}
afterSymlink:
+ name := rp.Component()
+ // Revalidation must be skipped if name is "." or ".."; d or its parent
+ // respectively can't be expected to transition from invalidated back to
+ // valid, so detecting invalidation and retrying would loop forever. This
+ // is consistent with Linux: fs/namei.c:walk_component() => lookup_fast()
+ // calls d_revalidate(), but walk_component() => handle_dots() does not.
+ if name == "." {
+ rp.Advance()
+ return vfsd, nil
+ }
+ if name == ".." {
+ nextVFSD, err := rp.ResolveParent(vfsd)
+ if err != nil {
+ return nil, err
+ }
+ rp.Advance()
+ return nextVFSD, nil
+ }
d.dirMu.Lock()
- nextVFSD, err := rp.ResolveComponent(vfsd)
- d.dirMu.Unlock()
+ nextVFSD, err := rp.ResolveChild(vfsd, name)
if err != nil {
+ d.dirMu.Unlock()
return nil, err
}
- if nextVFSD != nil {
- // Cached dentry exists, revalidate.
- next := nextVFSD.Impl().(*Dentry)
- if !next.inode.Valid(ctx) {
- d.dirMu.Lock()
- rp.VirtualFilesystem().ForceDeleteDentry(nextVFSD)
- d.dirMu.Unlock()
- fs.deferDecRef(nextVFSD) // Reference from Lookup.
- nextVFSD = nil
- }
- }
- if nextVFSD == nil {
- // Dentry isn't cached; it either doesn't exist or failed
- // revalidation. Attempt to resolve it via Lookup.
- name := rp.Component()
- var err error
- nextVFSD, err = d.inode.Lookup(ctx, name)
- // Reference on nextVFSD dropped by a corresponding Valid.
- if err != nil {
- return nil, err
- }
- d.InsertChild(name, nextVFSD)
+ next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, nextVFSD)
+ d.dirMu.Unlock()
+ if err != nil {
+ return nil, err
}
- next := nextVFSD.Impl().(*Dentry)
-
// Resolve any symlink at current path component.
- if rp.ShouldFollowSymlink() && d.isSymlink() {
+ if rp.ShouldFollowSymlink() && next.isSymlink() {
// TODO: VFS2 needs something extra for /proc/[pid]/fd/ "magic symlinks".
target, err := next.inode.Readlink(ctx)
if err != nil {
@@ -89,7 +87,44 @@ afterSymlink:
}
rp.Advance()
- return nextVFSD, nil
+ return &next.vfsd, nil
+}
+
+// revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
+// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
+// nil) to verify that the returned child (or lack thereof) is correct.
+//
+// Preconditions: Filesystem.mu must be locked for at least reading.
+// parent.dirMu must be locked. parent.isDir(). name is not "." or "..".
+//
+// Postconditions: Caller must call fs.processDeferredDecRefs*.
+func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, childVFSD *vfs.Dentry) (*Dentry, error) {
+ if childVFSD != nil {
+ // Cached dentry exists, revalidate.
+ child := childVFSD.Impl().(*Dentry)
+ if !child.inode.Valid(ctx) {
+ vfsObj.ForceDeleteDentry(childVFSD)
+ fs.deferDecRef(childVFSD) // Reference from Lookup.
+ childVFSD = nil
+ }
+ }
+ if childVFSD == nil {
+ // Dentry isn't cached; it either doesn't exist or failed
+ // revalidation. Attempt to resolve it via Lookup.
+ //
+ // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry,
+ // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries
+ // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl
+ // casts accordingly.
+ var err error
+ childVFSD, err = parent.inode.Lookup(ctx, name)
+ if err != nil {
+ return nil, err
+ }
+ // Reference on childVFSD dropped by a corresponding Valid.
+ parent.insertChildLocked(name, childVFSD)
+ }
+ return childVFSD.Impl().(*Dentry), nil
}
// walkExistingLocked resolves rp to an existing file.
@@ -242,6 +277,19 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
return vfsd, nil
}
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+ fs.mu.RLock()
+ defer fs.processDeferredDecRefs()
+ defer fs.mu.RUnlock()
+ vfsd, _, err := fs.walkParentDirLocked(ctx, rp)
+ if err != nil {
+ return nil, err
+ }
+ vfsd.IncRef() // Ownership transferred to caller.
+ return vfsd, nil
+}
+
// LinkAt implements vfs.FilesystemImpl.LinkAt.
func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
if rp.Done() {
@@ -459,40 +507,42 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
}
// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error {
- noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
- exchange := opts.Flags&linux.RENAME_EXCHANGE != 0
- whiteout := opts.Flags&linux.RENAME_WHITEOUT != 0
- if exchange && (noReplace || whiteout) {
- // Can't specify RENAME_NOREPLACE or RENAME_WHITEOUT with RENAME_EXCHANGE.
- return syserror.EINVAL
- }
- if exchange || whiteout {
- // Exchange and Whiteout flags are not supported on kernfs.
+func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+ // Only RENAME_NOREPLACE is supported.
+ if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
return syserror.EINVAL
}
+ noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
fs.mu.Lock()
defer fs.mu.Lock()
+ // Resolve the destination directory first to verify that it's on this
+ // Mount.
+ dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp)
+ fs.processDeferredDecRefsLocked()
+ if err != nil {
+ return err
+ }
mnt := rp.Mount()
- if mnt != vd.Mount() {
+ if mnt != oldParentVD.Mount() {
return syserror.EXDEV
}
-
if err := mnt.CheckBeginWrite(); err != nil {
return err
}
defer mnt.EndWrite()
- dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp)
+ srcDirVFSD := oldParentVD.Dentry()
+ srcDir := srcDirVFSD.Impl().(*Dentry)
+ srcDir.dirMu.Lock()
+ src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDirVFSD.Child(oldName))
+ srcDir.dirMu.Unlock()
fs.processDeferredDecRefsLocked()
if err != nil {
return err
}
-
- srcVFSD := vd.Dentry()
- srcDirVFSD := srcVFSD.Parent()
+ srcVFSD := &src.vfsd
// Can we remove the src dentry?
if err := checkDeleteLocked(rp, srcVFSD); err != nil {
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index bb01c3d01..ac802218d 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -239,14 +239,22 @@ func (d *Dentry) destroy() {
//
// Precondition: d must represent a directory inode.
func (d *Dentry) InsertChild(name string, child *vfs.Dentry) {
+ d.dirMu.Lock()
+ d.insertChildLocked(name, child)
+ d.dirMu.Unlock()
+}
+
+// insertChildLocked is equivalent to InsertChild, with additional
+// preconditions.
+//
+// Precondition: d.dirMu must be locked.
+func (d *Dentry) insertChildLocked(name string, child *vfs.Dentry) {
if !d.isDir() {
panic(fmt.Sprintf("InsertChild called on non-directory Dentry: %+v.", d))
}
vfsDentry := d.VFSDentry()
vfsDentry.IncRef() // DecRef in child's Dentry.destroy.
- d.dirMu.Lock()
vfsDentry.InsertChild(child, name)
- d.dirMu.Unlock()
}
// The Inode interface maps filesystem-level operations that operate on paths to
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index f78bb7b04..73b6e43b5 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -24,6 +24,7 @@ import (
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
@@ -82,9 +83,9 @@ func newTestSystem(t *testing.T, rootFn RootDentryFn) *TestSystem {
// Precondition: path should be relative path.
func (s *TestSystem) PathOpAtRoot(path string) vfs.PathOperation {
return vfs.PathOperation{
- Root: s.root,
- Start: s.root,
- Pathname: path,
+ Root: s.root,
+ Start: s.root,
+ Path: fspath.Parse(path),
}
}
diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD
index 0cc751eb8..5689bed3b 100644
--- a/pkg/sentry/fsimpl/memfs/BUILD
+++ b/pkg/sentry/fsimpl/memfs/BUILD
@@ -50,6 +50,7 @@ go_test(
deps = [
":memfs",
"//pkg/abi/linux",
+ "//pkg/fspath",
"//pkg/refs",
"//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
@@ -68,6 +69,7 @@ go_test(
embed = [":memfs"],
deps = [
"//pkg/abi/linux",
+ "//pkg/fspath",
"//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go
index 4a7a94a52..6e987af88 100644
--- a/pkg/sentry/fsimpl/memfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/memfs/benchmark_test.go
@@ -21,6 +21,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
@@ -193,9 +194,9 @@ func BenchmarkVFS2MemfsStat(b *testing.B) {
for i := depth; i > 0; i-- {
name := fmt.Sprintf("%d", i)
pop := vfs.PathOperation{
- Root: root,
- Start: vd,
- Pathname: name,
+ Root: root,
+ Start: vd,
+ Path: fspath.Parse(name),
}
if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
Mode: 0755,
@@ -216,7 +217,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) {
fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
Root: root,
Start: vd,
- Pathname: filename,
+ Path: fspath.Parse(filename),
FollowFinalSymlink: true,
}, &vfs.OpenOptions{
Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
@@ -237,7 +238,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) {
stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
Root: root,
Start: root,
- Pathname: filePath,
+ Path: fspath.Parse(filePath),
FollowFinalSymlink: true,
}, &vfs.StatOptions{})
if err != nil {
@@ -378,9 +379,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) {
root := mntns.Root()
defer root.DecRef()
pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Pathname: mountPointName,
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(mountPointName),
}
if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
Mode: 0755,
@@ -408,9 +409,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) {
for i := depth; i > 0; i-- {
name := fmt.Sprintf("%d", i)
pop := vfs.PathOperation{
- Root: root,
- Start: vd,
- Pathname: name,
+ Root: root,
+ Start: vd,
+ Path: fspath.Parse(name),
}
if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
Mode: 0755,
@@ -438,7 +439,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) {
fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
Root: root,
Start: vd,
- Pathname: filename,
+ Path: fspath.Parse(filename),
FollowFinalSymlink: true,
}, &vfs.OpenOptions{
Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
@@ -458,7 +459,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) {
stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
Root: root,
Start: root,
- Pathname: filePath,
+ Path: fspath.Parse(filePath),
FollowFinalSymlink: true,
}, &vfs.StatOptions{})
if err != nil {
diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go
index af4389459..4a83f310c 100644
--- a/pkg/sentry/fsimpl/memfs/filesystem.go
+++ b/pkg/sentry/fsimpl/memfs/filesystem.go
@@ -25,323 +25,283 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
-// stepLocked resolves rp.Component() in parent directory vfsd.
+// Sync implements vfs.FilesystemImpl.Sync.
+func (fs *filesystem) Sync(ctx context.Context) error {
+ // All filesystem state is in-memory.
+ return nil
+}
+
+// stepLocked resolves rp.Component() to an existing file, starting from the
+// given directory.
//
// stepLocked is loosely analogous to fs/namei.c:walk_component().
//
-// Preconditions: filesystem.mu must be locked. !rp.Done(). inode ==
-// vfsd.Impl().(*dentry).inode.
-func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) {
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
+// Preconditions: filesystem.mu must be locked. !rp.Done().
+func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
+ if !d.inode.isDir() {
+ return nil, syserror.ENOTDIR
}
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, nil, err
+ if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ return nil, err
}
afterSymlink:
- nextVFSD, err := rp.ResolveComponent(vfsd)
+ nextVFSD, err := rp.ResolveComponent(&d.vfsd)
if err != nil {
- return nil, nil, err
+ return nil, err
}
if nextVFSD == nil {
// Since the Dentry tree is the sole source of truth for memfs, if it's
// not in the Dentry tree, it doesn't exist.
- return nil, nil, syserror.ENOENT
+ return nil, syserror.ENOENT
}
- nextInode := nextVFSD.Impl().(*dentry).inode
- if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
+ next := nextVFSD.Impl().(*dentry)
+ if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
// TODO: symlink traversals update access time
if err := rp.HandleSymlink(symlink.target); err != nil {
- return nil, nil, err
+ return nil, err
}
goto afterSymlink // don't check the current directory again
}
rp.Advance()
- return nextVFSD, nextInode, nil
+ return next, nil
}
-// walkExistingLocked resolves rp to an existing file.
+// walkParentDirLocked resolves all but the last path component of rp to an
+// existing directory, starting from the given directory (which is usually
+// rp.Start().Impl().(*dentry)). It does not check that the returned directory
+// is searchable by the provider of rp.
//
-// walkExistingLocked is loosely analogous to Linux's
-// fs/namei.c:path_lookupat().
+// walkParentDirLocked is loosely analogous to Linux's
+// fs/namei.c:path_parentat().
//
-// Preconditions: filesystem.mu must be locked.
-func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Done() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
+// Preconditions: filesystem.mu must be locked. !rp.Done().
+func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
+ for !rp.Final() {
+ next, err := stepLocked(rp, d)
if err != nil {
- return nil, nil, err
+ return nil, err
}
+ d = next
}
- if rp.MustBeDir() && !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
+ if !d.inode.isDir() {
+ return nil, syserror.ENOTDIR
}
- return vfsd, inode, nil
+ return d, nil
}
-// walkParentDirLocked resolves all but the last path component of rp to an
-// existing directory. It does not check that the returned directory is
-// searchable by the provider of rp.
+// resolveLocked resolves rp to an existing file.
//
-// walkParentDirLocked is loosely analogous to Linux's
-// fs/namei.c:path_parentat().
+// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Final() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
+// Preconditions: filesystem.mu must be locked.
+func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) {
+ d := rp.Start().Impl().(*dentry)
+ for !rp.Done() {
+ next, err := stepLocked(rp, d)
if err != nil {
- return nil, nil, err
+ return nil, err
}
+ d = next
}
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
+ if rp.MustBeDir() && !d.inode.isDir() {
+ return nil, syserror.ENOTDIR
}
- return vfsd, inode, nil
+ return d, nil
}
-// checkCreateLocked checks that a file named rp.Component() may be created in
-// directory parentVFSD, then returns rp.Component().
+// doCreateAt checks that creating a file at rp is permitted, then invokes
+// create to do so.
//
-// Preconditions: filesystem.mu must be locked. parentInode ==
-// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true.
-func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) {
- if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
- return "", err
- }
- pc := rp.Component()
- if pc == "." || pc == ".." {
- return "", syserror.EEXIST
- }
- childVFSD, err := rp.ResolveChild(parentVFSD, pc)
+// doCreateAt is loosely analogous to a conjunction of Linux's
+// fs/namei.c:filename_create() and done_path_create().
+//
+// Preconditions: !rp.Done(). For the final path component in rp,
+// !rp.ShouldFollowSymlink().
+func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
if err != nil {
- return "", err
+ return err
}
- if childVFSD != nil {
- return "", syserror.EEXIST
+ if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+ return err
}
- if parentVFSD.IsDisowned() {
- return "", syserror.ENOENT
+ name := rp.Component()
+ if name == "." || name == ".." {
+ return syserror.EEXIST
}
- return pc, nil
-}
-
-// checkDeleteLocked checks that the file represented by vfsd may be deleted.
-func checkDeleteLocked(vfsd *vfs.Dentry) error {
- parentVFSD := vfsd.Parent()
- if parentVFSD == nil {
- return syserror.EBUSY
+ // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(),
+ // because if the child exists we want to return EEXIST immediately instead
+ // of attempting symlink/mount traversal.
+ if parent.vfsd.Child(name) != nil {
+ return syserror.EEXIST
}
- if parentVFSD.IsDisowned() {
+ if !dir && rp.MustBeDir() {
return syserror.ENOENT
}
- return nil
+ // In memfs, the only way to cause a dentry to be disowned is by removing
+ // it from the filesystem, so this check is equivalent to checking if
+ // parent has been removed.
+ if parent.vfsd.IsDisowned() {
+ return syserror.ENOENT
+ }
+ mnt := rp.Mount()
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+ return create(parent, name)
}
// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
fs.mu.RLock()
defer fs.mu.RUnlock()
- vfsd, inode, err := walkExistingLocked(rp)
+ d, err := resolveLocked(rp)
if err != nil {
return nil, err
}
if opts.CheckSearchable {
- if !inode.isDir() {
+ if !d.inode.isDir() {
return nil, syserror.ENOTDIR
}
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil {
return nil, err
}
}
- inode.incRef()
- return vfsd, nil
+ d.IncRef()
+ return &d.vfsd, nil
}
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+ fs.mu.RLock()
+ defer fs.mu.RUnlock()
+ d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
if err != nil {
- return err
- }
- if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- d := vd.Dentry().Impl().(*dentry)
- if d.inode.isDir() {
- return syserror.EPERM
+ return nil, err
}
- d.inode.incLinksLocked()
- child := fs.newDentry(d.inode)
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- return nil
+ d.IncRef()
+ return &d.vfsd, nil
+}
+
+// LinkAt implements vfs.FilesystemImpl.LinkAt.
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
+ return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+ if rp.Mount() != vd.Mount() {
+ return syserror.EXDEV
+ }
+ d := vd.Dentry().Impl().(*dentry)
+ if d.inode.isDir() {
+ return syserror.EPERM
+ }
+ if d.inode.nlink == 0 {
+ return syserror.ENOENT
+ }
+ if d.inode.nlink == maxLinks {
+ return syserror.EMLINK
+ }
+ d.inode.incLinksLocked()
+ child := fs.newDentry(d.inode)
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
+ })
}
// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- parentInode.incLinksLocked() // from child's ".."
- return nil
+ return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error {
+ if parent.inode.nlink == maxLinks {
+ return syserror.EMLINK
+ }
+ parent.inode.incLinksLocked() // from child's ".."
+ child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
+ })
}
// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
-
- switch opts.Mode.FileType() {
- case 0:
- // "Zero file type is equivalent to type S_IFREG." - mknod(2)
- fallthrough
- case linux.ModeRegular:
- // TODO(b/138862511): Implement.
- return syserror.EINVAL
-
- case linux.ModeNamedPipe:
- child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- return nil
-
- case linux.ModeSocket:
- // TODO(b/138862511): Implement.
- return syserror.EINVAL
-
- case linux.ModeCharacterDevice:
- fallthrough
- case linux.ModeBlockDevice:
- // TODO(b/72101894): We don't support creating block or character
- // devices at the moment.
- //
- // When we start supporting block and character devices, we'll
- // need to check for CAP_MKNOD here.
- return syserror.EPERM
-
- default:
- // "EINVAL - mode requested creation of something other than a
- // regular file, device special file, FIFO or socket." - mknod(2)
- return syserror.EINVAL
- }
+ return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+ switch opts.Mode.FileType() {
+ case 0, linux.S_IFREG:
+ child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
+ case linux.S_IFIFO:
+ child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
+ case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK:
+ // Not yet supported.
+ return syserror.EPERM
+ default:
+ return syserror.EINVAL
+ }
+ })
}
// OpenAt implements vfs.FilesystemImpl.OpenAt.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- // Filter out flags that are not supported by memfs. O_DIRECTORY and
- // O_NOFOLLOW have no effect here (they're handled by VFS by setting
- // appropriate bits in rp), but are visible in FD status flags. O_NONBLOCK
- // is supported only by pipes.
- opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK
+ if opts.Flags&linux.O_TMPFILE != 0 {
+ // Not yet supported.
+ return nil, syserror.EOPNOTSUPP
+ }
+ // Handle O_CREAT and !O_CREAT separately, since in the latter case we
+ // don't need fs.mu for writing.
if opts.Flags&linux.O_CREAT == 0 {
fs.mu.RLock()
defer fs.mu.RUnlock()
- vfsd, inode, err := walkExistingLocked(rp)
+ d, err := resolveLocked(rp)
if err != nil {
return nil, err
}
- return inode.open(ctx, rp, vfsd, opts.Flags, false)
+ return d.open(ctx, rp, opts.Flags, false /* afterCreate */)
}
mustCreate := opts.Flags&linux.O_EXCL != 0
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
+ start := rp.Start().Impl().(*dentry)
fs.mu.Lock()
defer fs.mu.Unlock()
if rp.Done() {
+ // Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
return nil, syserror.EISDIR
}
if mustCreate {
return nil, syserror.EEXIST
}
- return inode.open(ctx, rp, vfsd, opts.Flags, false)
+ return start.open(ctx, rp, opts.Flags, false /* afterCreate */)
}
afterTrailingSymlink:
- // Walk to the parent directory of the last path component.
- for !rp.Final() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
- if err != nil {
- return nil, err
- }
- }
- if !inode.isDir() {
- return nil, syserror.ENOTDIR
+ parent, err := walkParentDirLocked(rp, start)
+ if err != nil {
+ return nil, err
}
// Check for search permission in the parent directory.
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
return nil, err
}
// Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
return nil, syserror.EISDIR
}
- pc := rp.Component()
- if pc == "." || pc == ".." {
+ name := rp.Component()
+ if name == "." || name == ".." {
return nil, syserror.EISDIR
}
// Determine whether or not we need to create a file.
- childVFSD, err := rp.ResolveChild(vfsd, pc)
- if err != nil {
- return nil, err
- }
- if childVFSD == nil {
+ child, err := stepLocked(rp, parent)
+ if err == syserror.ENOENT {
// Already checked for searchability above; now check for writability.
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
+ if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
return nil, err
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
@@ -349,38 +309,35 @@ afterTrailingSymlink:
}
defer rp.Mount().EndWrite()
// Create and open the child.
- childInode := fs.newRegularFile(rp.Credentials(), opts.Mode)
- child := fs.newDentry(childInode)
- vfsd.InsertChild(&child.vfsd, pc)
- inode.impl.(*directory).childList.PushBack(child)
- return childInode.open(ctx, rp, &child.vfsd, opts.Flags, true)
+ child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return child.open(ctx, rp, opts.Flags, true)
}
- // Open existing file or follow symlink.
- if mustCreate {
- return nil, syserror.EEXIST
+ if err != nil {
+ return nil, err
}
- childInode := childVFSD.Impl().(*dentry).inode
- if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO: symlink traversals update access time
- if err := rp.HandleSymlink(symlink.target); err != nil {
- return nil, err
- }
- // rp.Final() may no longer be true since we now need to resolve the
- // symlink target.
+ // Do we need to resolve a trailing symlink?
+ if !rp.Done() {
+ start = parent
goto afterTrailingSymlink
}
- return childInode.open(ctx, rp, childVFSD, opts.Flags, false)
+ // Open existing file.
+ if mustCreate {
+ return nil, syserror.EEXIST
+ }
+ return child.open(ctx, rp, opts.Flags, false)
}
-func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
+func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
ats := vfs.AccessTypesForOpenFlags(flags)
if !afterCreate {
- if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil {
+ if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil {
return nil, err
}
}
mnt := rp.Mount()
- switch impl := i.impl.(type) {
+ switch impl := d.inode.impl.(type) {
case *regularFile:
var fd regularFileFD
fd.readable = vfs.MayReadFileWithOpenFlags(flags)
@@ -392,8 +349,8 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
// mnt.EndWrite() is called by regularFileFD.Release().
}
mnt.IncRef()
- vfsd.IncRef()
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+ d.IncRef()
+ fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
if flags&linux.O_TRUNC != 0 {
impl.mu.Lock()
impl.data = impl.data[:0]
@@ -408,28 +365,28 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
}
var fd directoryFD
mnt.IncRef()
- vfsd.IncRef()
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+ d.IncRef()
+ fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
return &fd.vfsfd, nil
case *symlink:
// Can't open symlinks without O_PATH (which is unimplemented).
return nil, syserror.ELOOP
case *namedPipe:
- return newNamedPipeFD(ctx, impl, rp, vfsd, flags)
+ return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags)
default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
+ panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
}
}
// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
fs.mu.RLock()
- _, inode, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
+ defer fs.mu.RUnlock()
+ d, err := resolveLocked(rp)
if err != nil {
return "", err
}
- symlink, ok := inode.impl.(*symlink)
+ symlink, ok := d.inode.impl.(*symlink)
if !ok {
return "", syserror.EINVAL
}
@@ -437,63 +394,172 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
}
// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error {
- if rp.Done() {
- return syserror.ENOENT
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+ if opts.Flags != 0 {
+ // TODO(b/145974740): Support renameat2 flags.
+ return syserror.EINVAL
}
+
+ // Resolve newParent first to verify that it's on this Mount.
fs.mu.Lock()
defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
+ newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
if err != nil {
return err
}
- _, err = checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
+ newName := rp.Component()
+ if newName == "." || newName == ".." {
+ return syserror.EBUSY
+ }
+ mnt := rp.Mount()
+ if mnt != oldParentVD.Mount() {
+ return syserror.EXDEV
+ }
+ if err := mnt.CheckBeginWrite(); err != nil {
return err
}
- if err := rp.Mount().CheckBeginWrite(); err != nil {
+ defer mnt.EndWrite()
+
+ oldParent := oldParentVD.Dentry().Impl().(*dentry)
+ if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
return err
}
- defer rp.Mount().EndWrite()
- // TODO: actually implement RenameAt
- return syserror.EPERM
+ // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(),
+ // because if the existing child is a symlink or mount point then we want
+ // to rename over it rather than follow it.
+ renamedVFSD := oldParent.vfsd.Child(oldName)
+ if renamedVFSD == nil {
+ return syserror.ENOENT
+ }
+ renamed := renamedVFSD.Impl().(*dentry)
+ if renamed.inode.isDir() {
+ if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) {
+ return syserror.EINVAL
+ }
+ if oldParent != newParent {
+ // Writability is needed to change renamed's "..".
+ if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil {
+ return err
+ }
+ }
+ } else {
+ if opts.MustBeDir || rp.MustBeDir() {
+ return syserror.ENOTDIR
+ }
+ }
+
+ if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+ return err
+ }
+ replacedVFSD := newParent.vfsd.Child(newName)
+ var replaced *dentry
+ if replacedVFSD != nil {
+ replaced = replacedVFSD.Impl().(*dentry)
+ if replaced.inode.isDir() {
+ if !renamed.inode.isDir() {
+ return syserror.EISDIR
+ }
+ if replaced.vfsd.HasChildren() {
+ return syserror.ENOTEMPTY
+ }
+ } else {
+ if rp.MustBeDir() {
+ return syserror.ENOTDIR
+ }
+ if renamed.inode.isDir() {
+ return syserror.ENOTDIR
+ }
+ }
+ } else {
+ if renamed.inode.isDir() && newParent.inode.nlink == maxLinks {
+ return syserror.EMLINK
+ }
+ }
+ if newParent.vfsd.IsDisowned() {
+ return syserror.ENOENT
+ }
+
+ // Linux places this check before some of those above; we do it here for
+ // simplicity, under the assumption that applications are not intentionally
+ // doing noop renames expecting them to succeed where non-noop renames
+ // would fail.
+ if renamedVFSD == replacedVFSD {
+ return nil
+ }
+ vfsObj := rp.VirtualFilesystem()
+ oldParentDir := oldParent.inode.impl.(*directory)
+ newParentDir := newParent.inode.impl.(*directory)
+ if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil {
+ return err
+ }
+ if replaced != nil {
+ newParentDir.childList.Remove(replaced)
+ if replaced.inode.isDir() {
+ newParent.inode.decLinksLocked() // from replaced's ".."
+ }
+ replaced.inode.decLinksLocked()
+ }
+ oldParentDir.childList.Remove(renamed)
+ newParentDir.childList.PushBack(renamed)
+ if renamed.inode.isDir() {
+ oldParent.inode.decLinksLocked()
+ newParent.inode.incLinksLocked()
+ }
+ // TODO: update timestamps and parent directory sizes
+ vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD)
+ return nil
}
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
defer fs.mu.Unlock()
- vfsd, inode, err := walkExistingLocked(rp)
+ parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
if err != nil {
return err
}
- if err := rp.Mount().CheckBeginWrite(); err != nil {
+ if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
return err
}
- defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(vfsd); err != nil {
- return err
+ name := rp.Component()
+ if name == "." {
+ return syserror.EINVAL
}
- if !inode.isDir() {
+ if name == ".." {
+ return syserror.ENOTEMPTY
+ }
+ childVFSD := parent.vfsd.Child(name)
+ if childVFSD == nil {
+ return syserror.ENOENT
+ }
+ child := childVFSD.Impl().(*dentry)
+ if !child.inode.isDir() {
return syserror.ENOTDIR
}
- if vfsd.HasChildren() {
+ if childVFSD.HasChildren() {
return syserror.ENOTEMPTY
}
- if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil {
+ mnt := rp.Mount()
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+ vfsObj := rp.VirtualFilesystem()
+ if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
return err
}
- // Remove from parent directory's childList.
- vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry))
- inode.decRef()
+ parent.inode.impl.(*directory).childList.Remove(child)
+ parent.inode.decLinksLocked() // from child's ".."
+ child.inode.decLinksLocked()
+ vfsObj.CommitDeleteDentry(childVFSD)
return nil
}
// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
fs.mu.RLock()
- _, _, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
+ defer fs.mu.RUnlock()
+ _, err := resolveLocked(rp)
if err != nil {
return err
}
@@ -507,21 +573,21 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
// StatAt implements vfs.FilesystemImpl.StatAt.
func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
fs.mu.RLock()
- _, inode, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
+ defer fs.mu.RUnlock()
+ d, err := resolveLocked(rp)
if err != nil {
return linux.Statx{}, err
}
var stat linux.Statx
- inode.statTo(&stat)
+ d.inode.statTo(&stat)
return stat, nil
}
// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
fs.mu.RLock()
- _, _, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
+ defer fs.mu.RUnlock()
+ _, err := resolveLocked(rp)
if err != nil {
return linux.Statfs{}, err
}
@@ -531,53 +597,52 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- return nil
+ return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+ child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
+ })
}
// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
defer fs.mu.Unlock()
- vfsd, inode, err := walkExistingLocked(rp)
+ parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
if err != nil {
return err
}
- if err := rp.Mount().CheckBeginWrite(); err != nil {
+ if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
return err
}
- defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(vfsd); err != nil {
- return err
+ name := rp.Component()
+ if name == "." || name == ".." {
+ return syserror.EISDIR
}
- if inode.isDir() {
+ childVFSD := parent.vfsd.Child(name)
+ if childVFSD == nil {
+ return syserror.ENOENT
+ }
+ child := childVFSD.Impl().(*dentry)
+ if child.inode.isDir() {
return syserror.EISDIR
}
- if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil {
+ if !rp.MustBeDir() {
+ return syserror.ENOTDIR
+ }
+ mnt := rp.Mount()
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+ vfsObj := rp.VirtualFilesystem()
+ if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
return err
}
- // Remove from parent directory's childList.
- vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry))
- inode.decLinksLocked()
+ parent.inode.impl.(*directory).childList.Remove(child)
+ child.inode.decLinksLocked()
+ vfsObj.CommitDeleteDentry(childVFSD)
return nil
}
@@ -585,7 +650,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, _, err := walkExistingLocked(rp)
+ _, err := resolveLocked(rp)
if err != nil {
return nil, err
}
@@ -597,7 +662,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([
func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, _, err := walkExistingLocked(rp)
+ _, err := resolveLocked(rp)
if err != nil {
return "", err
}
@@ -609,7 +674,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, nam
func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, _, err := walkExistingLocked(rp)
+ _, err := resolveLocked(rp)
if err != nil {
return err
}
@@ -621,7 +686,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, _, err := walkExistingLocked(rp)
+ _, err := resolveLocked(rp)
if err != nil {
return err
}
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
index 9d509f6e4..8d0167c93 100644
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ b/pkg/sentry/fsimpl/memfs/memfs.go
@@ -29,6 +29,7 @@ package memfs
import (
"fmt"
+ "math"
"sync"
"sync/atomic"
@@ -64,12 +65,6 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
func (fs *filesystem) Release() {
}
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // All filesystem state is in-memory.
- return nil
-}
-
// dentry implements vfs.DentryImpl.
type dentry struct {
vfsd vfs.Dentry
@@ -137,6 +132,8 @@ type inode struct {
impl interface{} // immutable
}
+const maxLinks = math.MaxUint32
+
func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
i.refs = 1
i.mode = uint32(mode)
@@ -147,20 +144,28 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials,
i.impl = impl
}
-// Preconditions: filesystem.mu must be locked for writing.
+// incLinksLocked increments i's link count.
+//
+// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
+// i.nlink < maxLinks.
func (i *inode) incLinksLocked() {
- if atomic.AddUint32(&i.nlink, 1) <= 1 {
+ if i.nlink == 0 {
panic("memfs.inode.incLinksLocked() called with no existing links")
}
+ if i.nlink == maxLinks {
+ panic("memfs.inode.incLinksLocked() called with maximum link count")
+ }
+ atomic.AddUint32(&i.nlink, 1)
}
-// Preconditions: filesystem.mu must be locked for writing.
+// decLinksLocked decrements i's link count.
+//
+// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
func (i *inode) decLinksLocked() {
- if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 {
- i.decRef()
- } else if nlink == ^uint32(0) { // negative overflow
+ if i.nlink == 0 {
panic("memfs.inode.decLinksLocked() called with no existing links")
}
+ atomic.AddUint32(&i.nlink, ^uint32(0))
}
func (i *inode) incRef() {
diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go
index 5bf527c80..be917aeee 100644
--- a/pkg/sentry/fsimpl/memfs/pipe_test.go
+++ b/pkg/sentry/fsimpl/memfs/pipe_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -38,7 +39,7 @@ func TestSeparateFDs(t *testing.T) {
pop := vfs.PathOperation{
Root: root,
Start: root,
- Pathname: fileName,
+ Path: fspath.Parse(fileName),
FollowFinalSymlink: true,
}
rfdchan := make(chan *vfs.FileDescription)
@@ -76,7 +77,7 @@ func TestNonblockingRead(t *testing.T) {
pop := vfs.PathOperation{
Root: root,
Start: root,
- Pathname: fileName,
+ Path: fspath.Parse(fileName),
FollowFinalSymlink: true,
}
openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK}
@@ -108,7 +109,7 @@ func TestNonblockingWriteError(t *testing.T) {
pop := vfs.PathOperation{
Root: root,
Start: root,
- Pathname: fileName,
+ Path: fspath.Parse(fileName),
FollowFinalSymlink: true,
}
openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK}
@@ -126,7 +127,7 @@ func TestSingleFD(t *testing.T) {
pop := vfs.PathOperation{
Root: root,
Start: root,
- Pathname: fileName,
+ Path: fspath.Parse(fileName),
FollowFinalSymlink: true,
}
openOpts := vfs.OpenOptions{Flags: linux.O_RDWR}
@@ -160,10 +161,9 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy
// Create the pipe.
root := mntns.Root()
pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Pathname: fileName,
- FollowFinalSymlink: true,
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(fileName),
}
mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644}
if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil {
@@ -174,7 +174,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy
stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
Root: root,
Start: root,
- Pathname: fileName,
+ Path: fspath.Parse(fileName),
FollowFinalSymlink: true,
}, &vfs.StatOptions{})
if err != nil {
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
index add2c3e08..813ef9822 100644
--- a/pkg/sentry/platform/ring0/entry_arm64.s
+++ b/pkg/sentry/platform/ring0/entry_arm64.s
@@ -554,6 +554,10 @@ TEXT ·Vectors(SB),NOSPLIT,$0
B ·El0_error_invalid(SB)
nop31Instructions()
+ // The exception-vector-table is required to be 11-bits aligned.
+ // Please see Linux source code as reference: arch/arm64/kernel/entry.s.
+ // For gvisor, I defined it as 4K in length, filled the 2nd 2K part with NOPs.
+ // So that, I can safely move the 1st 2K part into the address with 11-bits alignment.
WORD $0xd503201f //nop
nop31Instructions()
WORD $0xd503201f
diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/sentry/platform/ring0/lib_arm64.s
index 1c9171004..0e6a6235b 100644
--- a/pkg/sentry/platform/ring0/lib_arm64.s
+++ b/pkg/sentry/platform/ring0/lib_arm64.s
@@ -12,12 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include "funcdata.h"
+#include "textflag.h"
+
TEXT ·CPACREL1(SB),NOSPLIT,$0-8
WORD $0xd5381041 // MRS CPACR_EL1, R1
MOVD R1, ret+0(FP)
RET
-TEXT ·FPCR(SB),NOSPLIT,$0-8
+TEXT ·GetFPCR(SB),NOSPLIT,$0-8
WORD $0xd53b4201 // MRS NZCV, R1
MOVD R1, ret+0(FP)
RET
@@ -27,7 +30,7 @@ TEXT ·GetFPSR(SB),NOSPLIT,$0-8
MOVD R1, ret+0(FP)
RET
-TEXT ·FPCR(SB),NOSPLIT,$0-8
+TEXT ·SetFPCR(SB),NOSPLIT,$0-8
MOVD addr+0(FP), R1
WORD $0xd51b4201 // MSR R1, NZCV
RET
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index 6209eb053..1bc9c4a38 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -234,6 +234,18 @@ func (d *Dentry) InsertChild(child *Dentry, name string) {
child.name = name
}
+// IsAncestorOf returns true if d is an ancestor of d2; that is, d is either
+// d2's parent or an ancestor of d2's parent.
+func (d *Dentry) IsAncestorOf(d2 *Dentry) bool {
+ for d2.parent != nil {
+ if d2.parent == d {
+ return true
+ }
+ d2 = d2.parent
+ }
+ return false
+}
+
// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
@@ -283,21 +295,6 @@ func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) {
}
}
-// DeleteDentry combines PrepareDeleteDentry and CommitDeleteDentry, as
-// appropriate for in-memory filesystems that don't need to ensure that some
-// external state change succeeds before committing the deletion.
-//
-// DeleteDentry is a mutator of d and d.Parent().
-//
-// Preconditions: d is a child Dentry.
-func (vfs *VirtualFilesystem) DeleteDentry(mntns *MountNamespace, d *Dentry) error {
- if err := vfs.PrepareDeleteDentry(mntns, d); err != nil {
- return err
- }
- vfs.CommitDeleteDentry(d)
- return nil
-}
-
// ForceDeleteDentry causes d to become disowned. It should only be used in
// cases where VFS has no ability to stop the deletion (e.g. d represents the
// local state of a file on a remote filesystem on which the file has already
@@ -326,7 +323,7 @@ func (vfs *VirtualFilesystem) ForceDeleteDentry(d *Dentry) {
// CommitRenameExchangeDentry depending on the rename's outcome.
//
// Preconditions: from is a child Dentry. If to is not nil, it must be a child
-// Dentry from the same Filesystem.
+// Dentry from the same Filesystem. from != to.
func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
if checkInvariants {
if from.parent == nil {
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index df03886c3..0b053201a 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -192,6 +192,8 @@ func (fd *FileDescription) Impl() FileDescriptionImpl {
// be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID and
// auth.KGID respectively).
//
+// All methods may return errors not specified.
+//
// FileDescriptionImpl is analogous to Linux's struct file_operations.
type FileDescriptionImpl interface {
// Release is called when the associated FileDescription reaches zero
@@ -220,6 +222,10 @@ type FileDescriptionImpl interface {
// PRead reads from the file into dst, starting at the given offset, and
// returns the number of bytes read. PRead is permitted to return partial
// reads with a nil error.
+ //
+ // Errors:
+ //
+ // - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP.
PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error)
// Read is similar to PRead, but does not specify an offset.
@@ -229,6 +235,10 @@ type FileDescriptionImpl interface {
// the number of bytes read; note that POSIX 2.9.7 "Thread Interactions
// with Regular File Operations" requires that all operations that may
// mutate the FileDescription offset are serialized.
+ //
+ // Errors:
+ //
+ // - If opts.Flags specifies unsupported options, Read returns EOPNOTSUPP.
Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error)
// PWrite writes src to the file, starting at the given offset, and returns
@@ -238,6 +248,11 @@ type FileDescriptionImpl interface {
// As in Linux (but not POSIX), if O_APPEND is in effect for the
// FileDescription, PWrite should ignore the offset and append data to the
// end of the file.
+ //
+ // Errors:
+ //
+ // - If opts.Flags specifies unsupported options, PWrite returns
+ // EOPNOTSUPP.
PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error)
// Write is similar to PWrite, but does not specify an offset, which is
@@ -247,6 +262,10 @@ type FileDescriptionImpl interface {
// PWrite that uses a FileDescription offset, to make it possible for
// remote filesystems to implement O_APPEND correctly (i.e. atomically with
// respect to writers outside the scope of VFS).
+ //
+ // Errors:
+ //
+ // - If opts.Flags specifies unsupported options, Write returns EOPNOTSUPP.
Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error)
// IterDirents invokes cb on each entry in the directory represented by the
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index b766614e7..89bd58864 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -108,6 +108,24 @@ func (fs *Filesystem) DecRef() {
// (responsible for actually implementing the operation) isn't known until path
// resolution is complete.
//
+// Unless otherwise specified, FilesystemImpl methods are responsible for
+// performing permission checks. In many cases, vfs package functions in
+// permissions.go may be used to help perform these checks.
+//
+// When multiple specified error conditions apply to a given method call, the
+// implementation may return any applicable errno unless otherwise specified,
+// but returning the earliest error specified is preferable to maximize
+// compatibility with Linux.
+//
+// All methods may return errors not specified, notably including:
+//
+// - ENOENT if a required path component does not exist.
+//
+// - ENOTDIR if an intermediate path component is not a directory.
+//
+// - Errors from vfs-package functions (ResolvingPath.Resolve*(),
+// Mount.CheckBeginWrite(), permission-checking functions, etc.)
+//
// For all methods that take or return linux.Statx, Statx.Uid and Statx.Gid
// should be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID
// and auth.KGID respectively).
@@ -130,46 +148,223 @@ type FilesystemImpl interface {
// GetDentryAt does not correspond directly to a Linux syscall; it is used
// in the implementation of:
//
- // - Syscalls that need to resolve two paths: rename(), renameat(),
- // renameat2(), link(), linkat().
+ // - Syscalls that need to resolve two paths: link(), linkat().
//
// - Syscalls that need to refer to a filesystem position outside the
// context of a file description: chdir(), fchdir(), chroot(), mount(),
// umount().
GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error)
+ // GetParentDentryAt returns a Dentry representing the directory at the
+ // second-to-last path component in rp. (Note that, despite the name, this
+ // is not necessarily the parent directory of the file at rp, since the
+ // last path component in rp may be "." or "..".) A reference is taken on
+ // the returned Dentry.
+ //
+ // GetParentDentryAt does not correspond directly to a Linux syscall; it is
+ // used in the implementation of the rename() family of syscalls, which
+ // must resolve the parent directories of two paths.
+ //
+ // Preconditions: !rp.Done().
+ //
+ // Postconditions: If GetParentDentryAt returns a nil error, then
+ // rp.Final(). If GetParentDentryAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
+ GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error)
+
// LinkAt creates a hard link at rp representing the same file as vd. It
// does not take ownership of references on vd.
//
- // The implementation is responsible for checking that vd.Mount() ==
- // rp.Mount(), and that vd does not represent a directory.
+ // Errors:
+ //
+ // - If the last path component in rp is "." or "..", LinkAt returns
+ // EEXIST.
+ //
+ // - If a file already exists at rp, LinkAt returns EEXIST.
+ //
+ // - If rp.MustBeDir(), LinkAt returns ENOENT.
+ //
+ // - If the directory in which the link would be created has been removed
+ // by RmdirAt or RenameAt, LinkAt returns ENOENT.
+ //
+ // - If rp.Mount != vd.Mount(), LinkAt returns EXDEV.
+ //
+ // - If vd represents a directory, LinkAt returns EPERM.
+ //
+ // - If vd represents a file for which all existing links have been
+ // removed, or a file created by open(O_TMPFILE|O_EXCL), LinkAt returns
+ // ENOENT. Equivalently, if vd represents a file with a link count of 0 not
+ // created by open(O_TMPFILE) without O_EXCL, LinkAt returns ENOENT.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If LinkAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error
// MkdirAt creates a directory at rp.
+ //
+ // Errors:
+ //
+ // - If the last path component in rp is "." or "..", MkdirAt returns
+ // EEXIST.
+ //
+ // - If a file already exists at rp, MkdirAt returns EEXIST.
+ //
+ // - If the directory in which the new directory would be created has been
+ // removed by RmdirAt or RenameAt, MkdirAt returns ENOENT.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If MkdirAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error
// MknodAt creates a regular file, device special file, or named pipe at
// rp.
+ //
+ // Errors:
+ //
+ // - If the last path component in rp is "." or "..", MknodAt returns
+ // EEXIST.
+ //
+ // - If a file already exists at rp, MknodAt returns EEXIST.
+ //
+ // - If rp.MustBeDir(), MknodAt returns ENOENT.
+ //
+ // - If the directory in which the file would be created has been removed
+ // by RmdirAt or RenameAt, MknodAt returns ENOENT.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If MknodAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error
// OpenAt returns an FileDescription providing access to the file at rp. A
// reference is taken on the returned FileDescription.
+ //
+ // Errors:
+ //
+ // - If opts.Flags specifies O_TMPFILE and this feature is unsupported by
+ // the implementation, OpenAt returns EOPNOTSUPP. (All other unsupported
+ // features are silently ignored, consistently with Linux's open*(2).)
OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error)
// ReadlinkAt returns the target of the symbolic link at rp.
+ //
+ // Errors:
+ //
+ // - If the file at rp is not a symbolic link, ReadlinkAt returns EINVAL.
ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error)
- // RenameAt renames the Dentry represented by vd to rp. It does not take
- // ownership of references on vd.
+ // RenameAt renames the file named oldName in directory oldParentVD to rp.
+ // It does not take ownership of references on oldParentVD.
+ //
+ // Errors [1]:
+ //
+ // - If opts.Flags specifies unsupported options, RenameAt returns EINVAL.
+ //
+ // - If the last path component in rp is "." or "..", and opts.Flags
+ // contains RENAME_NOREPLACE, RenameAt returns EEXIST.
+ //
+ // - If the last path component in rp is "." or "..", and opts.Flags does
+ // not contain RENAME_NOREPLACE, RenameAt returns EBUSY.
+ //
+ // - If rp.Mount != oldParentVD.Mount(), RenameAt returns EXDEV.
//
- // The implementation is responsible for checking that vd.Mount() ==
- // rp.Mount().
- RenameAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry, opts RenameOptions) error
+ // - If the renamed file is not a directory, and opts.MustBeDir is true,
+ // RenameAt returns ENOTDIR.
+ //
+ // - If renaming would replace an existing file and opts.Flags contains
+ // RENAME_NOREPLACE, RenameAt returns EEXIST.
+ //
+ // - If there is no existing file at rp and opts.Flags contains
+ // RENAME_EXCHANGE, RenameAt returns ENOENT.
+ //
+ // - If there is an existing non-directory file at rp, and rp.MustBeDir()
+ // is true, RenameAt returns ENOTDIR.
+ //
+ // - If the renamed file is not a directory, opts.Flags does not contain
+ // RENAME_EXCHANGE, and rp.MustBeDir() is true, RenameAt returns ENOTDIR.
+ // (This check is not subsumed by the check for directory replacement below
+ // since it applies even if there is no file to replace.)
+ //
+ // - If the renamed file is a directory, and the new parent directory of
+ // the renamed file is either the renamed directory or a descendant
+ // subdirectory of the renamed directory, RenameAt returns EINVAL.
+ //
+ // - If renaming would exchange the renamed file with an ancestor directory
+ // of the renamed file, RenameAt returns EINVAL.
+ //
+ // - If renaming would replace an ancestor directory of the renamed file,
+ // RenameAt returns ENOTEMPTY. (This check would be subsumed by the
+ // non-empty directory check below; however, this check takes place before
+ // the self-rename check.)
+ //
+ // - If the renamed file would replace or exchange with itself (i.e. the
+ // source and destination paths resolve to the same file), RenameAt returns
+ // nil, skipping the checks described below.
+ //
+ // - If the source or destination directory is not writable by the provider
+ // of rp.Credentials(), RenameAt returns EACCES.
+ //
+ // - If the renamed file is a directory, and renaming would replace a
+ // non-directory file, RenameAt returns ENOTDIR.
+ //
+ // - If the renamed file is not a directory, and renaming would replace a
+ // directory, RenameAt returns EISDIR.
+ //
+ // - If the new parent directory of the renamed file has been removed by
+ // RmdirAt or a preceding call to RenameAt, RenameAt returns ENOENT.
+ //
+ // - If the renamed file is a directory, it is not writable by the
+ // provider of rp.Credentials(), and the source and destination parent
+ // directories are different, RenameAt returns EACCES. (This is nominally
+ // required to change the ".." entry in the renamed directory.)
+ //
+ // - If renaming would replace a non-empty directory, RenameAt returns
+ // ENOTEMPTY.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink(). oldName is not "." or "..".
+ //
+ // Postconditions: If RenameAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
+ //
+ // [1] "The worst of all namespace operations - renaming directory.
+ // "Perverted" doesn't even start to describe it. Somebody in UCB had a
+ // heck of a trip..." - fs/namei.c:vfs_rename()
+ RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error
// RmdirAt removes the directory at rp.
+ //
+ // Errors:
+ //
+ // - If the last path component in rp is ".", RmdirAt returns EINVAL.
+ //
+ // - If the last path component in rp is "..", RmdirAt returns ENOTEMPTY.
+ //
+ // - If no file exists at rp, RmdirAt returns ENOENT.
+ //
+ // - If the file at rp exists but is not a directory, RmdirAt returns
+ // ENOTDIR.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If RmdirAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
RmdirAt(ctx context.Context, rp *ResolvingPath) error
// SetStatAt updates metadata for the file at the given path.
+ //
+ // Errors:
+ //
+ // - If opts specifies unsupported options, SetStatAt returns EINVAL.
SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error
// StatAt returns metadata for the file at rp.
@@ -181,9 +376,45 @@ type FilesystemImpl interface {
StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error)
// SymlinkAt creates a symbolic link at rp referring to the given target.
+ //
+ // Errors:
+ //
+ // - If the last path component in rp is "." or "..", SymlinkAt returns
+ // EEXIST.
+ //
+ // - If a file already exists at rp, SymlinkAt returns EEXIST.
+ //
+ // - If rp.MustBeDir(), SymlinkAt returns ENOENT.
+ //
+ // - If the directory in which the symbolic link would be created has been
+ // removed by RmdirAt or RenameAt, SymlinkAt returns ENOENT.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If SymlinkAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error
- // UnlinkAt removes the non-directory file at rp.
+ // UnlinkAt removes the file at rp.
+ //
+ // Errors:
+ //
+ // - If the last path component in rp is "." or "..", UnlinkAt returns
+ // EISDIR.
+ //
+ // - If no file exists at rp, UnlinkAt returns ENOENT.
+ //
+ // - If rp.MustBeDir(), and the file at rp exists and is not a directory,
+ // UnlinkAt returns ENOTDIR.
+ //
+ // - If the file at rp exists but is a directory, UnlinkAt returns EISDIR.
+ //
+ // Preconditions: !rp.Done(). For the final path component in rp,
+ // !rp.ShouldFollowSymlink().
+ //
+ // Postconditions: If UnlinkAt returns an error returned by
+ // ResolvingPath.Resolve*(), then !rp.Done().
UnlinkAt(ctx context.Context, rp *ResolvingPath) error
// ListxattrAt returns all extended attribute names for the file at rp.
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index 97ee4a446..87d2b0d1c 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -83,6 +83,9 @@ type ReadOptions struct {
type RenameOptions struct {
// Flags contains flags as specified for renameat2(2).
Flags uint32
+
+ // If MustBeDir is true, the renamed file must be a directory.
+ MustBeDir bool
}
// SetStatOptions contains options to VirtualFilesystem.SetStatAt(),
diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go
index d580fd39e..f0641d314 100644
--- a/pkg/sentry/vfs/resolving_path.go
+++ b/pkg/sentry/vfs/resolving_path.go
@@ -112,30 +112,26 @@ var resolvingPathPool = sync.Pool{
},
}
-func (vfs *VirtualFilesystem) getResolvingPath(creds *auth.Credentials, pop *PathOperation) (*ResolvingPath, error) {
- path, err := fspath.Parse(pop.Pathname)
- if err != nil {
- return nil, err
- }
+func (vfs *VirtualFilesystem) getResolvingPath(creds *auth.Credentials, pop *PathOperation) *ResolvingPath {
rp := resolvingPathPool.Get().(*ResolvingPath)
rp.vfs = vfs
rp.root = pop.Root
rp.mount = pop.Start.mount
rp.start = pop.Start.dentry
- rp.pit = path.Begin
+ rp.pit = pop.Path.Begin
rp.flags = 0
if pop.FollowFinalSymlink {
rp.flags |= rpflagsFollowFinalSymlink
}
- rp.mustBeDir = path.Dir
- rp.mustBeDirOrig = path.Dir
+ rp.mustBeDir = pop.Path.Dir
+ rp.mustBeDirOrig = pop.Path.Dir
rp.symlinks = 0
rp.curPart = 0
rp.numOrigParts = 1
rp.creds = creds
- rp.parts[0] = path.Begin
- rp.origParts[0] = path.Begin
- return rp, nil
+ rp.parts[0] = pop.Path.Begin
+ rp.origParts[0] = pop.Path.Begin
+ return rp
}
func (vfs *VirtualFilesystem) putResolvingPath(rp *ResolvingPath) {
@@ -345,29 +341,34 @@ func (rp *ResolvingPath) ShouldFollowSymlink() bool {
// symlink target and returns nil. Otherwise it returns a non-nil error.
//
// Preconditions: !rp.Done().
+//
+// Postconditions: If HandleSymlink returns a nil error, then !rp.Done().
func (rp *ResolvingPath) HandleSymlink(target string) error {
if rp.symlinks >= linux.MaxSymlinkTraversals {
return syserror.ELOOP
}
- targetPath, err := fspath.Parse(target)
- if err != nil {
- return err
+ if len(target) == 0 {
+ return syserror.ENOENT
}
rp.symlinks++
+ targetPath := fspath.Parse(target)
if targetPath.Absolute {
rp.absSymlinkTarget = targetPath
return resolveAbsSymlinkError{}
}
- if !targetPath.Begin.Ok() {
- panic(fmt.Sprintf("symbolic link has non-empty target %q that is both relative and has no path components?", target))
- }
// Consume the path component that represented the symlink.
rp.Advance()
// Prepend the symlink target to the relative path.
+ if checkInvariants {
+ if !targetPath.HasComponents() {
+ panic(fmt.Sprintf("non-empty pathname %q parsed to relative path with no components", target))
+ }
+ }
rp.relpathPrepend(targetPath)
return nil
}
+// Preconditions: path.HasComponents().
func (rp *ResolvingPath) relpathPrepend(path fspath.Path) {
if rp.pit.Ok() {
rp.parts[rp.curPart] = rp.pit
@@ -467,6 +468,17 @@ func (rp *ResolvingPath) handleError(err error) bool {
}
}
+// canHandleError returns true if err is an error returned by rp.Resolve*()
+// that rp.handleError() may attempt to handle.
+func (rp *ResolvingPath) canHandleError(err error) bool {
+ switch err.(type) {
+ case resolveMountRootOrJumpError, resolveMountPointError, resolveAbsSymlinkError:
+ return true
+ default:
+ return false
+ }
+}
+
// MustBeDir returns true if the file traversed by rp must be a directory.
func (rp *ResolvingPath) MustBeDir() bool {
return rp.mustBeDir
diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go
index d94117bce..ee5c8b9e2 100644
--- a/pkg/sentry/vfs/testutil.go
+++ b/pkg/sentry/vfs/testutil.go
@@ -57,6 +57,11 @@ func (fs *FDTestFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath,
return nil, syserror.EPERM
}
+// GetParentDentryAt implements FilesystemImpl.GetParentDentryAt.
+func (fs *FDTestFilesystem) GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error) {
+ return nil, syserror.EPERM
+}
+
// LinkAt implements FilesystemImpl.LinkAt.
func (fs *FDTestFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error {
return syserror.EPERM
@@ -83,7 +88,7 @@ func (fs *FDTestFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (
}
// RenameAt implements FilesystemImpl.RenameAt.
-func (fs *FDTestFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry, opts RenameOptions) error {
+func (fs *FDTestFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error {
return syserror.EPERM
}
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index e60898d7c..3e4df8558 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -28,9 +28,11 @@
package vfs
import (
+ "fmt"
"sync"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
@@ -111,11 +113,11 @@ type PathOperation struct {
// are borrowed from the provider of the PathOperation (i.e. the caller of
// the VFS method to which the PathOperation was passed).
//
- // Invariants: Start.Ok(). If Pathname.Absolute, then Start == Root.
+ // Invariants: Start.Ok(). If Path.Absolute, then Start == Root.
Start VirtualDentry
// Path is the pathname traversed by this operation.
- Pathname string
+ Path fspath.Path
// If FollowFinalSymlink is true, and the Dentry traversed by the final
// path component represents a symbolic link, the symbolic link should be
@@ -126,10 +128,7 @@ type PathOperation struct {
// GetDentryAt returns a VirtualDentry representing the given path, at which a
// file must exist. A reference is taken on the returned VirtualDentry.
func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetDentryOptions) (VirtualDentry, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return VirtualDentry{}, err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
d, err := rp.mount.fs.impl.GetDentryAt(ctx, rp, *opts)
if err == nil {
@@ -148,6 +147,33 @@ func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Crede
}
}
+// Preconditions: pop.Path.Begin.Ok().
+func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (VirtualDentry, string, error) {
+ rp := vfs.getResolvingPath(creds, pop)
+ for {
+ parent, err := rp.mount.fs.impl.GetParentDentryAt(ctx, rp)
+ if err == nil {
+ parentVD := VirtualDentry{
+ mount: rp.mount,
+ dentry: parent,
+ }
+ rp.mount.IncRef()
+ name := rp.Component()
+ vfs.putResolvingPath(rp)
+ return parentVD, name, nil
+ }
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.GetParentDentryAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
+ if !rp.handleError(err) {
+ vfs.putResolvingPath(rp)
+ return VirtualDentry{}, "", err
+ }
+ }
+}
+
// LinkAt creates a hard link at newpop representing the existing file at
// oldpop.
func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation) error {
@@ -155,21 +181,36 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
if err != nil {
return err
}
- rp, err := vfs.getResolvingPath(creds, newpop)
- if err != nil {
+
+ if !newpop.Path.Begin.Ok() {
oldVD.DecRef()
- return err
+ if newpop.Path.Absolute {
+ return syserror.EEXIST
+ }
+ return syserror.ENOENT
}
+ if newpop.FollowFinalSymlink {
+ oldVD.DecRef()
+ ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink")
+ return syserror.EINVAL
+ }
+
+ rp := vfs.getResolvingPath(creds, newpop)
for {
err := rp.mount.fs.impl.LinkAt(ctx, rp, oldVD)
if err == nil {
- oldVD.DecRef()
vfs.putResolvingPath(rp)
+ oldVD.DecRef()
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.LinkAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
- oldVD.DecRef()
vfs.putResolvingPath(rp)
+ oldVD.DecRef()
return err
}
}
@@ -177,19 +218,32 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
// MkdirAt creates a directory at the given path.
func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error {
+ if !pop.Path.Begin.Ok() {
+ if pop.Path.Absolute {
+ return syserror.EEXIST
+ }
+ return syserror.ENOENT
+ }
+ if pop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink")
+ return syserror.EINVAL
+ }
// "Under Linux, apart from the permission bits, the S_ISVTX mode bit is
// also honored." - mkdir(2)
opts.Mode &= 0777 | linux.S_ISVTX
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
- }
+
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts)
if err == nil {
vfs.putResolvingPath(rp)
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.MkdirAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
vfs.putResolvingPath(rp)
return err
@@ -200,16 +254,29 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
// MknodAt creates a file of the given mode at the given path. It returns an
// error from the syserror package.
func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return nil
+ if !pop.Path.Begin.Ok() {
+ if pop.Path.Absolute {
+ return syserror.EEXIST
+ }
+ return syserror.ENOENT
+ }
+ if pop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink")
+ return syserror.EINVAL
}
+
+ rp := vfs.getResolvingPath(creds, pop)
for {
- if err = rp.mount.fs.impl.MknodAt(ctx, rp, *opts); err == nil {
+ err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts)
+ if err != nil {
vfs.putResolvingPath(rp)
return nil
}
- // Handle mount traversals.
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.MknodAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
vfs.putResolvingPath(rp)
return err
@@ -259,10 +326,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
if opts.Flags&linux.O_NOFOLLOW != 0 {
pop.FollowFinalSymlink = false
}
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return nil, err
- }
+ rp := vfs.getResolvingPath(creds, pop)
if opts.Flags&linux.O_DIRECTORY != 0 {
rp.mustBeDir = true
rp.mustBeDirOrig = true
@@ -282,10 +346,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
// ReadlinkAt returns the target of the symbolic link at the given path.
func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (string, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return "", err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
target, err := rp.mount.fs.impl.ReadlinkAt(ctx, rp)
if err == nil {
@@ -301,25 +362,59 @@ func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Creden
// RenameAt renames the file at oldpop to newpop.
func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation, opts *RenameOptions) error {
- oldVD, err := vfs.GetDentryAt(ctx, creds, oldpop, &GetDentryOptions{})
- if err != nil {
- return err
+ if !oldpop.Path.Begin.Ok() {
+ if oldpop.Path.Absolute {
+ return syserror.EBUSY
+ }
+ return syserror.ENOENT
}
- rp, err := vfs.getResolvingPath(creds, newpop)
+ if oldpop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink")
+ return syserror.EINVAL
+ }
+
+ oldParentVD, oldName, err := vfs.getParentDirAndName(ctx, creds, oldpop)
if err != nil {
- oldVD.DecRef()
return err
}
+ if oldName == "." || oldName == ".." {
+ oldParentVD.DecRef()
+ return syserror.EBUSY
+ }
+
+ if !newpop.Path.Begin.Ok() {
+ oldParentVD.DecRef()
+ if newpop.Path.Absolute {
+ return syserror.EBUSY
+ }
+ return syserror.ENOENT
+ }
+ if newpop.FollowFinalSymlink {
+ oldParentVD.DecRef()
+ ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink")
+ return syserror.EINVAL
+ }
+
+ rp := vfs.getResolvingPath(creds, newpop)
+ renameOpts := *opts
+ if oldpop.Path.Dir {
+ renameOpts.MustBeDir = true
+ }
for {
- err := rp.mount.fs.impl.RenameAt(ctx, rp, oldVD, *opts)
+ err := rp.mount.fs.impl.RenameAt(ctx, rp, oldParentVD, oldName, renameOpts)
if err == nil {
- oldVD.DecRef()
vfs.putResolvingPath(rp)
+ oldParentVD.DecRef()
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.RenameAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
- oldVD.DecRef()
vfs.putResolvingPath(rp)
+ oldParentVD.DecRef()
return err
}
}
@@ -327,16 +422,29 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
// RmdirAt removes the directory at the given path.
func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
+ if !pop.Path.Begin.Ok() {
+ if pop.Path.Absolute {
+ return syserror.EBUSY
+ }
+ return syserror.ENOENT
}
+ if pop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink")
+ return syserror.EINVAL
+ }
+
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.RmdirAt(ctx, rp)
if err == nil {
vfs.putResolvingPath(rp)
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.RmdirAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
vfs.putResolvingPath(rp)
return err
@@ -346,10 +454,7 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia
// SetStatAt changes metadata for the file at the given path.
func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetStatOptions) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.SetStatAt(ctx, rp, *opts)
if err == nil {
@@ -365,10 +470,7 @@ func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credent
// StatAt returns metadata for the file at the given path.
func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *StatOptions) (linux.Statx, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return linux.Statx{}, err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts)
if err == nil {
@@ -385,10 +487,7 @@ func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credential
// StatFSAt returns metadata for the filesystem containing the file at the
// given path.
func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (linux.Statfs, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return linux.Statfs{}, err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
statfs, err := rp.mount.fs.impl.StatFSAt(ctx, rp)
if err == nil {
@@ -404,16 +503,29 @@ func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credenti
// SymlinkAt creates a symbolic link at the given path with the given target.
func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, target string) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
+ if !pop.Path.Begin.Ok() {
+ if pop.Path.Absolute {
+ return syserror.EEXIST
+ }
+ return syserror.ENOENT
+ }
+ if pop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink")
+ return syserror.EINVAL
}
+
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.SymlinkAt(ctx, rp, target)
if err == nil {
vfs.putResolvingPath(rp)
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.SymlinkAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
vfs.putResolvingPath(rp)
return err
@@ -423,16 +535,29 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
// UnlinkAt deletes the non-directory file at the given path.
func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
+ if !pop.Path.Begin.Ok() {
+ if pop.Path.Absolute {
+ return syserror.EBUSY
+ }
+ return syserror.ENOENT
+ }
+ if pop.FollowFinalSymlink {
+ ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink")
+ return syserror.EINVAL
}
+
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.UnlinkAt(ctx, rp)
if err == nil {
vfs.putResolvingPath(rp)
return nil
}
+ if checkInvariants {
+ if rp.canHandleError(err) && rp.Done() {
+ panic(fmt.Sprintf("%T.UnlinkAt() consumed all path components and returned %T", rp.mount.fs.impl, err))
+ }
+ }
if !rp.handleError(err) {
vfs.putResolvingPath(rp)
return err
@@ -443,10 +568,7 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
// ListxattrAt returns all extended attribute names for the file at the given
// path.
func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) ([]string, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return nil, err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp)
if err == nil {
@@ -471,10 +593,7 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede
// GetxattrAt returns the value associated with the given extended attribute
// for the file at the given path.
func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) (string, error) {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return "", err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, name)
if err == nil {
@@ -491,10 +610,7 @@ func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Creden
// SetxattrAt changes the value associated with the given extended attribute
// for the file at the given path.
func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetxattrOptions) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
if err == nil {
@@ -510,10 +626,7 @@ func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Creden
// RemovexattrAt removes the given extended attribute from the file at rp.
func (vfs *VirtualFilesystem) RemovexattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error {
- rp, err := vfs.getResolvingPath(creds, pop)
- if err != nil {
- return err
- }
+ rp := vfs.getResolvingPath(creds, pop)
for {
err := rp.mount.fs.impl.RemovexattrAt(ctx, rp, name)
if err == nil {
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index 1987e89cc..2269f6237 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -45,6 +45,7 @@ var (
ELIBBAD = error(syscall.ELIBBAD)
ELOOP = error(syscall.ELOOP)
EMFILE = error(syscall.EMFILE)
+ EMLINK = error(syscall.EMLINK)
EMSGSIZE = error(syscall.EMSGSIZE)
ENAMETOOLONG = error(syscall.ENAMETOOLONG)
ENOATTR = ENODATA
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 90664ba8a..d9ab59336 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -1155,3 +1155,38 @@ func (ndp *ndpState) autoGenAddrInvalidationTimer(addr tcpip.Address, vl time.Du
ndp.invalidateAutoGenAddress(addr)
})
}
+
+// cleanupHostOnlyState cleans up any state that is only useful for hosts.
+//
+// cleanupHostOnlyState MUST be called when ndp's NIC is transitioning from a
+// host to a router. This function will invalidate all discovered on-link
+// prefixes, discovered routers, and auto-generated addresses as routers do not
+// normally process Router Advertisements to discover default routers and
+// on-link prefixes, and auto-generate addresses via SLAAC.
+//
+// The NIC that ndp belongs to MUST be locked.
+func (ndp *ndpState) cleanupHostOnlyState() {
+ for addr, _ := range ndp.autoGenAddresses {
+ ndp.invalidateAutoGenAddress(addr)
+ }
+
+ if got := len(ndp.autoGenAddresses); got != 0 {
+ log.Fatalf("ndp: still have auto-generated addresses after cleaning up, found = %d", got)
+ }
+
+ for prefix, _ := range ndp.onLinkPrefixes {
+ ndp.invalidateOnLinkPrefix(prefix)
+ }
+
+ if got := len(ndp.onLinkPrefixes); got != 0 {
+ log.Fatalf("ndp: still have discovered on-link prefixes after cleaning up, found = %d", got)
+ }
+
+ for router, _ := range ndp.defaultRouters {
+ ndp.invalidateDefaultRouter(router)
+ }
+
+ if got := len(ndp.defaultRouters); got != 0 {
+ log.Fatalf("ndp: still have discovered default routers after cleaning up, found = %d", got)
+ }
+}
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 666f86c33..64a9a2b20 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -47,6 +47,19 @@ var (
llAddr3 = header.LinkLocalAddr(linkAddr3)
)
+func addrForSubnet(subnet tcpip.Subnet, linkAddr tcpip.LinkAddress) tcpip.AddressWithPrefix {
+ if !header.IsValidUnicastEthernetAddress(linkAddr) {
+ return tcpip.AddressWithPrefix{}
+ }
+
+ addrBytes := []byte(subnet.ID())
+ header.EthernetAdddressToModifiedEUI64IntoBuf(linkAddr, addrBytes[header.IIDOffsetInIPv6Address:])
+ return tcpip.AddressWithPrefix{
+ Address: tcpip.Address(addrBytes),
+ PrefixLen: 64,
+ }
+}
+
// prefixSubnetAddr returns a prefix (Address + Length), the prefix's equivalent
// tcpip.Subnet, and an address where the lower half of the address is composed
// of the EUI-64 of linkAddr if it is a valid unicast ethernet address.
@@ -59,17 +72,7 @@ func prefixSubnetAddr(offset uint8, linkAddr tcpip.LinkAddress) (tcpip.AddressWi
subnet := prefix.Subnet()
- var addr tcpip.AddressWithPrefix
- if header.IsValidUnicastEthernetAddress(linkAddr) {
- addrBytes := []byte(subnet.ID())
- header.EthernetAdddressToModifiedEUI64IntoBuf(linkAddr, addrBytes[header.IIDOffsetInIPv6Address:])
- addr = tcpip.AddressWithPrefix{
- Address: tcpip.Address(addrBytes),
- PrefixLen: 64,
- }
- }
-
- return prefix, subnet, addr
+ return prefix, subnet, addrForSubnet(subnet, linkAddr)
}
// TestDADDisabled tests that an address successfully resolves immediately
@@ -1772,7 +1775,7 @@ func TestAutoGenAddrValidLifetimeUpdates(t *testing.T) {
// test.evl.
select {
case <-ndpDisp.autoGenAddrC:
- t.Fatalf("unexpectedly received an auto gen addr event")
+ t.Fatal("unexpectedly received an auto gen addr event")
case <-time.After(time.Duration(test.evl)*time.Second - delta):
}
@@ -1846,7 +1849,7 @@ func TestAutoGenAddrRemoval(t *testing.T) {
// got stopped/cleaned up.
select {
case <-ndpDisp.autoGenAddrC:
- t.Fatalf("unexpectedly received an auto gen addr event")
+ t.Fatal("unexpectedly received an auto gen addr event")
case <-time.After(lifetimeSeconds*time.Second + defaultTimeout):
}
}
@@ -2055,3 +2058,257 @@ func TestNDPRecursiveDNSServerDispatch(t *testing.T) {
})
}
}
+
+// TestCleanupHostOnlyStateOnBecomingRouter tests that all discovered routers
+// and prefixes, and auto-generated addresses get invalidated when a NIC
+// becomes a router.
+func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
+ t.Parallel()
+
+ const (
+ lifetimeSeconds = 5
+ maxEvents = 4
+ nicID1 = 1
+ nicID2 = 2
+ )
+
+ prefix1, subnet1, e1Addr1 := prefixSubnetAddr(0, linkAddr1)
+ prefix2, subnet2, e1Addr2 := prefixSubnetAddr(1, linkAddr1)
+ e2Addr1 := addrForSubnet(subnet1, linkAddr2)
+ e2Addr2 := addrForSubnet(subnet2, linkAddr2)
+
+ ndpDisp := ndpDispatcher{
+ routerC: make(chan ndpRouterEvent, maxEvents),
+ rememberRouter: true,
+ prefixC: make(chan ndpPrefixEvent, maxEvents),
+ rememberPrefix: true,
+ autoGenAddrC: make(chan ndpAutoGenAddrEvent, maxEvents),
+ }
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NDPConfigs: stack.NDPConfigurations{
+ HandleRAs: true,
+ DiscoverDefaultRouters: true,
+ DiscoverOnLinkPrefixes: true,
+ AutoGenGlobalAddresses: true,
+ },
+ NDPDisp: &ndpDisp,
+ })
+
+ e1 := channel.New(0, 1280, linkAddr1)
+ if err := s.CreateNIC(nicID1, e1); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID1, err)
+ }
+
+ e2 := channel.New(0, 1280, linkAddr2)
+ if err := s.CreateNIC(nicID2, e2); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID2, err)
+ }
+
+ expectRouterEvent := func() (bool, ndpRouterEvent) {
+ select {
+ case e := <-ndpDisp.routerC:
+ return true, e
+ default:
+ }
+
+ return false, ndpRouterEvent{}
+ }
+
+ expectPrefixEvent := func() (bool, ndpPrefixEvent) {
+ select {
+ case e := <-ndpDisp.prefixC:
+ return true, e
+ default:
+ }
+
+ return false, ndpPrefixEvent{}
+ }
+
+ expectAutoGenAddrEvent := func() (bool, ndpAutoGenAddrEvent) {
+ select {
+ case e := <-ndpDisp.autoGenAddrC:
+ return true, e
+ default:
+ }
+
+ return false, ndpAutoGenAddrEvent{}
+ }
+
+ // Receive RAs on NIC(1) and NIC(2) from default routers (llAddr1 and
+ // llAddr2) w/ PI (for prefix1 in RA from llAddr1 and prefix2 in RA from
+ // llAddr2) to discover multiple routers and prefixes, and auto-gen
+ // multiple addresses.
+
+ e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr1, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
+ // We have other tests that make sure we receive the *correct* events
+ // on normal discovery of routers/prefixes, and auto-generated
+ // addresses. Here we just make sure we get an event and let other tests
+ // handle the correctness check.
+ if ok, _ := expectRouterEvent(); !ok {
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr1, nicID1)
+ }
+ if ok, _ := expectPrefixEvent(); !ok {
+ t.Errorf("expected prefix event for %s on NIC(%d)", prefix1, nicID1)
+ }
+ if ok, _ := expectAutoGenAddrEvent(); !ok {
+ t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr1, nicID1)
+ }
+
+ e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
+ if ok, _ := expectRouterEvent(); !ok {
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr2, nicID1)
+ }
+ if ok, _ := expectPrefixEvent(); !ok {
+ t.Errorf("expected prefix event for %s on NIC(%d)", prefix2, nicID1)
+ }
+ if ok, _ := expectAutoGenAddrEvent(); !ok {
+ t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr2, nicID1)
+ }
+
+ e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr1, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
+ if ok, _ := expectRouterEvent(); !ok {
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr1, nicID2)
+ }
+ if ok, _ := expectPrefixEvent(); !ok {
+ t.Errorf("expected prefix event for %s on NIC(%d)", prefix1, nicID2)
+ }
+ if ok, _ := expectAutoGenAddrEvent(); !ok {
+ t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr2, nicID2)
+ }
+
+ e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
+ if ok, _ := expectRouterEvent(); !ok {
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr2, nicID2)
+ }
+ if ok, _ := expectPrefixEvent(); !ok {
+ t.Errorf("expected prefix event for %s on NIC(%d)", prefix2, nicID2)
+ }
+ if ok, _ := expectAutoGenAddrEvent(); !ok {
+ t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e2Addr2, nicID2)
+ }
+
+ // We should have the auto-generated addresses added.
+ nicinfo := s.NICInfo()
+ nic1Addrs := nicinfo[nicID1].ProtocolAddresses
+ nic2Addrs := nicinfo[nicID2].ProtocolAddresses
+ if !contains(nic1Addrs, e1Addr1) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e1Addr1, nicID1, nic1Addrs)
+ }
+ if !contains(nic1Addrs, e1Addr2) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e1Addr2, nicID1, nic1Addrs)
+ }
+ if !contains(nic2Addrs, e2Addr1) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e2Addr1, nicID2, nic2Addrs)
+ }
+ if !contains(nic2Addrs, e2Addr2) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e2Addr2, nicID2, nic2Addrs)
+ }
+
+ // We can't proceed any further if we already failed the test (missing
+ // some discovery/auto-generated address events or addresses).
+ if t.Failed() {
+ t.FailNow()
+ }
+
+ s.SetForwarding(true)
+
+ // Collect invalidation events after becoming a router
+ gotRouterEvents := make(map[ndpRouterEvent]int)
+ for i := 0; i < maxEvents; i++ {
+ ok, e := expectRouterEvent()
+ if !ok {
+ t.Errorf("expected %d router events after becoming a router; got = %d", maxEvents, i)
+ break
+ }
+ gotRouterEvents[e]++
+ }
+ gotPrefixEvents := make(map[ndpPrefixEvent]int)
+ for i := 0; i < maxEvents; i++ {
+ ok, e := expectPrefixEvent()
+ if !ok {
+ t.Errorf("expected %d prefix events after becoming a router; got = %d", maxEvents, i)
+ break
+ }
+ gotPrefixEvents[e]++
+ }
+ gotAutoGenAddrEvents := make(map[ndpAutoGenAddrEvent]int)
+ for i := 0; i < maxEvents; i++ {
+ ok, e := expectAutoGenAddrEvent()
+ if !ok {
+ t.Errorf("expected %d auto-generated address events after becoming a router; got = %d", maxEvents, i)
+ break
+ }
+ gotAutoGenAddrEvents[e]++
+ }
+
+ // No need to proceed any further if we already failed the test (missing
+ // some invalidation events).
+ if t.Failed() {
+ t.FailNow()
+ }
+
+ expectedRouterEvents := map[ndpRouterEvent]int{
+ {nicID: nicID1, addr: llAddr1, discovered: false}: 1,
+ {nicID: nicID1, addr: llAddr2, discovered: false}: 1,
+ {nicID: nicID2, addr: llAddr1, discovered: false}: 1,
+ {nicID: nicID2, addr: llAddr2, discovered: false}: 1,
+ }
+ if diff := cmp.Diff(expectedRouterEvents, gotRouterEvents); diff != "" {
+ t.Errorf("router events mismatch (-want +got):\n%s", diff)
+ }
+ expectedPrefixEvents := map[ndpPrefixEvent]int{
+ {nicID: nicID1, prefix: subnet1, discovered: false}: 1,
+ {nicID: nicID1, prefix: subnet2, discovered: false}: 1,
+ {nicID: nicID2, prefix: subnet1, discovered: false}: 1,
+ {nicID: nicID2, prefix: subnet2, discovered: false}: 1,
+ }
+ if diff := cmp.Diff(expectedPrefixEvents, gotPrefixEvents); diff != "" {
+ t.Errorf("prefix events mismatch (-want +got):\n%s", diff)
+ }
+ expectedAutoGenAddrEvents := map[ndpAutoGenAddrEvent]int{
+ {nicID: nicID1, addr: e1Addr1, eventType: invalidatedAddr}: 1,
+ {nicID: nicID1, addr: e1Addr2, eventType: invalidatedAddr}: 1,
+ {nicID: nicID2, addr: e2Addr1, eventType: invalidatedAddr}: 1,
+ {nicID: nicID2, addr: e2Addr2, eventType: invalidatedAddr}: 1,
+ }
+ if diff := cmp.Diff(expectedAutoGenAddrEvents, gotAutoGenAddrEvents); diff != "" {
+ t.Errorf("auto-generated address events mismatch (-want +got):\n%s", diff)
+ }
+
+ // Make sure the auto-generated addresses got removed.
+ nicinfo = s.NICInfo()
+ nic1Addrs = nicinfo[nicID1].ProtocolAddresses
+ nic2Addrs = nicinfo[nicID2].ProtocolAddresses
+ if contains(nic1Addrs, e1Addr1) {
+ t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e1Addr1, nicID1, nic1Addrs)
+ }
+ if contains(nic1Addrs, e1Addr2) {
+ t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e1Addr2, nicID1, nic1Addrs)
+ }
+ if contains(nic2Addrs, e2Addr1) {
+ t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e2Addr1, nicID2, nic2Addrs)
+ }
+ if contains(nic2Addrs, e2Addr2) {
+ t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e2Addr2, nicID2, nic2Addrs)
+ }
+
+ // Should not get any more events (invalidation timers should have been
+ // cancelled when we transitioned into a router).
+ time.Sleep(lifetimeSeconds*time.Second + defaultTimeout)
+ select {
+ case <-ndpDisp.routerC:
+ t.Error("unexpected router event")
+ default:
+ }
+ select {
+ case <-ndpDisp.prefixC:
+ t.Error("unexpected prefix event")
+ default:
+ }
+ select {
+ case <-ndpDisp.autoGenAddrC:
+ t.Error("unexpected auto-generated address event")
+ default:
+ }
+}
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index e8401c673..ddd014658 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -203,6 +203,18 @@ func (n *NIC) enable() *tcpip.Error {
return err
}
+// becomeIPv6Router transitions n into an IPv6 router.
+//
+// When transitioning into an IPv6 router, host-only state (NDP discovered
+// routers, discovered on-link prefixes, and auto-generated addresses) will
+// be cleaned up/invalidated.
+func (n *NIC) becomeIPv6Router() {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ n.ndp.cleanupHostOnlyState()
+}
+
// attachLinkEndpoint attaches the NIC to the endpoint, which will enable it
// to start delivering packets.
func (n *NIC) attachLinkEndpoint() {
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 0e88643a4..7a9600679 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -662,11 +662,31 @@ func (s *Stack) Stats() tcpip.Stats {
}
// SetForwarding enables or disables the packet forwarding between NICs.
+//
+// When forwarding becomes enabled, any host-only state on all NICs will be
+// cleaned up.
func (s *Stack) SetForwarding(enable bool) {
// TODO(igudger, bgeffon): Expose via /proc/sys/net/ipv4/ip_forward.
s.mu.Lock()
+ defer s.mu.Unlock()
+
+ // If forwarding status didn't change, do nothing further.
+ if s.forwarding == enable {
+ return
+ }
+
s.forwarding = enable
- s.mu.Unlock()
+
+ // If this stack does not support IPv6, do nothing further.
+ if _, ok := s.networkProtocols[header.IPv6ProtocolNumber]; !ok {
+ return
+ }
+
+ if enable {
+ for _, nic := range s.nics {
+ nic.becomeIPv6Router()
+ }
+ }
}
// Forwarding returns if the packet forwarding between NICs is enabled.