diff options
author | Ayush Ranjan <ayushranjan@google.com> | 2021-09-21 14:07:09 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-09-21 14:09:18 -0700 |
commit | 6fccc185609e37b0e3346f8df91bdcb37bc990db (patch) | |
tree | 073420614d4dfbc5c13617633d99b38c050e0a86 /runsc | |
parent | e819029f3ad059bfc1635b7f2a196c332fa7532f (diff) |
[lisa] Implement lisafs protocol methods in VFS2 gofer client and fsgofer.
Introduces RPC methods in lisafs. Makes that gofer client use lisafs RPCs
instead of p9 when lisafs is enabled.
Implements the handlers for those methods in fsgofer.
Fixes #5465
PiperOrigin-RevId: 398080310
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/fsgofer/BUILD | 14 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_test.go | 50 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_unsafe.go | 30 | ||||
-rw-r--r-- | runsc/fsgofer/lisafs.go | 1034 | ||||
-rw-r--r-- | runsc/fsgofer/lisafs_test.go | 56 |
5 files changed, 1158 insertions, 26 deletions
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index c6967cc57..8d5a6d300 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -13,10 +13,12 @@ go_library( ], visibility = ["//runsc:__subpackages__"], deps = [ + "//pkg/abi/linux", "//pkg/cleanup", "//pkg/fd", "//pkg/lisafs", "//pkg/log", + "//pkg/marshal/primitive", "//pkg/p9", "//pkg/sync", "//pkg/syserr", @@ -39,3 +41,15 @@ go_test( "@org_golang_x_sys//unix:go_default_library", ], ) + +go_test( + name = "lisafs_test", + size = "small", + srcs = ["lisafs_test.go"], + deps = [ + ":fsgofer", + "//pkg/lisafs", + "//pkg/lisafs/testsuite", + "//pkg/log", + ], +) diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index ee6cc97df..6cdd6d695 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -105,14 +105,14 @@ func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error { return nil } -type state struct { +type fileState struct { root *localFile file *localFile conf Config fileType uint32 } -func (s state) String() string { +func (s fileState) String() string { return fmt.Sprintf("type(%v)", s.fileType) } @@ -129,11 +129,11 @@ func typeName(fileType uint32) string { } } -func runAll(t *testing.T, test func(*testing.T, state)) { +func runAll(t *testing.T, test func(*testing.T, fileState)) { runCustom(t, allTypes, allConfs, test) } -func runCustom(t *testing.T, types []uint32, confs []Config, test func(*testing.T, state)) { +func runCustom(t *testing.T, types []uint32, confs []Config, test func(*testing.T, fileState)) { for _, c := range confs { for _, ft := range types { name := fmt.Sprintf("%s/%s", configTestName(&c), typeName(ft)) @@ -159,7 +159,7 @@ func runCustom(t *testing.T, types []uint32, confs []Config, test func(*testing. t.Fatalf("root.Walk({%q}) failed, err: %v", "symlink", err) } - st := state{ + st := fileState{ root: root.(*localFile), file: file.(*localFile), conf: c, @@ -227,7 +227,7 @@ func createFile(dir *localFile, name string) (*localFile, error) { } func TestReadWrite(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { child, err := createFile(s.file, "test") if err != nil { t.Fatalf("%v: createFile() failed, err: %v", s, err) @@ -261,7 +261,7 @@ func TestReadWrite(t *testing.T) { } func TestCreate(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { for i, flags := range allOpenFlags { _, l, _, _, err := s.file.Create(fmt.Sprintf("test-%d", i), flags, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())) if err != nil { @@ -296,7 +296,7 @@ func TestCreateSetGID(t *testing.T) { t.Skipf("Test requires CAP_CHOWN") } - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { // Change group and set setgid to the parent dir. if err := unix.Chown(s.file.hostPath, os.Getuid(), nobody); err != nil { t.Fatalf("Chown() failed: %v", err) @@ -364,7 +364,7 @@ func TestCreateSetGID(t *testing.T) { // TestReadWriteDup tests that a file opened in any mode can be dup'ed and // reopened in any other mode. func TestReadWriteDup(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { child, err := createFile(s.file, "test") if err != nil { t.Fatalf("%v: createFile() failed, err: %v", s, err) @@ -410,7 +410,7 @@ func TestReadWriteDup(t *testing.T) { } func TestUnopened(t *testing.T) { - runCustom(t, []uint32{unix.S_IFREG}, allConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFREG}, allConfs, func(t *testing.T, s fileState) { b := []byte("foobar") if _, err := s.file.WriteAt(b, 0); err != unix.EBADF { t.Errorf("%v: WriteAt() should have failed, got: %v, expected: unix.EBADF", s, err) @@ -432,7 +432,7 @@ func TestUnopened(t *testing.T) { // was open with O_PATH, but Open() was not checking for it and allowing the // control file to be reused. func TestOpenOPath(t *testing.T) { - runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s fileState) { // Fist remove all permissions on the file. if err := s.file.SetAttr(p9.SetAttrMask{Permissions: true}, p9.SetAttr{Permissions: p9.FileMode(0)}); err != nil { t.Fatalf("SetAttr(): %v", err) @@ -465,7 +465,7 @@ func SetGetAttr(l *localFile, valid p9.SetAttrMask, attr p9.SetAttr) (p9.Attr, e } func TestSetAttrPerm(t *testing.T) { - runCustom(t, allTypes, rwConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, rwConfs, func(t *testing.T, s fileState) { valid := p9.SetAttrMask{Permissions: true} attr := p9.SetAttr{Permissions: 0777} got, err := SetGetAttr(s.file, valid, attr) @@ -485,7 +485,7 @@ func TestSetAttrPerm(t *testing.T) { } func TestSetAttrSize(t *testing.T) { - runCustom(t, allTypes, rwConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, rwConfs, func(t *testing.T, s fileState) { for _, size := range []uint64{1024, 0, 1024 * 1024} { valid := p9.SetAttrMask{Size: true} attr := p9.SetAttr{Size: size} @@ -508,7 +508,7 @@ func TestSetAttrSize(t *testing.T) { } func TestSetAttrTime(t *testing.T) { - runCustom(t, allTypes, rwConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, rwConfs, func(t *testing.T, s fileState) { valid := p9.SetAttrMask{ATime: true, ATimeNotSystemTime: true} attr := p9.SetAttr{ATimeSeconds: 123, ATimeNanoSeconds: 456} got, err := SetGetAttr(s.file, valid, attr) @@ -542,7 +542,7 @@ func TestSetAttrOwner(t *testing.T) { t.Skipf("SetAttr(owner) test requires CAP_CHOWN, running as %d", os.Getuid()) } - runCustom(t, allTypes, rwConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, rwConfs, func(t *testing.T, s fileState) { newUID := os.Getuid() + 1 valid := p9.SetAttrMask{UID: true} attr := p9.SetAttr{UID: p9.UID(newUID)} @@ -571,7 +571,7 @@ func SetGetXattr(l *localFile, name string, value string) error { } func TestSetGetDisabledXattr(t *testing.T) { - runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s fileState) { name := "user.merkle.offset" value := "tmp" err := SetGetXattr(s.file, name, value) @@ -582,7 +582,7 @@ func TestSetGetDisabledXattr(t *testing.T) { } func TestSetGetXattr(t *testing.T) { - runCustom(t, []uint32{unix.S_IFREG}, []Config{{ROMount: false, EnableVerityXattr: true}}, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFREG}, []Config{{ROMount: false, EnableVerityXattr: true}}, func(t *testing.T, s fileState) { name := "user.merkle.offset" value := "tmp" err := SetGetXattr(s.file, name, value) @@ -596,7 +596,7 @@ func TestLink(t *testing.T) { if !specutils.HasCapabilities(capability.CAP_DAC_READ_SEARCH) { t.Skipf("Link test requires CAP_DAC_READ_SEARCH, running as %d", os.Getuid()) } - runCustom(t, allTypes, rwConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, rwConfs, func(t *testing.T, s fileState) { const dirName = "linkdir" const linkFile = "link" if _, err := s.root.Mkdir(dirName, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil { @@ -625,7 +625,7 @@ func TestROMountChecks(t *testing.T) { uid := p9.UID(os.Getuid()) gid := p9.GID(os.Getgid()) - runCustom(t, allTypes, roConfs, func(t *testing.T, s state) { + runCustom(t, allTypes, roConfs, func(t *testing.T, s fileState) { if s.fileType != unix.S_IFLNK { if _, _, _, err := s.file.Open(p9.WriteOnly); err != want { t.Errorf("Open() should have failed, got: %v, expected: %v", err, want) @@ -676,7 +676,7 @@ func TestROMountChecks(t *testing.T) { } func TestWalkNotFound(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, allConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, allConfs, func(t *testing.T, s fileState) { if _, _, err := s.file.Walk([]string{"nobody-here"}); err != unix.ENOENT { t.Errorf("Walk(%q) should have failed, got: %v, expected: unix.ENOENT", "nobody-here", err) } @@ -695,7 +695,7 @@ func TestWalkNotFound(t *testing.T) { } func TestWalkDup(t *testing.T) { - runAll(t, func(t *testing.T, s state) { + runAll(t, func(t *testing.T, s fileState) { _, dup, err := s.file.Walk([]string{}) if err != nil { t.Fatalf("%v: Walk(nil) failed, err: %v", s, err) @@ -708,7 +708,7 @@ func TestWalkDup(t *testing.T) { } func TestWalkMultiple(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { var names []string var parent p9.File = s.file for i := 0; i < 5; i++ { @@ -729,7 +729,7 @@ func TestWalkMultiple(t *testing.T) { } func TestReaddir(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { name := "dir" if _, err := s.file.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil { t.Fatalf("%v: MkDir(%s) failed, err: %v", s, name, err) @@ -915,7 +915,7 @@ func TestDoubleAttachError(t *testing.T) { } func TestTruncate(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { child, err := createFile(s.file, "test") if err != nil { t.Fatalf("createFile() failed: %v", err) @@ -951,7 +951,7 @@ func TestTruncate(t *testing.T) { } func TestMknod(t *testing.T) { - runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s state) { + runCustom(t, []uint32{unix.S_IFDIR}, rwConfs, func(t *testing.T, s fileState) { _, err := s.file.Mknod("test", p9.ModeRegular|0777, 1, 2, p9.UID(os.Getuid()), p9.GID(os.Getgid())) if err != nil { t.Fatalf("Mknod() failed: %v", err) diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go index f11fea40d..fb4fbe0d2 100644 --- a/runsc/fsgofer/fsgofer_unsafe.go +++ b/runsc/fsgofer/fsgofer_unsafe.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/syserr" ) +var unixDirentMaxSize uint32 = uint32(unsafe.Sizeof(unix.Dirent{})) + func utimensat(dirFd int, name string, times [2]unix.Timespec, flags int) error { // utimensat(2) doesn't accept empty name, instead name must be nil to make it // operate directly on 'dirFd' unlike other *at syscalls. @@ -80,3 +82,31 @@ func renameat(oldDirFD int, oldName string, newDirFD int, newName string) error } return nil } + +func parseDirents(buf []byte, handleDirent func(ino uint64, off int64, ftype uint8, name string) bool) { + for len(buf) > 0 { + // Interpret the buf populated by unix.Getdents as unix.Dirent. + dirent := *(*unix.Dirent)(unsafe.Pointer(&buf[0])) + + // Extracting the name is pretty tedious... + var nameBuf [unix.NAME_MAX]byte + var nameLen int + for i := 0; i < len(dirent.Name); i++ { + // The name is null terminated. + if dirent.Name[i] == 0 { + nameLen = i + break + } + nameBuf[i] = byte(dirent.Name[i]) + } + name := string(nameBuf[:nameLen]) + + // Deliver results to caller. + if !handleDirent(dirent.Ino, dirent.Off, dirent.Type, name) { + return + } + + // Advance buf for the next dirent. + buf = buf[dirent.Reclen:] + } +} diff --git a/runsc/fsgofer/lisafs.go b/runsc/fsgofer/lisafs.go index 9d745f461..0db44ff6a 100644 --- a/runsc/fsgofer/lisafs.go +++ b/runsc/fsgofer/lisafs.go @@ -15,7 +15,20 @@ package fsgofer import ( + "io" + "math" + "path" + "strconv" + "sync/atomic" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" + rwfd "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/lisafs" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/marshal/primitive" + "gvisor.dev/gvisor/pkg/p9" ) // LisafsServer implements lisafs.ServerImpl for fsgofer. @@ -35,7 +48,19 @@ func NewLisafsServer(config Config) *LisafsServer { // Mount implements lisafs.ServerImpl.Mount. func (s *LisafsServer) Mount(c *lisafs.Connection, mountPath string) (lisafs.ControlFDImpl, lisafs.Inode, error) { - panic("unimplemented") + s.RenameMu.RLock() + defer s.RenameMu.RUnlock() + + rootFD, rootStat, err := tryStepLocked(c, mountPath, nil, func(flags int) (int, error) { + return unix.Open(mountPath, flags, 0) + }) + if err != nil { + return nil, lisafs.Inode{}, err + } + + var rootIno lisafs.Inode + rootFD.initInodeWithStat(&rootIno, &rootStat) + return rootFD, rootIno, nil } // MaxMessageSize implements lisafs.ServerImpl.MaxMessageSize. @@ -45,8 +70,1015 @@ func (s *LisafsServer) MaxMessageSize() uint32 { // SupportedMessages implements lisafs.ServerImpl.SupportedMessages. func (s *LisafsServer) SupportedMessages() []lisafs.MID { + // Note that Flush, FListXattr and FRemoveXattr are not supported. return []lisafs.MID{ lisafs.Mount, lisafs.Channel, + lisafs.FStat, + lisafs.SetStat, + lisafs.Walk, + lisafs.WalkStat, + lisafs.OpenAt, + lisafs.OpenCreateAt, + lisafs.Close, + lisafs.FSync, + lisafs.PWrite, + lisafs.PRead, + lisafs.MkdirAt, + lisafs.MknodAt, + lisafs.SymlinkAt, + lisafs.LinkAt, + lisafs.FStatFS, + lisafs.FAllocate, + lisafs.ReadLinkAt, + lisafs.Connect, + lisafs.UnlinkAt, + lisafs.RenameAt, + lisafs.Getdents64, + lisafs.FGetXattr, + lisafs.FSetXattr, + } +} + +// controlFDLisa implements lisafs.ControlFDImpl. +type controlFDLisa struct { + lisafs.ControlFD + + // hostFD is the file descriptor which can be used to make host syscalls. + hostFD int + + // writableHostFD is the file descriptor number for a writable FD opened on the + // same FD as `hostFD`. writableHostFD must only be accessed using atomic + // operations. It is initialized to -1, and can change in value exactly once. + writableHostFD int32 +} + +var _ lisafs.ControlFDImpl = (*controlFDLisa)(nil) + +// Precondition: server's rename mutex must be at least read locked. +func newControlFDLisaLocked(c *lisafs.Connection, hostFD int, parent *controlFDLisa, name string, mode linux.FileMode) *controlFDLisa { + fd := &controlFDLisa{ + hostFD: hostFD, + writableHostFD: -1, + } + fd.ControlFD.Init(c, parent.FD(), name, mode, fd) + return fd +} + +func (fd *controlFDLisa) initInode(inode *lisafs.Inode) error { + inode.ControlFD = fd.ID() + return fstatTo(fd.hostFD, &inode.Stat) +} + +func (fd *controlFDLisa) initInodeWithStat(inode *lisafs.Inode, unixStat *unix.Stat_t) { + inode.ControlFD = fd.ID() + unixToLinuxStat(unixStat, &inode.Stat) +} + +func (fd *controlFDLisa) getWritableFD() (int, error) { + if writableFD := atomic.LoadInt32(&fd.writableHostFD); writableFD != -1 { + return int(writableFD), nil + } + + writableFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), (unix.O_WRONLY|openFlags)&^unix.O_NOFOLLOW, 0) + if err != nil { + return -1, err + } + if !atomic.CompareAndSwapInt32(&fd.writableHostFD, -1, int32(writableFD)) { + // Race detected, use the new value and clean this up. + unix.Close(writableFD) + return int(atomic.LoadInt32(&fd.writableHostFD)), nil + } + return writableFD, nil +} + +// FD implements lisafs.ControlFDImpl.FD. +func (fd *controlFDLisa) FD() *lisafs.ControlFD { + if fd == nil { + return nil + } + return &fd.ControlFD +} + +// Close implements lisafs.ControlFDImpl.Close. +func (fd *controlFDLisa) Close(c *lisafs.Connection) { + if fd.hostFD >= 0 { + _ = unix.Close(fd.hostFD) + fd.hostFD = -1 + } + // No concurrent access is possible so no need to use atomics. + if fd.writableHostFD >= 0 { + _ = unix.Close(int(fd.writableHostFD)) + fd.writableHostFD = -1 + } +} + +// Stat implements lisafs.ControlFDImpl.Stat. +func (fd *controlFDLisa) Stat(c *lisafs.Connection, comm lisafs.Communicator) (uint32, error) { + var resp linux.Statx + if err := fstatTo(fd.hostFD, &resp); err != nil { + return 0, err + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// SetStat implements lisafs.ControlFDImpl.SetStat. +func (fd *controlFDLisa) SetStat(c *lisafs.Connection, comm lisafs.Communicator, stat lisafs.SetStatReq) (uint32, error) { + var resp lisafs.SetStatResp + if stat.Mask&unix.STATX_MODE != 0 { + if err := unix.Fchmod(fd.hostFD, stat.Mode&^unix.S_IFMT); err != nil { + log.Debugf("SetStat fchmod failed %q, err: %v", fd.FilePath(), err) + resp.FailureMask |= unix.STATX_MODE + resp.FailureErrNo = uint32(p9.ExtractErrno(err)) + } + } + + if stat.Mask&unix.STATX_SIZE != 0 { + // ftruncate(2) requires the FD to be open for writing. + writableFD, err := fd.getWritableFD() + if err == nil { + err = unix.Ftruncate(writableFD, int64(stat.Size)) + } + if err != nil { + log.Debugf("SetStat ftruncate failed %q, err: %v", fd.FilePath(), err) + resp.FailureMask |= unix.STATX_SIZE + resp.FailureErrNo = uint32(p9.ExtractErrno(err)) + } + } + + if stat.Mask&(unix.STATX_ATIME|unix.STATX_MTIME) != 0 { + utimes := [2]unix.Timespec{ + {Sec: 0, Nsec: unix.UTIME_OMIT}, + {Sec: 0, Nsec: unix.UTIME_OMIT}, + } + if stat.Mask&unix.STATX_ATIME != 0 { + utimes[0].Sec = stat.Atime.Sec + utimes[0].Nsec = stat.Atime.Nsec + } + if stat.Mask&unix.STATX_MTIME != 0 { + utimes[1].Sec = stat.Mtime.Sec + utimes[1].Nsec = stat.Mtime.Nsec + } + + if fd.IsSymlink() { + // utimensat operates different that other syscalls. To operate on a + // symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty + // name. + c.Server().WithRenameReadLock(func() error { + if err := utimensat(fd.ParentLocked().(*controlFDLisa).hostFD, fd.NameLocked(), utimes, unix.AT_SYMLINK_NOFOLLOW); err != nil { + log.Debugf("SetStat utimens failed %q, err: %v", fd.FilePathLocked(), err) + resp.FailureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) + resp.FailureErrNo = uint32(p9.ExtractErrno(err)) + } + return nil + }) + } else { + hostFD := fd.hostFD + if fd.IsRegular() { + // For regular files, utimensat(2) requires the FD to be open for + // writing, see BUGS section. + writableFD, err := fd.getWritableFD() + if err != nil { + return 0, err + } + hostFD = writableFD + } + // Directories and regular files can operate directly on the fd + // using empty name. + err := utimensat(hostFD, "", utimes, 0) + if err != nil { + log.Debugf("SetStat utimens failed %q, err: %v", fd.FilePath(), err) + resp.FailureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) + resp.FailureErrNo = uint32(p9.ExtractErrno(err)) + } + } + } + + if stat.Mask&(unix.STATX_UID|unix.STATX_GID) != 0 { + // "If the owner or group is specified as -1, then that ID is not changed" + // - chown(2) + uid := -1 + if stat.Mask&unix.STATX_UID != 0 { + uid = int(stat.UID) + } + gid := -1 + if stat.Mask&unix.STATX_GID != 0 { + gid = int(stat.GID) + } + if err := unix.Fchownat(fd.hostFD, "", uid, gid, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { + log.Debugf("SetStat fchown failed %q, err: %v", fd.FilePath(), err) + resp.FailureMask |= stat.Mask & (unix.STATX_UID | unix.STATX_GID) + resp.FailureErrNo = uint32(p9.ExtractErrno(err)) + } + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Walk implements lisafs.ControlFDImpl.Walk. +func (fd *controlFDLisa) Walk(c *lisafs.Connection, comm lisafs.Communicator, path lisafs.StringArray) (uint32, error) { + // We need to generate inodes for each component walked. We will manually + // marshal the inodes into the payload buffer as they are generated to avoid + // the slice allocation. The memory format should be lisafs.WalkResp's. + var numInodes primitive.Uint32 + var status lisafs.WalkStatus + maxPayloadSize := status.SizeBytes() + numInodes.SizeBytes() + (len(path) * (*lisafs.Inode)(nil).SizeBytes()) + if maxPayloadSize > math.MaxUint32 { + // Too much to walk, can't do. + return 0, unix.EIO + } + payloadBuf := comm.PayloadBuf(uint32(maxPayloadSize)) + payloadPos := status.SizeBytes() + numInodes.SizeBytes() + + s := c.Server() + s.RenameMu.RLock() + defer s.RenameMu.RUnlock() + + curDirFD := fd + cu := cleanup.Make(func() { + // Destroy all newly created FDs until now. Walk upward from curDirFD to + // fd. Do not destroy fd as the client still owns that. + for curDirFD != fd { + c.RemoveControlFDLocked(curDirFD.ID()) + curDirFD = curDirFD.ParentLocked().(*controlFDLisa) + } + }) + defer cu.Clean() + + for _, name := range path { + // Symlinks terminate walk. This client gets the symlink inode, but will + // have to invoke Walk again with the resolved path. + if curDirFD.IsSymlink() { + status = lisafs.WalkComponentSymlink + break + } + + child, childStat, err := tryStepLocked(c, name, curDirFD, func(flags int) (int, error) { + return unix.Openat(curDirFD.hostFD, name, flags, 0) + }) + if err == unix.ENOENT { + status = lisafs.WalkComponentDoesNotExist + break + } + if err != nil { + return 0, err + } + + // Write inode to payloadBuf and update state. + var childInode lisafs.Inode + child.initInodeWithStat(&childInode, &childStat) + childInode.MarshalUnsafe(payloadBuf[payloadPos:]) + payloadPos += childInode.SizeBytes() + numInodes++ + curDirFD = child + } + cu.Release() + + // lisafs.WalkResp writes the walk status followed by the number of inodes in + // the beginning. + status.MarshalUnsafe(payloadBuf) + numInodes.MarshalUnsafe(payloadBuf[status.SizeBytes():]) + return uint32(payloadPos), nil +} + +// WalkStat implements lisafs.ControlFDImpl.WalkStat. +func (fd *controlFDLisa) WalkStat(c *lisafs.Connection, comm lisafs.Communicator, path lisafs.StringArray) (uint32, error) { + // We may need to generate statx for dirFD + each component walked. We will + // manually marshal the statx results into the payload buffer as they are + // generated to avoid the slice allocation. The memory format should be the + // same as lisafs.WalkStatResp's. + var numStats primitive.Uint32 + maxPayloadSize := numStats.SizeBytes() + (len(path) * linux.SizeOfStatx) + if maxPayloadSize > math.MaxUint32 { + // Too much to walk, can't do. + return 0, unix.EIO + } + payloadBuf := comm.PayloadBuf(uint32(maxPayloadSize)) + payloadPos := numStats.SizeBytes() + + s := c.Server() + s.RenameMu.RLock() + defer s.RenameMu.RUnlock() + + curDirFD := fd.hostFD + closeCurDirFD := func() { + if curDirFD != fd.hostFD { + unix.Close(curDirFD) + } + } + defer closeCurDirFD() + var ( + stat linux.Statx + unixStat unix.Stat_t + ) + if len(path) > 0 && len(path[0]) == 0 { + // Write stat results for dirFD if the first path component is "". + if err := unix.Fstat(fd.hostFD, &unixStat); err != nil { + return 0, err + } + unixToLinuxStat(&unixStat, &stat) + stat.MarshalUnsafe(payloadBuf[payloadPos:]) + payloadPos += stat.SizeBytes() + path = path[1:] + numStats++ + } + + // Don't attempt walking if parent is a symlink. + if fd.IsSymlink() { + return 0, nil + } + for _, name := range path { + curFD, err := unix.Openat(curDirFD, name, unix.O_PATH|openFlags, 0) + if err == unix.ENOENT { + // No more path components exist on the filesystem. Return the partial + // walk to the client. + break + } + if err != nil { + return 0, err + } + closeCurDirFD() + curDirFD = curFD + + // Write stat results for curFD. + if err := unix.Fstat(curFD, &unixStat); err != nil { + return 0, err + } + unixToLinuxStat(&unixStat, &stat) + stat.MarshalUnsafe(payloadBuf[payloadPos:]) + payloadPos += stat.SizeBytes() + numStats++ + + // Symlinks terminate walk. This client gets the symlink stat result, but + // will have to invoke Walk again with the resolved path. + if unixStat.Mode&unix.S_IFMT == unix.S_IFLNK { + break + } + } + + // lisafs.WalkStatResp writes the number of stats in the beginning. + numStats.MarshalUnsafe(payloadBuf) + return uint32(payloadPos), nil +} + +// Open implements lisafs.ControlFDImpl.Open. +func (fd *controlFDLisa) Open(c *lisafs.Connection, comm lisafs.Communicator, flags uint32) (uint32, error) { + flags |= openFlags + newHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), int(flags)&^unix.O_NOFOLLOW, 0) + if err != nil { + return 0, err + } + newFD := fd.newOpenFDLisa(newHostFD, flags) + + if fd.IsRegular() { + // Donate FD for regular files only. Since FD donation is a destructive + // operation, we should duplicate the to-be-donated FD. Eat the error if + // one occurs, it is better to have an FD without a host FD, than failing + // the Open attempt. + if dupFD, err := unix.Dup(newFD.hostFD); err == nil { + _ = comm.DonateFD(dupFD) + } + } + + resp := lisafs.OpenAtResp{NewFD: newFD.ID()} + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// OpenCreate implements lisafs.ControlFDImpl.OpenCreate. +func (fd *controlFDLisa) OpenCreate(c *lisafs.Connection, comm lisafs.Communicator, mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, flags uint32) (uint32, error) { + // Need to hold rename mutex for reading while performing the walk. Also keep + // holding it while the cleanup is still possible. + var resp lisafs.OpenCreateAtResp + var newFD *openFDLisa + if err := c.Server().WithRenameReadLock(func() error { + createFlags := unix.O_CREAT | unix.O_EXCL | unix.O_RDONLY | unix.O_NONBLOCK | openFlags + childHostFD, err := unix.Openat(fd.hostFD, name, createFlags, uint32(mode&^linux.FileTypeMask)) + if err != nil { + return err + } + + childFD := newControlFDLisaLocked(c, childHostFD, fd, name, linux.ModeRegular) + cu := cleanup.Make(func() { + // Best effort attempt to remove the file in case of failure. + if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { + log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.FilePathLocked(), name), err) + } + c.RemoveControlFDLocked(childFD.ID()) + }) + defer cu.Clean() + + // Set the owners as requested by the client. + if err := unix.Fchownat(childFD.hostFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { + log.Infof("ayush: Fchownat %v", err) + return err + } + + // Do not use the stat result from tryOpen because the owners might have + // changed. initInode() will stat the FD again and use fresh results. + if err := childFD.initInode(&resp.Child); err != nil { + log.Infof("ayush: initInode %v", err) + return err + } + + // Now open an FD to the newly created file with the flags requested by the client. + flags |= openFlags + newHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(childFD.hostFD), int(flags)&^unix.O_NOFOLLOW, 0) + if err != nil { + log.Infof("ayush: Openat %v", err) + return err + } + cu.Release() + + newFD = childFD.newOpenFDLisa(newHostFD, uint32(flags)) + resp.NewFD = newFD.ID() + return nil + }); err != nil { + return 0, err + } + + // Donate FD because open(O_CREAT|O_EXCL) always creates a regular file. + // Since FD donation is a destructive operation, we should duplicate the + // to-be-donated FD. Eat the error if one occurs, it is better to have an FD + // without a host FD, than failing the Open attempt. + if dupFD, err := unix.Dup(newFD.hostFD); err == nil { + _ = comm.DonateFD(dupFD) + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Mkdir implements lisafs.ControlFDImpl.Mkdir. +func (fd *controlFDLisa) Mkdir(c *lisafs.Connection, comm lisafs.Communicator, mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string) (uint32, error) { + var resp lisafs.MkdirAtResp + if err := c.Server().WithRenameReadLock(func() error { + if err := unix.Mkdirat(fd.hostFD, name, uint32(mode&^linux.FileTypeMask)); err != nil { + return err + } + cu := cleanup.Make(func() { + // Best effort attempt to remove the dir in case of failure. + if err := unix.Unlinkat(fd.hostFD, name, unix.AT_REMOVEDIR); err != nil { + log.Warningf("error unlinking dir %q after failure: %v", path.Join(fd.FilePathLocked(), name), err) + } + }) + defer cu.Clean() + + // Open directory to change ownership. + childDirFd, err := unix.Openat(fd.hostFD, name, unix.O_DIRECTORY|unix.O_RDONLY|openFlags, 0) + if err != nil { + return err + } + if err := unix.Fchownat(childDirFd, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { + unix.Close(childDirFd) + return err + } + + childDir := newControlFDLisaLocked(c, childDirFd, fd, name, linux.ModeDirectory) + if err := childDir.initInode(&resp.ChildDir); err != nil { + c.RemoveControlFDLocked(childDir.ID()) + return err + } + cu.Release() + + return nil + }); err != nil { + return 0, err + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Mknod implements lisafs.ControlFDImpl.Mknod. +func (fd *controlFDLisa) Mknod(c *lisafs.Connection, comm lisafs.Communicator, mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, minor uint32, major uint32) (uint32, error) { + // From mknod(2) man page: + // "EPERM: [...] if the filesystem containing pathname does not support + // the type of node requested." + if mode.FileType() != linux.ModeRegular { + return 0, unix.EPERM + } + + var resp lisafs.MknodAtResp + if err := c.Server().WithRenameReadLock(func() error { + if err := unix.Mknodat(fd.hostFD, name, uint32(mode), 0); err != nil { + return err + } + cu := cleanup.Make(func() { + // Best effort attempt to remove the file in case of failure. + if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { + log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.FilePathLocked(), name), err) + } + }) + defer cu.Clean() + + // Open file to change ownership. + childFD, err := unix.Openat(fd.hostFD, name, unix.O_PATH|openFlags, 0) + if err != nil { + return err + } + if err := unix.Fchownat(childFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { + unix.Close(childFD) + return err + } + + child := newControlFDLisaLocked(c, childFD, fd, name, mode) + if err := child.initInode(&resp.Child); err != nil { + c.RemoveControlFDLocked(child.ID()) + return err + } + cu.Release() + return nil + }); err != nil { + return 0, err + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Symlink implements lisafs.ControlFDImpl.Symlink. +func (fd *controlFDLisa) Symlink(c *lisafs.Connection, comm lisafs.Communicator, name string, target string, uid lisafs.UID, gid lisafs.GID) (uint32, error) { + var resp lisafs.SymlinkAtResp + if err := c.Server().WithRenameReadLock(func() error { + if err := unix.Symlinkat(target, fd.hostFD, name); err != nil { + return err + } + cu := cleanup.Make(func() { + // Best effort attempt to remove the symlink in case of failure. + if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { + log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.FilePathLocked(), name), err) + } + }) + defer cu.Clean() + + // Open symlink to change ownership. + symlinkFD, err := unix.Openat(fd.hostFD, name, unix.O_PATH|openFlags, 0) + if err != nil { + return err + } + if err := unix.Fchownat(symlinkFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { + unix.Close(symlinkFD) + return err + } + + symlink := newControlFDLisaLocked(c, symlinkFD, fd, name, linux.ModeSymlink) + if err := symlink.initInode(&resp.Symlink); err != nil { + c.RemoveControlFDLocked(symlink.ID()) + return err + } + cu.Release() + return nil + }); err != nil { + return 0, err } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Link implements lisafs.ControlFDImpl.Link. +func (fd *controlFDLisa) Link(c *lisafs.Connection, comm lisafs.Communicator, dir lisafs.ControlFDImpl, name string) (uint32, error) { + var resp lisafs.LinkAtResp + if err := c.Server().WithRenameReadLock(func() error { + dirFD := dir.(*controlFDLisa) + if err := unix.Linkat(fd.hostFD, "", dirFD.hostFD, name, unix.AT_EMPTY_PATH); err != nil { + return err + } + cu := cleanup.Make(func() { + // Best effort attempt to remove the hard link in case of failure. + if err := unix.Unlinkat(dirFD.hostFD, name, 0); err != nil { + log.Warningf("error unlinking file %q after failure: %v", path.Join(dirFD.FilePathLocked(), name), err) + } + }) + defer cu.Clean() + + linkFD, linkStat, err := tryStepLocked(c, name, dirFD, func(flags int) (int, error) { + return unix.Openat(dirFD.hostFD, name, flags, 0) + }) + if err != nil { + return err + } + cu.Release() + + linkFD.initInodeWithStat(&resp.Link, &linkStat) + return nil + }); err != nil { + return 0, err + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// StatFS implements lisafs.ControlFDImpl.StatFS. +func (fd *controlFDLisa) StatFS(c *lisafs.Connection, comm lisafs.Communicator) (uint32, error) { + var s unix.Statfs_t + if err := unix.Fstatfs(fd.hostFD, &s); err != nil { + return 0, err + } + + resp := lisafs.StatFS{ + Type: uint64(s.Type), + BlockSize: s.Bsize, + Blocks: s.Blocks, + BlocksFree: s.Bfree, + BlocksAvailable: s.Bavail, + Files: s.Files, + FilesFree: s.Ffree, + NameLength: uint64(s.Namelen), + } + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Readlink implements lisafs.ControlFDImpl.Readlink. +func (fd *controlFDLisa) Readlink(c *lisafs.Connection, comm lisafs.Communicator) (uint32, error) { + // We will manually marshal lisafs.ReadLinkAtResp, which just contains a + // lisafs.SizedString. Let unix.Readlinkat directly write into the payload + // buffer and manually write the string size before it. + + // This is similar to what os.Readlink does. + const limit = primitive.Uint32(1024 * 1024) + for linkLen := primitive.Uint32(128); linkLen < limit; linkLen *= 2 { + b := comm.PayloadBuf(uint32(linkLen) + uint32(linkLen.SizeBytes())) + n, err := unix.Readlinkat(fd.hostFD, "", b[linkLen.SizeBytes():]) + if err != nil { + return 0, err + } + if n < int(linkLen) { + linkLen = primitive.Uint32(n) + linkLen.MarshalUnsafe(b[:linkLen.SizeBytes()]) + return uint32(linkLen) + uint32(linkLen.SizeBytes()), nil + } + } + return 0, unix.ENOMEM +} + +// Connect implements lisafs.ControlFDImpl.Connect. +func (fd *controlFDLisa) Connect(c *lisafs.Connection, comm lisafs.Communicator, sockType uint32) error { + s := c.ServerImpl().(*LisafsServer) + if !s.config.HostUDS { + return unix.ECONNREFUSED + } + + // Lock RenameMu so that the hostPath read stays valid and is not tampered + // with until it is actually connected to. + s.RenameMu.RLock() + defer s.RenameMu.RUnlock() + + // TODO(gvisor.dev/issue/1003): Due to different app vs replacement + // mappings, the app path may have fit in the sockaddr, but we can't fit + // hostPath in our sockaddr. We'd need to redirect through a shorter path + // in order to actually connect to this socket. + hostPath := fd.FilePathLocked() + if len(hostPath) > 108 { // UNIX_PATH_MAX = 108 is defined in afunix.h. + return unix.ECONNREFUSED + } + + // Only the following types are supported. + switch sockType { + case unix.SOCK_STREAM, unix.SOCK_DGRAM, unix.SOCK_SEQPACKET: + default: + return unix.ENXIO + } + + sock, err := unix.Socket(unix.AF_UNIX, int(sockType), 0) + if err != nil { + return err + } + if err := comm.DonateFD(sock); err != nil { + return err + } + + sa := unix.SockaddrUnix{Name: hostPath} + if err := unix.Connect(sock, &sa); err != nil { + return err + } + return nil +} + +// Unlink implements lisafs.ControlFDImpl.Unlink. +func (fd *controlFDLisa) Unlink(c *lisafs.Connection, name string, flags uint32) error { + return c.Server().WithRenameReadLock(func() error { + return unix.Unlinkat(fd.hostFD, name, int(flags)) + }) +} + +// RenameLocked implements lisafs.ControlFDImpl.RenameLocked. +func (fd *controlFDLisa) RenameLocked(c *lisafs.Connection, newDir lisafs.ControlFDImpl, newName string) (func(lisafs.ControlFDImpl), func(), error) { + // Note that there is no controlFDLisa specific update needed on rename. + return nil, nil, renameat(fd.ParentLocked().(*controlFDLisa).hostFD, fd.NameLocked(), newDir.(*controlFDLisa).hostFD, newName) +} + +// GetXattr implements lisafs.ControlFDImpl.GetXattr. +func (fd *controlFDLisa) GetXattr(c *lisafs.Connection, comm lisafs.Communicator, name string, size uint32) (uint32, error) { + if !c.ServerImpl().(*LisafsServer).config.EnableVerityXattr { + return 0, unix.EOPNOTSUPP + } + if _, ok := verityXattrs[name]; !ok { + return 0, unix.EOPNOTSUPP + } + + // Manually marshal lisafs.FGetXattrResp to avoid allocations and copying. + var valueLen primitive.Uint32 + buf := comm.PayloadBuf(uint32(valueLen.SizeBytes()) + size) + n, err := unix.Fgetxattr(fd.hostFD, name, buf[valueLen.SizeBytes():]) + if err != nil { + return 0, err + } + valueLen = primitive.Uint32(n) + valueLen.MarshalBytes(buf[:valueLen.SizeBytes()]) + + return uint32(valueLen.SizeBytes() + n), nil +} + +// SetXattr implements lisafs.ControlFDImpl.SetXattr. +func (fd *controlFDLisa) SetXattr(c *lisafs.Connection, name string, value string, flags uint32) error { + if !c.ServerImpl().(*LisafsServer).config.EnableVerityXattr { + return unix.EOPNOTSUPP + } + if _, ok := verityXattrs[name]; !ok { + return unix.EOPNOTSUPP + } + return unix.Fsetxattr(fd.hostFD, name, []byte(value) /* sigh */, int(flags)) +} + +// ListXattr implements lisafs.ControlFDImpl.ListXattr. +func (fd *controlFDLisa) ListXattr(c *lisafs.Connection, comm lisafs.Communicator, size uint64) (uint32, error) { + return 0, unix.EOPNOTSUPP +} + +// RemoveXattr implements lisafs.ControlFDImpl.RemoveXattr. +func (fd *controlFDLisa) RemoveXattr(c *lisafs.Connection, comm lisafs.Communicator, name string) error { + return unix.EOPNOTSUPP +} + +// openFDLisa implements lisafs.OpenFDImpl. +type openFDLisa struct { + lisafs.OpenFD + + // hostFD is the host file descriptor which can be used to make syscalls. + hostFD int +} + +var _ lisafs.OpenFDImpl = (*openFDLisa)(nil) + +func (fd *controlFDLisa) newOpenFDLisa(hostFD int, flags uint32) *openFDLisa { + newFD := &openFDLisa{ + hostFD: hostFD, + } + newFD.OpenFD.Init(fd.FD(), flags, newFD) + return newFD +} + +// FD implements lisafs.OpenFDImpl.FD. +func (fd *openFDLisa) FD() *lisafs.OpenFD { + if fd == nil { + return nil + } + return &fd.OpenFD +} + +// Close implements lisafs.OpenFDImpl.Close. +func (fd *openFDLisa) Close(c *lisafs.Connection) { + if fd.hostFD >= 0 { + _ = unix.Close(fd.hostFD) + fd.hostFD = -1 + } +} + +// Stat implements lisafs.OpenFDImpl.Stat. +func (fd *openFDLisa) Stat(c *lisafs.Connection, comm lisafs.Communicator) (uint32, error) { + var resp linux.Statx + if err := fstatTo(fd.hostFD, &resp); err != nil { + return 0, err + } + + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Sync implements lisafs.OpenFDImpl.Sync. +func (fd *openFDLisa) Sync(c *lisafs.Connection) error { + return unix.Fsync(fd.hostFD) +} + +// Write implements lisafs.OpenFDImpl.Write. +func (fd *openFDLisa) Write(c *lisafs.Connection, comm lisafs.Communicator, buf []byte, off uint64) (uint32, error) { + rw := rwfd.NewReadWriter(fd.hostFD) + n, err := rw.WriteAt(buf, int64(off)) + if err != nil { + return 0, err + } + + resp := &lisafs.PWriteResp{Count: uint64(n)} + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} + +// Read implements lisafs.OpenFDImpl.Read. +func (fd *openFDLisa) Read(c *lisafs.Connection, comm lisafs.Communicator, off uint64, count uint32) (uint32, error) { + // To save an allocation and a copy, we directly read into the payload + // buffer. The rest of the response message is manually marshalled. + var resp lisafs.PReadResp + respMetaSize := uint32(resp.NumBytes.SizeBytes()) + maxRespLen := respMetaSize + count + + payloadBuf := comm.PayloadBuf(maxRespLen) + rw := rwfd.NewReadWriter(fd.hostFD) + n, err := rw.ReadAt(payloadBuf[respMetaSize:], int64(off)) + if err != nil && err != io.EOF { + return 0, err + } + + // Write the response metadata onto the payload buffer. The response contents + // already have been written immediately after it. + resp.NumBytes = primitive.Uint32(n) + resp.NumBytes.MarshalUnsafe(payloadBuf[:respMetaSize]) + return respMetaSize + uint32(n), nil +} + +// Allocate implements lisafs.OpenFDImpl.Allocate. +func (fd *openFDLisa) Allocate(c *lisafs.Connection, mode, off, length uint64) error { + return unix.Fallocate(fd.hostFD, uint32(mode), int64(off), int64(length)) +} + +// Flush implements lisafs.OpenFDImpl.Flush. +func (fd *openFDLisa) Flush(c *lisafs.Connection) error { + return nil +} + +// Getdent64 implements lisafs.OpenFDImpl.Getdent64. +func (fd *openFDLisa) Getdent64(c *lisafs.Connection, comm lisafs.Communicator, count uint32, seek0 bool) (uint32, error) { + if seek0 { + if _, err := unix.Seek(fd.hostFD, 0, 0); err != nil { + return 0, err + } + } + + // We will manually marshal the response lisafs.Getdents64Resp. + + // numDirents is the number of dirents marshalled into the payload. + var numDirents primitive.Uint32 + // The payload starts with numDirents, dirents go right after that. + // payloadBufPos represents the position at which to write the next dirent. + payloadBufPos := uint32(numDirents.SizeBytes()) + // Request enough payloadBuf for 10 dirents, we will extend when needed. + payloadBuf := comm.PayloadBuf(payloadBufPos + 10*unixDirentMaxSize) + + var direntsBuf [8192]byte + var bytesRead int + for bytesRead < int(count) { + bufEnd := len(direntsBuf) + if remaining := int(count) - bytesRead; remaining < bufEnd { + bufEnd = remaining + } + n, err := unix.Getdents(fd.hostFD, direntsBuf[:bufEnd]) + if err != nil { + if err == unix.EINVAL && bufEnd < 268 { + // getdents64(2) returns EINVAL is returned when the result + // buffer is too small. If bufEnd is smaller than the max + // size of unix.Dirent, then just break here to return all + // dirents collected till now. + break + } + return 0, err + } + if n <= 0 { + break + } + bytesRead += n + + var statErr error + parseDirents(direntsBuf[:n], func(ino uint64, off int64, ftype uint8, name string) bool { + dirent := lisafs.Dirent64{ + Ino: primitive.Uint64(ino), + Off: primitive.Uint64(off), + Type: primitive.Uint8(ftype), + Name: lisafs.SizedString(name), + } + + // The client also wants the device ID, which annoyingly incurs an + // additional syscall per dirent. Live with it. + stat, err := statAt(fd.hostFD, name) + if err != nil { + statErr = err + return false + } + dirent.DevMinor = primitive.Uint32(unix.Minor(stat.Dev)) + dirent.DevMajor = primitive.Uint32(unix.Major(stat.Dev)) + + // Paste the dirent into the payload buffer without having the dirent + // escape. Request a larger buffer if needed. + if int(payloadBufPos)+dirent.SizeBytes() > len(payloadBuf) { + // Ask for 10 large dirents worth of more space. + payloadBuf = comm.PayloadBuf(payloadBufPos + 10*unixDirentMaxSize) + } + dirent.MarshalBytes(payloadBuf[payloadBufPos:]) + payloadBufPos += uint32(dirent.SizeBytes()) + numDirents++ + return true + }) + if statErr != nil { + return 0, statErr + } + } + + // The number of dirents goes at the beginning of the payload. + numDirents.MarshalUnsafe(payloadBuf) + return payloadBufPos, nil +} + +// tryStepLocked tries to walk via open() with different modes as documented. +// It then initializes and returns the control FD. +// +// Precondition: server's rename mutex must at least be read locked. +func tryStepLocked(c *lisafs.Connection, name string, parent *controlFDLisa, open func(flags int) (int, error)) (*controlFDLisa, unix.Stat_t, error) { + // Attempt to open file in the following in order: + // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. + // Use non-blocking to prevent getting stuck inside open(2) for + // FIFOs. This option has no effect on regular files. + // 2. PATH: for symlinks, sockets. + options := []struct { + flag int + readable bool + }{ + { + flag: unix.O_RDONLY | unix.O_NONBLOCK, + readable: true, + }, + { + flag: unix.O_PATH, + readable: false, + }, + } + + for i, option := range options { + hostFD, err := open(option.flag | openFlags) + if err == nil { + var stat unix.Stat_t + if err = unix.Fstat(hostFD, &stat); err == nil { + return newControlFDLisaLocked(c, hostFD, parent, name, linux.FileMode(stat.Mode)), stat, nil + } + unix.Close(hostFD) + } + + e := extractErrno(err) + if e == unix.ENOENT { + // File doesn't exist, no point in retrying. + return nil, unix.Stat_t{}, e + } + if i < len(options)-1 { + continue + } + return nil, unix.Stat_t{}, e + } + panic("unreachable") +} + +func fstatTo(hostFD int, stat *linux.Statx) error { + var unixStat unix.Stat_t + if err := unix.Fstat(hostFD, &unixStat); err != nil { + return err + } + + unixToLinuxStat(&unixStat, stat) + return nil +} + +func unixToLinuxStat(from *unix.Stat_t, to *linux.Statx) { + to.Mask = unix.STATX_TYPE | unix.STATX_MODE | unix.STATX_INO | unix.STATX_NLINK | unix.STATX_UID | unix.STATX_GID | unix.STATX_SIZE | unix.STATX_BLOCKS | unix.STATX_ATIME | unix.STATX_MTIME | unix.STATX_CTIME + to.Mode = uint16(from.Mode) + to.DevMinor = unix.Minor(from.Dev) + to.DevMajor = unix.Major(from.Dev) + to.Ino = from.Ino + to.Nlink = uint32(from.Nlink) + to.UID = from.Uid + to.GID = from.Gid + to.RdevMinor = unix.Minor(from.Rdev) + to.RdevMajor = unix.Major(from.Rdev) + to.Size = uint64(from.Size) + to.Blksize = uint32(from.Blksize) + to.Blocks = uint64(from.Blocks) + to.Atime.Sec = from.Atim.Sec + to.Atime.Nsec = uint32(from.Atim.Nsec) + to.Mtime.Sec = from.Mtim.Sec + to.Mtime.Nsec = uint32(from.Mtim.Nsec) + to.Ctime.Sec = from.Ctim.Sec + to.Ctime.Nsec = uint32(from.Ctim.Nsec) } diff --git a/runsc/fsgofer/lisafs_test.go b/runsc/fsgofer/lisafs_test.go new file mode 100644 index 000000000..4653f9955 --- /dev/null +++ b/runsc/fsgofer/lisafs_test.go @@ -0,0 +1,56 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs_test + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/lisafs" + "gvisor.dev/gvisor/pkg/lisafs/testsuite" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/fsgofer" +) + +// Note that these are not supposed to be extensive or robust tests. These unit +// tests provide a sanity check that all RPCs at least work in obvious ways. + +func init() { + log.SetLevel(log.Debug) + if err := fsgofer.OpenProcSelfFD(); err != nil { + panic(err) + } +} + +// tester implements testsuite.Tester. +type tester struct{} + +// NewServer implements testsuite.Tester.NewServer. +func (tester) NewServer(t *testing.T) *lisafs.Server { + return &fsgofer.NewLisafsServer(fsgofer.Config{HostUDS: true, EnableVerityXattr: true}).Server +} + +// LinkSupported implements testsuite.Tester.LinkSupported. +func (tester) LinkSupported() bool { + return true +} + +// SetUserGroupIDSupported implements testsuite.Tester.SetUserGroupIDSupported. +func (tester) SetUserGroupIDSupported() bool { + return true +} + +func TestFSGofer(t *testing.T) { + testsuite.RunAllLocalFSTests(t, tester{}) +} |