diff options
44 files changed, 589 insertions, 44 deletions
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 753fec3ed..81ff9fe9e 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -254,3 +254,14 @@ const ( F_SEAL_GROW = 0x0004 // Prevent file from growing. F_SEAL_WRITE = 0x0008 // Prevent writes. ) + +// Constants related to fallocate(2). Source: include/uapi/linux/falloc.h +const ( + FALLOC_FL_KEEP_SIZE = 0x01 + FALLOC_FL_PUNCH_HOLE = 0x02 + FALLOC_FL_NO_HIDE_STALE = 0x04 + FALLOC_FL_COLLAPSE_RANGE = 0x08 + FALLOC_FL_ZERO_RANGE = 0x10 + FALLOC_FL_INSERT_RANGE = 0x20 + FALLOC_FL_UNSHARE_RANGE = 0x40 +) diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD index 5d972309d..36b2ec5f6 100644 --- a/pkg/p9/BUILD +++ b/pkg/p9/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/fd", "//pkg/log", "//pkg/unet", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go index 63c65129a..471c3a80b 100644 --- a/pkg/p9/client_file.go +++ b/pkg/p9/client_file.go @@ -171,6 +171,18 @@ func (c *clientFile) SetAttr(valid SetAttrMask, attr SetAttr) error { return c.client.sendRecv(&Tsetattr{FID: c.fid, Valid: valid, SetAttr: attr}, &Rsetattr{}) } +// Allocate implements File.Allocate. +func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + if !versionSupportsTallocate(c.client.version) { + return syscall.EOPNOTSUPP + } + + return c.client.sendRecv(&Tallocate{FID: c.fid, Mode: mode, Offset: offset, Length: length}, &Rallocate{}) +} + // Remove implements File.Remove. // // N.B. This method is no longer part of the file interface and should be diff --git a/pkg/p9/file.go b/pkg/p9/file.go index a52a0f3e7..89e814d50 100644 --- a/pkg/p9/file.go +++ b/pkg/p9/file.go @@ -89,6 +89,10 @@ type File interface { // On the server, SetAttr has a write concurrency guarantee. 
SetAttr(valid SetAttrMask, attr SetAttr) error + // Allocate allows the caller to directly manipulate the allocated disk space + // for the file. See fallocate(2) for more details. + Allocate(mode AllocateMode, offset, length uint64) error + // Close is called when all references are dropped on the server side, // and Close should be called by the client to drop all references. // diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index 6da2ce4e3..533ead98a 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -878,6 +878,40 @@ func (t *Tsetattr) handle(cs *connState) message { } // handle implements handler.handle. +func (t *Tallocate) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyWrite(func() error { + // Has it been opened already? + openFlags, opened := ref.OpenFlags() + if !opened { + return syscall.EINVAL + } + + // Can it be written? Check permissions. + if openFlags&OpenFlagsModeMask == ReadOnly { + return syscall.EBADF + } + + // We don't allow allocate on files that have been deleted. + if ref.isDeleted() { + return syscall.EINVAL + } + + return ref.file.Allocate(t.Mode, t.Offset, t.Length) + }); err != nil { + return newErr(err) + } + + return &Rallocate{} +} + +// handle implements handler.handle. func (t *Txattrwalk) handle(cs *connState) message { // Lookup the FID. ref, ok := cs.LookupFID(t.FID) diff --git a/pkg/p9/local_server/local_server.go b/pkg/p9/local_server/local_server.go index f4077a9d4..d49d94550 100644 --- a/pkg/p9/local_server/local_server.go +++ b/pkg/p9/local_server/local_server.go @@ -323,6 +323,11 @@ func (l *local) Renamed(parent p9.File, newName string) { l.path = path.Join(parent.(*local).path, newName) } +// Allocate implements p9.File.Allocate. 
+func (l *local) Allocate(mode p9.AllocateMode, offset, length uint64) error { + return syscall.Fallocate(int(l.file.Fd()), mode.ToLinux(), int64(offset), int64(length)) +} + func main() { log.SetLevel(log.Debug) diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go index 3c7898cc1..703753c31 100644 --- a/pkg/p9/messages.go +++ b/pkg/p9/messages.go @@ -1424,6 +1424,63 @@ func (r *Rsetattr) String() string { return fmt.Sprintf("Rsetattr{}") } +// Tallocate is an allocate request. This is an extension to 9P protocol, not +// present in the 9P2000.L standard. +type Tallocate struct { + FID FID + Mode AllocateMode + Offset uint64 + Length uint64 +} + +// Decode implements encoder.Decode. +func (t *Tallocate) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Mode.Decode(b) + t.Offset = b.Read64() + t.Length = b.Read64() +} + +// Encode implements encoder.Encode. +func (t *Tallocate) Encode(b *buffer) { + b.WriteFID(t.FID) + t.Mode.Encode(b) + b.Write64(t.Offset) + b.Write64(t.Length) +} + +// Type implements message.Type. +func (*Tallocate) Type() MsgType { + return MsgTallocate +} + +// String implements fmt.Stringer. +func (t *Tallocate) String() string { + return fmt.Sprintf("Tallocate{FID: %d, Offset: %d, Length: %d}", t.FID, t.Offset, t.Length) +} + +// Rallocate is an allocate response. +type Rallocate struct { +} + +// Decode implements encoder.Decode. +func (*Rallocate) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rallocate) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rallocate) Type() MsgType { + return MsgRallocate +} + +// String implements fmt.Stringer. +func (r *Rallocate) String() string { + return fmt.Sprintf("Rallocate{}") +} + // Txattrwalk walks extended attributes. type Txattrwalk struct { // FID is the FID to check for attributes. 
@@ -2297,4 +2354,6 @@ func init() { msgRegistry.register(MsgRusymlink, func() message { return &Rusymlink{} }) msgRegistry.register(MsgTlconnect, func() message { return &Tlconnect{} }) msgRegistry.register(MsgRlconnect, func() message { return &Rlconnect{} }) + msgRegistry.register(MsgTallocate, func() message { return &Tallocate{} }) + msgRegistry.register(MsgRallocate, func() message { return &Rallocate{} }) } diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 78c7d3f86..4039862e6 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -22,6 +22,8 @@ import ( "strings" "sync/atomic" "syscall" + + "golang.org/x/sys/unix" ) // OpenFlags is the mode passed to Open and Create operations. @@ -374,6 +376,8 @@ const ( MsgRusymlink = 135 MsgTlconnect = 136 MsgRlconnect = 137 + MsgTallocate = 138 + MsgRallocate = 139 ) // QIDType represents the file type for QIDs. @@ -1058,3 +1062,80 @@ func (d *Dirent) Encode(b *buffer) { b.WriteQIDType(d.Type) b.WriteString(d.Name) } + +// AllocateMode are possible modes to p9.File.Allocate(). +type AllocateMode struct { + KeepSize bool + PunchHole bool + NoHideStale bool + CollapseRange bool + ZeroRange bool + InsertRange bool + Unshare bool +} + +// ToLinux converts to a value compatible with fallocate(2)'s mode. +func (a *AllocateMode) ToLinux() uint32 { + rv := uint32(0) + if a.KeepSize { + rv |= unix.FALLOC_FL_KEEP_SIZE + } + if a.PunchHole { + rv |= unix.FALLOC_FL_PUNCH_HOLE + } + if a.NoHideStale { + rv |= unix.FALLOC_FL_NO_HIDE_STALE + } + if a.CollapseRange { + rv |= unix.FALLOC_FL_COLLAPSE_RANGE + } + if a.ZeroRange { + rv |= unix.FALLOC_FL_ZERO_RANGE + } + if a.InsertRange { + rv |= unix.FALLOC_FL_INSERT_RANGE + } + if a.Unshare { + rv |= unix.FALLOC_FL_UNSHARE_RANGE + } + return rv +} + +// Decode implements encoder.Decode. 
+func (a *AllocateMode) Decode(b *buffer) { + mask := b.Read32() + a.KeepSize = mask&0x01 != 0 + a.PunchHole = mask&0x02 != 0 + a.NoHideStale = mask&0x04 != 0 + a.CollapseRange = mask&0x08 != 0 + a.ZeroRange = mask&0x10 != 0 + a.InsertRange = mask&0x20 != 0 + a.Unshare = mask&0x40 != 0 +} + +// Encode implements encoder.Encode. +func (a *AllocateMode) Encode(b *buffer) { + mask := uint32(0) + if a.KeepSize { + mask |= 0x01 + } + if a.PunchHole { + mask |= 0x02 + } + if a.NoHideStale { + mask |= 0x04 + } + if a.CollapseRange { + mask |= 0x08 + } + if a.ZeroRange { + mask |= 0x10 + } + if a.InsertRange { + mask |= 0x20 + } + if a.Unshare { + mask |= 0x40 + } + b.Write32(mask) +} diff --git a/pkg/p9/version.go b/pkg/p9/version.go index a36a499a1..c2a2885ae 100644 --- a/pkg/p9/version.go +++ b/pkg/p9/version.go @@ -26,7 +26,7 @@ const ( // // Clients are expected to start requesting this version number and // to continuously decrement it until a Tversion request succeeds. - highestSupportedVersion uint32 = 6 + highestSupportedVersion uint32 = 7 // lowestSupportedVersion is the lowest supported version X in a // version string of the format 9P2000.L.Google.X. @@ -143,3 +143,8 @@ func VersionSupportsAnonymous(v uint32) bool { func VersionSupportsMultiUser(v uint32) bool { return v >= 6 } + +// versionSupportsTallocate returns true if version v supports Allocate(). 
+func versionSupportsTallocate(v uint32) bool { + return v >= 7 +} diff --git a/pkg/sentry/fs/ashmem/device.go b/pkg/sentry/fs/ashmem/device.go index 5e005bc2e..22e1530e9 100644 --- a/pkg/sentry/fs/ashmem/device.go +++ b/pkg/sentry/fs/ashmem/device.go @@ -29,6 +29,7 @@ import ( type Device struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` diff --git a/pkg/sentry/fs/binder/binder.go b/pkg/sentry/fs/binder/binder.go index acbbd5466..a992253e6 100644 --- a/pkg/sentry/fs/binder/binder.go +++ b/pkg/sentry/fs/binder/binder.go @@ -46,6 +46,7 @@ const ( type Device struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go index 6b11afa44..17d68b5c4 100644 --- a/pkg/sentry/fs/dev/full.go +++ b/pkg/sentry/fs/dev/full.go @@ -30,6 +30,7 @@ import ( type fullDevice struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` @@ -59,7 +60,6 @@ func (f *fullDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi // +stateify savable type fullFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` @@ -69,6 +69,7 @@ type fullFileOperations struct { fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` readZeros 
`state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*fullFileOperations)(nil) diff --git a/pkg/sentry/fs/dev/null.go b/pkg/sentry/fs/dev/null.go index 069212b6d..ee13183c8 100644 --- a/pkg/sentry/fs/dev/null.go +++ b/pkg/sentry/fs/dev/null.go @@ -29,6 +29,7 @@ import ( type nullDevice struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` @@ -60,17 +61,17 @@ func (n *nullDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi // +stateify savable type nullFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` fsutil.FileNoopFsync `state:"nosave"` fsutil.FileNoopRead `state:"nosave"` - fsutil.FileNoopWrite `state:"nosave"` fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopWrite `state:"nosave"` fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*nullFileOperations)(nil) @@ -101,16 +102,16 @@ func (zd *zeroDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F // +stateify savable type zeroFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` fsutil.FileNoopFsync `state:"nosave"` fsutil.FileNoopRelease `state:"nosave"` fsutil.FileNoopWrite `state:"nosave"` fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` readZeros `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*zeroFileOperations)(nil) diff --git 
a/pkg/sentry/fs/dev/random.go b/pkg/sentry/fs/dev/random.go index de0f3e5e5..b0a412382 100644 --- a/pkg/sentry/fs/dev/random.go +++ b/pkg/sentry/fs/dev/random.go @@ -29,6 +29,7 @@ import ( type randomDevice struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` @@ -57,16 +58,16 @@ func (*randomDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi // +stateify savable type randomFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoopFsync `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` fsutil.FileNoopRelease `state:"nosave"` fsutil.FileNoopWrite `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*randomFileOperations)(nil) diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 01098675d..44f43b965 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -113,5 +113,6 @@ go_test( "//pkg/sentry/memmap", "//pkg/sentry/safemem", "//pkg/sentry/usermem", + "//pkg/syserror", ], ) diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index 28686f3b3..ad0518b8f 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -149,7 +149,7 @@ func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error { } // Invalidate COW mappings that may exist beyond the new size in case the file - // is being shrunk. 
Other mappinsg don't need to be invalidated because + // is being shrunk. Other mappings don't need to be invalidated because // translate will just return identical mappings after invalidation anyway, // and SIGBUS will be raised and handled when the mappings are touched. // @@ -167,6 +167,14 @@ func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error { return nil } +// Allocate reserves space in the backing file. +func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64) error { + h.truncateMu.RLock() + err := h.backingFile.Allocate(ctx, offset, length) + h.truncateMu.RUnlock() + return err +} + // Write writes to the file backing this mappable. func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { h.truncateMu.RLock() diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index b6366d906..151be1d0d 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -34,6 +34,7 @@ type SimpleFileInode struct { InodeNoExtendedAttributes `state:"nosave"` InodeNoopRelease `state:"nosave"` InodeNoopWriteOut `state:"nosave"` + InodeNotAllocatable `state:"nosave"` InodeNotDirectory `state:"nosave"` InodeNotMappable `state:"nosave"` InodeNotOpenable `state:"nosave"` @@ -61,6 +62,7 @@ type NoReadWriteFileInode struct { InodeNoExtendedAttributes `state:"nosave"` InodeNoopRelease `state:"nosave"` InodeNoopWriteOut `state:"nosave"` + InodeNotAllocatable `state:"nosave"` InodeNotDirectory `state:"nosave"` InodeNotMappable `state:"nosave"` InodeNotSocket `state:"nosave"` @@ -465,3 +467,26 @@ func (InodeDenyWriteChecker) Check(ctx context.Context, inode *fs.Inode, p fs.Pe } return fs.ContextCanAccessFile(ctx, inode, p) } + +// InodeNotAllocatable can be used by Inodes that do not support Allocate(). 
+type InodeNotAllocatable struct{} + +func (InodeNotAllocatable) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { + return syserror.EOPNOTSUPP +} + +// InodeNoopAllocate implements fs.InodeOperations.Allocate as a noop. +type InodeNoopAllocate struct{} + +// Allocate implements fs.InodeOperations.Allocate. +func (InodeNoopAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { + return nil +} + +// InodeIsDirAllocate implements fs.InodeOperations.Allocate for directories. +type InodeIsDirAllocate struct{} + +// Allocate implements fs.InodeOperations.Allocate. +func (InodeIsDirAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { + return syserror.EISDIR +} diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 76644e69d..03cad37f3 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -135,6 +135,10 @@ type CachedFileObject interface { // the file was opened. SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error + // Allocate allows the caller to reserve disk space for the inode. + // It's equivalent to fallocate(2) with 'mode=0'. + Allocate(ctx context.Context, offset int64, length int64) error + // Sync instructs the remote filesystem to sync the file to stable storage. Sync(ctx context.Context) error @@ -336,6 +340,30 @@ func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode, return nil } +// Allocate implements fs.InodeOperations.Allocate. +func (c *CachingInodeOperations) Allocate(ctx context.Context, offset, length int64) error { + newSize := offset + length + + // c.attr.Size is protected by both c.attrMu and c.dataMu. 
+ c.attrMu.Lock() + defer c.attrMu.Unlock() + c.dataMu.Lock() + defer c.dataMu.Unlock() + + if newSize <= c.attr.Size { + return nil + } + + now := ktime.NowFromContext(ctx) + if err := c.backingFile.Allocate(ctx, offset, length); err != nil { + return err + } + + c.attr.Size = newSize + c.touchModificationTimeLocked(now) + return nil +} + // WriteOut implements fs.InodeOperations.WriteOut. func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { c.attrMu.Lock() diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go index 3f10efc12..be3d4b6fc 100644 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ b/pkg/sentry/fs/fsutil/inode_cached_test.go @@ -26,6 +26,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" ) type noopBackingFile struct{} @@ -50,6 +51,10 @@ func (noopBackingFile) FD() int { return -1 } +func (noopBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { + return nil +} + func TestSetPermissions(t *testing.T) { ctx := contexttest.Context(t) @@ -237,6 +242,10 @@ func (*sliceBackingFile) FD() int { return -1 } +func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { + return syserror.EOPNOTSUPP +} + type noopMappingSpace struct{} // Invalidate implements memmap.MappingSpace.Invalidate. 
diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 842a34af8..be53ac4d9 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -59,6 +59,13 @@ func (c *contextFile) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9 return err } +func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { + ctx.UninterruptibleSleepStart(false) + err := c.file.Allocate(mode, offset, length) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (c *contextFile) rename(ctx context.Context, directory contextFile, name string) error { ctx.UninterruptibleSleepStart(false) err := c.file.Rename(directory.file, name) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index f6f20844d..dcb3b2880 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -322,6 +322,15 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err return unstable(ctx, valid, pattr, i.s.mounter, i.s.client), nil } +func (i *inodeFileState) Allocate(ctx context.Context, offset, length int64) error { + i.handlesMu.RLock() + defer i.handlesMu.RUnlock() + + // No options are supported for now. + mode := p9.AllocateMode{} + return i.writeHandles.File.allocate(ctx, mode, uint64(offset), uint64(length)) +} + // session extracts the gofer's session from the MountSource. func (i *inodeOperations) session() *session { return i.fileState.s @@ -498,6 +507,21 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)}) } +// Allocate implements fs.InodeOperations.Allocate. +func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error { + // This can only be called for files anyway. 
+ if i.session().cachePolicy.useCachingInodeOps(inode) { + return i.cachingInodeOps.Allocate(ctx, offset, length) + } + if i.session().cachePolicy == cacheRemoteRevalidating { + return i.fileState.hostMappable.Allocate(ctx, offset, length) + } + + // No options are supported for now. + mode := p9.AllocateMode{} + return i.fileState.file.allocate(ctx, mode, uint64(offset), uint64(length)) +} + // WriteOut implements fs.InodeOperations.WriteOut. func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { if !i.session().cachePolicy.cacheUAttrs(inode) { diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 20e077f77..d36ac9a87 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -163,6 +163,11 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err return unstableAttr(i.mops, &s), nil } +// Allocate implements fsutil.CachedFileObject.Allocate. +func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error { + return syscall.Fallocate(i.FD(), 0, offset, length) +} + // inodeOperations implements fs.InodeOperations. var _ fs.InodeOperations = (*inodeOperations)(nil) @@ -397,6 +402,19 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size in return i.cachingInodeOps.Truncate(ctx, inode, size) } +// Allocate implements fs.InodeOperations.Allocate. +func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error { + // Is the file not memory-mappable? + if !canMap(inode) { + // Then just send the call to the FD, the host will synchronize the metadata + // update with any host inode and page cache. + return i.fileState.Allocate(ctx, offset, length) + } + // Otherwise we need to go through cachingInodeOps, even if the host page + // cache is in use, to invalidate private copies of truncated pages. 
+ return i.cachingInodeOps.Allocate(ctx, offset, length) +} + // WriteOut implements fs.InodeOperations.WriteOut. func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { // Have we been using host kernel metadata caches? diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index d764ef93d..22f316daf 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -340,6 +340,13 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { return i.InodeOperations.Truncate(ctx, i, size) } +func (i *Inode) Allocate(ctx context.Context, d *Dirent, offset int64, length int64) error { + if i.overlay != nil { + return overlayAllocate(ctx, i.overlay, d, offset, length) + } + return i.InodeOperations.Allocate(ctx, i, offset, length) +} + // Readlink calls i.InodeOperations.Readlnk with i as the Inode. func (i *Inode) Readlink(ctx context.Context) (string, error) { if i.overlay != nil { diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index ac287e1e4..abafe4791 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -223,6 +223,10 @@ type InodeOperations interface { // Implementations need not check that length >= 0. Truncate(ctx context.Context, inode *Inode, size int64) error + // Allocate allows the caller to reserve disk space for the inode. + // It's equivalent to fallocate(2) with 'mode=0'. + Allocate(ctx context.Context, inode *Inode, offset int64, length int64) error + // WriteOut writes cached Inode state to a backing filesystem in a // synchronous manner. 
// diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 3d015328e..ead487097 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -582,6 +582,13 @@ func overlayTruncate(ctx context.Context, o *overlayEntry, d *Dirent, size int64 return o.upper.InodeOperations.Truncate(ctx, o.upper, size) } +func overlayAllocate(ctx context.Context, o *overlayEntry, d *Dirent, offset, length int64) error { + if err := copyUp(ctx, d); err != nil { + return err + } + return o.upper.InodeOperations.Allocate(ctx, o.upper, offset, length) +} + func overlayReadlink(ctx context.Context, o *overlayEntry) (string, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index 66b3da2d0..52ce1d29e 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -422,6 +422,7 @@ type inode struct { fsutil.InodeNoExtendedAttributes `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go index cf359a1f1..a71144b2c 100644 --- a/pkg/sentry/fs/mock.go +++ b/pkg/sentry/fs/mock.go @@ -150,6 +150,11 @@ func (n *MockInodeOperations) Truncate(ctx context.Context, inode *Inode, size i return nil } +// Allocate implements fs.InodeOperations.Allocate. +func (n *MockInodeOperations) Allocate(ctx context.Context, inode *Inode, offset, length int64) error { + return nil +} + // Remove implements fs.InodeOperations.Remove. 
func (n *MockInodeOperations) Remove(context.Context, *Inode, string) error { return nil diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index b03807043..379569823 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -55,6 +55,7 @@ func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) ( type staticFileInodeOps struct { fsutil.InodeDenyWriteChecker `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopTruncate `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index 10ea1f55d..6b0ae9e60 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -93,6 +93,7 @@ type SeqFile struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` @@ -183,7 +184,6 @@ func (s *SeqFile) updateSourceLocked(ctx context.Context, record int) { // // +stateify savable type seqFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` @@ -192,6 +192,7 @@ type seqFileOperations struct { fsutil.FileNoopRelease `state:"nosave"` fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` seqFile *SeqFile } diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index d649da0f1..5df3cee13 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -38,6 +38,7 @@ type idMapInodeOperations struct { 
fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` @@ -81,7 +82,6 @@ func (imio *idMapInodeOperations) GetFile(ctx context.Context, dirent *fs.Dirent // +stateify savable type idMapFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileGenericSeek `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` @@ -90,6 +90,7 @@ type idMapFileOperations struct { fsutil.FileNoopRelease `state:"nosave"` fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` iops *idMapInodeOperations } diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index a6b6a5c33..eb98b59cc 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -50,6 +50,7 @@ type CreateOps struct { // +stateify savable type Dir struct { fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeIsDirAllocate `state:"nosave"` fsutil.InodeIsDirTruncate `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go index 9406a07ca..a7cb1bb86 100644 --- a/pkg/sentry/fs/ramfs/socket.go +++ b/pkg/sentry/fs/ramfs/socket.go @@ -30,6 +30,7 @@ type Socket struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSymlink `state:"nosave"` @@ -67,7 +68,6 @@ func (s *Socket) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFl // +stateify savable type socketFileOperations struct { - 
waiter.AlwaysReady `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` @@ -78,6 +78,7 @@ type socketFileOperations struct { fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileNoWrite `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*socketFileOperations)(nil) diff --git a/pkg/sentry/fs/ramfs/symlink.go b/pkg/sentry/fs/ramfs/symlink.go index f7835fe05..dd2585b02 100644 --- a/pkg/sentry/fs/ramfs/symlink.go +++ b/pkg/sentry/fs/ramfs/symlink.go @@ -29,10 +29,11 @@ type Symlink struct { fsutil.InodeGenericChecker `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotTruncatable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotTruncatable `state:"nosave"` fsutil.InodeVirtual `state:"nosave"` fsutil.InodeSimpleAttributes @@ -88,7 +89,6 @@ func (s *Symlink) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileF // +stateify savable type symlinkFileOperations struct { - waiter.AlwaysReady `state:"nosave"` fsutil.FileNoIoctl `state:"nosave"` fsutil.FileNoMMap `state:"nosave"` fsutil.FileNoopFlush `state:"nosave"` @@ -99,6 +99,7 @@ type symlinkFileOperations struct { fsutil.FileNotDirReaddir `state:"nosave"` fsutil.FileNoWrite `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` } var _ fs.FileOperations = (*symlinkFileOperations)(nil) diff --git a/pkg/sentry/fs/sys/devices.go b/pkg/sentry/fs/sys/devices.go index db91de435..bacc93af8 100644 --- a/pkg/sentry/fs/sys/devices.go +++ b/pkg/sentry/fs/sys/devices.go @@ -30,12 +30,13 @@ type cpunum struct { fsutil.InodeNoExtendedAttributes `state:"nosave"` fsutil.InodeNoopRelease `state:"nosave"` 
fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` fsutil.InodeNotDirectory `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeNotVirtual `state:"nosave"` fsutil.InodeNotTruncatable `state:"nosave"` + fsutil.InodeNotVirtual `state:"nosave"` fsutil.InodeSimpleAttributes fsutil.InodeStaticFileGetter diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index f89d86c83..c90062a22 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -259,6 +259,33 @@ func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size in return nil } +// Allocate implements fs.InodeOperations.Allocate. +func (f *fileInodeOperations) Allocate(ctx context.Context, _ *fs.Inode, offset, length int64) error { + newSize := offset + length + + f.attrMu.Lock() + defer f.attrMu.Unlock() + f.dataMu.Lock() + defer f.dataMu.Unlock() + + if newSize <= f.attr.Size { + return nil + } + + // Check if current seals allow growth. + if f.seals&linux.F_SEAL_GROW != 0 { + return syserror.EPERM + } + + f.attr.Size = newSize + + now := ktime.NowFromContext(ctx) + f.attr.ModificationTime = now + f.attr.StatusChangeTime = now + + return nil +} + // AddLink implements fs.InodeOperations.AddLink. func (f *fileInodeOperations) AddLink() { f.attrMu.Lock() diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 832914453..6ad5c5adb 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -242,11 +242,16 @@ func (d *Dir) Rename(ctx context.Context, oldParent *fs.Inode, oldName string, n return rename(ctx, oldParent, oldName, newParent, newName, replacement) } -// StatFS implments fs.InodeOperations.StatFS. +// StatFS implements fs.InodeOperations.StatFS. 
func (*Dir) StatFS(context.Context) (fs.Info, error) { return fsInfo, nil } +// Allocate implements fs.InodeOperations.Allocate. +func (d *Dir) Allocate(ctx context.Context, node *fs.Inode, offset, length int64) error { + return d.ramfsDir.Allocate(ctx, node, offset, length) +} + // Symlink is a symlink. // // +stateify savable @@ -281,6 +286,7 @@ func (s *Symlink) StatFS(context.Context) (fs.Info, error) { type Socket struct { ramfs.Socket fsutil.InodeNotTruncatable `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` } // NewSocket returns a new socket with the provided permissions. diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 0fc777e67..8dc40e1f2 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -53,13 +53,14 @@ import ( // +stateify savable type dirInodeOperations struct { fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeIsDirAllocate `state:"nosave"` + fsutil.InodeIsDirTruncate `state:"nosave"` fsutil.InodeNoExtendedAttributes `state:"nosave"` fsutil.InodeNoopWriteOut `state:"nosave"` fsutil.InodeNotMappable `state:"nosave"` fsutil.InodeNotRenameable `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotTruncatable `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` fsutil.InodeVirtual `state:"nosave"` fsutil.InodeSimpleAttributes diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 99188dddf..7c3739360 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -191,3 +191,7 @@ func (*inodeOperations) newHandleLocked(wakeupChan *chan struct{}) { *wakeupChan = nil } } + +func (*inodeOperations) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error { + return syserror.EPIPE +} diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 893322647..1764bb4b6 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ 
b/pkg/sentry/syscalls/linux/sys_file.go @@ -1900,9 +1900,9 @@ func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Fallocate implements linux system call fallocate(2). -// (well, not really, but at least we return the expected error codes) func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { fd := kdefs.FD(args[0].Int()) + mode := args[1].Int64() offset := args[2].Int64() length := args[3].Int64() @@ -1915,8 +1915,42 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if offset < 0 || length <= 0 { return 0, nil, syserror.EINVAL } + if mode != 0 { + t.Kernel().EmitUnimplementedEvent(t) + return 0, nil, syserror.ENOTSUP + } + if !file.Flags().Write { + return 0, nil, syserror.EBADF + } + if fs.IsPipe(file.Dirent.Inode.StableAttr) { + return 0, nil, syserror.ESPIPE + } + if fs.IsDir(file.Dirent.Inode.StableAttr) { + return 0, nil, syserror.EISDIR + } + if !fs.IsRegular(file.Dirent.Inode.StableAttr) { + return 0, nil, syserror.ENODEV + } + size := offset + length + if size < 0 { + return 0, nil, syserror.EFBIG + } + if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { + t.SendSignal(&arch.SignalInfo{ + Signo: int32(syscall.SIGXFSZ), + Code: arch.SignalInfoUser, + }) + return 0, nil, syserror.EFBIG + } + + if err := file.Dirent.Inode.Allocate(t, file.Dirent, offset, length); err != nil { + return 0, nil, err + } + + // File length modified, generate notification. + file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) - return 0, nil, syserror.EOPNOTSUPP + return 0, nil, nil } // Flock implements linux syscall flock(2). diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index c1b33c551..c369e4d64 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -99,7 +99,7 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { // args: cmd, ... 
tr = newArgsTracker(0) - case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX: + case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX, syscall.SYS_FALLOCATE: // args: fd/addr, cmd, ... tr = newArgsTracker(1) diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index a1ad49fb2..4faab2946 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -62,8 +62,14 @@ var allowedSyscalls = seccomp.SyscallRules{ }, syscall.SYS_EXIT: {}, syscall.SYS_EXIT_GROUP: {}, - syscall.SYS_FCHMOD: {}, - syscall.SYS_FCHOWNAT: {}, + syscall.SYS_FALLOCATE: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(0), + }, + }, + syscall.SYS_FCHMOD: {}, + syscall.SYS_FCHOWNAT: {}, syscall.SYS_FCNTL: []seccomp.Rule{ { seccomp.AllowAny{}, diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 3a0806837..b185015b6 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -731,6 +731,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } +// Allocate implements p9.File. +func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error { + if !l.isOpen() { + return syscall.EBADF + } + + if err := syscall.Fallocate(l.file.FD(), mode.ToLinux(), int64(offset), int64(length)); err != nil { + return extractErrno(err) + } + return nil +} + // Rename implements p9.File; this should never be called. 
func (l *localFile) Rename(p9.File, string) error { panic("rename called directly") diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index d99733fc9..7ff4e4883 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -649,6 +649,8 @@ cc_binary( srcs = ["fallocate.cc"], linkstatic = 1, deps = [ + ":file_base", + "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc index 61b8acc7a..1c3d00287 100644 --- a/test/syscalls/linux/fallocate.cc +++ b/test/syscalls/linux/fallocate.cc @@ -12,45 +12,130 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <errno.h> #include <fcntl.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <syscall.h> +#include <time.h> #include <unistd.h> #include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" namespace gvisor { namespace testing { - namespace { -// These tests are very rudimentary because fallocate is not -// implemented. We just want to make sure the expected error codes are -// returned. +int fallocate(int fd, int mode, off_t offset, off_t len) { + return syscall(__NR_fallocate, fd, mode, offset, len); +} + +class AllocateTest : public FileTest { + void SetUp() override { FileTest::SetUp(); } +}; + +TEST_F(AllocateTest, Fallocate) { + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. 
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); -TEST(FallocateTest, NotImplemented) { - auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); + // Allocate to a smaller size should be noop. + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); - // Test that a completely unassigned fallocate mode returns EOPNOTSUPP. - ASSERT_THAT(fallocate(fd.get(), 0x80, 0, 32768), - SyscallFailsWithErrno(EOPNOTSUPP)); + // Grow again. + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 20); + + // Grow with offset. + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 30); + + // Grow with offset beyond EOF. + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 40); } -TEST(FallocateTest, BadOffset) { - auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); - ASSERT_THAT(fallocate(fd.get(), 0, -1, 32768), SyscallFailsWithErrno(EINVAL)); +TEST_F(AllocateTest, FallocateInvalid) { + // Invalid FD + EXPECT_THAT(fallocate(-1, 0, 0, 10), SyscallFailsWithErrno(EBADF)); + + // Negative offset and size. 
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, 10), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, -1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, -1), + SyscallFailsWithErrno(EINVAL)); } -TEST(FallocateTest, BadLength) { - auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); - ASSERT_THAT(fallocate(fd.get(), 0, 0, -1), SyscallFailsWithErrno(EINVAL)); +TEST_F(AllocateTest, FallocateReadonly) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(EBADF)); } -} // namespace +TEST_F(AllocateTest, FallocatePipe) { + int pipes[2]; + EXPECT_THAT(pipe(pipes), SyscallSucceeds()); + auto cleanup = Cleanup([&pipes] { + EXPECT_THAT(close(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[1]), SyscallSucceeds()); + }); + + EXPECT_THAT(fallocate(pipes[1], 0, 0, 10), SyscallFailsWithErrno(ESPIPE)); +} + +TEST_F(AllocateTest, FallocateChar) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDWR)); + EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); +} + +TEST_F(AllocateTest, FallocateRlimit) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Try growing past the file size limit. 
+ sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + struct rlimit setlim = {}; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 1025), + SyscallFailsWithErrno(EFBIG)); + + struct timespec timelimit = {}; + timelimit.tv_sec = 10; + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); +} + +} // namespace } // namespace testing } // namespace gvisor |