summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2019-05-09 15:34:44 -0700
committerShentubot <shentubot@google.com>2019-05-09 15:35:49 -0700
commit1bee43be13549b01e18d87df194ac219845de5cf (patch)
tree4b93064a7ec8b7de2cd515a692f11f0fc311b483
parent0f4be95a336bd5dbf214bc40eb5d1bbb5fce36a4 (diff)
Implement fallocate(2)
Closes #225 PiperOrigin-RevId: 247508791 Change-Id: I04f47cf2770b30043e5a272aba4ba6e11d0476cc
-rw-r--r--pkg/abi/linux/file.go11
-rw-r--r--pkg/p9/BUILD1
-rw-r--r--pkg/p9/client_file.go12
-rw-r--r--pkg/p9/file.go4
-rw-r--r--pkg/p9/handlers.go34
-rw-r--r--pkg/p9/local_server/local_server.go5
-rw-r--r--pkg/p9/messages.go59
-rw-r--r--pkg/p9/p9.go81
-rw-r--r--pkg/p9/version.go7
-rw-r--r--pkg/sentry/fs/ashmem/device.go1
-rw-r--r--pkg/sentry/fs/binder/binder.go1
-rw-r--r--pkg/sentry/fs/dev/full.go3
-rw-r--r--pkg/sentry/fs/dev/null.go9
-rw-r--r--pkg/sentry/fs/dev/random.go9
-rw-r--r--pkg/sentry/fs/fsutil/BUILD1
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go10
-rw-r--r--pkg/sentry/fs/fsutil/inode.go25
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go28
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached_test.go9
-rw-r--r--pkg/sentry/fs/gofer/context_file.go7
-rw-r--r--pkg/sentry/fs/gofer/inode.go24
-rw-r--r--pkg/sentry/fs/host/inode.go18
-rw-r--r--pkg/sentry/fs/inode.go7
-rw-r--r--pkg/sentry/fs/inode_operations.go4
-rw-r--r--pkg/sentry/fs/inode_overlay.go7
-rw-r--r--pkg/sentry/fs/inode_overlay_test.go1
-rw-r--r--pkg/sentry/fs/mock.go5
-rw-r--r--pkg/sentry/fs/proc/inode.go1
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile.go3
-rw-r--r--pkg/sentry/fs/proc/uid_gid_map.go3
-rw-r--r--pkg/sentry/fs/ramfs/dir.go1
-rw-r--r--pkg/sentry/fs/ramfs/socket.go3
-rw-r--r--pkg/sentry/fs/ramfs/symlink.go5
-rw-r--r--pkg/sentry/fs/sys/devices.go3
-rw-r--r--pkg/sentry/fs/tmpfs/inode_file.go27
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go8
-rw-r--r--pkg/sentry/fs/tty/dir.go5
-rw-r--r--pkg/sentry/kernel/pipe/node.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go38
-rw-r--r--runsc/boot/compat.go2
-rw-r--r--runsc/fsgofer/filter/config.go10
-rw-r--r--runsc/fsgofer/fsgofer.go12
-rw-r--r--test/syscalls/linux/BUILD2
-rw-r--r--test/syscalls/linux/fallocate.cc123
44 files changed, 589 insertions, 44 deletions
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 753fec3ed..81ff9fe9e 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -254,3 +254,14 @@ const (
F_SEAL_GROW = 0x0004 // Prevent file from growing.
F_SEAL_WRITE = 0x0008 // Prevent writes.
)
+
+// Constants related to fallocate(2). Source: include/uapi/linux/falloc.h
+const (
+ FALLOC_FL_KEEP_SIZE = 0x01
+ FALLOC_FL_PUNCH_HOLE = 0x02
+ FALLOC_FL_NO_HIDE_STALE = 0x04
+ FALLOC_FL_COLLAPSE_RANGE = 0x08
+ FALLOC_FL_ZERO_RANGE = 0x10
+ FALLOC_FL_INSERT_RANGE = 0x20
+ FALLOC_FL_UNSHARE_RANGE = 0x40
+)
diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD
index 5d972309d..36b2ec5f6 100644
--- a/pkg/p9/BUILD
+++ b/pkg/p9/BUILD
@@ -26,6 +26,7 @@ go_library(
"//pkg/fd",
"//pkg/log",
"//pkg/unet",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go
index 63c65129a..471c3a80b 100644
--- a/pkg/p9/client_file.go
+++ b/pkg/p9/client_file.go
@@ -171,6 +171,18 @@ func (c *clientFile) SetAttr(valid SetAttrMask, attr SetAttr) error {
return c.client.sendRecv(&Tsetattr{FID: c.fid, Valid: valid, SetAttr: attr}, &Rsetattr{})
}
+// Allocate implements File.Allocate.
+func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error {
+ if atomic.LoadUint32(&c.closed) != 0 {
+ return syscall.EBADF
+ }
+ if !versionSupportsTallocate(c.client.version) {
+ return syscall.EOPNOTSUPP
+ }
+
+ return c.client.sendRecv(&Tallocate{FID: c.fid, Mode: mode, Offset: offset, Length: length}, &Rallocate{})
+}
+
// Remove implements File.Remove.
//
// N.B. This method is no longer part of the file interface and should be
diff --git a/pkg/p9/file.go b/pkg/p9/file.go
index a52a0f3e7..89e814d50 100644
--- a/pkg/p9/file.go
+++ b/pkg/p9/file.go
@@ -89,6 +89,10 @@ type File interface {
// On the server, SetAttr has a write concurrency guarantee.
SetAttr(valid SetAttrMask, attr SetAttr) error
+ // Allocate allows the caller to directly manipulate the allocated disk space
+ // for the file. See fallocate(2) for more details.
+ Allocate(mode AllocateMode, offset, length uint64) error
+
// Close is called when all references are dropped on the server side,
// and Close should be called by the client to drop all references.
//
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index 6da2ce4e3..533ead98a 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -878,6 +878,40 @@ func (t *Tsetattr) handle(cs *connState) message {
}
// handle implements handler.handle.
+func (t *Tallocate) handle(cs *connState) message {
+ // Lookup the FID.
+ ref, ok := cs.LookupFID(t.FID)
+ if !ok {
+ return newErr(syscall.EBADF)
+ }
+ defer ref.DecRef()
+
+ if err := ref.safelyWrite(func() error {
+ // Has it been opened already?
+ openFlags, opened := ref.OpenFlags()
+ if !opened {
+ return syscall.EINVAL
+ }
+
+ // Can it be written? Check permissions.
+ if openFlags&OpenFlagsModeMask == ReadOnly {
+ return syscall.EBADF
+ }
+
+ // We don't allow allocate on files that have been deleted.
+ if ref.isDeleted() {
+ return syscall.EINVAL
+ }
+
+ return ref.file.Allocate(t.Mode, t.Offset, t.Length)
+ }); err != nil {
+ return newErr(err)
+ }
+
+ return &Rallocate{}
+}
+
+// handle implements handler.handle.
func (t *Txattrwalk) handle(cs *connState) message {
// Lookup the FID.
ref, ok := cs.LookupFID(t.FID)
diff --git a/pkg/p9/local_server/local_server.go b/pkg/p9/local_server/local_server.go
index f4077a9d4..d49d94550 100644
--- a/pkg/p9/local_server/local_server.go
+++ b/pkg/p9/local_server/local_server.go
@@ -323,6 +323,11 @@ func (l *local) Renamed(parent p9.File, newName string) {
l.path = path.Join(parent.(*local).path, newName)
}
+// Allocate implements p9.File.Allocate.
+func (l *local) Allocate(mode p9.AllocateMode, offset, length uint64) error {
+ return syscall.Fallocate(int(l.file.Fd()), mode.ToLinux(), int64(offset), int64(length))
+}
+
func main() {
log.SetLevel(log.Debug)
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index 3c7898cc1..703753c31 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -1424,6 +1424,63 @@ func (r *Rsetattr) String() string {
return fmt.Sprintf("Rsetattr{}")
}
+// Tallocate is an allocate request. This is an extension to 9P protocol, not
+// present in the 9P2000.L standard.
+type Tallocate struct {
+ FID FID
+ Mode AllocateMode
+ Offset uint64
+ Length uint64
+}
+
+// Decode implements encoder.Decode.
+func (t *Tallocate) Decode(b *buffer) {
+ t.FID = b.ReadFID()
+ t.Mode.Decode(b)
+ t.Offset = b.Read64()
+ t.Length = b.Read64()
+}
+
+// Encode implements encoder.Encode.
+func (t *Tallocate) Encode(b *buffer) {
+ b.WriteFID(t.FID)
+ t.Mode.Encode(b)
+ b.Write64(t.Offset)
+ b.Write64(t.Length)
+}
+
+// Type implements message.Type.
+func (*Tallocate) Type() MsgType {
+ return MsgTallocate
+}
+
+// String implements fmt.Stringer.
+func (t *Tallocate) String() string {
+ return fmt.Sprintf("Tallocate{FID: %d, Offset: %d, Length: %d}", t.FID, t.Offset, t.Length)
+}
+
+// Rallocate is an allocate response.
+type Rallocate struct {
+}
+
+// Decode implements encoder.Decode.
+func (*Rallocate) Decode(b *buffer) {
+}
+
+// Encode implements encoder.Encode.
+func (*Rallocate) Encode(b *buffer) {
+}
+
+// Type implements message.Type.
+func (*Rallocate) Type() MsgType {
+ return MsgRallocate
+}
+
+// String implements fmt.Stringer.
+func (r *Rallocate) String() string {
+ return fmt.Sprintf("Rallocate{}")
+}
+
// Txattrwalk walks extended attributes.
type Txattrwalk struct {
// FID is the FID to check for attributes.
@@ -2297,4 +2354,6 @@ func init() {
msgRegistry.register(MsgRusymlink, func() message { return &Rusymlink{} })
msgRegistry.register(MsgTlconnect, func() message { return &Tlconnect{} })
msgRegistry.register(MsgRlconnect, func() message { return &Rlconnect{} })
+ msgRegistry.register(MsgTallocate, func() message { return &Tallocate{} })
+ msgRegistry.register(MsgRallocate, func() message { return &Rallocate{} })
}
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index 78c7d3f86..4039862e6 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -22,6 +22,8 @@ import (
"strings"
"sync/atomic"
"syscall"
+
+ "golang.org/x/sys/unix"
)
// OpenFlags is the mode passed to Open and Create operations.
@@ -374,6 +376,8 @@ const (
MsgRusymlink = 135
MsgTlconnect = 136
MsgRlconnect = 137
+ MsgTallocate = 138
+ MsgRallocate = 139
)
// QIDType represents the file type for QIDs.
@@ -1058,3 +1062,80 @@ func (d *Dirent) Encode(b *buffer) {
b.WriteQIDType(d.Type)
b.WriteString(d.Name)
}
+
+// AllocateMode are possible modes to p9.File.Allocate().
+type AllocateMode struct {
+ KeepSize bool
+ PunchHole bool
+ NoHideStale bool
+ CollapseRange bool
+ ZeroRange bool
+ InsertRange bool
+ Unshare bool
+}
+
+// ToLinux converts to a value compatible with fallocate(2)'s mode.
+func (a *AllocateMode) ToLinux() uint32 {
+ rv := uint32(0)
+ if a.KeepSize {
+ rv |= unix.FALLOC_FL_KEEP_SIZE
+ }
+ if a.PunchHole {
+ rv |= unix.FALLOC_FL_PUNCH_HOLE
+ }
+ if a.NoHideStale {
+ rv |= unix.FALLOC_FL_NO_HIDE_STALE
+ }
+ if a.CollapseRange {
+ rv |= unix.FALLOC_FL_COLLAPSE_RANGE
+ }
+ if a.ZeroRange {
+ rv |= unix.FALLOC_FL_ZERO_RANGE
+ }
+ if a.InsertRange {
+ rv |= unix.FALLOC_FL_INSERT_RANGE
+ }
+ if a.Unshare {
+ rv |= unix.FALLOC_FL_UNSHARE_RANGE
+ }
+ return rv
+}
+
+// Decode implements encoder.Decode.
+func (a *AllocateMode) Decode(b *buffer) {
+ mask := b.Read32()
+ a.KeepSize = mask&0x01 != 0
+ a.PunchHole = mask&0x02 != 0
+ a.NoHideStale = mask&0x04 != 0
+ a.CollapseRange = mask&0x08 != 0
+ a.ZeroRange = mask&0x10 != 0
+ a.InsertRange = mask&0x20 != 0
+ a.Unshare = mask&0x40 != 0
+}
+
+// Encode implements encoder.Encode.
+func (a *AllocateMode) Encode(b *buffer) {
+ mask := uint32(0)
+ if a.KeepSize {
+ mask |= 0x01
+ }
+ if a.PunchHole {
+ mask |= 0x02
+ }
+ if a.NoHideStale {
+ mask |= 0x04
+ }
+ if a.CollapseRange {
+ mask |= 0x08
+ }
+ if a.ZeroRange {
+ mask |= 0x10
+ }
+ if a.InsertRange {
+ mask |= 0x20
+ }
+ if a.Unshare {
+ mask |= 0x40
+ }
+ b.Write32(mask)
+}
diff --git a/pkg/p9/version.go b/pkg/p9/version.go
index a36a499a1..c2a2885ae 100644
--- a/pkg/p9/version.go
+++ b/pkg/p9/version.go
@@ -26,7 +26,7 @@ const (
//
// Clients are expected to start requesting this version number and
// to continuously decrement it until a Tversion request succeeds.
- highestSupportedVersion uint32 = 6
+ highestSupportedVersion uint32 = 7
// lowestSupportedVersion is the lowest supported version X in a
// version string of the format 9P2000.L.Google.X.
@@ -143,3 +143,8 @@ func VersionSupportsAnonymous(v uint32) bool {
func VersionSupportsMultiUser(v uint32) bool {
return v >= 6
}
+
+// versionSupportsTallocate returns true if version v supports Allocate().
+func versionSupportsTallocate(v uint32) bool {
+ return v >= 7
+}
diff --git a/pkg/sentry/fs/ashmem/device.go b/pkg/sentry/fs/ashmem/device.go
index 5e005bc2e..22e1530e9 100644
--- a/pkg/sentry/fs/ashmem/device.go
+++ b/pkg/sentry/fs/ashmem/device.go
@@ -29,6 +29,7 @@ import (
type Device struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
diff --git a/pkg/sentry/fs/binder/binder.go b/pkg/sentry/fs/binder/binder.go
index acbbd5466..a992253e6 100644
--- a/pkg/sentry/fs/binder/binder.go
+++ b/pkg/sentry/fs/binder/binder.go
@@ -46,6 +46,7 @@ const (
type Device struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go
index 6b11afa44..17d68b5c4 100644
--- a/pkg/sentry/fs/dev/full.go
+++ b/pkg/sentry/fs/dev/full.go
@@ -30,6 +30,7 @@ import (
type fullDevice struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
@@ -59,7 +60,6 @@ func (f *fullDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi
// +stateify savable
type fullFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
@@ -69,6 +69,7 @@ type fullFileOperations struct {
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
readZeros `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*fullFileOperations)(nil)
diff --git a/pkg/sentry/fs/dev/null.go b/pkg/sentry/fs/dev/null.go
index 069212b6d..ee13183c8 100644
--- a/pkg/sentry/fs/dev/null.go
+++ b/pkg/sentry/fs/dev/null.go
@@ -29,6 +29,7 @@ import (
type nullDevice struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
@@ -60,17 +61,17 @@ func (n *nullDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi
// +stateify savable
type nullFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
fsutil.FileNoopFsync `state:"nosave"`
fsutil.FileNoopRead `state:"nosave"`
- fsutil.FileNoopWrite `state:"nosave"`
fsutil.FileNoopRelease `state:"nosave"`
+ fsutil.FileNoopWrite `state:"nosave"`
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*nullFileOperations)(nil)
@@ -101,16 +102,16 @@ func (zd *zeroDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F
// +stateify savable
type zeroFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
fsutil.FileNoopFsync `state:"nosave"`
fsutil.FileNoopRelease `state:"nosave"`
fsutil.FileNoopWrite `state:"nosave"`
fsutil.FileNotDirReaddir `state:"nosave"`
- fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
readZeros `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*zeroFileOperations)(nil)
diff --git a/pkg/sentry/fs/dev/random.go b/pkg/sentry/fs/dev/random.go
index de0f3e5e5..b0a412382 100644
--- a/pkg/sentry/fs/dev/random.go
+++ b/pkg/sentry/fs/dev/random.go
@@ -29,6 +29,7 @@ import (
type randomDevice struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
@@ -57,16 +58,16 @@ func (*randomDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.Fi
// +stateify savable
type randomFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
- fsutil.FileNotDirReaddir `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
- fsutil.FileNoopFsync `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
- fsutil.FileNoIoctl `state:"nosave"`
+ fsutil.FileNoopFsync `state:"nosave"`
fsutil.FileNoopRelease `state:"nosave"`
fsutil.FileNoopWrite `state:"nosave"`
+ fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*randomFileOperations)(nil)
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 01098675d..44f43b965 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -113,5 +113,6 @@ go_test(
"//pkg/sentry/memmap",
"//pkg/sentry/safemem",
"//pkg/sentry/usermem",
+ "//pkg/syserror",
],
)
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index 28686f3b3..ad0518b8f 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -149,7 +149,7 @@ func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
}
// Invalidate COW mappings that may exist beyond the new size in case the file
- // is being shrunk. Other mappinsg don't need to be invalidated because
+ // is being shrunk. Other mappings don't need to be invalidated because
// translate will just return identical mappings after invalidation anyway,
// and SIGBUS will be raised and handled when the mappings are touched.
//
@@ -167,6 +167,14 @@ func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
return nil
}
+// Allocate reserves space in the backing file.
+func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64) error {
+ h.truncateMu.RLock()
+ err := h.backingFile.Allocate(ctx, offset, length)
+ h.truncateMu.RUnlock()
+ return err
+}
+
// Write writes to the file backing this mappable.
func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
h.truncateMu.RLock()
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index b6366d906..151be1d0d 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -34,6 +34,7 @@ type SimpleFileInode struct {
InodeNoExtendedAttributes `state:"nosave"`
InodeNoopRelease `state:"nosave"`
InodeNoopWriteOut `state:"nosave"`
+ InodeNotAllocatable `state:"nosave"`
InodeNotDirectory `state:"nosave"`
InodeNotMappable `state:"nosave"`
InodeNotOpenable `state:"nosave"`
@@ -61,6 +62,7 @@ type NoReadWriteFileInode struct {
InodeNoExtendedAttributes `state:"nosave"`
InodeNoopRelease `state:"nosave"`
InodeNoopWriteOut `state:"nosave"`
+ InodeNotAllocatable `state:"nosave"`
InodeNotDirectory `state:"nosave"`
InodeNotMappable `state:"nosave"`
InodeNotSocket `state:"nosave"`
@@ -465,3 +467,26 @@ func (InodeDenyWriteChecker) Check(ctx context.Context, inode *fs.Inode, p fs.Pe
}
return fs.ContextCanAccessFile(ctx, inode, p)
}
+
+//InodeNotAllocatable can be used by Inodes that do not support Allocate().
+type InodeNotAllocatable struct{}
+
+func (InodeNotAllocatable) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return syserror.EOPNOTSUPP
+}
+
+// InodeNoopAllocate implements fs.InodeOperations.Allocate as a noop.
+type InodeNoopAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (InodeNoopAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return nil
+}
+
+// InodeIsDirAllocate implements fs.InodeOperations.Allocate for directories.
+type InodeIsDirAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (InodeIsDirAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return syserror.EISDIR
+}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 76644e69d..03cad37f3 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -135,6 +135,10 @@ type CachedFileObject interface {
// the file was opened.
SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error
+ // Allocate allows the caller to reserve disk space for the inode.
+ // It's equivalent to fallocate(2) with 'mode=0'.
+ Allocate(ctx context.Context, offset int64, length int64) error
+
// Sync instructs the remote filesystem to sync the file to stable storage.
Sync(ctx context.Context) error
@@ -336,6 +340,30 @@ func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode,
return nil
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (c *CachingInodeOperations) Allocate(ctx context.Context, offset, length int64) error {
+ newSize := offset + length
+
+ // c.attr.Size is protected by both c.attrMu and c.dataMu.
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+
+ if newSize <= c.attr.Size {
+ return nil
+ }
+
+ now := ktime.NowFromContext(ctx)
+ if err := c.backingFile.Allocate(ctx, offset, length); err != nil {
+ return err
+ }
+
+ c.attr.Size = newSize
+ c.touchModificationTimeLocked(now)
+ return nil
+}
+
// WriteOut implements fs.InodeOperations.WriteOut.
func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
c.attrMu.Lock()
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go
index 3f10efc12..be3d4b6fc 100644
--- a/pkg/sentry/fs/fsutil/inode_cached_test.go
+++ b/pkg/sentry/fs/fsutil/inode_cached_test.go
@@ -26,6 +26,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
)
type noopBackingFile struct{}
@@ -50,6 +51,10 @@ func (noopBackingFile) FD() int {
return -1
}
+func (noopBackingFile) Allocate(ctx context.Context, offset int64, length int64) error {
+ return nil
+}
+
func TestSetPermissions(t *testing.T) {
ctx := contexttest.Context(t)
@@ -237,6 +242,10 @@ func (*sliceBackingFile) FD() int {
return -1
}
+func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error {
+ return syserror.EOPNOTSUPP
+}
+
type noopMappingSpace struct{}
// Invalidate implements memmap.MappingSpace.Invalidate.
diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go
index 842a34af8..be53ac4d9 100644
--- a/pkg/sentry/fs/gofer/context_file.go
+++ b/pkg/sentry/fs/gofer/context_file.go
@@ -59,6 +59,13 @@ func (c *contextFile) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9
return err
}
+func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := c.file.Allocate(mode, offset, length)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
func (c *contextFile) rename(ctx context.Context, directory contextFile, name string) error {
ctx.UninterruptibleSleepStart(false)
err := c.file.Rename(directory.file, name)
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index f6f20844d..dcb3b2880 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -322,6 +322,15 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err
return unstable(ctx, valid, pattr, i.s.mounter, i.s.client), nil
}
+func (i *inodeFileState) Allocate(ctx context.Context, offset, length int64) error {
+ i.handlesMu.RLock()
+ defer i.handlesMu.RUnlock()
+
+ // No options are supported for now.
+ mode := p9.AllocateMode{}
+ return i.writeHandles.File.allocate(ctx, mode, uint64(offset), uint64(length))
+}
+
// session extracts the gofer's session from the MountSource.
func (i *inodeOperations) session() *session {
return i.fileState.s
@@ -498,6 +507,21 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length
return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)})
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error {
+ // This can only be called for files anyway.
+ if i.session().cachePolicy.useCachingInodeOps(inode) {
+ return i.cachingInodeOps.Allocate(ctx, offset, length)
+ }
+ if i.session().cachePolicy == cacheRemoteRevalidating {
+ return i.fileState.hostMappable.Allocate(ctx, offset, length)
+ }
+
+ // No options are supported for now.
+ mode := p9.AllocateMode{}
+ return i.fileState.file.allocate(ctx, mode, uint64(offset), uint64(length))
+}
+
// WriteOut implements fs.InodeOperations.WriteOut.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
if !i.session().cachePolicy.cacheUAttrs(inode) {
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 20e077f77..d36ac9a87 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -163,6 +163,11 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err
return unstableAttr(i.mops, &s), nil
}
+// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
+func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error {
+ return syscall.Fallocate(i.FD(), 0, offset, length)
+}
+
// inodeOperations implements fs.InodeOperations.
var _ fs.InodeOperations = (*inodeOperations)(nil)
@@ -397,6 +402,19 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size in
return i.cachingInodeOps.Truncate(ctx, inode, size)
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error {
+ // Is the file not memory-mappable?
+ if !canMap(inode) {
+ // Then just send the call to the FD, the host will synchronize the metadata
+ // update with any host inode and page cache.
+ return i.fileState.Allocate(ctx, offset, length)
+ }
+ // Otherwise we need to go through cachingInodeOps, even if the host page
+ // cache is in use, to invalidate private copies of truncated pages.
+ return i.cachingInodeOps.Allocate(ctx, offset, length)
+}
+
// WriteOut implements fs.InodeOperations.WriteOut.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
// Have we been using host kernel metadata caches?
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index d764ef93d..22f316daf 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -340,6 +340,13 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
return i.InodeOperations.Truncate(ctx, i, size)
}
+func (i *Inode) Allocate(ctx context.Context, d *Dirent, offset int64, length int64) error {
+ if i.overlay != nil {
+ return overlayAllocate(ctx, i.overlay, d, offset, length)
+ }
+ return i.InodeOperations.Allocate(ctx, i, offset, length)
+}
+
// Readlink calls i.InodeOperations.Readlnk with i as the Inode.
func (i *Inode) Readlink(ctx context.Context) (string, error) {
if i.overlay != nil {
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index ac287e1e4..abafe4791 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -223,6 +223,10 @@ type InodeOperations interface {
// Implementations need not check that length >= 0.
Truncate(ctx context.Context, inode *Inode, size int64) error
+ // Allocate allows the caller to reserve disk space for the inode.
+ // It's equivalent to fallocate(2) with 'mode=0'.
+ Allocate(ctx context.Context, inode *Inode, offset int64, length int64) error
+
// WriteOut writes cached Inode state to a backing filesystem in a
// synchronous manner.
//
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 3d015328e..ead487097 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -582,6 +582,13 @@ func overlayTruncate(ctx context.Context, o *overlayEntry, d *Dirent, size int64
return o.upper.InodeOperations.Truncate(ctx, o.upper, size)
}
+func overlayAllocate(ctx context.Context, o *overlayEntry, d *Dirent, offset, length int64) error {
+ if err := copyUp(ctx, d); err != nil {
+ return err
+ }
+ return o.upper.InodeOperations.Allocate(ctx, o.upper, offset, length)
+}
+
func overlayReadlink(ctx context.Context, o *overlayEntry) (string, error) {
o.copyMu.RLock()
defer o.copyMu.RUnlock()
diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go
index 66b3da2d0..52ce1d29e 100644
--- a/pkg/sentry/fs/inode_overlay_test.go
+++ b/pkg/sentry/fs/inode_overlay_test.go
@@ -422,6 +422,7 @@ type inode struct {
fsutil.InodeNoExtendedAttributes `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go
index cf359a1f1..a71144b2c 100644
--- a/pkg/sentry/fs/mock.go
+++ b/pkg/sentry/fs/mock.go
@@ -150,6 +150,11 @@ func (n *MockInodeOperations) Truncate(ctx context.Context, inode *Inode, size i
return nil
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (n *MockInodeOperations) Allocate(ctx context.Context, inode *Inode, offset, length int64) error {
+ return nil
+}
+
// Remove implements fs.InodeOperations.Remove.
func (n *MockInodeOperations) Remove(context.Context, *Inode, string) error {
return nil
diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go
index b03807043..379569823 100644
--- a/pkg/sentry/fs/proc/inode.go
+++ b/pkg/sentry/fs/proc/inode.go
@@ -55,6 +55,7 @@ func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) (
type staticFileInodeOps struct {
fsutil.InodeDenyWriteChecker `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.InodeNoopAllocate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopTruncate `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go
index 10ea1f55d..6b0ae9e60 100644
--- a/pkg/sentry/fs/proc/seqfile/seqfile.go
+++ b/pkg/sentry/fs/proc/seqfile/seqfile.go
@@ -93,6 +93,7 @@ type SeqFile struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -183,7 +184,6 @@ func (s *SeqFile) updateSourceLocked(ctx context.Context, record int) {
//
// +stateify savable
type seqFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
@@ -192,6 +192,7 @@ type seqFileOperations struct {
fsutil.FileNoopRelease `state:"nosave"`
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
seqFile *SeqFile
}
diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go
index d649da0f1..5df3cee13 100644
--- a/pkg/sentry/fs/proc/uid_gid_map.go
+++ b/pkg/sentry/fs/proc/uid_gid_map.go
@@ -38,6 +38,7 @@ type idMapInodeOperations struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -81,7 +82,6 @@ func (imio *idMapInodeOperations) GetFile(ctx context.Context, dirent *fs.Dirent
// +stateify savable
type idMapFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileGenericSeek `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
@@ -90,6 +90,7 @@ type idMapFileOperations struct {
fsutil.FileNoopRelease `state:"nosave"`
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
iops *idMapInodeOperations
}
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index a6b6a5c33..eb98b59cc 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -50,6 +50,7 @@ type CreateOps struct {
// +stateify savable
type Dir struct {
fsutil.InodeGenericChecker `state:"nosave"`
+ fsutil.InodeIsDirAllocate `state:"nosave"`
fsutil.InodeIsDirTruncate `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go
index 9406a07ca..a7cb1bb86 100644
--- a/pkg/sentry/fs/ramfs/socket.go
+++ b/pkg/sentry/fs/ramfs/socket.go
@@ -30,6 +30,7 @@ type Socket struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSymlink `state:"nosave"`
@@ -67,7 +68,6 @@ func (s *Socket) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFl
// +stateify savable
type socketFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
@@ -78,6 +78,7 @@ type socketFileOperations struct {
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileNoWrite `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*socketFileOperations)(nil)
diff --git a/pkg/sentry/fs/ramfs/symlink.go b/pkg/sentry/fs/ramfs/symlink.go
index f7835fe05..dd2585b02 100644
--- a/pkg/sentry/fs/ramfs/symlink.go
+++ b/pkg/sentry/fs/ramfs/symlink.go
@@ -29,10 +29,11 @@ type Symlink struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
- fsutil.InodeNotTruncatable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
+ fsutil.InodeNotTruncatable `state:"nosave"`
fsutil.InodeVirtual `state:"nosave"`
fsutil.InodeSimpleAttributes
@@ -88,7 +89,6 @@ func (s *Symlink) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileF
// +stateify savable
type symlinkFileOperations struct {
- waiter.AlwaysReady `state:"nosave"`
fsutil.FileNoIoctl `state:"nosave"`
fsutil.FileNoMMap `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
@@ -99,6 +99,7 @@ type symlinkFileOperations struct {
fsutil.FileNotDirReaddir `state:"nosave"`
fsutil.FileNoWrite `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
}
var _ fs.FileOperations = (*symlinkFileOperations)(nil)
diff --git a/pkg/sentry/fs/sys/devices.go b/pkg/sentry/fs/sys/devices.go
index db91de435..bacc93af8 100644
--- a/pkg/sentry/fs/sys/devices.go
+++ b/pkg/sentry/fs/sys/devices.go
@@ -30,12 +30,13 @@ type cpunum struct {
fsutil.InodeNoExtendedAttributes `state:"nosave"`
fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
fsutil.InodeNotDirectory `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
fsutil.InodeNotSymlink `state:"nosave"`
- fsutil.InodeNotVirtual `state:"nosave"`
fsutil.InodeNotTruncatable `state:"nosave"`
+ fsutil.InodeNotVirtual `state:"nosave"`
fsutil.InodeSimpleAttributes
fsutil.InodeStaticFileGetter
diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go
index f89d86c83..c90062a22 100644
--- a/pkg/sentry/fs/tmpfs/inode_file.go
+++ b/pkg/sentry/fs/tmpfs/inode_file.go
@@ -259,6 +259,33 @@ func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size in
return nil
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (f *fileInodeOperations) Allocate(ctx context.Context, _ *fs.Inode, offset, length int64) error {
+ newSize := offset + length
+
+ f.attrMu.Lock()
+ defer f.attrMu.Unlock()
+ f.dataMu.Lock()
+ defer f.dataMu.Unlock()
+
+ if newSize <= f.attr.Size {
+ return nil
+ }
+
+ // Check if current seals allow growth.
+ if f.seals&linux.F_SEAL_GROW != 0 {
+ return syserror.EPERM
+ }
+
+ f.attr.Size = newSize
+
+ now := ktime.NowFromContext(ctx)
+ f.attr.ModificationTime = now
+ f.attr.StatusChangeTime = now
+
+ return nil
+}
+
// AddLink implements fs.InodeOperations.AddLink.
func (f *fileInodeOperations) AddLink() {
f.attrMu.Lock()
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 832914453..6ad5c5adb 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -242,11 +242,16 @@ func (d *Dir) Rename(ctx context.Context, oldParent *fs.Inode, oldName string, n
return rename(ctx, oldParent, oldName, newParent, newName, replacement)
}
-// StatFS implments fs.InodeOperations.StatFS.
+// StatFS implements fs.InodeOperations.StatFS.
func (*Dir) StatFS(context.Context) (fs.Info, error) {
return fsInfo, nil
}
+// Allocate implements fs.InodeOperations.Allocate.
+func (d *Dir) Allocate(ctx context.Context, node *fs.Inode, offset, length int64) error {
+ return d.ramfsDir.Allocate(ctx, node, offset, length)
+}
+
// Symlink is a symlink.
//
// +stateify savable
@@ -281,6 +286,7 @@ func (s *Symlink) StatFS(context.Context) (fs.Info, error) {
type Socket struct {
ramfs.Socket
fsutil.InodeNotTruncatable `state:"nosave"`
+ fsutil.InodeNotAllocatable `state:"nosave"`
}
// NewSocket returns a new socket with the provided permissions.
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 0fc777e67..8dc40e1f2 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -53,13 +53,14 @@ import (
// +stateify savable
type dirInodeOperations struct {
fsutil.InodeGenericChecker `state:"nosave"`
+ fsutil.InodeIsDirAllocate `state:"nosave"`
+ fsutil.InodeIsDirTruncate `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotRenameable `state:"nosave"`
- fsutil.InodeNotSymlink `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
- fsutil.InodeNotTruncatable `state:"nosave"`
+ fsutil.InodeNotSymlink `state:"nosave"`
fsutil.InodeVirtual `state:"nosave"`
fsutil.InodeSimpleAttributes
diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go
index 99188dddf..7c3739360 100644
--- a/pkg/sentry/kernel/pipe/node.go
+++ b/pkg/sentry/kernel/pipe/node.go
@@ -191,3 +191,7 @@ func (*inodeOperations) newHandleLocked(wakeupChan *chan struct{}) {
*wakeupChan = nil
}
}
+
+func (*inodeOperations) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return syserror.EPIPE
+}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 893322647..1764bb4b6 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1900,9 +1900,9 @@ func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
// Fallocate implements linux system call fallocate(2).
-// (well, not really, but at least we return the expected error codes)
func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := kdefs.FD(args[0].Int())
+ mode := args[1].Int64()
offset := args[2].Int64()
length := args[3].Int64()
@@ -1915,8 +1915,42 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
if offset < 0 || length <= 0 {
return 0, nil, syserror.EINVAL
}
+ if mode != 0 {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.ENOTSUP
+ }
+ if !file.Flags().Write {
+ return 0, nil, syserror.EBADF
+ }
+ if fs.IsPipe(file.Dirent.Inode.StableAttr) {
+ return 0, nil, syserror.ESPIPE
+ }
+ if fs.IsDir(file.Dirent.Inode.StableAttr) {
+ return 0, nil, syserror.EISDIR
+ }
+ if !fs.IsRegular(file.Dirent.Inode.StableAttr) {
+ return 0, nil, syserror.ENODEV
+ }
+ size := offset + length
+ if size < 0 {
+ return 0, nil, syserror.EFBIG
+ }
+ if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
+ t.SendSignal(&arch.SignalInfo{
+ Signo: int32(syscall.SIGXFSZ),
+ Code: arch.SignalInfoUser,
+ })
+ return 0, nil, syserror.EFBIG
+ }
+
+ if err := file.Dirent.Inode.Allocate(t, file.Dirent, offset, length); err != nil {
+ return 0, nil, err
+ }
+
+ // File length modified, generate notification.
+ file.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
- return 0, nil, syserror.EOPNOTSUPP
+ return 0, nil, nil
}
// Flock implements linux syscall flock(2).
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index c1b33c551..c369e4d64 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -99,7 +99,7 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
// args: cmd, ...
tr = newArgsTracker(0)
- case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX:
+ case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX, syscall.SYS_FALLOCATE:
// args: fd/addr, cmd, ...
tr = newArgsTracker(1)
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index a1ad49fb2..4faab2946 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -62,8 +62,14 @@ var allowedSyscalls = seccomp.SyscallRules{
},
syscall.SYS_EXIT: {},
syscall.SYS_EXIT_GROUP: {},
- syscall.SYS_FCHMOD: {},
- syscall.SYS_FCHOWNAT: {},
+ syscall.SYS_FALLOCATE: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0),
+ },
+ },
+ syscall.SYS_FCHMOD: {},
+ syscall.SYS_FCHOWNAT: {},
syscall.SYS_FCNTL: []seccomp.Rule{
{
seccomp.AllowAny{},
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 3a0806837..b185015b6 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -731,6 +731,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
return err
}
+// Allocate implements p9.File.
+func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error {
+ if !l.isOpen() {
+ return syscall.EBADF
+ }
+
+ if err := syscall.Fallocate(l.file.FD(), mode.ToLinux(), int64(offset), int64(length)); err != nil {
+ return extractErrno(err)
+ }
+ return nil
+}
+
// Rename implements p9.File; this should never be called.
func (l *localFile) Rename(p9.File, string) error {
panic("rename called directly")
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index d99733fc9..7ff4e4883 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -649,6 +649,8 @@ cc_binary(
srcs = ["fallocate.cc"],
linkstatic = 1,
deps = [
+ ":file_base",
+ "//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:temp_path",
"//test/util:test_main",
diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc
index 61b8acc7a..1c3d00287 100644
--- a/test/syscalls/linux/fallocate.cc
+++ b/test/syscalls/linux/fallocate.cc
@@ -12,45 +12,130 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <errno.h>
#include <fcntl.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <syscall.h>
+#include <time.h>
#include <unistd.h>
#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
namespace gvisor {
namespace testing {
-
namespace {
-// These tests are very rudimentary because fallocate is not
-// implemented. We just want to make sure the expected error codes are
-// returned.
+int fallocate(int fd, int mode, off_t offset, off_t len) {
+ return syscall(__NR_fallocate, fd, mode, offset, len);
+}
+
+class AllocateTest : public FileTest {
+ void SetUp() override { FileTest::SetUp(); }
+};
+
+TEST_F(AllocateTest, Fallocate) {
+ // Check that it starts at size zero.
+ struct stat buf;
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+
+ // Grow to ten bytes.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 10);
-TEST(FallocateTest, NotImplemented) {
- auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
- FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
+ // Allocate to a smaller size should be noop.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 10);
- // Test that a completely unassigned fallocate mode returns EOPNOTSUPP.
- ASSERT_THAT(fallocate(fd.get(), 0x80, 0, 32768),
- SyscallFailsWithErrno(EOPNOTSUPP));
+ // Grow again.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 20);
+
+ // Grow with offset.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 30);
+
+ // Grow with offset beyond EOF.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 40);
}
-TEST(FallocateTest, BadOffset) {
- auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
- FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
- ASSERT_THAT(fallocate(fd.get(), 0, -1, 32768), SyscallFailsWithErrno(EINVAL));
+TEST_F(AllocateTest, FallocateInvalid) {
+ // Invalid FD
+ EXPECT_THAT(fallocate(-1, 0, 0, 10), SyscallFailsWithErrno(EBADF));
+
+ // Negative offset and size.
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, 10),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, -1),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, -1),
+ SyscallFailsWithErrno(EINVAL));
}
-TEST(FallocateTest, BadLength) {
- auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
- FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
- ASSERT_THAT(fallocate(fd.get(), 0, 0, -1), SyscallFailsWithErrno(EINVAL));
+TEST_F(AllocateTest, FallocateReadonly) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+ EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(EBADF));
}
-} // namespace
+TEST_F(AllocateTest, FallocatePipe) {
+ int pipes[2];
+ EXPECT_THAT(pipe(pipes), SyscallSucceeds());
+ auto cleanup = Cleanup([&pipes] {
+ EXPECT_THAT(close(pipes[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipes[1]), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(fallocate(pipes[1], 0, 0, 10), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST_F(AllocateTest, FallocateChar) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDWR));
+ EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+}
+
+TEST_F(AllocateTest, FallocateRlimit) {
+ // Get the current rlimit and restore after test run.
+ struct rlimit initial_lim;
+ ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ auto cleanup = Cleanup([&initial_lim] {
+ EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ });
+
+ // Try growing past the file size limit.
+ sigset_t new_mask;
+ sigemptyset(&new_mask);
+ sigaddset(&new_mask, SIGXFSZ);
+ sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+ struct rlimit setlim = {};
+ setlim.rlim_cur = 1024;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 1025),
+ SyscallFailsWithErrno(EFBIG));
+
+ struct timespec timelimit = {};
+ timelimit.tv_sec = 10;
+ EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+}
+
+} // namespace
} // namespace testing
} // namespace gvisor