From 163ab5e9bab4f14923433967656d20f169d0f904 Mon Sep 17 00:00:00 2001
From: Jamie Liu
Date: Thu, 18 Jul 2019 15:09:14 -0700
Subject: Sentry virtual filesystem, v2

Major differences from the current ("v1") sentry VFS:

- Path resolution is Filesystem-driven (FilesystemImpl methods call vfs.ResolvingPath methods) rather than VFS-driven (the fs package owns a Dirent tree and calls fs.InodeOperations methods to populate it). This drastically improves performance, primarily by reducing overhead from inefficient synchronization and indirection. It also makes it possible to implement remote filesystem protocols that translate FS system calls into single RPCs, rather than having to make (at least) one RPC per path component, significantly reducing the latency of remote filesystems (especially during cold starts and for uncacheable shared filesystems).

- Mounts are correctly represented as a separate check based on contextual state (current mount) rather than direct replacement in an fs.Dirent tree. This makes it possible to support (non-recursive) bind mounts and mount namespaces.

Included in this CL is fsimpl/memfs, an incomplete in-memory filesystem that exists primarily to demonstrate intended filesystem implementation patterns and to support benchmarking:

BenchmarkVFS1TmpfsStat/1-6 3000000 497 ns/op
BenchmarkVFS1TmpfsStat/2-6 2000000 676 ns/op
BenchmarkVFS1TmpfsStat/3-6 2000000 904 ns/op
BenchmarkVFS1TmpfsStat/8-6 1000000 1944 ns/op
BenchmarkVFS1TmpfsStat/64-6 100000 14067 ns/op
BenchmarkVFS1TmpfsStat/100-6 50000 21700 ns/op
BenchmarkVFS2MemfsStat/1-6 10000000 197 ns/op
BenchmarkVFS2MemfsStat/2-6 5000000 233 ns/op
BenchmarkVFS2MemfsStat/3-6 5000000 268 ns/op
BenchmarkVFS2MemfsStat/8-6 3000000 477 ns/op
BenchmarkVFS2MemfsStat/64-6 500000 2592 ns/op
BenchmarkVFS2MemfsStat/100-6 300000 4045 ns/op
BenchmarkVFS1TmpfsMountStat/1-6 2000000 679 ns/op
BenchmarkVFS1TmpfsMountStat/2-6 2000000 912 ns/op
BenchmarkVFS1TmpfsMountStat/3-6 1000000 1113 ns/op
BenchmarkVFS1TmpfsMountStat/8-6 1000000 2118 ns/op
BenchmarkVFS1TmpfsMountStat/64-6 100000 14251 ns/op
BenchmarkVFS1TmpfsMountStat/100-6 100000 22397 ns/op
BenchmarkVFS2MemfsMountStat/1-6 5000000 317 ns/op
BenchmarkVFS2MemfsMountStat/2-6 5000000 361 ns/op
BenchmarkVFS2MemfsMountStat/3-6 5000000 387 ns/op
BenchmarkVFS2MemfsMountStat/8-6 3000000 582 ns/op
BenchmarkVFS2MemfsMountStat/64-6 500000 2699 ns/op
BenchmarkVFS2MemfsMountStat/100-6 300000 4133 ns/op

From this we can infer that, on this machine:

- Constant cost for tmpfs stat() is ~160ns in VFS2 and ~280ns in VFS1.

- Per-path-component cost is ~35ns in VFS2 and ~215ns in VFS1, a difference of about 6x.

- The cost of crossing a mount boundary is about 80ns in VFS2 (MemfsMountStat/1 does approximately the same amount of work as MemfsStat/2, except that it also crosses a mount boundary). This is an inescapable cost of the separate mount lookup needed to support bind mounts and mount namespaces.

PiperOrigin-RevId: 258853946 --- pkg/abi/linux/file.go | 101 ++++++++++++++++++++++++++++++-------------------- pkg/abi/linux/fs.go | 9 +++++ 2 files changed, 70 insertions(+), 40 deletions(-) (limited to 'pkg/abi/linux') diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 285338e47..4b0ea33dc 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -24,25 +24,27 @@ import ( // Constants for open(2). const ( - O_ACCMODE = 00000003 - O_RDONLY = 00000000 - O_WRONLY = 00000001 - O_RDWR = 00000002 - O_CREAT = 00000100 - O_EXCL = 00000200 - O_NOCTTY = 00000400 - O_TRUNC = 00001000 - O_APPEND = 00002000 - O_NONBLOCK = 00004000 - O_DSYNC = 00010000 - O_ASYNC = 00020000 - O_DIRECT = 00040000 - O_LARGEFILE = 00100000 - O_DIRECTORY = 00200000 - O_NOFOLLOW = 00400000 - O_CLOEXEC = 02000000 - O_SYNC = 04000000 + O_ACCMODE = 000000003 + O_RDONLY = 000000000 + O_WRONLY = 000000001 + O_RDWR = 000000002 + O_CREAT = 000000100 + O_EXCL = 000000200 + O_NOCTTY = 000000400 + O_TRUNC = 000001000 + O_APPEND = 000002000 + O_NONBLOCK = 000004000 + O_DSYNC = 000010000 + O_ASYNC = 000020000 + O_DIRECT = 000040000 + O_LARGEFILE = 000100000 + O_DIRECTORY = 000200000 + O_NOFOLLOW = 000400000 + O_NOATIME = 001000000 + O_CLOEXEC = 002000000 + O_SYNC = 004000000 // __O_SYNC in Linux O_PATH = 010000000 + O_TMPFILE = 020000000 // __O_TMPFILE in Linux ) // Constants for fstatat(2). @@ -124,14 +126,23 @@ const ( // Values for mode_t. 
const ( - FileTypeMask = 0170000 - ModeSocket = 0140000 - ModeSymlink = 0120000 - ModeRegular = 0100000 - ModeBlockDevice = 060000 - ModeDirectory = 040000 - ModeCharacterDevice = 020000 - ModeNamedPipe = 010000 + S_IFMT = 0170000 + S_IFSOCK = 0140000 + S_IFLNK = 0120000 + S_IFREG = 0100000 + S_IFBLK = 060000 + S_IFDIR = 040000 + S_IFCHR = 020000 + S_IFIFO = 010000 + + FileTypeMask = S_IFMT + ModeSocket = S_IFSOCK + ModeSymlink = S_IFLNK + ModeRegular = S_IFREG + ModeBlockDevice = S_IFBLK + ModeDirectory = S_IFDIR + ModeCharacterDevice = S_IFCHR + ModeNamedPipe = S_IFIFO ModeSetUID = 04000 ModeSetGID = 02000 @@ -152,6 +163,19 @@ const ( PermissionsMask = 0777 ) +// Values for linux_dirent64.d_type. +const ( + DT_UNKNOWN = 0 + DT_FIFO = 1 + DT_CHR = 2 + DT_DIR = 4 + DT_BLK = 6 + DT_REG = 8 + DT_LNK = 10 + DT_SOCK = 12 + DT_WHT = 14 +) + // Values for preadv2/pwritev2. const ( RWF_HIPRI = 0x00000001 @@ -179,19 +203,6 @@ type Stat struct { _ [3]int64 } -// File types. -const ( - DT_BLK = 0x6 - DT_CHR = 0x2 - DT_DIR = 0x4 - DT_FIFO = 0x1 - DT_LNK = 0xa - DT_REG = 0x8 - DT_SOCK = 0xc - DT_UNKNOWN = 0x0 - DT_WHT = 0xe -) - // SizeOfStat is the size of a Stat struct. var SizeOfStat = binary.Size(Stat{}) @@ -222,6 +233,17 @@ const ( STATX__RESERVED = 0x80000000 ) +// Bitmasks for Statx.Attributes and Statx.AttributesMask, from +// include/uapi/linux/stat.h. +const ( + STATX_ATTR_COMPRESSED = 0x00000004 + STATX_ATTR_IMMUTABLE = 0x00000010 + STATX_ATTR_APPEND = 0x00000020 + STATX_ATTR_NODUMP = 0x00000040 + STATX_ATTR_ENCRYPTED = 0x00000800 + STATX_ATTR_AUTOMOUNT = 0x00001000 +) + // Statx represents struct statx. 
type Statx struct { Mask uint32 @@ -231,7 +253,6 @@ type Statx struct { UID uint32 GID uint32 Mode uint16 - _ uint16 Ino uint64 Size uint64 Blocks uint64 diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go index 549e0fb93..b416e3472 100644 --- a/pkg/abi/linux/fs.go +++ b/pkg/abi/linux/fs.go @@ -77,6 +77,15 @@ type Statfs struct { Spare [4]uint64 } +// Whence argument to lseek(2), from include/uapi/linux/fs.h. +const ( + SEEK_SET = 0 + SEEK_CUR = 1 + SEEK_END = 2 + SEEK_DATA = 3 + SEEK_HOLE = 4 +) + // Sync_file_range flags, from include/uapi/linux/fs.h const ( SYNC_FILE_RANGE_WAIT_BEFORE = 1 -- cgit v1.2.3