summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/vfs/filesystem.go
blob: c43dcff3d422ec83bde7fae68bf60adc80bb6d01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vfs

import (
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/fspath"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)

// A Filesystem is a tree of nodes represented by Dentries, which forms part of
// a VirtualFilesystem.
//
// Filesystems are reference-counted. Unless otherwise specified, all
// Filesystem methods require that a reference is held.
//
// Filesystem is analogous to Linux's struct super_block.
//
// +stateify savable
type Filesystem struct {
	// refs is the reference count. refs is accessed using atomic memory
	// operations.
	refs int64

	// vfs is the VirtualFilesystem that uses this Filesystem. vfs is
	// immutable.
	vfs *VirtualFilesystem

	// impl is the FilesystemImpl associated with this Filesystem. impl is
	// immutable. This should be the last field in Dentry.
	impl FilesystemImpl
}

// Init must be called before first use of fs.
func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, impl FilesystemImpl) {
	fs.refs = 1
	fs.vfs = vfsObj
	fs.impl = impl
	vfsObj.filesystemsMu.Lock()
	vfsObj.filesystems[fs] = struct{}{}
	vfsObj.filesystemsMu.Unlock()
}

// VirtualFilesystem returns the containing VirtualFilesystem.
func (fs *Filesystem) VirtualFilesystem() *VirtualFilesystem {
	return fs.vfs
}

// Impl returns the FilesystemImpl associated with fs.
func (fs *Filesystem) Impl() FilesystemImpl {
	return fs.impl
}

// IncRef increments fs' reference count.
func (fs *Filesystem) IncRef() {
	if atomic.AddInt64(&fs.refs, 1) <= 1 {
		panic("Filesystem.IncRef() called without holding a reference")
	}
}

// TryIncRef increments fs' reference count and returns true. If fs' reference
// count is zero, TryIncRef does nothing and returns false.
//
// TryIncRef does not require that a reference is held on fs.
func (fs *Filesystem) TryIncRef() bool {
	for {
		refs := atomic.LoadInt64(&fs.refs)
		if refs <= 0 {
			return false
		}
		if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) {
			return true
		}
	}
}

// DecRef decrements fs' reference count.
func (fs *Filesystem) DecRef() {
	if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 {
		fs.vfs.filesystemsMu.Lock()
		delete(fs.vfs.filesystems, fs)
		fs.vfs.filesystemsMu.Unlock()
		fs.impl.Release()
	} else if refs < 0 {
		panic("Filesystem.decRef() called without holding a reference")
	}
}

// FilesystemImpl contains implementation details for a Filesystem.
// Implementations of FilesystemImpl should contain their associated Filesystem
// by value as their first field.
//
// All methods that take a ResolvingPath must resolve the path before
// performing any other checks, including rejection of the operation if not
// supported by the FilesystemImpl. This is because the final FilesystemImpl
// (responsible for actually implementing the operation) isn't known until path
// resolution is complete.
//
// Unless otherwise specified, FilesystemImpl methods are responsible for
// performing permission checks. In many cases, vfs package functions in
// permissions.go may be used to help perform these checks.
//
// When multiple specified error conditions apply to a given method call, the
// implementation may return any applicable errno unless otherwise specified,
// but returning the earliest error specified is preferable to maximize
// compatibility with Linux.
//
// All methods may return errors not specified, notably including:
//
// - ENOENT if a required path component does not exist.
//
// - ENOTDIR if an intermediate path component is not a directory.
//
// - Errors from vfs-package functions (ResolvingPath.Resolve*(),
// Mount.CheckBeginWrite(), permission-checking functions, etc.)
//
// For all methods that take or return linux.Statx, Statx.Uid and Statx.Gid
// should be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID
// and auth.KGID respectively).
//
// FilesystemImpl combines elements of Linux's struct super_operations and
// struct inode_operations, for reasons described in the documentation for
// Dentry.
type FilesystemImpl interface {
	// Release is called when the associated Filesystem reaches zero
	// references.
	Release()

	// Sync "causes all pending modifications to filesystem metadata and cached
	// file data to be written to the underlying [filesystem]", as by syncfs(2).
	Sync(ctx context.Context) error

	// AccessAt checks whether a user with creds can access the file at rp.
	AccessAt(ctx context.Context, rp *ResolvingPath, creds *auth.Credentials, ats AccessTypes) error

	// GetDentryAt returns a Dentry representing the file at rp. A reference is
	// taken on the returned Dentry.
	//
	// GetDentryAt does not correspond directly to a Linux syscall; it is used
	// in the implementation of:
	//
	// - Syscalls that need to resolve two paths: link(), linkat().
	//
	// - Syscalls that need to refer to a filesystem position outside the
	// context of a file description: chdir(), fchdir(), chroot(), mount(),
	// umount().
	GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error)

	// GetParentDentryAt returns a Dentry representing the directory at the
	// second-to-last path component in rp. (Note that, despite the name, this
	// is not necessarily the parent directory of the file at rp, since the
	// last path component in rp may be "." or "..".) A reference is taken on
	// the returned Dentry.
	//
	// GetParentDentryAt does not correspond directly to a Linux syscall; it is
	// used in the implementation of the rename() family of syscalls, which
	// must resolve the parent directories of two paths.
	//
	// Preconditions: !rp.Done().
	//
	// Postconditions: If GetParentDentryAt returns a nil error, then
	// rp.Final(). If GetParentDentryAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error)

	// LinkAt creates a hard link at rp representing the same file as vd. It
	// does not take ownership of references on vd.
	//
	// Errors:
	//
	// - If the last path component in rp is "." or "..", LinkAt returns
	// EEXIST.
	//
	// - If a file already exists at rp, LinkAt returns EEXIST.
	//
	// - If rp.MustBeDir(), LinkAt returns ENOENT.
	//
	// - If the directory in which the link would be created has been removed
	// by RmdirAt or RenameAt, LinkAt returns ENOENT.
	//
	// - If rp.Mount != vd.Mount(), LinkAt returns EXDEV.
	//
	// - If vd represents a directory, LinkAt returns EPERM.
	//
	// - If vd represents a file for which all existing links have been
	// removed, or a file created by open(O_TMPFILE|O_EXCL), LinkAt returns
	// ENOENT. Equivalently, if vd represents a file with a link count of 0 not
	// created by open(O_TMPFILE) without O_EXCL, LinkAt returns ENOENT.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If LinkAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error

	// MkdirAt creates a directory at rp.
	//
	// Errors:
	//
	// - If the last path component in rp is "." or "..", MkdirAt returns
	// EEXIST.
	//
	// - If a file already exists at rp, MkdirAt returns EEXIST.
	//
	// - If the directory in which the new directory would be created has been
	// removed by RmdirAt or RenameAt, MkdirAt returns ENOENT.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If MkdirAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error

	// MknodAt creates a regular file, device special file, or named pipe at
	// rp.
	//
	// Errors:
	//
	// - If the last path component in rp is "." or "..", MknodAt returns
	// EEXIST.
	//
	// - If a file already exists at rp, MknodAt returns EEXIST.
	//
	// - If rp.MustBeDir(), MknodAt returns ENOENT.
	//
	// - If the directory in which the file would be created has been removed
	// by RmdirAt or RenameAt, MknodAt returns ENOENT.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If MknodAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error

	// OpenAt returns an FileDescription providing access to the file at rp. A
	// reference is taken on the returned FileDescription.
	//
	// Errors:
	//
	// - If opts.Flags specifies O_TMPFILE and this feature is unsupported by
	// the implementation, OpenAt returns EOPNOTSUPP. (All other unsupported
	// features are silently ignored, consistently with Linux's open*(2).)
	OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error)

	// ReadlinkAt returns the target of the symbolic link at rp.
	//
	// Errors:
	//
	// - If the file at rp is not a symbolic link, ReadlinkAt returns EINVAL.
	ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error)

	// RenameAt renames the file named oldName in directory oldParentVD to rp.
	// It does not take ownership of references on oldParentVD.
	//
	// Errors [1]:
	//
	// - If opts.Flags specifies unsupported options, RenameAt returns EINVAL.
	//
	// - If the last path component in rp is "." or "..", and opts.Flags
	// contains RENAME_NOREPLACE, RenameAt returns EEXIST.
	//
	// - If the last path component in rp is "." or "..", and opts.Flags does
	// not contain RENAME_NOREPLACE, RenameAt returns EBUSY.
	//
	// - If rp.Mount != oldParentVD.Mount(), RenameAt returns EXDEV.
	//
	// - If the renamed file is not a directory, and opts.MustBeDir is true,
	// RenameAt returns ENOTDIR.
	//
	// - If renaming would replace an existing file and opts.Flags contains
	// RENAME_NOREPLACE, RenameAt returns EEXIST.
	//
	// - If there is no existing file at rp and opts.Flags contains
	// RENAME_EXCHANGE, RenameAt returns ENOENT.
	//
	// - If there is an existing non-directory file at rp, and rp.MustBeDir()
	// is true, RenameAt returns ENOTDIR.
	//
	// - If the renamed file is not a directory, opts.Flags does not contain
	// RENAME_EXCHANGE, and rp.MustBeDir() is true, RenameAt returns ENOTDIR.
	// (This check is not subsumed by the check for directory replacement below
	// since it applies even if there is no file to replace.)
	//
	// - If the renamed file is a directory, and the new parent directory of
	// the renamed file is either the renamed directory or a descendant
	// subdirectory of the renamed directory, RenameAt returns EINVAL.
	//
	// - If renaming would exchange the renamed file with an ancestor directory
	// of the renamed file, RenameAt returns EINVAL.
	//
	// - If renaming would replace an ancestor directory of the renamed file,
	// RenameAt returns ENOTEMPTY. (This check would be subsumed by the
	// non-empty directory check below; however, this check takes place before
	// the self-rename check.)
	//
	// - If the renamed file would replace or exchange with itself (i.e. the
	// source and destination paths resolve to the same file), RenameAt returns
	// nil, skipping the checks described below.
	//
	// - If the source or destination directory is not writable by the provider
	// of rp.Credentials(), RenameAt returns EACCES.
	//
	// - If the renamed file is a directory, and renaming would replace a
	// non-directory file, RenameAt returns ENOTDIR.
	//
	// - If the renamed file is not a directory, and renaming would replace a
	// directory, RenameAt returns EISDIR.
	//
	// - If the new parent directory of the renamed file has been removed by
	// RmdirAt or a preceding call to RenameAt, RenameAt returns ENOENT.
	//
	// - If the renamed file is a directory, it is not writable by the
	// provider of rp.Credentials(), and the source and destination parent
	// directories are different, RenameAt returns EACCES. (This is nominally
	// required to change the ".." entry in the renamed directory.)
	//
	// - If renaming would replace a non-empty directory, RenameAt returns
	// ENOTEMPTY.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink(). oldName is not "." or "..".
	//
	// Postconditions: If RenameAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	//
	// [1] "The worst of all namespace operations - renaming directory.
	// "Perverted" doesn't even start to describe it. Somebody in UCB had a
	// heck of a trip..." - fs/namei.c:vfs_rename()
	RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error

	// RmdirAt removes the directory at rp.
	//
	// Errors:
	//
	// - If the last path component in rp is ".", RmdirAt returns EINVAL.
	//
	// - If the last path component in rp is "..", RmdirAt returns ENOTEMPTY.
	//
	// - If no file exists at rp, RmdirAt returns ENOENT.
	//
	// - If the file at rp exists but is not a directory, RmdirAt returns
	// ENOTDIR.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If RmdirAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	RmdirAt(ctx context.Context, rp *ResolvingPath) error

	// SetStatAt updates metadata for the file at the given path.
	//
	// Errors:
	//
	// - If opts specifies unsupported options, SetStatAt returns EINVAL.
	SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error

	// StatAt returns metadata for the file at rp.
	StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error)

	// StatFSAt returns metadata for the filesystem containing the file at rp.
	// (This method takes a path because a FilesystemImpl may consist of any
	// number of constituent filesystems.)
	StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error)

	// SymlinkAt creates a symbolic link at rp referring to the given target.
	//
	// Errors:
	//
	// - If the last path component in rp is "." or "..", SymlinkAt returns
	// EEXIST.
	//
	// - If a file already exists at rp, SymlinkAt returns EEXIST.
	//
	// - If rp.MustBeDir(), SymlinkAt returns ENOENT.
	//
	// - If the directory in which the symbolic link would be created has been
	// removed by RmdirAt or RenameAt, SymlinkAt returns ENOENT.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If SymlinkAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error

	// UnlinkAt removes the file at rp.
	//
	// Errors:
	//
	// - If the last path component in rp is "." or "..", UnlinkAt returns
	// EISDIR.
	//
	// - If no file exists at rp, UnlinkAt returns ENOENT.
	//
	// - If rp.MustBeDir(), and the file at rp exists and is not a directory,
	// UnlinkAt returns ENOTDIR.
	//
	// - If the file at rp exists but is a directory, UnlinkAt returns EISDIR.
	//
	// Preconditions: !rp.Done(). For the final path component in rp,
	// !rp.ShouldFollowSymlink().
	//
	// Postconditions: If UnlinkAt returns an error returned by
	// ResolvingPath.Resolve*(), then !rp.Done().
	UnlinkAt(ctx context.Context, rp *ResolvingPath) error

	// ListxattrAt returns all extended attribute names for the file at rp.
	//
	// Errors:
	//
	// - If extended attributes are not supported by the filesystem,
	// ListxattrAt returns nil. (See FileDescription.Listxattr for an
	// explanation.)
	ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error)

	// GetxattrAt returns the value associated with the given extended
	// attribute for the file at rp.
	//
	// Errors:
	//
	// - If extended attributes are not supported by the filesystem, GetxattrAt
	// returns ENOTSUP.
	GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error)

	// SetxattrAt changes the value associated with the given extended
	// attribute for the file at rp.
	//
	// Errors:
	//
	// - If extended attributes are not supported by the filesystem, SetxattrAt
	// returns ENOTSUP.
	SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error

	// RemovexattrAt removes the given extended attribute from the file at rp.
	//
	// Errors:
	//
	// - If extended attributes are not supported by the filesystem,
	// RemovexattrAt returns ENOTSUP.
	RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error

	// PrependPath prepends a path from vd to vd.Mount().Root() to b.
	//
	// If vfsroot.Ok(), it is the contextual VFS root; if it is encountered
	// before vd.Mount().Root(), PrependPath should stop prepending path
	// components and return a PrependPathAtVFSRootError.
	//
	// If traversal of vd.Dentry()'s ancestors encounters an independent
	// ("root") Dentry that is not vd.Mount().Root() (i.e. vd.Dentry() is not a
	// descendant of vd.Mount().Root()), PrependPath should stop prepending
	// path components and return a PrependPathAtNonMountRootError.
	//
	// Filesystems for which Dentries do not have meaningful paths may prepend
	// an arbitrary descriptive string to b and then return a
	// PrependPathSyntheticError.
	//
	// Most implementations can acquire the appropriate locks to ensure that
	// Dentry.Name() and Dentry.Parent() are fixed for vd.Dentry() and all of
	// its ancestors, then call GenericPrependPath.
	//
	// Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl.
	PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error

	// TODO: inotify_add_watch(); bind()
}

// PrependPathAtVFSRootError is returned by implementations of
// FilesystemImpl.PrependPath() when they encounter the contextual VFS root.
type PrependPathAtVFSRootError struct{}

// Error implements error.Error.
func (PrependPathAtVFSRootError) Error() string {
	return "vfs.FilesystemImpl.PrependPath() reached VFS root"
}

// PrependPathAtNonMountRootError is returned by implementations of
// FilesystemImpl.PrependPath() when they encounter an independent ancestor
// Dentry that is not the Mount root.
type PrependPathAtNonMountRootError struct{}

// Error implements error.Error.
func (PrependPathAtNonMountRootError) Error() string {
	return "vfs.FilesystemImpl.PrependPath() reached root other than Mount root"
}

// PrependPathSyntheticError is returned by implementations of
// FilesystemImpl.PrependPath() for which prepended names do not represent real
// paths.
type PrependPathSyntheticError struct{}

// Error implements error.Error.
func (PrependPathSyntheticError) Error() string {
	return "vfs.FilesystemImpl.PrependPath() prepended synthetic name"
}