1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fs
import (
"errors"
"gvisor.dev/gvisor/pkg/context"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
var (
// ErrResolveViaReadlink is a special error value returned by
// InodeOperations.Getlink() to indicate that a link should be
// resolved automatically by walking to the path returned by
// InodeOperations.Readlink().
ErrResolveViaReadlink = errors.New("link should be resolved via Readlink()")
)
// TimeSpec contains access and modification timestamps. If either ATimeOmit or
// MTimeOmit is true, then the corresponding timestamp should not be updated.
// If either ATimeSetSystemTime or MTimeSetSystemTime is set, then the
// corresponding timestamp value should be ignored and the time will be set to
// the current system time.
type TimeSpec struct {
// ATime is the access timestamp to apply.
ATime ktime.Time
// ATimeOmit, if true, indicates that the access timestamp should not be
// updated.
ATimeOmit bool
// ATimeSetSystemTime, if true, indicates that ATime should be ignored and
// the access timestamp set to the current system time.
ATimeSetSystemTime bool
// MTime is the modification timestamp to apply.
MTime ktime.Time
// MTimeOmit, if true, indicates that the modification timestamp should not
// be updated.
MTimeOmit bool
// MTimeSetSystemTime, if true, indicates that MTime should be ignored and
// the modification timestamp set to the current system time.
MTimeSetSystemTime bool
}
// InodeOperations are operations on an Inode that diverge per file system.
//
// Objects that implement InodeOperations may cache file system "private"
// data that is useful for implementing these methods. In contrast, Inode
// contains state that is common to all Inodes; this state may be optionally
// used by InodeOperations. An object that implements InodeOperations may
// not take a reference on an Inode.
type InodeOperations interface {
// Release releases all private file system data held by this object.
// Once Release is called, this object is dead (no other methods will
// ever be called).
Release(context.Context)
// Lookup loads an Inode at name under dir into a Dirent. The name
// is a valid path component: it contains no "/"s and is not the empty
// string.
//
// Lookup may return one of:
//
// * A nil Dirent and a non-nil error. If the reason that Lookup failed
// was because the name does not exist under dir, then Lookup must
// return syserror.ENOENT.
//
// * If name does not exist under dir and the file system wishes this
// fact to be cached, a non-nil Dirent containing a nil Inode and a
// nil error. This is a negative Dirent and must have exactly one
// reference (at-construction reference).
//
// * If name does exist under this dir, a non-nil Dirent containing a
// non-nil Inode, and a nil error. File systems that take extra
// references on this Dirent should implement DirentOperations.
Lookup(ctx context.Context, dir *Inode, name string) (*Dirent, error)
// Create creates an Inode at name under dir and returns a new File
// whose Dirent backs the new Inode. Implementations must ensure that
// name does not already exist. Create may return one of:
//
// * A nil File and a non-nil error.
//
// * A non-nil File and a nil error. File.Dirent will be a new Dirent,
// with a single reference held by File. File systems that take extra
// references on this Dirent should implement DirentOperations.
//
// The caller must ensure that this operation is permitted.
Create(ctx context.Context, dir *Inode, name string, flags FileFlags, perm FilePermissions) (*File, error)
// CreateDirectory creates a new directory named name under dir.
// CreateDirectory should otherwise do the same as Create.
//
// The caller must ensure that this operation is permitted.
CreateDirectory(ctx context.Context, dir *Inode, name string, perm FilePermissions) error
// CreateLink creates a symbolic link under dir between newname
// and oldname. CreateLink should otherwise do the same as Create.
//
// The caller must ensure that this operation is permitted.
CreateLink(ctx context.Context, dir *Inode, oldname string, newname string) error
// CreateHardLink creates a hard link under dir between the target
// Inode and name.
//
// The caller must ensure that this operation is permitted.
CreateHardLink(ctx context.Context, dir *Inode, target *Inode, name string) error
// CreateFifo creates a new named pipe under dir at name.
//
// The caller must ensure that this operation is permitted.
CreateFifo(ctx context.Context, dir *Inode, name string, perm FilePermissions) error
// Remove removes the given named non-directory under dir.
//
// The caller must ensure that this operation is permitted.
Remove(ctx context.Context, dir *Inode, name string) error
// RemoveDirectory removes the given named directory under dir.
//
// The caller must ensure that this operation is permitted.
//
// RemoveDirectory should check that the directory to be
// removed is empty.
RemoveDirectory(ctx context.Context, dir *Inode, name string) error
// Rename atomically renames oldName under oldParent to newName under
// newParent, where oldParent and newParent are directories. inode is
// the Inode of this InodeOperations.
//
// If replacement is true, then newName already exists and this call
// will replace it with oldName.
//
// Implementations are responsible for rejecting renames that replace
// non-empty directories.
Rename(ctx context.Context, inode *Inode, oldParent *Inode, oldName string, newParent *Inode, newName string, replacement bool) error
// Bind binds a new socket under dir at the given name.
//
// The caller must ensure that this operation is permitted.
Bind(ctx context.Context, dir *Inode, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error)
// BoundEndpoint returns the socket endpoint at path stored in
// or generated by an Inode.
//
// The path is only relevant for generated endpoints because stored
// endpoints already know their path. It is ok for an endpoint to
// hold onto its path because the only way to change a bind
// address is to rebind the socket.
//
// This is valid iff the type of the Inode is a Socket, which
// generally implies that this Inode was created via CreateSocket.
// NOTE(review): this interface declares no CreateSocket method; Bind
// looks like the intended reference -- confirm.
//
// If there is no socket endpoint available, nil will be returned.
BoundEndpoint(inode *Inode, path string) transport.BoundEndpoint
// GetFile returns a new open File backed by a Dirent and FileFlags.
//
// Special Inode types may block using ctx.Sleeper. RegularFiles,
// Directories, and Symlinks must not block (see doCopyUp).
//
// The returned File will uniquely back an application fd.
GetFile(ctx context.Context, d *Dirent, flags FileFlags) (*File, error)
// UnstableAttr returns the most up-to-date "unstable" attributes of
// an Inode, where "unstable" means that they change in response to
// file system events.
UnstableAttr(ctx context.Context, inode *Inode) (UnstableAttr, error)
// GetXattr retrieves the value of the extended attribute specified by
// name. Inodes that do not support extended attributes return EOPNOTSUPP.
// Inodes that support extended attributes but don't have a value at name
// return ENODATA.
//
// If this is called through the getxattr(2) syscall, size indicates the
// size of the buffer that the application has allocated to hold the
// attribute value. If the value is larger than size, implementations may
// return ERANGE to indicate that the buffer is too small, but they are also
// free to ignore the hint entirely (i.e. the value returned may be larger
// than size). All size checking is done independently at the syscall layer.
GetXattr(ctx context.Context, inode *Inode, name string, size uint64) (string, error)
// SetXattr sets the value of the extended attribute specified by name.
// Inodes that do not support extended attributes return EOPNOTSUPP.
SetXattr(ctx context.Context, inode *Inode, name, value string, flags uint32) error
// ListXattr returns the set of all extended attribute names that
// have values. Inodes that do not support extended attributes return
// EOPNOTSUPP.
//
// If this is called through the listxattr(2) syscall, size indicates the
// size of the buffer that the application has allocated to hold the
// attribute list. If the list would be larger than size, implementations may
// return ERANGE to indicate that the buffer is too small, but they are also
// free to ignore the hint entirely. All size checking is done independently
// at the syscall layer.
ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error)
// RemoveXattr removes the extended attribute specified by name. Inodes
// that do not support extended attributes return EOPNOTSUPP.
RemoveXattr(ctx context.Context, inode *Inode, name string) error
// Check determines whether an Inode can be accessed with the
// requested permission mask using the context (which gives access
// to Credentials and UserNamespace).
Check(ctx context.Context, inode *Inode, p PermMask) bool
// SetPermissions sets new permissions for an Inode. Returns false
// if it was not possible to set the new permissions.
//
// The caller must ensure that this operation is permitted.
SetPermissions(ctx context.Context, inode *Inode, f FilePermissions) bool
// SetOwner sets the ownership for this file.
//
// If either UID or GID is set to auth.NoID, its value will not be
// changed.
//
// The caller must ensure that this operation is permitted.
SetOwner(ctx context.Context, inode *Inode, owner FileOwner) error
// SetTimestamps sets the access and modification timestamps of an
// Inode according to the access and modification times in the TimeSpec.
//
// If either ATimeOmit or MTimeOmit is set, then the corresponding
// timestamp is not updated.
//
// If either ATimeSetSystemTime or MTimeSetSystemTime is true, that
// timestamp is set to the current time instead.
//
// The caller must ensure that this operation is permitted.
SetTimestamps(ctx context.Context, inode *Inode, ts TimeSpec) error
// Truncate changes the size of an Inode. Truncate should not check
// permissions internally, as it is used for both sys_truncate and
// sys_ftruncate.
//
// Implementations need not check that size >= 0.
Truncate(ctx context.Context, inode *Inode, size int64) error
// Allocate allows the caller to reserve disk space for the inode.
// It's equivalent to fallocate(2) with 'mode=0'.
Allocate(ctx context.Context, inode *Inode, offset int64, length int64) error
// WriteOut writes cached Inode state to a backing filesystem in a
// synchronous manner.
//
// File systems that do not cache metadata or data via an Inode
// implement WriteOut as a no-op. File systems that are entirely in
// memory also implement WriteOut as a no-op. Otherwise file systems
// call Inode.Sync to write back page cached data and cached metadata
// followed by syncing writeback handles.
//
// It derives from include/linux/fs.h:super_operations->write_inode.
WriteOut(ctx context.Context, inode *Inode) error
// Readlink reads the symlink path of an Inode.
//
// Readlink is permitted to return a different path depending on ctx,
// the request originator.
//
// The caller must ensure that this operation is permitted.
//
// Readlink should check that Inode is a symlink and its content is
// at least readable.
Readlink(ctx context.Context, inode *Inode) (string, error)
// Getlink resolves a symlink to a target *Dirent.
//
// Filesystems that can resolve the link by walking to the path returned
// by Readlink should return (nil, ErrResolveViaReadlink), which
// triggers link resolution via Readlink and Lookup.
//
// Some links cannot be followed by Lookup. In this case, Getlink can
// return the Dirent of the link target. The caller holds a reference
// to the Dirent. Filesystems that return a non-nil *Dirent from Getlink
// cannot participate in an overlay because it is impossible for the
// overlay to ascertain whether or not the *Dirent should contain an
// overlayEntry.
//
// Any error returned from Getlink other than ErrResolveViaReadlink
// indicates the caller's inability to traverse this Inode as a link
// (e.g. syserror.ENOLINK indicates that the Inode is not a link,
// syscall.EPERM indicates that traversing the link is not allowed, etc).
Getlink(context.Context, *Inode) (*Dirent, error)
// Mappable returns a memmap.Mappable that provides memory mappings of the
// Inode's data. Mappable may return nil if this is not supported. The
// returned Mappable must remain valid until InodeOperations.Release is
// called.
Mappable(*Inode) memmap.Mappable
// The methods below require cleanup. Each carries a note naming the
// replacement it should eventually be removed in favor of.
// AddLink increments the hard link count of an Inode.
//
// To be removed in favor of Inode.IncLink.
AddLink()
// DropLink decrements the hard link count of an Inode.
//
// To be removed in favor of Inode.DecLink.
DropLink()
// NotifyStatusChange sets the status change time to the current time.
//
// To be removed in favor of updating the Inode's cached status change
// time.
NotifyStatusChange(ctx context.Context)
// IsVirtual indicates whether or not this corresponds to a virtual
// resource.
//
// If IsVirtual returns true, then caching will be disabled for this
// node, and fs.Dirent.Freeze() will not stop operations on the node.
//
// To be removed in favor of freezing specific mounts.
IsVirtual() bool
// StatFS returns a filesystem Info implementation or an error. If
// the filesystem does not support this operation (maybe in the future
// it will), then ENOSYS should be returned.
StatFS(context.Context) (Info, error)
}
|