diff options
author | Brielle Broder <bbroder@google.com> | 2018-08-10 14:31:56 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-08-10 14:33:20 -0700 |
commit | 4ececd8e8d1124cdd0884480bda5fabd2b48aa8d (patch) | |
tree | 5a4d6673db4dad19df91a7901d4e6387e09d83ab | |
parent | d5b702b64f05a200ed94f0cd977d3f84dae01162 (diff) |
Enable checkpoint/restore in cases of UDS use.
Previously, processes which used file-system Unix Domain Sockets could not be
checkpoint-ed in runsc because the sockets were saved with their inode
numbers which do not necessarily remain the same upon restore. Now,
the sockets are also saved with their paths so that the new inodes
can be determined for the sockets based on these paths after restoring.
Tests for cases with UDS use are included. Test cleanup to come.
PiperOrigin-RevId: 208268781
Change-Id: Ieaa5d5d9a64914ca105cae199fd8492710b1d7ec
-rw-r--r-- | pkg/sentry/fs/dirent.go | 53 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/inode.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/gofer_test.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/path.go | 37 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/session.go | 149 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/session_state.go | 26 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/socket.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_operations.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_overlay.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/ramfs/dir.go | 15 | ||||
-rw-r--r-- | pkg/sentry/fs/ramfs/ramfs.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/dir.go | 4 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 4 | ||||
-rw-r--r-- | runsc/container/BUILD | 10 | ||||
-rw-r--r-- | runsc/container/container_test.go | 228 | ||||
-rw-r--r-- | runsc/container/uds_test_app.go | 83 |
18 files changed, 541 insertions, 92 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 4658d044f..821cc5789 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -385,6 +385,19 @@ func (d *Dirent) fullName(root *Dirent) (string, bool) { return s, reachable } +// MountRoot finds and returns the mount-root for a given dirent. +func (d *Dirent) MountRoot() *Dirent { + renameMu.RLock() + defer renameMu.RUnlock() + + mountRoot := d + for !mountRoot.mounted && mountRoot.parent != nil { + mountRoot = mountRoot.parent + } + mountRoot.IncRef() + return mountRoot +} + func (d *Dirent) freeze() { if d.frozen { // Already frozen. @@ -665,6 +678,16 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi } child := file.Dirent + d.finishCreate(child, name) + + // Return the reference and the new file. When the last reference to + // the file is dropped, file.Dirent may no longer be cached. + return file, nil +} + +// finishCreate validates the created file, adds it as a child of this dirent, +// and notifies any watchers. +func (d *Dirent) finishCreate(child *Dirent, name string) { // Sanity check c, its name must be consistent. if child.name != name { panic(fmt.Sprintf("create from %q to %q returned unexpected name %q", d.name, name, child.name)) @@ -697,10 +720,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi // Allow the file system to take extra references on c. child.maybeExtendReference() - - // Return the reference and the new file. When the last reference to - // the file is dropped, file.Dirent may no longer be cached. - return file, nil } // genericCreate executes create if name does not exist. Removes a negative Dirent at name if @@ -718,11 +737,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c return syscall.ENOENT } - // Execute the create operation. - if err := create(); err != nil { - return err - } - // Remove any negative Dirent. We've already asserted above with d.exists // that the only thing remaining here can be a negative Dirent. if w, ok := d.children[name]; ok { @@ -745,7 +759,8 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c w.Drop() } - return nil + // Execute the create operation. + return create() } // CreateLink creates a new link in this directory. @@ -797,23 +812,29 @@ func (d *Dirent) CreateDirectory(ctx context.Context, root *Dirent, name string, } // Bind satisfies the InodeOperations interface; otherwise same as GetFile. -func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, socket unix.BoundEndpoint, perms FilePermissions) error { +func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data unix.BoundEndpoint, perms FilePermissions) (*Dirent, error) { d.dirMu.Lock() defer d.dirMu.Unlock() d.mu.Lock() defer d.mu.Unlock() + var childDir *Dirent err := d.genericCreate(ctx, root, name, func() error { - if err := d.Inode.Bind(ctx, name, socket, perms); err != nil { - return err + var e error + childDir, e = d.Inode.Bind(ctx, name, data, perms) + if e != nil { + return e } - d.Inode.Watches.Notify(name, linux.IN_CREATE, 0) + d.finishCreate(childDir, name) return nil }) if err == syscall.EEXIST { - return syscall.EADDRINUSE + return nil, syscall.EADDRINUSE + } + if err != nil { + return nil, err } - return err + return childDir, err } // CreateFifo creates a new named pipe under this dirent. diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 177396fdc..3479f2fad 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -254,8 +254,8 @@ func (InodeNotDirectory) CreateDirectory(context.Context, *fs.Inode, string, fs. } // Bind implements fs.InodeOperations.Bind. -func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error { - return syserror.ENOTDIR +func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.ENOTDIR } // CreateFifo implements fs.InodeOperations.CreateFifo. diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 764b530cb..45fdaacfd 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -74,7 +74,7 @@ func root(ctx context.Context, cp cachePolicy, mode p9.FileMode, size uint64) (* } rootFile := goodMockFile(mode, size) - sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr) + sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr, false /* socket */) m := fs.NewMountSource(s, &filesystem{}, fs.MountSourceFlags{}) return rootFile, fs.NewInode(rootInodeOperations, m, sattr), nil } diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index bfeab3833..15e9863fb 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -57,7 +57,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string } // Construct the Inode operations. - sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) + sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) // Construct a positive Dirent. return fs.NewDirent(fs.NewInode(node, dir.MountSource, sattr), name), nil @@ -113,7 +113,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string } // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) // Construct the positive Dirent. d := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) @@ -175,10 +175,10 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s return nil } -// Bind implements InodeOperations. -func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) error { +// Bind implements InodeOperations.Bind. +func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { if i.session().endpoints == nil { - return syscall.EOPNOTSUPP + return nil, syscall.EOPNOTSUPP } // Create replaces the directory fid with the newly created/opened @@ -186,7 +186,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // this node. _, newFile, err := i.fileState.file.walk(ctx, nil) if err != nil { - return err + return nil, err } // Stabilize the endpoint map while creation is in progress. @@ -198,7 +198,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, owner := fs.FileOwnerFromContext(ctx) hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) if err != nil { - return err + return nil, err } // We're not going to use this file. hostFile.Close() @@ -206,10 +206,10 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, i.touchModificationTime(ctx, dir) // Get the attributes of the file to create inode key. - qid, _, attr, err := getattr(ctx, newFile) + qid, mask, attr, err := getattr(ctx, newFile) if err != nil { newFile.close(ctx) - return err + return nil, err } key := device.MultiDeviceKey{ @@ -217,9 +217,24 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, SecondaryDevice: i.session().connID, Inode: qid.Path, } - i.session().endpoints.add(key, ep) - return nil + // Create child dirent. + + // Get an unopened p9.File for the file we created so that it can be + // cloned and re-opened multiple times after creation. + _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + if err != nil { + newFile.close(ctx) + return nil, err + } + + // Construct the InodeOperations. + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) + + // Construct the positive Dirent. + childDir := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) + i.session().endpoints.add(key, childDir, ep) + return childDir, nil } // CreateFifo implements fs.InodeOperations.CreateFifo. Gofer nodes do not support the diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 648a11435..bfb1154dc 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -15,6 +15,7 @@ package gofer import ( + "fmt" "sync" "gvisor.googlesource.com/gvisor/pkg/p9" @@ -28,39 +29,60 @@ import ( ) // +stateify savable -type endpointMap struct { +type endpointMaps struct { + // mu protexts the direntMap, the keyMap, and the pathMap below. mu sync.RWMutex `state:"nosave"` - // TODO: Make map with private unix sockets savable. - m map[device.MultiDeviceKey]unix.BoundEndpoint + + // direntMap links sockets to their dirents. + // It is filled concurrently with the keyMap and is stored upon save. + // Before saving, this map is used to populate the pathMap. + direntMap map[unix.BoundEndpoint]*fs.Dirent + + // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets. + // It is not stored during save because the inode ID may change upon restore. + keyMap map[device.MultiDeviceKey]unix.BoundEndpoint `state:"nosave"` + + // pathMap links the sockets to their paths. + // It is filled before saving from the direntMap and is stored upon save. + // Upon restore, this map is used to re-populate the keyMap. + pathMap map[unix.BoundEndpoint]string } -// add adds the endpoint to the map. +// add adds the endpoint to the maps. +// A reference is taken on the dirent argument. // -// Precondition: map must have been locked with 'lock'. -func (e *endpointMap) add(key device.MultiDeviceKey, ep unix.BoundEndpoint) { - e.m[key] = ep +// Precondition: maps must have been locked with 'lock'. +func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep unix.BoundEndpoint) { + e.keyMap[key] = ep + d.IncRef() + e.direntMap[ep] = d } -// remove deletes the key from the map. +// remove deletes the key from the maps. // -// Precondition: map must have been locked with 'lock'. -func (e *endpointMap) remove(key device.MultiDeviceKey) { - delete(e.m, key) +// Precondition: maps must have been locked with 'lock'. +func (e *endpointMaps) remove(key device.MultiDeviceKey) { + endpoint := e.get(key) + delete(e.keyMap, key) + + d := e.direntMap[endpoint] + d.DecRef() + delete(e.direntMap, endpoint) } // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *endpointMap) lock() func() { +func (e *endpointMaps) lock() func() { e.mu.Lock() return func() { e.mu.Unlock() } } -func (e *endpointMap) get(key device.MultiDeviceKey) unix.BoundEndpoint { - e.mu.RLock() - ep := e.m[key] - e.mu.RUnlock() - return ep +// get returns the endpoint mapped to the given key. +// +// Precondition: maps must have been locked for reading. +func (e *endpointMaps) get(key device.MultiDeviceKey) unix.BoundEndpoint { + return e.keyMap[key] } // session holds state for each 9p session established during sys_mount. @@ -115,7 +137,7 @@ type session struct { // TODO: there are few possible races with someone stat'ing the // file and another deleting it concurrently, where the file will not be // reported as socket file. - endpoints *endpointMap `state:"wait"` + endpoints *endpointMaps `state:"wait"` } // Destroy tears down the session. @@ -149,7 +171,9 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { // newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes // (p9.QID, p9.AttrMask, p9.Attr). -func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { +// +// Endpoints lock must not be held if socket == false. +func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) { deviceKey := device.MultiDeviceKey{ Device: attr.RDev, SecondaryDevice: s.connID, @@ -164,10 +188,16 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p } if s.endpoints != nil { - // If unix sockets are allowed on this filesystem, check if this file is - // supposed to be a socket file. - if s.endpoints.get(deviceKey) != nil { + if socket { sattr.Type = fs.Socket + } else { + // If unix sockets are allowed on this filesystem, check if this file is + // supposed to be a socket file. + unlock := s.endpoints.lock() + if s.endpoints.get(deviceKey) != nil { + sattr.Type = fs.Socket + } + unlock() } } @@ -215,7 +245,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF } if o.privateunixsocket { - s.endpoints = &endpointMap{m: make(map[device.MultiDeviceKey]unix.BoundEndpoint)} + s.endpoints = newEndpointMaps() } // Construct the MountSource with the session and superBlockFlags. @@ -248,6 +278,77 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr) + sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr, false) return fs.NewInode(iops, m, sattr), nil } + +// newEndpointMaps creates a new endpointMaps. +func newEndpointMaps() *endpointMaps { + return &endpointMaps{ + direntMap: make(map[unix.BoundEndpoint]*fs.Dirent), + keyMap: make(map[device.MultiDeviceKey]unix.BoundEndpoint), + pathMap: make(map[unix.BoundEndpoint]string), + } +} + +// fillKeyMap populates key and dirent maps upon restore from saved +// pathmap. +func (s *session) fillKeyMap(ctx context.Context) error { + unlock := s.endpoints.lock() + defer unlock() + + for ep, dirPath := range s.endpoints.pathMap { + _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) + if err != nil { + return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) + } + + qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll()) + if err != nil { + return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err) + } + + key := device.MultiDeviceKey{ + Device: attr.RDev, + SecondaryDevice: s.connID, + Inode: qid.Path, + } + + s.endpoints.keyMap[key] = ep + } + return nil +} + +// fillPathMap populates paths for endpoints from dirents in direntMap +// before save. +func (s *session) fillPathMap() error { + unlock := s.endpoints.lock() + defer unlock() + + for ep, dir := range s.endpoints.direntMap { + mountRoot := dir.MountRoot() + defer mountRoot.DecRef() + dirPath, _ := dir.FullName(mountRoot) + if dirPath == "" { + return fmt.Errorf("error getting path from dirent") + } + s.endpoints.pathMap[ep] = dirPath + } + return nil +} + +// restoreEndpointMaps recreates and fills the key and dirent maps. +func (s *session) restoreEndpointMaps(ctx context.Context) error { + // When restoring, only need to create the keyMap because the dirent and path + // maps got stored through the save. + s.endpoints.keyMap = make(map[device.MultiDeviceKey]unix.BoundEndpoint) + if err := s.fillKeyMap(ctx); err != nil { + return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) + } + + // Re-create pathMap because it can no longer be trusted as socket paths can + // change while process continues to run. Empty pathMap will be re-filled upon + // next save. + s.endpoints.pathMap = make(map[unix.BoundEndpoint]string) + return nil +} diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 0154810c8..8e6424492 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -18,16 +18,17 @@ import ( "fmt" "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/unet" ) // beforeSave is invoked by stateify. -// -// TODO: Make map with private unix sockets savable. -func (e *endpointMap) beforeSave() { - if len(e.m) != 0 { - panic("EndpointMap with existing private unix sockets cannot be saved") +func (s *session) beforeSave() { + if s.endpoints != nil { + if err := s.fillPathMap(); err != nil { + panic("failed to save paths to endpoint map before saving" + err.Error()) + } } } @@ -72,6 +73,9 @@ func (s *session) afterLoad() { if opts.aname != s.aname { panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname)) } + + // Check if endpointMaps exist when uds sockets are enabled + // (only pathmap will actualy have been saved). if opts.privateunixsocket != (s.endpoints != nil) { panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil)) } @@ -96,4 +100,16 @@ func (s *session) afterLoad() { if err != nil { panic(fmt.Sprintf("failed to attach to aname: %v", err)) } + + // If private unix sockets are enabled, create and fill the session's endpoint + // maps. + if opts.privateunixsocket { + // TODO: Context is not plumbed to save/restore. + ctx := &dummyClockContext{context.Background()} + + if err = s.restoreEndpointMaps(ctx); err != nil { + panic("failed to restore endpoint maps: " + err.Error()) + } + } + } diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 406756f5f..8628b9c69 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -30,6 +30,8 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) unix.Bound } if i.session().endpoints != nil { + unlock := i.session().endpoints.lock() + defer unlock() ep := i.session().endpoints.get(i.fileState.key) if ep != nil { return ep diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 66c17debb..e7254fa7d 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -310,8 +310,8 @@ func (i *inodeOperations) Rename(ctx context.Context, oldParent *fs.Inode, oldNa } // Bind implements fs.InodeOperations.Bind. -func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error { - return syserror.EOPNOTSUPP +func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.EOPNOTSUPP } // BoundEndpoint implements fs.InodeOperations.BoundEndpoint. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index d0dbce5dd..db7240dca 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -223,7 +223,7 @@ func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, } // Bind calls i.InodeOperations.Bind with i as the directory. -func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) error { +func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) { if i.overlay != nil { return overlayBind(ctx, i.overlay, name, data, perm) } diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index b33980178..952f9704d 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -146,7 +146,7 @@ type InodeOperations interface { // Implementations must ensure that name does not already exist. // // The caller must ensure that this operation is permitted. - Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) error + Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) // BoundEndpoint returns the socket endpoint at path stored in // or generated by an Inode. diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 53fbd1481..543db9ac7 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -334,13 +334,13 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena return nil } -func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) error { +func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() // We do not support doing anything exciting with sockets unless there // is already a directory in the upper filesystem. if o.upper == nil { - return syserror.EOPNOTSUPP + return nil, syserror.EOPNOTSUPP } return o.upper.InodeOperations.Bind(ctx, o.upper, name, data, perm) } diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index 04432f28c..d8333194b 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -314,17 +314,22 @@ func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, p } // Bind implements fs.InodeOperations.Bind. -func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) error { +func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) { if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil { - return ErrDenied + return nil, ErrDenied } - _, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { + inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { return d.NewBoundEndpoint(ctx, dir, ep, perms) }) if err == syscall.EEXIST { - return syscall.EADDRINUSE + return nil, syscall.EADDRINUSE } - return err + if err != nil { + return nil, err + } + // Take another ref on inode which will be donated to the new dirent. + inode.IncRef() + return fs.NewDirent(inode, name), nil } // CreateFifo implements fs.InodeOperations.CreateFifo. diff --git a/pkg/sentry/fs/ramfs/ramfs.go b/pkg/sentry/fs/ramfs/ramfs.go index 13e72e775..1028b5f1d 100644 --- a/pkg/sentry/fs/ramfs/ramfs.go +++ b/pkg/sentry/fs/ramfs/ramfs.go @@ -279,8 +279,8 @@ func (*Entry) CreateDirectory(context.Context, *fs.Inode, string, fs.FilePermiss } // Bind is not supported by default. -func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error { - return ErrInvalidOp +func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) { + return nil, ErrInvalidOp } // CreateFifo implements fs.InodeOperations.CreateFifo. CreateFifo is not supported by diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index c91091db4..c6f39fce3 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -215,8 +215,8 @@ func (d *dirInodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, } // Bind implements fs.InodeOperations.Bind. -func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error { - return syserror.EPERM +func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.EPERM } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 5b6411f97..1c22e78b3 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -276,9 +276,11 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { } // Create the socket. - if err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}); err != nil { + childDir, err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}) + if err != nil { return tcpip.ErrPortInUse } + childDir.DecRef() } return nil diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 7ec68f573..d4c650892 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -1,6 +1,13 @@ package(licenses = ["notice"]) # Apache 2.0 -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +go_binary( + name = "uds_test_app", + srcs = [ + "uds_test_app.go", + ], +) go_library( name = "container", @@ -29,6 +36,7 @@ go_test( size = "medium", srcs = ["container_test.go"], data = [ + ":uds_test_app", "//runsc", ], tags = [ diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 9e38f5f77..11edcd615 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -15,6 +15,7 @@ package container_test import ( + "bytes" "fmt" "io" "io/ioutil" @@ -108,7 +109,8 @@ func procListToString(pl []*control.Process) string { return fmt.Sprintf("[%s]", strings.Join(strs, ",")) } -// createWriteableOutputFile creates an output file that can be read and written to in the sandbox. +// createWriteableOutputFile creates an output file that can be read and +// written to in the sandbox. func createWriteableOutputFile(path string) (*os.File, error) { outputFile, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) if err != nil { @@ -136,13 +138,19 @@ func waitForFile(f *os.File) error { return testutil.Poll(op, 5*time.Second) } -func readOutputNum(f *os.File, first bool) (int, error) { - // Wait until file has contents. +// readOutputNum reads a file at given filepath and returns the int at the +// requested position. +func readOutputNum(file string, position int) (int, error) { + f, err := os.Open(file) + if err != nil { + return 0, fmt.Errorf("error opening file: %q, %v", file, err) + } + + // Ensure that there is content in output file. if err := waitForFile(f); err != nil { - return 0, err + return 0, fmt.Errorf("error waiting for output file: %v", err) } - // Read the first number in the new file b, err := ioutil.ReadAll(f) if err != nil { return 0, fmt.Errorf("error reading file: %v", err) @@ -151,14 +159,18 @@ func readOutputNum(f *os.File, first bool) (int, error) { return 0, fmt.Errorf("error no content was read") } + // Strip leading null bytes caused by file offset not being 0 upon restore. + b = bytes.Trim(b, "\x00") nums := strings.Split(string(b), "\n") - var num int - if first { - num, err = strconv.Atoi(nums[0]) - } else { - num, err = strconv.Atoi(nums[len(nums)-2]) + if position >= len(nums) { + return 0, fmt.Errorf("position %v is not within the length of content %v", position, nums) + } + if position == -1 { + // Expectation of newline at the end of last position. + position = len(nums) - 2 } + num, err := strconv.Atoi(nums[position]) if err != nil { return 0, fmt.Errorf("error getting number from file: %v", err) } @@ -194,6 +206,27 @@ func run(spec *specs.Spec, conf *boot.Config) error { return nil } +// findUDSApp finds the uds_test_app binary to be used in the UnixDomainSocket test. +func findUDSApp() (string, error) { + // TODO: Use bazel FindBinary function. + + // uds_test_app is in a directory like: + // './linux_amd64_pure_stripped/uds_test_app.go'. + // + // Since I don't want to construct 'linux_amd64_pure_stripped' based on the + // build type, do a quick search for: './*/uds_test_app' + // Note: This glob will only succeed when file is one directory deep. + matches, err := filepath.Glob("./*/uds_test_app") + if err != nil { + return "", fmt.Errorf("error globbing: %v", err) + } + if i := len(matches); i != 1 { + return "", fmt.Errorf("error identifying uds_test_app from matches: got %d matches", i) + } + + return matches[0], nil +} + type configOptions int const ( @@ -204,7 +237,8 @@ const all = overlay | kvm // configs generates different configurations to run tests. func configs(opts configOptions) []*boot.Config { - cs := []*boot.Config{testutil.TestConfig()} + cs := []*boot.Config{testutil.TestConfig(), testutil.TestConfig()} + return cs if opts&overlay != 0 { c := testutil.TestConfig() @@ -544,6 +578,7 @@ func TestCheckpointRestore(t *testing.T) { if err := os.Chmod(dir, 0777); err != nil { t.Fatalf("error chmoding file: %q, %v", dir, err) } + defer os.RemoveAll(dir) outputPath := filepath.Join(dir, "output") outputFile, err := createWriteableOutputFile(outputPath) @@ -598,7 +633,7 @@ func TestCheckpointRestore(t *testing.T) { } defer os.RemoveAll(imagePath) - lastNum, err := readOutputNum(outputFile, false) + lastNum, err := readOutputNum(outputPath, -1) if err != nil { t.Fatalf("error with outputFile: %v", err) } @@ -624,15 +659,22 @@ func TestCheckpointRestore(t *testing.T) { t.Fatalf("error restoring container: %v", err) } - firstNum, err := readOutputNum(outputFile2, true) + // Wait until application has ran. + if err := waitForFile(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + firstNum, err := readOutputNum(outputPath, 0) if err != nil { t.Fatalf("error with outputFile: %v", err) } - // Check that lastNum is one less than firstNum and that the container picks up from where it left off. + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. if lastNum+1 != firstNum { t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) } + cont2.Destroy() // Restore into another container! // Delete and recreate file before restoring. @@ -656,15 +698,169 @@ func TestCheckpointRestore(t *testing.T) { t.Fatalf("error restoring container: %v", err) } - firstNum2, err := readOutputNum(outputFile3, true) + // Wait until application has ran. + if err := waitForFile(outputFile3); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + firstNum2, err := readOutputNum(outputPath, 0) if err != nil { t.Fatalf("error with outputFile: %v", err) } - // Check that lastNum is one less than firstNum and that the container picks up from where it left off. + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. if lastNum+1 != firstNum2 { t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) } + cont3.Destroy() + } +} + +// TestUnixDomainSockets checks that Checkpoint/Restore works in cases +// with filesystem Unix Domain Socket use. +func TestUnixDomainSockets(t *testing.T) { + const ( + output = "uds_output" + goferRoot = "/tmp2" + socket = "uds_socket" + ) + + // Skip overlay because test requires writing to host file. + for _, conf := range configs(kvm) { + t.Logf("Running test with conf: %+v", conf) + + dir, err := ioutil.TempDir("", "uds-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + if err := os.Chmod(dir, 0777); err != nil { + t.Fatalf("error chmoding file: %q, %v", dir, err) + } + defer os.RemoveAll(dir) + + outputPath := filepath.Join(dir, output) + + outputFile, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() + + // Get file path for corresponding output file in sandbox. + outputFileSandbox := filepath.Join(goferRoot, output) + + // Need to get working directory, even though not intuitive. + wd, _ := os.Getwd() + localPath, err := findUDSApp() + if err != nil { + t.Fatalf("error finding localPath: %v", err) + } + app := filepath.Join(wd, localPath) + + if _, err = os.Stat(app); err != nil { + t.Fatalf("error finding the uds_test_app: %v", err) + } + + socketPath := filepath.Join(dir, socket) + socketPathSandbox := filepath.Join(goferRoot, socket) + defer os.Remove(socketPath) + + spec := testutil.NewSpecWithArgs(app, "--file", outputFileSandbox, + "--socket", socketPathSandbox) + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: goferRoot, + Source: dir, + }) + + spec.Process.User = specs.User{ + UID: uint32(os.Getuid()), + GID: uint32(os.Getgid()), + } + + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the container. + cont, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + // Set the image path, the location where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") + + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() + defer os.RemoveAll(imagePath) + + // Wait until application has ran. + if err := waitForFile(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } + + // Read last number outputted before checkpoint. + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } + + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() + + // Restore into a new container. + contRestore, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer contRestore.Destroy() + + if err := contRestore.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } + + // Wait until application has ran. + if err := waitForFile(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + // Read first number outputted after restore. + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } + + // Check that lastNum is one less than firstNum. + if lastNum+1 != firstNum { + t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) + } + contRestore.Destroy() } } diff --git a/runsc/container/uds_test_app.go b/runsc/container/uds_test_app.go new file mode 100644 index 000000000..bef98ac66 --- /dev/null +++ b/runsc/container/uds_test_app.go @@ -0,0 +1,83 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary uds-test-app opens a socket and reads a series of numbers +// which are then written to an output file. +package main + +import ( + "flag" + "fmt" + "log" + "net" + "os" + "strconv" + "time" +) + +var ( + fileName = flag.String("file", "", "name of output file") + socketPath = flag.String("socket", "", "path to socket") +) + +func server(listener net.Listener, f *os.File) { + buf := make([]byte, 16) + + for { + c, err := listener.Accept() + if err != nil { + log.Fatal("error accepting connection:", err) + } + nr, err := c.Read(buf) + if err != nil { + log.Fatal("error reading from buf:", err) + } + data := buf[0:nr] + fmt.Fprintf(f, string(data)+"\n") + } +} + +func main() { + flag.Parse() + if *fileName == "" || *socketPath == "" { + log.Fatalf("Flags cannot be empty, given: fileName=%s, socketPath=%s", *fileName, *socketPath) + } + outputFile, err := os.OpenFile(*fileName, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + log.Fatal("error opening output file:", err) + } + + socket := *socketPath + defer os.Remove(socket) + + listener, err := net.Listen("unix", socket) + if err != nil { + log.Fatal("error listening on socket:", err) + } + + go server(listener, outputFile) + for i := 0; ; i++ { + + conn, err := net.Dial("unix", socket) + if err != nil { + log.Fatal("error dialing:", err) + } + if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { + log.Fatal("error writing:", err) + } + conn.Close() + time.Sleep(100 * time.Millisecond) + } + +} |