diff options
-rw-r--r-- | pkg/sentry/fs/dirent.go | 53 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/inode.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/gofer_test.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/path.go | 37 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/session.go | 149 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/session_state.go | 26 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/socket.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_operations.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/inode_overlay.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/ramfs/dir.go | 15 | ||||
-rw-r--r-- | pkg/sentry/fs/ramfs/ramfs.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/tty/dir.go | 4 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 4 | ||||
-rw-r--r-- | runsc/container/BUILD | 10 | ||||
-rw-r--r-- | runsc/container/container_test.go | 228 | ||||
-rw-r--r-- | runsc/container/uds_test_app.go | 83 |
18 files changed, 541 insertions, 92 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 4658d044f..821cc5789 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -385,6 +385,19 @@ func (d *Dirent) fullName(root *Dirent) (string, bool) { return s, reachable } +// MountRoot finds and returns the mount-root for a given dirent. +func (d *Dirent) MountRoot() *Dirent { + renameMu.RLock() + defer renameMu.RUnlock() + + mountRoot := d + for !mountRoot.mounted && mountRoot.parent != nil { + mountRoot = mountRoot.parent + } + mountRoot.IncRef() + return mountRoot +} + func (d *Dirent) freeze() { if d.frozen { // Already frozen. @@ -665,6 +678,16 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi } child := file.Dirent + d.finishCreate(child, name) + + // Return the reference and the new file. When the last reference to + // the file is dropped, file.Dirent may no longer be cached. + return file, nil +} + +// finishCreate validates the created file, adds it as a child of this dirent, +// and notifies any watchers. +func (d *Dirent) finishCreate(child *Dirent, name string) { // Sanity check c, its name must be consistent. if child.name != name { panic(fmt.Sprintf("create from %q to %q returned unexpected name %q", d.name, name, child.name)) @@ -697,10 +720,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi // Allow the file system to take extra references on c. child.maybeExtendReference() - - // Return the reference and the new file. When the last reference to - // the file is dropped, file.Dirent may no longer be cached. - return file, nil } // genericCreate executes create if name does not exist. Removes a negative Dirent at name if @@ -718,11 +737,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c return syscall.ENOENT } - // Execute the create operation. - if err := create(); err != nil { - return err - } - // Remove any negative Dirent. We've already asserted above with d.exists // that the only thing remaining here can be a negative Dirent. if w, ok := d.children[name]; ok { @@ -745,7 +759,8 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c w.Drop() } - return nil + // Execute the create operation. + return create() } // CreateLink creates a new link in this directory. @@ -797,23 +812,29 @@ func (d *Dirent) CreateDirectory(ctx context.Context, root *Dirent, name string, } // Bind satisfies the InodeOperations interface; otherwise same as GetFile. -func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, socket unix.BoundEndpoint, perms FilePermissions) error { +func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data unix.BoundEndpoint, perms FilePermissions) (*Dirent, error) { d.dirMu.Lock() defer d.dirMu.Unlock() d.mu.Lock() defer d.mu.Unlock() + var childDir *Dirent err := d.genericCreate(ctx, root, name, func() error { - if err := d.Inode.Bind(ctx, name, socket, perms); err != nil { - return err + var e error + childDir, e = d.Inode.Bind(ctx, name, data, perms) + if e != nil { + return e } - d.Inode.Watches.Notify(name, linux.IN_CREATE, 0) + d.finishCreate(childDir, name) return nil }) if err == syscall.EEXIST { - return syscall.EADDRINUSE + return nil, syscall.EADDRINUSE + } + if err != nil { + return nil, err } - return err + return childDir, err } // CreateFifo creates a new named pipe under this dirent. diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 177396fdc..3479f2fad 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -254,8 +254,8 @@ func (InodeNotDirectory) CreateDirectory(context.Context, *fs.Inode, string, fs. } // Bind implements fs.InodeOperations.Bind. -func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error { - return syserror.ENOTDIR +func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.ENOTDIR } // CreateFifo implements fs.InodeOperations.CreateFifo. diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 764b530cb..45fdaacfd 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -74,7 +74,7 @@ func root(ctx context.Context, cp cachePolicy, mode p9.FileMode, size uint64) (* } rootFile := goodMockFile(mode, size) - sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr) + sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr, false /* socket */) m := fs.NewMountSource(s, &filesystem{}, fs.MountSourceFlags{}) return rootFile, fs.NewInode(rootInodeOperations, m, sattr), nil } diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index bfeab3833..15e9863fb 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -57,7 +57,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string } // Construct the Inode operations. - sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) + sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) // Construct a positive Dirent. return fs.NewDirent(fs.NewInode(node, dir.MountSource, sattr), name), nil @@ -113,7 +113,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string } // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) // Construct the positive Dirent. d := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) @@ -175,10 +175,10 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s return nil } -// Bind implements InodeOperations. -func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) error { +// Bind implements InodeOperations.Bind. +func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { if i.session().endpoints == nil { - return syscall.EOPNOTSUPP + return nil, syscall.EOPNOTSUPP } // Create replaces the directory fid with the newly created/opened @@ -186,7 +186,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // this node. _, newFile, err := i.fileState.file.walk(ctx, nil) if err != nil { - return err + return nil, err } // Stabilize the endpoint map while creation is in progress. @@ -198,7 +198,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, owner := fs.FileOwnerFromContext(ctx) hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) if err != nil { - return err + return nil, err } // We're not going to use this file. hostFile.Close() @@ -206,10 +206,10 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, i.touchModificationTime(ctx, dir) // Get the attributes of the file to create inode key. - qid, _, attr, err := getattr(ctx, newFile) + qid, mask, attr, err := getattr(ctx, newFile) if err != nil { newFile.close(ctx) - return err + return nil, err } key := device.MultiDeviceKey{ @@ -217,9 +217,24 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, SecondaryDevice: i.session().connID, Inode: qid.Path, } - i.session().endpoints.add(key, ep) - return nil + // Create child dirent. + + // Get an unopened p9.File for the file we created so that it can be + // cloned and re-opened multiple times after creation. + _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + if err != nil { + newFile.close(ctx) + return nil, err + } + + // Construct the InodeOperations. + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) + + // Construct the positive Dirent. + childDir := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name) + i.session().endpoints.add(key, childDir, ep) + return childDir, nil } // CreateFifo implements fs.InodeOperations.CreateFifo. Gofer nodes do not support the diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 648a11435..bfb1154dc 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -15,6 +15,7 @@ package gofer import ( + "fmt" "sync" "gvisor.googlesource.com/gvisor/pkg/p9" @@ -28,39 +29,60 @@ import ( ) // +stateify savable -type endpointMap struct { +type endpointMaps struct { + // mu protexts the direntMap, the keyMap, and the pathMap below. mu sync.RWMutex `state:"nosave"` - // TODO: Make map with private unix sockets savable. - m map[device.MultiDeviceKey]unix.BoundEndpoint + + // direntMap links sockets to their dirents. + // It is filled concurrently with the keyMap and is stored upon save. + // Before saving, this map is used to populate the pathMap. + direntMap map[unix.BoundEndpoint]*fs.Dirent + + // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets. + // It is not stored during save because the inode ID may change upon restore. + keyMap map[device.MultiDeviceKey]unix.BoundEndpoint `state:"nosave"` + + // pathMap links the sockets to their paths. + // It is filled before saving from the direntMap and is stored upon save. + // Upon restore, this map is used to re-populate the keyMap. + pathMap map[unix.BoundEndpoint]string } -// add adds the endpoint to the map. +// add adds the endpoint to the maps. +// A reference is taken on the dirent argument. // -// Precondition: map must have been locked with 'lock'. -func (e *endpointMap) add(key device.MultiDeviceKey, ep unix.BoundEndpoint) { - e.m[key] = ep +// Precondition: maps must have been locked with 'lock'. +func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep unix.BoundEndpoint) { + e.keyMap[key] = ep + d.IncRef() + e.direntMap[ep] = d } -// remove deletes the key from the map. +// remove deletes the key from the maps. // -// Precondition: map must have been locked with 'lock'. -func (e *endpointMap) remove(key device.MultiDeviceKey) { - delete(e.m, key) +// Precondition: maps must have been locked with 'lock'. +func (e *endpointMaps) remove(key device.MultiDeviceKey) { + endpoint := e.get(key) + delete(e.keyMap, key) + + d := e.direntMap[endpoint] + d.DecRef() + delete(e.direntMap, endpoint) } // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *endpointMap) lock() func() { +func (e *endpointMaps) lock() func() { e.mu.Lock() return func() { e.mu.Unlock() } } -func (e *endpointMap) get(key device.MultiDeviceKey) unix.BoundEndpoint { - e.mu.RLock() - ep := e.m[key] - e.mu.RUnlock() - return ep +// get returns the endpoint mapped to the given key. +// +// Precondition: maps must have been locked for reading. +func (e *endpointMaps) get(key device.MultiDeviceKey) unix.BoundEndpoint { + return e.keyMap[key] } // session holds state for each 9p session established during sys_mount. @@ -115,7 +137,7 @@ type session struct { // TODO: there are few possible races with someone stat'ing the // file and another deleting it concurrently, where the file will not be // reported as socket file. - endpoints *endpointMap `state:"wait"` + endpoints *endpointMaps `state:"wait"` } // Destroy tears down the session. @@ -149,7 +171,9 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { // newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes // (p9.QID, p9.AttrMask, p9.Attr). -func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { +// +// Endpoints lock must not be held if socket == false. +func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) { deviceKey := device.MultiDeviceKey{ Device: attr.RDev, SecondaryDevice: s.connID, @@ -164,10 +188,16 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p } if s.endpoints != nil { - // If unix sockets are allowed on this filesystem, check if this file is - // supposed to be a socket file. - if s.endpoints.get(deviceKey) != nil { + if socket { sattr.Type = fs.Socket + } else { + // If unix sockets are allowed on this filesystem, check if this file is + // supposed to be a socket file. + unlock := s.endpoints.lock() + if s.endpoints.get(deviceKey) != nil { + sattr.Type = fs.Socket + } + unlock() } } @@ -215,7 +245,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF } if o.privateunixsocket { - s.endpoints = &endpointMap{m: make(map[device.MultiDeviceKey]unix.BoundEndpoint)} + s.endpoints = newEndpointMaps() } // Construct the MountSource with the session and superBlockFlags. @@ -248,6 +278,77 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr) + sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr, false) return fs.NewInode(iops, m, sattr), nil } + +// newEndpointMaps creates a new endpointMaps. +func newEndpointMaps() *endpointMaps { + return &endpointMaps{ + direntMap: make(map[unix.BoundEndpoint]*fs.Dirent), + keyMap: make(map[device.MultiDeviceKey]unix.BoundEndpoint), + pathMap: make(map[unix.BoundEndpoint]string), + } +} + +// fillKeyMap populates key and dirent maps upon restore from saved +// pathmap. +func (s *session) fillKeyMap(ctx context.Context) error { + unlock := s.endpoints.lock() + defer unlock() + + for ep, dirPath := range s.endpoints.pathMap { + _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) + if err != nil { + return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) + } + + qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll()) + if err != nil { + return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err) + } + + key := device.MultiDeviceKey{ + Device: attr.RDev, + SecondaryDevice: s.connID, + Inode: qid.Path, + } + + s.endpoints.keyMap[key] = ep + } + return nil +} + +// fillPathMap populates paths for endpoints from dirents in direntMap +// before save. +func (s *session) fillPathMap() error { + unlock := s.endpoints.lock() + defer unlock() + + for ep, dir := range s.endpoints.direntMap { + mountRoot := dir.MountRoot() + defer mountRoot.DecRef() + dirPath, _ := dir.FullName(mountRoot) + if dirPath == "" { + return fmt.Errorf("error getting path from dirent") + } + s.endpoints.pathMap[ep] = dirPath + } + return nil +} + +// restoreEndpointMaps recreates and fills the key and dirent maps. +func (s *session) restoreEndpointMaps(ctx context.Context) error { + // When restoring, only need to create the keyMap because the dirent and path + // maps got stored through the save. + s.endpoints.keyMap = make(map[device.MultiDeviceKey]unix.BoundEndpoint) + if err := s.fillKeyMap(ctx); err != nil { + return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) + } + + // Re-create pathMap because it can no longer be trusted as socket paths can + // change while process continues to run. Empty pathMap will be re-filled upon + // next save. + s.endpoints.pathMap = make(map[unix.BoundEndpoint]string) + return nil +} diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 0154810c8..8e6424492 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -18,16 +18,17 @@ import ( "fmt" "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/unet" ) // beforeSave is invoked by stateify. -// -// TODO: Make map with private unix sockets savable. -func (e *endpointMap) beforeSave() { - if len(e.m) != 0 { - panic("EndpointMap with existing private unix sockets cannot be saved") +func (s *session) beforeSave() { + if s.endpoints != nil { + if err := s.fillPathMap(); err != nil { + panic("failed to save paths to endpoint map before saving" + err.Error()) + } } } @@ -72,6 +73,9 @@ func (s *session) afterLoad() { if opts.aname != s.aname { panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname)) } + + // Check if endpointMaps exist when uds sockets are enabled + // (only pathmap will actualy have been saved). if opts.privateunixsocket != (s.endpoints != nil) { panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil)) } @@ -96,4 +100,16 @@ func (s *session) afterLoad() { if err != nil { panic(fmt.Sprintf("failed to attach to aname: %v", err)) } + + // If private unix sockets are enabled, create and fill the session's endpoint + // maps. + if opts.privateunixsocket { + // TODO: Context is not plumbed to save/restore. + ctx := &dummyClockContext{context.Background()} + + if err = s.restoreEndpointMaps(ctx); err != nil { + panic("failed to restore endpoint maps: " + err.Error()) + } + } + } diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 406756f5f..8628b9c69 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -30,6 +30,8 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) unix.Bound } if i.session().endpoints != nil { + unlock := i.session().endpoints.lock() + defer unlock() ep := i.session().endpoints.get(i.fileState.key) if ep != nil { return ep diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 66c17debb..e7254fa7d 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -310,8 +310,8 @@ func (i *inodeOperations) Rename(ctx context.Context, oldParent *fs.Inode, oldNa } // Bind implements fs.InodeOperations.Bind. -func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error { - return syserror.EOPNOTSUPP +func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.EOPNOTSUPP } // BoundEndpoint implements fs.InodeOperations.BoundEndpoint. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index d0dbce5dd..db7240dca 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -223,7 +223,7 @@ func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, } // Bind calls i.InodeOperations.Bind with i as the directory. -func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) error { +func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) { if i.overlay != nil { return overlayBind(ctx, i.overlay, name, data, perm) } diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index b33980178..952f9704d 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -146,7 +146,7 @@ type InodeOperations interface { // Implementations must ensure that name does not already exist. // // The caller must ensure that this operation is permitted. - Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) error + Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) // BoundEndpoint returns the socket endpoint at path stored in // or generated by an Inode. diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 53fbd1481..543db9ac7 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -334,13 +334,13 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena return nil } -func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) error { +func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() // We do not support doing anything exciting with sockets unless there // is already a directory in the upper filesystem. if o.upper == nil { - return syserror.EOPNOTSUPP + return nil, syserror.EOPNOTSUPP } return o.upper.InodeOperations.Bind(ctx, o.upper, name, data, perm) } diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index 04432f28c..d8333194b 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -314,17 +314,22 @@ func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, p } // Bind implements fs.InodeOperations.Bind. -func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) error { +func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) { if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil { - return ErrDenied + return nil, ErrDenied } - _, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { + inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) { return d.NewBoundEndpoint(ctx, dir, ep, perms) }) if err == syscall.EEXIST { - return syscall.EADDRINUSE + return nil, syscall.EADDRINUSE } - return err + if err != nil { + return nil, err + } + // Take another ref on inode which will be donated to the new dirent. + inode.IncRef() + return fs.NewDirent(inode, name), nil } // CreateFifo implements fs.InodeOperations.CreateFifo. diff --git a/pkg/sentry/fs/ramfs/ramfs.go b/pkg/sentry/fs/ramfs/ramfs.go index 13e72e775..1028b5f1d 100644 --- a/pkg/sentry/fs/ramfs/ramfs.go +++ b/pkg/sentry/fs/ramfs/ramfs.go @@ -279,8 +279,8 @@ func (*Entry) CreateDirectory(context.Context, *fs.Inode, string, fs.FilePermiss } // Bind is not supported by default. -func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error { - return ErrInvalidOp +func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) { + return nil, ErrInvalidOp } // CreateFifo implements fs.InodeOperations.CreateFifo. CreateFifo is not supported by diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index c91091db4..c6f39fce3 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -215,8 +215,8 @@ func (d *dirInodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, } // Bind implements fs.InodeOperations.Bind. -func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error { - return syserror.EPERM +func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { + return nil, syserror.EPERM } // GetFile implements fs.InodeOperations.GetFile. diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 5b6411f97..1c22e78b3 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -276,9 +276,11 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { } // Create the socket. - if err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}); err != nil { + childDir, err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}) + if err != nil { return tcpip.ErrPortInUse } + childDir.DecRef() } return nil diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 7ec68f573..d4c650892 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -1,6 +1,13 @@ package(licenses = ["notice"]) # Apache 2.0 -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +go_binary( + name = "uds_test_app", + srcs = [ + "uds_test_app.go", + ], +) go_library( name = "container", @@ -29,6 +36,7 @@ go_test( size = "medium", srcs = ["container_test.go"], data = [ + ":uds_test_app", "//runsc", ], tags = [ diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 9e38f5f77..11edcd615 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -15,6 +15,7 @@ package container_test import ( + "bytes" "fmt" "io" "io/ioutil" @@ -108,7 +109,8 @@ func procListToString(pl []*control.Process) string { return fmt.Sprintf("[%s]", strings.Join(strs, ",")) } -// createWriteableOutputFile creates an output file that can be read and written to in the sandbox. +// createWriteableOutputFile creates an output file that can be read and +// written to in the sandbox. func createWriteableOutputFile(path string) (*os.File, error) { outputFile, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) if err != nil { @@ -136,13 +138,19 @@ func waitForFile(f *os.File) error { return testutil.Poll(op, 5*time.Second) } -func readOutputNum(f *os.File, first bool) (int, error) { - // Wait until file has contents. +// readOutputNum reads a file at given filepath and returns the int at the +// requested position. +func readOutputNum(file string, position int) (int, error) { + f, err := os.Open(file) + if err != nil { + return 0, fmt.Errorf("error opening file: %q, %v", file, err) + } + + // Ensure that there is content in output file. if err := waitForFile(f); err != nil { - return 0, err + return 0, fmt.Errorf("error waiting for output file: %v", err) } - // Read the first number in the new file b, err := ioutil.ReadAll(f) if err != nil { return 0, fmt.Errorf("error reading file: %v", err) @@ -151,14 +159,18 @@ func readOutputNum(f *os.File, first bool) (int, error) { return 0, fmt.Errorf("error no content was read") } + // Strip leading null bytes caused by file offset not being 0 upon restore. + b = bytes.Trim(b, "\x00") nums := strings.Split(string(b), "\n") - var num int - if first { - num, err = strconv.Atoi(nums[0]) - } else { - num, err = strconv.Atoi(nums[len(nums)-2]) + if position >= len(nums) { + return 0, fmt.Errorf("position %v is not within the length of content %v", position, nums) + } + if position == -1 { + // Expectation of newline at the end of last position. + position = len(nums) - 2 } + num, err := strconv.Atoi(nums[position]) if err != nil { return 0, fmt.Errorf("error getting number from file: %v", err) } @@ -194,6 +206,27 @@ func run(spec *specs.Spec, conf *boot.Config) error { return nil } +// findUDSApp finds the uds_test_app binary to be used in the UnixDomainSocket test. +func findUDSApp() (string, error) { + // TODO: Use bazel FindBinary function. + + // uds_test_app is in a directory like: + // './linux_amd64_pure_stripped/uds_test_app.go'. + // + // Since I don't want to construct 'linux_amd64_pure_stripped' based on the + // build type, do a quick search for: './*/uds_test_app' + // Note: This glob will only succeed when file is one directory deep. + matches, err := filepath.Glob("./*/uds_test_app") + if err != nil { + return "", fmt.Errorf("error globbing: %v", err) + } + if i := len(matches); i != 1 { + return "", fmt.Errorf("error identifying uds_test_app from matches: got %d matches", i) + } + + return matches[0], nil +} + type configOptions int const ( @@ -204,7 +237,8 @@ const all = overlay | kvm // configs generates different configurations to run tests. func configs(opts configOptions) []*boot.Config { - cs := []*boot.Config{testutil.TestConfig()} + cs := []*boot.Config{testutil.TestConfig(), testutil.TestConfig()} + return cs if opts&overlay != 0 { c := testutil.TestConfig() @@ -544,6 +578,7 @@ func TestCheckpointRestore(t *testing.T) { if err := os.Chmod(dir, 0777); err != nil { t.Fatalf("error chmoding file: %q, %v", dir, err) } + defer os.RemoveAll(dir) outputPath := filepath.Join(dir, "output") outputFile, err := createWriteableOutputFile(outputPath) @@ -598,7 +633,7 @@ func TestCheckpointRestore(t *testing.T) { } defer os.RemoveAll(imagePath) - lastNum, err := readOutputNum(outputFile, false) + lastNum, err := readOutputNum(outputPath, -1) if err != nil { t.Fatalf("error with outputFile: %v", err) } @@ -624,15 +659,22 @@ func TestCheckpointRestore(t *testing.T) { t.Fatalf("error restoring container: %v", err) } - firstNum, err := readOutputNum(outputFile2, true) + // Wait until application has ran. + if err := waitForFile(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + firstNum, err := readOutputNum(outputPath, 0) if err != nil { t.Fatalf("error with outputFile: %v", err) } - // Check that lastNum is one less than firstNum and that the container picks up from where it left off. + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. if lastNum+1 != firstNum { t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) } + cont2.Destroy() // Restore into another container! // Delete and recreate file before restoring. @@ -656,15 +698,169 @@ func TestCheckpointRestore(t *testing.T) { t.Fatalf("error restoring container: %v", err) } - firstNum2, err := readOutputNum(outputFile3, true) + // Wait until application has ran. + if err := waitForFile(outputFile3); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + firstNum2, err := readOutputNum(outputPath, 0) if err != nil { t.Fatalf("error with outputFile: %v", err) } - // Check that lastNum is one less than firstNum and that the container picks up from where it left off. + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. if lastNum+1 != firstNum2 { t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) } + cont3.Destroy() + } +} + +// TestUnixDomainSockets checks that Checkpoint/Restore works in cases +// with filesystem Unix Domain Socket use. +func TestUnixDomainSockets(t *testing.T) { + const ( + output = "uds_output" + goferRoot = "/tmp2" + socket = "uds_socket" + ) + + // Skip overlay because test requires writing to host file. + for _, conf := range configs(kvm) { + t.Logf("Running test with conf: %+v", conf) + + dir, err := ioutil.TempDir("", "uds-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + if err := os.Chmod(dir, 0777); err != nil { + t.Fatalf("error chmoding file: %q, %v", dir, err) + } + defer os.RemoveAll(dir) + + outputPath := filepath.Join(dir, output) + + outputFile, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() + + // Get file path for corresponding output file in sandbox. + outputFileSandbox := filepath.Join(goferRoot, output) + + // Need to get working directory, even though not intuitive. + wd, _ := os.Getwd() + localPath, err := findUDSApp() + if err != nil { + t.Fatalf("error finding localPath: %v", err) + } + app := filepath.Join(wd, localPath) + + if _, err = os.Stat(app); err != nil { + t.Fatalf("error finding the uds_test_app: %v", err) + } + + socketPath := filepath.Join(dir, socket) + socketPathSandbox := filepath.Join(goferRoot, socket) + defer os.Remove(socketPath) + + spec := testutil.NewSpecWithArgs(app, "--file", outputFileSandbox, + "--socket", socketPathSandbox) + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: goferRoot, + Source: dir, + }) + + spec.Process.User = specs.User{ + UID: uint32(os.Getuid()), + GID: uint32(os.Getgid()), + } + + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the container. + cont, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + // Set the image path, the location where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") + + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() + defer os.RemoveAll(imagePath) + + // Wait until application has ran. + if err := waitForFile(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } + + // Read last number outputted before checkpoint. + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } + + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() + + // Restore into a new container. + contRestore, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer contRestore.Destroy() + + if err := contRestore.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } + + // Wait until application has ran. + if err := waitForFile(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } + + // Read first number outputted after restore. + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } + + // Check that lastNum is one less than firstNum. + if lastNum+1 != firstNum { + t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) + } + contRestore.Destroy() } } diff --git a/runsc/container/uds_test_app.go b/runsc/container/uds_test_app.go new file mode 100644 index 000000000..bef98ac66 --- /dev/null +++ b/runsc/container/uds_test_app.go @@ -0,0 +1,83 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary uds-test-app opens a socket and reads a series of numbers +// which are then written to an output file. +package main + +import ( + "flag" + "fmt" + "log" + "net" + "os" + "strconv" + "time" +) + +var ( + fileName = flag.String("file", "", "name of output file") + socketPath = flag.String("socket", "", "path to socket") +) + +func server(listener net.Listener, f *os.File) { + buf := make([]byte, 16) + + for { + c, err := listener.Accept() + if err != nil { + log.Fatal("error accepting connection:", err) + } + nr, err := c.Read(buf) + if err != nil { + log.Fatal("error reading from buf:", err) + } + data := buf[0:nr] + fmt.Fprintf(f, string(data)+"\n") + } +} + +func main() { + flag.Parse() + if *fileName == "" || *socketPath == "" { + log.Fatalf("Flags cannot be empty, given: fileName=%s, socketPath=%s", *fileName, *socketPath) + } + outputFile, err := os.OpenFile(*fileName, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + log.Fatal("error opening output file:", err) + } + + socket := *socketPath + defer os.Remove(socket) + + listener, err := net.Listen("unix", socket) + if err != nil { + log.Fatal("error listening on socket:", err) + } + + go server(listener, outputFile) + for i := 0; ; i++ { + + conn, err := net.Dial("unix", socket) + if err != nil { + log.Fatal("error dialing:", err) + } + if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { + log.Fatal("error writing:", err) + } + conn.Close() + time.Sleep(100 * time.Millisecond) + } + +} |