summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/fs/dirent.go53
-rw-r--r--pkg/sentry/fs/fsutil/inode.go4
-rw-r--r--pkg/sentry/fs/gofer/gofer_test.go2
-rw-r--r--pkg/sentry/fs/gofer/path.go37
-rw-r--r--pkg/sentry/fs/gofer/session.go149
-rw-r--r--pkg/sentry/fs/gofer/session_state.go26
-rw-r--r--pkg/sentry/fs/gofer/socket.go2
-rw-r--r--pkg/sentry/fs/host/inode.go4
-rw-r--r--pkg/sentry/fs/inode.go2
-rw-r--r--pkg/sentry/fs/inode_operations.go2
-rw-r--r--pkg/sentry/fs/inode_overlay.go4
-rw-r--r--pkg/sentry/fs/ramfs/dir.go15
-rw-r--r--pkg/sentry/fs/ramfs/ramfs.go4
-rw-r--r--pkg/sentry/fs/tty/dir.go4
-rw-r--r--pkg/sentry/socket/unix/unix.go4
-rw-r--r--runsc/container/BUILD10
-rw-r--r--runsc/container/container_test.go228
-rw-r--r--runsc/container/uds_test_app.go83
18 files changed, 541 insertions, 92 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index 4658d044f..821cc5789 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -385,6 +385,19 @@ func (d *Dirent) fullName(root *Dirent) (string, bool) {
return s, reachable
}
+// MountRoot finds and returns the mount-root for a given dirent.
+func (d *Dirent) MountRoot() *Dirent {
+ renameMu.RLock()
+ defer renameMu.RUnlock()
+
+ mountRoot := d
+ for !mountRoot.mounted && mountRoot.parent != nil {
+ mountRoot = mountRoot.parent
+ }
+ mountRoot.IncRef()
+ return mountRoot
+}
+
func (d *Dirent) freeze() {
if d.frozen {
// Already frozen.
@@ -665,6 +678,16 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi
}
child := file.Dirent
+ d.finishCreate(child, name)
+
+ // Return the reference and the new file. When the last reference to
+ // the file is dropped, file.Dirent may no longer be cached.
+ return file, nil
+}
+
+// finishCreate validates the created file, adds it as a child of this dirent,
+// and notifies any watchers.
+func (d *Dirent) finishCreate(child *Dirent, name string) {
// Sanity check c, its name must be consistent.
if child.name != name {
panic(fmt.Sprintf("create from %q to %q returned unexpected name %q", d.name, name, child.name))
@@ -697,10 +720,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi
// Allow the file system to take extra references on c.
child.maybeExtendReference()
-
- // Return the reference and the new file. When the last reference to
- // the file is dropped, file.Dirent may no longer be cached.
- return file, nil
}
// genericCreate executes create if name does not exist. Removes a negative Dirent at name if
@@ -718,11 +737,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c
return syscall.ENOENT
}
- // Execute the create operation.
- if err := create(); err != nil {
- return err
- }
-
// Remove any negative Dirent. We've already asserted above with d.exists
// that the only thing remaining here can be a negative Dirent.
if w, ok := d.children[name]; ok {
@@ -745,7 +759,8 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c
w.Drop()
}
- return nil
+ // Execute the create operation.
+ return create()
}
// CreateLink creates a new link in this directory.
@@ -797,23 +812,29 @@ func (d *Dirent) CreateDirectory(ctx context.Context, root *Dirent, name string,
}
// Bind satisfies the InodeOperations interface; otherwise same as GetFile.
-func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, socket unix.BoundEndpoint, perms FilePermissions) error {
+func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data unix.BoundEndpoint, perms FilePermissions) (*Dirent, error) {
d.dirMu.Lock()
defer d.dirMu.Unlock()
d.mu.Lock()
defer d.mu.Unlock()
+ var childDir *Dirent
err := d.genericCreate(ctx, root, name, func() error {
- if err := d.Inode.Bind(ctx, name, socket, perms); err != nil {
- return err
+ var e error
+ childDir, e = d.Inode.Bind(ctx, name, data, perms)
+ if e != nil {
+ return e
}
- d.Inode.Watches.Notify(name, linux.IN_CREATE, 0)
+ d.finishCreate(childDir, name)
return nil
})
if err == syscall.EEXIST {
- return syscall.EADDRINUSE
+ return nil, syscall.EADDRINUSE
+ }
+ if err != nil {
+ return nil, err
}
- return err
+ return childDir, err
}
// CreateFifo creates a new named pipe under this dirent.
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index 177396fdc..3479f2fad 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -254,8 +254,8 @@ func (InodeNotDirectory) CreateDirectory(context.Context, *fs.Inode, string, fs.
}
// Bind implements fs.InodeOperations.Bind.
-func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error {
- return syserror.ENOTDIR
+func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) {
+ return nil, syserror.ENOTDIR
}
// CreateFifo implements fs.InodeOperations.CreateFifo.
diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go
index 764b530cb..45fdaacfd 100644
--- a/pkg/sentry/fs/gofer/gofer_test.go
+++ b/pkg/sentry/fs/gofer/gofer_test.go
@@ -74,7 +74,7 @@ func root(ctx context.Context, cp cachePolicy, mode p9.FileMode, size uint64) (*
}
rootFile := goodMockFile(mode, size)
- sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr)
+ sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{file: rootFile}, p9.QID{}, rootFile.GetAttrMock.Valid, rootFile.GetAttrMock.Attr, false /* socket */)
m := fs.NewMountSource(s, &filesystem{}, fs.MountSourceFlags{})
return rootFile, fs.NewInode(rootInodeOperations, m, sattr), nil
}
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index bfeab3833..15e9863fb 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -57,7 +57,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
}
// Construct the Inode operations.
- sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr)
+ sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false)
// Construct a positive Dirent.
return fs.NewDirent(fs.NewInode(node, dir.MountSource, sattr), name), nil
@@ -113,7 +113,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
}
// Construct the InodeOperations.
- sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr)
+ sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false)
// Construct the positive Dirent.
d := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name)
@@ -175,10 +175,10 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s
return nil
}
-// Bind implements InodeOperations.
-func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) error {
+// Bind implements InodeOperations.Bind.
+func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
if i.session().endpoints == nil {
- return syscall.EOPNOTSUPP
+ return nil, syscall.EOPNOTSUPP
}
// Create replaces the directory fid with the newly created/opened
@@ -186,7 +186,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
// this node.
_, newFile, err := i.fileState.file.walk(ctx, nil)
if err != nil {
- return err
+ return nil, err
}
// Stabilize the endpoint map while creation is in progress.
@@ -198,7 +198,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
owner := fs.FileOwnerFromContext(ctx)
hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
if err != nil {
- return err
+ return nil, err
}
// We're not going to use this file.
hostFile.Close()
@@ -206,10 +206,10 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
i.touchModificationTime(ctx, dir)
// Get the attributes of the file to create inode key.
- qid, _, attr, err := getattr(ctx, newFile)
+ qid, mask, attr, err := getattr(ctx, newFile)
if err != nil {
newFile.close(ctx)
- return err
+ return nil, err
}
key := device.MultiDeviceKey{
@@ -217,9 +217,24 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
SecondaryDevice: i.session().connID,
Inode: qid.Path,
}
- i.session().endpoints.add(key, ep)
- return nil
+ // Create child dirent.
+
+ // Get an unopened p9.File for the file we created so that it can be
+ // cloned and re-opened multiple times after creation.
+ _, unopened, err := i.fileState.file.walk(ctx, []string{name})
+ if err != nil {
+ newFile.close(ctx)
+ return nil, err
+ }
+
+ // Construct the InodeOperations.
+ sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true)
+
+ // Construct the positive Dirent.
+ childDir := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name)
+ i.session().endpoints.add(key, childDir, ep)
+ return childDir, nil
}
// CreateFifo implements fs.InodeOperations.CreateFifo. Gofer nodes do not support the
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index 648a11435..bfb1154dc 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -15,6 +15,7 @@
package gofer
import (
+ "fmt"
"sync"
"gvisor.googlesource.com/gvisor/pkg/p9"
@@ -28,39 +29,60 @@ import (
)
// +stateify savable
-type endpointMap struct {
+type endpointMaps struct {
+ // mu protexts the direntMap, the keyMap, and the pathMap below.
mu sync.RWMutex `state:"nosave"`
- // TODO: Make map with private unix sockets savable.
- m map[device.MultiDeviceKey]unix.BoundEndpoint
+
+ // direntMap links sockets to their dirents.
+ // It is filled concurrently with the keyMap and is stored upon save.
+ // Before saving, this map is used to populate the pathMap.
+ direntMap map[unix.BoundEndpoint]*fs.Dirent
+
+ // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets.
+ // It is not stored during save because the inode ID may change upon restore.
+ keyMap map[device.MultiDeviceKey]unix.BoundEndpoint `state:"nosave"`
+
+ // pathMap links the sockets to their paths.
+ // It is filled before saving from the direntMap and is stored upon save.
+ // Upon restore, this map is used to re-populate the keyMap.
+ pathMap map[unix.BoundEndpoint]string
}
-// add adds the endpoint to the map.
+// add adds the endpoint to the maps.
+// A reference is taken on the dirent argument.
//
-// Precondition: map must have been locked with 'lock'.
-func (e *endpointMap) add(key device.MultiDeviceKey, ep unix.BoundEndpoint) {
- e.m[key] = ep
+// Precondition: maps must have been locked with 'lock'.
+func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep unix.BoundEndpoint) {
+ e.keyMap[key] = ep
+ d.IncRef()
+ e.direntMap[ep] = d
}
-// remove deletes the key from the map.
+// remove deletes the key from the maps.
//
-// Precondition: map must have been locked with 'lock'.
-func (e *endpointMap) remove(key device.MultiDeviceKey) {
- delete(e.m, key)
+// Precondition: maps must have been locked with 'lock'.
+func (e *endpointMaps) remove(key device.MultiDeviceKey) {
+ endpoint := e.get(key)
+ delete(e.keyMap, key)
+
+ d := e.direntMap[endpoint]
+ d.DecRef()
+ delete(e.direntMap, endpoint)
}
// lock blocks other addition and removal operations from happening while
// the backing file is being created or deleted. Returns a function that unlocks
// the endpoint map.
-func (e *endpointMap) lock() func() {
+func (e *endpointMaps) lock() func() {
e.mu.Lock()
return func() { e.mu.Unlock() }
}
-func (e *endpointMap) get(key device.MultiDeviceKey) unix.BoundEndpoint {
- e.mu.RLock()
- ep := e.m[key]
- e.mu.RUnlock()
- return ep
+// get returns the endpoint mapped to the given key.
+//
+// Precondition: maps must have been locked for reading.
+func (e *endpointMaps) get(key device.MultiDeviceKey) unix.BoundEndpoint {
+ return e.keyMap[key]
}
// session holds state for each 9p session established during sys_mount.
@@ -115,7 +137,7 @@ type session struct {
// TODO: there are few possible races with someone stat'ing the
// file and another deleting it concurrently, where the file will not be
// reported as socket file.
- endpoints *endpointMap `state:"wait"`
+ endpoints *endpointMaps `state:"wait"`
}
// Destroy tears down the session.
@@ -149,7 +171,9 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) {
// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes
// (p9.QID, p9.AttrMask, p9.Attr).
-func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) {
+//
+// Endpoints lock must not be held if socket == false.
+func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) {
deviceKey := device.MultiDeviceKey{
Device: attr.RDev,
SecondaryDevice: s.connID,
@@ -164,10 +188,16 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p
}
if s.endpoints != nil {
- // If unix sockets are allowed on this filesystem, check if this file is
- // supposed to be a socket file.
- if s.endpoints.get(deviceKey) != nil {
+ if socket {
sattr.Type = fs.Socket
+ } else {
+ // If unix sockets are allowed on this filesystem, check if this file is
+ // supposed to be a socket file.
+ unlock := s.endpoints.lock()
+ if s.endpoints.get(deviceKey) != nil {
+ sattr.Type = fs.Socket
+ }
+ unlock()
}
}
@@ -215,7 +245,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
}
if o.privateunixsocket {
- s.endpoints = &endpointMap{m: make(map[device.MultiDeviceKey]unix.BoundEndpoint)}
+ s.endpoints = newEndpointMaps()
}
// Construct the MountSource with the session and superBlockFlags.
@@ -248,6 +278,77 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
return nil, err
}
- sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr)
+ sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr, false)
return fs.NewInode(iops, m, sattr), nil
}
+
+// newEndpointMaps creates a new endpointMaps.
+func newEndpointMaps() *endpointMaps {
+ return &endpointMaps{
+ direntMap: make(map[unix.BoundEndpoint]*fs.Dirent),
+ keyMap: make(map[device.MultiDeviceKey]unix.BoundEndpoint),
+ pathMap: make(map[unix.BoundEndpoint]string),
+ }
+}
+
+// fillKeyMap populates key and dirent maps upon restore from saved
+// pathmap.
+func (s *session) fillKeyMap(ctx context.Context) error {
+ unlock := s.endpoints.lock()
+ defer unlock()
+
+ for ep, dirPath := range s.endpoints.pathMap {
+ _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath))
+ if err != nil {
+ return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err)
+ }
+
+ qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll())
+ if err != nil {
+ return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err)
+ }
+
+ key := device.MultiDeviceKey{
+ Device: attr.RDev,
+ SecondaryDevice: s.connID,
+ Inode: qid.Path,
+ }
+
+ s.endpoints.keyMap[key] = ep
+ }
+ return nil
+}
+
+// fillPathMap populates paths for endpoints from dirents in direntMap
+// before save.
+func (s *session) fillPathMap() error {
+ unlock := s.endpoints.lock()
+ defer unlock()
+
+ for ep, dir := range s.endpoints.direntMap {
+ mountRoot := dir.MountRoot()
+ defer mountRoot.DecRef()
+ dirPath, _ := dir.FullName(mountRoot)
+ if dirPath == "" {
+ return fmt.Errorf("error getting path from dirent")
+ }
+ s.endpoints.pathMap[ep] = dirPath
+ }
+ return nil
+}
+
+// restoreEndpointMaps recreates and fills the key and dirent maps.
+func (s *session) restoreEndpointMaps(ctx context.Context) error {
+ // When restoring, only need to create the keyMap because the dirent and path
+ // maps got stored through the save.
+ s.endpoints.keyMap = make(map[device.MultiDeviceKey]unix.BoundEndpoint)
+ if err := s.fillKeyMap(ctx); err != nil {
+ return fmt.Errorf("failed to insert sockets into endpoint map: %v", err)
+ }
+
+ // Re-create pathMap because it can no longer be trusted as socket paths can
+ // change while process continues to run. Empty pathMap will be re-filled upon
+ // next save.
+ s.endpoints.pathMap = make(map[unix.BoundEndpoint]string)
+ return nil
+}
diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go
index 0154810c8..8e6424492 100644
--- a/pkg/sentry/fs/gofer/session_state.go
+++ b/pkg/sentry/fs/gofer/session_state.go
@@ -18,16 +18,17 @@ import (
"fmt"
"gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/unet"
)
// beforeSave is invoked by stateify.
-//
-// TODO: Make map with private unix sockets savable.
-func (e *endpointMap) beforeSave() {
- if len(e.m) != 0 {
- panic("EndpointMap with existing private unix sockets cannot be saved")
+func (s *session) beforeSave() {
+ if s.endpoints != nil {
+ if err := s.fillPathMap(); err != nil {
+ panic("failed to save paths to endpoint map before saving" + err.Error())
+ }
}
}
@@ -72,6 +73,9 @@ func (s *session) afterLoad() {
if opts.aname != s.aname {
panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname))
}
+
+ // Check if endpointMaps exist when uds sockets are enabled
+ // (only pathmap will actualy have been saved).
if opts.privateunixsocket != (s.endpoints != nil) {
panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil))
}
@@ -96,4 +100,16 @@ func (s *session) afterLoad() {
if err != nil {
panic(fmt.Sprintf("failed to attach to aname: %v", err))
}
+
+ // If private unix sockets are enabled, create and fill the session's endpoint
+ // maps.
+ if opts.privateunixsocket {
+ // TODO: Context is not plumbed to save/restore.
+ ctx := &dummyClockContext{context.Background()}
+
+ if err = s.restoreEndpointMaps(ctx); err != nil {
+ panic("failed to restore endpoint maps: " + err.Error())
+ }
+ }
+
}
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 406756f5f..8628b9c69 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -30,6 +30,8 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) unix.Bound
}
if i.session().endpoints != nil {
+ unlock := i.session().endpoints.lock()
+ defer unlock()
ep := i.session().endpoints.get(i.fileState.key)
if ep != nil {
return ep
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 66c17debb..e7254fa7d 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -310,8 +310,8 @@ func (i *inodeOperations) Rename(ctx context.Context, oldParent *fs.Inode, oldNa
}
// Bind implements fs.InodeOperations.Bind.
-func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error {
- return syserror.EOPNOTSUPP
+func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
+ return nil, syserror.EOPNOTSUPP
}
// BoundEndpoint implements fs.InodeOperations.BoundEndpoint.
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index d0dbce5dd..db7240dca 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -223,7 +223,7 @@ func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent,
}
// Bind calls i.InodeOperations.Bind with i as the directory.
-func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) error {
+func (i *Inode) Bind(ctx context.Context, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) {
if i.overlay != nil {
return overlayBind(ctx, i.overlay, name, data, perm)
}
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index b33980178..952f9704d 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -146,7 +146,7 @@ type InodeOperations interface {
// Implementations must ensure that name does not already exist.
//
// The caller must ensure that this operation is permitted.
- Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) error
+ Bind(ctx context.Context, dir *Inode, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error)
// BoundEndpoint returns the socket endpoint at path stored in
// or generated by an Inode.
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 53fbd1481..543db9ac7 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -334,13 +334,13 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena
return nil
}
-func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) error {
+func overlayBind(ctx context.Context, o *overlayEntry, name string, data unix.BoundEndpoint, perm FilePermissions) (*Dirent, error) {
o.copyMu.RLock()
defer o.copyMu.RUnlock()
// We do not support doing anything exciting with sockets unless there
// is already a directory in the upper filesystem.
if o.upper == nil {
- return syserror.EOPNOTSUPP
+ return nil, syserror.EOPNOTSUPP
}
return o.upper.InodeOperations.Bind(ctx, o.upper, name, data, perm)
}
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index 04432f28c..d8333194b 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -314,17 +314,22 @@ func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, p
}
// Bind implements fs.InodeOperations.Bind.
-func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) error {
+func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep unix.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) {
if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil {
- return ErrDenied
+ return nil, ErrDenied
}
- _, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
+ inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
return d.NewBoundEndpoint(ctx, dir, ep, perms)
})
if err == syscall.EEXIST {
- return syscall.EADDRINUSE
+ return nil, syscall.EADDRINUSE
}
- return err
+ if err != nil {
+ return nil, err
+ }
+ // Take another ref on inode which will be donated to the new dirent.
+ inode.IncRef()
+ return fs.NewDirent(inode, name), nil
}
// CreateFifo implements fs.InodeOperations.CreateFifo.
diff --git a/pkg/sentry/fs/ramfs/ramfs.go b/pkg/sentry/fs/ramfs/ramfs.go
index 13e72e775..1028b5f1d 100644
--- a/pkg/sentry/fs/ramfs/ramfs.go
+++ b/pkg/sentry/fs/ramfs/ramfs.go
@@ -279,8 +279,8 @@ func (*Entry) CreateDirectory(context.Context, *fs.Inode, string, fs.FilePermiss
}
// Bind is not supported by default.
-func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) error {
- return ErrInvalidOp
+func (*Entry) Bind(context.Context, *fs.Inode, string, unix.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) {
+ return nil, ErrInvalidOp
}
// CreateFifo implements fs.InodeOperations.CreateFifo. CreateFifo is not supported by
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index c91091db4..c6f39fce3 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -215,8 +215,8 @@ func (d *dirInodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode,
}
// Bind implements fs.InodeOperations.Bind.
-func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) error {
- return syserror.EPERM
+func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data unix.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
+ return nil, syserror.EPERM
}
// GetFile implements fs.InodeOperations.GetFile.
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 5b6411f97..1c22e78b3 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -276,9 +276,11 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
}
// Create the socket.
- if err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}); err != nil {
+ childDir, err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}})
+ if err != nil {
return tcpip.ErrPortInUse
}
+ childDir.DecRef()
}
return nil
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 7ec68f573..d4c650892 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -1,6 +1,13 @@
package(licenses = ["notice"]) # Apache 2.0
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
+
+go_binary(
+ name = "uds_test_app",
+ srcs = [
+ "uds_test_app.go",
+ ],
+)
go_library(
name = "container",
@@ -29,6 +36,7 @@ go_test(
size = "medium",
srcs = ["container_test.go"],
data = [
+ ":uds_test_app",
"//runsc",
],
tags = [
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 9e38f5f77..11edcd615 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -15,6 +15,7 @@
package container_test
import (
+ "bytes"
"fmt"
"io"
"io/ioutil"
@@ -108,7 +109,8 @@ func procListToString(pl []*control.Process) string {
return fmt.Sprintf("[%s]", strings.Join(strs, ","))
}
-// createWriteableOutputFile creates an output file that can be read and written to in the sandbox.
+// createWriteableOutputFile creates an output file that can be read and
+// written to in the sandbox.
func createWriteableOutputFile(path string) (*os.File, error) {
outputFile, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
if err != nil {
@@ -136,13 +138,19 @@ func waitForFile(f *os.File) error {
return testutil.Poll(op, 5*time.Second)
}
-func readOutputNum(f *os.File, first bool) (int, error) {
- // Wait until file has contents.
+// readOutputNum reads a file at given filepath and returns the int at the
+// requested position.
+func readOutputNum(file string, position int) (int, error) {
+ f, err := os.Open(file)
+ if err != nil {
+ return 0, fmt.Errorf("error opening file: %q, %v", file, err)
+ }
+
+ // Ensure that there is content in output file.
if err := waitForFile(f); err != nil {
- return 0, err
+ return 0, fmt.Errorf("error waiting for output file: %v", err)
}
- // Read the first number in the new file
b, err := ioutil.ReadAll(f)
if err != nil {
return 0, fmt.Errorf("error reading file: %v", err)
@@ -151,14 +159,18 @@ func readOutputNum(f *os.File, first bool) (int, error) {
return 0, fmt.Errorf("error no content was read")
}
+ // Strip leading null bytes caused by file offset not being 0 upon restore.
+ b = bytes.Trim(b, "\x00")
nums := strings.Split(string(b), "\n")
- var num int
- if first {
- num, err = strconv.Atoi(nums[0])
- } else {
- num, err = strconv.Atoi(nums[len(nums)-2])
+ if position >= len(nums) {
+ return 0, fmt.Errorf("position %v is not within the length of content %v", position, nums)
+ }
+ if position == -1 {
+ // Expectation of newline at the end of last position.
+ position = len(nums) - 2
}
+ num, err := strconv.Atoi(nums[position])
if err != nil {
return 0, fmt.Errorf("error getting number from file: %v", err)
}
@@ -194,6 +206,27 @@ func run(spec *specs.Spec, conf *boot.Config) error {
return nil
}
+// findUDSApp finds the uds_test_app binary to be used in the UnixDomainSocket test.
+func findUDSApp() (string, error) {
+ // TODO: Use bazel FindBinary function.
+
+ // uds_test_app is in a directory like:
+ // './linux_amd64_pure_stripped/uds_test_app.go'.
+ //
+ // Since I don't want to construct 'linux_amd64_pure_stripped' based on the
+ // build type, do a quick search for: './*/uds_test_app'
+ // Note: This glob will only succeed when file is one directory deep.
+ matches, err := filepath.Glob("./*/uds_test_app")
+ if err != nil {
+ return "", fmt.Errorf("error globbing: %v", err)
+ }
+ if i := len(matches); i != 1 {
+ return "", fmt.Errorf("error identifying uds_test_app from matches: got %d matches", i)
+ }
+
+ return matches[0], nil
+}
+
type configOptions int
const (
@@ -204,7 +237,8 @@ const all = overlay | kvm
// configs generates different configurations to run tests.
func configs(opts configOptions) []*boot.Config {
- cs := []*boot.Config{testutil.TestConfig()}
+ cs := []*boot.Config{testutil.TestConfig(), testutil.TestConfig()}
+ return cs
if opts&overlay != 0 {
c := testutil.TestConfig()
@@ -544,6 +578,7 @@ func TestCheckpointRestore(t *testing.T) {
if err := os.Chmod(dir, 0777); err != nil {
t.Fatalf("error chmoding file: %q, %v", dir, err)
}
+ defer os.RemoveAll(dir)
outputPath := filepath.Join(dir, "output")
outputFile, err := createWriteableOutputFile(outputPath)
@@ -598,7 +633,7 @@ func TestCheckpointRestore(t *testing.T) {
}
defer os.RemoveAll(imagePath)
- lastNum, err := readOutputNum(outputFile, false)
+ lastNum, err := readOutputNum(outputPath, -1)
if err != nil {
t.Fatalf("error with outputFile: %v", err)
}
@@ -624,15 +659,22 @@ func TestCheckpointRestore(t *testing.T) {
t.Fatalf("error restoring container: %v", err)
}
- firstNum, err := readOutputNum(outputFile2, true)
+ // Wait until application has ran.
+ if err := waitForFile(outputFile2); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
+
+ firstNum, err := readOutputNum(outputPath, 0)
if err != nil {
t.Fatalf("error with outputFile: %v", err)
}
- // Check that lastNum is one less than firstNum and that the container picks up from where it left off.
+ // Check that lastNum is one less than firstNum and that the container picks
+ // up from where it left off.
if lastNum+1 != firstNum {
t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum)
}
+ cont2.Destroy()
// Restore into another container!
// Delete and recreate file before restoring.
@@ -656,15 +698,169 @@ func TestCheckpointRestore(t *testing.T) {
t.Fatalf("error restoring container: %v", err)
}
- firstNum2, err := readOutputNum(outputFile3, true)
+ // Wait until application has ran.
+ if err := waitForFile(outputFile3); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
+
+ firstNum2, err := readOutputNum(outputPath, 0)
if err != nil {
t.Fatalf("error with outputFile: %v", err)
}
- // Check that lastNum is one less than firstNum and that the container picks up from where it left off.
+ // Check that lastNum is one less than firstNum and that the container picks
+ // up from where it left off.
if lastNum+1 != firstNum2 {
t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2)
}
+ cont3.Destroy()
+ }
+}
+
+// TestUnixDomainSockets checks that Checkpoint/Restore works in cases
+// with filesystem Unix Domain Socket use.
+func TestUnixDomainSockets(t *testing.T) {
+ const (
+ output = "uds_output"
+ goferRoot = "/tmp2"
+ socket = "uds_socket"
+ )
+
+ // Skip overlay because test requires writing to host file.
+ for _, conf := range configs(kvm) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ dir, err := ioutil.TempDir("", "uds-test")
+ if err != nil {
+ t.Fatalf("ioutil.TempDir failed: %v", err)
+ }
+ if err := os.Chmod(dir, 0777); err != nil {
+ t.Fatalf("error chmoding file: %q, %v", dir, err)
+ }
+ defer os.RemoveAll(dir)
+
+ outputPath := filepath.Join(dir, output)
+
+ outputFile, err := createWriteableOutputFile(outputPath)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile.Close()
+
+ // Get file path for corresponding output file in sandbox.
+ outputFileSandbox := filepath.Join(goferRoot, output)
+
+ // Need to get working directory, even though not intuitive.
+ wd, _ := os.Getwd()
+ localPath, err := findUDSApp()
+ if err != nil {
+ t.Fatalf("error finding localPath: %v", err)
+ }
+ app := filepath.Join(wd, localPath)
+
+ if _, err = os.Stat(app); err != nil {
+ t.Fatalf("error finding the uds_test_app: %v", err)
+ }
+
+ socketPath := filepath.Join(dir, socket)
+ socketPathSandbox := filepath.Join(goferRoot, socket)
+ defer os.Remove(socketPath)
+
+ spec := testutil.NewSpecWithArgs(app, "--file", outputFileSandbox,
+ "--socket", socketPathSandbox)
+
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Type: "bind",
+ Destination: goferRoot,
+ Source: dir,
+ })
+
+ spec.Process.User = specs.User{
+ UID: uint32(os.Getuid()),
+ GID: uint32(os.Getgid()),
+ }
+
+ rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the container.
+ cont, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer cont.Destroy()
+ if err := cont.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // Set the image path, the location where the checkpoint image will be saved.
+ imagePath := filepath.Join(dir, "test-image-file")
+
+ // Create the image file and open for writing.
+ file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+ if err != nil {
+ t.Fatalf("error opening new file at imagePath: %v", err)
+ }
+ defer file.Close()
+ defer os.RemoveAll(imagePath)
+
+ // Wait until application has ran.
+ if err := waitForFile(outputFile); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
+
+ // Checkpoint running container; save state into new file.
+ if err := cont.Checkpoint(file); err != nil {
+ t.Fatalf("error checkpointing container to empty file: %v", err)
+ }
+
+ // Read last number outputted before checkpoint.
+ lastNum, err := readOutputNum(outputPath, -1)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
+
+ // Delete and recreate file before restoring.
+ if err := os.Remove(outputPath); err != nil {
+ t.Fatalf("error removing file")
+ }
+ outputFile2, err := createWriteableOutputFile(outputPath)
+ if err != nil {
+ t.Fatalf("error creating output file: %v", err)
+ }
+ defer outputFile2.Close()
+
+ // Restore into a new container.
+ contRestore, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer contRestore.Destroy()
+
+ if err := contRestore.Restore(spec, conf, imagePath); err != nil {
+ t.Fatalf("error restoring container: %v", err)
+ }
+
+ // Wait until application has ran.
+ if err := waitForFile(outputFile2); err != nil {
+ t.Fatalf("Failed to wait for output file: %v", err)
+ }
+
+ // Read first number outputted after restore.
+ firstNum, err := readOutputNum(outputPath, 0)
+ if err != nil {
+ t.Fatalf("error with outputFile: %v", err)
+ }
+
+ // Check that lastNum is one less than firstNum.
+ if lastNum+1 != firstNum {
+ t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum)
+ }
+ contRestore.Destroy()
}
}
diff --git a/runsc/container/uds_test_app.go b/runsc/container/uds_test_app.go
new file mode 100644
index 000000000..bef98ac66
--- /dev/null
+++ b/runsc/container/uds_test_app.go
@@ -0,0 +1,83 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary uds-test-app opens a socket and reads a series of numbers
+// which are then written to an output file.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "net"
+ "os"
+ "strconv"
+ "time"
+)
+
+var (
+ fileName = flag.String("file", "", "name of output file")
+ socketPath = flag.String("socket", "", "path to socket")
+)
+
+func server(listener net.Listener, f *os.File) {
+ buf := make([]byte, 16)
+
+ for {
+ c, err := listener.Accept()
+ if err != nil {
+ log.Fatal("error accepting connection:", err)
+ }
+ nr, err := c.Read(buf)
+ if err != nil {
+ log.Fatal("error reading from buf:", err)
+ }
+ data := buf[0:nr]
+ fmt.Fprintf(f, string(data)+"\n")
+ }
+}
+
+func main() {
+ flag.Parse()
+ if *fileName == "" || *socketPath == "" {
+ log.Fatalf("Flags cannot be empty, given: fileName=%s, socketPath=%s", *fileName, *socketPath)
+ }
+ outputFile, err := os.OpenFile(*fileName, os.O_WRONLY|os.O_CREATE, 0666)
+ if err != nil {
+ log.Fatal("error opening output file:", err)
+ }
+
+ socket := *socketPath
+ defer os.Remove(socket)
+
+ listener, err := net.Listen("unix", socket)
+ if err != nil {
+ log.Fatal("error listening on socket:", err)
+ }
+
+ go server(listener, outputFile)
+ for i := 0; ; i++ {
+
+ conn, err := net.Dial("unix", socket)
+ if err != nil {
+ log.Fatal("error dialing:", err)
+ }
+ if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil {
+ log.Fatal("error writing:", err)
+ }
+ conn.Close()
+ time.Sleep(100 * time.Millisecond)
+ }
+
+}