summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fs/attr.go14
-rw-r--r--pkg/sentry/fs/dev/dev.go8
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go21
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go27
-rw-r--r--pkg/sentry/fs/gofer/file.go16
-rw-r--r--pkg/sentry/fs/gofer/inode.go18
-rw-r--r--pkg/sentry/fs/gofer/path.go20
-rw-r--r--pkg/sentry/fs/tmpfs/fs.go2
-rw-r--r--pkg/sentry/fs/tmpfs/inode_file.go15
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go31
-rw-r--r--pkg/sentry/fs/user/user_test.go5
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go13
-rw-r--r--pkg/sentry/fsimpl/testutil/kernel.go4
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go8
-rw-r--r--pkg/sentry/kernel/cgroup.go29
-rw-r--r--pkg/sentry/kernel/kernel.go59
-rw-r--r--pkg/sentry/kernel/pipe/pipe_util.go6
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go10
-rw-r--r--pkg/sentry/kernel/task_context.go5
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go4
-rw-r--r--pkg/sentry/socket/netstack/netstack.go171
-rw-r--r--pkg/sentry/socket/netstack/tun.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go22
23 files changed, 362 insertions, 148 deletions
diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go
index b90f7c1be..4c99944e7 100644
--- a/pkg/sentry/fs/attr.go
+++ b/pkg/sentry/fs/attr.go
@@ -478,6 +478,20 @@ func (f FilePermissions) AnyRead() bool {
return f.User.Read || f.Group.Read || f.Other.Read
}
+// HasSetUIDOrGID returns true if either the setuid or setgid bit is set.
+func (f FilePermissions) HasSetUIDOrGID() bool {
+ return f.SetUID || f.SetGID
+}
+
+// DropSetUIDAndMaybeGID turns off setuid, and turns off setgid if f allows
+// group execution.
+func (f *FilePermissions) DropSetUIDAndMaybeGID() {
+ f.SetUID = false
+ if f.Group.Execute {
+ f.SetGID = false
+ }
+}
+
// FileOwner represents ownership of a file.
//
// +stateify savable
diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go
index e84ba7a5d..c62effd52 100644
--- a/pkg/sentry/fs/dev/dev.go
+++ b/pkg/sentry/fs/dev/dev.go
@@ -16,6 +16,7 @@
package dev
import (
+ "fmt"
"math"
"gvisor.dev/gvisor/pkg/context"
@@ -90,6 +91,11 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In
// New returns the root node of a device filesystem.
func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+ shm, err := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc, nil /* parent */)
+ if err != nil {
+ panic(fmt.Sprintf("tmpfs.NewDir failed: %v", err))
+ }
+
contents := map[string]*fs.Inode{
"fd": newSymlink(ctx, "/proc/self/fd", msrc),
"stdin": newSymlink(ctx, "/proc/self/fd/0", msrc),
@@ -108,7 +114,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
"random": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor),
"urandom": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor),
- "shm": tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc),
+ "shm": shm,
// A devpts is typically mounted at /dev/pts to provide
// pseudoterminal support. Place an empty directory there for
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index e1e38b498..8ac3738e9 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -155,12 +155,20 @@ func (h *HostMappable) DecRef(fr memmap.FileRange) {
// T2: Appends to file causing it to grow
// T2: Writes to mapped pages and COW happens
// T1: Continues and wronly invalidates the page mapped in step above.
-func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
+func (h *HostMappable) Truncate(ctx context.Context, newSize int64, uattr fs.UnstableAttr) error {
h.truncateMu.Lock()
defer h.truncateMu.Unlock()
mask := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: newSize}
+
+ // Truncating a file clears privilege bits.
+ if uattr.Perms.HasSetUIDOrGID() {
+ mask.Perms = true
+ attr.Perms = uattr.Perms
+ attr.Perms.DropSetUIDAndMaybeGID()
+ }
+
if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr, false); err != nil {
return err
}
@@ -193,10 +201,17 @@ func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64)
}
// Write writes to the file backing this mappable.
-func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64, uattr fs.UnstableAttr) (int64, error) {
h.truncateMu.RLock()
+ defer h.truncateMu.RUnlock()
n, err := src.CopyInTo(ctx, &writer{ctx: ctx, hostMappable: h, off: offset})
- h.truncateMu.RUnlock()
+ if n > 0 && uattr.Perms.HasSetUIDOrGID() {
+ mask := fs.AttrMask{Perms: true}
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ if err := h.backingFile.SetMaskedAttributes(ctx, mask, uattr, false); err != nil {
+ return n, err
+ }
+ }
return n, err
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 7856b354b..855029b84 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -310,6 +310,12 @@ func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode,
now := ktime.NowFromContext(ctx)
masked := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: size}
+ if c.attr.Perms.HasSetUIDOrGID() {
+ masked.Perms = true
+ attr.Perms = c.attr.Perms
+ attr.Perms.DropSetUIDAndMaybeGID()
+ c.attr.Perms = attr.Perms
+ }
if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr, false); err != nil {
c.dataMu.Unlock()
return err
@@ -685,13 +691,14 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
return done, nil
}
-// maybeGrowFile grows the file's size if data has been written past the old
-// size.
+// maybeUpdateAttrs updates the file's attributes after a write. It updates
+// size if data has been written past the old size, and setuid/setgid if any
+// bytes were written.
//
// Preconditions:
// * rw.c.attrMu must be locked.
// * rw.c.dataMu must be locked.
-func (rw *inodeReadWriter) maybeGrowFile() {
+func (rw *inodeReadWriter) maybeUpdateAttrs(nwritten uint64) {
// If the write ends beyond the file's previous size, it causes the
// file to grow.
if rw.offset > rw.c.attr.Size {
@@ -705,6 +712,12 @@ func (rw *inodeReadWriter) maybeGrowFile() {
rw.c.attr.Usage = rw.offset
rw.c.dirtyAttr.Usage = true
}
+
+ // If bytes were written, ensure setuid and setgid are cleared.
+ if nwritten > 0 && rw.c.attr.Perms.HasSetUIDOrGID() {
+ rw.c.dirtyAttr.Perms = true
+ rw.c.attr.Perms.DropSetUIDAndMaybeGID()
+ }
}
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
@@ -732,7 +745,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
segMR := seg.Range().Intersect(mr)
ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write)
if err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -744,7 +757,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
srcs = srcs.DropFirst64(n)
rw.c.dirty.MarkDirty(segMR)
if err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -765,7 +778,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
srcs = srcs.DropFirst64(n)
// Partial writes are fine. But we must stop writing.
if n != src.NumBytes() || err != nil {
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, err
}
@@ -774,7 +787,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
seg, gap = gap.NextSegment(), FileRangeGapIterator{}
}
}
- rw.maybeGrowFile()
+ rw.maybeUpdateAttrs(done)
rw.c.dataMu.Unlock()
return done, nil
}
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index 819e140bc..73d80d9b5 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -237,10 +237,20 @@ func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.I
// and availability of a host-mappable FD.
if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
n, err = f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
- } else if f.inodeOperations.fileState.hostMappable != nil {
- n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset)
} else {
- n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
+ uattr, e := f.UnstableAttr(ctx, file)
+ if e != nil {
+ return 0, e
+ }
+ if f.inodeOperations.fileState.hostMappable != nil {
+ n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset, uattr)
+ } else {
+ n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
+ if n > 0 && uattr.Perms.HasSetUIDOrGID() {
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ f.inodeOperations.SetPermissions(ctx, file.Dirent.Inode, uattr.Perms)
+ }
+ }
}
if n == 0 {
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index b97635ec4..da3178527 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -600,11 +600,25 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length
if i.session().cachePolicy.useCachingInodeOps(inode) {
return i.cachingInodeOps.Truncate(ctx, inode, length)
}
+
+ uattr, err := i.fileState.unstableAttr(ctx)
+ if err != nil {
+ return err
+ }
+
if i.session().cachePolicy == cacheRemoteRevalidating {
- return i.fileState.hostMappable.Truncate(ctx, length)
+ return i.fileState.hostMappable.Truncate(ctx, length, uattr)
+ }
+
+ mask := p9.SetAttrMask{Size: true}
+ attr := p9.SetAttr{Size: uint64(length)}
+ if uattr.Perms.HasSetUIDOrGID() {
+ mask.Permissions = true
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ attr.Permissions = p9.FileMode(uattr.Perms.LinuxMode())
}
- return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)})
+ return i.fileState.file.setAttr(ctx, mask, attr)
}
// GetXattr implements fs.InodeOperations.GetXattr.
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index 6b3627813..940838a44 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -130,7 +130,16 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
panic(fmt.Sprintf("Create called with unknown or unset open flags: %v", flags))
}
+ // If the parent directory has setgid enabled, change the new file's owner.
owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ }
+
hostFile, err := newFile.create(ctx, name, openFlags, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
if err != nil {
// Could not create the file.
@@ -225,7 +234,18 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s
return syserror.ENAMETOOLONG
}
+ // If the parent directory has setgid enabled, change the new directory's
+ // owner and enable setgid.
owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ perm.SetGID = true
+ }
+
if _, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
return err
}
diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go
index bc117ca6a..b48d475ed 100644
--- a/pkg/sentry/fs/tmpfs/fs.go
+++ b/pkg/sentry/fs/tmpfs/fs.go
@@ -151,5 +151,5 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
}
// Construct the tmpfs root.
- return NewDir(ctx, nil, owner, perms, msrc), nil
+ return NewDir(ctx, nil, owner, perms, msrc, nil /* parent */)
}
diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go
index f4de8c968..7faa822f0 100644
--- a/pkg/sentry/fs/tmpfs/inode_file.go
+++ b/pkg/sentry/fs/tmpfs/inode_file.go
@@ -226,6 +226,12 @@ func (f *fileInodeOperations) Truncate(ctx context.Context, _ *fs.Inode, size in
now := ktime.NowFromContext(ctx)
f.attr.ModificationTime = now
f.attr.StatusChangeTime = now
+
+ // Truncating clears privilege bits.
+ f.attr.Perms.SetUID = false
+ if f.attr.Perms.Group.Execute {
+ f.attr.Perms.SetGID = false
+ }
}
f.dataMu.Unlock()
@@ -363,7 +369,14 @@ func (f *fileInodeOperations) write(ctx context.Context, src usermem.IOSequence,
now := ktime.NowFromContext(ctx)
f.attr.ModificationTime = now
f.attr.StatusChangeTime = now
- return src.CopyInTo(ctx, &fileReadWriter{f, offset})
+ nwritten, err := src.CopyInTo(ctx, &fileReadWriter{f, offset})
+
+ // Writing clears privilege bits.
+ if nwritten > 0 {
+ f.attr.Perms.DropSetUIDAndMaybeGID()
+ }
+
+ return nwritten, err
}
type fileReadWriter struct {
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 577052888..6aa8ff331 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -87,7 +87,20 @@ type Dir struct {
var _ fs.InodeOperations = (*Dir)(nil)
// NewDir returns a new directory.
-func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource) *fs.Inode {
+func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource, parent *fs.Inode) (*fs.Inode, error) {
+ // If the parent has setgid enabled, the new directory enables it and changes
+ // its GID.
+ if parent != nil {
+ parentUattr, err := parent.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ perms.SetGID = true
+ }
+ }
+
d := &Dir{
ramfsDir: ramfs.NewDir(ctx, contents, owner, perms),
kernel: kernel.KernelFromContext(ctx),
@@ -101,7 +114,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn
InodeID: tmpfsDevice.NextIno(),
BlockSize: hostarch.PageSize,
Type: fs.Directory,
- })
+ }), nil
}
// afterLoad is invoked by stateify.
@@ -219,11 +232,21 @@ func (d *Dir) SetTimestamps(ctx context.Context, i *fs.Inode, ts fs.TimeSpec) er
func (d *Dir) newCreateOps() *ramfs.CreateOps {
return &ramfs.CreateOps{
NewDir: func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error) {
- return NewDir(ctx, nil, fs.FileOwnerFromContext(ctx), perms, dir.MountSource), nil
+ return NewDir(ctx, nil, fs.FileOwnerFromContext(ctx), perms, dir.MountSource, dir)
},
NewFile: func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error) {
+ // If the parent has setgid enabled, change the GID of the new file.
+ owner := fs.FileOwnerFromContext(ctx)
+ parentUattr, err := dir.UnstableAttr(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if parentUattr.Perms.SetGID {
+ owner.GID = parentUattr.Owner.GID
+ }
+
uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{
- Owner: fs.FileOwnerFromContext(ctx),
+ Owner: owner,
Perms: perms,
// Always start unlinked.
Links: 0,
diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go
index 12b786224..7f8fa8038 100644
--- a/pkg/sentry/fs/user/user_test.go
+++ b/pkg/sentry/fs/user/user_test.go
@@ -104,7 +104,10 @@ func TestGetExecUserHome(t *testing.T) {
t.Run(name, func(t *testing.T) {
ctx := contexttest.Context(t)
msrc := fs.NewPseudoMountSource(ctx)
- rootInode := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc)
+ rootInode, err := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc, nil /* parent */)
+ if err != nil {
+ t.Fatalf("tmpfs.NewDir failed: %v", err)
+ }
mns, err := fs.NewMountNamespace(ctx, rootInode)
if err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index 43bfd69a3..82491a0f8 100644
--- a/pkg/sentry/fsimpl/overlay/regular_file.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -207,9 +207,10 @@ func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) e
return err
}
- // Changing owners may clear one or both of the setuid and setgid bits,
- // so we may have to update opts before setting d.mode.
- if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ // Changing owners or truncating may clear one or both of the setuid and
+ // setgid bits, so we may have to update opts before setting d.mode.
+ inotifyMask := opts.Stat.Mask
+ if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 {
stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
Mask: linux.STATX_MODE,
})
@@ -218,10 +219,14 @@ func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) e
}
opts.Stat.Mode = stat.Mode
opts.Stat.Mask |= linux.STATX_MODE
+ // Don't generate inotify IN_ATTRIB for size-only changes (truncations).
+ if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ inotifyMask |= linux.STATX_MODE
+ }
}
d.updateAfterSetStatLocked(&opts)
- if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
+ if ev := vfs.InotifyEventFromStatMask(inotifyMask); ev != 0 {
d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
}
return nil
diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go
index 33e52ce64..438840ae2 100644
--- a/pkg/sentry/fsimpl/testutil/kernel.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -88,6 +88,7 @@ func Boot() (*kernel.Kernel, error) {
return nil, fmt.Errorf("creating timekeeper: %v", err)
}
tk.SetClocks(time.NewCalibratedClocks())
+ k.SetTimekeeper(tk)
creds := auth.NewRootCredentials(auth.NewRootUserNamespace())
@@ -96,7 +97,6 @@ func Boot() (*kernel.Kernel, error) {
if err = k.Init(kernel.InitKernelArgs{
ApplicationCores: uint(runtime.GOMAXPROCS(-1)),
FeatureSet: cpuid.HostFeatureSet(),
- Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
Vdso: vdso,
RootUTSNamespace: kernel.NewUTSNamespace("hostname", "domain", creds.UserNamespace),
@@ -181,7 +181,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
memfile := os.NewFile(uintptr(memfd), memfileName)
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
- memfile.Close()
+ _ = memfile.Close()
return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err)
}
return mf, nil
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index fa7696ad6..969003613 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -868,6 +868,10 @@ func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCa
fd.mu.Lock()
defer fd.mu.Unlock()
+ if _, err := fd.lowerFD.Seek(ctx, fd.off, linux.SEEK_SET); err != nil {
+ return err
+ }
+
var ds []vfs.Dirent
err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
// Do not include the Merkle tree files.
@@ -890,8 +894,8 @@ func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCa
return err
}
- // The result should contain all children plus "." and "..".
- if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 {
+ // The result should be a part of all children plus "." and "..", counting from fd.off.
+ if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2-int(fd.off) {
return fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds)))
}
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index 0fbf27f64..c93ef6ac1 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -181,7 +181,23 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
for _, h := range r.hierarchies {
if h.match(ctypes) {
- h.fs.IncRef()
+ if !h.fs.TryIncRef() {
+ // Racing with filesystem destruction, namely h.fs.Release.
+ // Since we hold r.mu, we know the hierarchy hasn't been
+ // unregistered yet, but its associated filesystem is tearing
+ // down.
+ //
+ // If we simply indicate the hierarchy wasn't found without
+ // cleaning up the registry, the caller can race with the
+ // unregister and find itself temporarily unable to create a new
+ // hierarchy with a subset of the relevant controllers.
+ //
+ // To keep the result of FindHierarchy consistent with the
+ // uniqueness of controllers enforced by Register, drop the
+ // dying hierarchy now. The eventual unregister by the FS
+ // teardown will become a no-op.
+ return nil
+ }
return h.fs
}
}
@@ -230,12 +246,17 @@ func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error {
return nil
}
-// Unregister removes a previously registered hierarchy from the registry. If
-// the controller was not previously registered, Unregister is a no-op.
+// Unregister removes a previously registered hierarchy from the registry. If no
+// such hierarchy is registered, Unregister is a no-op.
func (r *CgroupRegistry) Unregister(hid uint32) {
r.mu.Lock()
- defer r.mu.Unlock()
+ r.unregisterLocked(hid)
+ r.mu.Unlock()
+}
+// Precondition: Caller must hold r.mu.
+// +checklocks:r.mu
+func (r *CgroupRegistry) unregisterLocked(hid uint32) {
if h, ok := r.hierarchies[hid]; ok {
for name, _ := range h.controllers {
delete(r.controllers, name)
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index e6e9da898..febe7fe50 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -306,9 +306,6 @@ type InitKernelArgs struct {
// FeatureSet is the emulated CPU feature set.
FeatureSet *cpuid.FeatureSet
- // Timekeeper manages time for all tasks in the system.
- Timekeeper *Timekeeper
-
// RootUserNamespace is the root user namespace.
RootUserNamespace *auth.UserNamespace
@@ -348,29 +345,34 @@ type InitKernelArgs struct {
PIDNamespace *PIDNamespace
}
+// SetTimekeeper sets Kernel.timekeeper. SetTimekeeper must be called before
+// Init.
+func (k *Kernel) SetTimekeeper(tk *Timekeeper) {
+ k.timekeeper = tk
+}
+
// Init initialize the Kernel with no tasks.
//
// Callers must manually set Kernel.Platform and call Kernel.SetMemoryFile
-// before calling Init.
+// and Kernel.SetTimekeeper before calling Init.
func (k *Kernel) Init(args InitKernelArgs) error {
if args.FeatureSet == nil {
- return fmt.Errorf("FeatureSet is nil")
+ return fmt.Errorf("args.FeatureSet is nil")
}
- if args.Timekeeper == nil {
- return fmt.Errorf("Timekeeper is nil")
+ if k.timekeeper == nil {
+ return fmt.Errorf("timekeeper is nil")
}
- if args.Timekeeper.clocks == nil {
+ if k.timekeeper.clocks == nil {
return fmt.Errorf("must call Timekeeper.SetClocks() before Kernel.Init()")
}
if args.RootUserNamespace == nil {
- return fmt.Errorf("RootUserNamespace is nil")
+ return fmt.Errorf("args.RootUserNamespace is nil")
}
if args.ApplicationCores == 0 {
- return fmt.Errorf("ApplicationCores is 0")
+ return fmt.Errorf("args.ApplicationCores is 0")
}
k.featureSet = args.FeatureSet
- k.timekeeper = args.Timekeeper
k.tasks = newTaskSet(args.PIDNamespace)
k.rootUserNamespace = args.RootUserNamespace
k.rootUTSNamespace = args.RootUTSNamespace
@@ -395,8 +397,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
}
k.extraAuxv = args.ExtraAuxv
k.vdso = args.Vdso
- k.realtimeClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Realtime}
- k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
+ k.realtimeClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Realtime}
+ k.monotonicClock = &timekeeperClock{tk: k.timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
k.ptraceExceptions = make(map[*Task]*Task)
@@ -654,12 +656,12 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
defer k.tasks.mu.RUnlock()
for t := range k.tasks.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if mm := t.image.MemoryManager; mm != nil {
- if _, ok := invalidated[mm]; !ok {
- if err := mm.InvalidateUnsavable(ctx); err != nil {
+ if memMgr := t.image.MemoryManager; memMgr != nil {
+ if _, ok := invalidated[memMgr]; !ok {
+ if err := memMgr.InvalidateUnsavable(ctx); err != nil {
return err
}
- invalidated[mm] = struct{}{}
+ invalidated[memMgr] = struct{}{}
}
}
// I really wish we just had a sync.Map of all MMs...
@@ -1553,22 +1555,23 @@ func (k *Kernel) SetSaveError(err error) {
var _ tcpip.Clock = (*Kernel)(nil)
-// NowNanoseconds implements tcpip.Clock.NowNanoseconds.
-func (k *Kernel) NowNanoseconds() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Realtime)
+// Now implements tcpip.Clock.NowNanoseconds.
+func (k *Kernel) Now() time.Time {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Realtime)
if err != nil {
- panic("Kernel.NowNanoseconds: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Realtime): " + err.Error())
}
- return now
+ return time.Unix(0, nsec)
}
// NowMonotonic implements tcpip.Clock.NowMonotonic.
-func (k *Kernel) NowMonotonic() int64 {
- now, err := k.timekeeper.GetTime(sentrytime.Monotonic)
+func (k *Kernel) NowMonotonic() tcpip.MonotonicTime {
+ nsec, err := k.timekeeper.GetTime(sentrytime.Monotonic)
if err != nil {
- panic("Kernel.NowMonotonic: " + err.Error())
+ panic("timekeeper.GetTime(sentrytime.Monotonic): " + err.Error())
}
- return now
+ var mt tcpip.MonotonicTime
+ return mt.Add(time.Duration(nsec) * time.Nanosecond)
}
// AfterFunc implements tcpip.Clock.AfterFunc.
@@ -1783,7 +1786,7 @@ func (k *Kernel) EmitUnimplementedEvent(ctx context.Context) {
})
t := TaskFromContext(ctx)
- k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
+ _, _ = k.unimplementedSyscallEmitter.Emit(&uspb.UnimplementedSyscall{
Tid: int32(t.ThreadID()),
Registers: t.Arch().StateData().Proto(),
})
@@ -1874,7 +1877,7 @@ func (k *Kernel) ReleaseCgroupHierarchy(hid uint32) {
return
}
t.mu.Lock()
- for cg, _ := range t.cgroups {
+ for cg := range t.cgroups {
if cg.HierarchyID() == hid {
t.leaveCgroupLocked(cg)
}
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 2d89b9ccd..24e467e93 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -86,6 +86,12 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error)
if n > 0 {
p.Notify(waiter.ReadableEvents)
}
+ if err == unix.EPIPE {
+ // If we are returning EPIPE send SIGPIPE to the task.
+ if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil {
+ sendSig(linux.SIGPIPE)
+ }
+ }
return n, err
}
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index fe2ab1662..1d9edf118 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -35,10 +35,10 @@ const (
// Maximum number of semaphore sets.
setsMax = linux.SEMMNI
- // Maximum number of semaphroes in a semaphore set.
+ // Maximum number of semaphores in a semaphore set.
semsMax = linux.SEMMSL
- // Maximum number of semaphores in all semaphroe sets.
+ // Maximum number of semaphores in all semaphore sets.
semsTotalMax = linux.SEMMNS
)
@@ -220,7 +220,7 @@ func (r *Registry) HighestIndex() int32 {
defer r.mu.Unlock()
// By default, highest used index is 0 even though
- // there is no semaphroe set.
+ // there is no semaphore set.
var highestIndex int32
for index := range r.indexes {
if index > highestIndex {
@@ -702,7 +702,9 @@ func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
return s.checkCapability(creds)
}
-// destroy destroys the set. Caller must hold 's.mu'.
+// destroy destroys the set.
+//
+// Preconditions: Caller must hold 's.mu'.
func (s *Set) destroy() {
// Notify all waiters. They will fail on the next attempt to execute
// operations and return error.
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index 70b0699dc..c82d9e82b 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,6 +17,7 @@ package kernel
import (
"time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -113,6 +114,10 @@ func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} {
return t.k.RealtimeClock()
case limits.CtxLimits:
return t.tg.limits
+ case linux.CtxSignalNoInfoFunc:
+ return func(sig linux.Signal) error {
+ return t.SendSignal(SignalInfoNoInfo(sig, t, t))
+ }
case pgalloc.CtxMemoryFile:
return t.k.mf
case pgalloc.CtxMemoryFileProvider:
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index f42d73178..e1c4b06fc 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -58,8 +58,8 @@ var nameToID = map[string]stack.TableID{
// DefaultLinuxTables returns the rules of stack.DefaultTables() wrapped for
// compatibility with netfilter extensions.
-func DefaultLinuxTables() *stack.IPTables {
- tables := stack.DefaultTables()
+func DefaultLinuxTables(seed uint32) *stack.IPTables {
+ tables := stack.DefaultTables(seed)
tables.VisitTargets(func(oldTarget stack.Target) stack.Target {
switch val := oldTarget.(type) {
case *stack.AcceptTarget:
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 0b64a24c3..037ccfec8 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -77,41 +77,59 @@ func mustCreateGauge(name, description string) *tcpip.StatCounter {
// Metrics contains metrics exported by netstack.
var Metrics = tcpip.Stats{
- UnknownProtocolRcvdPackets: mustCreateMetric("/netstack/unknown_protocol_received_packets", "Number of packets received by netstack that were for an unknown or unsupported protocol."),
- MalformedRcvdPackets: mustCreateMetric("/netstack/malformed_received_packets", "Number of packets received by netstack that were deemed malformed."),
- DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped by netstack due to full queues."),
+ DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped at the transport layer."),
+ NICs: tcpip.NICStats{
+ UnknownL3ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l3_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L3 protocol."),
+ UnknownL4ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l4_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L4 protocol."),
+ MalformedL4RcvdPackets: mustCreateMetric("/netstack/nic/malformed_l4_received_packets", "Number of packets received that failed L4 header parsing."),
+ Tx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/tx/packets", "Number of packets transmitted."),
+ Bytes: mustCreateMetric("/netstack/nic/tx/bytes", "Number of bytes transmitted."),
+ },
+ Rx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/rx/packets", "Number of packets received."),
+ Bytes: mustCreateMetric("/netstack/nic/rx/bytes", "Number of bytes received."),
+ },
+ DisabledRx: tcpip.NICPacketStats{
+ Packets: mustCreateMetric("/netstack/nic/disabled_rx/packets", "Number of packets received on disabled NICs."),
+ Bytes: mustCreateMetric("/netstack/nic/disabled_rx/bytes", "Number of bytes received on disabled NICs."),
+ },
+ Neighbor: tcpip.NICNeighborStats{
+ UnreachableEntryLookups: mustCreateMetric("/netstack/nic/neighbor/unreachable_entry_loopups", "Number of lookups performed on a neighbor entry in Unreachable state."),
+ },
+ },
ICMP: tcpip.ICMPStats{
V4: tcpip.ICMPv4Stats{
PacketsSent: tcpip.ICMPv4SentPacketStats{
ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Number of ICMPv4 information request packets sent by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent."),
+ SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent."),
+ Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent."),
+ Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent."),
+ TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent."),
+ InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Number of ICMPv4 information request packets sent."),
+ InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent."),
},
- Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped by netstack due to link layer errors."),
- RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped by netstack due to rate limit being exceeded."),
+ Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped due to link layer errors."),
+ RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped due to rate limit being exceeded."),
},
PacketsReceived: tcpip.ICMPv4ReceivedPacketStats{
ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received."),
+ SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received."),
+ Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received."),
+ Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received."),
+ TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received."),
+ InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received."),
+ InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received."),
},
Invalid: mustCreateMetric("/netstack/icmp/v4/packets_received/invalid", "Number of ICMPv4 packets received that the transport layer could not parse."),
},
@@ -119,40 +137,40 @@ var Metrics = tcpip.Stats{
V6: tcpip.ICMPv6Stats{
PacketsSent: tcpip.ICMPv6SentPacketStats{
ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent by netstack."),
- MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent by netstack."),
- MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent by netstack."),
- MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent."),
+ PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent."),
+ RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent."),
+ RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent."),
+ NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent."),
+ NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent."),
+ RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent."),
+ MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent."),
+ MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."),
+ MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."),
},
- Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped by netstack due to link layer errors."),
- RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped by netstack due to rate limit being exceeded."),
+ Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped due to link layer errors."),
+ RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped due to rate limit being exceeded."),
},
PacketsReceived: tcpip.ICMPv6ReceivedPacketStats{
ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received by netstack."),
- MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received by netstack."),
- MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent by netstack."),
- MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent by netstack."),
+ EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received."),
+ EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received."),
+ DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received."),
+ PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received."),
+ TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received."),
+ ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received."),
+ RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received."),
+ RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received."),
+ NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received."),
+ NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received."),
+ RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received."),
+ MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received."),
+ MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."),
+ MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."),
},
Unrecognized: mustCreateMetric("/netstack/icmp/v6/packets_received/unrecognized", "Number of ICMPv6 packets received that the transport layer does not know how to parse."),
Invalid: mustCreateMetric("/netstack/icmp/v6/packets_received/invalid", "Number of ICMPv6 packets received that the transport layer could not parse."),
@@ -163,23 +181,23 @@ var Metrics = tcpip.Stats{
IGMP: tcpip.IGMPStats{
PacketsSent: tcpip.IGMPSentPacketStats{
IGMPPacketStats: tcpip.IGMPPacketStats{
- MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent by netstack."),
- V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent by netstack."),
- V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", "Number of IGMPv2 Membership Report messages sent by netstack."),
- LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent by netstack."),
+ MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent."),
+ V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent."),
+ V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", "Number of IGMPv2 Membership Report messages sent."),
+ LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent."),
},
- Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped by netstack due to link layer errors."),
+ Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped due to link layer errors."),
},
PacketsReceived: tcpip.IGMPReceivedPacketStats{
IGMPPacketStats: tcpip.IGMPPacketStats{
- MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received by netstack."),
- V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received by netstack."),
- V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received by netstack."),
- LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received by netstack."),
+ MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received."),
+ V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received."),
+ V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received."),
+ LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received."),
},
- Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received by netstack that could not be parsed."),
+ Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received that could not be parsed."),
ChecksumErrors: mustCreateMetric("/netstack/igmp/packets_received/checksum_errors", "Number of received IGMP packets with bad checksums."),
- Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received by netstack."),
+ Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received."),
},
},
IP: tcpip.IPStats{
@@ -205,7 +223,8 @@ var Metrics = tcpip.Stats{
LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."),
LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."),
ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due to a problem processing their IPv6 extension headers."),
- PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not fit within the outgoing MTU."),
+ PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not be forwarded because they could not fit within the outgoing MTU."),
+ HostUnreachable: mustCreateMetric("/netstack/ip/forwarding/host_unreachable", "Number of IP packets received which could not be forwarded due to unresolvable next hop."),
Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."),
},
},
@@ -1126,7 +1145,14 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
// TODO(b/64800844): Translate fields once they are added to
// tcpip.TCPInfoOption.
- info := linux.TCPInfo{}
+ info := linux.TCPInfo{
+ State: uint8(v.State),
+ RTO: uint32(v.RTO / time.Microsecond),
+ RTT: uint32(v.RTT / time.Microsecond),
+ RTTVar: uint32(v.RTTVar / time.Microsecond),
+ SndSsthresh: v.SndSsthresh,
+ SndCwnd: v.SndCwnd,
+ }
switch v.CcState {
case tcpip.RTORecovery:
info.CaState = linux.TCP_CA_Loss
@@ -1137,11 +1163,6 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
case tcpip.Open:
info.CaState = linux.TCP_CA_Open
}
- info.RTO = uint32(v.RTO / time.Microsecond)
- info.RTT = uint32(v.RTT / time.Microsecond)
- info.RTTVar = uint32(v.RTTVar / time.Microsecond)
- info.SndSsthresh = v.SndSsthresh
- info.SndCwnd = v.SndCwnd
// In netstack reorderSeen is updated only when RACK is enabled.
// We only track whether the reordering is seen, which is
diff --git a/pkg/sentry/socket/netstack/tun.go b/pkg/sentry/socket/netstack/tun.go
index 288dd0c9e..c7ed52702 100644
--- a/pkg/sentry/socket/netstack/tun.go
+++ b/pkg/sentry/socket/netstack/tun.go
@@ -40,7 +40,7 @@ func LinuxToTUNFlags(flags uint16) (tun.Flags, error) {
// Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately)
// when there is no sk_filter. See __tun_chr_ioctl() in
// net/drivers/tun.c.
- if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI) != 0 {
+ if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI|linux.IFF_ONE_QUEUE) != 0 {
return tun.Flags{}, syserror.EINVAL
}
return tun.Flags{
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 9cd238efd..37443ab78 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -1673,9 +1673,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
if err != nil {
return err
}
+
c := t.Credentials()
hasCap := d.Inode.CheckCapability(t, linux.CAP_CHOWN)
isOwner := uattr.Owner.UID == c.EffectiveKUID
+ var clearPrivilege bool
if uid.Ok() {
kuid := c.UserNamespace.MapToKUID(uid)
// Valid UID must be supplied if UID is to be changed.
@@ -1693,6 +1695,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return syserror.EPERM
}
+ // The setuid and setgid bits are cleared during a chown.
+ if uattr.Owner.UID != kuid {
+ clearPrivilege = true
+ }
+
owner.UID = kuid
}
if gid.Ok() {
@@ -1711,6 +1718,11 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return syserror.EPERM
}
+ // The setuid and setgid bits are cleared during a chown.
+ if uattr.Owner.GID != kgid {
+ clearPrivilege = true
+ }
+
owner.GID = kgid
}
@@ -1721,10 +1733,14 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
if err := d.Inode.SetOwner(t, d, owner); err != nil {
return err
}
+ // Clear privilege bits if needed and they are set.
+ if clearPrivilege && uattr.Perms.HasSetUIDOrGID() && !fs.IsDir(d.Inode.StableAttr) {
+ uattr.Perms.DropSetUIDAndMaybeGID()
+ if !d.Inode.SetPermissions(t, d, uattr.Perms) {
+ return syserror.EPERM
+ }
+ }
- // When the owner or group are changed by an unprivileged user,
- // chown(2) also clears the set-user-ID and set-group-ID bits, but
- // we do not support them.
return nil
}