Diffstat (limited to 'runsc')
86 files changed, 2259 insertions, 1273 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index a79afbdc4..ff7a5a44b 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -15,6 +15,7 @@ go_library( "limits.go", "loader.go", "network.go", + "profile.go", "strace.go", "vfs.go", ], @@ -32,6 +33,7 @@ go_library( "//pkg/control/server", "//pkg/coverage", "//pkg/cpuid", + "//pkg/errors/linuxerr", "//pkg/eventchannel", "//pkg/fd", "//pkg/flipcall", @@ -44,6 +46,7 @@ go_library( "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", + "//pkg/sentry/control:control_go_proto", "//pkg/sentry/devices/memdev", "//pkg/sentry/devices/ttydev", "//pkg/sentry/devices/tundev", @@ -94,11 +97,10 @@ go_library( "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip", + "//pkg/tcpip/link/ethernet", "//pkg/tcpip/link/fdbased", "//pkg/tcpip/link/loopback", - "//pkg/tcpip/link/packetsocket", "//pkg/tcpip/link/qdisc/fifo", "//pkg/tcpip/link/sniffer", "//pkg/tcpip/network/arp", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9b270cbf2..76e1f596b 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "os" + gtime "time" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" @@ -25,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" + controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" @@ -40,80 +42,89 @@ import ( ) const ( - // ContainerCheckpoint checkpoints a container. - ContainerCheckpoint = "containerManager.Checkpoint" + // ContMgrCheckpoint checkpoints a container. + ContMgrCheckpoint = "containerManager.Checkpoint" - // ContainerCreate creates a container. - ContainerCreate = "containerManager.Create" + // ContMgrCreateSubcontainer creates a sub-container. + ContMgrCreateSubcontainer = "containerManager.CreateSubcontainer" - // ContainerDestroy is used to stop a non-root container and free all + // ContMgrDestroySubcontainer is used to stop a sub-container and free all // associated resources in the sandbox. - ContainerDestroy = "containerManager.Destroy" + ContMgrDestroySubcontainer = "containerManager.DestroySubcontainer" - // ContainerEvent is the URPC endpoint for getting stats about the - // container used by "runsc events". - ContainerEvent = "containerManager.Event" + // ContMgrEvent gets stats about the container used by "runsc events". + ContMgrEvent = "containerManager.Event" - // ContainerExecuteAsync is the URPC endpoint for executing a command in a - // container. - ContainerExecuteAsync = "containerManager.ExecuteAsync" + // ContMgrExecuteAsync executes a command in a container. + ContMgrExecuteAsync = "containerManager.ExecuteAsync" - // ContainerPause pauses the container. - ContainerPause = "containerManager.Pause" + // ContMgrProcesses lists processes running in a container. + ContMgrProcesses = "containerManager.Processes" - // ContainerProcesses is the URPC endpoint for getting the list of - // processes running in a container. - ContainerProcesses = "containerManager.Processes" + // ContMgrRestore restores a container from a statefile. + ContMgrRestore = "containerManager.Restore" - // ContainerRestore restores a container from a statefile. - ContainerRestore = "containerManager.Restore" + // ContMgrSignal sends a signal to a container. 
+ ContMgrSignal = "containerManager.Signal" - // ContainerResume unpauses the paused container. - ContainerResume = "containerManager.Resume" + // ContMgrStartSubcontainer starts a sub-container inside a running sandbox. + ContMgrStartSubcontainer = "containerManager.StartSubcontainer" - // ContainerSignal is used to send a signal to a container. - ContainerSignal = "containerManager.Signal" + // ContMgrWait waits on the init process of the container and returns its + // ExitStatus. + ContMgrWait = "containerManager.Wait" - // ContainerSignalProcess is used to send a signal to a particular - // process in a container. - ContainerSignalProcess = "containerManager.SignalProcess" + // ContMgrWaitPID waits on a process with a certain PID in the sandbox and + // return its ExitStatus. + ContMgrWaitPID = "containerManager.WaitPID" - // ContainerStart is the URPC endpoint for running a non-root container - // within a sandbox. - ContainerStart = "containerManager.Start" - - // ContainerWait is used to wait on the init process of the container - // and return its ExitStatus. - ContainerWait = "containerManager.Wait" - - // ContainerWaitPID is used to wait on a process with a certain PID in - // the sandbox and return its ExitStatus. - ContainerWaitPID = "containerManager.WaitPID" + // ContMgrRootContainerStart starts a new sandbox with a root container. + ContMgrRootContainerStart = "containerManager.StartRoot" +) - // NetworkCreateLinksAndRoutes is the URPC endpoint for creating links - // and routes in a network stack. +const ( + // NetworkCreateLinksAndRoutes creates links and routes in a network stack. NetworkCreateLinksAndRoutes = "Network.CreateLinksAndRoutes" - // RootContainerStart is the URPC endpoint for starting a new sandbox - // with root container. - RootContainerStart = "containerManager.StartRoot" - - // SandboxStacks collects sandbox stacks for debugging. - SandboxStacks = "debug.Stacks" + // DebugStacks collects sandbox stacks for debugging. + DebugStacks = "debug.Stacks" ) // Profiling related commands (see pprof.go for more details). const ( - CPUProfile = "Profile.CPU" - HeapProfile = "Profile.Heap" - BlockProfile = "Profile.Block" - MutexProfile = "Profile.Mutex" - Trace = "Profile.Trace" + ProfileCPU = "Profile.CPU" + ProfileHeap = "Profile.Heap" + ProfileBlock = "Profile.Block" + ProfileMutex = "Profile.Mutex" + ProfileTrace = "Profile.Trace" ) // Logging related commands (see logging.go for more details). const ( - ChangeLogging = "Logging.Change" + LoggingChange = "Logging.Change" +) + +// Lifecycle related commands (see lifecycle.go for more details). +const ( + LifecyclePause = "Lifecycle.Pause" + LifecycleResume = "Lifecycle.Resume" +) + +// Filesystem related commands (see fs.go for more details). +const ( + FsCat = "Fs.Cat" +) + +// Usage related commands (see usage.go for more details). +const ( + UsageCollect = "Usage.Collect" + UsageUsageFD = "Usage.UsageFD" + UsageReduce = "Usage.Reduce" +) + +// Events related commands (see events.go for more details). +const ( + EventsAttachDebugEmitter = "Events.AttachDebugEmitter" ) // ControlSocketAddr generates an abstract unix socket name for the given ID. 
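The renamed control constants above follow one consistent `receiver.Method` convention: the prefix names the URPC object registered on the control server (`containerManager`, `Profile`, `Lifecycle`, `Fs`, `Usage`, `Events`) and the suffix names a method on it. As a minimal sketch of how such a name could be split for routing, the helper below is hypothetical and only illustrates the naming scheme, not gVisor's actual URPC dispatcher:

```go
package main

import (
	"fmt"
	"strings"
)

// splitMethod mirrors how a URPC server could route a call such as
// "containerManager.Signal" to the object registered under the name
// "containerManager". Hypothetical helper for illustration only.
func splitMethod(fullMethod string) (receiver, method string, err error) {
	parts := strings.SplitN(fullMethod, ".", 2)
	if len(parts) != 2 {
		return "", "", fmt.Errorf("malformed method name %q", fullMethod)
	}
	return parts[0], parts[1], nil
}

func main() {
	recv, meth, _ := splitMethod("containerManager.Signal")
	fmt.Println(recv, meth) // containerManager Signal
}
```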
@@ -155,18 +166,41 @@ func newController(fd int, l *Loader) (*controller, error) { ctrl.srv.Register(net) } - ctrl.srv.Register(&debug{}) - ctrl.srv.Register(&control.Logging{}) - - if l.root.conf.ProfileEnable { - ctrl.srv.Register(control.NewProfile(l.k)) + if l.root.conf.Controls.Controls != nil { + for _, c := range l.root.conf.Controls.Controls.AllowedControls { + switch c { + case controlpb.ControlConfig_EVENTS: + ctrl.srv.Register(&control.Events{}) + case controlpb.ControlConfig_FS: + ctrl.srv.Register(&control.Fs{Kernel: l.k}) + case controlpb.ControlConfig_LIFECYCLE: + ctrl.srv.Register(&control.Lifecycle{Kernel: l.k}) + case controlpb.ControlConfig_LOGGING: + ctrl.srv.Register(&control.Logging{}) + case controlpb.ControlConfig_PROFILE: + if l.root.conf.ProfileEnable { + ctrl.srv.Register(control.NewProfile(l.k)) + } + case controlpb.ControlConfig_USAGE: + ctrl.srv.Register(&control.Usage{Kernel: l.k}) + case controlpb.ControlConfig_PROC: + ctrl.srv.Register(&control.Proc{Kernel: l.k}) + case controlpb.ControlConfig_STATE: + ctrl.srv.Register(&control.State{Kernel: l.k}) + case controlpb.ControlConfig_DEBUG: + ctrl.srv.Register(&debug{}) + } + } } return ctrl, nil } +// stopRPCTimeout is the time for clients to complete ongoing RPCs. +const stopRPCTimeout = 15 * gtime.Second + func (c *controller) stop() { - c.srv.Stop() + c.srv.Stop(stopRPCTimeout) } // containerManager manages sandbox containers. @@ -210,9 +244,9 @@ type CreateArgs struct { urpc.FilePayload } -// Create creates a container within a sandbox. -func (cm *containerManager) Create(args *CreateArgs, _ *struct{}) error { - log.Debugf("containerManager.Create: %s", args.CID) +// CreateSubcontainer creates a container within a sandbox. +func (cm *containerManager) CreateSubcontainer(args *CreateArgs, _ *struct{}) error { + log.Debugf("containerManager.CreateSubcontainer: %s", args.CID) if len(args.Files) > 1 { return fmt.Errorf("start arguments must have at most 1 files for TTY") @@ -225,7 +259,7 @@ func (cm *containerManager) Create(args *CreateArgs, _ *struct{}) error { return fmt.Errorf("error dup'ing TTY file: %w", err) } } - return cm.l.createContainer(args.CID, tty) + return cm.l.createSubcontainer(args.CID, tty) } // StartArgs contains arguments to the Start method. @@ -245,13 +279,13 @@ type StartArgs struct { urpc.FilePayload } -// Start runs a created container within a sandbox. -func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { +// StartSubcontainer runs a created container within a sandbox. +func (cm *containerManager) StartSubcontainer(args *StartArgs, _ *struct{}) error { // Validate arguments. 
if args == nil { return errors.New("start missing arguments") } - log.Debugf("containerManager.Start, cid: %s, args: %+v", args.CID, args) + log.Debugf("containerManager.StartSubcontainer, cid: %s, args: %+v", args.CID, args) if args.Spec == nil { return errors.New("start arguments missing spec") } @@ -299,19 +333,19 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { } }() - if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, stdios, goferFDs); err != nil { - log.Debugf("containerManager.Start failed, cid: %s, args: %+v, err: %v", args.CID, args, err) + if err := cm.l.startSubcontainer(args.Spec, args.Conf, args.CID, stdios, goferFDs); err != nil { + log.Debugf("containerManager.StartSubcontainer failed, cid: %s, args: %+v, err: %v", args.CID, args, err) return err } log.Debugf("Container started, cid: %s", args.CID) return nil } -// Destroy stops a container if it is still running and cleans up its -// filesystem. -func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { - log.Debugf("containerManager.destroy, cid: %s", *cid) - return cm.l.destroyContainer(*cid) +// DestroySubcontainer stops a container if it is still running and cleans up +// its filesystem. +func (cm *containerManager) DestroySubcontainer(cid *string, _ *struct{}) error { + log.Debugf("containerManager.DestroySubcontainer, cid: %s", *cid) + return cm.l.destroySubcontainer(*cid) } // ExecuteAsync starts running a command on a created or running sandbox. It @@ -330,6 +364,11 @@ func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) err // Checkpoint pauses a sandbox and saves its state. func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { log.Debugf("containerManager.Checkpoint") + // TODO(gvisor.dev/issues/6243): save/restore not supported w/ hostinet + if cm.l.root.conf.Network == config.NetworkHost { + return errors.New("checkpoint not supported when using hostinet") + } + state := control.State{ Kernel: cm.l.k, Watchdog: cm.l.watchdog, @@ -337,13 +376,6 @@ func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error { return state.Save(o, nil) } -// Pause suspends a container. -func (cm *containerManager) Pause(_, _ *struct{}) error { - log.Debugf("containerManager.Pause") - cm.l.k.Pause() - return nil -} - // RestoreOpts contains options related to restoring a container's file system. type RestoreOpts struct { // FilePayload contains the state file to be restored, followed by the @@ -439,7 +471,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { + if err := loadOpts.Load(ctx, k, nil, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } @@ -475,13 +507,6 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return nil } -// Resume unpauses a container. -func (cm *containerManager) Resume(_, _ *struct{}) error { - log.Debugf("containerManager.Resume") - cm.l.k.Unpause() - return nil -} - // Wait waits for the init process in the given container. 
func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error { log.Debugf("containerManager.Wait, cid: %s", *cid) diff --git a/runsc/boot/events.go b/runsc/boot/events.go index 0814b2a69..65137de8a 100644 --- a/runsc/boot/events.go +++ b/runsc/boot/events.go @@ -91,7 +91,7 @@ func (cm *containerManager) Event(_ *struct{}, out *EventOut) error { // Memory usage. // TODO(gvisor.dev/issue/172): Per-container accounting. mem := cm.l.k.MemoryFile() - mem.UpdateUsage() + _ = mem.UpdateUsage() // best effort to update. _, totalUsage := usage.MemoryAccounting.Copy() out.Event.Data.Memory.Usage = MemoryEntry{ Usage: totalUsage, diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 752fea0e1..703f34827 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -459,6 +459,14 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.MatchAny{}, seccomp.EqualTo(unix.TIOCINQ), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(unix.SIOCGIFFLAGS), + }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(unix.SIOCGIFCONF), + }, }, unix.SYS_LISTEN: {}, unix.SYS_READV: {}, diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go index 42cb8ed3a..8015a0e52 100644 --- a/runsc/boot/filter/config_amd64.go +++ b/runsc/boot/filter/config_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package filter diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go index f162f87ff..9f44379b4 100644 --- a/runsc/boot/filter/config_arm64.go +++ b/runsc/boot/filter/config_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package filter diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go index 89b66a6da..214bf8b1d 100644 --- a/runsc/boot/filter/config_profile.go +++ b/runsc/boot/filter/config_profile.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package filter import ( diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index e28d4b8d6..5442add95 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !msan && !race // +build !msan,!race package filter diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go index 41baa78cd..8873f9cf9 100644 --- a/runsc/boot/filter/extra_filters_msan.go +++ b/runsc/boot/filter/extra_filters_msan.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build msan // +build msan package filter diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index 79b2104f0..046b39014 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build race // +build race package filter diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index c4590aab1..40cf2a3df 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -25,6 +25,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -41,7 +42,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" @@ -69,7 +69,7 @@ const ( // tmpfs has some extra supported options that we must pass through. var tmpfsAllowedData = []string{"mode", "uid", "gid"} -func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { +func addOverlay(ctx context.Context, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. upperFlags := lowerFlags upperFlags.ReadOnly = false @@ -744,7 +744,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.C if useOverlay { log.Debugf("Adding overlay on top of shared mount %q", hint.name) - inode, err = addOverlay(ctx, conf, inode, hint.mount.Type, mf) + inode, err = addOverlay(ctx, inode, hint.mount.Type, mf) if err != nil { return nil, err } @@ -785,7 +785,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con if conf.Overlay && !c.root.Readonly { log.Debugf("Adding overlay on top of root mount") // Overlay a tmpfs filesystem on top of the root. - rootInode, err = addOverlay(ctx, conf, rootInode, "root-overlay-upper", mf) + rootInode, err = addOverlay(ctx, rootInode, "root-overlay-upper", mf) if err != nil { return nil, err } @@ -901,7 +901,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi if useOverlay { log.Debugf("Adding overlay on top of mount %q", m.Destination) - inode, err = addOverlay(ctx, conf, inode, m.Type, mf) + inode, err = addOverlay(ctx, inode, m.Type, mf) if err != nil { return err } @@ -1039,8 +1039,8 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn maxTraversals := uint(0) tmp, err := mns.FindInode(ctx, root, root, "tmp", &maxTraversals) - switch err { - case nil: + switch { + case err == nil: // Found '/tmp' in filesystem, check if it's empty. defer tmp.DecRef(ctx) f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) @@ -1061,7 +1061,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn log.Infof("Mounting internal tmpfs on top of empty %q", "/tmp") fallthrough - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index ad4d50008..b46d84e5a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -58,6 +58,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/ethernet" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" @@ -119,6 +120,10 @@ type Loader struct { // container. 
It should be called when a sandbox is destroyed. stopSignalForwarding func() + // stopProfiling stops profiling started at container creation. It + // should be called when a sandbox is destroyed. + stopProfiling func() + // restore is set to true if we are restoring a container. restore bool @@ -198,6 +203,21 @@ type Args struct { TotalMem uint64 // UserLogFD is the file descriptor to write user logs to. UserLogFD int + // ProfileBlockFD is the file descriptor to write a block profile to. + // Valid if >=0. + ProfileBlockFD int + // ProfileCPUFD is the file descriptor to write a CPU profile to. + // Valid if >=0. + ProfileCPUFD int + // ProfileHeapFD is the file descriptor to write a heap profile to. + // Valid if >=0. + ProfileHeapFD int + // ProfileMutexFD is the file descriptor to write a mutex profile to. + // Valid if >=0. + ProfileMutexFD int + // TraceFD is the file descriptor to write a Go execution trace to. + // Valid if >=0. + TraceFD int } // make sure stdioFDs are always the same on initial start and on restore @@ -206,6 +226,8 @@ const startingStdioFD = 256 // New initializes a new kernel loader configured by spec. // New also handles setting up a kernel for restoring a container. func New(args Args) (*Loader, error) { + stopProfiling := startProfiling(args) + // We initialize the rand package now to make sure /dev/urandom is pre-opened // on kernels that do not support getrandom(2). if err := rand.Init(); err != nil { @@ -278,19 +300,15 @@ func New(args Args) (*Loader, error) { } // Create timekeeper. - tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) - if err != nil { - return nil, fmt.Errorf("creating timekeeper: %w", err) - } + tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange()) tk.SetClocks(time.NewCalibratedClocks()) - k.SetTimekeeper(tk) if err := enableStrace(args.Conf); err != nil { return nil, fmt.Errorf("enabling strace: %w", err) } // Create root network namespace/stack. - netns, err := newRootNetworkNamespace(args.Conf, k, k) + netns, err := newRootNetworkNamespace(args.Conf, tk, k) if err != nil { return nil, fmt.Errorf("creating network: %w", err) } @@ -332,6 +350,7 @@ func New(args Args) (*Loader, error) { // to createVFS in order to mount (among other things) procfs. if err = k.Init(kernel.InitKernelArgs{ FeatureSet: cpuid.HostFeatureSet(), + Timekeeper: tk, RootUserNamespace: creds.UserNamespace, RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), @@ -402,12 +421,13 @@ func New(args Args) (*Loader, error) { eid := execID{cid: args.ID} l := &Loader{ - k: k, - watchdog: dog, - sandboxID: args.ID, - processes: map[execID]*execProcess{eid: {}}, - mountHints: mountHints, - root: info, + k: k, + watchdog: dog, + sandboxID: args.ID, + processes: map[execID]*execProcess{eid: {}}, + mountHints: mountHints, + root: info, + stopProfiling: stopProfiling, } // We don't care about child signals; some platforms can generate a @@ -500,6 +520,8 @@ func (l *Loader) Destroy() { for _, f := range l.root.goferFDs { _ = f.Close() } + + l.stopProfiling() } func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) { @@ -636,8 +658,8 @@ func (l *Loader) run() error { return l.k.Start() } -// createContainer creates a new container inside the sandbox. -func (l *Loader) createContainer(cid string, tty *fd.FD) error { +// createSubcontainer creates a new container inside the sandbox. 
+func (l *Loader) createSubcontainer(cid string, tty *fd.FD) error { l.mu.Lock() defer l.mu.Unlock() @@ -649,10 +671,10 @@ func (l *Loader) createContainer(cid string, tty *fd.FD) error { return nil } -// startContainer starts a child container. It returns the thread group ID of +// startSubcontainer starts a child container. It returns the thread group ID of // the newly created process. Used FDs are either closed or released. It's safe // for the caller to close any remaining files upon return. -func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, stdioFDs, goferFDs []*fd.FD) error { +func (l *Loader) startSubcontainer(spec *specs.Spec, conf *config.Config, cid string, stdioFDs, goferFDs []*fd.FD) error { // Create capabilities. caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities) if err != nil { @@ -718,7 +740,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin return fmt.Errorf("using TTY, stdios not expected: %d", l) } if ep.hostTTY == nil { - return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)") + return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?") } info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY} ep.hostTTY = nil @@ -737,7 +759,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo) (*kernel.ThreadGroup, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { // Create the FD map, which will set stdin, stdout, and stderr. ctx := info.procArgs.NewContext(l.k) - fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs) + fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs, info.spec.Process.User) if err != nil { return nil, nil, nil, fmt.Errorf("importing fds: %w", err) } @@ -745,8 +767,11 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn // ours either way. info.procArgs.FDTable = fdTable - // Setup the child container file system. - l.startGoferMonitor(cid, info.goferFDs) + // Gofer FDs must be ordered and the first FD is always the rootfs. + if len(info.goferFDs) < 1 { + return nil, nil, nil, fmt.Errorf("rootfs gofer FD not found") + } + l.startGoferMonitor(cid, int32(info.goferFDs[0].FD())) mntr := newContainerMounter(info, l.k, l.mountHints, kernel.VFS2Enabled) if root { @@ -819,17 +844,21 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn } // startGoferMonitor runs a goroutine to monitor gofer's health. It polls on -// the gofer FDs looking for disconnects, and kills the container processes if a -// disconnect occurs in any of the gofer FDs. -func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { +// the gofer FD looking for disconnects, and kills the container processes if +// the rootfs FD disconnects. +// +// Note that other gofer mounts are allowed to be unmounted and disconnected. 
+func (l *Loader) startGoferMonitor(cid string, rootfsGoferFD int32) { + if rootfsGoferFD < 0 { + panic(fmt.Sprintf("invalid FD: %d", rootfsGoferFD)) + } go func() { log.Debugf("Monitoring gofer health for container %q", cid) - var events []unix.PollFd - for _, goferFD := range goferFDs { - events = append(events, unix.PollFd{ - Fd: int32(goferFD.FD()), + events := []unix.PollFd{ + { + Fd: rootfsGoferFD, Events: unix.POLLHUP | unix.POLLRDHUP, - }) + }, } _, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) { // Use ppoll instead of poll because it's already whilelisted in seccomp. @@ -854,9 +883,9 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) { }() } -// destroyContainer stops a container if it is still running and cleans up its -// filesystem. -func (l *Loader) destroyContainer(cid string) error { +// destroySubcontainer stops a container if it is still running and cleans up +// its filesystem. +func (l *Loader) destroySubcontainer(cid string) error { l.mu.Lock() defer l.mu.Unlock() @@ -983,7 +1012,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { tty: ttyFile, ttyVFS2: ttyFileVFS2, } - log.Debugf("updated processes: %s", l.processes) + log.Debugf("updated processes: %v", l.processes) return tgid, nil } @@ -1004,7 +1033,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // Check for leaks and write coverage report after the root container has // exited. This guarantees that the report is written in cases where the - // sandbox is killed by a signal after the ContainerWait request is completed. + // sandbox is killed by a signal after the ContMgrWait request is completed. if l.root.procArgs.ContainerID == cid { // All sentry-created resources should have been released at this point. refsvfs2.DoLeakCheck() @@ -1027,7 +1056,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e l.mu.Lock() delete(l.processes, eid) - log.Debugf("updated processes (removal): %s", l.processes) + log.Debugf("updated processes (removal): %v", l.processes) l.mu.Unlock() return nil } @@ -1054,7 +1083,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e // to exit. func (l *Loader) wait(tg *kernel.ThreadGroup) uint32 { tg.WaitExited() - return tg.ExitStatus().Status() + return uint32(tg.ExitStatus()) } // WaitForStartSignal waits for a start signal from the control server. @@ -1063,7 +1092,7 @@ func (l *Loader) WaitForStartSignal() { } // WaitExit waits for the root container to exit, and returns its exit status. -func (l *Loader) WaitExit() kernel.ExitStatus { +func (l *Loader) WaitExit() linux.WaitStatus { // Wait for container. 
l.k.WaitExited() @@ -1084,23 +1113,24 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st return inet.NewRootNamespace(hostinet.NewStack(), nil), nil case config.NetworkNone, config.NetworkSandbox: - s, err := newEmptySandboxNetworkStack(clock, uniqueID) + s, err := newEmptySandboxNetworkStack(clock, uniqueID, conf.AllowPacketEndpointWrite) if err != nil { return nil, err } creator := &sandboxNetstackCreator{ - clock: clock, - uniqueID: uniqueID, + clock: clock, + uniqueID: uniqueID, + allowPacketEndpointWrite: conf.AllowPacketEndpointWrite, } return inet.NewRootNamespace(s, creator), nil default: - panic(fmt.Sprintf("invalid network configuration: %d", conf.Network)) + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) } } -func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { +func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID, allowPacketEndpointWrite bool) (inet.Stack, error) { netProtos := []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol, arp.NewProtocol} transProtos := []stack.TransportProtocolFactory{ tcp.NewProtocol, @@ -1116,9 +1146,10 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in HandleLocal: true, // Enable raw sockets for users with sufficient // privileges. - RawFactory: raw.EndpointFactory{}, - UniqueID: uniqueID, - DefaultIPTables: netfilter.DefaultLinuxTables, + RawFactory: raw.EndpointFactory{}, + AllowPacketEndpointWrite: allowPacketEndpointWrite, + UniqueID: uniqueID, + DefaultIPTables: netfilter.DefaultLinuxTables, })} // Enable SACK Recovery. @@ -1155,13 +1186,14 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in // // +stateify savable type sandboxNetstackCreator struct { - clock tcpip.Clock - uniqueID stack.UniqueID + clock tcpip.Clock + uniqueID stack.UniqueID + allowPacketEndpointWrite bool } // CreateStack implements kernel.NetworkStackCreator.CreateStack. 
func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { - s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID) + s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID, f.allowPacketEndpointWrite) if err != nil { return nil, err } @@ -1170,7 +1202,7 @@ func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { n := &Network{Stack: s.(*netstack.Stack).Stack} nicID := tcpip.NICID(f.uniqueID.UniqueID()) link := DefaultLoopbackLink - linkEP := loopback.New() + linkEP := ethernet.New(loopback.New()) if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return nil, err } @@ -1215,7 +1247,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e return nil default: - panic(fmt.Sprintf("unknown signal delivery mode %s", mode)) + panic(fmt.Sprintf("unknown signal delivery mode %v", mode)) } } @@ -1340,14 +1372,14 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2 return ep.tty, ep.ttyVFS2, nil } -func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD, user specs.User) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { if len(stdioFDs) != 3 { return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) fdTable := k.NewFDTable() - ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, auth.KUID(user.UID), auth.KGID(user.GID), stdioFDs) if err != nil { fdTable.DecRef(ctx) return nil, nil, nil, err diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 93c476971..ac6c26d25 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -188,8 +188,8 @@ func doRun(t *testing.T, vfsEnabled bool) { } // Wait for the application to exit. It should succeed. - if status := l.WaitExit(); status.Code != 0 || status.Signo != 0 { - t.Errorf("application exited with status %+v, want 0", status) + if status := l.WaitExit(); !status.Exited() || status.ExitStatus() != 0 { + t.Errorf("application exited with %s, want exit status 0", status) } } @@ -214,7 +214,7 @@ func doStartSignal(t *testing.T, vfsEnabled bool) { // We aren't going to wait on this application, so the control server // needs to be shut down manually. - defer l.ctrl.srv.Stop() + defer l.ctrl.srv.Stop(time.Hour) // Start a goroutine that calls WaitForStartSignal and writes to a // channel when it returns. 
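The loader changes above replace `kernel.ExitStatus` with `linux.WaitStatus`, which is why the test now checks `status.Exited()` and `status.ExitStatus()` instead of separate `Code`/`Signo` fields. A rough, self-contained sketch of the standard Linux wait-status packing, with stand-in accessors mirroring those names (not gVisor's actual `linux.WaitStatus` implementation):

```go
package main

import "fmt"

// waitStatus is a stand-in for linux.WaitStatus; the accessors below follow
// the conventional Linux wait-status bit layout.
type waitStatus uint32

// Exited reports a normal exit: the low 7 bits (termination signal) are zero.
func (ws waitStatus) Exited() bool { return ws&0x7f == 0 }

// ExitStatus extracts the exit code from bits 8-15.
func (ws waitStatus) ExitStatus() int { return int(ws>>8) & 0xff }

func main() {
	ws := waitStatus(1 << 8) // what exit(1) would produce
	fmt.Println(ws.Exited(), ws.ExitStatus()) // true 1
}
```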
diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 7e627e4c6..9fb3ebd95 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -23,9 +23,9 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/ethernet" "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" - "gvisor.dev/gvisor/pkg/tcpip/link/packetsocket" "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -169,7 +169,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct nicID++ nicids[link.Name] = nicID - linkEP := loopback.New() + linkEP := ethernet.New(loopback.New()) log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses) if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { @@ -209,7 +209,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct linkEP, err := fdbased.New(&fdbased.Options{ FDs: FDs, MTU: uint32(link.MTU), - EthernetHeader: true, + EthernetHeader: mac != "", Address: mac, PacketDispatchMode: fdbased.RecvMMsg, GSOMaxSize: link.GSOMaxSize, @@ -228,9 +228,6 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000) } - // Enable support for AF_PACKET sockets to receive outgoing packets. - linkEP = packetsocket.New(linkEP) - log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return err @@ -285,12 +282,15 @@ func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkE for _, addr := range addrs { proto, tcpipAddr := ipToAddressAndProto(addr.Address) - ap := tcpip.AddressWithPrefix{ - Address: tcpipAddr, - PrefixLen: addr.PrefixLen, + protocolAddr := tcpip.ProtocolAddress{ + Protocol: proto, + AddressWithPrefix: tcpip.AddressWithPrefix{ + Address: tcpipAddr, + PrefixLen: addr.PrefixLen, + }, } - if err := n.Stack.AddAddressWithPrefix(id, proto, ap); err != nil { - return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, proto, tcpipAddr, err) + if err := n.Stack.AddProtocolAddress(id, protocolAddr, stack.AddressProperties{}); err != nil { + return fmt.Errorf("AddProtocolAddress(%d, %+v, {}) failed: %s", id, protocolAddr, err) } } return nil diff --git a/runsc/boot/pprof/pprof.go b/runsc/boot/pprof/pprof.go index 1ded20dee..36b78ad86 100644 --- a/runsc/boot/pprof/pprof.go +++ b/runsc/boot/pprof/pprof.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package pprof provides a stub to initialize custom profilers. package pprof diff --git a/runsc/boot/profile.go b/runsc/boot/profile.go new file mode 100644 index 000000000..3ecd3e532 --- /dev/null +++ b/runsc/boot/profile.go @@ -0,0 +1,95 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "os" + "runtime" + "runtime/pprof" + "runtime/trace" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" +) + +// startProfiling initiates profiling as defined by the ProfileConfig, and +// returns a function that should be called to stop profiling. +func startProfiling(args Args) func() { + var onStopProfiling []func() + stopProfiling := func() { + for _, f := range onStopProfiling { + f() + } + } + + if args.ProfileBlockFD >= 0 { + file := os.NewFile(uintptr(args.ProfileBlockFD), "profile-block") + + runtime.SetBlockProfileRate(control.DefaultBlockProfileRate) + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("block").WriteTo(file, 0); err != nil { + log.Warningf("Error writing block profile: %v", err) + } + file.Close() + runtime.SetBlockProfileRate(0) + }) + } + + if args.ProfileCPUFD >= 0 { + file := os.NewFile(uintptr(args.ProfileCPUFD), "profile-cpu") + + pprof.StartCPUProfile(file) + onStopProfiling = append(onStopProfiling, func() { + pprof.StopCPUProfile() + file.Close() + }) + } + + if args.ProfileHeapFD >= 0 { + file := os.NewFile(uintptr(args.ProfileHeapFD), "profile-heap") + + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("heap").WriteTo(file, 0); err != nil { + log.Warningf("Error writing heap profile: %v", err) + } + file.Close() + }) + } + + if args.ProfileMutexFD >= 0 { + file := os.NewFile(uintptr(args.ProfileMutexFD), "profile-mutex") + + prev := runtime.SetMutexProfileFraction(control.DefaultMutexProfileRate) + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("mutex").WriteTo(file, 0); err != nil { + log.Warningf("Error writing mutex profile: %v", err) + } + file.Close() + runtime.SetMutexProfileFraction(prev) + }) + } + + if args.TraceFD >= 0 { + file := os.NewFile(uintptr(args.TraceFD), "trace") + + trace.Start(file) + onStopProfiling = append(onStopProfiling, func() { + trace.Stop() + file.Close() + }) + } + + return stopProfiling +} diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go index c21648a32..cf5be34cd 100644 --- a/runsc/boot/strace.go +++ b/runsc/boot/strace.go @@ -35,9 +35,14 @@ func enableStrace(conf *config.Config) error { } strace.LogMaximumSize = max + sink := strace.SinkTypeLog + if conf.StraceEvent { + sink = strace.SinkTypeEvent + } + if len(conf.StraceSyscalls) == 0 { - strace.EnableAll(strace.SinkTypeLog) + strace.EnableAll(sink) return nil } - return strace.Enable(strings.Split(conf.StraceSyscalls, ","), strace.SinkTypeLog) + return strace.Enable(strings.Split(conf.StraceSyscalls, ","), sink) } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 52aa33529..346796d9c 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" @@ -44,7 +45,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" 
"gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" ) @@ -656,20 +656,20 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config Path: fspath.Parse("/tmp"), } fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY}) - switch err { - case nil: + switch { + case err == nil: defer fd.DecRef(ctx) err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { if dirent.Name != "." && dirent.Name != ".." { - return syserror.ENOTEMPTY + return linuxerr.ENOTEMPTY } return nil })) - switch err { - case nil: + switch { + case err == nil: log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) - case syserror.ENOTEMPTY: + case linuxerr.Equals(linuxerr.ENOTEMPTY, err): // If more than "." and ".." is found, skip internal tmpfs to prevent // hiding existing files. log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) @@ -679,7 +679,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config } fallthrough - case syserror.ENOENT: + case linuxerr.Equals(linuxerr.ENOENT, err): // No '/tmp' found (or fallthrough from above). It's safe to mount internal // tmpfs. tmpMount := specs.Mount{ @@ -692,7 +692,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount}) return err - case syserror.ENOTDIR: + case linuxerr.Equals(linuxerr.ENOTDIR, err): // Not a dir?! Let it be. return nil diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 66a6a0f68..5dbf14376 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -424,10 +424,9 @@ func (c *Cgroup) Uninstall() error { // restores cgroup to the original state. func (c *Cgroup) Join() (func(), error) { // First save the current state so it can be restored. - undo := func() {} paths, err := loadPaths("self") if err != nil { - return undo, err + return nil, err } var undoPaths []string for ctrlr, path := range paths { @@ -438,8 +437,7 @@ func (c *Cgroup) Join() (func(), error) { } } - // Replace empty undo with the real thing before changes are made to cgroups. - undo = func() { + cu := cleanup.Make(func() { for _, path := range undoPaths { log.Debugf("Restoring cgroup %q", path) // Writing the value 0 to a cgroup.procs file causes @@ -449,7 +447,8 @@ func (c *Cgroup) Join() (func(), error) { log.Warningf("Error restoring cgroup %q: %v", path, err) } } - } + }) + defer cu.Clean() // Now join the cgroups. for key, ctrlr := range controllers { @@ -461,10 +460,10 @@ func (c *Cgroup) Join() (func(), error) { if ctrlr.optional() && os.IsNotExist(err) { continue } - return undo, err + return nil, err } } - return undo, nil + return cu.Release(), nil } // CPUQuota returns the CFS CPU quota. 
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go index eba40621e..1431b4e8f 100644 --- a/runsc/cgroup/cgroup_test.go +++ b/runsc/cgroup/cgroup_test.go @@ -800,7 +800,7 @@ func TestLoadPaths(t *testing.T) { if err != nil { t.Fatalf("Unexpected error: %v", err) } - } else if !strings.Contains(err.Error(), tc.err) { + } else if err == nil || !strings.Contains(err.Error(), tc.err) { t.Fatalf("Wrong error message, want: *%s*, got: %v", tc.err, err) } for key, vWant := range tc.want { diff --git a/runsc/cli/main.go b/runsc/cli/main.go index 76184cd9c..3556d7665 100644 --- a/runsc/cli/main.go +++ b/runsc/cli/main.go @@ -243,7 +243,7 @@ func Main(version string) { subcmdCode := subcommands.Execute(context.Background(), conf, &ws) // Check for leaks and write coverage report before os.Exit(). refsvfs2.DoLeakCheck() - coverage.Report() + _ = coverage.Report() if subcmdCode == subcommands.ExitSuccess { log.Infof("Exiting with status: %v", ws) if ws.Signaled() { diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 39c8ff603..c5e32807d 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -36,6 +36,7 @@ go_library( "statefile.go", "symbolize.go", "syscalls.go", + "usage.go", "verity_prepare.go", "wait.go", ], @@ -95,10 +96,10 @@ go_test( "//runsc/config", "//runsc/container", "//runsc/mitigate", - "//runsc/mitigate/mock", "//runsc/specutils", "@com_github_google_go_cmp//cmp:go_default_library", "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", + "@com_github_google_subcommands//:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", ], diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index a14249641..e33a7f3cb 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -79,6 +79,26 @@ type Boot struct { // sandbox (e.g. gofer) and sent through this FD. mountsFD int + // profileBlockFD is the file descriptor to write a block profile to. + // Valid if >= 0. + profileBlockFD int + + // profileCPUFD is the file descriptor to write a CPU profile to. + // Valid if >= 0. + profileCPUFD int + + // profileHeapFD is the file descriptor to write a heap profile to. + // Valid if >= 0. + profileHeapFD int + + // profileMutexFD is the file descriptor to write a mutex profile to. + // Valid if >= 0. + profileMutexFD int + + // traceFD is the file descriptor to write a Go execution trace to. + // Valid if >= 0. + traceFD int + // pidns is set if the sandbox is in its own pid namespace. pidns bool @@ -119,6 +139,11 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") + f.IntVar(&b.profileBlockFD, "profile-block-fd", -1, "file descriptor to write block profile to. -1 disables profiling.") + f.IntVar(&b.profileCPUFD, "profile-cpu-fd", -1, "file descriptor to write CPU profile to. -1 disables profiling.") + f.IntVar(&b.profileHeapFD, "profile-heap-fd", -1, "file descriptor to write heap profile to. -1 disables profiling.") + f.IntVar(&b.profileMutexFD, "profile-mutex-fd", -1, "file descriptor to write mutex profile to. -1 disables profiling.") + f.IntVar(&b.traceFD, "trace-fd", -1, "file descriptor to write Go execution trace to. 
-1 disables tracing.") f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") } @@ -157,10 +182,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // we will read it again after the exec call. This works // because the ReadSpecFromFile function seeks to the beginning // of the file before reading. - if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) - } - panic("callSelfAsNobody must never return success") + Fatalf("callSelfAsNobody(%v): %v", args, callSelfAsNobody(args)) + panic("unreachable") } } @@ -199,10 +222,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // we will read it again after the exec call. This works // because the ReadSpecFromFile function seeks to the beginning // of the file before reading. - if err := setCapsAndCallSelf(args, caps); err != nil { - Fatalf("%v", err) - } - panic("setCapsAndCallSelf must never return success") + Fatalf("setCapsAndCallSelf(%v, %v): %v", args, caps, setCapsAndCallSelf(args, caps)) + panic("unreachable") } // Read resolved mount list and replace the original one from the spec. @@ -217,16 +238,21 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Create the loader. bootArgs := boot.Args{ - ID: f.Arg(0), - Spec: spec, - Conf: conf, - ControllerFD: b.controllerFD, - Device: os.NewFile(uintptr(b.deviceFD), "platform device"), - GoferFDs: b.ioFDs.GetArray(), - StdioFDs: b.stdioFDs.GetArray(), - NumCPU: b.cpuNum, - TotalMem: b.totalMem, - UserLogFD: b.userLogFD, + ID: f.Arg(0), + Spec: spec, + Conf: conf, + ControllerFD: b.controllerFD, + Device: os.NewFile(uintptr(b.deviceFD), "platform device"), + GoferFDs: b.ioFDs.GetArray(), + StdioFDs: b.stdioFDs.GetArray(), + NumCPU: b.cpuNum, + TotalMem: b.totalMem, + UserLogFD: b.userLogFD, + ProfileBlockFD: b.profileBlockFD, + ProfileCPUFD: b.profileCPUFD, + ProfileHeapFD: b.profileHeapFD, + ProfileMutexFD: b.profileMutexFD, + TraceFD: b.traceFD, } l, err := boot.New(bootArgs) if err != nil { @@ -259,7 +285,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) ws := l.WaitExit() log.Infof("application exiting with %+v", ws) waitStatus := args[1].(*unix.WaitStatus) - *waitStatus = unix.WaitStatus(ws.Status()) + *waitStatus = unix.WaitStatus(ws) l.Destroy() return subcommands.ExitSuccess } diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index e13a94486..99075d82d 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -122,6 +122,9 @@ func TestCapabilities(t *testing.T) { func TestMain(m *testing.M) { flag.Parse() - specutils.MaybeRunAsRoot() + if err := specutils.MaybeRunAsRoot(); err != nil { + fmt.Fprintf(os.Stderr, "Error running as root: %v", err) + os.Exit(123) + } os.Exit(m.Run()) } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index e988247da..1fe9c6435 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -30,7 +30,7 @@ func mountInChroot(chroot, src, dst, typ string, flags uint32) error { chrootDst := filepath.Join(chroot, dst) log.Infof("Mounting %q at %q", src, chrootDst) - if err := specutils.Mount(src, chrootDst, typ, flags); err != nil { + if err := specutils.SafeSetupAndMount(src, chrootDst, typ, flags, "/proc"); err != nil { return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) } return nil @@ -59,6 +59,23 @@ func pivotRoot(root string) error { return nil } +func copyFile(dst, src string) 
error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + + _, err = out.ReadFrom(in) + return err +} + // setUpChroot creates an empty directory with runsc mounted at /runsc and proc // mounted at /proc. func setUpChroot(pidns bool) error { @@ -70,14 +87,22 @@ func setUpChroot(pidns bool) error { // Convert all shared mounts into slave to be sure that nothing will be // propagated outside of our namespace. - if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", "/proc"); err != nil { return fmt.Errorf("error converting mounts: %v", err) } - if err := unix.Mount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, ""); err != nil { + if err := specutils.SafeMount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil { return fmt.Errorf("error mounting tmpfs in choot: %v", err) } + if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil { + return fmt.Errorf("error creating /etc in chroot: %v", err) + } + + if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil { + log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) + } + if pidns { flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { @@ -89,7 +114,7 @@ func setUpChroot(pidns bool) error { } } - if err := unix.Mount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, ""); err != nil { + if err := specutils.SafeMount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, "", "/proc"); err != nil { return fmt.Errorf("error remounting chroot in read-only: %v", err) } diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 6212ffb2e..318753728 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -37,9 +37,9 @@ type Debug struct { pid int stacks bool signal int - profileHeap string - profileCPU string profileBlock string + profileCPU string + profileHeap string profileMutex string trace string strace string @@ -48,6 +48,7 @@ type Debug struct { delay time.Duration duration time.Duration ps bool + cat stringSlice } // Name implements subcommands.Command. @@ -69,9 +70,9 @@ func (*Debug) Usage() string { func (d *Debug) SetFlags(f *flag.FlagSet) { f.IntVar(&d.pid, "pid", 0, "sandbox process ID. 
Container ID is not necessary if this is set") f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log") - f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") - f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.") + f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") + f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.") f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.") f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.") @@ -81,6 +82,7 @@ func (d *Debug) SetFlags(f *flag.FlagSet) { f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).") f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.") f.BoolVar(&d.ps, "ps", false, "lists processes") + f.Var(&d.cat, "cat", "reads files and print to standard output") } // Execute implements subcommands.Command.Execute. @@ -88,6 +90,13 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) var c *container.Container conf := args[0].(*config.Config) + if conf.ProfileBlock != "" || conf.ProfileCPU != "" || conf.ProfileHeap != "" || conf.ProfileMutex != "" { + return Errorf("global -profile-{block,cpu,heap,mutex} flags have no effect on runsc debug. Pass runsc debug -profile-{block,cpu,heap,mutex} instead") + } + if conf.TraceFile != "" { + return Errorf("global -trace flag has no effect on runsc debug. Pass runsc debug -trace instead") + } + if d.pid == 0 { // No pid, container ID must have been provided. if f.NArg() != 1 { @@ -166,7 +175,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) log.Infof("Enabling strace for syscalls: %s", d.strace) args.SetStrace = true args.EnableStrace = true - args.StraceWhitelist = strings.Split(d.strace, ",") + args.StraceAllowlist = strings.Split(d.strace, ",") } if len(d.logLevel) != 0 { @@ -217,19 +226,19 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Open profiling files. 
var ( - heapFile *os.File - cpuFile *os.File - traceFile *os.File blockFile *os.File + cpuFile *os.File + heapFile *os.File mutexFile *os.File + traceFile *os.File ) - if d.profileHeap != "" { - f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if d.profileBlock != "" { + f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { - return Errorf("error opening heap profile output: %v", err) + return Errorf("error opening blocking profile output: %v", err) } defer f.Close() - heapFile = f + blockFile = f } if d.profileCPU != "" { f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) @@ -239,20 +248,13 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) defer f.Close() cpuFile = f } - if d.trace != "" { - f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) - if err != nil { - return Errorf("error opening trace profile output: %v", err) - } - traceFile = f - } - if d.profileBlock != "" { - f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if d.profileHeap != "" { + f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { - return Errorf("error opening blocking profile output: %v", err) + return Errorf("error opening heap profile output: %v", err) } defer f.Close() - blockFile = f + heapFile = f } if d.profileMutex != "" { f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) @@ -262,21 +264,28 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) defer f.Close() mutexFile = f } + if d.trace != "" { + f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return Errorf("error opening trace profile output: %v", err) + } + traceFile = f + } // Collect profiles. var ( wg sync.WaitGroup - heapErr error - cpuErr error - traceErr error blockErr error + cpuErr error + heapErr error mutexErr error + traceErr error ) - if heapFile != nil { + if blockFile != nil { wg.Add(1) go func() { defer wg.Done() - heapErr = c.Sandbox.HeapProfile(heapFile, d.delay) + blockErr = c.Sandbox.BlockProfile(blockFile, d.duration) }() } if cpuFile != nil { @@ -286,25 +295,25 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) cpuErr = c.Sandbox.CPUProfile(cpuFile, d.duration) }() } - if traceFile != nil { + if heapFile != nil { wg.Add(1) go func() { defer wg.Done() - traceErr = c.Sandbox.Trace(traceFile, d.duration) + heapErr = c.Sandbox.HeapProfile(heapFile, d.delay) }() } - if blockFile != nil { + if mutexFile != nil { wg.Add(1) go func() { defer wg.Done() - blockErr = c.Sandbox.BlockProfile(blockFile, d.duration) + mutexErr = c.Sandbox.MutexProfile(mutexFile, d.duration) }() } - if mutexFile != nil { + if traceFile != nil { wg.Add(1) go func() { defer wg.Done() - mutexErr = c.Sandbox.MutexProfile(mutexFile, d.duration) + traceErr = c.Sandbox.Trace(traceFile, d.duration) }() } @@ -337,35 +346,41 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Collect all errors. 
errorCount := 0 - if heapErr != nil { + if blockErr != nil { errorCount++ - log.Infof("error collecting heap profile: %v", heapErr) - os.Remove(heapFile.Name()) + log.Infof("error collecting block profile: %v", blockErr) + os.Remove(blockFile.Name()) } if cpuErr != nil { errorCount++ log.Infof("error collecting cpu profile: %v", cpuErr) os.Remove(cpuFile.Name()) } - if traceErr != nil { - errorCount++ - log.Infof("error collecting trace profile: %v", traceErr) - os.Remove(traceFile.Name()) - } - if blockErr != nil { + if heapErr != nil { errorCount++ - log.Infof("error collecting block profile: %v", blockErr) - os.Remove(blockFile.Name()) + log.Infof("error collecting heap profile: %v", heapErr) + os.Remove(heapFile.Name()) } if mutexErr != nil { errorCount++ log.Infof("error collecting mutex profile: %v", mutexErr) os.Remove(mutexFile.Name()) } + if traceErr != nil { + errorCount++ + log.Infof("error collecting trace profile: %v", traceErr) + os.Remove(traceFile.Name()) + } if errorCount > 0 { return subcommands.ExitFailure } + if d.cat != nil { + if err := c.Cat(d.cat, os.Stdout); err != nil { + return Errorf("Cat failed: %v", err) + } + } + return subcommands.ExitSuccess } diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 5485db149..4eb5a96f1 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -130,7 +130,6 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su if conf.Network == config.NetworkNone { addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace}) - } else if conf.Rootless { if conf.Network == config.NetworkSandbox { c.notifyUser("*** Warning: sandbox network isn't supported with --rootless, switching to host ***") @@ -225,25 +224,25 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { args := strings.Split(cmd, " ") cmd := exec.Command(args[0], args[1:]...) if err := cmd.Run(); err != nil { - c.cleanupNet(cid, dev, "", "", "") + c.cleanupNet(cid, "", "", "") return nil, fmt.Errorf("failed to run %q: %v", cmd, err) } } resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec) if err != nil { - c.cleanupNet(cid, dev, "", "", "") + c.cleanupNet(cid, "", "", "") return nil, err } hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec) if err != nil { - c.cleanupNet(cid, dev, resolvPath, "", "") + c.cleanupNet(cid, resolvPath, "", "") return nil, err } hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid) hostsPath, err := makeFile("/etc/hosts", hosts, spec) if err != nil { - c.cleanupNet(cid, dev, resolvPath, hostnamePath, "") + c.cleanupNet(cid, resolvPath, hostnamePath, "") return nil, err } @@ -253,7 +252,7 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { } addNamespace(spec, netns) - return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil + return func() { c.cleanupNet(cid, resolvPath, hostnamePath, hostsPath) }, nil } // cleanupNet tries to cleanup the network setup in setupNet. @@ -263,7 +262,7 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { // // Unfortunately none of this can be automatically cleaned up on process exit, // we must do so explicitly. 
-func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) { +func (c *Do) cleanupNet(cid, resolvPath, hostnamePath, hostsPath string) { _, peer := deviceNames(cid) cmds := []string{ diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go index 3585b5448..96c5c1e8d 100644 --- a/runsc/cmd/error.go +++ b/runsc/cmd/error.go @@ -58,7 +58,7 @@ func Errorf(format string, args ...interface{}) subcommands.ExitStatus { panic(err) } if ErrorLogger != nil { - ErrorLogger.Write(b) + _, _ = ErrorLogger.Write(b) } return subcommands.ExitFailure diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index 06f00e8e7..08246e543 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -33,6 +33,10 @@ type Events struct { intervalSec int // If true, events will print a single group of stats and exit. stats bool + // If true, events will dump all filtered events to stdout. + stream bool + // filters for streamed events. + filters stringSlice } // Name implements subcommands.Command.Name. @@ -62,6 +66,8 @@ OPTIONS: func (evs *Events) SetFlags(f *flag.FlagSet) { f.IntVar(&evs.intervalSec, "interval", 5, "set the stats collection interval, in seconds") f.BoolVar(&evs.stats, "stats", false, "display the container's stats then exit") + f.BoolVar(&evs.stream, "stream", false, "dump all filtered events to stdout") + f.Var(&evs.filters, "filters", "only display matching events") } // Execute implements subcommands.Command.Execute. @@ -79,6 +85,13 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa Fatalf("loading sandbox: %v", err) } + if evs.stream { + if err := c.Stream(evs.filters, os.Stdout); err != nil { + Fatalf("Stream failed: %v", err) + } + return subcommands.ExitSuccess + } + // Repeatedly get stats from the container. for { // Get the event and print it as JSON. @@ -97,7 +110,9 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa if err != nil { log.Warningf("Error while marshalling event %v: %v", ev.Event, err) } else { - os.Stdout.Write(b) + if _, err := os.Stdout.Write(b); err != nil { + Fatalf("Error writing to stdout: %v", err) + } } // If we're only running once, break. If we're only running diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 242d474b8..2139fdf53 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -146,12 +146,12 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if ex.detach { return ex.execChildAndWait(waitStatus) } - return ex.exec(c, e, waitStatus) + return ex.exec(conf, c, e, waitStatus) } -func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus { +func (ex *Exec) exec(conf *config.Config, c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus { // Start the new process and get its pid. - pid, err := c.Execute(e) + pid, err := c.Execute(conf, e) if err != nil { return Errorf("executing processes for container: %v", err) } diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 5ded7b946..2193e9040 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -116,9 +116,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Note: minimal argument handling for the default case to keep it simple. 
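The -stream mode added to runsc events above dumps matching events to stdout as they arrive, instead of polling stats on an interval. An illustrative invocation (the filter grammar is whatever the sentry's event streaming accepts, so the value shown is a placeholder):

    runsc events -stream -filters=<filter> <container-id>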
args := os.Args args = append(args, "--apply-caps=false", "--setup-root=false") - if err := setCapsAndCallSelf(args, goferCaps); err != nil { - Fatalf("Unable to apply caps: %v", err) - } + Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps)) panic("unreachable") } @@ -267,7 +265,8 @@ func isReadonlyMount(opts []string) bool { func setupRootFS(spec *specs.Spec, conf *config.Config) error { // Convert all shared mounts into slaves to be sure that nothing will be // propagated outside of our namespace. - if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + procPath := "/proc" + if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", procPath); err != nil { Fatalf("error converting mounts: %v", err) } @@ -280,21 +279,34 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // We need a directory to construct a new root and we know that // runsc can't start without /proc, so we can use it for this. flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC) - if err := unix.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + if err := specutils.SafeMount("runsc-root", "/proc", "tmpfs", flags, "", procPath); err != nil { Fatalf("error mounting tmpfs: %v", err) } // Prepare tree structure for pivot_root(2). - os.Mkdir("/proc/proc", 0755) - os.Mkdir("/proc/root", 0755) + if err := os.Mkdir("/proc/proc", 0755); err != nil { + Fatalf("error creating /proc/proc: %v", err) + } + if err := os.Mkdir("/proc/root", 0755); err != nil { + Fatalf("error creating /proc/root: %v", err) + } + if err := os.Mkdir("/proc/etc", 0755); err != nil { + Fatalf("error creating /proc/etc: %v", err) + } + // This cannot use SafeMount because there's no available procfs. But we + // know that /proc is an empty tmpfs mount, so this is safe. if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil { Fatalf("error mounting proc: %v", err) } + if err := copyFile("/proc/etc/localtime", "/etc/localtime"); err != nil { + log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) + } root = "/proc/root" + procPath = "/proc/proc" } // Mount root path followed by submounts. - if err := unix.Mount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, ""); err != nil { + if err := specutils.SafeMount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, "", procPath); err != nil { return fmt.Errorf("mounting root on root (%q) err: %v", root, err) } @@ -302,12 +314,12 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) } - if err := unix.Mount("", root, "", uintptr(flags), ""); err != nil { + if err := specutils.SafeMount("", root, "", uintptr(flags), "", procPath); err != nil { return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err) } // Replace the current spec, with the clean spec with symlinks resolved. - if err := setupMounts(conf, spec.Mounts, root); err != nil { + if err := setupMounts(conf, spec.Mounts, root, procPath); err != nil { Fatalf("error setting up FS: %v", err) } @@ -329,7 +341,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // to make it read-only for extra safety. 
log.Infof("Remounting root as readonly: %q", root) flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC) - if err := unix.Mount(root, root, "bind", flags, ""); err != nil { + if err := specutils.SafeMount(root, root, "bind", flags, "", procPath); err != nil { return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err) } } @@ -345,10 +357,10 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { return nil } -// setupMounts binds mount all mounts specified in the spec in their correct +// setupMounts bind mounts all mounts specified in the spec in their correct // location inside root. It will resolve relative paths and symlinks. It also // creates directories as needed. -func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { +func setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error { for _, m := range mounts { if !specutils.Is9PMount(m, conf.VFS2) { continue @@ -366,14 +378,14 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { } log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) - if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { - return fmt.Errorf("mounting %v: %v", m, err) + if err := specutils.SafeSetupAndMount(m.Source, dst, m.Type, flags, procPath); err != nil { + return fmt.Errorf("mounting %+v: %v", m, err) } // Set propagation options that cannot be set together with other options. flags = specutils.PropOptionsToFlags(m.Options) if flags != 0 { - if err := unix.Mount("", dst, "", uintptr(flags), ""); err != nil { + if err := specutils.SafeMount("", dst, "", uintptr(flags), "", procPath); err != nil { return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) } } diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go index cd85dabbb..35545e938 100644 --- a/runsc/cmd/help.go +++ b/runsc/cmd/help.go @@ -58,7 +58,7 @@ func (*Help) Usage() string { } // SetFlags implements subcommands.Command.SetFlags. -func (h *Help) SetFlags(f *flag.FlagSet) {} +func (h *Help) SetFlags(*flag.FlagSet) {} // Execute implements subcommands.Command.Execute. func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { diff --git a/runsc/cmd/install.go b/runsc/cmd/install.go index 2e223e3be..dc9e01d95 100644 --- a/runsc/cmd/install.go +++ b/runsc/cmd/install.go @@ -58,7 +58,7 @@ func (i *Install) SetFlags(fs *flag.FlagSet) { } // Execute implements subcommands.Command.Execute. -func (i *Install) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { +func (i *Install) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { // Grab the name and arguments. runtimeArgs := f.Args() @@ -134,7 +134,7 @@ func (u *Uninstall) SetFlags(fs *flag.FlagSet) { } // Execute implements subcommands.Command.Execute. 
-func (u *Uninstall) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { +func (u *Uninstall) Execute(context.Context, *flag.FlagSet, ...interface{}) subcommands.ExitStatus { log.Printf("Removing runtime %q from %q.", u.Runtime, u.ConfigFile) c, err := readConfig(u.ConfigFile) diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index 9f9a47bd8..2adfcced7 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go @@ -102,7 +102,7 @@ func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) c.CreatedAt.Format(time.RFC3339Nano), c.Owner) } - w.Flush() + _ = w.Flush() case "json": // Print just the states. var states []specs.State diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go index f4e65adb8..1aada5968 100644 --- a/runsc/cmd/mitigate.go +++ b/runsc/cmd/mitigate.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "io/ioutil" + "os" "runtime" "github.com/google/subcommands" @@ -29,8 +30,8 @@ import ( const ( // cpuInfo is the path used to parse CPU info. cpuInfo = "/proc/cpuinfo" - // allPossibleCPUs is the path used to enable CPUs. - allPossibleCPUs = "/sys/devices/system/cpu/possible" + // Path to enable/disable SMT. + smtPath = "/sys/devices/system/cpu/smt/control" ) // Mitigate implements subcommands.Command for the "mitigate" command. @@ -39,10 +40,10 @@ type Mitigate struct { dryRun bool // Reverse mitigate by turning on all CPU cores. reverse bool - // Path to file to read to create CPUSet. - path string // Extra data for post mitigate operations. data string + // Control to mitigate/reverse smt. + control machineControl } // Name implements subcommands.command.name. @@ -56,12 +57,12 @@ func (*Mitigate) Synopsis() string { } // Usage implements Usage for cmd.Mitigate. -func (m Mitigate) Usage() string { +func (m *Mitigate) Usage() string { return fmt.Sprintf(`mitigate [flags] -mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. +mitigate mitigates a system to the "MDS" vulnerability by writing "off" to %q. CPUs can be restored by writing "on" to the same file or rebooting your system. -The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.%s`, m.usage()) +The command can be reversed with --reverse, which writes "on" to the file above.%s`, smtPath, m.usage()) } // SetFlags sets flags for the command Mitigate. @@ -74,104 +75,110 @@ func (m *Mitigate) SetFlags(f *flag.FlagSet) { // Execute implements subcommands.Command.Execute. func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { if runtime.GOARCH == "arm64" || runtime.GOARCH == "arm" { - log.Warningf("As ARM is not affected by MDS, mitigate does not support") - return subcommands.ExitFailure + log.Warningf("As ARM is not affected by MDS, mitigate does not support ARM machines.") + // Set reverse flag so that we still perform post mitigate operations. mitigate reverse is a noop in this case. 
+ m.reverse = true } if f.NArg() != 0 { f.Usage() return subcommands.ExitUsageError } + m.control = &machineControlImpl{} + return m.execute() } - m.path = cpuInfo - if m.reverse { - m.path = allPossibleCPUs +// execute executes mitigate operations. Separate from Execute method for +// easier mocking. +func (m *Mitigate) execute() subcommands.ExitStatus { + beforeSet, err := m.control.getCPUs() + if err != nil { + return Errorf("Get before CPUSet failed: %v", err) } + log.Infof("CPUs before: %s", beforeSet.String()) - set, err := m.doExecute() - if err != nil { - return Errorf("Execute failed: %v", err) + if err := m.doEnableDisable(beforeSet); err != nil { + return Errorf("Enable/Disable action failed on %q: %v", smtPath, err) } - if m.data == "" { - return subcommands.ExitSuccess + afterSet, err := m.control.getCPUs() + if err != nil { + return Errorf("Get after CPUSet failed: %v", err) } + log.Infof("CPUs after: %s", afterSet.String()) - if err = m.postMitigate(set); err != nil { + if err = m.postMitigate(afterSet); err != nil { return Errorf("Post Mitigate failed: %v", err) } return subcommands.ExitSuccess } -// Execute executes the Mitigate command. -func (m *Mitigate) doExecute() (mitigate.CPUSet, error) { - if m.dryRun { - log.Infof("Running with DryRun. No cpu settings will be changed.") - } - data, err := ioutil.ReadFile(m.path) - if err != nil { - return nil, fmt.Errorf("failed to read %s: %w", m.path, err) - } +// doEnableDisable does either enable or disable operation based on flags. +func (m *Mitigate) doEnableDisable(set mitigate.CPUSet) error { if m.reverse { - set, err := m.doReverse(data) - if err != nil { - return nil, fmt.Errorf("reverse operation failed: %w", err) + if m.dryRun { + log.Infof("Skipping reverse action because dryrun is set.") + return nil } - return set, nil + return m.control.enable() } - set, err := m.doMitigate(data) - if err != nil { - return nil, fmt.Errorf("mitigate operation failed: %w", err) + if m.dryRun { + log.Infof("Skipping mitigate action because dryrun is set.") + return nil } - return set, nil + if set.IsVulnerable() { + return m.control.disable() + } + log.Infof("CPUs not vulnerable. Skipping disable call.") + return nil } -func (m *Mitigate) doMitigate(data []byte) (mitigate.CPUSet, error) { - set, err := mitigate.NewCPUSet(data) - if err != nil { - return nil, err - } +// Interface to wrap interactions with underlying machine. Done +// so testing with mocks can be done hermetically. +type machineControl interface { + enable() error + disable() error + isEnabled() (bool, error) + getCPUs() (mitigate.CPUSet, error) +} - log.Infof("Mitigate found the following CPUs...") - log.Infof("%s", set) +// Implementation of SMT control interaction with the underlying machine.
+type machineControlImpl struct{} - disableList := set.GetShutdownList() - log.Infof("Disabling threads on thread pairs.") - for _, t := range disableList { - log.Infof("Disable thread: %s", t) - if m.dryRun { - continue - } - if err := t.Disable(); err != nil { - return nil, fmt.Errorf("error disabling thread: %s err: %w", t, err) - } - } - log.Infof("Shutdown successful.") - return set, nil +func (*machineControlImpl) enable() error { + return checkFileExistsOnWrite("enable", "on") } -func (m *Mitigate) doReverse(data []byte) (mitigate.CPUSet, error) { - set, err := mitigate.NewCPUSetFromPossible(data) - if err != nil { - return nil, err - } +func (*machineControlImpl) disable() error { + return checkFileExistsOnWrite("disable", "off") +} - log.Infof("Reverse mitigate found the following CPUs...") - log.Infof("%s", set) +// Writes data to SMT control. If file not found, logs file not exist error and returns nil +// error, which is done because machines without the file pointed to by smtPath only have one +// thread per core in the first place. Otherwise returns error from ioutil.WriteFile. +func checkFileExistsOnWrite(op, data string) error { + err := ioutil.WriteFile(smtPath, []byte(data), 0644) + if err != nil && os.IsNotExist(err) { + log.Infof("File %q does not exist for operation %s. This machine probably has no smt control.", smtPath, op) + return nil + } + return err +} - enableList := set.GetRemainingList() +func (*machineControlImpl) isEnabled() (bool, error) { + data, err := ioutil.ReadFile(smtPath) + return string(data) == "on", err +} - log.Infof("Enabling all CPUs...") - for _, t := range enableList { - log.Infof("Enabling thread: %s", t) - if m.dryRun { - continue - } - if err := t.Enable(); err != nil { - return nil, fmt.Errorf("error enabling thread: %s err: %w", t, err) - } +func (*machineControlImpl) getCPUs() (mitigate.CPUSet, error) { + data, err := ioutil.ReadFile(cpuInfo) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", cpuInfo, err) + } + set, err := mitigate.NewCPUSet(string(data)) + if err != nil { + return nil, fmt.Errorf("getCPUs: %v", err) } - log.Infof("Enable successful.") return set, nil } diff --git a/runsc/cmd/mitigate_extras.go b/runsc/cmd/mitigate_extras.go index 2cb2833f0..2c3e17cd6 100644 --- a/runsc/cmd/mitigate_extras.go +++ b/runsc/cmd/mitigate_extras.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package cmd import ( diff --git a/runsc/cmd/mitigate_test.go b/runsc/cmd/mitigate_test.go index 2d3fef7c1..294fc645c 100644 --- a/runsc/cmd/mitigate_test.go +++ b/runsc/cmd/mitigate_test.go @@ -12,153 +12,139 @@ // See the License for the specific language governing permissions and // limitations under the License.
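Operationally, the rewritten mitigate reduces to toggling one sysfs knob instead of offlining sibling hyperthreads one by one:

    runsc mitigate            # writes "off" to /sys/devices/system/cpu/smt/control if the CPUs are vulnerable
    runsc mitigate --reverse  # writes "on" to re-enable SMT

(The dry-run flag, registered in SetFlags which this hunk elides, logs the intended action without writing.) Machines whose kernels lack the smt/control file get a logged no-op, per checkFileExistsOnWrite above.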
+//go:build amd64 // +build amd64 package cmd import ( - "fmt" - "io/ioutil" - "os" - "strings" "testing" - "gvisor.dev/gvisor/runsc/mitigate/mock" + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/mitigate" ) -type executeTestCase struct { - name string - mitigateData string - mitigateError error - mitigateCPU int - reverseData string - reverseError error - reverseCPU int +type mockMachineControl struct { + enabled bool + cpus mitigate.CPUSet } -func TestExecute(t *testing.T) { +func (m *mockMachineControl) enable() error { + m.enabled = true + return nil +} - partial := `processor : 1 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -physical id : 0 -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -power management: -` +func (m *mockMachineControl) disable() error { + if m.cpus.IsVulnerable() { + m.enabled = false + } + return nil +} +func (m *mockMachineControl) isEnabled() (bool, error) { + return m.enabled, nil +} + +func (m *mockMachineControl) getCPUs() (mitigate.CPUSet, error) { + set := m.cpus + if !m.enabled { + set = m.cpus[:len(m.cpus)/2] + } + + // Instead of just returning the created CPU set stored in this struct, call + // NewCPUSet to exercise that code path as the machineControlImpl would. + return mitigate.NewCPUSet(set.String()) +} + +type executeTestCase struct { + name string + cpu mitigate.MockCPU + mitigateWantCPUs int + mitigateError subcommands.ExitStatus + mitigateWantEnabled bool + reverseWantCPUs int + reverseError subcommands.ExitStatus + reverseWantEnabled bool + dryrun bool +} + +func TestExecute(t *testing.T) { for _, tc := range []executeTestCase{ { - name: "CascadeLake4", - mitigateData: mock.CascadeLake4.MakeCPUString(), - mitigateCPU: 2, - reverseData: mock.CascadeLake4.MakeSysPossibleString(), - reverseCPU: 4, + name: "CascadeLake4", + cpu: mitigate.CascadeLake4, + mitigateWantCPUs: 2, + mitigateWantEnabled: false, + reverseWantCPUs: 4, + reverseWantEnabled: true, }, { - name: "Empty", - mitigateData: "", - mitigateError: fmt.Errorf(`mitigate operation failed: no cpus found for: ""`), - reverseData: "", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: ""`), + name: "CascadeLake4DryRun", + cpu: mitigate.CascadeLake4, + mitigateWantCPUs: 4, + mitigateWantEnabled: true, + reverseWantCPUs: 4, + reverseWantEnabled: true, + dryrun: true, }, { - name: "Partial", - mitigateData: `processor : 0 -vendor_id : AuthenticAMD -cpu family : 23 -model : 49 -model name : AMD EPYC 7B12 -physical id : 0 -core id : 0 -cpu cores : 1 -bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass -power management::84 - -` + partial, - mitigateError: fmt.Errorf(`mitigate operation failed: failed to match key "core id": %q`, partial), - reverseData: "1-", - reverseError: fmt.Errorf(`reverse operation failed: mismatch regex from possible: %q`, "1-"), + name: "AMD8", + cpu: mitigate.AMD8, + mitigateWantCPUs: 8, + mitigateWantEnabled: true, + reverseWantCPUs: 8, + reverseWantEnabled: true, + }, + { + name: "Empty", + cpu: mitigate.Empty, + mitigateError: Errorf(`mitigate operation failed: no cpus found for: ""`), + reverseError: Errorf(`mitigate operation failed: no cpus found for: ""`), }, } { t.Run(tc.name, func(t *testing.T) { + set := tc.cpu.MakeCPUSet() m := &Mitigate{ - dryRun: true, + control: &mockMachineControl{ + enabled: true, + cpus: set, + }, + dryRun: tc.dryrun, } - m.doExecuteTest(t, "Mitigate", tc.mitigateData, tc.mitigateCPU, 
tc.mitigateError) + t.Run("Mitigate", func(t *testing.T) { + m.doExecuteTest(t, tc.mitigateWantEnabled, tc.mitigateWantCPUs, tc.mitigateError) + }) m.reverse = true - m.doExecuteTest(t, "Reverse", tc.reverseData, tc.reverseCPU, tc.reverseError) + t.Run("Reverse", func(t *testing.T) { + m.doExecuteTest(t, tc.reverseWantEnabled, tc.reverseWantCPUs, tc.reverseError) + }) }) } } -func TestExecuteSmoke(t *testing.T) { - smokeMitigate, err := ioutil.ReadFile(cpuInfo) - if err != nil { - t.Fatalf("Failed to read %s: %v", cpuInfo, err) +// doExecuteTest runs Execute with the mitigate operation and reverse operation. +func (m *Mitigate) doExecuteTest(t *testing.T, wantEnabled bool, wantCPUs int, wantErr subcommands.ExitStatus) { + subError := m.execute() + if subError != wantErr { + t.Fatalf("Mitigate error mismatch: want: %v got: %v", wantErr, subError) } - m := &Mitigate{ - dryRun: true, + // case where test should end in error or we don't care + // about how many cpus are returned. + if wantErr != subcommands.ExitSuccess { + log.Infof("return") + return } - m.doExecuteTest(t, "Mitigate", string(smokeMitigate), 0, nil) - - smokeReverse, err := ioutil.ReadFile(allPossibleCPUs) - if err != nil { - t.Fatalf("Failed to read %s: %v", allPossibleCPUs, err) + gotEnabled, _ := m.control.isEnabled() + if wantEnabled != gotEnabled { + t.Fatalf("Incorrect enabled state: want: %t got: %t", wantEnabled, gotEnabled) } - m.reverse = true - m.doExecuteTest(t, "Reverse", string(smokeReverse), 0, nil) -} - -// doExecuteTest runs Execute with the mitigate operation and reverse operation. -func (m *Mitigate) doExecuteTest(t *testing.T, name, data string, want int, wantErr error) { - t.Run(name, func(t *testing.T) { - file, err := ioutil.TempFile("", "outfile.txt") - if err != nil { - t.Fatalf("Failed to create tmpfile: %v", err) - } - defer os.Remove(file.Name()) - - if _, err := file.WriteString(data); err != nil { - t.Fatalf("Failed to write to file: %v", err) - } - - // Set fields for mitigate and dryrun to keep test hermetic. - m.path = file.Name() - - set, err := m.doExecute() - if err = checkErr(wantErr, err); err != nil { - t.Fatalf("Mitigate error mismatch: %v", err) - } - - // case where test should end in error or we don't care - // about how many cpus are returned. - if wantErr != nil || want < 1 { - return - } - got := len(set.GetRemainingList()) - if want != got { - t.Fatalf("Failed wrong number of remaining CPUs: want %d, got %d", want, got) - } - - }) -} - -// checkErr checks error for equality. -func checkErr(want, got error) error { - switch { - case want == nil && got == nil: - case want != nil && got == nil: - fallthrough - case want == nil && got != nil: - fallthrough - case want.Error() != strings.Trim(got.Error(), " "): - return fmt.Errorf("got: %v want: %v", got, want) + gotCPUs, _ := m.control.getCPUs() + if len(gotCPUs) != wantCPUs { + t.Fatalf("Incorrect number of CPUs: want: %d got: %d", wantCPUs, len(gotCPUs)) } - return nil } diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index 15ef7b577..9768f1cfb 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go @@ -42,7 +42,7 @@ func (*Pause) Usage() string { } // SetFlags implements subcommands.Command.SetFlags. -func (*Pause) SetFlags(f *flag.FlagSet) { +func (*Pause) SetFlags(*flag.FlagSet) { } // Execute implements subcommands.Command.Execute. 
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index 856469252..d62e89e80 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go @@ -43,7 +43,7 @@ func (*Resume) Usage() string { } // SetFlags implements subcommands.Command.SetFlags. -func (r *Resume) SetFlags(f *flag.FlagSet) { +func (r *Resume) SetFlags(*flag.FlagSet) { } // Execute implements subcommands.Command.Execute. diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go index 722181aff..da11c9d06 100644 --- a/runsc/cmd/run.go +++ b/runsc/cmd/run.go @@ -68,7 +68,14 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s waitStatus := args[1].(*unix.WaitStatus) if conf.Rootless { - return Errorf("Rootless mode not supported with %q", r.Name()) + if conf.Network == config.NetworkSandbox { + return Errorf("sandbox network isn't supported with --rootless, use --network=none or --network=host") + } + + if err := specutils.MaybeRunAsRoot(); err != nil { + return Errorf("Error executing inside namespace: %v", err) + } + // Execution will continue here if no more capabilities are needed... } bundleDir := r.bundleDir diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 964a65064..7c395d722 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -43,7 +43,7 @@ func (*Start) Usage() string { } // SetFlags implements subcommands.Command.SetFlags. -func (*Start) SetFlags(f *flag.FlagSet) {} +func (*Start) SetFlags(*flag.FlagSet) {} // Execute implements subcommands.Command.Execute. func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index 1f7913d5a..061003bab 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go @@ -45,7 +45,7 @@ func (*State) Usage() string { } // SetFlags implements subcommands.Command.SetFlags. -func (*State) SetFlags(f *flag.FlagSet) {} +func (*State) SetFlags(*flag.FlagSet) {} // Execute implements subcommands.Command.Execute. func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { @@ -71,6 +71,8 @@ func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s if err != nil { Fatalf("marshaling container state: %v", err) } - os.Stdout.Write(b) + if _, err := os.Stdout.Write(b); err != nil { + Fatalf("Error writing to stdout: %v", err) + } return subcommands.ExitSuccess } diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index a8c83d662..608be9bb4 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -103,7 +103,7 @@ func (s *Syscalls) SetFlags(f *flag.FlagSet) { } // Execute implements subcommands.Command.Execute. -func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { +func (s *Syscalls) Execute(context.Context, *flag.FlagSet, ...interface{}) subcommands.ExitStatus { out, ok := outputMap[s.format] if !ok { Fatalf("Unsupported output format %q", s.format) diff --git a/runsc/cmd/usage.go b/runsc/cmd/usage.go new file mode 100644 index 000000000..d2aeafa28 --- /dev/null +++ b/runsc/cmd/usage.go @@ -0,0 +1,93 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/container" + "gvisor.dev/gvisor/runsc/flag" +) + +// Usage implements subcommands.Command for the "usage" command. +type Usage struct { + full bool + fd bool +} + +// Name implements subcommands.Command.Name. +func (*Usage) Name() string { + return "usage" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Usage) Synopsis() string { + return "Usage shows application memory usage across various categories in bytes." +} + +// Usage implements subcommands.Command.Usage. +func (*Usage) Usage() string { + return `usage [flags] <container id> - print memory usage to standard output.` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (u *Usage) SetFlags(f *flag.FlagSet) { + f.BoolVar(&u.full, "full", false, "enumerate all usage by categories") + f.BoolVar(&u.fd, "fd", false, "retrieves a subset of usage through the established usage FD") +} + +// Execute implements subcommands.Command.Execute. +func (u *Usage) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() < 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*config.Config) + + cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) + if err != nil { + Fatalf("loading container: %v", err) + } + + if !u.fd { + m, err := cont.Usage(u.full) + if err != nil { + Fatalf("usage failed: %v", err) + } + if err := json.NewEncoder(os.Stdout).Encode(m); err != nil { + Fatalf("Encode MemoryUsage failed: %v", err) + } + } else { + m, err := cont.UsageFD() + if err != nil { + Fatalf("usagefd failed: %v", err) + } + + mapped, unknown, total, err := m.Fetch() + if err != nil { + Fatalf("Fetch memory usage failed: %v", err) + } + + fmt.Printf("Mapped %v, Unknown %v, Total %v\n", mapped, unknown, total) + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/verity_prepare.go b/runsc/cmd/verity_prepare.go index 66128b2a3..44c1d05db 100644 --- a/runsc/cmd/verity_prepare.go +++ b/runsc/cmd/verity_prepare.go @@ -82,18 +82,23 @@ func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...inte }, Process: &specs.Process{ Cwd: absRoot, - Args: []string{c.tool, "--path", "/verityroot"}, + Args: []string{c.tool, "--path", "/verityroot", "--rawpath", "/rawroot"}, Env: os.Environ(), Capabilities: specutils.AllCapabilities(), }, Hostname: hostname, Mounts: []specs.Mount{ - specs.Mount{ + { Source: c.dir, Destination: "/verityroot", Type: "bind", Options: []string{"verity.roothash="}, }, + { + Source: c.dir, + Destination: "/rawroot", + Type: "bind", + }, }, } diff --git a/runsc/config/BUILD b/runsc/config/BUILD index b1672bb9d..64295d283 100644 --- a/runsc/config/BUILD +++ b/runsc/config/BUILD @@ -11,6 +11,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/refs", + "//pkg/sentry/control:control_go_proto", "//pkg/sentry/watchdog", "//pkg/sync", "//runsc/flag", @@ -24,5 +25,8 @@ go_test(
"config_test.go", ], library = ":config", - deps = ["//runsc/flag"], + deps = [ + "//pkg/sentry/control:control_go_proto", + "//runsc/flag", + ], ) diff --git a/runsc/config/config.go b/runsc/config/config.go index 3d8c7a0ab..a562f7bf4 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -19,8 +19,10 @@ package config import ( "fmt" + "strings" "gvisor.dev/gvisor/pkg/refs" + controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto" "gvisor.dev/gvisor/pkg/sentry/watchdog" ) @@ -84,6 +86,9 @@ type Config struct { // capabilities. EnableRaw bool `flag:"net-raw"` + // AllowPacketEndpointWrite enables write operations on packet endpoints. + AllowPacketEndpointWrite bool `flag:"TESTONLY-allow-packet-endpoint-write"` + // HardwareGSO indicates that hardware segmentation offload is enabled. HardwareGSO bool `flag:"gso"` @@ -117,6 +122,10 @@ type Config struct { // StraceLogSize is the max size of data blobs to display. StraceLogSize uint `flag:"strace-log-size"` + // StraceEvent indicates sending strace to events if true. Strace is + // sent to log if false. + StraceEvent bool `flag:"strace-event"` + // DisableSeccomp indicates whether seccomp syscall filters should be // disabled. Pardon the double negation, but default to enabled is important. DisableSeccomp bool @@ -131,6 +140,29 @@ type Config struct { // ProfileEnable is set to prepare the sandbox to be profiled. ProfileEnable bool `flag:"profile"` + // ProfileBlock collects a block profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileBlock string `flag:"profile-block"` + + // ProfileCPU collects a CPU profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileCPU string `flag:"profile-cpu"` + + // ProfileHeap collects a heap profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileHeap string `flag:"profile-heap"` + + // ProfileMutex collects a mutex profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileMutex string `flag:"profile-mutex"` + + // TraceFile collects a Go runtime execution trace to the passed file + // for the duration of the container execution. + TraceFile string `flag:"trace"` + + // Controls defines the controls that may be enabled. + Controls controlConfig `flag:"controls"` + // RestoreFile is the path to the saved container image RestoreFile string @@ -142,7 +174,8 @@ type Config struct { // Rootless allows the sandbox to be started with a user that is not root. // Defense in depth measures are weaker in rootless mode. Specifically, the // sandbox and Gofer process run as root inside a user namespace with root - // mapped to the caller's user. + // mapped to the caller's user. When using rootless, the container root path + // should not have a symlink. Rootless bool `flag:"rootless"` // AlsoLogToStderr allows to send log messages to stderr. @@ -175,7 +208,8 @@ type Config struct { // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be - // necessary in test environments that have limited capabilities. + // necessary in test environments that have limited capabilities. When + // disabling chroot, the container root path should not have a symlink. 
TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"` // TestOnlyTestNameEnv should only be used in tests. It looks up for the @@ -193,6 +227,21 @@ func (c *Config) validate() error { if c.NumNetworkChannels <= 0 { return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels) } + // Require profile flags to explicitly opt-in to profiling with + // -profile rather than implying it since these options have security + // implications. + if c.ProfileBlock != "" && !c.ProfileEnable { + return fmt.Errorf("profile-block flag requires enabling profiling with profile flag") + } + if c.ProfileCPU != "" && !c.ProfileEnable { + return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag") + } + if c.ProfileHeap != "" && !c.ProfileEnable { + return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag") + } + if c.ProfileMutex != "" && !c.ProfileEnable { + return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag") + } return nil } @@ -345,6 +394,96 @@ func (q QueueingDiscipline) String() string { panic(fmt.Sprintf("Invalid qdisc %d", q)) } +// controlConfig represents control endpoints. +type controlConfig struct { + Controls *controlpb.ControlConfig +} + +// Set implements flag.Value. +func (c *controlConfig) Set(v string) error { + controls := strings.Split(v, ",") + var controlList []controlpb.ControlConfig_Endpoint + for _, control := range controls { + switch control { + case "EVENTS": + controlList = append(controlList, controlpb.ControlConfig_EVENTS) + case "FS": + controlList = append(controlList, controlpb.ControlConfig_FS) + case "LIFECYCLE": + controlList = append(controlList, controlpb.ControlConfig_LIFECYCLE) + case "LOGGING": + controlList = append(controlList, controlpb.ControlConfig_LOGGING) + case "PROFILE": + controlList = append(controlList, controlpb.ControlConfig_PROFILE) + case "USAGE": + controlList = append(controlList, controlpb.ControlConfig_USAGE) + case "PROC": + controlList = append(controlList, controlpb.ControlConfig_PROC) + case "STATE": + controlList = append(controlList, controlpb.ControlConfig_STATE) + case "DEBUG": + controlList = append(controlList, controlpb.ControlConfig_DEBUG) + default: + return fmt.Errorf("invalid control %q", control) + } + } + c.Controls.AllowedControls = controlList + return nil +} + +// Get implements flag.Value. +func (c *controlConfig) Get() interface{} { + return *c +} + +// String implements flag.Value. 
+func (c *controlConfig) String() string { + v := "" + for _, control := range c.Controls.GetAllowedControls() { + if len(v) > 0 { + v += "," + } + switch control { + case controlpb.ControlConfig_EVENTS: + v += "EVENTS" + case controlpb.ControlConfig_FS: + v += "FS" + case controlpb.ControlConfig_LIFECYCLE: + v += "LIFECYCLE" + case controlpb.ControlConfig_LOGGING: + v += "LOGGING" + case controlpb.ControlConfig_PROFILE: + v += "PROFILE" + case controlpb.ControlConfig_USAGE: + v += "USAGE" + case controlpb.ControlConfig_PROC: + v += "PROC" + case controlpb.ControlConfig_STATE: + v += "STATE" + case controlpb.ControlConfig_DEBUG: + v += "DEBUG" + default: + panic(fmt.Sprintf("Invalid control %d", control)) + } + } + return v +} + +func defaultControlConfig() *controlConfig { + c := controlConfig{} + c.Controls = &controlpb.ControlConfig{} + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_EVENTS) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_FS) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LIFECYCLE) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LOGGING) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROFILE) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_USAGE) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROC) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_STATE) + c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_DEBUG) + return &c +} + func leakModePtr(v refs.LeakMode) *refs.LeakMode { return &v } diff --git a/runsc/config/config_test.go b/runsc/config/config_test.go index fb162b7eb..57c241c86 100644 --- a/runsc/config/config_test.go +++ b/runsc/config/config_test.go @@ -18,6 +18,7 @@ import ( "strings" "testing" + controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto" "gvisor.dev/gvisor/runsc/flag" ) @@ -41,21 +42,43 @@ func TestDefault(t *testing.T) { } } -func setDefault(name string) { +func setDefault(name string) error { fl := flag.CommandLine.Lookup(name) - fl.Value.Set(fl.DefValue) + return fl.Value.Set(fl.DefValue) } func TestFromFlags(t *testing.T) { - flag.CommandLine.Lookup("root").Value.Set("some-path") - flag.CommandLine.Lookup("debug").Value.Set("true") - flag.CommandLine.Lookup("num-network-channels").Value.Set("123") - flag.CommandLine.Lookup("network").Value.Set("none") + if err := flag.CommandLine.Lookup("root").Value.Set("some-path"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := flag.CommandLine.Lookup("debug").Value.Set("true"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := flag.CommandLine.Lookup("num-network-channels").Value.Set("123"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := flag.CommandLine.Lookup("network").Value.Set("none"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := flag.CommandLine.Lookup("controls").Value.Set("EVENTS,FS"); err != nil { + t.Errorf("Flag set: %v", err) + } defer func() { - setDefault("root") - setDefault("debug") - setDefault("num-network-channels") - setDefault("network") + if err := setDefault("root"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := setDefault("debug"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := setDefault("num-network-channels"); err != 
nil { + t.Errorf("Flag set: %v", err) + } + if err := setDefault("network"); err != nil { + t.Errorf("Flag set: %v", err) + } + if err := setDefault("controls"); err != nil { + t.Errorf("Flag set: %v", err) + } }() c, err := NewFromFlags() @@ -74,6 +97,12 @@ func TestFromFlags(t *testing.T) { if want := NetworkNone; c.Network != want { t.Errorf("Network=%v, want: %v", c.Network, want) } + wants := []controlpb.ControlConfig_Endpoint{controlpb.ControlConfig_EVENTS, controlpb.ControlConfig_FS} + for i, want := range wants { + if c.Controls.Controls.AllowedControls[i] != want { + t.Errorf("Controls.Controls.AllowedControls[%d]=%v, want: %v", i, c.Controls.Controls.AllowedControls[i], want) + } + } } func TestToFlags(t *testing.T) { @@ -85,10 +114,15 @@ func TestToFlags(t *testing.T) { c.Debug = true c.NumNetworkChannels = 123 c.Network = NetworkNone + c.Controls = controlConfig{ + Controls: &controlpb.ControlConfig{ + AllowedControls: []controlpb.ControlConfig_Endpoint{controlpb.ControlConfig_EVENTS, controlpb.ControlConfig_FS}, + }, + } flags := c.ToFlags() - if len(flags) != 4 { - t.Errorf("wrong number of flags set, want: 4, got: %d: %s", len(flags), flags) + if len(flags) != 5 { + t.Errorf("wrong number of flags set, want: 5, got: %d: %s", len(flags), flags) } t.Logf("Flags: %s", flags) fm := map[string]string{} @@ -101,6 +135,7 @@ func TestToFlags(t *testing.T) { "--debug": "true", "--num-network-channels": "123", "--network": "none", + "--controls": "EVENTS,FS", } { if got, ok := fm[name]; ok { if got != want { diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 6f1b5927a..1bf23951a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -56,16 +56,23 @@ func RegisterFlags() { flag.Bool("strace", false, "enable strace.") flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.") flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.") + flag.Bool("strace-event", false, "send strace to event.") // Flags that control sandbox runtime behavior. flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.") flag.Var(watchdogActionPtr(watchdog.LogWarning), "watchdog-action", "sets what action the watchdog takes when triggered: log (default), panic.") flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") + flag.String("profile-block", "", "collects a block profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-cpu", "", "collects a CPU profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-heap", "", "collects a heap profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-mutex", "", "collects a mutex profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("trace", "", "collects a Go runtime execution trace to this file path for the duration of the container execution.") flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. 
Sandbox and Gofer processes may run with same privileges as current user.") flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.") flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.") + flag.Var(defaultControlConfig(), "controls", "Sentry control endpoints.") // Flags that control sandbox runtime behavior: FS related. flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem validation to use for the root mount: exclusive (default), shared.") @@ -90,6 +97,7 @@ func RegisterFlags() { // Test flags, not to be used outside tests, ever. flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") flag.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.") + flag.Bool("TESTONLY-allow-packet-endpoint-write", false, "TEST ONLY; do not ever use! Used for tests to allow writes on packet sockets.") }) } diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 79b056fce..9d36086c3 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -288,7 +288,7 @@ func TestJobControlSignalExec(t *testing.T) { StdioIsPty: true, } - pid, err := c.Execute(execArgs) + pid, err := c.Execute(conf, execArgs) if err != nil { t.Fatalf("error executing: %v", err) } @@ -308,7 +308,9 @@ func TestJobControlSignalExec(t *testing.T) { } // Execute sleep. - ptyMaster.Write([]byte("sleep 100\n")) + if _, err := ptyMaster.Write([]byte("sleep 100\n")); err != nil { + t.Fatalf("ptyMaster.Write: %v", err) + } // Wait for it to start. Sleep's PPID is bash's PID. expectedPL = append(expectedPL, newProcessBuilder().PID(3).PPID(2).Cmd("sleep").Process()) @@ -411,7 +413,9 @@ func TestJobControlSignalRootContainer(t *testing.T) { // which makes this a suitable Reader for WaitUntilRead. ptyBuf := newBlockingBuffer() tee := io.TeeReader(ptyMaster, ptyBuf) - go io.Copy(os.Stderr, tee) + go func() { + _, _ = io.Copy(os.Stderr, tee) + }() // Start the container. if err := c.Start(conf); err != nil { @@ -444,7 +448,9 @@ func TestJobControlSignalRootContainer(t *testing.T) { } // Execute sleep via the terminal. - ptyMaster.Write([]byte("sleep 100\n")) + if _, err := ptyMaster.Write([]byte("sleep 100\n")); err != nil { + t.Fatalf("ptyMaster.Write(): %v", err) + } // Wait for sleep to start. expectedPL = append(expectedPL, newProcessBuilder().PID(2).PPID(1).Cmd("sleep").Process()) @@ -563,13 +569,15 @@ func TestMultiContainerTerminal(t *testing.T) { // file. Writes after a certain point will block unless we drain the // PTY, so we must continually copy from it. // - // We log the output to stderr for debugabilitly, and also to a buffer, + // We log the output to stderr for debuggability, and also to a buffer, // since we wait on particular output from bash below. We use a custom // blockingBuffer which is thread-safe and also blocks on Read calls, // which makes this a suitable Reader for WaitUntilRead. ptyBuf := newBlockingBuffer() tee := io.TeeReader(tc.master, ptyBuf) - go io.Copy(os.Stderr, tee) + go func() { + _, _ = io.Copy(os.Stderr, tee) + }() // Wait for bash to start. 
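specutils.MaybeRunAsRoot, now used by both runsc run (above) and the container tests (below), is not shown in this diff. The usual shape of such a helper is to re-exec the current binary inside a fresh user namespace with the invoking user mapped to root, which is also what the --rootless flag text just registered implies. A self-contained sketch of that pattern, not gVisor's actual implementation:

package rootlessdemo

import (
    "os"
    "os/exec"
    "syscall"
)

// rerunAsRoot re-executes the current binary in a new user namespace,
// mapping the caller's uid/gid to root inside it. Setgroups is denied by
// default when GidMappings is set, as unprivileged namespaces require.
func rerunAsRoot() error {
    cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
    cmd.SysProcAttr = &syscall.SysProcAttr{
        Cloneflags: syscall.CLONE_NEWUSER,
        UidMappings: []syscall.SysProcIDMap{
            {ContainerID: 0, HostID: os.Geteuid(), Size: 1},
        },
        GidMappings: []syscall.SysProcIDMap{
            {ContainerID: 0, HostID: os.Getegid(), Size: 1},
        },
    }
    cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
    return cmd.Run()
}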
expectedPL := []*control.Process{ @@ -581,7 +589,9 @@ func TestMultiContainerTerminal(t *testing.T) { // Execute echo command and check that it was executed correctly. Use // a variable to ensure it's not matching against command echo. - tc.master.Write([]byte("echo foo-${PWD}-123\n")) + if _, err := tc.master.Write([]byte("echo foo-${PWD}-123\n")); err != nil { + t.Fatalf("master.Write(): %v", err) + } if err := testutil.WaitUntilRead(ptyBuf, "foo-/-123", 5*time.Second); err != nil { t.Fatalf("echo didn't execute: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 0820edaec..50b0dd5e7 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -208,7 +208,7 @@ func New(conf *config.Config, args Args) (*Container, error) { if err := c.Saver.lockForNew(); err != nil { return nil, err } - defer c.Saver.unlock() + defer c.Saver.unlockOrDie() // If the metadata annotations indicate that this container should be started // in an existing sandbox, we must do so. These are the possible metadata @@ -310,7 +310,7 @@ func New(conf *config.Config, args Args) (*Container, error) { defer tty.Close() } - if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil { + if err := c.Sandbox.CreateSubcontainer(conf, c.ID, tty); err != nil { return nil, err } } @@ -340,7 +340,7 @@ func (c *Container) Start(conf *config.Config) error { if err := c.Saver.lock(); err != nil { return err } - unlock := cleanup.Make(func() { c.Saver.unlock() }) + unlock := cleanup.Make(c.Saver.unlockOrDie) defer unlock.Clean() if err := c.requireStatus("start", Created); err != nil { @@ -388,7 +388,7 @@ func (c *Container) Start(conf *config.Config) error { stdios = []*os.File{os.Stdin, os.Stdout, os.Stderr} } - return c.Sandbox.StartContainer(c.Spec, conf, c.ID, stdios, goferFiles) + return c.Sandbox.StartSubcontainer(c.Spec, conf, c.ID, stdios, goferFiles) }); err != nil { return err } @@ -426,7 +426,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile s if err := c.Saver.lock(); err != nil { return err } - defer c.Saver.unlock() + defer c.Saver.unlockOrDie() if err := c.requireStatus("restore", Created); err != nil { return err @@ -480,13 +480,13 @@ func Run(conf *config.Config, args Args) (unix.WaitStatus, error) { // Execute runs the specified command in the container. It returns the PID of // the newly created process. -func (c *Container) Execute(args *control.ExecArgs) (int32, error) { +func (c *Container) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) { log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args) if err := c.requireStatus("execute in", Created, Running); err != nil { return 0, err } args.ContainerID = c.ID - return c.Sandbox.Execute(args) + return c.Sandbox.Execute(conf, args) } // Event returns events for the container. @@ -614,7 +614,7 @@ func (c *Container) Pause() error { if err := c.Saver.lock(); err != nil { return err } - defer c.Saver.unlock() + defer c.Saver.unlockOrDie() if c.Status != Created && c.Status != Running { return fmt.Errorf("cannot pause container %q in state %v", c.ID, c.Status) @@ -634,7 +634,7 @@ func (c *Container) Resume() error { if err := c.Saver.lock(); err != nil { return err } - defer c.Saver.unlock() + defer c.Saver.unlockOrDie() if c.Status != Paused { return fmt.Errorf("cannot resume container %q in state %v", c.ID, c.Status) @@ -646,6 +646,36 @@ func (c *Container) Resume() error { return c.saveLocked() } +// Cat prints out the content of the files. 
+func (c *Container) Cat(files []string, out *os.File) error { + log.Debugf("Cat in container, cid: %s, files: %+v", c.ID, files) + return c.Sandbox.Cat(c.ID, files, out) +} + +// Usage displays memory used by the application. +func (c *Container) Usage(full bool) (control.MemoryUsage, error) { + log.Debugf("Usage in container, cid: %s, full: %v", c.ID, full) + return c.Sandbox.Usage(c.ID, full) +} + +// UsageFD shows application memory usage using two donated FDs. +func (c *Container) UsageFD() (*control.MemoryUsageRecord, error) { + log.Debugf("UsageFD in container, cid: %s", c.ID) + return c.Sandbox.UsageFD(c.ID) +} + +// Reduce requests that the sentry attempt to reduce its memory usage. +func (c *Container) Reduce(wait bool) error { + log.Debugf("Reduce in container, cid: %s", c.ID) + return c.Sandbox.Reduce(c.ID, wait) +} + +// Stream dumps all events to out. +func (c *Container) Stream(filters []string, out *os.File) error { + log.Debugf("Stream in container, cid: %s", c.ID) + return c.Sandbox.Stream(c.ID, filters, out) +} + // State returns the metadata of the container. func (c *Container) State() specs.State { return specs.State{ @@ -675,8 +705,8 @@ func (c *Container) Destroy() error { return err } defer func() { - c.Saver.unlock() - c.Saver.close() + c.Saver.unlockOrDie() + _ = c.Saver.close() }() // Stored for later use as stop() sets c.Sandbox to nil. @@ -789,30 +819,31 @@ func (c *Container) stop() error { } func (c *Container) waitForStopped() error { + if c.GoferPid == 0 { + return nil + } + + if c.IsSandboxRunning() { + if err := c.SignalContainer(unix.Signal(0), false); err == nil { + return fmt.Errorf("container is still running") + } + } + + if c.goferIsChild { + // The gofer process is a child of the current process, + // so we can wait on it and collect its zombie. + if _, err := unix.Wait4(int(c.GoferPid), nil, 0, nil); err != nil { + return fmt.Errorf("error waiting on the gofer process: %v", err) + } + c.GoferPid = 0 + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if c.IsSandboxRunning() { - if err := c.SignalContainer(unix.Signal(0), false); err == nil { - return fmt.Errorf("container is still running") - } - } - if c.GoferPid == 0 { - return nil - } - if c.goferIsChild { - // The gofer process is a child of the current process, - // so we can wait it and collect its zombie. - wpid, err := unix.Wait4(int(c.GoferPid), nil, unix.WNOHANG, nil) - if err != nil { - return fmt.Errorf("error waiting the gofer process: %v", err) - } - if wpid == 0 { - return fmt.Errorf("gofer is still running") - } - - } else if err := unix.Kill(c.GoferPid, 0); err == nil { + if err := unix.Kill(c.GoferPid, 0); err == nil { return fmt.Errorf("gofer is still running") } c.GoferPid = 0 @@ -910,6 +941,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu binPath := specutils.ExePath cmd := exec.Command(binPath, args...) cmd.ExtraFiles = goferEnds + + // Set Args[0] to make it easier to spot the gofer process. Otherwise it's + // shown as `exe`.
+
+	// Set Args[0] to make it easier to spot the gofer process. Otherwise it's
+	// shown as `exe`.
 	cmd.Args[0] = "runsc-gofer"
 
 	if attached {
@@ -1020,10 +1054,10 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
 		return fn()
 	}
 	restore, err := cg.Join()
-	defer restore()
 	if err != nil {
 		return err
 	}
+	defer restore()
 	return fn()
 }
 
diff --git a/runsc/container/container_norace_test.go b/runsc/container/container_norace_test.go
index 838c1e20a..a4daf16ed 100644
--- a/runsc/container/container_norace_test.go
+++ b/runsc/container/container_norace_test.go
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !race
 // +build !race
 
 package container
diff --git a/runsc/container/container_race_test.go b/runsc/container/container_race_test.go
index 9fb4c4fc0..86a57145c 100644
--- a/runsc/container/container_race_test.go
+++ b/runsc/container/container_race_test.go
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build race
 // +build race
 
 package container
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 249324c5a..69dcf3f03 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -53,19 +53,22 @@ func TestMain(m *testing.M) {
 	if err := testutil.ConfigureExePath(); err != nil {
 		panic(err.Error())
 	}
-	specutils.MaybeRunAsRoot()
+	if err := specutils.MaybeRunAsRoot(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error running as root: %v", err)
+		os.Exit(123)
+	}
 	os.Exit(m.Run())
 }
 
-func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
+func execute(conf *config.Config, cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
 	args := &control.ExecArgs{
 		Filename: name,
 		Argv:     append([]string{name}, arg...),
 	}
-	return cont.executeSync(args)
+	return cont.executeSync(conf, args)
 }
 
-func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
+func executeCombinedOutput(conf *config.Config, cont *Container, name string, arg ...string) ([]byte, error) {
 	r, w, err := os.Pipe()
 	if err != nil {
 		return nil, err
@@ -77,7 +80,7 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte,
 		Argv:        append([]string{name}, arg...),
 		FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
 	}
-	ws, err := cont.executeSync(args)
+	ws, err := cont.executeSync(conf, args)
 	w.Close()
 	if err != nil {
 		return nil, err
@@ -91,8 +94,8 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte,
 }
 
 // executeSync synchronously executes a new process.
-func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
-	pid, err := c.Execute(args)
+func (c *Container) executeSync(conf *config.Config, args *control.ExecArgs) (unix.WaitStatus, error) {
+	pid, err := c.Execute(conf, args)
 	if err != nil {
 		return 0, fmt.Errorf("error executing: %v", err)
 	}
@@ -169,8 +172,8 @@ func blockUntilWaitable(pid int) error {
 }
 
 // execPS executes `ps` inside the container and return the processes.
-func execPS(c *Container) ([]*control.Process, error) {
-	out, err := executeCombinedOutput(c, "/bin/ps", "-e")
+func execPS(conf *config.Config, c *Container) ([]*control.Process, error) {
+	out, err := executeCombinedOutput(conf, c, "/bin/ps", "-e")
 	if err != nil {
 		return nil, err
 	}
@@ -439,6 +442,11 @@ func configs(t *testing.T, opts ...configOption) map[string]*config.Config {
 	return all
 }
 
+// sleepSpecConf generates a spec with sleep 1000 and a conf.
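// The runInCgroup change above fixes a subtle ordering bug: when a call
// returns a cleanup function together with an error, the cleanup must be
// deferred only after the error check, since it may be nil or invalid on
// failure. A generic sketch of the corrected pattern (the join parameter is
// hypothetical):
package example

func withCleanup(join func() (restore func(), err error), fn func() error) error {
	restore, err := join()
	if err != nil {
		return err // Nothing to restore; join failed.
	}
	defer restore() // Safe: deferred only once join has succeeded.
	return fn()
}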
+func sleepSpecConf(t *testing.T) (*specs.Spec, *config.Config) { + return testutil.NewSpecWithArgs("sleep", "1000"), testutil.TestConfig(t) +} + // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle. // It verifies after each step that the container can be loaded from disk, and // has the correct status. @@ -452,7 +460,7 @@ func TestLifecycle(t *testing.T) { t.Run(name, func(t *testing.T) { // The container will just sleep for a long time. We will kill it before // it finishes sleeping. - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -523,9 +531,11 @@ func TestLifecycle(t *testing.T) { ws, err := c.Wait() if err != nil { ch <- err + return } if got, want := ws.Signal(), unix.SIGTERM; got != want { ch <- fmt.Errorf("got signal %v, want %v", got, want) + return } ch <- nil }() @@ -859,7 +869,7 @@ func TestExec(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { // t.Parallel() - if ws, err := cont.executeSync(&tc.args); err != nil { + if ws, err := cont.executeSync(conf, &tc.args); err != nil { t.Fatalf("executeAsync(%+v): %v", tc.args, err) } else if ws != 0 { t.Fatalf("executeAsync(%+v) failed with exit: %v", tc.args, ws) @@ -877,7 +887,7 @@ func TestExec(t *testing.T) { } defer unix.Close(fds[0]) - _, err = cont.executeSync(&control.ExecArgs{ + _, err = cont.executeSync(conf, &control.ExecArgs{ Argv: []string{"/nonexist"}, FilePayload: urpc.FilePayload{ Files: []*os.File{os.NewFile(uintptr(fds[1]), "sock")}, @@ -898,7 +908,7 @@ func TestExecProcList(t *testing.T) { for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -932,7 +942,7 @@ func TestExecProcList(t *testing.T) { // start running exec (which blocks). ch := make(chan error) go func() { - exitStatus, err := cont.executeSync(execArgs) + exitStatus, err := cont.executeSync(conf, execArgs) if err != nil { ch <- err } else if exitStatus != 0 { @@ -1417,8 +1427,7 @@ func TestPauseResume(t *testing.T) { // with calls to pause and resume and that pausing and resuming only // occurs given the correct state. func TestPauseResumeStatus(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "20") - conf := testutil.TestConfig(t) + spec, conf := sleepSpecConf(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1485,7 +1494,7 @@ func TestCapabilities(t *testing.T) { for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1525,7 +1534,9 @@ func TestCapabilities(t *testing.T) { defer os.Remove(exePath) // Need to traverse the intermediate directory. - os.Chmod(rootDir, 0755) + if err := os.Chmod(rootDir, 0755); err != nil { + t.Fatal(err) + } execArgs := &control.ExecArgs{ Filename: exePath, @@ -1537,7 +1548,7 @@ func TestCapabilities(t *testing.T) { } // "exe" should fail because we don't have the necessary permissions. 
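// The capability retry below reuses the same ExecArgs after granting
// CAP_DAC_OVERRIDE. A sketch of that grant, using only the types and import
// paths that appear in this test:
package example

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/sentry/control"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)

func grantDACOverride(args *control.ExecArgs) {
	// Effective capabilities gate permission checks such as DAC overrides.
	args.Capabilities = &auth.TaskCapabilities{
		EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
	}
}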
- if _, err := cont.executeSync(execArgs); err == nil { + if _, err := cont.executeSync(conf, execArgs); err == nil { t.Fatalf("container executed without error, but an error was expected") } @@ -1546,7 +1557,7 @@ func TestCapabilities(t *testing.T) { EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), } // "exe" should not fail this time. - if _, err := cont.executeSync(execArgs); err != nil { + if _, err := cont.executeSync(conf, execArgs); err != nil { t.Fatalf("container failed to exec %v: %v", args, err) } }) @@ -1633,7 +1644,7 @@ func TestMountNewDir(t *testing.T) { func TestReadonlyRoot(t *testing.T) { for name, conf := range configs(t, all...) { t.Run(name, func(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) spec.Root.Readonly = true _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -1657,7 +1668,7 @@ func TestReadonlyRoot(t *testing.T) { } // Read mounts to check that root is readonly. - out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'") + out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'") if err != nil { t.Fatalf("exec failed: %v", err) } @@ -1667,7 +1678,7 @@ func TestReadonlyRoot(t *testing.T) { } // Check that file cannot be created. - ws, err := execute(c, "/bin/touch", "/foo") + ws, err := execute(conf, c, "/bin/touch", "/foo") if err != nil { t.Fatalf("touch file in ro mount: %v", err) } @@ -1685,7 +1696,7 @@ func TestReadonlyMount(t *testing.T) { if err != nil { t.Fatalf("ioutil.TempDir() failed: %v", err) } - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) spec.Mounts = append(spec.Mounts, specs.Mount{ Destination: dir, Source: dir, @@ -1716,7 +1727,7 @@ func TestReadonlyMount(t *testing.T) { // Read mounts to check that volume is readonly. cmd := fmt.Sprintf("mount | grep ' %s ' | grep -o -e '(.*)'", dir) - out, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd) + out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", cmd) if err != nil { t.Fatalf("exec failed, err: %v", err) } @@ -1726,7 +1737,7 @@ func TestReadonlyMount(t *testing.T) { } // Check that file cannot be created. - ws, err := execute(c, "/bin/touch", path.Join(dir, "file")) + ws, err := execute(conf, c, "/bin/touch", path.Join(dir, "file")) if err != nil { t.Fatalf("touch file in ro mount: %v", err) } @@ -1845,7 +1856,7 @@ func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { "baz-" + testutil.RandomContainerID(), } for _, cid := range cids { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, _ := sleepSpecConf(t) bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2153,7 +2164,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) { go func() { defer wg.Done() // Ignore failures, start can fail if destroy runs first. 
- startCont.Start(conf) + _ = startCont.Start(conf) }() wg.Add(1) @@ -2222,7 +2233,7 @@ func TestMountPropagation(t *testing.T) { t.Fatalf("mount(%q, MS_SHARED): %v", srcMnt, err) } - spec := testutil.NewSpecWithArgs("sleep", "1000") + spec, conf := sleepSpecConf(t) priv := filepath.Join(tmpDir, "priv") slave := filepath.Join(tmpDir, "slave") @@ -2241,7 +2252,6 @@ func TestMountPropagation(t *testing.T) { }, } - conf := testutil.TestConfig(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2271,13 +2281,13 @@ func TestMountPropagation(t *testing.T) { // Check that mount didn't propagate to private mount. privFile := filepath.Join(priv, "mnt", "file") - if ws, err := execute(cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 { t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err) } // Check that mount propagated to slave mount. slaveFile := filepath.Join(slave, "mnt", "file") - if ws, err := execute(cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err) } } @@ -2343,7 +2353,7 @@ func TestMountSymlink(t *testing.T) { // Check that symlink was resolved and mount was created where the symlink // is pointing to. file := path.Join(target, "file") - if ws, err := execute(cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 { + if ws, err := execute(conf, cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 { t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) } }) @@ -2556,12 +2566,11 @@ func TestRlimits(t *testing.T) { // TestRlimitsExec sets limit to number of open files and checks that the limit // is propagated to exec'd processes. func TestRlimitsExec(t *testing.T) { - spec := testutil.NewSpecWithArgs("sleep", "100") + spec, conf := sleepSpecConf(t) spec.Process.Rlimits = []specs.POSIXRlimit{ {Type: "RLIMIT_NOFILE", Hard: 1000, Soft: 100}, } - conf := testutil.TestConfig(t) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -2582,7 +2591,7 @@ func TestRlimitsExec(t *testing.T) { t.Fatalf("error starting container: %v", err) } - got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n") + got, err := executeCombinedOutput(conf, cont, "/bin/sh", "-c", "ulimit -n") if err != nil { t.Fatal(err) } @@ -2590,3 +2599,276 @@ func TestRlimitsExec(t *testing.T) { t.Errorf("ulimit result, got: %q, want: %q", got, want) } } + +// TestCat creates a file and checks that cat generates the expected output. 
+func TestCat(t *testing.T) {
+	f, err := ioutil.TempFile(testutil.TmpDir(), "test-case")
+	if err != nil {
+		t.Fatalf("ioutil.TempFile failed: %v", err)
+	}
+	defer os.RemoveAll(f.Name())
+
+	content := "test-cat"
+	if _, err := f.WriteString(content); err != nil {
+		t.Fatalf("f.WriteString(): %v", err)
+	}
+	f.Close()
+
+	spec, conf := sleepSpecConf(t)
+
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+	}
+
+	cont, err := New(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	r, w, err := os.Pipe()
+	if err != nil {
+		t.Fatalf("os.Pipe(): %v", err)
+	}
+
+	if err := cont.Cat([]string{f.Name()}, w); err != nil {
+		t.Fatalf("error cat from container: %v", err)
+	}
+
+	buf := make([]byte, 1024)
+	if _, err := r.Read(buf); err != nil {
+		t.Fatalf("Read out: %v", err)
+	}
+	if got, want := string(buf), content; !strings.Contains(got, want) {
+		t.Errorf("out got %s, want include %s", buf, want)
+	}
+}
+
+// TestUsage checks that usage generates the expected memory usage.
+func TestUsage(t *testing.T) {
+	spec, conf := sleepSpecConf(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+	}
+
+	cont, err := New(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	for _, full := range []bool{false, true} {
+		m, err := cont.Usage(full)
+		if err != nil {
+			t.Fatalf("error usage from container: %v", err)
+		}
+		if m.Mapped == 0 {
+			t.Errorf("Usage mapped got zero")
+		}
+		if m.Total == 0 {
+			t.Errorf("Usage total got zero")
+		}
+		if full {
+			if m.System == 0 {
+				t.Errorf("Usage system got zero")
+			}
+			if m.Anonymous == 0 {
+				t.Errorf("Usage anonymous got zero")
+			}
+		}
+	}
+}
+
+// TestUsageFD checks that usagefd generates the expected memory usage.
+func TestUsageFD(t *testing.T) {
+	spec, conf := sleepSpecConf(t)
+
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+	}
+
+	cont, err := New(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	m, err := cont.UsageFD()
+	if err != nil {
+		t.Fatalf("error usageFD from container: %v", err)
+	}
+
+	mapped, unknown, total, err := m.Fetch()
+	if err != nil {
+		t.Fatalf("error Fetch memory usage: %v", err)
+	}
+
+	if mapped == 0 {
+		t.Errorf("UsageFD Mapped got zero")
+	}
+	if unknown == 0 {
+		t.Errorf("UsageFD unknown got zero")
+	}
+	if total == 0 {
+		t.Errorf("UsageFD total got zero")
+	}
+}
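// TestUsageFD above shows the intended read side of the FD-based usage API:
// UsageFD returns a record backed by the two donated FDs, and Fetch samples
// current counters from it. A sketch, assuming a started
// *container.Container:
package example

import (
	"fmt"

	"gvisor.dev/gvisor/runsc/container"
)

func printUsage(c *container.Container) error {
	rec, err := c.UsageFD()
	if err != nil {
		return err
	}
	mapped, unknown, total, err := rec.Fetch()
	if err != nil {
		return err
	}
	fmt.Printf("mapped=%d unknown=%d total=%d\n", mapped, unknown, total)
	return nil
}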
+
+// TestReduce checks that reduce call succeeds.
+func TestReduce(t *testing.T) {
+	spec, conf := sleepSpecConf(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+	}
+
+	cont, err := New(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	if err := cont.Reduce(false); err != nil {
+		t.Fatalf("error reduce from container: %v", err)
+	}
+}
+
+// TestStream checks that Stream dumps expected events.
+func TestStream(t *testing.T) {
+	spec, conf := sleepSpecConf(t)
+	conf.Strace = true
+	conf.StraceEvent = true
+	conf.StraceSyscalls = ""
+
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+	}
+
+	cont, err := New(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Start(conf); err != nil {
+		t.Fatalf("starting container: %v", err)
+	}
+
+	r, w, err := os.Pipe()
+	if err != nil {
+		t.Fatalf("os.Pipe(): %v", err)
+	}
+
+	// Spawn a new goroutine to Stream events, as it blocks indefinitely.
+	go func() {
+		cont.Stream(nil, w)
+	}()
+
+	buf := make([]byte, 1024)
+	if _, err := r.Read(buf); err != nil {
+		t.Fatalf("Read out: %v", err)
+	}
+
+	// A syscall strace event includes "Strace".
+	if got, want := string(buf), "Strace"; !strings.Contains(got, want) {
+		t.Errorf("out got %s, want include %s", buf, want)
+	}
+}
+
+// TestProfile checks that profiling options generate profiles.
+func TestProfile(t *testing.T) {
+	// Perform a non-trivial amount of work so we actually capture
+	// something in the profiles.
+	spec := testutil.NewSpecWithArgs("/bin/bash", "-c", "true")
+	conf := testutil.TestConfig(t)
+	conf.ProfileEnable = true
+	conf.ProfileBlock = filepath.Join(t.TempDir(), "block.pprof")
+	conf.ProfileCPU = filepath.Join(t.TempDir(), "cpu.pprof")
+	conf.ProfileHeap = filepath.Join(t.TempDir(), "heap.pprof")
+	conf.ProfileMutex = filepath.Join(t.TempDir(), "mutex.pprof")
+	conf.TraceFile = filepath.Join(t.TempDir(), "trace.out")
+
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer cleanup()
+
+	args := Args{
+		ID:        testutil.RandomContainerID(),
+		Spec:      spec,
+		BundleDir: bundleDir,
+		Attached:  true,
+	}
+
+	_, err = Run(conf, args)
+	if err != nil {
+		t.Fatalf("Creating container: %v", err)
+	}
+
+	// Basic test; simply assert that the profiles are not empty.
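// TestProfile above exercises every profiling sink at once. A sketch of the
// same configuration for a standalone run (the output directory is
// hypothetical; the fields are the config.Config fields used by the test):
package example

import (
	"path/filepath"

	"gvisor.dev/gvisor/runsc/config"
)

func enableProfiling(conf *config.Config, dir string) {
	conf.ProfileEnable = true
	conf.ProfileBlock = filepath.Join(dir, "block.pprof")
	conf.ProfileCPU = filepath.Join(dir, "cpu.pprof")
	conf.ProfileHeap = filepath.Join(dir, "heap.pprof")
	conf.ProfileMutex = filepath.Join(dir, "mutex.pprof")
	conf.TraceFile = filepath.Join(dir, "trace.out")
}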
+ for _, name := range []string{conf.ProfileBlock, conf.ProfileCPU, conf.ProfileHeap, conf.ProfileMutex, conf.TraceFile} { + fi, err := os.Stat(name) + if err != nil { + t.Fatalf("Unable to stat profile file %s: %v", name, err) + } + if fi.Size() == 0 { + t.Errorf("Profile file %s is empty: %+v", name, fi) + } + } +} diff --git a/runsc/container/hook.go b/runsc/container/hook.go index 901607aee..ce1c9e1de 100644 --- a/runsc/container/hook.go +++ b/runsc/container/hook.go @@ -101,8 +101,8 @@ func executeHook(h specs.Hook, s specs.State) error { return fmt.Errorf("failure executing hook %q, err: %v\nstdout: %s\nstderr: %s", h.Path, err, stdout.String(), stderr.String()) } case <-timer: - cmd.Process.Kill() - cmd.Wait() + _ = cmd.Process.Kill() + _ = cmd.Wait() return fmt.Errorf("timeout executing hook %q\nstdout: %s\nstderr: %s", h.Path, stdout.String(), stderr.String()) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 0dbe1e323..9d8022e50 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -105,11 +105,11 @@ type execDesc struct { name string } -func execMany(t *testing.T, execs []execDesc) { +func execMany(t *testing.T, conf *config.Config, execs []execDesc) { for _, exec := range execs { t.Run(exec.name, func(t *testing.T) { args := &control.ExecArgs{Argv: exec.cmd} - if ws, err := exec.c.executeSync(args); err != nil { + if ws, err := exec.c.executeSync(conf, args); err != nil { t.Errorf("error executing %+v: %v", args, err) } else if ws.ExitStatus() != exec.want { t.Errorf("%q: exec %q got exit status: %d, want: %d", exec.name, exec.cmd, ws.ExitStatus(), exec.want) @@ -217,7 +217,7 @@ func TestMultiPIDNS(t *testing.T) { newProcessBuilder().PID(2).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err := execPS(containers[0]) + got, err := execPS(conf, containers[0]) if err != nil { t.Fatal(err) } @@ -229,7 +229,7 @@ func TestMultiPIDNS(t *testing.T) { newProcessBuilder().PID(1).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[1]) + got, err = execPS(conf, containers[1]) if err != nil { t.Fatal(err) } @@ -313,7 +313,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(3).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err := execPS(containers[0]) + got, err := execPS(conf, containers[0]) if err != nil { t.Fatal(err) } @@ -328,7 +328,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(3).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[1]) + got, err = execPS(conf, containers[1]) if err != nil { t.Fatal(err) } @@ -341,7 +341,7 @@ func TestMultiPIDNSPath(t *testing.T) { newProcessBuilder().PID(1).Cmd("sleep").Process(), newProcessBuilder().Cmd("ps").Process(), } - got, err = execPS(containers[2]) + got, err = execPS(conf, containers[2]) if err != nil { t.Fatal(err) } @@ -541,7 +541,7 @@ func TestExecWait(t *testing.T) { WorkingDirectory: "/", KUID: 0, } - pid, err := containers[0].Execute(args) + pid, err := containers[0].Execute(conf, args) if err != nil { t.Fatalf("error executing: %v", err) } @@ -744,7 +744,7 @@ func TestMultiContainerDestroy(t *testing.T) { Filename: app, Argv: []string{app, "fork-bomb"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } @@ -821,7 +821,7 @@ func 
TestMultiContainerProcesses(t *testing.T) { Filename: "/bin/sleep", Argv: []string{"/bin/sleep", "100"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } expectedPL1 = append(expectedPL1, newProcessBuilder().PID(4).Cmd("sleep").Process()) @@ -882,7 +882,7 @@ func TestMultiContainerKillAll(t *testing.T) { Filename: app, Argv: []string{app, "task-tree", "--depth=2", "--width=2"}, } - if _, err := containers[1].Execute(args); err != nil { + if _, err := containers[1].Execute(conf, args); err != nil { t.Fatalf("error exec'ing: %v", err) } // Wait for these new processes to start. @@ -894,7 +894,9 @@ func TestMultiContainerKillAll(t *testing.T) { if tc.killContainer { // First kill the init process to make the container be stopped with // processes still running inside. - containers[1].SignalContainer(unix.SIGKILL, false) + if err := containers[1].SignalContainer(unix.SIGKILL, false); err != nil { + t.Fatalf("SignalContainer(): %v", err) + } op := func() error { c, err := Load(conf.RootDir, FullID{ContainerID: ids[1]}, LoadOpts{}) if err != nil { @@ -912,7 +914,7 @@ func TestMultiContainerKillAll(t *testing.T) { c, err := Load(conf.RootDir, FullID{ContainerID: ids[1]}, LoadOpts{}) if err != nil { - t.Fatalf("failed to load child container %q: %v", c.ID, err) + t.Fatalf("failed to load child container %q: %v", ids[1], err) } // Kill'Em All if err := c.SignalContainer(unix.SIGKILL, true); err != nil { @@ -1040,7 +1042,8 @@ func TestMultiContainerDestroyStarting(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - startCont.Start(conf) // ignore failures, start can fail if destroy runs first. + // Ignore failures, start can fail if destroy runs first. + _ = startCont.Start(conf) }() wg.Add(1) @@ -1314,7 +1317,7 @@ func TestMultiContainerSharedMount(t *testing.T) { name: "dir removed from container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1379,7 +1382,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) { name: "fails to write to container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1437,7 +1440,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { name: "file appears in container1", }, } - execMany(t, execs) + execMany(t, conf, execs) containers[1].Destroy() @@ -1487,7 +1490,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) { name: "file removed from container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1540,7 +1543,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { name: "directory is mounted in container1", }, } - execMany(t, execs) + execMany(t, conf, execs) }) } } @@ -1651,7 +1654,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { } // Check that container isn't running anymore. 
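// TestMultiContainerKillAll above relies on SignalContainer's two modes:
// all=false signals only the container's init process, while all=true fans
// the signal out to every process in the container. A sketch combining the
// liveness probe (signal 0) with a full kill:
package example

import (
	"golang.org/x/sys/unix"

	"gvisor.dev/gvisor/runsc/container"
)

func killEverything(c *container.Container) error {
	// Signal 0 delivers nothing; it only reports whether the target exists.
	if err := c.SignalContainer(unix.Signal(0), false); err != nil {
		return err // Container is already stopped.
	}
	return c.SignalContainer(unix.SIGKILL, true /* all */)
}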
- if _, err := execute(c, "/bin/true"); err == nil { + if _, err := execute(conf, c, "/bin/true"); err == nil { t.Fatalf("Container %q was not stopped after gofer death", c.ID) } @@ -1666,7 +1669,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { if err := waitForProcessList(c, pl); err != nil { t.Errorf("Container %q was affected by another container: %v", c.ID, err) } - if _, err := execute(c, "/bin/true"); err != nil { + if _, err := execute(conf, c, "/bin/true"); err != nil { t.Fatalf("Container %q was affected by another container: %v", c.ID, err) } } @@ -1688,7 +1691,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Check that entire sandbox isn't running anymore. for _, c := range containers { - if _, err := execute(c, "/bin/true"); err == nil { + if _, err := execute(conf, c, "/bin/true"); err == nil { t.Fatalf("Container %q was not stopped after gofer death", c.ID) } } @@ -1864,7 +1867,7 @@ func TestMultiContainerHomeEnvDir(t *testing.T) { defer cleanup() // Exec into the root container synchronously. - if _, err := execute(containers[0], "/bin/sh", "-c", execCmd); err != nil { + if _, err := execute(conf, containers[0], "/bin/sh", "-c", execCmd); err != nil { t.Errorf("error executing %+v: %v", execCmd, err) } @@ -1980,7 +1983,7 @@ func TestMultiContainerEvent(t *testing.T) { if busyUsage <= sleepUsage { t.Logf("Busy container usage lower than sleep (busy: %d, sleep: %d), retrying...", busyUsage, sleepUsage) - return fmt.Errorf("Busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage) + return fmt.Errorf("busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage) } return nil } @@ -2053,7 +2056,7 @@ func TestDuplicateEnvVariable(t *testing.T) { Argv: []string{"/bin/sh", "-c", cmdExec}, Envv: []string{"VAR=foo", "VAR=bar"}, } - if ws, err := containers[0].executeSync(execArgs); err != nil || ws.ExitStatus() != 0 { + if ws, err := containers[0].executeSync(conf, execArgs); err != nil || ws.ExitStatus() != 0 { t.Fatalf("exec failed, ws: %v, err: %v", ws, err) } diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index cb5bffb89..f16b2bd02 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -72,7 +72,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/usr/bin/test", Argv: []string{"test", "-f", filename}, } - if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", ws.ExitStatus(), err) @@ -84,7 +84,7 @@ func TestSharedVolume(t *testing.T) { } // Now we should be able to test the file from within the sandbox. - if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("test %q exited with code %v, wanted zero", filename, ws.ExitStatus()) @@ -97,7 +97,7 @@ func TestSharedVolume(t *testing.T) { } // File should no longer exist at the old path within the sandbox. 
- if ws, err := c.executeSync(argsTestFile); err != nil { + if ws, err := c.executeSync(conf, argsTestFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", filename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", filename, ws.ExitStatus()) @@ -108,7 +108,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/usr/bin/test", Argv: []string{"test", "-f", newFilename}, } - if ws, err := c.executeSync(argsTestNewFile); err != nil { + if ws, err := c.executeSync(conf, argsTestNewFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", newFilename, err) } else if ws.ExitStatus() != 0 { t.Errorf("test %q exited with code %v, wanted zero", newFilename, ws.ExitStatus()) @@ -120,7 +120,7 @@ func TestSharedVolume(t *testing.T) { } // Renamed file should no longer exist at the old path within the sandbox. - if ws, err := c.executeSync(argsTestNewFile); err != nil { + if ws, err := c.executeSync(conf, argsTestNewFile); err != nil { t.Fatalf("unexpected error testing file %q: %v", newFilename, err) } else if ws.ExitStatus() == 0 { t.Errorf("test %q exited with code %v, wanted not zero", newFilename, ws.ExitStatus()) @@ -133,7 +133,7 @@ func TestSharedVolume(t *testing.T) { KUID: auth.KUID(os.Getuid()), KGID: auth.KGID(os.Getgid()), } - if ws, err := c.executeSync(argsTouch); err != nil { + if ws, err := c.executeSync(conf, argsTouch); err != nil { t.Fatalf("unexpected error touching file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("touch %q exited with code %v, wanted zero", filename, ws.ExitStatus()) @@ -154,7 +154,7 @@ func TestSharedVolume(t *testing.T) { Filename: "/bin/rm", Argv: []string{"rm", filename}, } - if ws, err := c.executeSync(argsRemove); err != nil { + if ws, err := c.executeSync(conf, argsRemove); err != nil { t.Fatalf("unexpected error removing file %q: %v", filename, err) } else if ws.ExitStatus() != 0 { t.Errorf("remove %q exited with code %v, wanted zero", filename, ws.ExitStatus()) @@ -166,14 +166,14 @@ func TestSharedVolume(t *testing.T) { } } -func checkFile(c *Container, filename string, want []byte) error { +func checkFile(conf *config.Config, c *Container, filename string, want []byte) error { cpy := filename + ".copy" - if _, err := execute(c, "/bin/cp", "-f", filename, cpy); err != nil { + if _, err := execute(conf, c, "/bin/cp", "-f", filename, cpy); err != nil { return fmt.Errorf("unexpected error copying file %q to %q: %v", filename, cpy, err) } got, err := ioutil.ReadFile(cpy) if err != nil { - return fmt.Errorf("Error reading file %q: %v", filename, err) + return fmt.Errorf("error reading file %q: %v", filename, err) } if !bytes.Equal(got, want) { return fmt.Errorf("file content inside the sandbox is wrong, got: %q, want: %q", got, want) @@ -226,16 +226,16 @@ func TestSharedVolumeFile(t *testing.T) { if err := ioutil.WriteFile(filename, []byte(want), 0666); err != nil { t.Fatalf("Error writing to %q: %v", filename, err) } - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } // Append to file inside the container and check that content is not lost. 
- if _, err := execute(c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil { + if _, err := execute(conf, c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil { t.Fatalf("unexpected error appending file %q: %v", filename, err) } want = []byte("host-sandbox-") - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } @@ -250,7 +250,7 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("Error writing to file %q: %v", filename, err) } want = []byte("host-sandbox-host") - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } @@ -259,7 +259,7 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("Error truncating file %q: %v", filename, err) } want = want[:5] - if err := checkFile(c, filename, want); err != nil { + if err := checkFile(conf, c, filename, want); err != nil { t.Fatal(err.Error()) } } diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index 0399903a0..23810f593 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -264,10 +264,10 @@ func (s *StateFile) lockForNew() error { // Checks if the container already exists by looking for the metadata file. if _, err := os.Stat(s.statePath()); err == nil { - s.unlock() + s.unlockOrDie() return fmt.Errorf("container already exists") } else if !os.IsNotExist(err) { - s.unlock() + s.unlockOrDie() return fmt.Errorf("looking for existing container: %v", err) } return nil @@ -286,6 +286,15 @@ func (s *StateFile) unlock() error { return nil } +func (s *StateFile) unlockOrDie() { + if !s.flock.Locked() { + panic("unlock called without lock held") + } + if err := s.flock.Unlock(); err != nil { + panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err)) + } +} + // saveLocked saves 'v' to the state file. // // Preconditions: lock() must been called before. @@ -308,7 +317,7 @@ func (s *StateFile) load(v interface{}) error { if err := s.lock(); err != nil { return err } - defer s.unlock() + defer s.unlockOrDie() metaBytes, err := ioutil.ReadFile(s.statePath()) if err != nil { diff --git a/runsc/flag/flag.go b/runsc/flag/flag.go index f921a8107..6b25da904 100644 --- a/runsc/flag/flag.go +++ b/runsc/flag/flag.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + // Package flag wraps flag primitives. package flag diff --git a/runsc/fsgofer/filter/config_amd64.go b/runsc/fsgofer/filter/config_amd64.go index 2d0151dcc..1cb9d312a 100644 --- a/runsc/fsgofer/filter/config_amd64.go +++ b/runsc/fsgofer/filter/config_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package filter diff --git a/runsc/fsgofer/filter/config_arm64.go b/runsc/fsgofer/filter/config_arm64.go index 7d458c02d..ab750c3be 100644 --- a/runsc/fsgofer/filter/config_arm64.go +++ b/runsc/fsgofer/filter/config_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
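// unlockOrDie above turns a failed unlock into a panic so that callers can
// pair lock/unlock with a bare defer and still cover every early return. A
// generic sketch of the same pattern around a hypothetical lock interface:
package example

type locker interface {
	Lock() error
	Unlock() error
}

func withLock(l locker, fn func() error) error {
	if err := l.Lock(); err != nil {
		return err
	}
	defer func() {
		if err := l.Unlock(); err != nil {
			// Lock state is unrecoverable here; failing loudly beats
			// deadlocking a later caller.
			panic(err)
		}
	}()
	return fn()
}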
+//go:build arm64 // +build arm64 package filter diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go index e28d4b8d6..5442add95 100644 --- a/runsc/fsgofer/filter/extra_filters.go +++ b/runsc/fsgofer/filter/extra_filters.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !msan && !race // +build !msan,!race package filter diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go index d768ed0bb..e5915652f 100644 --- a/runsc/fsgofer/filter/extra_filters_msan.go +++ b/runsc/fsgofer/filter/extra_filters_msan.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build msan // +build msan package filter diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go index 9e75c025d..1a4862e1b 100644 --- a/runsc/fsgofer/filter/extra_filters_race.go +++ b/runsc/fsgofer/filter/extra_filters_race.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build race // +build race package filter diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 3f362b25e..600b21189 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -51,10 +51,10 @@ const ( // verityXattrs are the extended attributes used by verity file system. var verityXattrs = map[string]struct{}{ - "user.merkle.offset": struct{}{}, - "user.merkle.size": struct{}{}, - "user.merkle.childrenOffset": struct{}{}, - "user.merkle.childrenSize": struct{}{}, + "user.merkle.offset": {}, + "user.merkle.size": {}, + "user.merkle.childrenOffset": {}, + "user.merkle.childrenSize": {}, } // join is equivalent to path.Join() but skips path.Clean() which is expensive. @@ -1242,13 +1242,14 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) { } parent := l.file.FD() - for _, name := range names { - child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0) + closeParent := func() { if parent != l.file.FD() { - // Parent is no longer needed. _ = unix.Close(parent) - parent = -1 } + } + defer closeParent() + for _, name := range names { + child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0) if err != nil { if errors.Is(err, unix.ENOENT) { // No pont in continuing any further. @@ -1256,10 +1257,11 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) { } return nil, err } + closeParent() + parent = child var stat unix.Stat_t if err := unix.Fstat(child, &stat); err != nil { - _ = unix.Close(child) return nil, err } valid, attr := l.fillAttr(&stat) @@ -1271,13 +1273,9 @@ func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) { if (stat.Mode & unix.S_IFMT) != unix.S_IFDIR { // Doesn't need to continue if entry is not a dir. Including symlinks // that cannot be followed. - _ = unix.Close(child) break } parent = child } - if parent != -1 && parent != l.file.FD() { - _ = unix.Close(parent) - } return stats, nil } diff --git a/runsc/fsgofer/fsgofer_amd64_unsafe.go b/runsc/fsgofer/fsgofer_amd64_unsafe.go index 29ebf8500..884f7fc26 100644 --- a/runsc/fsgofer/fsgofer_amd64_unsafe.go +++ b/runsc/fsgofer/fsgofer_amd64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
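// The build-constraint edits throughout this series follow the Go 1.17
// convention: the new //go:build line is authoritative for current
// toolchains, while the legacy // +build line keeps pre-1.17 compilers
// working, and the two must agree. For example, a file restricted to
// non-race amd64 builds would carry:

//go:build amd64 && !race
// +build amd64,!race

package example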
+//go:build amd64 // +build amd64 package fsgofer diff --git a/runsc/fsgofer/fsgofer_arm64_unsafe.go b/runsc/fsgofer/fsgofer_arm64_unsafe.go index 9fd5d0871..1207d9e8a 100644 --- a/runsc/fsgofer/fsgofer_arm64_unsafe.go +++ b/runsc/fsgofer/fsgofer_arm64_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package fsgofer diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 77723827a..ee6cc97df 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -65,15 +65,6 @@ func configTestName(conf *Config) string { return "RWMount" } -func assertPanic(t *testing.T, f func()) { - defer func() { - if r := recover(); r == nil { - t.Errorf("function did not panic") - } - }() - f() -} - func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error { want := make([]byte, len(content)) copy(want, content) @@ -195,7 +186,7 @@ func setup(fileType uint32) (string, string, error) { } root, err := a.Attach() if err != nil { - return "", "", fmt.Errorf("Attach failed, err: %v", err) + return "", "", fmt.Errorf("attach failed, err: %v", err) } defer root.Close() @@ -290,10 +281,10 @@ func checkIDs(f p9.File, uid, gid int) error { return fmt.Errorf("GetAttr() failed, err: %v", err) } if want := p9.UID(uid); stat.UID != want { - return fmt.Errorf("Wrong UID, want: %v, got: %v", want, stat.UID) + return fmt.Errorf("wrong UID, want: %v, got: %v", want, stat.UID) } if want := p9.GID(gid); stat.GID != want { - return fmt.Errorf("Wrong GID, want: %v, got: %v", want, stat.GID) + return fmt.Errorf("wrong GID, want: %v, got: %v", want, stat.GID) } return nil } @@ -574,7 +565,7 @@ func SetGetXattr(l *localFile, name string, value string) error { return err } if ret != value { - return fmt.Errorf("Got value %s, want %s", ret, value) + return fmt.Errorf("got value %s, want %s", ret, value) } return nil } diff --git a/runsc/mitigate/BUILD b/runsc/mitigate/BUILD index 1238890fc..9f4a7ba8d 100644 --- a/runsc/mitigate/BUILD +++ b/runsc/mitigate/BUILD @@ -4,7 +4,10 @@ package(licenses = ["notice"]) go_library( name = "mitigate", - srcs = ["mitigate.go"], + srcs = [ + "mitigate.go", + "mock.go", + ], visibility = [ "//runsc:__subpackages__", ], @@ -16,8 +19,5 @@ go_test( size = "small", srcs = ["mitigate_test.go"], library = ":mitigate", - deps = [ - "//runsc/mitigate/mock", - "@com_github_google_go_cmp//cmp:go_default_library", - ], + deps = ["@com_github_google_go_cmp//cmp:go_default_library"], ) diff --git a/runsc/mitigate/mitigate.go b/runsc/mitigate/mitigate.go index 88409af8f..00e5bf2a9 100644 --- a/runsc/mitigate/mitigate.go +++ b/runsc/mitigate/mitigate.go @@ -19,10 +19,7 @@ package mitigate import ( "fmt" - "io/ioutil" - "os" "regexp" - "sort" "strconv" "strings" ) @@ -39,128 +36,20 @@ const ( physicalIDKey = "physical id" coreIDKey = "core id" bugsKey = "bugs" - - // Path to shutdown a CPU. - cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" ) // CPUSet contains a map of all CPUs on the system, mapped // by Physical ID and CoreIDs. threads with the same // Core and Physical ID are Hyperthread pairs. -type CPUSet map[threadID]*ThreadGroup +type CPUSet []*CPU // NewCPUSet creates a CPUSet from data read from /proc/cpuinfo. 
-func NewCPUSet(data []byte) (CPUSet, error) { - processors, err := getThreads(string(data)) - if err != nil { - return nil, err - } - - set := make(CPUSet) - for _, p := range processors { - // Each ID is of the form physicalID:coreID. Hyperthread pairs - // have identical physical and core IDs. We need to match - // Hyperthread pairs so that we can shutdown all but one per - // pair. - core, ok := set[p.id] - if !ok { - core = &ThreadGroup{} - set[p.id] = core - } - core.isVulnerable = core.isVulnerable || p.IsVulnerable() - core.threads = append(core.threads, p) - } - - // We need to make sure we shutdown the lowest number processor per - // thread group. - for _, tg := range set { - sort.Slice(tg.threads, func(i, j int) bool { - return tg.threads[i].processorNumber < tg.threads[j].processorNumber - }) - } - return set, nil -} - -// NewCPUSetFromPossible makes a cpuSet data read from -// /sys/devices/system/cpu/possible. This is used in enable operations -// where the caller simply wants to enable all CPUS. -func NewCPUSetFromPossible(data []byte) (CPUSet, error) { - threads, err := GetThreadsFromPossible(data) - if err != nil { - return nil, err - } - - // We don't care if a CPU is vulnerable or not, we just - // want to return a list of all CPUs on the host. - set := CPUSet{ - threads[0].id: &ThreadGroup{ - threads: threads, - isVulnerable: false, - }, - } - return set, nil -} - -// String implements the String method for CPUSet. -func (c CPUSet) String() string { - ret := "" - for _, tg := range c { - ret += fmt.Sprintf("%s\n", tg) - } - return ret -} - -// GetRemainingList returns the list of threads that will remain active -// after mitigation. -func (c CPUSet) GetRemainingList() []Thread { - threads := make([]Thread, 0, len(c)) - for _, core := range c { - // If we're vulnerable, take only one thread from the pair. - if core.isVulnerable { - threads = append(threads, core.threads[0]) - continue - } - // Otherwise don't shutdown anything. - threads = append(threads, core.threads...) - } - return threads -} - -// GetShutdownList returns the list of threads that will be shutdown on -// mitigation. -func (c CPUSet) GetShutdownList() []Thread { - threads := make([]Thread, 0) - for _, core := range c { - // Only if we're vulnerable do shutdown anything. In this case, - // shutdown all but the first entry. - if core.isVulnerable && len(core.threads) > 1 { - threads = append(threads, core.threads[1:]...) - } - } - return threads -} - -// ThreadGroup represents Hyperthread pairs on the same physical/core ID. -type ThreadGroup struct { - threads []Thread - isVulnerable bool -} - -// String implements the String method for threadGroup. -func (c ThreadGroup) String() string { - ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) - for _, processor := range c.threads { - ret += fmt.Sprintf("%s\n", processor) - } - return ret -} - -// getThreads returns threads structs from reading /proc/cpuinfo. -func getThreads(data string) ([]Thread, error) { +func NewCPUSet(data string) (CPUSet, error) { // Each processor entry should start with the // processor key. Find the beginings of each. - r := buildRegex(processorKey, `\d+`) + r := buildRegex(processorKey) indices := r.FindAllStringIndex(data, -1) + if len(indices) < 1 { return nil, fmt.Errorf("no cpus found for: %q", data) } @@ -172,193 +61,132 @@ func getThreads(data string) ([]Thread, error) { // indexes (e.g. data[index[i], index[i+1]]). 
// There should be len(indicies) - 1 CPUs // since the last index is the end of the string. - cpus := make([]Thread, 0, len(indices)) + var set CPUSet // Find each string that represents a CPU. These begin "processor". for i := 1; i < len(indices); i++ { start := indices[i-1][0] end := indices[i][0] // Parse the CPU entry, which should be between start/end. - c, err := newThread(data[start:end]) + c, err := newCPU(data[start:end]) if err != nil { return nil, err } - cpus = append(cpus, c) + set = append(set, c) } - return cpus, nil + return set, nil } -// GetThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. -func GetThreadsFromPossible(data []byte) ([]Thread, error) { - possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) - matches := possibleRegex.FindStringSubmatch(string(data)) - if len(matches) != 4 { - return nil, fmt.Errorf("mismatch regex from possible: %q", string(data)) - } - - // If matches[3] is empty, we only have one cpu entry. - if matches[3] == "" { - matches[3] = matches[1] - } - - begin, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse begin: %v", err) - } - end, err := strconv.ParseInt(matches[3], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse end: %v", err) - } - if begin > end || begin < 0 || end < 0 { - return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) - } - - ret := make([]Thread, 0, end-begin) - for i := begin; i <= end; i++ { - ret = append(ret, Thread{ - processorNumber: i, - id: threadID{ - physicalID: 0, // we don't care about id for enable ops. - coreID: 0, - }, - }) +// IsVulnerable checks if this CPUSet is vulnerable to MDS. +func (c CPUSet) IsVulnerable() bool { + for _, cpu := range c { + if cpu.IsVulnerable() { + return true + } } - - return ret, nil + return false } -// threadID for each thread is defined by the physical and -// core IDs. If equal, two threads are Hyperthread pairs. -type threadID struct { - physicalID int64 - coreID int64 +// String implements the String method for CPUSet. +func (c CPUSet) String() string { + parts := make([]string, len(c)) + for i, cpu := range c { + parts[i] = cpu.String() + } + return strings.Join(parts, "\n") } -// Thread represents pertinent info about a single hyperthread in a pair. -type Thread struct { +// CPU represents pertinent info about a single hyperthread in a pair. +type CPU struct { processorNumber int64 // the processor number of this CPU. vendorID string // the vendorID of CPU (e.g. AuthenticAMD). cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). - id threadID // id for this thread + physicalID int64 // Physical ID of this CPU. + coreID int64 // Core ID of this CPU. bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. } -// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. 
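// With the rewrite above, a mitigation check reduces to parsing
// /proc/cpuinfo into a CPUSet and querying it. A sketch of the call
// sequence using only the exported API defined above:
package example

import (
	"fmt"
	"io/ioutil"

	"gvisor.dev/gvisor/runsc/mitigate"
)

func reportMDS() error {
	data, err := ioutil.ReadFile("/proc/cpuinfo")
	if err != nil {
		return err
	}
	set, err := mitigate.NewCPUSet(string(data))
	if err != nil {
		return err
	}
	fmt.Printf("%d CPUs, vulnerable to mds: %t\n", len(set), set.IsVulnerable())
	return nil
}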
-func newThread(data string) (Thread, error) { - empty := Thread{} +func newCPU(data string) (*CPU, error) { processor, err := parseProcessor(data) if err != nil { - return empty, err + return nil, err } vendorID, err := parseVendorID(data) if err != nil { - return empty, err + return nil, err } cpuFamily, err := parseCPUFamily(data) if err != nil { - return empty, err + return nil, err } model, err := parseModel(data) if err != nil { - return empty, err + return nil, err } physicalID, err := parsePhysicalID(data) if err != nil { - return empty, err + return nil, err } coreID, err := parseCoreID(data) if err != nil { - return empty, err + return nil, err } bugs, err := parseBugs(data) if err != nil { - return empty, err + return nil, err } - return Thread{ + return &CPU{ processorNumber: processor, vendorID: vendorID, cpuFamily: cpuFamily, model: model, - id: threadID{ - physicalID: physicalID, - coreID: coreID, - }, - bugs: bugs, + physicalID: physicalID, + coreID: coreID, + bugs: bugs, }, nil } -// String implements the String method for thread. -func (t Thread) String() string { - template := `CPU: %d -CPU ID: %+v -Vendor: %s -Family/Model: %d/%d -Bugs: %s +// String implements the String method for CPU. +func (t *CPU) String() string { + template := `%s: %d +%s: %s +%s: %d +%s: %d +%s: %d +%s: %d +%s: %s ` - bugs := make([]string, 0) + var bugs []string for bug := range t.bugs { bugs = append(bugs, bug) } - return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) -} - -// Enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. -func (t Thread) Enable() error { - // Linux ensures that "cpu0" is always online. - if t.processorNumber == 0 { - return nil - } - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - f, err := os.OpenFile(cpuPath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - return fmt.Errorf("failed to open file %s: %v", cpuPath, err) - } - if _, err = f.Write([]byte{'1'}); err != nil { - return fmt.Errorf("failed to write '1' to %s: %v", cpuPath, err) - } - return nil -} - -// Disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. -func (t Thread) Disable() error { - // The core labeled "cpu0" can never be taken offline via this method. - // Linux will return EPERM if the user even creates a file at the /sys - // path above. - if t.processorNumber == 0 { - return fmt.Errorf("invalid shutdown operation: cpu0 cannot be disabled") - } - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) + return fmt.Sprintf(template, + processorKey, t.processorNumber, + vendorIDKey, t.vendorID, + cpuFamilyKey, t.cpuFamily, + modelKey, t.model, + physicalIDKey, t.physicalID, + coreIDKey, t.coreID, + bugsKey, strings.Join(bugs, " ")) } // IsVulnerable checks if a CPU is vulnerable to mds. -func (t Thread) IsVulnerable() bool { +func (t *CPU) IsVulnerable() bool { _, ok := t.bugs[mds] return ok } -// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online -// If the file does not exist (ioutil returns in error), we assume the CPU is on. -func (t Thread) isActive() bool { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - data, err := ioutil.ReadFile(cpuPath) - if err != nil { - return true - } - return len(data) > 0 && data[0] != '0' -} - // SimilarTo checks family/model/bugs fields for equality of two // processors. 
-func (t Thread) SimilarTo(other Thread) bool { +func (t *CPU) SimilarTo(other *CPU) bool { if t.vendorID != other.vendorID { return false } @@ -437,14 +265,14 @@ func parseIntegerResult(data, key string) (int64, error) { } // buildRegex builds a regex for parsing each CPU field. -func buildRegex(key, match string) *regexp.Regexp { +func buildRegex(key string) *regexp.Regexp { reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key) return regexp.MustCompile(reg) } // parseRegex parses data with key inserted into a standard regex template. func parseRegex(data, key, match string) (string, error) { - r := buildRegex(key, match) + r := buildRegex(key) matches := r.FindStringSubmatch(data) if len(matches) < 2 { diff --git a/runsc/mitigate/mitigate_test.go b/runsc/mitigate/mitigate_test.go index 890c65f05..e79d879e9 100644 --- a/runsc/mitigate/mitigate_test.go +++ b/runsc/mitigate/mitigate_test.go @@ -12,95 +12,59 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package mitigate import ( - "fmt" "io/ioutil" "strings" "testing" - - "gvisor.dev/gvisor/runsc/mitigate/mock" ) // TestMockCPUSet tests mock cpu test cases against the cpuSet functions. func TestMockCPUSet(t *testing.T) { for _, tc := range []struct { - testCase mock.CPU + testCase MockCPU isVulnerable bool }{ { - testCase: mock.AMD8, + testCase: AMD8, isVulnerable: false, }, { - testCase: mock.Haswell2, + testCase: Haswell2, isVulnerable: true, }, { - testCase: mock.Haswell2core, + testCase: Haswell2core, isVulnerable: true, }, { - testCase: mock.CascadeLake2, + testCase: CascadeLake2, isVulnerable: true, }, { - testCase: mock.CascadeLake4, + testCase: CascadeLake4, isVulnerable: true, }, } { t.Run(tc.testCase.Name, func(t *testing.T) { - data := tc.testCase.MakeCPUString() - set, err := NewCPUSet([]byte(data)) + data := tc.testCase.MakeCPUSet().String() + set, err := NewCPUSet(data) if err != nil { t.Fatalf("Failed to create cpuSet: %v", err) } - t.Logf("data: %s", data) - - for _, tg := range set { - if err := checkSorted(tg.threads); err != nil { - t.Fatalf("Failed to sort cpuSet: %v", err) - } - } - - remaining := set.GetRemainingList() - // In the non-vulnerable case, no cores should be shutdown so all should remain. - want := tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore - if tc.isVulnerable { - want = tc.testCase.PhysicalCores * tc.testCase.Cores - } - - if want != len(remaining) { - t.Fatalf("Failed to shutdown the correct number of cores: want: %d got: %d", want, len(remaining)) - } - - if !tc.isVulnerable { - return + if tc.testCase.NumCPUs() != len(set) { + t.Fatalf("Got wrong number of CPUs: want: %d got: %d", tc.testCase.NumCPUs(), len(set)) } - // If the set is vulnerable, we expect only 1 thread per hyperthread pair. 
- for _, r := range remaining { - if _, ok := set[r.id]; !ok { - t.Fatalf("Entry %+v not in map, there must be two entries in the same thread group.", r) - } - delete(set, r.id) - } - - possible := tc.testCase.MakeSysPossibleString() - set, err = NewCPUSetFromPossible([]byte(possible)) - if err != nil { - t.Fatalf("Failed to make cpuSet: %v", err) - } - - want = tc.testCase.PhysicalCores * tc.testCase.Cores * tc.testCase.ThreadsPerCore - got := len(set.GetRemainingList()) - if got != want { - t.Fatalf("Returned the wrong number of CPUs want: %d got: %d", want, got) + if set.IsVulnerable() != tc.isVulnerable { + t.Fatalf("incorrect vulnerable value: got: %t want: %t", set.IsVulnerable(), tc.isVulnerable) } + t.Logf("data: %s", data) }) } } @@ -116,29 +80,27 @@ physical id: 0 core id : 0 bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa itlb_multihit ` - want := Thread{ + want := CPU{ processorNumber: 0, vendorID: "GenuineIntel", cpuFamily: 6, model: 85, - id: threadID{ - physicalID: 0, - coreID: 0, - }, + physicalID: 0, + coreID: 0, bugs: map[string]struct{}{ - "cpu_meltdown": struct{}{}, - "spectre_v1": struct{}{}, - "spectre_v2": struct{}{}, - "spec_store_bypass": struct{}{}, - "l1tf": struct{}{}, - "mds": struct{}{}, - "swapgs": struct{}{}, - "taa": struct{}{}, - "itlb_multihit": struct{}{}, + "cpu_meltdown": {}, + "spectre_v1": {}, + "spectre_v2": {}, + "spec_store_bypass": {}, + "l1tf": {}, + "mds": {}, + "swapgs": {}, + "taa": {}, + "itlb_multihit": {}, }, } - got, err := newThread(data) + got, err := newCPU(data) if err != nil { t.Fatalf("getCpu failed with error: %v", err) } @@ -153,12 +115,12 @@ bugs : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa } func TestInvalid(t *testing.T) { - result, err := getThreads(`something not a processor`) + result, err := newCPU(`something not a processor`) if err == nil { t.Fatalf("getCPU set didn't return an error: %+v", result) } - if !strings.Contains(err.Error(), "no cpus") { + if !strings.Contains(err.Error(), "failed to match key \"processor\"") { t.Fatalf("Incorrect error returned: %v", err) } } @@ -220,7 +182,7 @@ cache_alignment : 64 address sizes : 46 bits physical, 48 bits virtual power management: ` - cpuSet, err := getThreads(data) + cpuSet, err := NewCPUSet(data) if err != nil { t.Fatalf("getCPUSet failed: %v", err) } @@ -230,18 +192,18 @@ power management: t.Fatalf("Num CPU mismatch: want: %d, got: %d", wantCPULen, len(cpuSet)) } - wantCPU := Thread{ + wantCPU := CPU{ vendorID: "GenuineIntel", cpuFamily: 6, model: 63, bugs: map[string]struct{}{ - "cpu_meltdown": struct{}{}, - "spectre_v1": struct{}{}, - "spectre_v2": struct{}{}, - "spec_store_bypass": struct{}{}, - "l1tf": struct{}{}, - "mds": struct{}{}, - "swapgs": struct{}{}, + "cpu_meltdown": {}, + "spectre_v1": {}, + "spectre_v2": {}, + "spec_store_bypass": {}, + "l1tf": {}, + "mds": {}, + "swapgs": {}, }, } @@ -259,17 +221,11 @@ func TestReadFile(t *testing.T) { t.Fatalf("Failed to read cpuinfo: %v", err) } - set, err := NewCPUSet(data) + set, err := NewCPUSet(string(data)) if err != nil { t.Fatalf("Failed to parse CPU data %v\n%s", err, data) } - for _, tg := range set { - if err := checkSorted(tg.threads); err != nil { - t.Fatalf("Failed to sort cpuSet: %v", err) - } - } - if len(set) < 1 { t.Fatalf("Failed to parse any CPUs: %d", len(set)) } @@ -334,38 +290,6 @@ cache_alignment : 64 address sizes : 46 bits physical, 48 bits virtual power management:` - const cascade = `processor : 0 -vendor_id : GenuineIntel -cpu 
family : 6 -model : 85 -model name : Intel(R) Xeon(R) CPU -stepping : 7 -microcode : 0x1 -cpu MHz : 2800.198 -cache size : 33792 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 1 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 - ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmu -lqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpr -efetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid r -tm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves a -rat avx512_vnni md_clear arch_capabilities -bugs : spectre_v1 spectre_v2 spec_store_bypass mds swapgs taa -bogomips : 5600.39 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management:` - const amd = `processor : 0 vendor_id : AuthenticAMD cpu family : 23 @@ -414,7 +338,7 @@ power management:` }, } { t.Run(tc.name, func(t *testing.T) { - set, err := getThreads(tc.cpuString) + set, err := NewCPUSet(tc.cpuString) if err != nil { t.Fatalf("Failed to getCPUSet:%v\n %s", err, tc.cpuString) } @@ -429,104 +353,9 @@ power management:` }() if got != tc.vulnerable { - t.Fatalf("Mismatch vulnerable for cpu %+s: got %t want: %t", tc.name, tc.vulnerable, got) + t.Fatalf("Mismatch vulnerable for cpu %s: got %t want: %t", tc.name, tc.vulnerable, got) } } }) } } - -func TestReverse(t *testing.T) { - const noParse = "-1-" - for _, tc := range []struct { - name string - output string - wantErr error - wantCount int - }{ - { - name: "base", - output: "0-7", - wantErr: nil, - wantCount: 8, - }, - { - name: "huge", - output: "0-111", - wantErr: nil, - wantCount: 112, - }, - { - name: "not zero", - output: "50-53", - wantErr: nil, - wantCount: 4, - }, - { - name: "small", - output: "0", - wantErr: nil, - wantCount: 1, - }, - { - name: "invalid order", - output: "10-6", - wantErr: fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", 10, 6), - }, - { - name: "no parse", - output: noParse, - wantErr: fmt.Errorf(`mismatch regex from possible: %q`, noParse), - }, - } { - t.Run(tc.name, func(t *testing.T) { - threads, err := GetThreadsFromPossible([]byte(tc.output)) - - switch { - case tc.wantErr == nil: - if err != nil { - t.Fatalf("Wanted nil err, got: %v", err) - } - case err == nil: - t.Fatalf("Want error: %v got: %v", tc.wantErr, err) - default: - if tc.wantErr.Error() != err.Error() { - t.Fatalf("Want error: %v got error: %v", tc.wantErr, err) - } - } - - if len(threads) != tc.wantCount { - t.Fatalf("Want count: %d got: %d", tc.wantCount, len(threads)) - } - }) - } -} - -func TestReverseSmoke(t *testing.T) { - data, err := ioutil.ReadFile("/sys/devices/system/cpu/possible") - if err != nil { - t.Fatalf("Failed to read from possible: %v", err) - } - threads, err := GetThreadsFromPossible(data) - if err != nil { - t.Fatalf("Could not parse possible output: %v", err) - } - - if len(threads) <= 0 { - t.Fatalf("Didn't get any CPU cores: %d", len(threads)) - } -} - -func checkSorted(threads []Thread) error { - if len(threads) < 2 { - return nil - } - last := threads[0].processorNumber - for _, t := range threads[1:] { - if last >= t.processorNumber { - return fmt.Errorf("threads out of order: thread %d before 
%d", t.processorNumber, last) - } - last = t.processorNumber - } - return nil -} diff --git a/runsc/mitigate/mock/mock.go b/runsc/mitigate/mock.go index 12c59e356..4588ae2ed 100644 --- a/runsc/mitigate/mock/mock.go +++ b/runsc/mitigate/mock.go @@ -12,26 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package mock contains mock CPUs for mitigate tests. -package mock +package mitigate -import "fmt" +import "strings" -// CPU represents data from CPUs that will be mitigated. -type CPU struct { +// MockCPU represents data from CPUs that will be mitigated. +type MockCPU struct { Name string VendorID string - Family int - Model int + Family int64 + Model int64 ModelName string Bugs string - PhysicalCores int - Cores int - ThreadsPerCore int + PhysicalCores int64 + Cores int64 + ThreadsPerCore int64 } // CascadeLake2 is a two core Intel CascadeLake machine. -var CascadeLake2 = CPU{ +var CascadeLake2 = MockCPU{ Name: "CascadeLake", VendorID: "GenuineIntel", Family: 6, @@ -44,7 +43,7 @@ var CascadeLake2 = CPU{ } // CascadeLake4 is a four core Intel CascadeLake machine. -var CascadeLake4 = CPU{ +var CascadeLake4 = MockCPU{ Name: "CascadeLake", VendorID: "GenuineIntel", Family: 6, @@ -57,7 +56,7 @@ var CascadeLake4 = CPU{ } // Haswell2 is a two core Intel Haswell machine. -var Haswell2 = CPU{ +var Haswell2 = MockCPU{ Name: "Haswell", VendorID: "GenuineIntel", Family: 6, @@ -70,7 +69,7 @@ var Haswell2 = CPU{ } // Haswell2core is a 2 core Intel Haswell machine with no hyperthread pairs. -var Haswell2core = CPU{ +var Haswell2core = MockCPU{ Name: "Haswell2Physical", VendorID: "GenuineIntel", Family: 6, @@ -83,7 +82,7 @@ var Haswell2core = CPU{ } // AMD2 is an two core AMD machine. -var AMD2 = CPU{ +var AMD2 = MockCPU{ Name: "AMD", VendorID: "AuthenticAMD", Family: 23, @@ -96,7 +95,7 @@ var AMD2 = CPU{ } // AMD8 is an eight core AMD machine. -var AMD8 = CPU{ +var AMD8 = MockCPU{ Name: "AMD", VendorID: "AuthenticAMD", Family: 23, @@ -108,47 +107,39 @@ var AMD8 = CPU{ ThreadsPerCore: 2, } -// MakeCPUString makes a string formated like /proc/cpuinfo for each cpuTestCase -func (tc CPU) MakeCPUString() string { - template := `processor : %d -vendor_id : %s -cpu family : %d -model : %d -model name : %s -physical id : %d -core id : %d -cpu cores : %d -bugs : %s - -` +// Empty is an empty CPU set. +var Empty = MockCPU{ + Name: "Empty", +} - ret := `` - for i := 0; i < tc.PhysicalCores; i++ { - for j := 0; j < tc.Cores; j++ { - for k := 0; k < tc.ThreadsPerCore; k++ { +// MakeCPUSet makes a cpuSet from a MockCPU. 
+func (tc MockCPU) MakeCPUSet() CPUSet { + bugs := make(map[string]struct{}) + for _, bug := range strings.Split(tc.Bugs, " ") { + bugs[bug] = struct{}{} + } + var cpus CPUSet = []*CPU{} + for i := int64(0); i < tc.PhysicalCores; i++ { + for j := int64(0); j < tc.Cores; j++ { + for k := int64(0); k < tc.ThreadsPerCore; k++ { processorNum := (i*tc.Cores+j)*tc.ThreadsPerCore + k - ret += fmt.Sprintf(template, - processorNum, /*processor*/ - tc.VendorID, /*vendor_id*/ - tc.Family, /*cpu family*/ - tc.Model, /*model*/ - tc.ModelName, /*model name*/ - i, /*physical id*/ - j, /*core id*/ - k, /*cpu cores*/ - tc.Bugs, /*bugs*/ - ) + cpu := &CPU{ + processorNumber: processorNum, + vendorID: tc.VendorID, + cpuFamily: tc.Family, + model: tc.Model, + physicalID: i, + coreID: j, + bugs: bugs, + } + cpus = append(cpus, cpu) } } } - return ret + return cpus } -// MakeSysPossibleString makes a string representing a the contents of /sys/devices/system/cpu/possible. -func (tc CPU) MakeSysPossibleString() string { - max := tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore - if max == 1 { - return "0" - } - return fmt.Sprintf("0-%d", max-1) +// NumCPUs returns the number of CPUs for this CPU. +func (tc MockCPU) NumCPUs() int { + return int(tc.PhysicalCores * tc.Cores * tc.ThreadsPerCore) } diff --git a/runsc/mitigate/mock/BUILD b/runsc/mitigate/mock/BUILD deleted file mode 100644 index 5019ff9ee..000000000 --- a/runsc/mitigate/mock/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "mock", - srcs = ["mock.go"], - visibility = [ - "//runsc:__subpackages__", - ], -) diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index bc4a3fa32..d625230dd 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -17,12 +17,14 @@ go_library( "//pkg/control/client", "//pkg/control/server", "//pkg/coverage", + "//pkg/eventchannel", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/platform", "//pkg/sync", "//pkg/tcpip/header", "//pkg/tcpip/stack", + "//pkg/unet", "//pkg/urpc", "//runsc/boot", "//runsc/boot/platforms", diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index f69558021..3451d1037 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -49,7 +49,7 @@ import ( // // Run the following container to test it: // docker run -di --runtime=runsc -p 8080:80 -v $PWD:/usr/local/apache2/htdocs/ httpd:2.4 -func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *config.Config) error { +func setupNetwork(conn *urpc.Client, pid int, conf *config.Config) error { log.Infof("Setting up network") switch conf.Network { @@ -301,13 +301,13 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( // Use SO_RCVBUFFORCE/SO_SNDBUFFORCE because on linux the receive/send buffer // for an AF_PACKET socket is capped by "net.core.rmem_max/wmem_max". - // wmem_max/rmem_max default to a unusually low value of 208KB. This is too low - // for gVisor to be able to receive packets at high throughputs without + // wmem_max/rmem_max default to a unusually low value of 208KB. This is too + // low for gVisor to be able to receive packets at high throughputs without // incurring packet drops. const bufSize = 4 << 20 // 4MB. 
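The change below only makes a deliberately discarded error explicit, but the underlying pattern deserves a note: SO_RCVBUFFORCE and SO_SNDBUFFORCE require CAP_NET_ADMIN and bypass the net.core.rmem_max/wmem_max caps, so the code tries the forcing option first and degrades to the capped best-effort one. A sketch of the receive side, assuming only an open socket descriptor:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// setRcvBuf tries to force the receive buffer past net.core.rmem_max
// (requires CAP_NET_ADMIN), falls back to the capped SO_RCVBUF, and
// returns the size the kernel actually granted.
func setRcvBuf(fd, want int) int {
	if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, want); err != nil {
		_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, want)
	}
	got, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF)
	return got
}

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)
	fmt.Printf("granted %d bytes\n", setRcvBuf(fd, 4<<20))
}

Reading the size back matters because the kernel silently clamps the best-effort path; that is exactly what the warning logs below check.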
if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bufSize); err != nil { - unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, bufSize) + _ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, bufSize) sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF) if sz < bufSize { @@ -316,10 +316,10 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( } if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bufSize); err != nil { - unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, bufSize) + _ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, bufSize) sz, _ := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF) if sz < bufSize { - log.Warningf("Failed to increase snd buffer to %d on SOCK_RAW on %s. Curent buffer %d: %v", bufSize, iface.Name, sz, err) + log.Warningf("Failed to increase snd buffer to %d on SOCK_RAW on %s. Current buffer %d: %v", bufSize, iface.Name, sz, err) } } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 29e202b7d..f4a37cedc 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -35,10 +35,12 @@ import ( "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" @@ -65,6 +67,11 @@ type Sandbox struct { // is not running. Pid int `json:"pid"` + // UID is the user ID in the parent namespace that the sandbox is running as. + UID int `json:"uid"` + // GID is the group ID in the parent namespace that the sandbox is running as. + GID int `json:"gid"` + // Cgroup has the cgroup configuration for the sandbox. Cgroup *cgroup.Cgroup `json:"cgroup"` @@ -175,26 +182,30 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) { return s, nil } -// CreateContainer creates a non-root container inside the sandbox. -func (s *Sandbox) CreateContainer(cid string, tty *os.File) error { - log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) - sandboxConn, err := s.sandboxConnect() - if err != nil { - return fmt.Errorf("couldn't connect to sandbox: %v", err) - } - defer sandboxConn.Close() +// CreateSubcontainer creates a container inside the sandbox. +func (s *Sandbox) CreateSubcontainer(conf *config.Config, cid string, tty *os.File) error { + log.Debugf("Create sub-container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) var files []*os.File if tty != nil { files = []*os.File{tty} } + if err := s.configureStdios(conf, files); err != nil { + return err + } + + sandboxConn, err := s.sandboxConnect() + if err != nil { + return fmt.Errorf("couldn't connect to sandbox: %v", err) + } + defer sandboxConn.Close() args := boot.CreateArgs{ CID: cid, FilePayload: urpc.FilePayload{Files: files}, } - if err := sandboxConn.Call(boot.ContainerCreate, &args, nil); err != nil { - return fmt.Errorf("creating non-root container %q: %v", cid, err) + if err := sandboxConn.Call(boot.ContMgrCreateSubcontainer, &args, nil); err != nil { + return fmt.Errorf("creating sub-container %q: %v", cid, err) } return nil } @@ -209,22 +220,27 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error { defer conn.Close() // Configure the network. 
- if err := setupNetwork(conn, s.Pid, spec, conf); err != nil { + if err := setupNetwork(conn, s.Pid, conf); err != nil { return fmt.Errorf("setting up network: %v", err) } // Send a message to the sandbox control server to start the root // container. - if err := conn.Call(boot.RootContainerStart, &s.ID, nil); err != nil { + if err := conn.Call(boot.ContMgrRootContainerStart, &s.ID, nil); err != nil { return fmt.Errorf("starting root container: %v", err) } return nil } -// StartContainer starts running a non-root container inside the sandbox. -func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error { - log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) +// StartSubcontainer starts running a sub-container inside the sandbox. +func (s *Sandbox) StartSubcontainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error { + log.Debugf("Start sub-container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) + + if err := s.configureStdios(conf, stdios); err != nil { + return err + } + sandboxConn, err := s.sandboxConnect() if err != nil { return fmt.Errorf("couldn't connect to sandbox: %v", err) @@ -244,8 +260,8 @@ func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid stri CID: cid, FilePayload: payload, } - if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil { - return fmt.Errorf("starting non-root container %v: %v", spec.Process.Args, err) + if err := sandboxConn.Call(boot.ContMgrStartSubcontainer, &args, nil); err != nil { + return fmt.Errorf("starting sub-container %v: %v", spec.Process.Args, err) } return nil } @@ -282,12 +298,12 @@ func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *config.Config, fil defer conn.Close() // Configure the network. - if err := setupNetwork(conn, s.Pid, spec, conf); err != nil { + if err := setupNetwork(conn, s.Pid, conf); err != nil { return fmt.Errorf("setting up network: %v", err) } // Restore the container and start the root container. - if err := conn.Call(boot.ContainerRestore, &opt, nil); err != nil { + if err := conn.Call(boot.ContMgrRestore, &opt, nil); err != nil { return fmt.Errorf("restoring container %q: %v", cid, err) } @@ -305,7 +321,7 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) { defer conn.Close() var pl []*control.Process - if err := conn.Call(boot.ContainerProcesses, &cid, &pl); err != nil { + if err := conn.Call(boot.ContMgrProcesses, &cid, &pl); err != nil { return nil, fmt.Errorf("retrieving process data from sandbox: %v", err) } return pl, nil @@ -318,8 +334,13 @@ func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) { // Execute runs the specified command in the container. It returns the PID of // the newly created process. -func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { +func (s *Sandbox) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) { log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID) + + if err := s.configureStdios(conf, args.Files); err != nil { + return 0, err + } + conn, err := s.sandboxConnect() if err != nil { return 0, s.connError(err) @@ -328,7 +349,7 @@ func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { // Send a message to the sandbox control server to start the container. 
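StartSubcontainer and Execute now run configureStdios before donating stdio files: the sandbox process runs as nobody inside a user namespace (the new UID/GID fields above record the mapping), so without a chown it could not write to the caller's terminal. The real helper appears later in this diff; a minimal sketch of the idea, under an illustrative name:

package main

import (
	"fmt"
	"os"
)

// giveStdios chowns donated stdio files to the sandbox identity. The real
// configureStdios skips this for rootless runs, where CAP_CHOWN is absent.
func giveStdios(stdios []*os.File, uid, gid int) error {
	for _, f := range stdios {
		if err := f.Chown(uid, gid); err != nil {
			return fmt.Errorf("chown %q to %d/%d: %w", f.Name(), uid, gid, err)
		}
	}
	return nil
}

func main() {
	fmt.Println(giveStdios([]*os.File{os.Stdout}, os.Getuid(), os.Getgid()))
}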
var pid int32 - if err := conn.Call(boot.ContainerExecuteAsync, args, &pid); err != nil { + if err := conn.Call(boot.ContMgrExecuteAsync, args, &pid); err != nil { return 0, fmt.Errorf("executing command %q in sandbox: %v", args, err) } return pid, nil @@ -346,7 +367,7 @@ func (s *Sandbox) Event(cid string) (*boot.EventOut, error) { var e boot.EventOut // TODO(b/129292330): Pass in the container id (cid) here. The sandbox // should return events only for that container. - if err := conn.Call(boot.ContainerEvent, nil, &e); err != nil { + if err := conn.Call(boot.ContMgrEvent, nil, &e); err != nil { return nil, fmt.Errorf("retrieving event data from sandbox: %v", err) } e.Event.ID = cid @@ -469,6 +490,61 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.Args = append(cmd.Args, "--start-sync-fd="+strconv.Itoa(nextFD)) nextFD++ + if conf.ProfileBlock != "" { + blockFile, err := os.OpenFile(conf.ProfileBlock, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening block profiling file %q: %v", conf.ProfileBlock, err) + } + defer blockFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, blockFile) + cmd.Args = append(cmd.Args, "--profile-block-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileCPU != "" { + cpuFile, err := os.OpenFile(conf.ProfileCPU, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening cpu profiling file %q: %v", conf.ProfileCPU, err) + } + defer cpuFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, cpuFile) + cmd.Args = append(cmd.Args, "--profile-cpu-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileHeap != "" { + heapFile, err := os.OpenFile(conf.ProfileHeap, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening heap profiling file %q: %v", conf.ProfileHeap, err) + } + defer heapFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, heapFile) + cmd.Args = append(cmd.Args, "--profile-heap-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileMutex != "" { + mutexFile, err := os.OpenFile(conf.ProfileMutex, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening mutex profiling file %q: %v", conf.ProfileMutex, err) + } + defer mutexFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, mutexFile) + cmd.Args = append(cmd.Args, "--profile-mutex-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.TraceFile != "" { + traceFile, err := os.OpenFile(conf.TraceFile, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening trace file %q: %v", conf.TraceFile, err) + } + defer traceFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, traceFile) + cmd.Args = append(cmd.Args, "--trace-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + // If there is a gofer, sends all socket ends to the sandbox. for _, f := range args.IOFiles { defer f.Close() @@ -505,6 +581,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.Stdin = nil cmd.Stdout = nil cmd.Stderr = nil + var stdios [3]*os.File // If the console control socket file is provided, then create a new // pty master/replica pair and set the TTY on the sandbox process. @@ -525,11 +602,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.SysProcAttr.Ctty = nextFD // Pass the tty as all stdio fds to sandbox. 
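The profiling hunks above repeat a single idiom: append the opened *os.File to cmd.ExtraFiles and pass the child-side descriptor number as a flag. os/exec guarantees that ExtraFiles[i] becomes descriptor 3+i in the child, which is why nextFD starts just past stderr and advances once per donated file. A runnable illustration; the PROFILE_FD variable stands in for flags like --profile-block-fd:

package main

import (
	"os"
	"os/exec"
	"strconv"
)

func main() {
	f, err := os.CreateTemp("", "profile")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// The child lists its own descriptors; fd 3 will point at the temp file.
	cmd := exec.Command("ls", "-l", "/proc/self/fd")
	cmd.Stdout = os.Stdout
	nextFD := 3 // 0-2 are the child's stdin/stdout/stderr.
	cmd.ExtraFiles = append(cmd.ExtraFiles, f)
	cmd.Env = append(os.Environ(), "PROFILE_FD="+strconv.Itoa(nextFD))
	nextFD++ // one donated file consumed, mirroring the bookkeeping above
	if err := cmd.Run(); err != nil {
		panic(err)
	}
}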
- for i := 0; i < 3; i++ { - cmd.ExtraFiles = append(cmd.ExtraFiles, tty) - cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) - nextFD++ - } + stdios[0] = tty + stdios[1] = tty + stdios[2] = tty if conf.Debug { // If debugging, send the boot process stdio to the @@ -541,11 +616,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn } else { // If not using a console, pass our current stdio as the // container stdio via flags. - for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) - nextFD++ - } + stdios[0] = os.Stdin + stdios[1] = os.Stdout + stdios[2] = os.Stderr if conf.Debug { // If debugging, send the boot process stdio to the @@ -595,6 +668,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace}) } + // These are set to the uid/gid that the sandbox process will use. + s.UID = os.Getuid() + s.GID = os.Getgid() + // User namespace depends on the network type. Host network requires to run // inside the user namespace specified in the spec or the current namespace // if none is configured. @@ -636,51 +713,49 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn const nobody = 65534 if conf.Rootless { log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid()) - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: os.Getuid(), - Size: 1, - }, - } - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: os.Getgid(), - Size: 1, - }, - } - } else { // Map nobody in the new namespace to nobody in the parent namespace. - // - // A sandbox process will construct an empty - // root for itself, so it has to have - // CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: nobody, - Size: 1, - }, - } - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ - { - ContainerID: nobody, - HostID: nobody, - Size: 1, - }, - } + s.UID = nobody + s.GID = nobody } // Set credentials to run as user and group nobody. cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody} + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: nobody, + HostID: s.UID, + Size: 1, + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: nobody, + HostID: s.GID, + Size: 1, + }, + } + + // A sandbox process will construct an empty root for itself, so it has + // to have CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities. cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT)) + } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } } + if err := s.configureStdios(conf, stdios[:]); err != nil { + return fmt.Errorf("configuring stdios: %w", err) + } + for _, file := range stdios { + cmd.ExtraFiles = append(cmd.ExtraFiles, file) + cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + // Set Args[0] to make easier to spot the sandbox process. Otherwise it's + // shown as `exe`. 
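The rewritten mapping logic above collapses the rootless and non-rootless branches into one: the sandbox always runs as nobody (65534) inside a new user namespace, and that single ID is mapped either to the invoking user or to host nobody, with the choice recorded in s.UID/s.GID. A self-contained sketch of the same SysProcAttr setup, mapping to the current user so it runs unprivileged:

package main

import (
	"os"
	"os/exec"
	"syscall"
)

func main() {
	const nobody = 65534
	cmd := exec.Command("id") // reports uid/gid 65534 inside the namespace
	cmd.Stdout = os.Stdout
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWUSER,
		// One-entry map: in-namespace nobody is the invoking user outside.
		UidMappings: []syscall.SysProcIDMap{{ContainerID: nobody, HostID: os.Getuid(), Size: 1}},
		GidMappings: []syscall.SysProcIDMap{{ContainerID: nobody, HostID: os.Getgid(), Size: 1}},
		Credential:  &syscall.Credential{Uid: nobody, Gid: nobody},
	}
	if err := cmd.Run(); err != nil {
		panic(err)
	}
}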
cmd.Args[0] = "runsc-sandbox" if s.Cgroup != nil { @@ -796,8 +871,14 @@ func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) { // Try the Wait RPC to the sandbox. var ws unix.WaitStatus - err = conn.Call(boot.ContainerWait, &cid, &ws) + err = conn.Call(boot.ContMgrWait, &cid, &ws) + conn.Close() if err == nil { + if s.IsRootContainer(cid) { + if err := s.waitForStopped(); err != nil { + return unix.WaitStatus(0), err + } + } // It worked! return ws, nil } @@ -841,7 +922,7 @@ func (s *Sandbox) WaitPID(cid string, pid int32) (unix.WaitStatus, error) { PID: pid, CID: cid, } - if err := conn.Call(boot.ContainerWaitPID, args, &ws); err != nil { + if err := conn.Call(boot.ContMgrWaitPID, args, &ws); err != nil { return ws, fmt.Errorf("waiting on PID %d in sandbox %q: %v", pid, s.ID, err) } return ws, nil @@ -891,7 +972,7 @@ func (s *Sandbox) SignalContainer(cid string, sig unix.Signal, all bool) error { Signo: int32(sig), Mode: mode, } - if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil { + if err := conn.Call(boot.ContMgrSignal, &args, nil); err != nil { return fmt.Errorf("signaling container %q: %v", cid, err) } return nil @@ -920,7 +1001,7 @@ func (s *Sandbox) SignalProcess(cid string, pid int32, sig unix.Signal, fgProces PID: pid, Mode: mode, } - if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil { + if err := conn.Call(boot.ContMgrSignal, &args, nil); err != nil { return fmt.Errorf("signaling container %q PID %d: %v", cid, pid, err) } return nil @@ -942,7 +1023,7 @@ func (s *Sandbox) Checkpoint(cid string, f *os.File) error { }, } - if err := conn.Call(boot.ContainerCheckpoint, &opt, nil); err != nil { + if err := conn.Call(boot.ContMgrCheckpoint, &opt, nil); err != nil { return fmt.Errorf("checkpointing container %q: %v", cid, err) } return nil @@ -957,7 +1038,7 @@ func (s *Sandbox) Pause(cid string) error { } defer conn.Close() - if err := conn.Call(boot.ContainerPause, nil, nil); err != nil { + if err := conn.Call(boot.LifecyclePause, nil, nil); err != nil { return fmt.Errorf("pausing container %q: %v", cid, err) } return nil @@ -972,12 +1053,114 @@ func (s *Sandbox) Resume(cid string) error { } defer conn.Close() - if err := conn.Call(boot.ContainerResume, nil, nil); err != nil { + if err := conn.Call(boot.LifecycleResume, nil, nil); err != nil { return fmt.Errorf("resuming container %q: %v", cid, err) } return nil } +// Cat sends the cat call for a container in the sandbox. +func (s *Sandbox) Cat(cid string, files []string, out *os.File) error { + log.Debugf("Cat sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.FsCat, &control.CatOpts{ + Files: files, + FilePayload: urpc.FilePayload{Files: []*os.File{out}}, + }, nil); err != nil { + return fmt.Errorf("Cat container %q: %v", cid, err) + } + return nil +} + +// Usage sends the collect call for a container in the sandbox. +func (s *Sandbox) Usage(cid string, Full bool) (control.MemoryUsage, error) { + log.Debugf("Usage sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return control.MemoryUsage{}, err + } + defer conn.Close() + + var m control.MemoryUsage + err = conn.Call(boot.UsageCollect, &control.MemoryUsageOpts{ + Full: Full, + }, &m) + return m, err +} + +// UsageFD sends the usagefd call for a container in the sandbox. 
+func (s *Sandbox) UsageFD(cid string) (*control.MemoryUsageRecord, error) { + log.Debugf("Usage sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return nil, err + } + defer conn.Close() + + var m control.MemoryUsageFile + if err := conn.Call(boot.UsageUsageFD, &control.MemoryUsageFileOpts{ + Version: 1, + }, &m); err != nil { + return nil, fmt.Errorf("UsageFD failed: %v", err) + } + + if len(m.FilePayload.Files) != 2 { + return nil, fmt.Errorf("wants exactly two fds") + } + + return control.NewMemoryUsageRecord(*m.FilePayload.Files[0], *m.FilePayload.Files[1]) +} + +// Reduce sends the reduce call for a container in the sandbox. +func (s *Sandbox) Reduce(cid string, wait bool) error { + log.Debugf("Reduce sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + return conn.Call(boot.UsageReduce, &control.UsageReduceOpts{ + Wait: wait, + }, nil) +} + +// Stream sends the AttachDebugEmitter call for a container in the sandbox, and +// dumps filtered events to out. +func (s *Sandbox) Stream(cid string, filters []string, out *os.File) error { + log.Debugf("Stream sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + r, w, err := unet.SocketPair(false) + if err != nil { + return err + } + + wfd, err := w.Release() + if err != nil { + return fmt.Errorf("failed to release write socket FD: %v", err) + } + + if err := conn.Call(boot.EventsAttachDebugEmitter, &control.EventsOpts{ + FilePayload: urpc.FilePayload{Files: []*os.File{ + os.NewFile(uintptr(wfd), "event sink"), + }}, + }, nil); err != nil { + return fmt.Errorf("AttachDebugEmitter failed: %v", err) + } + + return eventchannel.ProcessAll(r, filters, out) +} + // IsRunning returns true if the sandbox or gofer process is running. func (s *Sandbox) IsRunning() bool { if s.Pid != 0 { @@ -1000,7 +1183,7 @@ func (s *Sandbox) Stacks() (string, error) { defer conn.Close() var stacks string - if err := conn.Call(boot.SandboxStacks, nil, &stacks); err != nil { + if err := conn.Call(boot.DebugStacks, nil, &stacks); err != nil { return "", fmt.Errorf("getting sandbox %q stacks: %v", s.ID, err) } return stacks, nil @@ -1019,7 +1202,7 @@ func (s *Sandbox) HeapProfile(f *os.File, delay time.Duration) error { FilePayload: urpc.FilePayload{Files: []*os.File{f}}, Delay: delay, } - return conn.Call(boot.HeapProfile, &opts, nil) + return conn.Call(boot.ProfileHeap, &opts, nil) } // CPUProfile collects a CPU profile. @@ -1035,7 +1218,7 @@ func (s *Sandbox) CPUProfile(f *os.File, duration time.Duration) error { FilePayload: urpc.FilePayload{Files: []*os.File{f}}, Duration: duration, } - return conn.Call(boot.CPUProfile, &opts, nil) + return conn.Call(boot.ProfileCPU, &opts, nil) } // BlockProfile writes a block profile to the given file. @@ -1051,7 +1234,7 @@ func (s *Sandbox) BlockProfile(f *os.File, duration time.Duration) error { FilePayload: urpc.FilePayload{Files: []*os.File{f}}, Duration: duration, } - return conn.Call(boot.BlockProfile, &opts, nil) + return conn.Call(boot.ProfileBlock, &opts, nil) } // MutexProfile writes a mutex profile to the given file. @@ -1067,7 +1250,7 @@ func (s *Sandbox) MutexProfile(f *os.File, duration time.Duration) error { FilePayload: urpc.FilePayload{Files: []*os.File{f}}, Duration: duration, } - return conn.Call(boot.MutexProfile, &opts, nil) + return conn.Call(boot.ProfileMutex, &opts, nil) } // Trace collects an execution trace. 
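Stream is the most involved of the new RPCs: it donates one half of a connected socket pair as the sandbox's debug event sink and decodes events arriving on the other half. Condensed below with the moving parts annotated; every API used is the one this change itself uses:

package sandbox // illustrative placement, mirroring runsc/sandbox

import (
	"os"

	"gvisor.dev/gvisor/pkg/eventchannel"
	"gvisor.dev/gvisor/pkg/sentry/control"
	"gvisor.dev/gvisor/pkg/unet"
	"gvisor.dev/gvisor/pkg/urpc"
	"gvisor.dev/gvisor/runsc/boot"
)

// streamEvents donates w to the sandbox as its event sink, then drains
// filtered events from r into out until the sandbox closes its end.
func streamEvents(conn *urpc.Client, filters []string, out *os.File) error {
	r, w, err := unet.SocketPair(false)
	if err != nil {
		return err
	}
	wfd, err := w.Release() // take ownership of the raw FD for donation
	if err != nil {
		return err
	}
	opts := control.EventsOpts{
		FilePayload: urpc.FilePayload{Files: []*os.File{os.NewFile(uintptr(wfd), "event sink")}},
	}
	if err := conn.Call(boot.EventsAttachDebugEmitter, &opts, nil); err != nil {
		return err
	}
	return eventchannel.ProcessAll(r, filters, out)
}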
@@ -1083,7 +1266,7 @@ func (s *Sandbox) Trace(f *os.File, duration time.Duration) error { FilePayload: urpc.FilePayload{Files: []*os.File{f}}, Duration: duration, } - return conn.Call(boot.Trace, &opts, nil) + return conn.Call(boot.ProfileTrace, &opts, nil) } // ChangeLogging changes logging options. @@ -1095,7 +1278,7 @@ func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error { } defer conn.Close() - if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil { + if err := conn.Call(boot.LoggingChange, &args, nil); err != nil { return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err) } return nil @@ -1126,34 +1309,33 @@ func (s *Sandbox) destroyContainer(cid string) error { return err } defer conn.Close() - if err := conn.Call(boot.ContainerDestroy, &cid, nil); err != nil { + if err := conn.Call(boot.ContMgrDestroySubcontainer, &cid, nil); err != nil { return fmt.Errorf("destroying container %q: %v", cid, err) } return nil } func (s *Sandbox) waitForStopped() error { + if s.child { + s.statusMu.Lock() + defer s.statusMu.Unlock() + if s.Pid == 0 { + return nil + } + // The sandbox process is a child of the current process, + // so we can wait it and collect its zombie. + if _, err := unix.Wait4(int(s.Pid), &s.status, 0, nil); err != nil { + return fmt.Errorf("error waiting the sandbox process: %v", err) + } + s.Pid = 0 + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if s.child { - s.statusMu.Lock() - defer s.statusMu.Unlock() - if s.Pid == 0 { - return nil - } - // The sandbox process is a child of the current process, - // so we can wait it and collect its zombie. - wpid, err := unix.Wait4(int(s.Pid), &s.status, unix.WNOHANG, nil) - if err != nil { - return fmt.Errorf("error waiting the sandbox process: %v", err) - } - if wpid == 0 { - return fmt.Errorf("sandbox is still running") - } - s.Pid = 0 - } else if s.IsRunning() { + if s.IsRunning() { return fmt.Errorf("sandbox is still running") } return nil @@ -1161,6 +1343,23 @@ func (s *Sandbox) waitForStopped() error { return backoff.Retry(op, b) } +// configureStdios change stdios ownership to give access to the sandbox +// process. This may be skipped depending on the configuration. +func (s *Sandbox) configureStdios(conf *config.Config, stdios []*os.File) error { + if conf.Rootless || conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // Cannot change ownership without CAP_CHOWN. + return nil + } + + for _, file := range stdios { + log.Debugf("Changing %q ownership to %d/%d", file.Name(), s.UID, s.GID) + if err := file.Chown(s.UID, s.GID); err != nil { + return err + } + } + return nil +} + // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. func deviceFileForPlatform(name string) (*os.File, error) { diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 9ecd0fde6..ac20696ee 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -67,8 +67,8 @@ var optionsMap = map[string]mapping{ // verityMountOptions is the set of valid verity mount option keys. 
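The waitForStopped rewrite above is worth pausing on: for child sandboxes it now blocks in wait4 to reap the zombie directly (the old code polled with WNOHANG inside the retry loop), and only the detached case still polls. The polling side is the standard cenkalti/backoff shape, a constant interval bounded by a context deadline. A sketch with a stand-in condition:

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

func processStillRunning() bool { return false } // stand-in for s.IsRunning()

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Poll every 100ms until success or the 5s deadline expires.
	b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
	op := func() error {
		if processStillRunning() {
			return fmt.Errorf("sandbox is still running")
		}
		return nil
	}
	if err := backoff.Retry(op, b); err != nil {
		fmt.Println("gave up:", err)
	}
}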
var verityMountOptions = map[string]struct{}{ - "verity.roothash": struct{}{}, - "verity.action": struct{}{}, + "verity.roothash": {}, + "verity.action": {}, } // propOptionsMap is similar to optionsMap, but it lists propagation options diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 69d7ba5c4..21559f5e5 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -270,7 +270,10 @@ func MaybeRunAsRoot() error { go func() { for { // Forward all signals to child process. - cmd.Process.Signal(<-ch) + sig := <-ch + if err := cmd.Process.Signal(sig); err != nil { + log.Warningf("Error forwarding signal %v to child (PID %d)", sig, cmd.Process.Pid) + } } }() if err := cmd.Wait(); err != nil { diff --git a/runsc/specutils/safemount_test/BUILD b/runsc/specutils/safemount_test/BUILD new file mode 100644 index 000000000..c39c40492 --- /dev/null +++ b/runsc/specutils/safemount_test/BUILD @@ -0,0 +1,23 @@ +load("//tools:defs.bzl", "go_binary", "go_test") + +package(licenses = ["notice"]) + +go_test( + name = "safemount_test", + size = "small", + srcs = ["safemount_test.go"], + data = [":safemount_runner"], + deps = [ + "//pkg/test/testutil", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +go_binary( + name = "safemount_runner", + srcs = ["safemount_runner.go"], + deps = [ + "//runsc/specutils", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/runsc/specutils/safemount_test/safemount_runner.go b/runsc/specutils/safemount_test/safemount_runner.go new file mode 100644 index 000000000..b23193033 --- /dev/null +++ b/runsc/specutils/safemount_test/safemount_runner.go @@ -0,0 +1,117 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// safemount_runner is used to test the SafeMount function. Because use of +// unix.Mount requires privilege, tests must launch this process with +// CLONE_NEWNS and CLONE_NEWUSER. +package main + +import ( + "errors" + "fmt" + "log" + "os" + "path/filepath" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/runsc/specutils" +) + +func main() { + // The test temporary directory is the first argument. + tempdir := os.Args[1] + + tcs := []struct { + name string + testfunc func() error + }{{ + name: "unix.Mount to folder succeeds", + testfunc: func() error { + dir2Path := filepath.Join(tempdir, "subdir2") + if err := unix.Mount(filepath.Join(tempdir, "subdir"), dir2Path, "bind", unix.MS_BIND, ""); err != nil { + return fmt.Errorf("mount: %v", err) + } + return unix.Unmount(dir2Path, unix.MNT_DETACH) + }, + }, { + // unix.Mount doesn't care whether the target is a symlink. 
+ name: "unix.Mount to symlink succeeds", + testfunc: func() error { + symlinkPath := filepath.Join(tempdir, "symlink") + if err := unix.Mount(filepath.Join(tempdir, "subdir"), symlinkPath, "bind", unix.MS_BIND, ""); err != nil { + return fmt.Errorf("mount: %v", err) + } + return unix.Unmount(symlinkPath, unix.MNT_DETACH) + }, + }, { + name: "SafeMount to folder succeeds", + testfunc: func() error { + dir2Path := filepath.Join(tempdir, "subdir2") + if err := specutils.SafeMount(filepath.Join(tempdir, "subdir"), dir2Path, "bind", unix.MS_BIND, "", "/proc"); err != nil { + return fmt.Errorf("SafeMount: %v", err) + } + return unix.Unmount(dir2Path, unix.MNT_DETACH) + }, + }, { + name: "SafeMount to symlink fails", + testfunc: func() error { + err := specutils.SafeMount(filepath.Join(tempdir, "subdir"), filepath.Join(tempdir, "symlink"), "bind", unix.MS_BIND, "", "/proc") + if err == nil { + return fmt.Errorf("SafeMount didn't fail, but should have") + } + var symErr *specutils.ErrSymlinkMount + if !errors.As(err, &symErr) { + return fmt.Errorf("expected SafeMount to fail with ErrSymlinkMount, but got: %v", err) + } + return nil + }, + }} + + for _, tc := range tcs { + if err := runTest(tempdir, tc.testfunc); err != nil { + log.Fatalf("failed test %q: %v", tc.name, err) + } + } +} + +// runTest runs testfunc with the following directory structure: +// tempdir/ +// subdir/ +// subdir2/ +// symlink --> ./subdir2 +func runTest(tempdir string, testfunc func() error) error { + // Create tempdir/subdir/. + dirPath := filepath.Join(tempdir, "subdir") + if err := os.Mkdir(dirPath, 0777); err != nil { + return fmt.Errorf("os.Mkdir(%s, 0777)", dirPath) + } + defer os.Remove(dirPath) + + // Create tempdir/subdir2/. + dir2Path := filepath.Join(tempdir, "subdir2") + if err := os.Mkdir(dir2Path, 0777); err != nil { + return fmt.Errorf("os.Mkdir(%s, 0777)", dir2Path) + } + defer os.Remove(dir2Path) + + // Create tempdir/symlink, which points to ./subdir2. + symlinkPath := filepath.Join(tempdir, "symlink") + if err := os.Symlink("./subdir2", symlinkPath); err != nil { + return fmt.Errorf("failed to create symlink %s: %v", symlinkPath, err) + } + defer os.Remove(symlinkPath) + + // Run the actual test. + return testfunc() +} diff --git a/runsc/specutils/safemount_test/safemount_test.go b/runsc/specutils/safemount_test/safemount_test.go new file mode 100644 index 000000000..8820978c4 --- /dev/null +++ b/runsc/specutils/safemount_test/safemount_test.go @@ -0,0 +1,53 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemount_test + +import ( + "os" + "os/exec" + "syscall" + "testing" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +func TestSafeMount(t *testing.T) { + // We run the actual tests in another process, as we need CAP_SYS_ADMIN to + // call mount(2). The new process runs in its own user and mount namespaces. 
+ runner, err := testutil.FindFile("runsc/specutils/safemount_test/safemount_runner") + if err != nil { + t.Fatalf("failed to find test runner binary: %v", err) + } + cmd := exec.Command(runner, t.TempDir()) + cmd.SysProcAttr = &unix.SysProcAttr{ + Cloneflags: unix.CLONE_NEWNS | unix.CLONE_NEWUSER, + UidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getuid(), Size: 1}, + }, + GidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getgid(), Size: 1}, + }, + GidMappingsEnableSetgroups: false, + Credential: &syscall.Credential{ + Uid: 0, + Gid: 0, + }, + } + output, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("failed running %s with error: %v\ntest output:\n%s", cmd, err, output) + } +} diff --git a/runsc/specutils/seccomp/audit_amd64.go b/runsc/specutils/seccomp/audit_amd64.go index 417cf4a7a..5ef3edaea 100644 --- a/runsc/specutils/seccomp/audit_amd64.go +++ b/runsc/specutils/seccomp/audit_amd64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build amd64 // +build amd64 package seccomp diff --git a/runsc/specutils/seccomp/audit_arm64.go b/runsc/specutils/seccomp/audit_arm64.go index b727ceff2..6253cba61 100644 --- a/runsc/specutils/seccomp/audit_arm64.go +++ b/runsc/specutils/seccomp/audit_arm64.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build arm64 // +build arm64 package seccomp diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index c228d6299..5365b5b1b 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -217,7 +217,7 @@ func ReadMounts(f *os.File) ([]specs.Mount, error) { } var mounts []specs.Mount if err := json.Unmarshal(bytes, &mounts); err != nil { - return nil, fmt.Errorf("error unmarshaling mounts: %v\n %s", err, string(bytes)) + return nil, fmt.Errorf("error unmarshaling mounts: %v\nJSON bytes:\n%s", err, string(bytes)) } return mounts, nil } @@ -434,10 +434,12 @@ func DebugLogFile(logPattern, command, test string) (*os.File, error) { return os.OpenFile(logPattern, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) } -// Mount creates the mount point and calls Mount with the given flags. -func Mount(src, dst, typ string, flags uint32) error { - // Create the mount point inside. The type must be the same as the - // source (file or directory). +// SafeSetupAndMount creates the mount point and calls Mount with the given +// flags. procPath is the path to procfs. If it is "", procfs is assumed to be +// mounted at /proc. +func SafeSetupAndMount(src, dst, typ string, flags uint32, procPath string) error { + // Create the mount point inside. The type must be the same as the source + // (file or directory). var isDir bool if typ == "proc" { // Special case, as there is no source directory for proc mounts. @@ -468,12 +470,50 @@ func Mount(src, dst, typ string, flags uint32) error { } // Do the mount. - if err := unix.Mount(src, dst, typ, uintptr(flags), ""); err != nil { + if err := SafeMount(src, dst, typ, uintptr(flags), "", procPath); err != nil { return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err) } return nil } +// ErrSymlinkMount is returned by SafeMount when the mount destination is found +// to be a symlink. +type ErrSymlinkMount struct { + error +} + +// SafeMount is like unix.Mount, but will fail if dst is a symlink. procPath is +// the path to procfs. 
If it is "", procfs is assumed to be mounted at /proc. +// +// SafeMount can fail when dst contains a symlink. However, it is called in the +// normal case with a destination consisting of a known root (/proc/root) and +// symlink-free path (from resolveSymlink). +func SafeMount(src, dst, fstype string, flags uintptr, data, procPath string) error { + // Open the destination. + fd, err := unix.Open(dst, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("failed to safely mount: Open(%s, _, _): %w", dst, err) + } + defer unix.Close(fd) + + // Use /proc/self/fd/ to verify that we opened the intended destination. This + // guards against dst being a symlink, in which case we could accidentally + // mount over the symlink's target. + if procPath == "" { + procPath = "/proc" + } + safePath := filepath.Join(procPath, "self/fd", strconv.Itoa(fd)) + target, err := os.Readlink(safePath) + if err != nil { + return fmt.Errorf("failed to safely mount: Readlink(%s): %w", safePath, err) + } + if dst != target { + return &ErrSymlinkMount{fmt.Errorf("failed to safely mount: expected to open %s, but found %s", dst, target)} + } + + return unix.Mount(src, safePath, fstype, flags, data) +} + // ContainsStr returns true if 'str' is inside 'strs'. func ContainsStr(strs []string, str string) bool { for _, s := range strs { diff --git a/runsc/specutils/specutils_test.go b/runsc/specutils/specutils_test.go index 2c86fffe8..e2d3a75dc 100644 --- a/runsc/specutils/specutils_test.go +++ b/runsc/specutils/specutils_test.go @@ -29,7 +29,7 @@ func TestWaitForReadyHappy(t *testing.T) { if err := cmd.Start(); err != nil { t.Fatalf("cmd.Start() failed, err: %v", err) } - defer cmd.Wait() + defer func() { _ = cmd.Wait() }() var count int err := WaitForReady(cmd.Process.Pid, 5*time.Second, func() (bool, error) { @@ -42,7 +42,9 @@ func TestWaitForReadyHappy(t *testing.T) { if err != nil { t.Errorf("ProcessWaitReady got: %v, expected: nil", err) } - cmd.Process.Kill() + if err := cmd.Process.Kill(); err != nil { + t.Errorf("cmd.ProcessKill(): %v", err) + } } func TestWaitForReadyFail(t *testing.T) { @@ -50,7 +52,7 @@ func TestWaitForReadyFail(t *testing.T) { if err := cmd.Start(); err != nil { t.Fatalf("cmd.Start() failed, err: %v", err) } - defer cmd.Wait() + defer func() { _ = cmd.Wait() }() var count int err := WaitForReady(cmd.Process.Pid, 5*time.Second, func() (bool, error) { @@ -58,12 +60,14 @@ func TestWaitForReadyFail(t *testing.T) { count++ return false, nil } - return false, fmt.Errorf("Fake error") + return false, fmt.Errorf("fake error") }) if err == nil { t.Errorf("ProcessWaitReady got: nil, expected: error") } - cmd.Process.Kill() + if err := cmd.Process.Kill(); err != nil { + t.Errorf("cmd.ProcessKill(): %v", err) + } } func TestWaitForReadyNotRunning(t *testing.T) { @@ -71,7 +75,7 @@ func TestWaitForReadyNotRunning(t *testing.T) { if err := cmd.Start(); err != nil { t.Fatalf("cmd.Start() failed, err: %v", err) } - defer cmd.Wait() + defer func() { _ = cmd.Wait() }() err := WaitForReady(cmd.Process.Pid, 5*time.Second, func() (bool, error) { return false, nil @@ -89,15 +93,17 @@ func TestWaitForReadyTimeout(t *testing.T) { if err := cmd.Start(); err != nil { t.Fatalf("cmd.Start() failed, err: %v", err) } - defer cmd.Wait() + defer func() { _ = cmd.Wait() }() err := WaitForReady(cmd.Process.Pid, 50*time.Millisecond, func() (bool, error) { return false, nil }) - if !strings.Contains(err.Error(), "not running yet") { + if err == nil || !strings.Contains(err.Error(), "not running yet") { 
t.Errorf("ProcessWaitReady got: %v, expected: not running yet", err) } - cmd.Process.Kill() + if err := cmd.Process.Kill(); err != nil { + t.Errorf("cmd.ProcessKill(): %v", err) + } } func TestSpecInvalid(t *testing.T) { diff --git a/runsc/version.go b/runsc/version.go index ab9194b9d..c250f4a2a 100644 --- a/runsc/version.go +++ b/runsc/version.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.1 +// +build go1.1 + package main // version is set during linking. |