diff options
-rw-r--r-- | pkg/sentry/control/proc.go | 8 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 19 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 12 | ||||
-rw-r--r-- | runsc/boot/fs.go | 1 | ||||
-rw-r--r-- | runsc/boot/loader.go | 33 | ||||
-rw-r--r-- | runsc/container/container_test.go | 10 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 98 |
7 files changed, 168 insertions, 13 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 60e6c9285..3f9772b87 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -92,6 +92,9 @@ type ExecArgs struct { // ContainerID is the container for the process being executed. ContainerID string + + // PIDNamespace is the pid namespace for the process being executed. + PIDNamespace *kernel.PIDNamespace } // String prints the arguments as a string. @@ -162,6 +165,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI IPCNamespace: proc.Kernel.RootIPCNamespace(), AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(), ContainerID: args.ContainerID, + PIDNamespace: args.PIDNamespace, } if initArgs.Root != nil { // initArgs must hold a reference on Root, which will be @@ -341,7 +345,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { ts := k.TaskSet() now := k.RealtimeClock().Now() for _, tg := range ts.Root.ThreadGroups() { - pid := ts.Root.IDOfThreadGroup(tg) + pid := tg.PIDNamespace().IDOfThreadGroup(tg) // If tg has already been reaped ignore it. if pid == 0 { continue @@ -352,7 +356,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { ppid := kernel.ThreadID(0) if p := tg.Leader().Parent(); p != nil { - ppid = ts.Root.IDOfThreadGroup(p.ThreadGroup()) + ppid = p.PIDNamespace().IDOfThreadGroup(p.ThreadGroup()) } *out = append(*out, &Process{ UID: tg.Leader().Credentials().EffectiveKUID, diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 38b49cba2..4c2d48e65 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -240,6 +240,9 @@ type InitKernelArgs struct { // RootAbstractSocketNamespace is the root Abstract Socket namespace. RootAbstractSocketNamespace *AbstractSocketNamespace + + // PIDNamespace is the root PID namespace. + PIDNamespace *PIDNamespace } // Init initialize the Kernel with no tasks. @@ -262,7 +265,7 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.featureSet = args.FeatureSet k.timekeeper = args.Timekeeper - k.tasks = newTaskSet() + k.tasks = newTaskSet(args.PIDNamespace) k.rootUserNamespace = args.RootUserNamespace k.rootUTSNamespace = args.RootUTSNamespace k.rootIPCNamespace = args.RootIPCNamespace @@ -622,6 +625,9 @@ type CreateProcessArgs struct { // IPCNamespace is the initial IPC namespace. IPCNamespace *IPCNamespace + // PIDNamespace is the initial PID Namespace. + PIDNamespace *PIDNamespace + // AbstractSocketNamespace is the initial Abstract Socket namespace. AbstractSocketNamespace *AbstractSocketNamespace @@ -668,9 +674,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { case CtxKernel: return ctx.k case CtxPIDNamespace: - // "The new task ... is in the root PID namespace." - - // Kernel.CreateProcess - return ctx.k.tasks.Root + return ctx.args.PIDNamespace case CtxUTSNamespace: return ctx.args.UTSNamespace case CtxIPCNamespace: @@ -745,7 +749,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, mounts.IncRef() } - tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) + tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) // Grab the root directory. @@ -1018,6 +1022,11 @@ func (k *Kernel) RootIPCNamespace() *IPCNamespace { return k.rootIPCNamespace } +// RootPIDNamespace returns the root PIDNamespace. +func (k *Kernel) RootPIDNamespace() *PIDNamespace { + return k.tasks.Root +} + // RootAbstractSocketNamespace returns the root AbstractSocketNamespace. func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index b21b182fc..8267929a6 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -90,9 +90,9 @@ type TaskSet struct { } // newTaskSet returns a new, empty TaskSet. -func newTaskSet() *TaskSet { - ts := &TaskSet{} - ts.Root = newPIDNamespace(ts, nil /* parent */, auth.NewRootUserNamespace()) +func newTaskSet(pidns *PIDNamespace) *TaskSet { + ts := &TaskSet{Root: pidns} + pidns.owner = ts return ts } @@ -186,6 +186,12 @@ func newPIDNamespace(ts *TaskSet, parent *PIDNamespace, userns *auth.UserNamespa } } +// NewRootPIDNamespace creates the root PID namespace. 'owner' is not available +// yet when root namespace is created and must be set by caller. +func NewRootPIDNamespace(userns *auth.UserNamespace) *PIDNamespace { + return newPIDNamespace(nil, nil, userns) +} + // NewChild returns a new, empty PID namespace that is a child of ns. Authority // over the new PID namespace is controlled by userns. func (ns *PIDNamespace) NewChild(userns *auth.UserNamespace) *PIDNamespace { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index aaad0121b..7e95e1f41 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -518,6 +518,7 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs * Credentials: auth.NewRootCredentials(creds.UserNamespace), Umask: 0022, MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + PIDNamespace: procArgs.PIDNamespace, } rootCtx := rootProcArgs.NewContext(c.k) diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a8adaf292..50cac0433 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -138,6 +138,9 @@ type execProcess struct { // tty will be nil if the process is not attached to a terminal. tty *host.TTYFileOperations + + // pidnsPath is the pid namespace path in spec + pidnsPath string } func init() { @@ -278,6 +281,7 @@ func New(args Args) (*Loader, error) { RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace), RootIPCNamespace: kernel.NewIPCNamespace(creds.UserNamespace), RootAbstractSocketNamespace: kernel.NewAbstractSocketNamespace(), + PIDNamespace: kernel.NewRootPIDNamespace(creds.UserNamespace), }); err != nil { return nil, fmt.Errorf("initializing kernel: %v", err) } @@ -298,7 +302,7 @@ func New(args Args) (*Loader, error) { // Create a watchdog. dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction) - procArgs, err := newProcess(args.ID, args.Spec, creds, k) + procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace()) if err != nil { return nil, fmt.Errorf("creating init process for root container: %v", err) } @@ -376,7 +380,7 @@ func New(args Args) (*Loader, error) { } // newProcess creates a process that can be run with kernel.CreateProcess. -func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel) (kernel.CreateProcessArgs, error) { +func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) { // Create initial limits. ls, err := createLimitSet(spec) if err != nil { @@ -396,7 +400,9 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel. IPCNamespace: k.RootIPCNamespace(), AbstractSocketNamespace: k.RootAbstractSocketNamespace(), ContainerID: id, + PIDNamespace: pidns, } + return procArgs, nil } @@ -559,6 +565,9 @@ func (l *Loader) run() error { } ep.tg = l.k.GlobalInit() + if ns, ok := specutils.GetNS(specs.PIDNamespace, l.spec); ok { + ep.pidnsPath = ns.Path + } if l.console { ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() @@ -627,7 +636,24 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file caps, l.k.RootUserNamespace()) - procArgs, err := newProcess(cid, spec, creds, l.k) + var pidns *kernel.PIDNamespace + if ns, ok := specutils.GetNS(specs.PIDNamespace, spec); ok { + if ns.Path != "" { + for _, p := range l.processes { + if ns.Path == p.pidnsPath { + pidns = p.tg.PIDNamespace() + break + } + } + } + if pidns == nil { + pidns = l.k.RootPIDNamespace().NewChild(l.k.RootUserNamespace()) + } + l.processes[eid].pidnsPath = ns.Path + } else { + pidns = l.k.RootPIDNamespace() + } + procArgs, err := newProcess(cid, spec, creds, l.k, pidns) if err != nil { return fmt.Errorf("creating new process: %v", err) } @@ -749,6 +775,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Start the process. proc := control.Proc{Kernel: l.k} + args.PIDNamespace = tg.PIDNamespace() newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args) if err != nil { return 0, err diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c1d6ca7b8..ff68c586e 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -714,6 +714,16 @@ func TestKillPid(t *testing.T) { if err := waitForProcessCount(cont, nProcs-1); err != nil { t.Fatal(err) } + + procs, err = cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) + } + for _, p := range procs { + if pid == int32(p.PID) { + t.Fatalf("pid %d is still alive, which should be killed", pid) + } + } } } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index e299a0e88..978a422f5 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -165,6 +165,104 @@ func TestMultiContainerSanity(t *testing.T) { } } +// TestMultiPIDNS checks that it is possible to run 2 dead-simple +// containers in the same sandbox with different pidns. +func TestMultiPIDNS(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep) + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, + }, + } + + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + } +} + +// TestMultiPIDNSPath checks the pidns path. +func TestMultiPIDNSPath(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep, sleep) + testSpecs[0].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, + }, + } + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, + }, + } + testSpecs[2].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/2/ns/pid", + }, + }, + } + + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + if err := waitForProcessList(containers[2], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + } +} + func TestMultiContainerWait(t *testing.T) { // The first container should run the entire duration of the test. cmd1 := []string{"sleep", "100"} |