summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorchris.zn <chris.zn@antfin.com>2019-04-30 23:35:36 +0800
committerchris.zn <chris.zn@antfin.com>2019-07-24 13:38:23 +0800
commit1c5b6d9bd26ba090610d05366df90d4fee91c677 (patch)
tree964de3a578b4100c5a445ba798cd24f20873c6de
parent7e38d643334647fb79c7cc8be35745699de264e6 (diff)
Use different pidns among different containers
Previously, all containers in a sandbox shared a single PID namespace. As a result, a container could see the processes of another container in the same sandbox. This patch uses a different PID namespace for each container. Signed-off-by: chris.zn <chris.zn@antfin.com>
-rw-r--r--pkg/sentry/control/proc.go8
-rw-r--r--pkg/sentry/kernel/kernel.go14
-rw-r--r--runsc/boot/fs.go1
-rw-r--r--runsc/boot/loader.go32
-rw-r--r--runsc/container/container_test.go10
-rw-r--r--runsc/container/multi_container_test.go98
6 files changed, 154 insertions, 9 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 60e6c9285..3f9772b87 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -92,6 +92,9 @@ type ExecArgs struct {
// ContainerID is the container for the process being executed.
ContainerID string
+
+ // PIDNamespace is the pid namespace for the process being executed.
+ PIDNamespace *kernel.PIDNamespace
}
// String prints the arguments as a string.
@@ -162,6 +165,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
IPCNamespace: proc.Kernel.RootIPCNamespace(),
AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(),
ContainerID: args.ContainerID,
+ PIDNamespace: args.PIDNamespace,
}
if initArgs.Root != nil {
// initArgs must hold a reference on Root, which will be
@@ -341,7 +345,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error {
ts := k.TaskSet()
now := k.RealtimeClock().Now()
for _, tg := range ts.Root.ThreadGroups() {
- pid := ts.Root.IDOfThreadGroup(tg)
+ pid := tg.PIDNamespace().IDOfThreadGroup(tg)
// If tg has already been reaped ignore it.
if pid == 0 {
continue
@@ -352,7 +356,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error {
ppid := kernel.ThreadID(0)
if p := tg.Leader().Parent(); p != nil {
- ppid = ts.Root.IDOfThreadGroup(p.ThreadGroup())
+ ppid = p.PIDNamespace().IDOfThreadGroup(p.ThreadGroup())
}
*out = append(*out, &Process{
UID: tg.Leader().Credentials().EffectiveKUID,
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 38b49cba2..70f5a3f0b 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -622,6 +622,9 @@ type CreateProcessArgs struct {
// IPCNamespace is the initial IPC namespace.
IPCNamespace *IPCNamespace
+ // PIDNamespace is the initial PID Namespace.
+ PIDNamespace *PIDNamespace
+
// AbstractSocketNamespace is the initial Abstract Socket namespace.
AbstractSocketNamespace *AbstractSocketNamespace
@@ -668,9 +671,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
case CtxKernel:
return ctx.k
case CtxPIDNamespace:
- // "The new task ... is in the root PID namespace." -
- // Kernel.CreateProcess
- return ctx.k.tasks.Root
+ return ctx.args.PIDNamespace
case CtxUTSNamespace:
return ctx.args.UTSNamespace
case CtxIPCNamespace:
@@ -745,7 +746,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
mounts.IncRef()
}
- tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
+ tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
// Grab the root directory.
@@ -1018,6 +1019,11 @@ func (k *Kernel) RootIPCNamespace() *IPCNamespace {
return k.rootIPCNamespace
}
+// RootPIDNamespace returns the root PIDNamespace.
+func (k *Kernel) RootPIDNamespace() *PIDNamespace {
+ return k.tasks.Root
+}
+
// RootAbstractSocketNamespace returns the root AbstractSocketNamespace.
func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace {
return k.rootAbstractSocketNamespace
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 55bfc27ff..4bff0d034 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -505,6 +505,7 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *
Credentials: auth.NewRootCredentials(creds.UserNamespace),
Umask: 0022,
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ PIDNamespace: procArgs.PIDNamespace,
}
rootCtx := rootProcArgs.NewContext(c.k)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a8adaf292..b91553c4c 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -138,6 +138,9 @@ type execProcess struct {
// tty will be nil if the process is not attached to a terminal.
tty *host.TTYFileOperations
+
+ // pidnsPath is the pid namespace path in spec
+ pidnsPath string
}
func init() {
@@ -298,7 +301,7 @@ func New(args Args) (*Loader, error) {
// Create a watchdog.
dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
- procArgs, err := newProcess(args.ID, args.Spec, creds, k)
+ procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
if err != nil {
return nil, fmt.Errorf("creating init process for root container: %v", err)
}
@@ -376,7 +379,7 @@ func New(args Args) (*Loader, error) {
}
// newProcess creates a process that can be run with kernel.CreateProcess.
-func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) {
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
@@ -396,7 +399,9 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
IPCNamespace: k.RootIPCNamespace(),
AbstractSocketNamespace: k.RootAbstractSocketNamespace(),
ContainerID: id,
+ PIDNamespace: pidns,
}
+
return procArgs, nil
}
@@ -559,6 +564,9 @@ func (l *Loader) run() error {
}
ep.tg = l.k.GlobalInit()
+ if ns, ok := specutils.GetNS(specs.PIDNamespace, l.spec); ok {
+ ep.pidnsPath = ns.Path
+ }
if l.console {
ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
defer ttyFile.DecRef()
@@ -627,7 +635,24 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
caps,
l.k.RootUserNamespace())
- procArgs, err := newProcess(cid, spec, creds, l.k)
+ var pidns *kernel.PIDNamespace
+ if ns, ok := specutils.GetNS(specs.PIDNamespace, spec); ok {
+ if ns.Path != "" {
+ for _, p := range l.processes {
+ if ns.Path == p.pidnsPath {
+ pidns = p.tg.PIDNamespace()
+ break
+ }
+ }
+ }
+ if pidns == nil {
+ pidns = l.k.RootPIDNamespace().NewChild(l.k.RootUserNamespace())
+ }
+ l.processes[eid].pidnsPath = ns.Path
+ } else {
+ pidns = l.k.RootPIDNamespace()
+ }
+ procArgs, err := newProcess(cid, spec, creds, l.k, pidns)
if err != nil {
return fmt.Errorf("creating new process: %v", err)
}
@@ -749,6 +774,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
// Start the process.
proc := control.Proc{Kernel: l.k}
+ args.PIDNamespace = tg.PIDNamespace()
newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index c1d6ca7b8..ff68c586e 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -714,6 +714,16 @@ func TestKillPid(t *testing.T) {
if err := waitForProcessCount(cont, nProcs-1); err != nil {
t.Fatal(err)
}
+
+ procs, err = cont.Processes()
+ if err != nil {
+ t.Fatalf("failed to get process list: %v", err)
+ }
+ for _, p := range procs {
+ if pid == int32(p.PID) {
+ t.Fatalf("pid %d is still alive, which should be killed", pid)
+ }
+ }
}
}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index e299a0e88..978a422f5 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -165,6 +165,104 @@ func TestMultiContainerSanity(t *testing.T) {
}
}
+// TestMultiPIDNS checks that it is possible to run 2 dead-simple
+// containers in the same sandbox with different pidns.
+func TestMultiPIDNS(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ testSpecs, ids := createSpecs(sleep, sleep)
+ testSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ },
+ },
+ }
+
+ containers, cleanup, err := startContainers(conf, testSpecs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ // Check via ps that multiple processes are running.
+ expectedPL := []*control.Process{
+ {PID: 1, Cmd: "sleep"},
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ expectedPL = []*control.Process{
+ {PID: 1, Cmd: "sleep"},
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ }
+}
+
+// TestMultiPIDNSPath checks the pidns path.
+func TestMultiPIDNSPath(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ testSpecs, ids := createSpecs(sleep, sleep, sleep)
+ testSpecs[0].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/1/ns/pid",
+ },
+ },
+ }
+ testSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/1/ns/pid",
+ },
+ },
+ }
+ testSpecs[2].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{
+ {
+ Type: "pid",
+ Path: "/proc/2/ns/pid",
+ },
+ },
+ }
+
+ containers, cleanup, err := startContainers(conf, testSpecs, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ // Check via ps that multiple processes are running.
+ expectedPL := []*control.Process{
+ {PID: 1, Cmd: "sleep"},
+ }
+ if err := waitForProcessList(containers[0], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ if err := waitForProcessList(containers[2], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+
+ expectedPL = []*control.Process{
+ {PID: 2, Cmd: "sleep"},
+ }
+ if err := waitForProcessList(containers[1], expectedPL); err != nil {
+ t.Errorf("failed to wait for sleep to start: %v", err)
+ }
+ }
+}
+
func TestMultiContainerWait(t *testing.T) {
// The first container should run the entire duration of the test.
cmd1 := []string{"sleep", "100"}