diff options
Diffstat (limited to 'vendor/github.com/opencontainers/runc')
24 files changed, 0 insertions, 3715 deletions
diff --git a/vendor/github.com/opencontainers/runc/LICENSE b/vendor/github.com/opencontainers/runc/LICENSE deleted file mode 100644 index 27448585a..000000000 --- a/vendor/github.com/opencontainers/runc/LICENSE +++ /dev/null @@ -1,191 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - Copyright 2014 Docker, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/github.com/opencontainers/runc/NOTICE b/vendor/github.com/opencontainers/runc/NOTICE deleted file mode 100644 index 5c97abce4..000000000 --- a/vendor/github.com/opencontainers/runc/NOTICE +++ /dev/null @@ -1,17 +0,0 @@ -runc - -Copyright 2012-2015 Docker, Inc. - -This product includes software developed at Docker, Inc. (http://www.docker.com). - -The following is courtesy of our legal counsel: - - -Use and transfer of Docker may be subject to certain restrictions by the -United States and other governments. -It is your responsibility to ensure that your use and/or transfer does not -violate applicable laws. - -For more information, please see http://www.bis.doc.gov - -See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md deleted file mode 100644 index 11fa4138b..000000000 --- a/vendor/github.com/opencontainers/runc/README.md +++ /dev/null @@ -1,269 +0,0 @@ -# runc - -[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) -[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) -[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc) - -## Introduction - -`runc` is a CLI tool for spawning and running containers according to the OCI specification. - -## Releases - -`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository. -We will try to make sure that `runc` and the OCI specification major versions stay in lockstep. -This means that `runc` 1.0.0 should implement the 1.0 version of the specification. - -You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. - -## Security - -Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/) - -## Building - -`runc` currently supports the Linux platform with various architecture support. -It must be built with Go version 1.6 or higher in order for some features to function properly. - -In order to enable seccomp support you will need to install `libseccomp` on your platform. -> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu - -Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make. - -```bash -# create a 'github.com/opencontainers' in your GOPATH/src -cd github.com/opencontainers -git clone https://github.com/opencontainers/runc -cd runc - -make -sudo make install -``` - -You can also use `go get` to install to your `GOPATH`, assuming that you have a `github.com` parent folder already created under `src`: - -```bash -go get github.com/opencontainers/runc -cd $GOPATH/src/github.com/opencontainers/runc -make -sudo make install -``` - -`runc` will be installed to `/usr/local/sbin/runc` on your system. - - -#### Build Tags - -`runc` supports optional build tags for compiling support of various features. -To add build tags to the make option the `BUILDTAGS` variable must be set. - -```bash -make BUILDTAGS='seccomp apparmor' -``` - -| Build Tag | Feature | Dependency | -|-----------|------------------------------------|-------------| -| seccomp | Syscall filtering | libseccomp | -| selinux | selinux process and mount labeling | <none> | -| apparmor | apparmor profile support | <none> | -| ambient | ambient capability support | kernel 4.3 | -| nokmem | disable kernel memory account | <none> | - - -### Running the test suite - -`runc` currently supports running its test suite via Docker. -To run the suite just type `make test`. - -```bash -make test -``` - -There are additional make targets for running the tests outside of a container but this is not recommended as the tests are written with the expectation that they can write and remove anywhere. - -You can run a specific test case by setting the `TESTFLAGS` variable. - -```bash -# make test TESTFLAGS="-run=SomeTestFunction" -``` - -You can run a specific integration test by setting the `TESTPATH` variable. - -```bash -# make test TESTPATH="/checkpoint.bats" -``` - -You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables. - -```bash -# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/" -``` - -### Dependencies Management - -`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. -Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update -new dependencies. - -## Using runc - -### Creating an OCI Bundle - -In order to use runc you must have your container in the format of an OCI bundle. -If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container. - -```bash -# create the top most bundle directory -mkdir /mycontainer -cd /mycontainer - -# create the rootfs directory -mkdir rootfs - -# export busybox via Docker into the rootfs directory -docker export $(docker create busybox) | tar -C rootfs -xvf - -``` - -After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle. -`runc` provides a `spec` command to generate a base template spec that you are then able to edit. -To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository. - -```bash -runc spec -``` - -### Running Containers - -Assuming you have an OCI bundle from the previous step you can execute the container in two different ways. - -The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits. - -```bash -# run as root -cd /mycontainer -runc run mycontainerid -``` - -If you used the unmodified `runc spec` template this should give you a `sh` session inside the container. - -The second way to start a container is using the specs lifecycle operations. -This gives you more power over how the container is created and managed while it is running. -This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here. -Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`. - - -```json - "process": { - "terminal": false, - "user": { - "uid": 0, - "gid": 0 - }, - "args": [ - "sleep", "5" - ], - "env": [ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "TERM=xterm" - ], - "cwd": "/", - "capabilities": { - "bounding": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], - "effective": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], - "inheritable": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], - "permitted": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], - "ambient": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ] - }, - "rlimits": [ - { - "type": "RLIMIT_NOFILE", - "hard": 1024, - "soft": 1024 - } - ], - "noNewPrivileges": true - }, -``` - -Now we can go through the lifecycle operations in your shell. - - -```bash -# run as root -cd /mycontainer -runc create mycontainerid - -# view the container is created and in the "created" state -runc list - -# start the process inside the container -runc start mycontainerid - -# after 5 seconds view that the container has exited and is now in the stopped state -runc list - -# now delete the container -runc delete mycontainerid -``` - -This allows higher level systems to augment the containers creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`. - -#### Rootless containers -`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user: -```bash -# Same as the first example -mkdir ~/mycontainer -cd ~/mycontainer -mkdir rootfs -docker export $(docker create busybox) | tar -C rootfs -xvf - - -# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user. -runc spec --rootless - -# The --root parameter tells runc where to store the container state. It must be writable by the user. -runc --root /tmp/runc run mycontainerid -``` - -#### Supervisors - -`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit. -An example systemd unit file looks something like this. - -```systemd -[Unit] -Description=Start My Container - -[Service] -Type=forking -ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid -ExecStopPost=/usr/local/sbin/runc delete mycontainerid -WorkingDirectory=/mycontainer -PIDFile=/run/mycontainerid.pid - -[Install] -WantedBy=multi-user.target -``` - -## License - -The code and docs are released under the [Apache 2.0 license](LICENSE). diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md deleted file mode 100644 index 1d7fa04c0..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ /dev/null @@ -1,330 +0,0 @@ -# libcontainer - -[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer) - -Libcontainer provides a native Go implementation for creating containers -with namespaces, cgroups, capabilities, and filesystem access controls. -It allows you to manage the lifecycle of the container performing additional operations -after the container is created. - - -#### Container -A container is a self contained execution environment that shares the kernel of the -host system and which is (optionally) isolated from other containers in the system. - -#### Using libcontainer - -Because containers are spawned in a two step process you will need a binary that -will be executed as the init process for the container. In libcontainer, we use -the current binary (/proc/self/exe) to be executed as the init process, and use -arg "init", we call the first step process "bootstrap", so you always need a "init" -function as the entry of "bootstrap". - -In addition to the go init function the early stage bootstrap is handled by importing -[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md). - -```go -import ( - _ "github.com/opencontainers/runc/libcontainer/nsenter" -) - -func init() { - if len(os.Args) > 1 && os.Args[1] == "init" { - runtime.GOMAXPROCS(1) - runtime.LockOSThread() - factory, _ := libcontainer.New("") - if err := factory.StartInitialization(); err != nil { - logrus.Fatal(err) - } - panic("--this line should have never been executed, congratulations--") - } -} -``` - -Then to create a container you first have to initialize an instance of a factory -that will handle the creation and initialization for a container. - -```go -factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) -if err != nil { - logrus.Fatal(err) - return -} -``` - -Once you have an instance of the factory created we can create a configuration -struct describing how the container is to be created. A sample would look similar to this: - -```go -defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV -config := &configs.Config{ - Rootfs: "/your/path/to/rootfs", - Capabilities: &configs.Capabilities{ - Bounding: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Effective: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Inheritable: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Permitted: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Ambient: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - }, - Namespaces: configs.Namespaces([]configs.Namespace{ - {Type: configs.NEWNS}, - {Type: configs.NEWUTS}, - {Type: configs.NEWIPC}, - {Type: configs.NEWPID}, - {Type: configs.NEWUSER}, - {Type: configs.NEWNET}, - {Type: configs.NEWCGROUP}, - }), - Cgroups: &configs.Cgroup{ - Name: "test-container", - Parent: "system", - Resources: &configs.Resources{ - MemorySwappiness: nil, - AllowAllDevices: nil, - AllowedDevices: configs.DefaultAllowedDevices, - }, - }, - MaskPaths: []string{ - "/proc/kcore", - "/sys/firmware", - }, - ReadonlyPaths: []string{ - "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", - }, - Devices: configs.DefaultAutoCreatedDevices, - Hostname: "testing", - Mounts: []*configs.Mount{ - { - Source: "proc", - Destination: "/proc", - Device: "proc", - Flags: defaultMountFlags, - }, - { - Source: "tmpfs", - Destination: "/dev", - Device: "tmpfs", - Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, - Data: "mode=755", - }, - { - Source: "devpts", - Destination: "/dev/pts", - Device: "devpts", - Flags: unix.MS_NOSUID | unix.MS_NOEXEC, - Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", - }, - { - Device: "tmpfs", - Source: "shm", - Destination: "/dev/shm", - Data: "mode=1777,size=65536k", - Flags: defaultMountFlags, - }, - { - Source: "mqueue", - Destination: "/dev/mqueue", - Device: "mqueue", - Flags: defaultMountFlags, - }, - { - Source: "sysfs", - Destination: "/sys", - Device: "sysfs", - Flags: defaultMountFlags | unix.MS_RDONLY, - }, - }, - UidMappings: []configs.IDMap{ - { - ContainerID: 0, - HostID: 1000, - Size: 65536, - }, - }, - GidMappings: []configs.IDMap{ - { - ContainerID: 0, - HostID: 1000, - Size: 65536, - }, - }, - Networks: []*configs.Network{ - { - Type: "loopback", - Address: "127.0.0.1/0", - Gateway: "localhost", - }, - }, - Rlimits: []configs.Rlimit{ - { - Type: unix.RLIMIT_NOFILE, - Hard: uint64(1025), - Soft: uint64(1025), - }, - }, -} -``` - -Once you have the configuration populated you can create a container: - -```go -container, err := factory.Create("container-id", config) -if err != nil { - logrus.Fatal(err) - return -} -``` - -To spawn bash as the initial process inside the container and have the -processes pid returned in order to wait, signal, or kill the process: - -```go -process := &libcontainer.Process{ - Args: []string{"/bin/bash"}, - Env: []string{"PATH=/bin"}, - User: "daemon", - Stdin: os.Stdin, - Stdout: os.Stdout, - Stderr: os.Stderr, -} - -err := container.Run(process) -if err != nil { - container.Destroy() - logrus.Fatal(err) - return -} - -// wait for the process to finish. -_, err := process.Wait() -if err != nil { - logrus.Fatal(err) -} - -// destroy the container. -container.Destroy() -``` - -Additional ways to interact with a running container are: - -```go -// return all the pids for all processes running inside the container. -processes, err := container.Processes() - -// get detailed cpu, memory, io, and network statistics for the container and -// it's processes. -stats, err := container.Stats() - -// pause all processes inside the container. -container.Pause() - -// resume all paused processes. -container.Resume() - -// send signal to container's init process. -container.Signal(signal) - -// update container resource constraints. -container.Set(config) - -// get current status of the container. -status, err := container.Status() - -// get current container's state information. -state, err := container.State() -``` - - -#### Checkpoint & Restore - -libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. -This let's you save the state of a process running inside a container to disk, and then restore -that state into a new process, on the same machine or on another machine. - -`criu` version 1.5.2 or higher is required to use checkpoint and restore. -If you don't already have `criu` installed, you can build it from source, following the -[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image -generated when building libcontainer with docker. - - -## Copyright and license - -Code and documentation copyright 2014 Docker, inc. -The code and documentation are released under the [Apache 2.0 license](../LICENSE). -The documentation is also released under Creative Commons Attribution 4.0 International License. -You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md deleted file mode 100644 index 9ec6c3931..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md +++ /dev/null @@ -1,44 +0,0 @@ -## nsenter - -The `nsenter` package registers a special init constructor that is called before -the Go runtime has a chance to boot. This provides us the ability to `setns` on -existing namespaces and avoid the issues that the Go runtime has with multiple -threads. This constructor will be called if this package is registered, -imported, in your go application. - -The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/) -package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, -called the preamble, is used as a header when compiling the C parts of the package. -So every time we import package `nsenter`, the C code function `nsexec()` would be -called. And package `nsenter` is only imported in `init.go`, so every time the runc -`init` command is invoked, that C code is run. - -Because `nsexec()` must be run before the Go runtime in order to use the -Linux kernel namespace, you must `import` this library into a package if -you plan to use `libcontainer` directly. Otherwise Go will not execute -the `nsexec()` constructor, which means that the re-exec will not cause -the namespaces to be joined. You can import it like this: - -```go -import _ "github.com/opencontainers/runc/libcontainer/nsenter" -``` - -`nsexec()` will first get the file descriptor number for the init pipe -from the environment variable `_LIBCONTAINER_INITPIPE` (which was opened -by the parent and kept open across the fork-exec of the `nsexec()` init -process). The init pipe is used to read bootstrap data (namespace paths, -clone flags, uid and gid mappings, and the console path) from the parent -process. `nsexec()` will then call `setns(2)` to join the namespaces -provided in the bootstrap data (if available), `clone(2)` a child process -with the provided clone flags, update the user and group ID mappings, do -some further miscellaneous setup steps, and then send the PID of the -child process to the parent of the `nsexec()` "caller". Finally, -the parent `nsexec()` will exit and the child `nsexec()` process will -return to allow the Go runtime take over. - -NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any -`CLONE_NEW*` clone flags because we must fork a new process in order to -enter the PID namespace. - - - diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c deleted file mode 100644 index ad10f1406..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com> - * Copyright (C) 2019 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define _GNU_SOURCE -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <limits.h> -#include <fcntl.h> -#include <errno.h> - -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/statfs.h> -#include <sys/vfs.h> -#include <sys/mman.h> -#include <sys/mount.h> -#include <sys/sendfile.h> -#include <sys/syscall.h> - -/* Use our own wrapper for memfd_create. */ -#if !defined(SYS_memfd_create) && defined(__NR_memfd_create) -# define SYS_memfd_create __NR_memfd_create -#endif -/* memfd_create(2) flags -- copied from <linux/memfd.h>. */ -#ifndef MFD_CLOEXEC -# define MFD_CLOEXEC 0x0001U -# define MFD_ALLOW_SEALING 0x0002U -#endif -int memfd_create(const char *name, unsigned int flags) -{ -#ifdef SYS_memfd_create - return syscall(SYS_memfd_create, name, flags); -#else - errno = ENOSYS; - return -1; -#endif -} - - -/* This comes directly from <linux/fcntl.h>. */ -#ifndef F_LINUX_SPECIFIC_BASE -# define F_LINUX_SPECIFIC_BASE 1024 -#endif -#ifndef F_ADD_SEALS -# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) -#endif -#ifndef F_SEAL_SEAL -# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -# define F_SEAL_GROW 0x0004 /* prevent file from growing */ -# define F_SEAL_WRITE 0x0008 /* prevent writes */ -#endif - -#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY" -#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe" -#define RUNC_MEMFD_SEALS \ - (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) - -static void *must_realloc(void *ptr, size_t size) -{ - void *old = ptr; - do { - ptr = realloc(old, size); - } while(!ptr); - return ptr; -} - -/* - * Verify whether we are currently in a self-cloned program (namely, is - * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather - * for shmem files), and we want to be sure it's actually sealed. - */ -static int is_self_cloned(void) -{ - int fd, ret, is_cloned = 0; - struct stat statbuf = {}; - struct statfs fsbuf = {}; - - fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC); - if (fd < 0) - return -ENOTRECOVERABLE; - - /* - * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for - * this, because you cannot write to a sealed memfd no matter what (so - * sharing it isn't a bad thing -- and an admin could bind-mount a sealed - * memfd to /usr/bin/runc to allow re-use). - */ - ret = fcntl(fd, F_GET_SEALS); - if (ret >= 0) { - is_cloned = (ret == RUNC_MEMFD_SEALS); - goto out; - } - - /* - * All other forms require CLONED_BINARY_ENV, since they are potentially - * writeable (or we can't tell if they're fully safe) and thus we must - * check the environment as an extra layer of defence. - */ - if (!getenv(CLONED_BINARY_ENV)) { - is_cloned = false; - goto out; - } - - /* - * Is the binary on a read-only filesystem? We can't detect bind-mounts in - * particular (in-kernel they are identical to regular mounts) but we can - * at least be sure that it's read-only. In addition, to make sure that - * it's *our* bind-mount we check CLONED_BINARY_ENV. - */ - if (fstatfs(fd, &fsbuf) >= 0) - is_cloned |= (fsbuf.f_flags & MS_RDONLY); - - /* - * Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6 - * which appears to have a borked backport of F_GET_SEALS. Either way, - * having a file which has no hardlinks indicates that we aren't using - * a host-side "runc" binary and this is something that a container - * cannot fake (because unlinking requires being able to resolve the - * path that you want to unlink). - */ - if (fstat(fd, &statbuf) >= 0) - is_cloned |= (statbuf.st_nlink == 0); - -out: - close(fd); - return is_cloned; -} - -/* Read a given file into a new buffer, and providing the length. */ -static char *read_file(char *path, size_t *length) -{ - int fd; - char buf[4096], *copy = NULL; - - if (!length) - return NULL; - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - return NULL; - - *length = 0; - for (;;) { - ssize_t n; - - n = read(fd, buf, sizeof(buf)); - if (n < 0) - goto error; - if (!n) - break; - - copy = must_realloc(copy, (*length + n) * sizeof(*copy)); - memcpy(copy + *length, buf, n); - *length += n; - } - close(fd); - return copy; - -error: - close(fd); - free(copy); - return NULL; -} - -/* - * A poor-man's version of "xargs -0". Basically parses a given block of - * NUL-delimited data, within the given length and adds a pointer to each entry - * to the array of pointers. - */ -static int parse_xargs(char *data, int data_length, char ***output) -{ - int num = 0; - char *cur = data; - - if (!data || *output != NULL) - return -1; - - while (cur < data + data_length) { - num++; - *output = must_realloc(*output, (num + 1) * sizeof(**output)); - (*output)[num - 1] = cur; - cur += strlen(cur) + 1; - } - (*output)[num] = NULL; - return num; -} - -/* - * "Parse" out argv from /proc/self/cmdline. - * This is necessary because we are running in a context where we don't have a - * main() that we can just get the arguments from. - */ -static int fetchve(char ***argv) -{ - char *cmdline = NULL; - size_t cmdline_size; - - cmdline = read_file("/proc/self/cmdline", &cmdline_size); - if (!cmdline) - goto error; - - if (parse_xargs(cmdline, cmdline_size, argv) <= 0) - goto error; - - return 0; - -error: - free(cmdline); - return -EINVAL; -} - -enum { - EFD_NONE = 0, - EFD_MEMFD, - EFD_FILE, -}; - -/* - * This comes from <linux/fcntl.h>. We can't hard-code __O_TMPFILE because it - * changes depending on the architecture. If we don't have O_TMPFILE we always - * have the mkostemp(3) fallback. - */ -#ifndef O_TMPFILE -# if defined(__O_TMPFILE) && defined(O_DIRECTORY) -# define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) -# endif -#endif - -static int make_execfd(int *fdtype) -{ - int fd = -1; - char template[PATH_MAX] = {0}; - char *prefix = getenv("_LIBCONTAINER_STATEDIR"); - - if (!prefix || *prefix != '/') - prefix = "/tmp"; - if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) - return -1; - - /* - * Now try memfd, it's much nicer than actually creating a file in STATEDIR - * since it's easily detected thanks to sealing and also doesn't require - * assumptions about STATEDIR. - */ - *fdtype = EFD_MEMFD; - fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING); - if (fd >= 0) - return fd; - if (errno != ENOSYS && errno != EINVAL) - goto error; - -#ifdef O_TMPFILE - /* - * Try O_TMPFILE to avoid races where someone might snatch our file. Note - * that O_EXCL isn't actually a security measure here (since you can just - * fd re-open it and clear O_EXCL). - */ - *fdtype = EFD_FILE; - fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700); - if (fd >= 0) { - struct stat statbuf = {}; - bool working_otmpfile = false; - - /* - * open(2) ignores unknown O_* flags -- yeah, I was surprised when I - * found this out too. As a result we can't check for EINVAL. However, - * if we get nlink != 0 (or EISDIR) then we know that this kernel - * doesn't support O_TMPFILE. - */ - if (fstat(fd, &statbuf) >= 0) - working_otmpfile = (statbuf.st_nlink == 0); - - if (working_otmpfile) - return fd; - - /* Pretend that we got EISDIR since O_TMPFILE failed. */ - close(fd); - errno = EISDIR; - } - if (errno != EISDIR) - goto error; -#endif /* defined(O_TMPFILE) */ - - /* - * Our final option is to create a temporary file the old-school way, and - * then unlink it so that nothing else sees it by accident. - */ - *fdtype = EFD_FILE; - fd = mkostemp(template, O_CLOEXEC); - if (fd >= 0) { - if (unlink(template) >= 0) - return fd; - close(fd); - } - -error: - *fdtype = EFD_NONE; - return -1; -} - -static int seal_execfd(int *fd, int fdtype) -{ - switch (fdtype) { - case EFD_MEMFD: - return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS); - case EFD_FILE: { - /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */ - int newfd; - char fdpath[PATH_MAX] = {0}; - - if (fchmod(*fd, 0100) < 0) - return -1; - - if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0) - return -1; - - newfd = open(fdpath, O_PATH | O_CLOEXEC); - if (newfd < 0) - return -1; - - close(*fd); - *fd = newfd; - return 0; - } - default: - break; - } - return -1; -} - -static int try_bindfd(void) -{ - int fd, ret = -1; - char template[PATH_MAX] = {0}; - char *prefix = getenv("_LIBCONTAINER_STATEDIR"); - - if (!prefix || *prefix != '/') - prefix = "/tmp"; - if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) - return ret; - - /* - * We need somewhere to mount it, mounting anything over /proc/self is a - * BAD idea on the host -- even if we do it temporarily. - */ - fd = mkstemp(template); - if (fd < 0) - return ret; - close(fd); - - /* - * For obvious reasons this won't work in rootless mode because we haven't - * created a userns+mntns -- but getting that to work will be a bit - * complicated and it's only worth doing if someone actually needs it. - */ - ret = -EPERM; - if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0) - goto out; - if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0) - goto out_umount; - - - /* Get read-only handle that we're sure can't be made read-write. */ - ret = open(template, O_PATH | O_CLOEXEC); - -out_umount: - /* - * Make sure the MNT_DETACH works, otherwise we could get remounted - * read-write and that would be quite bad (the fd would be made read-write - * too, invalidating the protection). - */ - if (umount2(template, MNT_DETACH) < 0) { - if (ret >= 0) - close(ret); - ret = -ENOTRECOVERABLE; - } - -out: - /* - * We don't care about unlink errors, the worst that happens is that - * there's an empty file left around in STATEDIR. - */ - unlink(template); - return ret; -} - -static ssize_t fd_to_fd(int outfd, int infd) -{ - ssize_t total = 0; - char buffer[4096]; - - for (;;) { - ssize_t nread, nwritten = 0; - - nread = read(infd, buffer, sizeof(buffer)); - if (nread < 0) - return -1; - if (!nread) - break; - - do { - ssize_t n = write(outfd, buffer + nwritten, nread - nwritten); - if (n < 0) - return -1; - nwritten += n; - } while(nwritten < nread); - - total += nwritten; - } - - return total; -} - -static int clone_binary(void) -{ - int binfd, execfd; - struct stat statbuf = {}; - size_t sent = 0; - int fdtype = EFD_NONE; - - /* - * Before we resort to copying, let's try creating an ro-binfd in one shot - * by getting a handle for a read-only bind-mount of the execfd. - */ - execfd = try_bindfd(); - if (execfd >= 0) - return execfd; - - /* - * Dammit, that didn't work -- time to copy the binary to a safe place we - * can seal the contents. - */ - execfd = make_execfd(&fdtype); - if (execfd < 0 || fdtype == EFD_NONE) - return -ENOTRECOVERABLE; - - binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); - if (binfd < 0) - goto error; - - if (fstat(binfd, &statbuf) < 0) - goto error_binfd; - - while (sent < statbuf.st_size) { - int n = sendfile(execfd, binfd, NULL, statbuf.st_size - sent); - if (n < 0) { - /* sendfile can fail so we fallback to a dumb user-space copy. */ - n = fd_to_fd(execfd, binfd); - if (n < 0) - goto error_binfd; - } - sent += n; - } - close(binfd); - if (sent != statbuf.st_size) - goto error; - - if (seal_execfd(&execfd, fdtype) < 0) - goto error; - - return execfd; - -error_binfd: - close(binfd); -error: - close(execfd); - return -EIO; -} - -/* Get cheap access to the environment. */ -extern char **environ; - -int ensure_cloned_binary(void) -{ - int execfd; - char **argv = NULL; - - /* Check that we're not self-cloned, and if we are then bail. */ - int cloned = is_self_cloned(); - if (cloned > 0 || cloned == -ENOTRECOVERABLE) - return cloned; - - if (fetchve(&argv) < 0) - return -EINVAL; - - execfd = clone_binary(); - if (execfd < 0) - return -EIO; - - if (putenv(CLONED_BINARY_ENV "=1")) - goto error; - - fexecve(execfd, argv, environ); -error: - close(execfd); - return -ENOEXEC; -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h deleted file mode 100644 index 9e9bdca05..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef NSENTER_NAMESPACE_H -#define NSENTER_NAMESPACE_H - -#ifndef _GNU_SOURCE -# define _GNU_SOURCE -#endif -#include <sched.h> - -/* All of these are taken from include/uapi/linux/sched.h */ -#ifndef CLONE_NEWNS -# define CLONE_NEWNS 0x00020000 /* New mount namespace group */ -#endif -#ifndef CLONE_NEWCGROUP -# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ -#endif -#ifndef CLONE_NEWUTS -# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ -#endif -#ifndef CLONE_NEWIPC -# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ -#endif -#ifndef CLONE_NEWUSER -# define CLONE_NEWUSER 0x10000000 /* New user namespace */ -#endif -#ifndef CLONE_NEWPID -# define CLONE_NEWPID 0x20000000 /* New pid namespace */ -#endif -#ifndef CLONE_NEWNET -# define CLONE_NEWNET 0x40000000 /* New network namespace */ -#endif - -#endif /* NSENTER_NAMESPACE_H */ diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go deleted file mode 100644 index 07f4d63e4..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build linux,!gccgo - -package nsenter - -/* -#cgo CFLAGS: -Wall -extern void nsexec(); -void __attribute__((constructor)) init(void) { - nsexec(); -} -*/ -import "C" diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go deleted file mode 100644 index 63c7a3ec2..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go +++ /dev/null @@ -1,25 +0,0 @@ -// +build linux,gccgo - -package nsenter - -/* -#cgo CFLAGS: -Wall -extern void nsexec(); -void __attribute__((constructor)) init(void) { - nsexec(); -} -*/ -import "C" - -// AlwaysFalse is here to stay false -// (and be exported so the compiler doesn't optimize out its reference) -var AlwaysFalse bool - -func init() { - if AlwaysFalse { - // by referencing this C init() in a noop test, it will ensure the compiler - // links in the C function. - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 - C.init() - } -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go deleted file mode 100644 index ac701ca39..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go +++ /dev/null @@ -1,5 +0,0 @@ -// +build !linux !cgo - -package nsenter - -import "C" diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c deleted file mode 100644 index 7750af35e..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ /dev/null @@ -1,1006 +0,0 @@ - -#define _GNU_SOURCE -#include <endian.h> -#include <errno.h> -#include <fcntl.h> -#include <grp.h> -#include <sched.h> -#include <setjmp.h> -#include <signal.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> - -#include <sys/ioctl.h> -#include <sys/prctl.h> -#include <sys/socket.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include <linux/limits.h> -#include <linux/netlink.h> -#include <linux/types.h> - -/* Get all of the CLONE_NEW* flags. */ -#include "namespace.h" - -/* Synchronisation values. */ -enum sync_t { - SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ - SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ - SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ - SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ - SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */ - SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */ - - /* XXX: This doesn't help with segfaults and other such issues. */ - SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ -}; - -/* - * Synchronisation value for cgroup namespace setup. - * The same constant is defined in process_linux.go as "createCgroupns". - */ -#define CREATECGROUPNS 0x80 - -/* longjmp() arguments. */ -#define JUMP_PARENT 0x00 -#define JUMP_CHILD 0xA0 -#define JUMP_INIT 0xA1 - -/* JSON buffer. */ -#define JSON_MAX 4096 - -/* Assume the stack grows down, so arguments should be above it. */ -struct clone_t { - /* - * Reserve some space for clone() to locate arguments - * and retcode in this place - */ - char stack[4096] __attribute__ ((aligned(16))); - char stack_ptr[0]; - - /* There's two children. This is used to execute the different code. */ - jmp_buf *env; - int jmpval; -}; - -struct nlconfig_t { - char *data; - - /* Process settings. */ - uint32_t cloneflags; - char *oom_score_adj; - size_t oom_score_adj_len; - - /* User namespace settings. */ - char *uidmap; - size_t uidmap_len; - char *gidmap; - size_t gidmap_len; - char *namespaces; - size_t namespaces_len; - uint8_t is_setgroup; - - /* Rootless container settings. */ - uint8_t is_rootless_euid; /* boolean */ - char *uidmappath; - size_t uidmappath_len; - char *gidmappath; - size_t gidmappath_len; -}; - -/* - * List of netlink message types sent to us as part of bootstrapping the init. - * These constants are defined in libcontainer/message_linux.go. - */ -#define INIT_MSG 62000 -#define CLONE_FLAGS_ATTR 27281 -#define NS_PATHS_ATTR 27282 -#define UIDMAP_ATTR 27283 -#define GIDMAP_ATTR 27284 -#define SETGROUP_ATTR 27285 -#define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_EUID_ATTR 27287 -#define UIDMAPPATH_ATTR 27288 -#define GIDMAPPATH_ATTR 27289 - -/* - * Use the raw syscall for versions of glibc which don't include a function for - * it, namely (glibc 2.12). - */ -#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 -# define _GNU_SOURCE -# include "syscall.h" -# if !defined(SYS_setns) && defined(__NR_setns) -# define SYS_setns __NR_setns -# endif - -#ifndef SYS_setns -# error "setns(2) syscall not supported by glibc version" -#endif - -int setns(int fd, int nstype) -{ - return syscall(SYS_setns, fd, nstype); -} -#endif - -/* XXX: This is ugly. */ -static int syncfd = -1; - -/* TODO(cyphar): Fix this so it correctly deals with syncT. */ -#define bail(fmt, ...) \ - do { \ - int ret = __COUNTER__ + 1; \ - fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ - if (syncfd >= 0) { \ - enum sync_t s = SYNC_ERR; \ - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) \ - fprintf(stderr, "nsenter: failed: write(s)"); \ - if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret)) \ - fprintf(stderr, "nsenter: failed: write(ret)"); \ - } \ - exit(ret); \ - } while(0) - -static int write_file(char *data, size_t data_len, char *pathfmt, ...) -{ - int fd, len, ret = 0; - char path[PATH_MAX]; - - va_list ap; - va_start(ap, pathfmt); - len = vsnprintf(path, PATH_MAX, pathfmt, ap); - va_end(ap); - if (len < 0) - return -1; - - fd = open(path, O_RDWR); - if (fd < 0) { - return -1; - } - - len = write(fd, data, data_len); - if (len != data_len) { - ret = -1; - goto out; - } - - out: - close(fd); - return ret; -} - -enum policy_t { - SETGROUPS_DEFAULT = 0, - SETGROUPS_ALLOW, - SETGROUPS_DENY, -}; - -/* This *must* be called before we touch gid_map. */ -static void update_setgroups(int pid, enum policy_t setgroup) -{ - char *policy; - - switch (setgroup) { - case SETGROUPS_ALLOW: - policy = "allow"; - break; - case SETGROUPS_DENY: - policy = "deny"; - break; - case SETGROUPS_DEFAULT: - default: - /* Nothing to do. */ - return; - } - - if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) { - /* - * If the kernel is too old to support /proc/pid/setgroups, - * open(2) or write(2) will return ENOENT. This is fine. - */ - if (errno != ENOENT) - bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); - } -} - -static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) -{ - int child; - - /* - * If @app is NULL, execve will segfault. Just check it here and bail (if - * we're in this path, the caller is already getting desperate and there - * isn't a backup to this failing). This usually would be a configuration - * or programming issue. - */ - if (!app) - bail("mapping tool not present"); - - child = fork(); - if (child < 0) - bail("failed to fork"); - - if (!child) { -#define MAX_ARGV 20 - char *argv[MAX_ARGV]; - char *envp[] = { NULL }; - char pid_fmt[16]; - int argc = 0; - char *next; - - snprintf(pid_fmt, 16, "%d", pid); - - argv[argc++] = (char *)app; - argv[argc++] = pid_fmt; - /* - * Convert the map string into a list of argument that - * newuidmap/newgidmap can understand. - */ - - while (argc < MAX_ARGV) { - if (*map == '\0') { - argv[argc++] = NULL; - break; - } - argv[argc++] = map; - next = strpbrk(map, "\n "); - if (next == NULL) - break; - *next++ = '\0'; - map = next + strspn(next, "\n "); - } - - execve(app, argv, envp); - bail("failed to execv"); - } else { - int status; - - while (true) { - if (waitpid(child, &status, 0) < 0) { - if (errno == EINTR) - continue; - bail("failed to waitpid"); - } - if (WIFEXITED(status) || WIFSIGNALED(status)) - return WEXITSTATUS(status); - } - } - - return -1; -} - -static void update_uidmap(const char *path, int pid, char *map, size_t map_len) -{ - if (map == NULL || map_len <= 0) - return; - - if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) { - if (errno != EPERM) - bail("failed to update /proc/%d/uid_map", pid); - if (try_mapping_tool(path, pid, map, map_len)) - bail("failed to use newuid map on %d", pid); - } -} - -static void update_gidmap(const char *path, int pid, char *map, size_t map_len) -{ - if (map == NULL || map_len <= 0) - return; - - if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) { - if (errno != EPERM) - bail("failed to update /proc/%d/gid_map", pid); - if (try_mapping_tool(path, pid, map, map_len)) - bail("failed to use newgid map on %d", pid); - } -} - -static void update_oom_score_adj(char *data, size_t len) -{ - if (data == NULL || len <= 0) - return; - - if (write_file(data, len, "/proc/self/oom_score_adj") < 0) - bail("failed to update /proc/self/oom_score_adj"); -} - -/* A dummy function that just jumps to the given jumpval. */ -static int child_func(void *arg) __attribute__ ((noinline)); -static int child_func(void *arg) -{ - struct clone_t *ca = (struct clone_t *)arg; - longjmp(*ca->env, ca->jmpval); -} - -static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); -static int clone_parent(jmp_buf *env, int jmpval) -{ - struct clone_t ca = { - .env = env, - .jmpval = jmpval, - }; - - return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); -} - -/* - * Gets the init pipe fd from the environment, which is used to read the - * bootstrap data and tell the parent what the new pid is after we finish - * setting up the environment. - */ -static int initpipe(void) -{ - int pipenum; - char *initpipe, *endptr; - - initpipe = getenv("_LIBCONTAINER_INITPIPE"); - if (initpipe == NULL || *initpipe == '\0') - return -1; - - pipenum = strtol(initpipe, &endptr, 10); - if (*endptr != '\0') - bail("unable to parse _LIBCONTAINER_INITPIPE"); - - return pipenum; -} - -/* Returns the clone(2) flag for a namespace, given the name of a namespace. */ -static int nsflag(char *name) -{ - if (!strcmp(name, "cgroup")) - return CLONE_NEWCGROUP; - else if (!strcmp(name, "ipc")) - return CLONE_NEWIPC; - else if (!strcmp(name, "mnt")) - return CLONE_NEWNS; - else if (!strcmp(name, "net")) - return CLONE_NEWNET; - else if (!strcmp(name, "pid")) - return CLONE_NEWPID; - else if (!strcmp(name, "user")) - return CLONE_NEWUSER; - else if (!strcmp(name, "uts")) - return CLONE_NEWUTS; - - /* If we don't recognise a name, fallback to 0. */ - return 0; -} - -static uint32_t readint32(char *buf) -{ - return *(uint32_t *) buf; -} - -static uint8_t readint8(char *buf) -{ - return *(uint8_t *) buf; -} - -static void nl_parse(int fd, struct nlconfig_t *config) -{ - size_t len, size; - struct nlmsghdr hdr; - char *data, *current; - - /* Retrieve the netlink header. */ - len = read(fd, &hdr, NLMSG_HDRLEN); - if (len != NLMSG_HDRLEN) - bail("invalid netlink header length %zu", len); - - if (hdr.nlmsg_type == NLMSG_ERROR) - bail("failed to read netlink message"); - - if (hdr.nlmsg_type != INIT_MSG) - bail("unexpected msg type %d", hdr.nlmsg_type); - - /* Retrieve data. */ - size = NLMSG_PAYLOAD(&hdr, 0); - current = data = malloc(size); - if (!data) - bail("failed to allocate %zu bytes of memory for nl_payload", size); - - len = read(fd, data, size); - if (len != size) - bail("failed to read netlink payload, %zu != %zu", len, size); - - /* Parse the netlink payload. */ - config->data = data; - while (current < data + size) { - struct nlattr *nlattr = (struct nlattr *)current; - size_t payload_len = nlattr->nla_len - NLA_HDRLEN; - - /* Advance to payload. */ - current += NLA_HDRLEN; - - /* Handle payload. */ - switch (nlattr->nla_type) { - case CLONE_FLAGS_ATTR: - config->cloneflags = readint32(current); - break; - case ROOTLESS_EUID_ATTR: - config->is_rootless_euid = readint8(current); /* boolean */ - break; - case OOM_SCORE_ADJ_ATTR: - config->oom_score_adj = current; - config->oom_score_adj_len = payload_len; - break; - case NS_PATHS_ATTR: - config->namespaces = current; - config->namespaces_len = payload_len; - break; - case UIDMAP_ATTR: - config->uidmap = current; - config->uidmap_len = payload_len; - break; - case GIDMAP_ATTR: - config->gidmap = current; - config->gidmap_len = payload_len; - break; - case UIDMAPPATH_ATTR: - config->uidmappath = current; - config->uidmappath_len = payload_len; - break; - case GIDMAPPATH_ATTR: - config->gidmappath = current; - config->gidmappath_len = payload_len; - break; - case SETGROUP_ATTR: - config->is_setgroup = readint8(current); - break; - default: - bail("unknown netlink message type %d", nlattr->nla_type); - } - - current += NLA_ALIGN(payload_len); - } -} - -void nl_free(struct nlconfig_t *config) -{ - free(config->data); -} - -void join_namespaces(char *nslist) -{ - int num = 0, i; - char *saveptr = NULL; - char *namespace = strtok_r(nslist, ",", &saveptr); - struct namespace_t { - int fd; - int ns; - char type[PATH_MAX]; - char path[PATH_MAX]; - } *namespaces = NULL; - - if (!namespace || !strlen(namespace) || !strlen(nslist)) - bail("ns paths are empty"); - - /* - * We have to open the file descriptors first, since after - * we join the mnt namespace we might no longer be able to - * access the paths. - */ - do { - int fd; - char *path; - struct namespace_t *ns; - - /* Resize the namespace array. */ - namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); - if (!namespaces) - bail("failed to reallocate namespace array"); - ns = &namespaces[num - 1]; - - /* Split 'ns:path'. */ - path = strstr(namespace, ":"); - if (!path) - bail("failed to parse %s", namespace); - *path++ = '\0'; - - fd = open(path, O_RDONLY); - if (fd < 0) - bail("failed to open %s", path); - - ns->fd = fd; - ns->ns = nsflag(namespace); - strncpy(ns->path, path, PATH_MAX - 1); - ns->path[PATH_MAX - 1] = '\0'; - } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); - - /* - * The ordering in which we join namespaces is important. We should - * always join the user namespace *first*. This is all guaranteed - * from the container_linux.go side of this, so we're just going to - * follow the order given to us. - */ - - for (i = 0; i < num; i++) { - struct namespace_t ns = namespaces[i]; - - if (setns(ns.fd, ns.ns) < 0) - bail("failed to setns to %s", ns.path); - - close(ns.fd); - } - - free(namespaces); -} - -/* Defined in cloned_binary.c. */ -extern int ensure_cloned_binary(void); - -void nsexec(void) -{ - int pipenum; - jmp_buf env; - int sync_child_pipe[2], sync_grandchild_pipe[2]; - struct nlconfig_t config = { 0 }; - - /* - * If we don't have an init pipe, just return to the go routine. - * We'll only get an init pipe for start or exec. - */ - pipenum = initpipe(); - if (pipenum == -1) - return; - - /* - * We need to re-exec if we are not in a cloned binary. This is necessary - * to ensure that containers won't be able to access the host binary - * through /proc/self/exe. See CVE-2019-5736. - */ - if (ensure_cloned_binary() < 0) - bail("could not ensure we are a cloned binary"); - - /* Parse all of the netlink configuration. */ - nl_parse(pipenum, &config); - - /* Set oom_score_adj. This has to be done before !dumpable because - * /proc/self/oom_score_adj is not writeable unless you're an privileged - * user (if !dumpable is set). All children inherit their parent's - * oom_score_adj value on fork(2) so this will always be propagated - * properly. - */ - update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len); - - /* - * Make the process non-dumpable, to avoid various race conditions that - * could cause processes in namespaces we're joining to access host - * resources (or potentially execute code). - * - * However, if the number of namespaces we are joining is 0, we are not - * going to be switching to a different security context. Thus setting - * ourselves to be non-dumpable only breaks things (like rootless - * containers), which is the recommendation from the kernel folks. - */ - if (config.namespaces) { - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) - bail("failed to set process as non-dumpable"); - } - - /* Pipe so we can tell the child when we've finished setting up. */ - if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) - bail("failed to setup sync pipe between parent and child"); - - /* - * We need a new socketpair to sync with grandchild so we don't have - * race condition with child. - */ - if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0) - bail("failed to setup sync pipe between parent and grandchild"); - - /* TODO: Currently we aren't dealing with child deaths properly. */ - - /* - * Okay, so this is quite annoying. - * - * In order for this unsharing code to be more extensible we need to split - * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case - * would be if we did clone(CLONE_NEWUSER) and the other namespaces - * separately, but because of SELinux issues we cannot really do that. But - * we cannot just dump the namespace flags into clone(...) because several - * usecases (such as rootless containers) require more granularity around - * the namespace setup. In addition, some older kernels had issues where - * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot - * handle this while also dealing with SELinux so we choose SELinux support - * over broken kernel support). - * - * However, if we unshare(2) the user namespace *before* we clone(2), then - * all hell breaks loose. - * - * The parent no longer has permissions to do many things (unshare(2) drops - * all capabilities in your old namespace), and the container cannot be set - * up to have more than one {uid,gid} mapping. This is obviously less than - * ideal. In order to fix this, we have to first clone(2) and then unshare. - * - * Unfortunately, it's not as simple as that. We have to fork to enter the - * PID namespace (the PID namespace only applies to children). Since we'll - * have to double-fork, this clone_parent() call won't be able to get the - * PID of the _actual_ init process (without doing more synchronisation than - * I can deal with at the moment). So we'll just get the parent to send it - * for us, the only job of this process is to update - * /proc/pid/{setgroups,uid_map,gid_map}. - * - * And as a result of the above, we also need to setns(2) in the first child - * because if we join a PID namespace in the topmost parent then our child - * will be in that namespace (and it will not be able to give us a PID value - * that makes sense without resorting to sending things with cmsg). - * - * This also deals with an older issue caused by dumping cloneflags into - * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so - * we have to unshare(2) before clone(2) in order to do this. This was fixed - * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was - * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're - * aware, the last mainline kernel which had this bug was Linux 3.12. - * However, we cannot comment on which kernels the broken patch was - * backported to. - * - * -- Aleksa "what has my life come to?" Sarai - */ - - switch (setjmp(env)) { - /* - * Stage 0: We're in the parent. Our job is just to create a new child - * (stage 1: JUMP_CHILD) process and write its uid_map and - * gid_map. That process will go on to create a new process, then - * it will send us its PID which we will send to the bootstrap - * process. - */ - case JUMP_PARENT:{ - int len; - pid_t child, first_child = -1; - bool ready = false; - - /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0); - - /* Start the process of getting a container. */ - child = clone_parent(&env, JUMP_CHILD); - if (child < 0) - bail("unable to fork: child_func"); - - /* - * State machine for synchronisation with the children. - * - * Father only return when both child and grandchild are - * ready, so we can receive all possible error codes - * generated by children. - */ - while (!ready) { - enum sync_t s; - int ret; - - syncfd = sync_child_pipe[1]; - close(sync_child_pipe[0]); - - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with child: next state"); - - switch (s) { - case SYNC_ERR: - /* We have to mirror the error code of the child. */ - if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) - bail("failed to sync with child: read(error code)"); - - exit(ret); - case SYNC_USERMAP_PLS: - /* - * Enable setgroups(2) if we've been asked to. But we also - * have to explicitly disable setgroups(2) if we're - * creating a rootless container for single-entry mapping. - * i.e. config.is_setgroup == false. - * (this is required since Linux 3.19). - * - * For rootless multi-entry mapping, config.is_setgroup shall be true and - * newuidmap/newgidmap shall be used. - */ - - if (config.is_rootless_euid && !config.is_setgroup) - update_setgroups(child, SETGROUPS_DENY); - - /* Set up mappings. */ - update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len); - update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len); - - s = SYNC_USERMAP_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); - } - break; - case SYNC_RECVPID_PLS:{ - first_child = child; - - /* Get the init_func pid. */ - if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { - kill(first_child, SIGKILL); - bail("failed to sync with child: read(childpid)"); - } - - /* Send ACK. */ - s = SYNC_RECVPID_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(first_child, SIGKILL); - kill(child, SIGKILL); - bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); - } - - /* Send the init_func pid back to our parent. - * - * Send the init_func pid and the pid of the first child back to our parent. - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. - */ - len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } - } - break; - case SYNC_CHILD_READY: - ready = true; - break; - default: - bail("unexpected sync value: %u", s); - } - } - - /* Now sync with grandchild. */ - - ready = false; - while (!ready) { - enum sync_t s; - int ret; - - syncfd = sync_grandchild_pipe[1]; - close(sync_grandchild_pipe[0]); - - s = SYNC_GRANDCHILD; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with child: write(SYNC_GRANDCHILD)"); - } - - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with child: next state"); - - switch (s) { - case SYNC_ERR: - /* We have to mirror the error code of the child. */ - if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) - bail("failed to sync with child: read(error code)"); - - exit(ret); - case SYNC_CHILD_READY: - ready = true; - break; - default: - bail("unexpected sync value: %u", s); - } - } - exit(0); - } - - /* - * Stage 1: We're in the first child process. Our job is to join any - * provided namespaces in the netlink payload and unshare all - * of the requested namespaces. If we've been asked to - * CLONE_NEWUSER, we will ask our parent (stage 0) to set up - * our user mappings for us. Then, we create a new child - * (stage 2: JUMP_INIT) for PID namespace. We then send the - * child's PID to our parent (stage 0). - */ - case JUMP_CHILD:{ - pid_t child; - enum sync_t s; - - /* We're in a child and thus need to tell the parent if we die. */ - syncfd = sync_child_pipe[0]; - close(sync_child_pipe[1]); - - /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0); - - /* - * We need to setns first. We cannot do this earlier (in stage 0) - * because of the fact that we forked to get here (the PID of - * [stage 2: JUMP_INIT]) would be meaningless). We could send it - * using cmsg(3) but that's just annoying. - */ - if (config.namespaces) - join_namespaces(config.namespaces); - - /* - * Deal with user namespaces first. They are quite special, as they - * affect our ability to unshare other namespaces and are used as - * context for privilege checks. - * - * We don't unshare all namespaces in one go. The reason for this - * is that, while the kernel documentation may claim otherwise, - * there are certain cases where unsharing all namespaces at once - * will result in namespace objects being owned incorrectly. - * Ideally we should just fix these kernel bugs, but it's better to - * be safe than sorry, and fix them separately. - * - * A specific case of this is that the SELinux label of the - * internal kern-mount that mqueue uses will be incorrect if the - * UTS namespace is cloned before the USER namespace is mapped. - * I've also heard of similar problems with the network namespace - * in some scenarios. This also mirrors how LXC deals with this - * problem. - */ - if (config.cloneflags & CLONE_NEWUSER) { - if (unshare(CLONE_NEWUSER) < 0) - bail("failed to unshare user namespace"); - config.cloneflags &= ~CLONE_NEWUSER; - - /* - * We don't have the privileges to do any mapping here (see the - * clone_parent rant). So signal our parent to hook us up. - */ - - /* Switching is only necessary if we joined namespaces. */ - if (config.namespaces) { - if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) - bail("failed to set process as dumpable"); - } - s = SYNC_USERMAP_PLS; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); - - /* ... wait for mapping ... */ - - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); - if (s != SYNC_USERMAP_ACK) - bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); - /* Switching is only necessary if we joined namespaces. */ - if (config.namespaces) { - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) - bail("failed to set process as dumpable"); - } - - /* Become root in the namespace proper. */ - if (setresuid(0, 0, 0) < 0) - bail("failed to become root in user namespace"); - } - /* - * Unshare all of the namespaces. Now, it should be noted that this - * ordering might break in the future (especially with rootless - * containers). But for now, it's not possible to split this into - * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. - * - * Note that we don't merge this with clone() because there were - * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) - * was broken, so we'll just do it the long way anyway. - */ - if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) - bail("failed to unshare namespaces"); - - /* - * TODO: What about non-namespace clone flags that we're dropping here? - * - * We fork again because of PID namespace, setns(2) or unshare(2) don't - * change the PID namespace of the calling process, because doing so - * would change the caller's idea of its own PID (as reported by getpid()), - * which would break many applications and libraries, so we must fork - * to actually enter the new PID namespace. - */ - child = clone_parent(&env, JUMP_INIT); - if (child < 0) - bail("unable to fork: init_func"); - - /* Send the child to our parent, which knows what it's doing. */ - s = SYNC_RECVPID_PLS; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); - } - if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { - kill(child, SIGKILL); - bail("failed to sync with parent: write(childpid)"); - } - - /* ... wait for parent to get the pid ... */ - - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); - } - if (s != SYNC_RECVPID_ACK) { - kill(child, SIGKILL); - bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); - } - - s = SYNC_CHILD_READY; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with parent: write(SYNC_CHILD_READY)"); - } - - /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ - exit(0); - } - - /* - * Stage 2: We're the final child process, and the only process that will - * actually return to the Go runtime. Our job is to just do the - * final cleanup steps and then return to the Go runtime to allow - * init_linux.go to run. - */ - case JUMP_INIT:{ - /* - * We're inside the child now, having jumped from the - * start_child() code after forking in the parent. - */ - enum sync_t s; - - /* We're in a child and thus need to tell the parent if we die. */ - syncfd = sync_grandchild_pipe[0]; - close(sync_grandchild_pipe[1]); - close(sync_child_pipe[0]); - close(sync_child_pipe[1]); - - /* For debugging. */ - prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0); - - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); - if (s != SYNC_GRANDCHILD) - bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); - - if (setsid() < 0) - bail("setsid failed"); - - if (setuid(0) < 0) - bail("setuid failed"); - - if (setgid(0) < 0) - bail("setgid failed"); - - if (!config.is_rootless_euid && config.is_setgroup) { - if (setgroups(0, NULL) < 0) - bail("setgroups failed"); - } - - /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ - if (config.cloneflags & CLONE_NEWCGROUP) { - uint8_t value; - if (read(pipenum, &value, sizeof(value)) != sizeof(value)) - bail("read synchronisation value failed"); - if (value == CREATECGROUPNS) { - if (unshare(CLONE_NEWCGROUP) < 0) - bail("failed to unshare cgroup namespace"); - } else - bail("received unknown synchronisation value"); - } - - s = SYNC_CHILD_READY; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with patent: write(SYNC_CHILD_READY)"); - - /* Close sync pipes. */ - close(sync_grandchild_pipe[0]); - - /* Free netlink data. */ - nl_free(&config); - - /* Finish executing, let the Go runtime take over. */ - return; - } - default: - bail("unexpected jump value"); - } - - /* Should never be reached. */ - bail("should never be reached"); -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go deleted file mode 100644 index a4ae8901a..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go +++ /dev/null @@ -1,155 +0,0 @@ -// +build linux - -package system - -import ( - "os" - "os/exec" - "syscall" // only for exec - "unsafe" - - "github.com/opencontainers/runc/libcontainer/user" - "golang.org/x/sys/unix" -) - -// If arg2 is nonzero, set the "child subreaper" attribute of the -// calling process; if arg2 is zero, unset the attribute. When a -// process is marked as a child subreaper, all of the children -// that it creates, and their descendants, will be marked as -// having a subreaper. In effect, a subreaper fulfills the role -// of init(1) for its descendant processes. Upon termination of -// a process that is orphaned (i.e., its immediate parent has -// already terminated) and marked as having a subreaper, the -// nearest still living ancestor subreaper will receive a SIGCHLD -// signal and be able to wait(2) on the process to discover its -// termination status. -const PR_SET_CHILD_SUBREAPER = 36 - -type ParentDeathSignal int - -func (p ParentDeathSignal) Restore() error { - if p == 0 { - return nil - } - current, err := GetParentDeathSignal() - if err != nil { - return err - } - if p == current { - return nil - } - return p.Set() -} - -func (p ParentDeathSignal) Set() error { - return SetParentDeathSignal(uintptr(p)) -} - -func Execv(cmd string, args []string, env []string) error { - name, err := exec.LookPath(cmd) - if err != nil { - return err - } - - return syscall.Exec(name, args, env) -} - -func Prlimit(pid, resource int, limit unix.Rlimit) error { - _, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) - if err != 0 { - return err - } - return nil -} - -func SetParentDeathSignal(sig uintptr) error { - if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { - return err - } - return nil -} - -func GetParentDeathSignal() (ParentDeathSignal, error) { - var sig int - if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { - return -1, err - } - return ParentDeathSignal(sig), nil -} - -func SetKeepCaps() error { - if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { - return err - } - - return nil -} - -func ClearKeepCaps() error { - if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { - return err - } - - return nil -} - -func Setctty() error { - if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { - return err - } - return nil -} - -// RunningInUserNS detects whether we are currently running in a user namespace. -// Originally copied from github.com/lxc/lxd/shared/util.go -func RunningInUserNS() bool { - uidmap, err := user.CurrentProcessUIDMap() - if err != nil { - // This kernel-provided file only exists if user namespaces are supported - return false - } - return UIDMapInUserNS(uidmap) -} - -func UIDMapInUserNS(uidmap []user.IDMap) bool { - /* - * We assume we are in the initial user namespace if we have a full - * range - 4294967295 uids starting at uid 0. - */ - if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 { - return false - } - return true -} - -// GetParentNSeuid returns the euid within the parent user namespace -func GetParentNSeuid() int64 { - euid := int64(os.Geteuid()) - uidmap, err := user.CurrentProcessUIDMap() - if err != nil { - // This kernel-provided file only exists if user namespaces are supported - return euid - } - for _, um := range uidmap { - if um.ID <= euid && euid <= um.ID+um.Count-1 { - return um.ParentID + euid - um.ID - } - } - return euid -} - -// SetSubreaper sets the value i as the subreaper setting for the calling process -func SetSubreaper(i int) error { - return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) -} - -// GetSubreaper returns the subreaper setting for the calling process -func GetSubreaper() (int, error) { - var i uintptr - - if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { - return -1, err - } - - return int(i), nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go deleted file mode 100644 index 79232a437..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go +++ /dev/null @@ -1,113 +0,0 @@ -package system - -import ( - "fmt" - "io/ioutil" - "path/filepath" - "strconv" - "strings" -) - -// State is the status of a process. -type State rune - -const ( // Only values for Linux 3.14 and later are listed here - Dead State = 'X' - DiskSleep State = 'D' - Running State = 'R' - Sleeping State = 'S' - Stopped State = 'T' - TracingStop State = 't' - Zombie State = 'Z' -) - -// String forms of the state from proc(5)'s documentation for -// /proc/[pid]/status' "State" field. -func (s State) String() string { - switch s { - case Dead: - return "dead" - case DiskSleep: - return "disk sleep" - case Running: - return "running" - case Sleeping: - return "sleeping" - case Stopped: - return "stopped" - case TracingStop: - return "tracing stop" - case Zombie: - return "zombie" - default: - return fmt.Sprintf("unknown (%c)", s) - } -} - -// Stat_t represents the information from /proc/[pid]/stat, as -// described in proc(5) with names based on the /proc/[pid]/status -// fields. -type Stat_t struct { - // PID is the process ID. - PID uint - - // Name is the command run by the process. - Name string - - // State is the state of the process. - State State - - // StartTime is the number of clock ticks after system boot (since - // Linux 2.6). - StartTime uint64 -} - -// Stat returns a Stat_t instance for the specified process. -func Stat(pid int) (stat Stat_t, err error) { - bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) - if err != nil { - return stat, err - } - return parseStat(string(bytes)) -} - -// GetProcessStartTime is deprecated. Use Stat(pid) and -// Stat_t.StartTime instead. -func GetProcessStartTime(pid int) (string, error) { - stat, err := Stat(pid) - if err != nil { - return "", err - } - return fmt.Sprintf("%d", stat.StartTime), nil -} - -func parseStat(data string) (stat Stat_t, err error) { - // From proc(5), field 2 could contain space and is inside `(` and `)`. - // The following is an example: - // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 - i := strings.LastIndex(data, ")") - if i <= 2 || i >= len(data)-1 { - return stat, fmt.Errorf("invalid stat data: %q", data) - } - - parts := strings.SplitN(data[:i], "(", 2) - if len(parts) != 2 { - return stat, fmt.Errorf("invalid stat data: %q", data) - } - - stat.Name = parts[1] - _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) - if err != nil { - return stat, err - } - - // parts indexes should be offset by 3 from the field number given - // proc(5), because parts is zero-indexed and we've removed fields - // one (PID) and two (Name) in the paren-split. - parts = strings.Split(data[i+2:], " ") - var state int - fmt.Sscanf(parts[3-3], "%c", &state) - stat.State = State(state) - fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) - return stat, nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go deleted file mode 100644 index c5ca5d862..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go +++ /dev/null @@ -1,26 +0,0 @@ -// +build linux -// +build 386 arm - -package system - -import ( - "golang.org/x/sys/unix" -) - -// Setuid sets the uid of the calling thread to the specified uid. -func Setuid(uid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// Setgid sets the gid of the calling thread to the specified gid. -func Setgid(gid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go deleted file mode 100644 index 11c3faafb..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go +++ /dev/null @@ -1,26 +0,0 @@ -// +build linux -// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le s390x - -package system - -import ( - "golang.org/x/sys/unix" -) - -// Setuid sets the uid of the calling thread to the specified uid. -func Setuid(uid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// Setgid sets the gid of the calling thread to the specified gid. -func Setgid(gid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go deleted file mode 100644 index b8434f105..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build cgo,linux - -package system - -/* -#include <unistd.h> -*/ -import "C" - -func GetClockTicks() int { - return int(C.sysconf(C._SC_CLK_TCK)) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go deleted file mode 100644 index d93b5d5fd..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build !cgo windows - -package system - -func GetClockTicks() int { - // TODO figure out a better alternative for platforms where we're missing cgo - // - // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). - // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx - // - // An example of its usage can be found here. - // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx - - return 100 -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go deleted file mode 100644 index b94be74a6..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go +++ /dev/null @@ -1,27 +0,0 @@ -// +build !linux - -package system - -import ( - "os" - - "github.com/opencontainers/runc/libcontainer/user" -) - -// RunningInUserNS is a stub for non-Linux systems -// Always returns false -func RunningInUserNS() bool { - return false -} - -// UIDMapInUserNS is a stub for non-Linux systems -// Always returns false -func UIDMapInUserNS(uidmap []user.IDMap) bool { - return false -} - -// GetParentNSeuid returns the euid within the parent user namespace -// Always returns os.Geteuid on non-linux -func GetParentNSeuid() int { - return os.Geteuid() -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go deleted file mode 100644 index a6823fc99..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go +++ /dev/null @@ -1,35 +0,0 @@ -package system - -import "golang.org/x/sys/unix" - -// Returns a []byte slice if the xattr is set and nil otherwise -// Requires path and its attribute as arguments -func Lgetxattr(path string, attr string) ([]byte, error) { - var sz int - // Start with a 128 length byte array - dest := make([]byte, 128) - sz, errno := unix.Lgetxattr(path, attr, dest) - - switch { - case errno == unix.ENODATA: - return nil, errno - case errno == unix.ENOTSUP: - return nil, errno - case errno == unix.ERANGE: - // 128 byte array might just not be good enough, - // A dummy buffer is used to get the real size - // of the xattrs on disk - sz, errno = unix.Lgetxattr(path, attr, []byte{}) - if errno != nil { - return nil, errno - } - dest = make([]byte, sz) - sz, errno = unix.Lgetxattr(path, attr, dest) - if errno != nil { - return nil, errno - } - case errno != nil: - return nil, errno - } - return dest[:sz], nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go deleted file mode 100644 index 6fd8dd0d4..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go +++ /dev/null @@ -1,41 +0,0 @@ -package user - -import ( - "errors" -) - -var ( - // The current operating system does not provide the required data for user lookups. - ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") - // No matching entries found in file. - ErrNoPasswdEntries = errors.New("no matching entries in passwd file") - ErrNoGroupEntries = errors.New("no matching entries in group file") -) - -// LookupUser looks up a user by their username in /etc/passwd. If the user -// cannot be found (or there is no /etc/passwd file on the filesystem), then -// LookupUser returns an error. -func LookupUser(username string) (User, error) { - return lookupUser(username) -} - -// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot -// be found (or there is no /etc/passwd file on the filesystem), then LookupId -// returns an error. -func LookupUid(uid int) (User, error) { - return lookupUid(uid) -} - -// LookupGroup looks up a group by its name in /etc/group. If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGroup -// returns an error. -func LookupGroup(groupname string) (Group, error) { - return lookupGroup(groupname) -} - -// LookupGid looks up a group by its group id in /etc/group. If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGid -// returns an error. -func LookupGid(gid int) (Group, error) { - return lookupGid(gid) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go deleted file mode 100644 index 92b5ae8de..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ /dev/null @@ -1,144 +0,0 @@ -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package user - -import ( - "io" - "os" - "strconv" - - "golang.org/x/sys/unix" -) - -// Unix-specific path to the passwd and group formatted files. -const ( - unixPasswdPath = "/etc/passwd" - unixGroupPath = "/etc/group" -) - -func lookupUser(username string) (User, error) { - return lookupUserFunc(func(u User) bool { - return u.Name == username - }) -} - -func lookupUid(uid int) (User, error) { - return lookupUserFunc(func(u User) bool { - return u.Uid == uid - }) -} - -func lookupUserFunc(filter func(u User) bool) (User, error) { - // Get operating system-specific passwd reader-closer. - passwd, err := GetPasswd() - if err != nil { - return User{}, err - } - defer passwd.Close() - - // Get the users. - users, err := ParsePasswdFilter(passwd, filter) - if err != nil { - return User{}, err - } - - // No user entries found. - if len(users) == 0 { - return User{}, ErrNoPasswdEntries - } - - // Assume the first entry is the "correct" one. - return users[0], nil -} - -func lookupGroup(groupname string) (Group, error) { - return lookupGroupFunc(func(g Group) bool { - return g.Name == groupname - }) -} - -func lookupGid(gid int) (Group, error) { - return lookupGroupFunc(func(g Group) bool { - return g.Gid == gid - }) -} - -func lookupGroupFunc(filter func(g Group) bool) (Group, error) { - // Get operating system-specific group reader-closer. - group, err := GetGroup() - if err != nil { - return Group{}, err - } - defer group.Close() - - // Get the users. - groups, err := ParseGroupFilter(group, filter) - if err != nil { - return Group{}, err - } - - // No user entries found. - if len(groups) == 0 { - return Group{}, ErrNoGroupEntries - } - - // Assume the first entry is the "correct" one. - return groups[0], nil -} - -func GetPasswdPath() (string, error) { - return unixPasswdPath, nil -} - -func GetPasswd() (io.ReadCloser, error) { - return os.Open(unixPasswdPath) -} - -func GetGroupPath() (string, error) { - return unixGroupPath, nil -} - -func GetGroup() (io.ReadCloser, error) { - return os.Open(unixGroupPath) -} - -// CurrentUser looks up the current user by their user id in /etc/passwd. If the -// user cannot be found (or there is no /etc/passwd file on the filesystem), -// then CurrentUser returns an error. -func CurrentUser() (User, error) { - return LookupUid(unix.Getuid()) -} - -// CurrentGroup looks up the current user's group by their primary group id's -// entry in /etc/passwd. If the group cannot be found (or there is no -// /etc/group file on the filesystem), then CurrentGroup returns an error. -func CurrentGroup() (Group, error) { - return LookupGid(unix.Getgid()) -} - -func currentUserSubIDs(fileName string) ([]SubID, error) { - u, err := CurrentUser() - if err != nil { - return nil, err - } - filter := func(entry SubID) bool { - return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) - } - return ParseSubIDFileFilter(fileName, filter) -} - -func CurrentUserSubUIDs() ([]SubID, error) { - return currentUserSubIDs("/etc/subuid") -} - -func CurrentUserSubGIDs() ([]SubID, error) { - return currentUserSubIDs("/etc/subgid") -} - -func CurrentProcessUIDMap() ([]IDMap, error) { - return ParseIDMapFile("/proc/self/uid_map") -} - -func CurrentProcessGIDMap() ([]IDMap, error) { - return ParseIDMapFile("/proc/self/gid_map") -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go deleted file mode 100644 index 65cd40e92..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go +++ /dev/null @@ -1,40 +0,0 @@ -// +build windows - -package user - -import ( - "fmt" - "os/user" -) - -func lookupUser(username string) (User, error) { - u, err := user.Lookup(username) - if err != nil { - return User{}, err - } - return userFromOS(u) -} - -func lookupUid(uid int) (User, error) { - u, err := user.LookupId(fmt.Sprintf("%d", uid)) - if err != nil { - return User{}, err - } - return userFromOS(u) -} - -func lookupGroup(groupname string) (Group, error) { - g, err := user.LookupGroup(groupname) - if err != nil { - return Group{}, err - } - return groupFromOS(g) -} - -func lookupGid(gid int) (Group, error) { - g, err := user.LookupGroupId(fmt.Sprintf("%d", gid)) - if err != nil { - return Group{}, err - } - return groupFromOS(g) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go deleted file mode 100644 index 7b912bbf8..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ /dev/null @@ -1,608 +0,0 @@ -package user - -import ( - "bufio" - "fmt" - "io" - "os" - "os/user" - "strconv" - "strings" -) - -const ( - minId = 0 - maxId = 1<<31 - 1 //for 32-bit systems compatibility -) - -var ( - ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId) -) - -type User struct { - Name string - Pass string - Uid int - Gid int - Gecos string - Home string - Shell string -} - -// userFromOS converts an os/user.(*User) to local User -// -// (This does not include Pass, Shell or Gecos) -func userFromOS(u *user.User) (User, error) { - newUser := User{ - Name: u.Username, - Home: u.HomeDir, - } - id, err := strconv.Atoi(u.Uid) - if err != nil { - return newUser, err - } - newUser.Uid = id - - id, err = strconv.Atoi(u.Gid) - if err != nil { - return newUser, err - } - newUser.Gid = id - return newUser, nil -} - -type Group struct { - Name string - Pass string - Gid int - List []string -} - -// groupFromOS converts an os/user.(*Group) to local Group -// -// (This does not include Pass, Shell or Gecos) -func groupFromOS(g *user.Group) (Group, error) { - newGroup := Group{ - Name: g.Name, - } - - id, err := strconv.Atoi(g.Gid) - if err != nil { - return newGroup, err - } - newGroup.Gid = id - - return newGroup, nil -} - -// SubID represents an entry in /etc/sub{u,g}id -type SubID struct { - Name string - SubID int64 - Count int64 -} - -// IDMap represents an entry in /proc/PID/{u,g}id_map -type IDMap struct { - ID int64 - ParentID int64 - Count int64 -} - -func parseLine(line string, v ...interface{}) { - parseParts(strings.Split(line, ":"), v...) -} - -func parseParts(parts []string, v ...interface{}) { - if len(parts) == 0 { - return - } - - for i, p := range parts { - // Ignore cases where we don't have enough fields to populate the arguments. - // Some configuration files like to misbehave. - if len(v) <= i { - break - } - - // Use the type of the argument to figure out how to parse it, scanf() style. - // This is legit. - switch e := v[i].(type) { - case *string: - *e = p - case *int: - // "numbers", with conversion errors ignored because of some misbehaving configuration files. - *e, _ = strconv.Atoi(p) - case *int64: - *e, _ = strconv.ParseInt(p, 10, 64) - case *[]string: - // Comma-separated lists. - if p != "" { - *e = strings.Split(p, ",") - } else { - *e = []string{} - } - default: - // Someone goof'd when writing code using this function. Scream so they can hear us. - panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e)) - } - } -} - -func ParsePasswdFile(path string) ([]User, error) { - passwd, err := os.Open(path) - if err != nil { - return nil, err - } - defer passwd.Close() - return ParsePasswd(passwd) -} - -func ParsePasswd(passwd io.Reader) ([]User, error) { - return ParsePasswdFilter(passwd, nil) -} - -func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { - passwd, err := os.Open(path) - if err != nil { - return nil, err - } - defer passwd.Close() - return ParsePasswdFilter(passwd, filter) -} - -func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { - if r == nil { - return nil, fmt.Errorf("nil source for passwd-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []User{} - ) - - for s.Scan() { - if err := s.Err(); err != nil { - return nil, err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // see: man 5 passwd - // name:password:UID:GID:GECOS:directory:shell - // Name:Pass:Uid:Gid:Gecos:Home:Shell - // root:x:0:0:root:/root:/bin/bash - // adm:x:3:4:adm:/var/adm:/bin/false - p := User{} - parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - - return out, nil -} - -func ParseGroupFile(path string) ([]Group, error) { - group, err := os.Open(path) - if err != nil { - return nil, err - } - - defer group.Close() - return ParseGroup(group) -} - -func ParseGroup(group io.Reader) ([]Group, error) { - return ParseGroupFilter(group, nil) -} - -func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { - group, err := os.Open(path) - if err != nil { - return nil, err - } - defer group.Close() - return ParseGroupFilter(group, filter) -} - -func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { - if r == nil { - return nil, fmt.Errorf("nil source for group-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []Group{} - ) - - for s.Scan() { - if err := s.Err(); err != nil { - return nil, err - } - - text := s.Text() - if text == "" { - continue - } - - // see: man 5 group - // group_name:password:GID:user_list - // Name:Pass:Gid:List - // root:x:0:root - // adm:x:4:root,adm,daemon - p := Group{} - parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - - return out, nil -} - -type ExecUser struct { - Uid int - Gid int - Sgids []int - Home string -} - -// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the -// given file paths and uses that data as the arguments to GetExecUser. If the -// files cannot be opened for any reason, the error is ignored and a nil -// io.Reader is passed instead. -func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { - var passwd, group io.Reader - - if passwdFile, err := os.Open(passwdPath); err == nil { - passwd = passwdFile - defer passwdFile.Close() - } - - if groupFile, err := os.Open(groupPath); err == nil { - group = groupFile - defer groupFile.Close() - } - - return GetExecUser(userSpec, defaults, passwd, group) -} - -// GetExecUser parses a user specification string (using the passwd and group -// readers as sources for /etc/passwd and /etc/group data, respectively). In -// the case of blank fields or missing data from the sources, the values in -// defaults is used. -// -// GetExecUser will return an error if a user or group literal could not be -// found in any entry in passwd and group respectively. -// -// Examples of valid user specifications are: -// * "" -// * "user" -// * "uid" -// * "user:group" -// * "uid:gid -// * "user:gid" -// * "uid:group" -// -// It should be noted that if you specify a numeric user or group id, they will -// not be evaluated as usernames (only the metadata will be filled). So attempting -// to parse a user with user.Name = "1337" will produce the user with a UID of -// 1337. -func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { - if defaults == nil { - defaults = new(ExecUser) - } - - // Copy over defaults. - user := &ExecUser{ - Uid: defaults.Uid, - Gid: defaults.Gid, - Sgids: defaults.Sgids, - Home: defaults.Home, - } - - // Sgids slice *cannot* be nil. - if user.Sgids == nil { - user.Sgids = []int{} - } - - // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax - var userArg, groupArg string - parseLine(userSpec, &userArg, &groupArg) - - // Convert userArg and groupArg to be numeric, so we don't have to execute - // Atoi *twice* for each iteration over lines. - uidArg, uidErr := strconv.Atoi(userArg) - gidArg, gidErr := strconv.Atoi(groupArg) - - // Find the matching user. - users, err := ParsePasswdFilter(passwd, func(u User) bool { - if userArg == "" { - // Default to current state of the user. - return u.Uid == user.Uid - } - - if uidErr == nil { - // If the userArg is numeric, always treat it as a UID. - return uidArg == u.Uid - } - - return u.Name == userArg - }) - - // If we can't find the user, we have to bail. - if err != nil && passwd != nil { - if userArg == "" { - userArg = strconv.Itoa(user.Uid) - } - return nil, fmt.Errorf("unable to find user %s: %v", userArg, err) - } - - var matchedUserName string - if len(users) > 0 { - // First match wins, even if there's more than one matching entry. - matchedUserName = users[0].Name - user.Uid = users[0].Uid - user.Gid = users[0].Gid - user.Home = users[0].Home - } else if userArg != "" { - // If we can't find a user with the given username, the only other valid - // option is if it's a numeric username with no associated entry in passwd. - - if uidErr != nil { - // Not numeric. - return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries) - } - user.Uid = uidArg - - // Must be inside valid uid range. - if user.Uid < minId || user.Uid > maxId { - return nil, ErrRange - } - - // Okay, so it's numeric. We can just roll with this. - } - - // On to the groups. If we matched a username, we need to do this because of - // the supplementary group IDs. - if groupArg != "" || matchedUserName != "" { - groups, err := ParseGroupFilter(group, func(g Group) bool { - // If the group argument isn't explicit, we'll just search for it. - if groupArg == "" { - // Check if user is a member of this group. - for _, u := range g.List { - if u == matchedUserName { - return true - } - } - return false - } - - if gidErr == nil { - // If the groupArg is numeric, always treat it as a GID. - return gidArg == g.Gid - } - - return g.Name == groupArg - }) - if err != nil && group != nil { - return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err) - } - - // Only start modifying user.Gid if it is in explicit form. - if groupArg != "" { - if len(groups) > 0 { - // First match wins, even if there's more than one matching entry. - user.Gid = groups[0].Gid - } else { - // If we can't find a group with the given name, the only other valid - // option is if it's a numeric group name with no associated entry in group. - - if gidErr != nil { - // Not numeric. - return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries) - } - user.Gid = gidArg - - // Must be inside valid gid range. - if user.Gid < minId || user.Gid > maxId { - return nil, ErrRange - } - - // Okay, so it's numeric. We can just roll with this. - } - } else if len(groups) > 0 { - // Supplementary group ids only make sense if in the implicit form. - user.Sgids = make([]int, len(groups)) - for i, group := range groups { - user.Sgids[i] = group.Gid - } - } - } - - return user, nil -} - -// GetAdditionalGroups looks up a list of groups by name or group id -// against the given /etc/group formatted data. If a group name cannot -// be found, an error will be returned. If a group id cannot be found, -// or the given group data is nil, the id will be returned as-is -// provided it is in the legal range. -func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { - var groups = []Group{} - if group != nil { - var err error - groups, err = ParseGroupFilter(group, func(g Group) bool { - for _, ag := range additionalGroups { - if g.Name == ag || strconv.Itoa(g.Gid) == ag { - return true - } - } - return false - }) - if err != nil { - return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) - } - } - - gidMap := make(map[int]struct{}) - for _, ag := range additionalGroups { - var found bool - for _, g := range groups { - // if we found a matched group either by name or gid, take the - // first matched as correct - if g.Name == ag || strconv.Itoa(g.Gid) == ag { - if _, ok := gidMap[g.Gid]; !ok { - gidMap[g.Gid] = struct{}{} - found = true - break - } - } - } - // we asked for a group but didn't find it. let's check to see - // if we wanted a numeric group - if !found { - gid, err := strconv.Atoi(ag) - if err != nil { - return nil, fmt.Errorf("Unable to find group %s", ag) - } - // Ensure gid is inside gid range. - if gid < minId || gid > maxId { - return nil, ErrRange - } - gidMap[gid] = struct{}{} - } - } - gids := []int{} - for gid := range gidMap { - gids = append(gids, gid) - } - return gids, nil -} - -// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups -// that opens the groupPath given and gives it as an argument to -// GetAdditionalGroups. -func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { - var group io.Reader - - if groupFile, err := os.Open(groupPath); err == nil { - group = groupFile - defer groupFile.Close() - } - return GetAdditionalGroups(additionalGroups, group) -} - -func ParseSubIDFile(path string) ([]SubID, error) { - subid, err := os.Open(path) - if err != nil { - return nil, err - } - defer subid.Close() - return ParseSubID(subid) -} - -func ParseSubID(subid io.Reader) ([]SubID, error) { - return ParseSubIDFilter(subid, nil) -} - -func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) { - subid, err := os.Open(path) - if err != nil { - return nil, err - } - defer subid.Close() - return ParseSubIDFilter(subid, filter) -} - -func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { - if r == nil { - return nil, fmt.Errorf("nil source for subid-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []SubID{} - ) - - for s.Scan() { - if err := s.Err(); err != nil { - return nil, err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // see: man 5 subuid - p := SubID{} - parseLine(line, &p.Name, &p.SubID, &p.Count) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - - return out, nil -} - -func ParseIDMapFile(path string) ([]IDMap, error) { - r, err := os.Open(path) - if err != nil { - return nil, err - } - defer r.Close() - return ParseIDMap(r) -} - -func ParseIDMap(r io.Reader) ([]IDMap, error) { - return ParseIDMapFilter(r, nil) -} - -func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) { - r, err := os.Open(path) - if err != nil { - return nil, err - } - defer r.Close() - return ParseIDMapFilter(r, filter) -} - -func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) { - if r == nil { - return nil, fmt.Errorf("nil source for idmap-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []IDMap{} - ) - - for s.Scan() { - if err := s.Err(); err != nil { - return nil, err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // see: man 7 user_namespaces - p := IDMap{} - parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - - return out, nil -} diff --git a/vendor/github.com/opencontainers/runc/vendor.conf b/vendor/github.com/opencontainers/runc/vendor.conf deleted file mode 100644 index 22cba0f1b..000000000 --- a/vendor/github.com/opencontainers/runc/vendor.conf +++ /dev/null @@ -1,26 +0,0 @@ -# OCI runtime-spec. When updating this, make sure you use a version tag rather -# than a commit ID so it's much more obvious what version of the spec we are -# using. -github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 -# Core libcontainer functionality. -github.com/checkpoint-restore/go-criu v3.11 -github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 -github.com/opencontainers/selinux v1.2.2 -github.com/seccomp/libseccomp-golang 84e90a91acea0f4e51e62bc1a75de18b1fc0790f -github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac -github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 -github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270 -# systemd integration. -github.com/coreos/go-systemd v14 -github.com/coreos/pkg v3 -github.com/godbus/dbus v3 -github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8 -# Command-line interface. -github.com/cyphar/filepath-securejoin v0.2.1 -github.com/docker/go-units v0.2.0 -github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e -golang.org/x/sys 41f3e6584952bb034a481797859f6ab34b6803bd https://github.com/golang/sys - -# console dependencies -github.com/containerd/console 2748ece16665b45a47f884001d5831ec79703880 -github.com/pkg/errors v0.8.0 |