From 7f8172edf583e0d26bee5e06578a442c7507ba6f Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 5 May 2020 18:06:46 -0700 Subject: Restructure shim packages for merging. --- .gitignore | 1 - .travis.yml | 29 - CODEOWNERS | 1 - CONTRIBUTING.md | 28 - LICENSE | 202 ------- Makefile | 42 -- README.md | 26 - cmd/containerd-shim-runsc-v1/main.go | 24 - cmd/gvisor-containerd-shim/config.go | 42 -- cmd/gvisor-containerd-shim/main.go | 320 ---------- docs/README.md | 9 - docs/configure-containerd-shim-runsc-v1.md | 72 --- docs/configure-gvisor-containerd-shim.md | 42 -- docs/runtime-handler-quickstart.md | 251 -------- docs/runtime-handler-shim-v2-quickstart.md | 232 -------- docs/untrusted-workload-quickstart.md | 212 ------- go.mod | 35 -- go.sum | 67 --- pkg/go-runsc/runsc.go | 513 ---------------- pkg/go-runsc/utils.go | 46 -- pkg/shim/runsc/runsc.go | 511 ++++++++++++++++ pkg/shim/runsc/utils.go | 44 ++ pkg/shim/v1/proc/deleted_state.go | 49 ++ pkg/shim/v1/proc/exec.go | 282 +++++++++ pkg/shim/v1/proc/exec_state.go | 154 +++++ pkg/shim/v1/proc/init.go | 462 ++++++++++++++ pkg/shim/v1/proc/init_state.go | 182 ++++++ pkg/shim/v1/proc/io.go | 167 ++++++ pkg/shim/v1/proc/process.go | 37 ++ pkg/shim/v1/proc/types.go | 70 +++ pkg/shim/v1/proc/utils.go | 92 +++ pkg/shim/v1/shim/platform.go | 110 ++++ pkg/shim/v1/shim/service.go | 579 ++++++++++++++++++ pkg/shim/v1/utils/utils.go | 57 ++ pkg/shim/v1/utils/volumes.go | 156 +++++ pkg/shim/v1/utils/volumes_test.go | 309 ++++++++++ pkg/shim/v2/epoll.go | 128 ++++ pkg/shim/v2/options/options.go | 33 + pkg/shim/v2/service.go | 821 +++++++++++++++++++++++++ pkg/shim/v2/service_linux.go | 112 ++++ pkg/v1/proc/deleted_state.go | 54 -- pkg/v1/proc/exec.go | 284 --------- pkg/v1/proc/exec_state.go | 156 ----- pkg/v1/proc/init.go | 464 --------------- pkg/v1/proc/init_state.go | 184 ------ pkg/v1/proc/io.go | 169 ------ pkg/v1/proc/process.go | 39 -- pkg/v1/proc/types.go | 72 --- pkg/v1/proc/utils.go | 94 --- pkg/v1/shim/platform.go | 112 ---- pkg/v1/shim/service.go | 581 ------------------ pkg/v1/utils/utils.go | 59 -- pkg/v1/utils/volumes.go | 158 ----- pkg/v1/utils/volumes_test.go | 311 ---------- pkg/v2/epoll.go | 129 ---- pkg/v2/options/options.go | 34 -- pkg/v2/service.go | 826 -------------------------- pkg/v2/service_linux.go | 111 ---- shim/README.md | 16 + shim/configure-containerd-shim-runsc-v1.md | 72 +++ shim/configure-gvisor-containerd-shim.md | 42 ++ shim/runtime-handler-quickstart.md | 251 ++++++++ shim/runtime-handler-shim-v2-quickstart.md | 232 ++++++++ shim/untrusted-workload-quickstart.md | 212 +++++++ shim/v1/main.go | 26 + shim/v2/config.go | 40 ++ shim/v2/main.go | 318 ++++++++++ test/e2e/containerd-install.sh | 44 -- test/e2e/crictl-install.sh | 17 - test/e2e/run-container.sh | 30 - test/e2e/runsc-install.sh | 8 - test/e2e/runtime-handler-shim-v2/install.sh | 21 - test/e2e/runtime-handler-shim-v2/test.sh | 34 -- test/e2e/runtime-handler-shim-v2/validate.sh | 7 - test/e2e/runtime-handler/install.sh | 24 - test/e2e/runtime-handler/test.sh | 33 - test/e2e/runtime-handler/usage.sh | 30 - test/e2e/runtimeclass-install.sh | 33 - test/e2e/shim-install.sh | 28 - test/e2e/untrusted-workload/install.sh | 27 - test/e2e/untrusted-workload/test.sh | 33 - test/e2e/untrusted-workload/usage.sh | 33 - test/e2e/validate.sh | 17 - test/shim/containerd-install.sh | 44 ++ test/shim/crictl-install.sh | 17 + test/shim/run-container.sh | 30 + test/shim/runsc-install.sh | 8 + test/shim/runtime-handler-shim-v2/install.sh | 21 + test/shim/runtime-handler-shim-v2/test.sh | 34 ++ test/shim/runtime-handler-shim-v2/validate.sh | 7 + test/shim/runtime-handler/install.sh | 24 + test/shim/runtime-handler/test.sh | 33 + test/shim/runtime-handler/usage.sh | 30 + test/shim/runtimeclass-install.sh | 33 + test/shim/shim-install.sh | 28 + test/shim/untrusted-workload/install.sh | 27 + test/shim/untrusted-workload/test.sh | 33 + test/shim/untrusted-workload/usage.sh | 33 + test/shim/validate.sh | 17 + 99 files changed, 5983 insertions(+), 6450 deletions(-) delete mode 100644 .gitignore delete mode 100644 .travis.yml delete mode 100644 CODEOWNERS delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE delete mode 100644 Makefile delete mode 100644 README.md delete mode 100644 cmd/containerd-shim-runsc-v1/main.go delete mode 100644 cmd/gvisor-containerd-shim/config.go delete mode 100644 cmd/gvisor-containerd-shim/main.go delete mode 100644 docs/README.md delete mode 100644 docs/configure-containerd-shim-runsc-v1.md delete mode 100644 docs/configure-gvisor-containerd-shim.md delete mode 100644 docs/runtime-handler-quickstart.md delete mode 100644 docs/runtime-handler-shim-v2-quickstart.md delete mode 100644 docs/untrusted-workload-quickstart.md delete mode 100644 go.mod delete mode 100644 go.sum delete mode 100644 pkg/go-runsc/runsc.go delete mode 100644 pkg/go-runsc/utils.go create mode 100644 pkg/shim/runsc/runsc.go create mode 100644 pkg/shim/runsc/utils.go create mode 100644 pkg/shim/v1/proc/deleted_state.go create mode 100644 pkg/shim/v1/proc/exec.go create mode 100644 pkg/shim/v1/proc/exec_state.go create mode 100644 pkg/shim/v1/proc/init.go create mode 100644 pkg/shim/v1/proc/init_state.go create mode 100644 pkg/shim/v1/proc/io.go create mode 100644 pkg/shim/v1/proc/process.go create mode 100644 pkg/shim/v1/proc/types.go create mode 100644 pkg/shim/v1/proc/utils.go create mode 100644 pkg/shim/v1/shim/platform.go create mode 100644 pkg/shim/v1/shim/service.go create mode 100644 pkg/shim/v1/utils/utils.go create mode 100644 pkg/shim/v1/utils/volumes.go create mode 100644 pkg/shim/v1/utils/volumes_test.go create mode 100644 pkg/shim/v2/epoll.go create mode 100644 pkg/shim/v2/options/options.go create mode 100644 pkg/shim/v2/service.go create mode 100644 pkg/shim/v2/service_linux.go delete mode 100644 pkg/v1/proc/deleted_state.go delete mode 100644 pkg/v1/proc/exec.go delete mode 100644 pkg/v1/proc/exec_state.go delete mode 100644 pkg/v1/proc/init.go delete mode 100644 pkg/v1/proc/init_state.go delete mode 100644 pkg/v1/proc/io.go delete mode 100644 pkg/v1/proc/process.go delete mode 100644 pkg/v1/proc/types.go delete mode 100644 pkg/v1/proc/utils.go delete mode 100644 pkg/v1/shim/platform.go delete mode 100644 pkg/v1/shim/service.go delete mode 100644 pkg/v1/utils/utils.go delete mode 100644 pkg/v1/utils/volumes.go delete mode 100644 pkg/v1/utils/volumes_test.go delete mode 100644 pkg/v2/epoll.go delete mode 100644 pkg/v2/options/options.go delete mode 100644 pkg/v2/service.go delete mode 100644 pkg/v2/service_linux.go create mode 100644 shim/README.md create mode 100644 shim/configure-containerd-shim-runsc-v1.md create mode 100644 shim/configure-gvisor-containerd-shim.md create mode 100644 shim/runtime-handler-quickstart.md create mode 100644 shim/runtime-handler-shim-v2-quickstart.md create mode 100644 shim/untrusted-workload-quickstart.md create mode 100644 shim/v1/main.go create mode 100644 shim/v2/config.go create mode 100644 shim/v2/main.go delete mode 100755 test/e2e/containerd-install.sh delete mode 100755 test/e2e/crictl-install.sh delete mode 100755 test/e2e/run-container.sh delete mode 100755 test/e2e/runsc-install.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/install.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/test.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/validate.sh delete mode 100755 test/e2e/runtime-handler/install.sh delete mode 100755 test/e2e/runtime-handler/test.sh delete mode 100755 test/e2e/runtime-handler/usage.sh delete mode 100755 test/e2e/runtimeclass-install.sh delete mode 100755 test/e2e/shim-install.sh delete mode 100755 test/e2e/untrusted-workload/install.sh delete mode 100755 test/e2e/untrusted-workload/test.sh delete mode 100755 test/e2e/untrusted-workload/usage.sh delete mode 100755 test/e2e/validate.sh create mode 100755 test/shim/containerd-install.sh create mode 100755 test/shim/crictl-install.sh create mode 100755 test/shim/run-container.sh create mode 100755 test/shim/runsc-install.sh create mode 100755 test/shim/runtime-handler-shim-v2/install.sh create mode 100755 test/shim/runtime-handler-shim-v2/test.sh create mode 100755 test/shim/runtime-handler-shim-v2/validate.sh create mode 100755 test/shim/runtime-handler/install.sh create mode 100755 test/shim/runtime-handler/test.sh create mode 100755 test/shim/runtime-handler/usage.sh create mode 100755 test/shim/runtimeclass-install.sh create mode 100755 test/shim/shim-install.sh create mode 100755 test/shim/untrusted-workload/install.sh create mode 100755 test/shim/untrusted-workload/test.sh create mode 100755 test/shim/untrusted-workload/usage.sh create mode 100755 test/shim/validate.sh diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ae3c17260..000000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/bin/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d9ef5e33f..000000000 --- a/.travis.yml +++ /dev/null @@ -1,29 +0,0 @@ -dist: bionic -sudo: required -language: go -go: "1.14.x" - -env: - - CONTAINERD_VERSION=1.1.8 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 - - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler - - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 - -go_import_path: github.com/google/gvisor-containerd-shim - -addons: - apt: - packages: - - socat - - conntrack - - ipset - - libseccomp-dev - -before_install: - - uname -r - -script: - - make test - - ./test/e2e/${TEST}/test.sh diff --git a/CODEOWNERS b/CODEOWNERS deleted file mode 100644 index 7bf84e6dd..000000000 --- a/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -* @google/gvisor diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 939e5341e..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google.com/conduct/). diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d64569567..000000000 --- a/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/Makefile b/Makefile deleted file mode 100644 index abfadc1b5..000000000 --- a/Makefile +++ /dev/null @@ -1,42 +0,0 @@ -# Base path used to install. -DESTDIR=/usr/local -GC=go -GO_BUILD_FLAGS= -GO_TAGS= -GO_LDFLAGS=-ldflags '-s -w -extldflags "-static"' -SOURCES=$(shell find cmd/ pkg/ -name '*.go') -DEPLOY_PATH=cri-containerd-staging/gvisor-containerd-shim -VERSION=$(shell git rev-parse HEAD) -GO_MODULE=on - -all: binaries - -binaries: bin/gvisor-containerd-shim bin/containerd-shim-runsc-v1 - -bin/gvisor-containerd-shim: $(SOURCES) - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} build ${GO_BUILD_FLAGS} -o bin/gvisor-containerd-shim ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/gvisor-containerd-shim - -bin/containerd-shim-runsc-v1: $(SOURCES) - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} build ${GO_BUILD_FLAGS} -o bin/containerd-shim-runsc-v1 ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/containerd-shim-runsc-v1 - -install: bin/gvisor-containerd-shim - mkdir -p $(DESTDIR)/bin - install bin/gvisor-containerd-shim $(DESTDIR)/bin - install bin/containerd-shim-runsc-v1 $(DESTDIR)/bin - -uninstall: - rm -f $(DESTDIR)/bin/gvisor-containerd-shim - rm -f $(DESTDIR)/bin/containerd-shim-runsc-v1 - -clean: - rm -rf bin/* - -push: binaries - gsutil cp ./bin/gvisor-containerd-shim gs://$(DEPLOY_PATH)/gvisor-containerd-shim-$(VERSION) - gsutil cp ./bin/containerd-shim-runsc-v1 gs://$(DEPLOY_PATH)/containerd-shim-runsc-v1-$(VERSION) - echo "$(VERSION)" | gsutil cp - "gs://$(DEPLOY_PATH)/latest" - -.PHONY: test - -test: - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} test -v ./pkg/... ${SHIM_GO_LDFLAGS} ${GO_TAGS} diff --git a/README.md b/README.md deleted file mode 100644 index 40a474c4f..000000000 --- a/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# gvisor-containerd-shim - -[![Build Status](https://travis-ci.org/google/gvisor-containerd-shim.svg?branch=master)](https://travis-ci.org/google/gvisor-containerd-shim) -[![Go Report Card](https://goreportcard.com/badge/github.com/google/gvisor-containerd-shim)](https://goreportcard.com/report/github.com/google/gvisor-containerd-shim) - -gvisor-containerd-shim is a containerd shim for [gVisor](https://github.com/google/gvisor/). It implements the containerd v1 shim API. It can be used as a drop-in replacement for [containerd-shim](https://github.com/containerd/containerd/tree/master/cmd/containerd-shim) (though containerd-shim must still be installed). It allows the use of both gVisor (runsc) and normal containers in the same containerd installation by deferring to the runc shim if the desired runtime engine is not runsc. - -## Requirements - -- gvisor (runsc) >= 2018-12-07 -- containerd, containerd-shim >= 1.1 - -## Installation - -- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) - -## Configuration - -- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md) -- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](docs/configure-gvisor-containerd-shim.md) - -# Contributing - -See [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/cmd/containerd-shim-runsc-v1/main.go b/cmd/containerd-shim-runsc-v1/main.go deleted file mode 100644 index ec73edbd2..000000000 --- a/cmd/containerd-shim-runsc-v1/main.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - Copyright The containerd Authors. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package main - -import ( - "github.com/containerd/containerd/runtime/v2/shim" - - runsc "github.com/google/gvisor-containerd-shim/pkg/v2" -) - -func main() { - shim.Run("io.containerd.runsc.v1", runsc.New) -} diff --git a/cmd/gvisor-containerd-shim/config.go b/cmd/gvisor-containerd-shim/config.go deleted file mode 100644 index f03690304..000000000 --- a/cmd/gvisor-containerd-shim/config.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import "github.com/BurntSushi/toml" - -// config is the configuration for gvisor containerd shim. -type config struct { - // RuncShim is the shim binary path for standard containerd-shim for runc. - // When the runtime is `runc`, gvisor containerd shim will exec current - // process to standard containerd-shim. This is a work around for containerd - // 1.1. In containerd 1.2, containerd will choose different containerd-shims - // based on runtime. - RuncShim string `toml:"runc_shim"` - // RunscConfig is configuration for runsc. The key value will be converted - // to runsc flags --key=value directly. - RunscConfig map[string]string `toml:"runsc_config"` -} - -// loadConfig load gvisor containerd shim config from config file. -func loadConfig(path string) (*config, error) { - var c config - _, err := toml.DecodeFile(path, &c) - if err != nil { - return &c, err - } - return &c, nil -} diff --git a/cmd/gvisor-containerd-shim/main.go b/cmd/gvisor-containerd-shim/main.go deleted file mode 100644 index ea26aa105..000000000 --- a/cmd/gvisor-containerd-shim/main.go +++ /dev/null @@ -1,320 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import ( - "bytes" - "context" - "flag" - "fmt" - "net" - "os" - "os/exec" - "os/signal" - "path/filepath" - "runtime" - "runtime/debug" - "strings" - "sync" - "syscall" - "time" - - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime/v1/linux/proc" - containerdshim "github.com/containerd/containerd/runtime/v1/shim" - shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/ttrpc" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/shim" -) - -var ( - debugFlag bool - namespaceFlag string - socketFlag string - addressFlag string - workdirFlag string - runtimeRootFlag string - containerdBinaryFlag string - shimConfigFlag string -) - -// ShimConfigPath is the default shim config file path. -const ShimConfigPath = "/etc/containerd/gvisor-containerd-shim.toml" - -func init() { - flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") - flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") - flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") - flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") - flag.StringVar(&workdirFlag, "workdir", "", "path used to storge large temporary data") - // Containerd default to runc, unless another runtime is explicitly specified. - // We keep the same default to make the default behavior consistent. - flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime") - // currently, the `containerd publish` utility is embedded in the daemon binary. - // The daemon invokes `containerd-shim -containerd-binary ...` with its own os.Executable() path. - flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)") - flag.StringVar(&shimConfigFlag, "config", ShimConfigPath, "path to the shim configuration file") - flag.Parse() -} - -func main() { - // This is a hack. Exec current process to run standard containerd-shim - // if runtime root is not `runsc`. We don't need this for shim v2 api. - if filepath.Base(runtimeRootFlag) != "runsc" { - if err := executeRuncShim(); err != nil { - fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) - os.Exit(1) - } - } - - debug.SetGCPercent(40) - go func() { - for range time.Tick(30 * time.Second) { - debug.FreeOSMemory() - } - }() - - if debugFlag { - logrus.SetLevel(logrus.DebugLevel) - } - - if os.Getenv("GOMAXPROCS") == "" { - // If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce - // the number of Go stacks present in the shim. - runtime.GOMAXPROCS(2) - } - - // Run regular shim if needed. - if err := executeShim(); err != nil { - fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) - os.Exit(1) - } -} - -// executeRuncShim execs current process to a containerd-shim process and -// retains all flags and envs. -func executeRuncShim() error { - c, err := loadConfig(shimConfigFlag) - if err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "failed to load shim config") - } - shimPath := c.RuncShim - if shimPath == "" { - shimPath, err = exec.LookPath("containerd-shim") - if err != nil { - return errors.Wrapf(err, "lookup containerd-shim") - } - } - - args := append([]string{shimPath}, os.Args[1:]...) - if err := syscall.Exec(shimPath, args, os.Environ()); err != nil { - return errors.Wrapf(err, "exec containerd-shim %q", shimPath) - } - return nil -} - -func executeShim() error { - // start handling signals as soon as possible so that things are properly reaped - // or if runtime exits before we hit the handler - signals, err := setupSignals() - if err != nil { - return err - } - dump := make(chan os.Signal, 32) - signal.Notify(dump, syscall.SIGUSR1) - - path, err := os.Getwd() - if err != nil { - return err - } - server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) - if err != nil { - return errors.Wrap(err, "failed creating server") - } - c, err := loadConfig(shimConfigFlag) - if err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "failed to load shim config") - } - sv, err := shim.NewService( - shim.Config{ - Path: path, - Namespace: namespaceFlag, - WorkDir: workdirFlag, - RuntimeRoot: runtimeRootFlag, - RunscConfig: c.RunscConfig, - }, - &remoteEventsPublisher{address: addressFlag}, - ) - if err != nil { - return err - } - logrus.Debug("registering ttrpc server") - shimapi.RegisterShimService(server, sv) - - socket := socketFlag - if err := serve(server, socket); err != nil { - return err - } - logger := logrus.WithFields(logrus.Fields{ - "pid": os.Getpid(), - "path": path, - "namespace": namespaceFlag, - }) - go func() { - for range dump { - dumpStacks(logger) - } - }() - return handleSignals(logger, signals, server, sv) -} - -// serve serves the ttrpc API over a unix socket at the provided path -// this function does not block -func serve(server *ttrpc.Server, path string) error { - var ( - l net.Listener - err error - ) - if path == "" { - l, err = net.FileListener(os.NewFile(3, "socket")) - path = "[inherited from parent]" - } else { - if len(path) > 106 { - return errors.Errorf("%q: unix socket path too long (> 106)", path) - } - l, err = net.Listen("unix", "\x00"+path) - } - if err != nil { - return err - } - logrus.WithField("socket", path).Debug("serving api on unix socket") - go func() { - defer l.Close() - if err := server.Serve(context.Background(), l); err != nil && - !strings.Contains(err.Error(), "use of closed network connection") { - logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") - } - }() - return nil -} - -// setupSignals creates a new signal handler for all signals and sets the shim as a -// sub-reaper so that the container processes are reparented -func setupSignals() (chan os.Signal, error) { - signals := make(chan os.Signal, 32) - signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) - // make sure runc is setup to use the monitor - // for waiting on processes - // TODO(random-liu): Move shim/reaper.go to a separate package. - runsc.Monitor = containerdshim.Default - // set the shim as the subreaper for all orphaned processes created by the container - if err := system.SetSubreaper(1); err != nil { - return nil, err - } - return signals, nil -} - -func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { - var ( - termOnce sync.Once - done = make(chan struct{}) - ) - - for { - select { - case <-done: - return nil - case s := <-signals: - switch s { - case unix.SIGCHLD: - if err := containerdshim.Reap(); err != nil { - logger.WithError(err).Error("reap exit status") - } - case unix.SIGTERM, unix.SIGINT: - go termOnce.Do(func() { - ctx := context.TODO() - if err := server.Shutdown(ctx); err != nil { - logger.WithError(err).Error("failed to shutdown server") - } - // Ensure our child is dead if any - sv.Kill(ctx, &shimapi.KillRequest{ - Signal: uint32(syscall.SIGKILL), - All: true, - }) - sv.Delete(context.Background(), &ptypes.Empty{}) - close(done) - }) - case unix.SIGPIPE: - } - } - } -} - -func dumpStacks(logger *logrus.Entry) { - var ( - buf []byte - stackSize int - ) - bufferLen := 16384 - for stackSize == len(buf) { - buf = make([]byte, bufferLen) - stackSize = runtime.Stack(buf, true) - bufferLen *= 2 - } - buf = buf[:stackSize] - logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) -} - -type remoteEventsPublisher struct { - address string -} - -func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { - ns, _ := namespaces.Namespace(ctx) - encoded, err := typeurl.MarshalAny(event) - if err != nil { - return err - } - data, err := encoded.Marshal() - if err != nil { - return err - } - cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) - cmd.Stdin = bytes.NewReader(data) - c, err := containerdshim.Default.Start(cmd) - if err != nil { - return err - } - status, err := containerdshim.Default.Wait(cmd, c) - if err != nil { - return err - } - if status != 0 { - return errors.New("failed to publish event") - } - return nil -} diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index f1091f934..000000000 --- a/docs/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# gvisor-containerd-shim docs - -Everything you need to know about gvisor-containerd-shim - -- [Untrusted Workload Quick Start (containerd >=1.1)](untrusted-workload-quickstart.md) -- [Runtime Handler Quick Start (containerd >=1.2)](runtime-handler-quickstart.md) -- [Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) -- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](configure-containerd-shim-runsc-v1.md) -- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](configure-gvisor-containerd-shim.md) diff --git a/docs/configure-containerd-shim-runsc-v1.md b/docs/configure-containerd-shim-runsc-v1.md deleted file mode 100644 index 977ceacbd..000000000 --- a/docs/configure-containerd-shim-runsc-v1.md +++ /dev/null @@ -1,72 +0,0 @@ -# Configure containerd-shim-runsc-v1 (Shim V2) - -This document describes how to configure runtime options for -`containerd-shim-runsc-v1`. This is follows on to the instructions of -[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) -and requires containerd 1.3 or later. - -### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`. - -`containerd-shim-runsc-v1` supports a few different configuration options based -on the version of containerd that is used. For versions >= 1.3, it supports a -configurable config path in the containerd runtime configuration. - -```shell -{ # Step 1: Update runtime options for runsc in containerd config.toml -cat < -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) -```shell -{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 - make - sudo make install -} -``` - -### Configure containerd - -1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is - in `${PATH}`. - -[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) -```shell -{ # Step 1: Create containerd config.toml -cat <: -// -func (r *Runsc) runOrError(cmd *exec.Cmd) error { - if cmd.Stdout != nil || cmd.Stderr != nil { - ec, err := Monitor.Start(cmd) - if err != nil { - return err - } - status, err := Monitor.Wait(cmd, ec) - if err == nil && status != 0 { - err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) - } - return err - } - data, err := cmdOutput(cmd, true) - if err != nil { - return fmt.Errorf("%s: %s", err, data) - } - return nil -} - -func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd { - command := r.Command - if command == "" { - command = DefaultCommand - } - cmd := exec.CommandContext(context, command, append(r.args(), args...)...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: r.Setpgid, - } - if r.PdeathSignal != 0 { - cmd.SysProcAttr.Pdeathsig = r.PdeathSignal - } - - return cmd -} - -func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) { - b := getBuf() - defer putBuf(b) - - cmd.Stdout = b - if combined { - cmd.Stderr = b - } - ec, err := Monitor.Start(cmd) - if err != nil { - return nil, err - } - - status, err := Monitor.Wait(cmd, ec) - if err == nil && status != 0 { - err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) - } - - return b.Bytes(), err -} diff --git a/pkg/go-runsc/utils.go b/pkg/go-runsc/utils.go deleted file mode 100644 index e4c3c937a..000000000 --- a/pkg/go-runsc/utils.go +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package runsc - -import ( - "bytes" - "strings" - "sync" -) - -var bytesBufferPool = sync.Pool{ - New: func() interface{} { - return bytes.NewBuffer(nil) - }, -} - -func getBuf() *bytes.Buffer { - return bytesBufferPool.Get().(*bytes.Buffer) -} - -func putBuf(b *bytes.Buffer) { - b.Reset() - bytesBufferPool.Put(b) -} - -// FormatLogPath parses runsc config, and fill in %ID% in the log path. -func FormatLogPath(id string, config map[string]string) { - if path, ok := config["debug-log"]; ok { - config["debug-log"] = strings.Replace(path, "%ID%", id, -1) - } -} diff --git a/pkg/shim/runsc/runsc.go b/pkg/shim/runsc/runsc.go new file mode 100644 index 000000000..41ba9c3af --- /dev/null +++ b/pkg/shim/runsc/runsc.go @@ -0,0 +1,511 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runsc + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" + "time" + + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +var Monitor runc.ProcessMonitor = runc.Monitor + +// DefaultCommand is the default command for Runsc +const DefaultCommand = "runsc" + +// Runsc is the client to the runsc cli +type Runsc struct { + Command string + PdeathSignal syscall.Signal + Setpgid bool + Root string + Log string + LogFormat runc.Format + Config map[string]string +} + +// List returns all containers created inside the provided runsc root directory +func (r *Runsc) List(context context.Context) ([]*runc.Container, error) { + data, err := cmdOutput(r.command(context, "list", "--format=json"), false) + if err != nil { + return nil, err + } + var out []*runc.Container + if err := json.Unmarshal(data, &out); err != nil { + return nil, err + } + return out, nil +} + +// State returns the state for the container provided by id +func (r *Runsc) State(context context.Context, id string) (*runc.Container, error) { + data, err := cmdOutput(r.command(context, "state", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + var c runc.Container + if err := json.Unmarshal(data, &c); err != nil { + return nil, err + } + return &c, nil +} + +type CreateOpts struct { + runc.IO + // PidFile is a path to where a pid file should be created + PidFile string + ConsoleSocket runc.ConsoleSocket + // UserLog is a path to where runsc user log should be generated. + UserLog string +} + +func (o *CreateOpts) args() (out []string, err error) { + if o.PidFile != "" { + abs, err := filepath.Abs(o.PidFile) + if err != nil { + return nil, err + } + out = append(out, "--pid-file", abs) + } + if o.ConsoleSocket != nil { + out = append(out, "--console-socket", o.ConsoleSocket.Path()) + } + if o.UserLog != "" { + out = append(out, "--user-log", o.UserLog) + } + return out, nil +} + +// Create creates a new container and returns its pid if it was created successfully +func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateOpts) error { + args := []string{"create", "--bundle", bundle} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) + if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if opts != nil && opts.IO != nil { + if c, ok := opts.IO.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return err +} + +// Start will start an already created container +func (r *Runsc) Start(context context.Context, id string, cio runc.IO) error { + cmd := r.command(context, "start", id) + if cio != nil { + cio.Set(cmd) + } + + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if cio != nil { + if c, ok := cio.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return err +} + +type waitResult struct { + ID string `json:"id"` + ExitStatus int `json:"exitStatus"` +} + +// Wait will wait for a running container, and return its exit status. +// TODO(random-liu): Add exec process support. +func (r *Runsc) Wait(context context.Context, id string) (int, error) { + data, err := cmdOutput(r.command(context, "wait", id), true) + if err != nil { + return 0, fmt.Errorf("%s: %s", err, data) + } + var res waitResult + if err := json.Unmarshal(data, &res); err != nil { + return 0, err + } + return res.ExitStatus, nil +} + +type ExecOpts struct { + runc.IO + PidFile string + InternalPidFile string + ConsoleSocket runc.ConsoleSocket + Detach bool +} + +func (o *ExecOpts) args() (out []string, err error) { + if o.ConsoleSocket != nil { + out = append(out, "--console-socket", o.ConsoleSocket.Path()) + } + if o.Detach { + out = append(out, "--detach") + } + if o.PidFile != "" { + abs, err := filepath.Abs(o.PidFile) + if err != nil { + return nil, err + } + out = append(out, "--pid-file", abs) + } + if o.InternalPidFile != "" { + abs, err := filepath.Abs(o.InternalPidFile) + if err != nil { + return nil, err + } + out = append(out, "--internal-pid-file", abs) + } + return out, nil +} + +// Exec executres and additional process inside the container based on a full +// OCI Process specification +func (r *Runsc) Exec(context context.Context, id string, spec specs.Process, opts *ExecOpts) error { + f, err := ioutil.TempFile(os.Getenv("XDG_RUNTIME_DIR"), "runsc-process") + if err != nil { + return err + } + defer os.Remove(f.Name()) + err = json.NewEncoder(f).Encode(spec) + f.Close() + if err != nil { + return err + } + args := []string{"exec", "--process", f.Name()} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) + if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if opts != nil && opts.IO != nil { + if c, ok := opts.IO.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + return err +} + +// Run runs the create, start, delete lifecycle of the container +// and returns its exit status after it has exited +func (r *Runsc) Run(context context.Context, id, bundle string, opts *CreateOpts) (int, error) { + args := []string{"run", "--bundle", bundle} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return -1, err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) + if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + ec, err := Monitor.Start(cmd) + if err != nil { + return -1, err + } + return Monitor.Wait(cmd, ec) +} + +type DeleteOpts struct { + Force bool +} + +func (o *DeleteOpts) args() (out []string) { + if o.Force { + out = append(out, "--force") + } + return out +} + +// Delete deletes the container +func (r *Runsc) Delete(context context.Context, id string, opts *DeleteOpts) error { + args := []string{"delete"} + if opts != nil { + args = append(args, opts.args()...) + } + return r.runOrError(r.command(context, append(args, id)...)) +} + +// KillOpts specifies options for killing a container and its processes +type KillOpts struct { + All bool + Pid int +} + +func (o *KillOpts) args() (out []string) { + if o.All { + out = append(out, "--all") + } + if o.Pid != 0 { + out = append(out, "--pid", strconv.Itoa(o.Pid)) + } + return out +} + +// Kill sends the specified signal to the container +func (r *Runsc) Kill(context context.Context, id string, sig int, opts *KillOpts) error { + args := []string{ + "kill", + } + if opts != nil { + args = append(args, opts.args()...) + } + return r.runOrError(r.command(context, append(args, id, strconv.Itoa(sig))...)) +} + +// Stats return the stats for a container like cpu, memory, and io +func (r *Runsc) Stats(context context.Context, id string) (*runc.Stats, error) { + cmd := r.command(context, "events", "--stats", id) + rd, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + ec, err := Monitor.Start(cmd) + if err != nil { + return nil, err + } + defer func() { + rd.Close() + Monitor.Wait(cmd, ec) + }() + var e runc.Event + if err := json.NewDecoder(rd).Decode(&e); err != nil { + return nil, err + } + return e.Stats, nil +} + +// Events returns an event stream from runsc for a container with stats and OOM notifications +func (r *Runsc) Events(context context.Context, id string, interval time.Duration) (chan *runc.Event, error) { + cmd := r.command(context, "events", fmt.Sprintf("--interval=%ds", int(interval.Seconds())), id) + rd, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + ec, err := Monitor.Start(cmd) + if err != nil { + rd.Close() + return nil, err + } + var ( + dec = json.NewDecoder(rd) + c = make(chan *runc.Event, 128) + ) + go func() { + defer func() { + close(c) + rd.Close() + Monitor.Wait(cmd, ec) + }() + for { + var e runc.Event + if err := dec.Decode(&e); err != nil { + if err == io.EOF { + return + } + e = runc.Event{ + Type: "error", + Err: err, + } + } + c <- &e + } + }() + return c, nil +} + +// Ps lists all the processes inside the container returning their pids +func (r *Runsc) Ps(context context.Context, id string) ([]int, error) { + data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + var pids []int + if err := json.Unmarshal(data, &pids); err != nil { + return nil, err + } + return pids, nil +} + +// Top lists all the processes inside the container returning the full ps data +func (r *Runsc) Top(context context.Context, id string) (*runc.TopResults, error) { + data, err := cmdOutput(r.command(context, "ps", "--format", "table", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + + topResults, err := runc.ParsePSOutput(data) + if err != nil { + return nil, fmt.Errorf("%s: ", err) + } + return topResults, nil +} + +func (r *Runsc) args() []string { + var args []string + if r.Root != "" { + args = append(args, fmt.Sprintf("--root=%s", r.Root)) + } + if r.Log != "" { + args = append(args, fmt.Sprintf("--log=%s", r.Log)) + } + if r.LogFormat != "" { + args = append(args, fmt.Sprintf("--log-format=%s", r.LogFormat)) + } + for k, v := range r.Config { + args = append(args, fmt.Sprintf("--%s=%s", k, v)) + } + return args +} + +// runOrError will run the provided command. If an error is +// encountered and neither Stdout or Stderr was set the error and the +// stderr of the command will be returned in the format of : +// +func (r *Runsc) runOrError(cmd *exec.Cmd) error { + if cmd.Stdout != nil || cmd.Stderr != nil { + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + return err + } + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil +} + +func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd { + command := r.Command + if command == "" { + command = DefaultCommand + } + cmd := exec.CommandContext(context, command, append(r.args(), args...)...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: r.Setpgid, + } + if r.PdeathSignal != 0 { + cmd.SysProcAttr.Pdeathsig = r.PdeathSignal + } + + return cmd +} + +func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) { + b := getBuf() + defer putBuf(b) + + cmd.Stdout = b + if combined { + cmd.Stderr = b + } + ec, err := Monitor.Start(cmd) + if err != nil { + return nil, err + } + + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return b.Bytes(), err +} diff --git a/pkg/shim/runsc/utils.go b/pkg/shim/runsc/utils.go new file mode 100644 index 000000000..2732d5e98 --- /dev/null +++ b/pkg/shim/runsc/utils.go @@ -0,0 +1,44 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runsc + +import ( + "bytes" + "strings" + "sync" +) + +var bytesBufferPool = sync.Pool{ + New: func() interface{} { + return bytes.NewBuffer(nil) + }, +} + +func getBuf() *bytes.Buffer { + return bytesBufferPool.Get().(*bytes.Buffer) +} + +func putBuf(b *bytes.Buffer) { + b.Reset() + bytesBufferPool.Put(b) +} + +// FormatLogPath parses runsc config, and fill in %ID% in the log path. +func FormatLogPath(id string, config map[string]string) { + if path, ok := config["debug-log"]; ok { + config["debug-log"] = strings.Replace(path, "%ID%", id, -1) + } +} diff --git a/pkg/shim/v1/proc/deleted_state.go b/pkg/shim/v1/proc/deleted_state.go new file mode 100644 index 000000000..0196c96dd --- /dev/null +++ b/pkg/shim/v1/proc/deleted_state.go @@ -0,0 +1,49 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/pkg/errors" +) + +type deletedState struct{} + +func (*deletedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize a deleted process") +} + +func (*deletedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a deleted process") +} + +func (*deletedState) Delete(ctx context.Context) error { + return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") +} + +func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { + return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") +} + +func (*deletedState) SetExited(status int) {} + +func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return nil, errors.Errorf("cannot exec in a deleted state") +} diff --git a/pkg/shim/v1/proc/exec.go b/pkg/shim/v1/proc/exec.go new file mode 100644 index 000000000..17199960e --- /dev/null +++ b/pkg/shim/v1/proc/exec.go @@ -0,0 +1,282 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "golang.org/x/sys/unix" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +type execProcess struct { + wg sync.WaitGroup + + execState execState + + mu sync.Mutex + id string + console console.Console + io runc.IO + status int + exited time.Time + pid int + internalPid int + closers []io.Closer + stdin io.Closer + stdio proc.Stdio + path string + spec specs.Process + + parent *Init + waitBlock chan struct{} +} + +func (e *execProcess) Wait() { + <-e.waitBlock +} + +func (e *execProcess) ID() string { + return e.id +} + +func (e *execProcess) Pid() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.pid +} + +func (e *execProcess) ExitStatus() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.status +} + +func (e *execProcess) ExitedAt() time.Time { + e.mu.Lock() + defer e.mu.Unlock() + return e.exited +} + +func (e *execProcess) SetExited(status int) { + e.mu.Lock() + defer e.mu.Unlock() + + e.execState.SetExited(status) +} + +func (e *execProcess) setExited(status int) { + e.status = status + e.exited = time.Now() + e.parent.Platform.ShutdownConsole(context.Background(), e.console) + close(e.waitBlock) +} + +func (e *execProcess) Delete(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Delete(ctx) +} + +func (e *execProcess) delete(ctx context.Context) error { + e.wg.Wait() + if e.io != nil { + for _, c := range e.closers { + c.Close() + } + e.io.Close() + } + pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + // silently ignore error + os.Remove(pidfile) + internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + // silently ignore error + os.Remove(internalPidfile) + return nil +} + +func (e *execProcess) Resize(ws console.WinSize) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Resize(ws) +} + +func (e *execProcess) resize(ws console.WinSize) error { + if e.console == nil { + return nil + } + return e.console.Resize(ws) +} + +func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Kill(ctx, sig, false) +} + +func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { + internalPid := e.internalPid + if internalPid != 0 { + if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ + Pid: internalPid, + }); err != nil { + // If this returns error, consider the process has already stopped. + // TODO: Fix after signal handling is fixed. + return errors.Wrapf(errdefs.ErrNotFound, err.Error()) + } + } + return nil +} + +func (e *execProcess) Stdin() io.Closer { + return e.stdin +} + +func (e *execProcess) Stdio() proc.Stdio { + return e.stdio +} + +func (e *execProcess) Start(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Start(ctx) +} + +func (e *execProcess) start(ctx context.Context) (err error) { + var ( + socket *runc.Socket + pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + ) + if e.stdio.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return errors.Wrap(err, "failed to create runc console socket") + } + defer socket.Close() + } else if e.stdio.IsNull() { + if e.io, err = runc.NewNullIO(); err != nil { + return errors.Wrap(err, "creating new NULL IO") + } + } else { + if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { + return errors.Wrap(err, "failed to create runc io pipes") + } + } + opts := &runsc.ExecOpts{ + PidFile: pidfile, + InternalPidFile: internalPidfile, + IO: e.io, + Detach: true, + } + if socket != nil { + opts.ConsoleSocket = socket + } + eventCh := e.parent.Monitor.Subscribe() + defer func() { + // Unsubscribe if an error is returned. + if err != nil { + e.parent.Monitor.Unsubscribe(eventCh) + } + }() + if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { + close(e.waitBlock) + return e.parent.runtimeError(err, "OCI runtime exec failed") + } + if e.stdio.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) + } + e.closers = append(e.closers, sc) + e.stdin = sc + } + var copyWaitGroup sync.WaitGroup + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return errors.Wrap(err, "failed to retrieve console master") + } + if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start console copy") + } + } else if !e.stdio.IsNull() { + if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start io pipe copy") + } + } + copyWaitGroup.Wait() + pid, err := runc.ReadPidFile(opts.PidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") + } + e.pid = pid + internalPid, err := runc.ReadPidFile(opts.InternalPidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid") + } + e.internalPid = internalPid + go func() { + defer e.parent.Monitor.Unsubscribe(eventCh) + for event := range eventCh { + if event.Pid == e.pid { + ExitCh <- Exit{ + Timestamp: event.Timestamp, + ID: e.id, + Status: event.Status, + } + break + } + } + }() + return nil +} + +func (e *execProcess) Status(ctx context.Context) (string, error) { + e.mu.Lock() + defer e.mu.Unlock() + // if we don't have a pid then the exec process has just been created + if e.pid == 0 { + return "created", nil + } + // if we have a pid and it can be signaled, the process is running + // TODO(random-liu): Use `runsc kill --pid`. + if err := unix.Kill(e.pid, 0); err == nil { + return "running", nil + } + // else if we have a pid but it can nolonger be signaled, it has stopped + return "stopped", nil +} diff --git a/pkg/shim/v1/proc/exec_state.go b/pkg/shim/v1/proc/exec_state.go new file mode 100644 index 000000000..5416cb601 --- /dev/null +++ b/pkg/shim/v1/proc/exec_state.go @@ -0,0 +1,154 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/pkg/errors" +) + +type execState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type execCreatedState struct { + p *execProcess +} + +func (s *execCreatedState) transition(name string) error { + switch name { + case "running": + s.p.execState = &execRunningState{p: s.p} + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + case "deleted": + s.p.execState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execCreatedState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execCreatedState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + return err + } + return s.transition("running") +} + +func (s *execCreatedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execCreatedState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execRunningState struct { + p *execProcess +} + +func (s *execRunningState) transition(name string) error { + switch name { + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execRunningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execRunningState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a running process") +} + +func (s *execRunningState) Delete(ctx context.Context) error { + return errors.Errorf("cannot delete a running process") +} + +func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execRunningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execStoppedState struct { + p *execProcess +} + +func (s *execStoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.execState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execStoppedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize a stopped container") +} + +func (s *execStoppedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a stopped process") +} + +func (s *execStoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execStoppedState) SetExited(status int) { + // no op +} diff --git a/pkg/shim/v1/proc/init.go b/pkg/shim/v1/proc/init.go new file mode 100644 index 000000000..fadfc9146 --- /dev/null +++ b/pkg/shim/v1/proc/init.go @@ -0,0 +1,462 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "encoding/json" + "io" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +// InitPidFile name of the file that contains the init pid +const InitPidFile = "init.pid" + +// Init represents an initial process for a container +type Init struct { + wg sync.WaitGroup + initState initState + + // mu is used to ensure that `Start()` and `Exited()` calls return in + // the right order when invoked in separate go routines. + // This is the case within the shim implementation as it makes use of + // the reaper interface. + mu sync.Mutex + + waitBlock chan struct{} + + WorkDir string + + id string + Bundle string + console console.Console + Platform proc.Platform + io runc.IO + runtime *runsc.Runsc + status int + exited time.Time + pid int + closers []io.Closer + stdin io.Closer + stdio proc.Stdio + Rootfs string + IoUID int + IoGID int + Sandbox bool + UserLog string + Monitor ProcessMonitor +} + +// NewRunsc returns a new runsc instance for a process +func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { + if root == "" { + root = RunscRoot + } + return &runsc.Runsc{ + Command: runtime, + PdeathSignal: syscall.SIGKILL, + Log: filepath.Join(path, "log.json"), + LogFormat: runc.JSON, + Root: filepath.Join(root, namespace), + Config: config, + } +} + +// New returns a new init process +func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { + p := &Init{ + id: id, + runtime: runtime, + stdio: stdio, + status: 0, + waitBlock: make(chan struct{}), + } + p.initState = &createdState{p: p} + return p +} + +// Create the process with the provided config +func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { + var socket *runc.Socket + if r.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return errors.Wrap(err, "failed to create OCI runtime console socket") + } + defer socket.Close() + } else if hasNoIO(r) { + if p.io, err = runc.NewNullIO(); err != nil { + return errors.Wrap(err, "creating new NULL IO") + } + } else { + if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { + return errors.Wrap(err, "failed to create OCI runtime io pipes") + } + } + pidFile := filepath.Join(p.Bundle, InitPidFile) + opts := &runsc.CreateOpts{ + PidFile: pidFile, + } + if socket != nil { + opts.ConsoleSocket = socket + } + if p.Sandbox { + opts.IO = p.io + // UserLog is only useful for sandbox. + opts.UserLog = p.UserLog + } + if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { + return p.runtimeError(err, "OCI runtime create failed") + } + if r.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) + } + p.stdin = sc + p.closers = append(p.closers, sc) + } + var copyWaitGroup sync.WaitGroup + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return errors.Wrap(err, "failed to retrieve console master") + } + console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) + if err != nil { + return errors.Wrap(err, "failed to start console copy") + } + p.console = console + } else if !hasNoIO(r) { + if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start io pipe copy") + } + } + + copyWaitGroup.Wait() + pid, err := runc.ReadPidFile(pidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime container pid") + } + p.pid = pid + return nil +} + +// Wait for the process to exit +func (p *Init) Wait() { + <-p.waitBlock +} + +// ID of the process +func (p *Init) ID() string { + return p.id +} + +// Pid of the process +func (p *Init) Pid() int { + return p.pid +} + +// ExitStatus of the process +func (p *Init) ExitStatus() int { + p.mu.Lock() + defer p.mu.Unlock() + return p.status +} + +// ExitedAt at time when the process exited +func (p *Init) ExitedAt() time.Time { + p.mu.Lock() + defer p.mu.Unlock() + return p.exited +} + +// Status of the process +func (p *Init) Status(ctx context.Context) (string, error) { + p.mu.Lock() + defer p.mu.Unlock() + c, err := p.runtime.State(ctx, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "stopped", nil + } + return "", p.runtimeError(err, "OCI runtime state failed") + } + return p.convertStatus(c.Status), nil +} + +// Start the init process +func (p *Init) Start(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Start(ctx) +} + +func (p *Init) start(ctx context.Context) error { + var cio runc.IO + if !p.Sandbox { + cio = p.io + } + if err := p.runtime.Start(ctx, p.id, cio); err != nil { + return p.runtimeError(err, "OCI runtime start failed") + } + go func() { + status, err := p.runtime.Wait(context.Background(), p.id) + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) + // TODO(random-liu): Handle runsc kill error. + if err := p.killAll(ctx); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) + } + status = internalErrorCode + } + ExitCh <- Exit{ + Timestamp: time.Now(), + ID: p.id, + Status: status, + } + }() + return nil +} + +// SetExited of the init process with the next status +func (p *Init) SetExited(status int) { + p.mu.Lock() + defer p.mu.Unlock() + + p.initState.SetExited(status) +} + +func (p *Init) setExited(status int) { + p.exited = time.Now() + p.status = status + p.Platform.ShutdownConsole(context.Background(), p.console) + close(p.waitBlock) +} + +// Delete the init process +func (p *Init) Delete(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Delete(ctx) +} + +func (p *Init) delete(ctx context.Context) error { + p.killAll(ctx) + p.wg.Wait() + err := p.runtime.Delete(ctx, p.id, nil) + // ignore errors if a runtime has already deleted the process + // but we still hold metadata and pipes + // + // this is common during a checkpoint, runc will delete the container state + // after a checkpoint and the container will no longer exist within runc + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + err = nil + } else { + err = p.runtimeError(err, "failed to delete task") + } + } + if p.io != nil { + for _, c := range p.closers { + c.Close() + } + p.io.Close() + } + if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") + if err == nil { + err = errors.Wrap(err2, "failed rootfs umount") + } + } + return err +} + +// Resize the init processes console +func (p *Init) Resize(ws console.WinSize) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +func (p *Init) resize(ws console.WinSize) error { + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +// Kill the init process +func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Kill(ctx, signal, all) +} + +func (p *Init) kill(context context.Context, signal uint32, all bool) error { + var ( + killErr error + backoff = 100 * time.Millisecond + ) + timeout := 1 * time.Second + for start := time.Now(); time.Now().Sub(start) < timeout; { + c, err := p.runtime.State(context, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return errors.Wrapf(errdefs.ErrNotFound, "no such process") + } + return p.runtimeError(err, "OCI runtime state failed") + } + // For runsc, signal only works when container is running state. + // If the container is not in running state, directly return + // "no such process" + if p.convertStatus(c.Status) == "stopped" { + return errors.Wrapf(errdefs.ErrNotFound, "no such process") + } + killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ + All: all, + }) + if killErr == nil { + return nil + } + time.Sleep(backoff) + backoff *= 2 + } + return p.runtimeError(killErr, "kill timeout") +} + +// KillAll processes belonging to the init process +func (p *Init) KillAll(context context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + return p.killAll(context) +} + +func (p *Init) killAll(context context.Context) error { + p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ + All: true, + }) + // Ignore error handling for `runsc kill --all` for now. + // * If it doesn't return error, it is good; + // * If it returns error, consider the container has already stopped. + // TODO: Fix `runsc kill --all` error handling. + return nil +} + +// Stdin of the process +func (p *Init) Stdin() io.Closer { + return p.stdin +} + +// Runtime returns the OCI runtime configured for the init process +func (p *Init) Runtime() *runsc.Runsc { + return p.runtime +} + +// Exec returns a new child process +func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Exec(ctx, path, r) +} + +// exec returns a new exec'd process +func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + // process exec request + var spec specs.Process + if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { + return nil, err + } + spec.Terminal = r.Terminal + + e := &execProcess{ + id: r.ID, + path: path, + parent: p, + spec: spec, + stdio: proc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }, + waitBlock: make(chan struct{}), + } + e.execState = &execCreatedState{p: e} + return e, nil +} + +// Stdio of the process +func (p *Init) Stdio() proc.Stdio { + return p.stdio +} + +func (p *Init) runtimeError(rErr error, msg string) error { + if rErr == nil { + return nil + } + + rMsg, err := getLastRuntimeError(p.runtime) + switch { + case err != nil: + return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error()) + case rMsg == "": + return errors.Wrap(rErr, msg) + default: + return errors.Errorf("%s: %s", msg, rMsg) + } +} + +func (p *Init) convertStatus(status string) string { + if status == "created" && !p.Sandbox && p.status == internalErrorCode { + // Treat start failure state for non-root container as stopped. + return "stopped" + } + return status +} + +func withConditionalIO(c proc.Stdio) runc.IOOpt { + return func(o *runc.IOOption) { + o.OpenStdin = c.Stdin != "" + o.OpenStdout = c.Stdout != "" + o.OpenStderr = c.Stderr != "" + } +} diff --git a/pkg/shim/v1/proc/init_state.go b/pkg/shim/v1/proc/init_state.go new file mode 100644 index 000000000..868646b6c --- /dev/null +++ b/pkg/shim/v1/proc/init_state.go @@ -0,0 +1,182 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/pkg/errors" +) + +type initState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Exec(context.Context, string, *ExecConfig) (proc.Process, error) + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type createdState struct { + p *Init +} + +func (s *createdState) transition(name string) error { + switch name { + case "running": + s.p.initState = &runningState{p: s.p} + case "stopped": + s.p.initState = &stoppedState{p: s.p} + case "deleted": + s.p.initState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *createdState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *createdState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + // Containerd doesn't allow deleting container in created state. + // However, for gvisor, a non-root container in created state can + // only go to running state. If the container can't be started, + // it can only stay in created state, and never be deleted. + // To work around that, we treat non-root container in start failure + // state as stopped. + if !s.p.Sandbox { + s.p.io.Close() + s.p.setExited(internalErrorCode) + if err := s.transition("stopped"); err != nil { + panic(err) + } + } + return err + } + return s.transition("running") +} + +func (s *createdState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *createdState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return s.p.exec(ctx, path, r) +} + +type runningState struct { + p *Init +} + +func (s *runningState) transition(name string) error { + switch name { + case "stopped": + s.p.initState = &stoppedState{p: s.p} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *runningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *runningState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a running process") +} + +func (s *runningState) Delete(ctx context.Context) error { + return errors.Errorf("cannot delete a running process") +} + +func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *runningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return s.p.exec(ctx, path, r) +} + +type stoppedState struct { + p *Init +} + +func (s *stoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.initState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *stoppedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize a stopped container") +} + +func (s *stoppedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a stopped process") +} + +func (s *stoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", s.p.id) +} + +func (s *stoppedState) SetExited(status int) { + // no op +} + +func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return nil, errors.Errorf("cannot exec in a stopped state") +} diff --git a/pkg/shim/v1/proc/io.go b/pkg/shim/v1/proc/io.go new file mode 100644 index 000000000..2677b4e54 --- /dev/null +++ b/pkg/shim/v1/proc/io.go @@ -0,0 +1,167 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + "io" + "os" + "sync" + "sync/atomic" + "syscall" + + "github.com/containerd/containerd/log" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" +) + +// TODO(random-liu): This file can be a util. + +var bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, +} + +func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error { + var sameFile *countingWriteCloser + for _, i := range []struct { + name string + dest func(wc io.WriteCloser, rc io.Closer) + }{ + { + name: stdout, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { + log.G(ctx).Warn("error copying stdout") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, { + name: stderr, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { + log.G(ctx).Warn("error copying stderr") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, + } { + ok, err := isFifo(i.name) + if err != nil { + return err + } + var ( + fw io.WriteCloser + fr io.Closer + ) + if ok { + if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + } else { + if sameFile != nil { + sameFile.count++ + i.dest(sameFile, nil) + continue + } + if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if stdout == stderr { + sameFile = &countingWriteCloser{ + WriteCloser: fw, + count: 1, + } + } + } + i.dest(fw, fr) + } + if stdin == "" { + return nil + } + f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + + io.CopyBuffer(rio.Stdin(), f, *p) + rio.Stdin().Close() + f.Close() + }() + return nil +} + +// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. +type countingWriteCloser struct { + io.WriteCloser + count int64 +} + +func (c *countingWriteCloser) Close() error { + if atomic.AddInt64(&c.count, -1) > 0 { + return nil + } + return c.WriteCloser.Close() +} + +// isFifo checks if a file is a fifo +// if the file does not exist then it returns false +func isFifo(path string) (bool, error) { + stat, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { + return true, nil + } + return false, nil +} diff --git a/pkg/shim/v1/proc/process.go b/pkg/shim/v1/proc/process.go new file mode 100644 index 000000000..1bfa99f4c --- /dev/null +++ b/pkg/shim/v1/proc/process.go @@ -0,0 +1,37 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "github.com/pkg/errors" +) + +// RunscRoot is the path to the root runsc state directory +const RunscRoot = "/run/containerd/runsc" + +func stateName(v interface{}) string { + switch v.(type) { + case *runningState, *execRunningState: + return "running" + case *createdState, *execCreatedState: + return "created" + case *deletedState: + return "deleted" + case *stoppedState: + return "stopped" + } + panic(errors.Errorf("invalid state %v", v)) +} diff --git a/pkg/shim/v1/proc/types.go b/pkg/shim/v1/proc/types.go new file mode 100644 index 000000000..dcd43bcca --- /dev/null +++ b/pkg/shim/v1/proc/types.go @@ -0,0 +1,70 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "time" + + google_protobuf "github.com/gogo/protobuf/types" + + runc "github.com/containerd/go-runc" +) + +// Mount holds filesystem mount configuration +type Mount struct { + Type string + Source string + Target string + Options []string +} + +// CreateConfig hold task creation configuration +type CreateConfig struct { + ID string + Bundle string + Runtime string + Rootfs []Mount + Terminal bool + Stdin string + Stdout string + Stderr string + Options *google_protobuf.Any +} + +// ExecConfig holds exec creation configuration +type ExecConfig struct { + ID string + Terminal bool + Stdin string + Stdout string + Stderr string + Spec *google_protobuf.Any +} + +// Exit is the type of exit events +type Exit struct { + Timestamp time.Time + ID string + Status int +} + +// ProcessMonitor monitors process exit changes +type ProcessMonitor interface { + // Subscribe to process exit changes + Subscribe() chan runc.Exit + // Unsubscribe to process exit changes + Unsubscribe(c chan runc.Exit) +} diff --git a/pkg/shim/v1/proc/utils.go b/pkg/shim/v1/proc/utils.go new file mode 100644 index 000000000..84bd702ab --- /dev/null +++ b/pkg/shim/v1/proc/utils.go @@ -0,0 +1,92 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "encoding/json" + "io" + "os" + "strings" + "time" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +const ( + internalErrorCode = 128 + bufferSize = 32 +) + +// ExitCh is the exit events channel for containers and exec processes +// inside the sandbox. +var ExitCh = make(chan Exit, bufferSize) + +// TODO(random-liu): This can be a utility. + +// TODO(mlaventure): move to runc package? +func getLastRuntimeError(r *runsc.Runsc) (string, error) { + if r.Log == "" { + return "", nil + } + + f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) + if err != nil { + return "", err + } + + var ( + errMsg string + log struct { + Level string + Msg string + Time time.Time + } + ) + + dec := json.NewDecoder(f) + for err = nil; err == nil; { + if err = dec.Decode(&log); err != nil && err != io.EOF { + return "", err + } + if log.Level == "error" { + errMsg = strings.TrimSpace(log.Msg) + } + } + + return errMsg, nil +} + +func copyFile(to, from string) error { + ff, err := os.Open(from) + if err != nil { + return err + } + defer ff.Close() + tt, err := os.Create(to) + if err != nil { + return err + } + defer tt.Close() + + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + _, err = io.CopyBuffer(tt, ff, *p) + return err +} + +func hasNoIO(r *CreateConfig) bool { + return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" +} diff --git a/pkg/shim/v1/shim/platform.go b/pkg/shim/v1/shim/platform.go new file mode 100644 index 000000000..86252c3f5 --- /dev/null +++ b/pkg/shim/v1/shim/platform.go @@ -0,0 +1,110 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "context" + "io" + "sync" + "syscall" + + "github.com/containerd/console" + "github.com/containerd/fifo" + "github.com/pkg/errors" +) + +type linuxPlatform struct { + epoller *console.Epoller +} + +func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { + if p.epoller == nil { + return nil, errors.New("uninitialized epoller") + } + + epollConsole, err := p.epoller.Add(console) + if err != nil { + return nil, err + } + + if stdin != "" { + in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(epollConsole, in, *p) + }() + } + + outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) + if err != nil { + return nil, err + } + outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(outw, epollConsole, *p) + epollConsole.Close() + outr.Close() + outw.Close() + wg.Done() + }() + return epollConsole, nil +} + +func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { + if p.epoller == nil { + return errors.New("uninitialized epoller") + } + epollConsole, ok := cons.(*console.EpollConsole) + if !ok { + return errors.Errorf("expected EpollConsole, got %#v", cons) + } + return epollConsole.Shutdown(p.epoller.CloseConsole) +} + +func (p *linuxPlatform) Close() error { + return p.epoller.Close() +} + +// initialize a single epoll fd to manage our consoles. `initPlatform` should +// only be called once. +func (s *Service) initPlatform() error { + if s.platform != nil { + return nil + } + epoller, err := console.NewEpoller() + if err != nil { + return errors.Wrap(err, "failed to initialize epoller") + } + s.platform = &linuxPlatform{ + epoller: epoller, + } + go epoller.Wait() + return nil +} diff --git a/pkg/shim/v1/shim/service.go b/pkg/shim/v1/shim/service.go new file mode 100644 index 000000000..27e915c14 --- /dev/null +++ b/pkg/shim/v1/shim/service.go @@ -0,0 +1,579 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/containerd/console" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" + rproc "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/containerd/runtime/v1/shim" + shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" + "github.com/google/gvisor-containerd-shim/pkg/v1/proc" + "github.com/google/gvisor-containerd-shim/pkg/v1/utils" +) + +var ( + empty = &ptypes.Empty{} + bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, + } +) + +// Config contains shim specific configuration +type Config struct { + Path string + Namespace string + WorkDir string + RuntimeRoot string + RunscConfig map[string]string +} + +// NewService returns a new shim service that can be used via GRPC +func NewService(config Config, publisher events.Publisher) (*Service, error) { + if config.Namespace == "" { + return nil, fmt.Errorf("shim namespace cannot be empty") + } + ctx := namespaces.WithNamespace(context.Background(), config.Namespace) + ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{ + "namespace": config.Namespace, + "path": config.Path, + "pid": os.Getpid(), + })) + s := &Service{ + config: config, + context: ctx, + processes: make(map[string]rproc.Process), + events: make(chan interface{}, 128), + ec: proc.ExitCh, + } + go s.processExits() + if err := s.initPlatform(); err != nil { + return nil, errors.Wrap(err, "failed to initialized platform behavior") + } + go s.forward(publisher) + return s, nil +} + +// Service is the shim implementation of a remote shim over GRPC +type Service struct { + mu sync.Mutex + + config Config + context context.Context + processes map[string]rproc.Process + events chan interface{} + platform rproc.Platform + ec chan proc.Exit + + // Filled by Create() + id string + bundle string +} + +// Create a new initial process and container with the underlying OCI runtime +func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + var mounts []proc.Mount + for _, m := range r.Rootfs { + mounts = append(mounts, proc.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + }) + } + + rootfs := filepath.Join(r.Bundle, "rootfs") + if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { + return nil, err + } + + config := &proc.CreateConfig{ + ID: r.ID, + Bundle: r.Bundle, + Runtime: r.Runtime, + Rootfs: mounts, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Options: r.Options, + } + defer func() { + if err != nil { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") + } + } + }() + for _, rm := range mounts { + m := &mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(rootfs); err != nil { + return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + } + } + process, err := newInit( + ctx, + s.config.Path, + s.config.WorkDir, + s.config.RuntimeRoot, + s.config.Namespace, + s.config.RunscConfig, + s.platform, + config, + ) + if err := process.Create(ctx, config); err != nil { + return nil, errdefs.ToGRPC(err) + } + // save the main task id and bundle to the shim for additional requests + s.id = r.ID + s.bundle = r.Bundle + pid := process.Pid() + s.processes[r.ID] = process + return &shimapi.CreateTaskResponse{ + Pid: uint32(pid), + }, nil +} + +// Start a process +func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Start(ctx); err != nil { + return nil, err + } + return &shimapi.StartResponse{ + ID: p.ID(), + Pid: uint32(p.Pid()), + }, nil +} + +// Delete the initial process and container +func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + s.mu.Lock() + delete(s.processes, s.id) + s.mu.Unlock() + s.platform.Close() + return &shimapi.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// DeleteProcess deletes an exec'd process +func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) { + if r.ID == s.id { + return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") + } + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + s.mu.Lock() + delete(s.processes, r.ID) + s.mu.Unlock() + return &shimapi.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// Exec an additional process inside the container +func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) { + s.mu.Lock() + + if p := s.processes[r.ID]; p != nil { + s.mu.Unlock() + return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID) + } + + p := s.processes[s.id] + s.mu.Unlock() + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + + process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{ + ID: r.ID, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Spec: r.Spec, + }) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + s.mu.Lock() + s.processes[r.ID] = process + s.mu.Unlock() + return empty, nil +} + +// ResizePty of a process +func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) { + if r.ID == "" { + return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") + } + ws := console.WinSize{ + Width: uint16(r.Width), + Height: uint16(r.Height), + } + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Resize(ws); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// State returns runtime state information for a process +func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + st, err := p.Status(ctx) + if err != nil { + return nil, err + } + status := task.StatusUnknown + switch st { + case "created": + status = task.StatusCreated + case "running": + status = task.StatusRunning + case "stopped": + status = task.StatusStopped + } + sio := p.Stdio() + return &shimapi.StateResponse{ + ID: p.ID(), + Bundle: s.bundle, + Pid: uint32(p.Pid()), + Status: status, + Stdin: sio.Stdin, + Stdout: sio.Stdout, + Stderr: sio.Stderr, + Terminal: sio.Terminal, + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +// Pause the container +func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Resume the container +func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Kill a process with the provided signal +func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { + if r.ID == "" { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil + } + + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// ListPids returns all pids inside the container +func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) { + pids, err := s.getContainerPids(ctx, r.ID) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + var processes []*task.ProcessInfo + for _, pid := range pids { + pInfo := task.ProcessInfo{ + Pid: pid, + } + for _, p := range s.processes { + if p.Pid() == int(pid) { + d := &runctypes.ProcessDetails{ + ExecID: p.ID(), + } + a, err := typeurl.MarshalAny(d) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + } + pInfo.Info = a + break + } + } + processes = append(processes, &pInfo) + } + return &shimapi.ListPidsResponse{ + Processes: processes, + }, nil +} + +// CloseIO of a process +func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if stdin := p.Stdin(); stdin != nil { + if err := stdin.Close(); err != nil { + return nil, errors.Wrap(err, "close stdin") + } + } + return empty, nil +} + +// Checkpoint the container +func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// ShimInfo returns shim information such as the shim's pid +func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) { + return &shimapi.ShimInfoResponse{ + ShimPid: uint32(os.Getpid()), + }, nil +} + +// Update a running container +func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Wait for a process to exit +func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + p.Wait() + + return &shimapi.WaitResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +func (s *Service) processExits() { + for e := range s.ec { + s.checkProcesses(e) + } +} + +func (s *Service) allProcesses() []rproc.Process { + s.mu.Lock() + defer s.mu.Unlock() + + res := make([]rproc.Process, 0, len(s.processes)) + for _, p := range s.processes { + res = append(res, p) + } + return res +} + +func (s *Service) checkProcesses(e proc.Exit) { + for _, p := range s.allProcesses() { + if p.ID() == e.ID { + if ip, ok := p.(*proc.Init); ok { + // Ensure all children are killed + if err := ip.KillAll(s.context); err != nil { + log.G(s.context).WithError(err).WithField("id", ip.ID()). + Error("failed to kill init's children") + } + } + p.SetExited(e.Status) + s.events <- &eventstypes.TaskExit{ + ContainerID: s.id, + ID: p.ID(), + Pid: uint32(p.Pid()), + ExitStatus: uint32(e.Status), + ExitedAt: p.ExitedAt(), + } + return + } + } +} + +func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + + ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) + if err != nil { + return nil, err + } + pids := make([]uint32, 0, len(ps)) + for _, pid := range ps { + pids = append(pids, uint32(pid)) + } + return pids, nil +} + +func (s *Service) forward(publisher events.Publisher) { + for e := range s.events { + if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil { + log.G(s.context).WithError(err).Error("post event") + } + } +} + +// getInitProcess returns initial process +func (s *Service) getInitProcess() (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + p := s.processes[s.id] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + return p, nil +} + +// getExecProcess returns exec process +func (s *Service) getExecProcess(id string) (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + p := s.processes[id] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id) + } + return p, nil +} + +func getTopic(ctx context.Context, e interface{}) string { + switch e.(type) { + case *eventstypes.TaskCreate: + return runtime.TaskCreateEventTopic + case *eventstypes.TaskStart: + return runtime.TaskStartEventTopic + case *eventstypes.TaskOOM: + return runtime.TaskOOMEventTopic + case *eventstypes.TaskExit: + return runtime.TaskExitEventTopic + case *eventstypes.TaskDelete: + return runtime.TaskDeleteEventTopic + case *eventstypes.TaskExecAdded: + return runtime.TaskExecAddedEventTopic + case *eventstypes.TaskExecStarted: + return runtime.TaskExecStartedEventTopic + default: + logrus.Warnf("no topic for type %#v", e) + } + return runtime.TaskUnknownTopic +} + +func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, config map[string]string, platform rproc.Platform, r *proc.CreateConfig) (*proc.Init, error) { + var options runctypes.CreateOptions + if r.Options != nil { + v, err := typeurl.UnmarshalAny(r.Options) + if err != nil { + return nil, err + } + options = *v.(*runctypes.CreateOptions) + } + + spec, err := utils.ReadSpec(r.Bundle) + if err != nil { + return nil, errors.Wrap(err, "read oci spec") + } + if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + return nil, errors.Wrap(err, "update volume annotations") + } + + runsc.FormatLogPath(r.ID, config) + rootfs := filepath.Join(path, "rootfs") + runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config) + p := proc.New(r.ID, runtime, rproc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }) + p.Bundle = r.Bundle + p.Platform = platform + p.Rootfs = rootfs + p.WorkDir = workDir + p.IoUID = int(options.IoUid) + p.IoGID = int(options.IoGid) + p.Sandbox = utils.IsSandbox(spec) + p.UserLog = utils.UserLogPath(spec) + p.Monitor = shim.Default + return p, nil +} diff --git a/pkg/shim/v1/utils/utils.go b/pkg/shim/v1/utils/utils.go new file mode 100644 index 000000000..7a400af1c --- /dev/null +++ b/pkg/shim/v1/utils/utils.go @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// ReadSpec reads OCI spec from the bundle directory. +func ReadSpec(bundle string) (*specs.Spec, error) { + f, err := os.Open(filepath.Join(bundle, "config.json")) + if err != nil { + return nil, err + } + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + return nil, err + } + return &spec, nil +} + +// IsSandbox checks whether a container is a sandbox container. +func IsSandbox(spec *specs.Spec) bool { + t, ok := spec.Annotations[annotations.ContainerType] + return !ok || t == annotations.ContainerTypeSandbox +} + +// UserLogPath gets user log path from OCI annotation. +func UserLogPath(spec *specs.Spec) string { + sandboxLogDir := spec.Annotations[annotations.SandboxLogDir] + if sandboxLogDir == "" { + return "" + } + return filepath.Join(sandboxLogDir, "gvisor.log") +} diff --git a/pkg/shim/v1/utils/volumes.go b/pkg/shim/v1/utils/volumes.go new file mode 100644 index 000000000..7323e7245 --- /dev/null +++ b/pkg/shim/v1/utils/volumes.go @@ -0,0 +1,156 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "path/filepath" + "strings" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const volumeKeyPrefix = "dev.gvisor.spec.mount." + +var kubeletPodsDir = "/var/lib/kubelet/pods" + +// volumeName gets volume name from volume annotation key, example: +// dev.gvisor.spec.mount.NAME.share +func volumeName(k string) string { + return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] +} + +// volumeFieldName gets volume field name from volume annotation key, example: +// `type` is the field of dev.gvisor.spec.mount.NAME.type +func volumeFieldName(k string) string { + parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") + return parts[len(parts)-1] +} + +// podUID gets pod UID from the pod log path. +func podUID(s *specs.Spec) (string, error) { + sandboxLogDir := s.Annotations[annotations.SandboxLogDir] + if sandboxLogDir == "" { + return "", errors.New("no sandbox log path annotation") + } + fields := strings.Split(filepath.Base(sandboxLogDir), "_") + switch len(fields) { + case 1: // This is the old CRI logging path + return fields[0], nil + case 3: // This is the new CRI logging path + return fields[2], nil + } + return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir) +} + +// isVolumeKey checks whether an annotation key is for volume. +func isVolumeKey(k string) bool { + return strings.HasPrefix(k, volumeKeyPrefix) +} + +// volumeSourceKey constructs the annotation key for volume source. +func volumeSourceKey(volume string) string { + return volumeKeyPrefix + volume + ".source" +} + +// volumePath searches the volume path in the kubelet pod directory. +func volumePath(volume, uid string) (string, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) + dirs, err := filepath.Glob(volumeSearchPath) + if err != nil { + return "", err + } + if len(dirs) != 1 { + return "", errors.Errorf("unexpected matched volume list %v", dirs) + } + return dirs[0], nil +} + +// isVolumePath checks whether a string is the volume path. +func isVolumePath(volume, path string) (bool, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) + return filepath.Match(volumeSearchPath, path) +} + +// UpdateVolumeAnnotations add necessary OCI annotations for gvisor +// volume optimization. +func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { + var ( + uid string + err error + ) + if IsSandbox(s) { + uid, err = podUID(s) + if err != nil { + // Skip if we can't get pod UID, because this doesn't work + // for containerd 1.1. + logrus.WithError(err).Error("Can't get pod uid") + return nil + } + } + var updated bool + for k, v := range s.Annotations { + if !isVolumeKey(k) { + continue + } + if volumeFieldName(k) != "type" { + continue + } + volume := volumeName(k) + if uid != "" { + // This is a sandbox + path, err := volumePath(volume, uid) + if err != nil { + return errors.Wrapf(err, "get volume path for %q", volume) + } + s.Annotations[volumeSourceKey(volume)] = path + updated = true + } else { + // This is a container + for i := range s.Mounts { + // An error is returned for sandbox if source annotation + // is not successfully applied, so it is guaranteed that + // the source annotation for sandbox has already been + // successfully applied at this point. + // The volume name is unique inside a pod, so matching without + // podUID is fine here. + // TODO: Pass podUID down to shim for containers to do + // more accurate matching. + if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { + // gVisor requires the container mount type to match + // sandbox mount type. + s.Mounts[i].Type = v + updated = true + } + } + } + } + if !updated { + return nil + } + // Update bundle + b, err := json.Marshal(s) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) +} diff --git a/pkg/shim/v1/utils/volumes_test.go b/pkg/shim/v1/utils/volumes_test.go new file mode 100644 index 000000000..4b2639545 --- /dev/null +++ b/pkg/shim/v1/utils/volumes_test.go @@ -0,0 +1,309 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestUpdateVolumeAnnotations(t *testing.T) { + dir, err := ioutil.TempDir("", "test-update-volume-annotations") + if err != nil { + t.Fatalf("create tempdir: %v", err) + } + defer os.RemoveAll(dir) + kubeletPodsDir = dir + + const ( + testPodUID = "testuid" + testVolumeName = "testvolume" + testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID + testLegacyLogDirPath = "/var/log/pods/" + testPodUID + ) + testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) + + if err := os.MkdirAll(testVolumePath, 0755); err != nil { + t.Fatalf("Create test volume: %v", err) + } + + for _, test := range []struct { + desc string + spec *specs.Spec + expected *specs.Spec + expectErr bool + expectUpdate bool + }{ + { + desc: "volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "volume annotations for sandbox with legacy log path", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLegacyLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLegacyLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "tmpfs: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "tmpfs", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "bind: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "should not return error without pod log directory", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + }, + { + desc: "should return error if volume path does not exist", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount.notexist.share": "pod", + "dev.gvisor.spec.mount.notexist.type": "tmpfs", + "dev.gvisor.spec.mount.notexist.options": "ro", + }, + }, + expectErr: true, + }, + { + desc: "no volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + }, + }, + }, + { + desc: "no volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + }, + }, + }, + } { + t.Run(test.desc, func(t *testing.T) { + bundle, err := ioutil.TempDir(dir, "test-bundle") + if err != nil { + t.Fatalf("Create test bundle: %v", err) + } + err = UpdateVolumeAnnotations(bundle, test.spec) + if test.expectErr { + if err == nil { + t.Fatal("Expected error, but got nil") + } + return + } + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expected, test.spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) + } + if test.expectUpdate { + b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) + if err != nil { + t.Fatalf("Read spec from bundle: %v", err) + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + t.Fatalf("Unmarshal spec: %v", err) + } + if !reflect.DeepEqual(test.expected, &spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, &spec) + } + } + }) + } +} diff --git a/pkg/shim/v2/epoll.go b/pkg/shim/v2/epoll.go new file mode 100644 index 000000000..57a2c5452 --- /dev/null +++ b/pkg/shim/v2/epoll.go @@ -0,0 +1,128 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package v2 + +import ( + "context" + "sync" + + "github.com/containerd/cgroups" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/runtime" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func newOOMEpoller(publisher events.Publisher) (*epoller, error) { + fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + return &epoller{ + fd: fd, + publisher: publisher, + set: make(map[uintptr]*item), + }, nil +} + +type epoller struct { + mu sync.Mutex + + fd int + publisher events.Publisher + set map[uintptr]*item +} + +type item struct { + id string + cg cgroups.Cgroup +} + +func (e *epoller) Close() error { + return unix.Close(e.fd) +} + +func (e *epoller) run(ctx context.Context) { + var events [128]unix.EpollEvent + for { + select { + case <-ctx.Done(): + e.Close() + return + default: + n, err := unix.EpollWait(e.fd, events[:], -1) + if err != nil { + if err == unix.EINTR { + continue + } + logrus.WithError(err).Error("cgroups: epoll wait") + } + for i := 0; i < n; i++ { + e.process(ctx, uintptr(events[i].Fd)) + } + } + } +} + +func (e *epoller) add(id string, cg cgroups.Cgroup) error { + e.mu.Lock() + defer e.mu.Unlock() + fd, err := cg.OOMEventFD() + if err != nil { + return err + } + e.set[fd] = &item{ + id: id, + cg: cg, + } + event := unix.EpollEvent{ + Fd: int32(fd), + Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, + } + return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) +} + +func (e *epoller) process(ctx context.Context, fd uintptr) { + flush(fd) + e.mu.Lock() + i, ok := e.set[fd] + if !ok { + e.mu.Unlock() + return + } + e.mu.Unlock() + if i.cg.State() == cgroups.Deleted { + e.mu.Lock() + delete(e.set, fd) + e.mu.Unlock() + unix.Close(int(fd)) + return + } + if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ + ContainerID: i.id, + }); err != nil { + logrus.WithError(err).Error("publish OOM event") + } +} + +func flush(fd uintptr) error { + var buf [8]byte + _, err := unix.Read(int(fd), buf[:]) + return err +} diff --git a/pkg/shim/v2/options/options.go b/pkg/shim/v2/options/options.go new file mode 100644 index 000000000..de09f2f79 --- /dev/null +++ b/pkg/shim/v2/options/options.go @@ -0,0 +1,33 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package options + +const OptionType = "io.containerd.runsc.v1.options" + +// Options is runtime options for io.containerd.runsc.v1. +type Options struct { + // ShimCgroup is the cgroup the shim should be in. + ShimCgroup string `toml:"shim_cgroup"` + // IoUid is the I/O's pipes uid. + IoUid uint32 `toml:"io_uid"` + // IoUid is the I/O's pipes gid. + IoGid uint32 `toml:"io_gid"` + // BinaryName is the binary name of the runsc binary. + BinaryName string `toml:"binary_name"` + // Root is the runsc root directory. + Root string `toml:"root"` + // RunscConfig is a key/value map of all runsc flags. + RunscConfig map[string]string `toml:"runsc_config"` +} diff --git a/pkg/shim/v2/service.go b/pkg/shim/v2/service.go new file mode 100644 index 000000000..3d226e29c --- /dev/null +++ b/pkg/shim/v2/service.go @@ -0,0 +1,821 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v2 + +import ( + "context" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/containerd/cgroups" + "github.com/containerd/console" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" + rproc "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/containerd/runtime/v2/shim" + taskAPI "github.com/containerd/containerd/runtime/v2/task" + runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" + "github.com/google/gvisor-containerd-shim/pkg/v1/proc" + "github.com/google/gvisor-containerd-shim/pkg/v1/utils" + "github.com/google/gvisor-containerd-shim/pkg/v2/options" +) + +var ( + empty = &ptypes.Empty{} + bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, + } +) + +var _ = (taskAPI.TaskService)(&service{}) + +// configFile is the default config file name. For containerd 1.2, +// we assume that a config.toml should exist in the runtime root. +const configFile = "config.toml" + +// New returns a new shim service that can be used via GRPC +func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) { + ep, err := newOOMEpoller(publisher) + if err != nil { + return nil, err + } + ctx, cancel := context.WithCancel(ctx) + go ep.run(ctx) + s := &service{ + id: id, + context: ctx, + processes: make(map[string]rproc.Process), + events: make(chan interface{}, 128), + ec: proc.ExitCh, + oomPoller: ep, + cancel: cancel, + } + go s.processExits() + runsc.Monitor = shim.Default + if err := s.initPlatform(); err != nil { + cancel() + return nil, errors.Wrap(err, "failed to initialized platform behavior") + } + go s.forward(publisher) + return s, nil +} + +// service is the shim implementation of a remote shim over GRPC +type service struct { + mu sync.Mutex + + context context.Context + task rproc.Process + processes map[string]rproc.Process + events chan interface{} + platform rproc.Platform + opts options.Options + ec chan proc.Exit + oomPoller *epoller + + id string + bundle string + cancel func() +} + +func newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + self, err := os.Executable() + if err != nil { + return nil, err + } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + args := []string{ + "-namespace", ns, + "-address", containerdAddress, + "-publish-binary", containerdBinary, + } + cmd := exec.Command(self, args...) + cmd.Dir = cwd + cmd.Env = append(os.Environ(), "GOMAXPROCS=2") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + return cmd, nil +} + +func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) { + cmd, err := newCommand(ctx, containerdBinary, containerdAddress) + if err != nil { + return "", err + } + address, err := shim.SocketAddress(ctx, id) + if err != nil { + return "", err + } + socket, err := shim.NewSocket(address) + if err != nil { + return "", err + } + defer socket.Close() + f, err := socket.File() + if err != nil { + return "", err + } + defer f.Close() + + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + + if err := cmd.Start(); err != nil { + return "", err + } + defer func() { + if err != nil { + cmd.Process.Kill() + } + }() + // make sure to wait after start + go cmd.Wait() + if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { + return "", err + } + if err := shim.WriteAddress("address", address); err != nil { + return "", err + } + if err := shim.SetScore(cmd.Process.Pid); err != nil { + return "", errors.Wrap(err, "failed to set OOM Score on shim") + } + return address, nil +} + +func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { + path, err := os.Getwd() + if err != nil { + return nil, err + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + runtime, err := s.readRuntime(path) + if err != nil { + return nil, err + } + r := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) + if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ + Force: true, + }); err != nil { + logrus.WithError(err).Warn("failed to remove runc container") + } + if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { + logrus.WithError(err).Warn("failed to cleanup rootfs mount") + } + return &taskAPI.DeleteResponse{ + ExitedAt: time.Now(), + ExitStatus: 128 + uint32(unix.SIGKILL), + }, nil +} + +func (s *service) readRuntime(path string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(path, "runtime")) + if err != nil { + return "", err + } + return string(data), nil +} + +func (s *service) writeRuntime(path, runtime string) error { + return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600) +} + +// Create a new initial process and container with the underlying OCI runtime +func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, errors.Wrap(err, "create namespace") + } + + // Read from root for now. + var opts options.Options + if r.Options != nil { + v, err := typeurl.UnmarshalAny(r.Options) + if err != nil { + return nil, err + } + var path string + switch o := v.(type) { + case *runctypes.CreateOptions: // containerd 1.2.x + opts.IoUid = o.IoUid + opts.IoGid = o.IoGid + opts.ShimCgroup = o.ShimCgroup + case *runctypes.RuncOptions: // containerd 1.2.x + root := proc.RunscRoot + if o.RuntimeRoot != "" { + root = o.RuntimeRoot + } + + opts.BinaryName = o.Runtime + + path = filepath.Join(root, configFile) + if _, err := os.Stat(path); err != nil { + if !os.IsNotExist(err) { + return nil, errors.Wrapf(err, "stat config file %q", path) + } + // A config file in runtime root is not required. + path = "" + } + case *runtimeoptions.Options: // containerd 1.3.x+ + if o.ConfigPath == "" { + break + } + if o.TypeUrl != options.OptionType { + return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl) + } + path = o.ConfigPath + default: + return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl) + } + if path != "" { + if _, err = toml.DecodeFile(path, &opts); err != nil { + return nil, errors.Wrapf(err, "decode config file %q", path) + } + } + } + + var mounts []proc.Mount + for _, m := range r.Rootfs { + mounts = append(mounts, proc.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + }) + } + + rootfs := filepath.Join(r.Bundle, "rootfs") + if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { + return nil, err + } + + config := &proc.CreateConfig{ + ID: r.ID, + Bundle: r.Bundle, + Runtime: opts.BinaryName, + Rootfs: mounts, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Options: r.Options, + } + if err := s.writeRuntime(r.Bundle, opts.BinaryName); err != nil { + return nil, err + } + defer func() { + if err != nil { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + logrus.WithError(err2).Warn("failed to cleanup rootfs mount") + } + } + }() + for _, rm := range mounts { + m := &mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(rootfs); err != nil { + return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + } + } + process, err := newInit( + ctx, + r.Bundle, + filepath.Join(r.Bundle, "work"), + ns, + s.platform, + config, + &opts, + rootfs, + ) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + if err := process.Create(ctx, config); err != nil { + return nil, errdefs.ToGRPC(err) + } + // save the main task id and bundle to the shim for additional requests + s.id = r.ID + s.bundle = r.Bundle + + // Set up OOM notification on the sandbox's cgroup. This is done on sandbox + // create since the sandbox process will be created here. + pid := process.Pid() + if pid > 0 { + cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) + if err != nil { + return nil, errors.Wrapf(err, "loading cgroup for %d", pid) + } + if err := s.oomPoller.add(s.id, cg); err != nil { + return nil, errors.Wrapf(err, "add cg to OOM monitor") + } + } + s.task = process + s.opts = opts + return &taskAPI.CreateTaskResponse{ + Pid: uint32(process.Pid()), + }, nil + +} + +// Start a process +func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if err := p.Start(ctx); err != nil { + return nil, err + } + // TODO: Set the cgroup and oom notifications on restore. + // https://github.com/google/gvisor-containerd-shim/issues/58 + return &taskAPI.StartResponse{ + Pid: uint32(p.Pid()), + }, nil +} + +// Delete the initial process and container +func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + isTask := r.ExecID == "" + if !isTask { + s.mu.Lock() + delete(s.processes, r.ExecID) + s.mu.Unlock() + } + if isTask && s.platform != nil { + s.platform.Close() + } + return &taskAPI.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// Exec an additional process inside the container +func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { + s.mu.Lock() + p := s.processes[r.ExecID] + s.mu.Unlock() + if p != nil { + return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) + } + p = s.task + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + process, err := p.(*proc.Init).Exec(ctx, s.bundle, &proc.ExecConfig{ + ID: r.ExecID, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Spec: r.Spec, + }) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + s.mu.Lock() + s.processes[r.ExecID] = process + s.mu.Unlock() + return empty, nil +} + +// ResizePty of a process +func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + ws := console.WinSize{ + Width: uint16(r.Width), + Height: uint16(r.Height), + } + if err := p.Resize(ws); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// State returns runtime state information for a process +func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + st, err := p.Status(ctx) + if err != nil { + return nil, err + } + status := task.StatusUnknown + switch st { + case "created": + status = task.StatusCreated + case "running": + status = task.StatusRunning + case "stopped": + status = task.StatusStopped + } + sio := p.Stdio() + return &taskAPI.StateResponse{ + ID: p.ID(), + Bundle: s.bundle, + Pid: uint32(p.Pid()), + Status: status, + Stdin: sio.Stdin, + Stdout: sio.Stdout, + Stderr: sio.Stderr, + Terminal: sio.Terminal, + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +// Pause the container +func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Resume the container +func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Kill a process with the provided signal +func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// Pids returns all pids inside the container +func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { + pids, err := s.getContainerPids(ctx, r.ID) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + var processes []*task.ProcessInfo + for _, pid := range pids { + pInfo := task.ProcessInfo{ + Pid: pid, + } + for _, p := range s.processes { + if p.Pid() == int(pid) { + d := &runctypes.ProcessDetails{ + ExecID: p.ID(), + } + a, err := typeurl.MarshalAny(d) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + } + pInfo.Info = a + break + } + } + processes = append(processes, &pInfo) + } + return &taskAPI.PidsResponse{ + Processes: processes, + }, nil +} + +// CloseIO of a process +func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if stdin := p.Stdin(); stdin != nil { + if err := stdin.Close(); err != nil { + return nil, errors.Wrap(err, "close stdin") + } + } + return empty, nil +} + +// Checkpoint the container +func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Connect returns shim information such as the shim's pid +func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { + var pid int + if s.task != nil { + pid = s.task.Pid() + } + return &taskAPI.ConnectResponse{ + ShimPid: uint32(os.Getpid()), + TaskPid: uint32(pid), + }, nil +} + +func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) { + s.cancel() + os.Exit(0) + return empty, nil +} + +func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { + path, err := os.Getwd() + if err != nil { + return nil, err + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + runtime, err := s.readRuntime(path) + if err != nil { + return nil, err + } + rs := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) + stats, err := rs.Stats(ctx, s.id) + if err != nil { + return nil, err + } + + // gvisor currently (as of 2020-03-03) only returns the total memory + // usage and current PID value[0]. However, we copy the common fields here + // so that future updates will propagate correct information. We're + // using the cgroups.Metrics structure so we're returning the same type + // as runc. + // + // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 + data, err := typeurl.MarshalAny(&cgroups.Metrics{ + CPU: &cgroups.CPUStat{ + Usage: &cgroups.CPUUsage{ + Total: stats.Cpu.Usage.Total, + Kernel: stats.Cpu.Usage.Kernel, + User: stats.Cpu.Usage.User, + PerCPU: stats.Cpu.Usage.Percpu, + }, + Throttling: &cgroups.Throttle{ + Periods: stats.Cpu.Throttling.Periods, + ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, + ThrottledTime: stats.Cpu.Throttling.ThrottledTime, + }, + }, + Memory: &cgroups.MemoryStat{ + Cache: stats.Memory.Cache, + Usage: &cgroups.MemoryEntry{ + Limit: stats.Memory.Usage.Limit, + Usage: stats.Memory.Usage.Usage, + Max: stats.Memory.Usage.Max, + Failcnt: stats.Memory.Usage.Failcnt, + }, + Swap: &cgroups.MemoryEntry{ + Limit: stats.Memory.Swap.Limit, + Usage: stats.Memory.Swap.Usage, + Max: stats.Memory.Swap.Max, + Failcnt: stats.Memory.Swap.Failcnt, + }, + Kernel: &cgroups.MemoryEntry{ + Limit: stats.Memory.Kernel.Limit, + Usage: stats.Memory.Kernel.Usage, + Max: stats.Memory.Kernel.Max, + Failcnt: stats.Memory.Kernel.Failcnt, + }, + KernelTCP: &cgroups.MemoryEntry{ + Limit: stats.Memory.KernelTCP.Limit, + Usage: stats.Memory.KernelTCP.Usage, + Max: stats.Memory.KernelTCP.Max, + Failcnt: stats.Memory.KernelTCP.Failcnt, + }, + }, + Pids: &cgroups.PidsStat{ + Current: stats.Pids.Current, + Limit: stats.Pids.Limit, + }, + }) + if err != nil { + return nil, err + } + return &taskAPI.StatsResponse{ + Stats: data, + }, nil +} + +// Update a running container +func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Wait for a process to exit +func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + p.Wait() + + return &taskAPI.WaitResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +func (s *service) processExits() { + for e := range s.ec { + s.checkProcesses(e) + } +} + +func (s *service) checkProcesses(e proc.Exit) { + // TODO(random-liu): Add `shouldKillAll` logic if container pid + // namespace is supported. + for _, p := range s.allProcesses() { + if p.ID() == e.ID { + if ip, ok := p.(*proc.Init); ok { + // Ensure all children are killed + if err := ip.KillAll(s.context); err != nil { + log.G(s.context).WithError(err).WithField("id", ip.ID()). + Error("failed to kill init's children") + } + } + p.SetExited(e.Status) + s.events <- &eventstypes.TaskExit{ + ContainerID: s.id, + ID: p.ID(), + Pid: uint32(p.Pid()), + ExitStatus: uint32(e.Status), + ExitedAt: p.ExitedAt(), + } + return + } + } +} + +func (s *service) allProcesses() (o []rproc.Process) { + s.mu.Lock() + defer s.mu.Unlock() + for _, p := range s.processes { + o = append(o, p) + } + if s.task != nil { + o = append(o, s.task) + } + return o +} + +func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { + s.mu.Lock() + p := s.task + s.mu.Unlock() + if p == nil { + return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created") + } + ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) + if err != nil { + return nil, err + } + pids := make([]uint32, 0, len(ps)) + for _, pid := range ps { + pids = append(pids, uint32(pid)) + } + return pids, nil +} + +func (s *service) forward(publisher events.Publisher) { + for e := range s.events { + ctx, cancel := context.WithTimeout(s.context, 5*time.Second) + err := publisher.Publish(ctx, getTopic(e), e) + cancel() + if err != nil { + logrus.WithError(err).Error("post event") + } + } +} + +func (s *service) getProcess(execID string) (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + if execID == "" { + return s.task, nil + } + p := s.processes[execID] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) + } + return p, nil +} + +func getTopic(e interface{}) string { + switch e.(type) { + case *eventstypes.TaskCreate: + return runtime.TaskCreateEventTopic + case *eventstypes.TaskStart: + return runtime.TaskStartEventTopic + case *eventstypes.TaskOOM: + return runtime.TaskOOMEventTopic + case *eventstypes.TaskExit: + return runtime.TaskExitEventTopic + case *eventstypes.TaskDelete: + return runtime.TaskDeleteEventTopic + case *eventstypes.TaskExecAdded: + return runtime.TaskExecAddedEventTopic + case *eventstypes.TaskExecStarted: + return runtime.TaskExecStartedEventTopic + default: + logrus.Warnf("no topic for type %#v", e) + } + return runtime.TaskUnknownTopic +} + +func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) { + spec, err := utils.ReadSpec(r.Bundle) + if err != nil { + return nil, errors.Wrap(err, "read oci spec") + } + if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + return nil, errors.Wrap(err, "update volume annotations") + } + runsc.FormatLogPath(r.ID, options.RunscConfig) + runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) + p := proc.New(r.ID, runtime, rproc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }) + p.Bundle = r.Bundle + p.Platform = platform + p.Rootfs = rootfs + p.WorkDir = workDir + p.IoUID = int(options.IoUid) + p.IoGID = int(options.IoGid) + p.Sandbox = utils.IsSandbox(spec) + p.UserLog = utils.UserLogPath(spec) + p.Monitor = shim.Default + return p, nil +} diff --git a/pkg/shim/v2/service_linux.go b/pkg/shim/v2/service_linux.go new file mode 100644 index 000000000..cd259cd44 --- /dev/null +++ b/pkg/shim/v2/service_linux.go @@ -0,0 +1,112 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package v2 + +import ( + "context" + "io" + "sync" + "syscall" + + "github.com/containerd/console" + "github.com/containerd/fifo" + "github.com/pkg/errors" +) + +type linuxPlatform struct { + epoller *console.Epoller +} + +func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { + if p.epoller == nil { + return nil, errors.New("uninitialized epoller") + } + + epollConsole, err := p.epoller.Add(console) + if err != nil { + return nil, err + } + + if stdin != "" { + in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return nil, err + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(epollConsole, in, *p) + }() + } + + outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) + if err != nil { + return nil, err + } + outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(outw, epollConsole, *p) + epollConsole.Close() + outr.Close() + outw.Close() + wg.Done() + }() + return epollConsole, nil +} + +func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { + if p.epoller == nil { + return errors.New("uninitialized epoller") + } + epollConsole, ok := cons.(*console.EpollConsole) + if !ok { + return errors.Errorf("expected EpollConsole, got %#v", cons) + } + return epollConsole.Shutdown(p.epoller.CloseConsole) +} + +func (p *linuxPlatform) Close() error { + return p.epoller.Close() +} + +// initialize a single epoll fd to manage our consoles. `initPlatform` should +// only be called once. +func (s *service) initPlatform() error { + if s.platform != nil { + return nil + } + epoller, err := console.NewEpoller() + if err != nil { + return errors.Wrap(err, "failed to initialize epoller") + } + s.platform = &linuxPlatform{ + epoller: epoller, + } + go epoller.Wait() + return nil +} diff --git a/pkg/v1/proc/deleted_state.go b/pkg/v1/proc/deleted_state.go deleted file mode 100644 index b0233841d..000000000 --- a/pkg/v1/proc/deleted_state.go +++ /dev/null @@ -1,54 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" -) - -type deletedState struct { -} - -func (s *deletedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a deleted process") -} - -func (s *deletedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a deleted process") -} - -func (s *deletedState) Delete(ctx context.Context) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") -} - -func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") -} - -func (s *deletedState) SetExited(status int) { - // no op -} - -func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a deleted state") -} diff --git a/pkg/v1/proc/exec.go b/pkg/v1/proc/exec.go deleted file mode 100644 index f02b73bb2..000000000 --- a/pkg/v1/proc/exec.go +++ /dev/null @@ -1,284 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -type execProcess struct { - wg sync.WaitGroup - - execState execState - - mu sync.Mutex - id string - console console.Console - io runc.IO - status int - exited time.Time - pid int - internalPid int - closers []io.Closer - stdin io.Closer - stdio proc.Stdio - path string - spec specs.Process - - parent *Init - waitBlock chan struct{} -} - -func (e *execProcess) Wait() { - <-e.waitBlock -} - -func (e *execProcess) ID() string { - return e.id -} - -func (e *execProcess) Pid() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.pid -} - -func (e *execProcess) ExitStatus() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.status -} - -func (e *execProcess) ExitedAt() time.Time { - e.mu.Lock() - defer e.mu.Unlock() - return e.exited -} - -func (e *execProcess) SetExited(status int) { - e.mu.Lock() - defer e.mu.Unlock() - - e.execState.SetExited(status) -} - -func (e *execProcess) setExited(status int) { - e.status = status - e.exited = time.Now() - e.parent.Platform.ShutdownConsole(context.Background(), e.console) - close(e.waitBlock) -} - -func (e *execProcess) Delete(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Delete(ctx) -} - -func (e *execProcess) delete(ctx context.Context) error { - e.wg.Wait() - if e.io != nil { - for _, c := range e.closers { - c.Close() - } - e.io.Close() - } - pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - // silently ignore error - os.Remove(pidfile) - internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - // silently ignore error - os.Remove(internalPidfile) - return nil -} - -func (e *execProcess) Resize(ws console.WinSize) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Resize(ws) -} - -func (e *execProcess) resize(ws console.WinSize) error { - if e.console == nil { - return nil - } - return e.console.Resize(ws) -} - -func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Kill(ctx, sig, false) -} - -func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { - internalPid := e.internalPid - if internalPid != 0 { - if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ - Pid: internalPid, - }); err != nil { - // If this returns error, consider the process has already stopped. - // TODO: Fix after signal handling is fixed. - return errors.Wrapf(errdefs.ErrNotFound, err.Error()) - } - } - return nil -} - -func (e *execProcess) Stdin() io.Closer { - return e.stdin -} - -func (e *execProcess) Stdio() proc.Stdio { - return e.stdio -} - -func (e *execProcess) Start(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Start(ctx) -} - -func (e *execProcess) start(ctx context.Context) (err error) { - var ( - socket *runc.Socket - pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - ) - if e.stdio.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create runc console socket") - } - defer socket.Close() - } else if e.stdio.IsNull() { - if e.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") - } - } else { - if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { - return errors.Wrap(err, "failed to create runc io pipes") - } - } - opts := &runsc.ExecOpts{ - PidFile: pidfile, - InternalPidFile: internalPidfile, - IO: e.io, - Detach: true, - } - if socket != nil { - opts.ConsoleSocket = socket - } - eventCh := e.parent.Monitor.Subscribe() - defer func() { - // Unsubscribe if an error is returned. - if err != nil { - e.parent.Monitor.Unsubscribe(eventCh) - } - }() - if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { - close(e.waitBlock) - return e.parent.runtimeError(err, "OCI runtime exec failed") - } - if e.stdio.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) - } - e.closers = append(e.closers, sc) - e.stdin = sc - } - var copyWaitGroup sync.WaitGroup - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return errors.Wrap(err, "failed to retrieve console master") - } - if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start console copy") - } - } else if !e.stdio.IsNull() { - if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") - } - } - copyWaitGroup.Wait() - pid, err := runc.ReadPidFile(opts.PidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") - } - e.pid = pid - internalPid, err := runc.ReadPidFile(opts.InternalPidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid") - } - e.internalPid = internalPid - go func() { - defer e.parent.Monitor.Unsubscribe(eventCh) - for event := range eventCh { - if event.Pid == e.pid { - ExitCh <- Exit{ - Timestamp: event.Timestamp, - ID: e.id, - Status: event.Status, - } - break - } - } - }() - return nil -} - -func (e *execProcess) Status(ctx context.Context) (string, error) { - e.mu.Lock() - defer e.mu.Unlock() - // if we don't have a pid then the exec process has just been created - if e.pid == 0 { - return "created", nil - } - // if we have a pid and it can be signaled, the process is running - // TODO(random-liu): Use `runsc kill --pid`. - if err := unix.Kill(e.pid, 0); err == nil { - return "running", nil - } - // else if we have a pid but it can nolonger be signaled, it has stopped - return "stopped", nil -} diff --git a/pkg/v1/proc/exec_state.go b/pkg/v1/proc/exec_state.go deleted file mode 100644 index e10954670..000000000 --- a/pkg/v1/proc/exec_state.go +++ /dev/null @@ -1,156 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/pkg/errors" -) - -type execState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type execCreatedState struct { - p *execProcess -} - -func (s *execCreatedState) transition(name string) error { - switch name { - case "running": - s.p.execState = &execRunningState{p: s.p} - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - case "deleted": - s.p.execState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execCreatedState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execCreatedState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - return err - } - return s.transition("running") -} - -func (s *execCreatedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execCreatedState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execRunningState struct { - p *execProcess -} - -func (s *execRunningState) transition(name string) error { - switch name { - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execRunningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execRunningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") -} - -func (s *execRunningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") -} - -func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execRunningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execStoppedState struct { - p *execProcess -} - -func (s *execStoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.execState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execStoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a stopped container") -} - -func (s *execStoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") -} - -func (s *execStoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execStoppedState) SetExited(status int) { - // no op -} diff --git a/pkg/v1/proc/init.go b/pkg/v1/proc/init.go deleted file mode 100644 index 5dbb1daab..000000000 --- a/pkg/v1/proc/init.go +++ /dev/null @@ -1,464 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - "encoding/json" - "io" - "path/filepath" - "strings" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -// InitPidFile name of the file that contains the init pid -const InitPidFile = "init.pid" - -// Init represents an initial process for a container -type Init struct { - wg sync.WaitGroup - initState initState - - // mu is used to ensure that `Start()` and `Exited()` calls return in - // the right order when invoked in separate go routines. - // This is the case within the shim implementation as it makes use of - // the reaper interface. - mu sync.Mutex - - waitBlock chan struct{} - - WorkDir string - - id string - Bundle string - console console.Console - Platform proc.Platform - io runc.IO - runtime *runsc.Runsc - status int - exited time.Time - pid int - closers []io.Closer - stdin io.Closer - stdio proc.Stdio - Rootfs string - IoUID int - IoGID int - Sandbox bool - UserLog string - Monitor ProcessMonitor -} - -// NewRunsc returns a new runsc instance for a process -func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { - if root == "" { - root = RunscRoot - } - return &runsc.Runsc{ - Command: runtime, - PdeathSignal: syscall.SIGKILL, - Log: filepath.Join(path, "log.json"), - LogFormat: runc.JSON, - Root: filepath.Join(root, namespace), - Config: config, - } -} - -// New returns a new init process -func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { - p := &Init{ - id: id, - runtime: runtime, - stdio: stdio, - status: 0, - waitBlock: make(chan struct{}), - } - p.initState = &createdState{p: p} - return p -} - -// Create the process with the provided config -func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { - var socket *runc.Socket - if r.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create OCI runtime console socket") - } - defer socket.Close() - } else if hasNoIO(r) { - if p.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") - } - } else { - if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { - return errors.Wrap(err, "failed to create OCI runtime io pipes") - } - } - pidFile := filepath.Join(p.Bundle, InitPidFile) - opts := &runsc.CreateOpts{ - PidFile: pidFile, - } - if socket != nil { - opts.ConsoleSocket = socket - } - if p.Sandbox { - opts.IO = p.io - // UserLog is only useful for sandbox. - opts.UserLog = p.UserLog - } - if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { - return p.runtimeError(err, "OCI runtime create failed") - } - if r.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) - } - p.stdin = sc - p.closers = append(p.closers, sc) - } - var copyWaitGroup sync.WaitGroup - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return errors.Wrap(err, "failed to retrieve console master") - } - console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) - if err != nil { - return errors.Wrap(err, "failed to start console copy") - } - p.console = console - } else if !hasNoIO(r) { - if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") - } - } - - copyWaitGroup.Wait() - pid, err := runc.ReadPidFile(pidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime container pid") - } - p.pid = pid - return nil -} - -// Wait for the process to exit -func (p *Init) Wait() { - <-p.waitBlock -} - -// ID of the process -func (p *Init) ID() string { - return p.id -} - -// Pid of the process -func (p *Init) Pid() int { - return p.pid -} - -// ExitStatus of the process -func (p *Init) ExitStatus() int { - p.mu.Lock() - defer p.mu.Unlock() - return p.status -} - -// ExitedAt at time when the process exited -func (p *Init) ExitedAt() time.Time { - p.mu.Lock() - defer p.mu.Unlock() - return p.exited -} - -// Status of the process -func (p *Init) Status(ctx context.Context) (string, error) { - p.mu.Lock() - defer p.mu.Unlock() - c, err := p.runtime.State(ctx, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return "stopped", nil - } - return "", p.runtimeError(err, "OCI runtime state failed") - } - return p.convertStatus(c.Status), nil -} - -// Start the init process -func (p *Init) Start(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Start(ctx) -} - -func (p *Init) start(ctx context.Context) error { - var cio runc.IO - if !p.Sandbox { - cio = p.io - } - if err := p.runtime.Start(ctx, p.id, cio); err != nil { - return p.runtimeError(err, "OCI runtime start failed") - } - go func() { - status, err := p.runtime.Wait(context.Background(), p.id) - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) - // TODO(random-liu): Handle runsc kill error. - if err := p.killAll(ctx); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) - } - status = internalErrorCode - } - ExitCh <- Exit{ - Timestamp: time.Now(), - ID: p.id, - Status: status, - } - }() - return nil -} - -// SetExited of the init process with the next status -func (p *Init) SetExited(status int) { - p.mu.Lock() - defer p.mu.Unlock() - - p.initState.SetExited(status) -} - -func (p *Init) setExited(status int) { - p.exited = time.Now() - p.status = status - p.Platform.ShutdownConsole(context.Background(), p.console) - close(p.waitBlock) -} - -// Delete the init process -func (p *Init) Delete(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Delete(ctx) -} - -func (p *Init) delete(ctx context.Context) error { - p.killAll(ctx) - p.wg.Wait() - err := p.runtime.Delete(ctx, p.id, nil) - // ignore errors if a runtime has already deleted the process - // but we still hold metadata and pipes - // - // this is common during a checkpoint, runc will delete the container state - // after a checkpoint and the container will no longer exist within runc - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - err = nil - } else { - err = p.runtimeError(err, "failed to delete task") - } - } - if p.io != nil { - for _, c := range p.closers { - c.Close() - } - p.io.Close() - } - if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") - if err == nil { - err = errors.Wrap(err2, "failed rootfs umount") - } - } - return err -} - -// Resize the init processes console -func (p *Init) Resize(ws console.WinSize) error { - p.mu.Lock() - defer p.mu.Unlock() - - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -func (p *Init) resize(ws console.WinSize) error { - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -// Kill the init process -func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Kill(ctx, signal, all) -} - -func (p *Init) kill(context context.Context, signal uint32, all bool) error { - var ( - killErr error - backoff = 100 * time.Millisecond - ) - timeout := 1 * time.Second - for start := time.Now(); time.Now().Sub(start) < timeout; { - c, err := p.runtime.State(context, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") - } - return p.runtimeError(err, "OCI runtime state failed") - } - // For runsc, signal only works when container is running state. - // If the container is not in running state, directly return - // "no such process" - if p.convertStatus(c.Status) == "stopped" { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") - } - killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ - All: all, - }) - if killErr == nil { - return nil - } - time.Sleep(backoff) - backoff *= 2 - } - return p.runtimeError(killErr, "kill timeout") -} - -// KillAll processes belonging to the init process -func (p *Init) KillAll(context context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - return p.killAll(context) -} - -func (p *Init) killAll(context context.Context) error { - p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ - All: true, - }) - // Ignore error handling for `runsc kill --all` for now. - // * If it doesn't return error, it is good; - // * If it returns error, consider the container has already stopped. - // TODO: Fix `runsc kill --all` error handling. - return nil -} - -// Stdin of the process -func (p *Init) Stdin() io.Closer { - return p.stdin -} - -// Runtime returns the OCI runtime configured for the init process -func (p *Init) Runtime() *runsc.Runsc { - return p.runtime -} - -// Exec returns a new child process -func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Exec(ctx, path, r) -} - -// exec returns a new exec'd process -func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - // process exec request - var spec specs.Process - if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { - return nil, err - } - spec.Terminal = r.Terminal - - e := &execProcess{ - id: r.ID, - path: path, - parent: p, - spec: spec, - stdio: proc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }, - waitBlock: make(chan struct{}), - } - e.execState = &execCreatedState{p: e} - return e, nil -} - -// Stdio of the process -func (p *Init) Stdio() proc.Stdio { - return p.stdio -} - -func (p *Init) runtimeError(rErr error, msg string) error { - if rErr == nil { - return nil - } - - rMsg, err := getLastRuntimeError(p.runtime) - switch { - case err != nil: - return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error()) - case rMsg == "": - return errors.Wrap(rErr, msg) - default: - return errors.Errorf("%s: %s", msg, rMsg) - } -} - -func (p *Init) convertStatus(status string) string { - if status == "created" && !p.Sandbox && p.status == internalErrorCode { - // Treat start failure state for non-root container as stopped. - return "stopped" - } - return status -} - -func withConditionalIO(c proc.Stdio) runc.IOOpt { - return func(o *runc.IOOption) { - o.OpenStdin = c.Stdin != "" - o.OpenStdout = c.Stdout != "" - o.OpenStderr = c.Stderr != "" - } -} diff --git a/pkg/v1/proc/init_state.go b/pkg/v1/proc/init_state.go deleted file mode 100644 index f56f6fe28..000000000 --- a/pkg/v1/proc/init_state.go +++ /dev/null @@ -1,184 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" -) - -type initState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Exec(context.Context, string, *ExecConfig) (proc.Process, error) - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type createdState struct { - p *Init -} - -func (s *createdState) transition(name string) error { - switch name { - case "running": - s.p.initState = &runningState{p: s.p} - case "stopped": - s.p.initState = &stoppedState{p: s.p} - case "deleted": - s.p.initState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *createdState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *createdState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - // Containerd doesn't allow deleting container in created state. - // However, for gvisor, a non-root container in created state can - // only go to running state. If the container can't be started, - // it can only stay in created state, and never be deleted. - // To work around that, we treat non-root container in start failure - // state as stopped. - if !s.p.Sandbox { - s.p.io.Close() - s.p.setExited(internalErrorCode) - if err := s.transition("stopped"); err != nil { - panic(err) - } - } - return err - } - return s.transition("running") -} - -func (s *createdState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *createdState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return s.p.exec(ctx, path, r) -} - -type runningState struct { - p *Init -} - -func (s *runningState) transition(name string) error { - switch name { - case "stopped": - s.p.initState = &stoppedState{p: s.p} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *runningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *runningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") -} - -func (s *runningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") -} - -func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *runningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return s.p.exec(ctx, path, r) -} - -type stoppedState struct { - p *Init -} - -func (s *stoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.initState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *stoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a stopped container") -} - -func (s *stoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") -} - -func (s *stoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", s.p.id) -} - -func (s *stoppedState) SetExited(status int) { - // no op -} - -func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a stopped state") -} diff --git a/pkg/v1/proc/io.go b/pkg/v1/proc/io.go deleted file mode 100644 index 4afa94cf2..000000000 --- a/pkg/v1/proc/io.go +++ /dev/null @@ -1,169 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "sync" - "sync/atomic" - "syscall" - - "github.com/containerd/containerd/log" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" -) - -// TODO(random-liu): This file can be a util. - -var bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, -} - -func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error { - var sameFile *countingWriteCloser - for _, i := range []struct { - name string - dest func(wc io.WriteCloser, rc io.Closer) - }{ - { - name: stdout, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { - log.G(ctx).Warn("error copying stdout") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, { - name: stderr, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { - log.G(ctx).Warn("error copying stderr") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, - } { - ok, err := isFifo(i.name) - if err != nil { - return err - } - var ( - fw io.WriteCloser - fr io.Closer - ) - if ok { - if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - } else { - if sameFile != nil { - sameFile.count++ - i.dest(sameFile, nil) - continue - } - if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if stdout == stderr { - sameFile = &countingWriteCloser{ - WriteCloser: fw, - count: 1, - } - } - } - i.dest(fw, fr) - } - if stdin == "" { - return nil - } - f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - - io.CopyBuffer(rio.Stdin(), f, *p) - rio.Stdin().Close() - f.Close() - }() - return nil -} - -// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. -type countingWriteCloser struct { - io.WriteCloser - count int64 -} - -func (c *countingWriteCloser) Close() error { - if atomic.AddInt64(&c.count, -1) > 0 { - return nil - } - return c.WriteCloser.Close() -} - -// isFifo checks if a file is a fifo -// if the file does not exist then it returns false -func isFifo(path string) (bool, error) { - stat, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return false, nil - } - return false, err - } - if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { - return true, nil - } - return false, nil -} diff --git a/pkg/v1/proc/process.go b/pkg/v1/proc/process.go deleted file mode 100644 index 7dbcd823d..000000000 --- a/pkg/v1/proc/process.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "github.com/pkg/errors" -) - -// RunscRoot is the path to the root runsc state directory -const RunscRoot = "/run/containerd/runsc" - -func stateName(v interface{}) string { - switch v.(type) { - case *runningState, *execRunningState: - return "running" - case *createdState, *execCreatedState: - return "created" - case *deletedState: - return "deleted" - case *stoppedState: - return "stopped" - } - panic(errors.Errorf("invalid state %v", v)) -} diff --git a/pkg/v1/proc/types.go b/pkg/v1/proc/types.go deleted file mode 100644 index 1d7c8ade3..000000000 --- a/pkg/v1/proc/types.go +++ /dev/null @@ -1,72 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "time" - - google_protobuf "github.com/gogo/protobuf/types" - - runc "github.com/containerd/go-runc" -) - -// Mount holds filesystem mount configuration -type Mount struct { - Type string - Source string - Target string - Options []string -} - -// CreateConfig hold task creation configuration -type CreateConfig struct { - ID string - Bundle string - Runtime string - Rootfs []Mount - Terminal bool - Stdin string - Stdout string - Stderr string - Options *google_protobuf.Any -} - -// ExecConfig holds exec creation configuration -type ExecConfig struct { - ID string - Terminal bool - Stdin string - Stdout string - Stderr string - Spec *google_protobuf.Any -} - -// Exit is the type of exit events -type Exit struct { - Timestamp time.Time - ID string - Status int -} - -// ProcessMonitor monitors process exit changes -type ProcessMonitor interface { - // Subscribe to process exit changes - Subscribe() chan runc.Exit - // Unsubscribe to process exit changes - Unsubscribe(c chan runc.Exit) -} diff --git a/pkg/v1/proc/utils.go b/pkg/v1/proc/utils.go deleted file mode 100644 index e770a6810..000000000 --- a/pkg/v1/proc/utils.go +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "encoding/json" - "io" - "os" - "strings" - "time" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -const ( - internalErrorCode = 128 - bufferSize = 32 -) - -// ExitCh is the exit events channel for containers and exec processes -// inside the sandbox. -var ExitCh = make(chan Exit, bufferSize) - -// TODO(random-liu): This can be a utility. - -// TODO(mlaventure): move to runc package? -func getLastRuntimeError(r *runsc.Runsc) (string, error) { - if r.Log == "" { - return "", nil - } - - f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) - if err != nil { - return "", err - } - - var ( - errMsg string - log struct { - Level string - Msg string - Time time.Time - } - ) - - dec := json.NewDecoder(f) - for err = nil; err == nil; { - if err = dec.Decode(&log); err != nil && err != io.EOF { - return "", err - } - if log.Level == "error" { - errMsg = strings.TrimSpace(log.Msg) - } - } - - return errMsg, nil -} - -func copyFile(to, from string) error { - ff, err := os.Open(from) - if err != nil { - return err - } - defer ff.Close() - tt, err := os.Create(to) - if err != nil { - return err - } - defer tt.Close() - - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - _, err = io.CopyBuffer(tt, ff, *p) - return err -} - -func hasNoIO(r *CreateConfig) bool { - return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" -} diff --git a/pkg/v1/shim/platform.go b/pkg/v1/shim/platform.go deleted file mode 100644 index 10c54958f..000000000 --- a/pkg/v1/shim/platform.go +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package shim - -import ( - "context" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" - "github.com/pkg/errors" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, errors.New("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return errors.New("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *Service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return errors.Wrap(err, "failed to initialize epoller") - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/pkg/v1/shim/service.go b/pkg/v1/shim/service.go deleted file mode 100644 index e06a5562c..000000000 --- a/pkg/v1/shim/service.go +++ /dev/null @@ -1,581 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package shim - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/containerd/console" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - rproc "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/containerd/runtime/v1/shim" - shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/proc" - "github.com/google/gvisor-containerd-shim/pkg/v1/utils" -) - -var ( - empty = &ptypes.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -// Config contains shim specific configuration -type Config struct { - Path string - Namespace string - WorkDir string - RuntimeRoot string - RunscConfig map[string]string -} - -// NewService returns a new shim service that can be used via GRPC -func NewService(config Config, publisher events.Publisher) (*Service, error) { - if config.Namespace == "" { - return nil, fmt.Errorf("shim namespace cannot be empty") - } - ctx := namespaces.WithNamespace(context.Background(), config.Namespace) - ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{ - "namespace": config.Namespace, - "path": config.Path, - "pid": os.Getpid(), - })) - s := &Service{ - config: config, - context: ctx, - processes: make(map[string]rproc.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - } - go s.processExits() - if err := s.initPlatform(); err != nil { - return nil, errors.Wrap(err, "failed to initialized platform behavior") - } - go s.forward(publisher) - return s, nil -} - -// Service is the shim implementation of a remote shim over GRPC -type Service struct { - mu sync.Mutex - - config Config - context context.Context - processes map[string]rproc.Process - events chan interface{} - platform rproc.Platform - ec chan proc.Exit - - // Filled by Create() - id string - bundle string -} - -// Create a new initial process and container with the underlying OCI runtime -func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) { - s.mu.Lock() - defer s.mu.Unlock() - - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - rootfs := filepath.Join(r.Bundle, "rootfs") - if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: r.Runtime, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Options: r.Options, - } - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") - } - } - }() - for _, rm := range mounts { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) - } - } - process, err := newInit( - ctx, - s.config.Path, - s.config.WorkDir, - s.config.RuntimeRoot, - s.config.Namespace, - s.config.RunscConfig, - s.platform, - config, - ) - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - // save the main task id and bundle to the shim for additional requests - s.id = r.ID - s.bundle = r.Bundle - pid := process.Pid() - s.processes[r.ID] = process - return &shimapi.CreateTaskResponse{ - Pid: uint32(pid), - }, nil -} - -// Start a process -func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - return &shimapi.StartResponse{ - ID: p.ID(), - Pid: uint32(p.Pid()), - }, nil -} - -// Delete the initial process and container -func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, s.id) - s.mu.Unlock() - s.platform.Close() - return &shimapi.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// DeleteProcess deletes an exec'd process -func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) { - if r.ID == s.id { - return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, r.ID) - s.mu.Unlock() - return &shimapi.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec an additional process inside the container -func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) { - s.mu.Lock() - - if p := s.processes[r.ID]; p != nil { - s.mu.Unlock() - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID) - } - - p := s.processes[s.id] - s.mu.Unlock() - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - - process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{ - ID: r.ID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty of a process -func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) { - if r.ID == "" { - return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process -func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &shimapi.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause the container -func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume the container -func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill a process with the provided signal -func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { - if r.ID == "" { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil - } - - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// ListPids returns all pids inside the container -func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) { - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &shimapi.ListPidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO of a process -func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") - } - } - return empty, nil -} - -// Checkpoint the container -func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// ShimInfo returns shim information such as the shim's pid -func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) { - return &shimapi.ShimInfoResponse{ - ShimPid: uint32(os.Getpid()), - }, nil -} - -// Update a running container -func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait for a process to exit -func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - p.Wait() - - return &shimapi.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *Service) processExits() { - for e := range s.ec { - s.checkProcesses(e) - } -} - -func (s *Service) allProcesses() []rproc.Process { - s.mu.Lock() - defer s.mu.Unlock() - - res := make([]rproc.Process, 0, len(s.processes)) - for _, p := range s.processes { - res = append(res, p) - } - return res -} - -func (s *Service) checkProcesses(e proc.Exit) { - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed - if err := ip.KillAll(s.context); err != nil { - log.G(s.context).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &eventstypes.TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - - ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *Service) forward(publisher events.Publisher) { - for e := range s.events { - if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil { - log.G(s.context).WithError(err).Error("post event") - } - } -} - -// getInitProcess returns initial process -func (s *Service) getInitProcess() (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[s.id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - return p, nil -} - -// getExecProcess returns exec process -func (s *Service) getExecProcess(id string) (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id) - } - return p, nil -} - -func getTopic(ctx context.Context, e interface{}) string { - switch e.(type) { - case *eventstypes.TaskCreate: - return runtime.TaskCreateEventTopic - case *eventstypes.TaskStart: - return runtime.TaskStartEventTopic - case *eventstypes.TaskOOM: - return runtime.TaskOOMEventTopic - case *eventstypes.TaskExit: - return runtime.TaskExitEventTopic - case *eventstypes.TaskDelete: - return runtime.TaskDeleteEventTopic - case *eventstypes.TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *eventstypes.TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - logrus.Warnf("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, config map[string]string, platform rproc.Platform, r *proc.CreateConfig) (*proc.Init, error) { - var options runctypes.CreateOptions - if r.Options != nil { - v, err := typeurl.UnmarshalAny(r.Options) - if err != nil { - return nil, err - } - options = *v.(*runctypes.CreateOptions) - } - - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, errors.Wrap(err, "read oci spec") - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") - } - - runsc.FormatLogPath(r.ID, config) - rootfs := filepath.Join(path, "rootfs") - runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config) - p := proc.New(r.ID, runtime, rproc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(options.IoUid) - p.IoGID = int(options.IoGid) - p.Sandbox = utils.IsSandbox(spec) - p.UserLog = utils.UserLogPath(spec) - p.Monitor = shim.Default - return p, nil -} diff --git a/pkg/v1/utils/utils.go b/pkg/v1/utils/utils.go deleted file mode 100644 index 83840c047..000000000 --- a/pkg/v1/utils/utils.go +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "io/ioutil" - "os" - "path/filepath" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -// ReadSpec reads OCI spec from the bundle directory. -func ReadSpec(bundle string) (*specs.Spec, error) { - f, err := os.Open(filepath.Join(bundle, "config.json")) - if err != nil { - return nil, err - } - b, err := ioutil.ReadAll(f) - if err != nil { - return nil, err - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - return nil, err - } - return &spec, nil -} - -// IsSandbox checks whether a container is a sandbox container. -func IsSandbox(spec *specs.Spec) bool { - t, ok := spec.Annotations[annotations.ContainerType] - return !ok || t == annotations.ContainerTypeSandbox -} - -// UserLogPath gets user log path from OCI annotation. -func UserLogPath(spec *specs.Spec) string { - sandboxLogDir := spec.Annotations[annotations.SandboxLogDir] - if sandboxLogDir == "" { - return "" - } - return filepath.Join(sandboxLogDir, "gvisor.log") -} diff --git a/pkg/v1/utils/volumes.go b/pkg/v1/utils/volumes.go deleted file mode 100644 index 61adeaa54..000000000 --- a/pkg/v1/utils/volumes.go +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright 2019 Google LLC. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "path/filepath" - "strings" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -const volumeKeyPrefix = "dev.gvisor.spec.mount." - -var kubeletPodsDir = "/var/lib/kubelet/pods" - -// volumeName gets volume name from volume annotation key, example: -// dev.gvisor.spec.mount.NAME.share -func volumeName(k string) string { - return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] -} - -// volumeFieldName gets volume field name from volume annotation key, example: -// `type` is the field of dev.gvisor.spec.mount.NAME.type -func volumeFieldName(k string) string { - parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") - return parts[len(parts)-1] -} - -// podUID gets pod UID from the pod log path. -func podUID(s *specs.Spec) (string, error) { - sandboxLogDir := s.Annotations[annotations.SandboxLogDir] - if sandboxLogDir == "" { - return "", errors.New("no sandbox log path annotation") - } - fields := strings.Split(filepath.Base(sandboxLogDir), "_") - switch len(fields) { - case 1: // This is the old CRI logging path - return fields[0], nil - case 3: // This is the new CRI logging path - return fields[2], nil - } - return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir) -} - -// isVolumeKey checks whether an annotation key is for volume. -func isVolumeKey(k string) bool { - return strings.HasPrefix(k, volumeKeyPrefix) -} - -// volumeSourceKey constructs the annotation key for volume source. -func volumeSourceKey(volume string) string { - return volumeKeyPrefix + volume + ".source" -} - -// volumePath searches the volume path in the kubelet pod directory. -func volumePath(volume, uid string) (string, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) - dirs, err := filepath.Glob(volumeSearchPath) - if err != nil { - return "", err - } - if len(dirs) != 1 { - return "", errors.Errorf("unexpected matched volume list %v", dirs) - } - return dirs[0], nil -} - -// isVolumePath checks whether a string is the volume path. -func isVolumePath(volume, path string) (bool, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) - return filepath.Match(volumeSearchPath, path) -} - -// UpdateVolumeAnnotations add necessary OCI annotations for gvisor -// volume optimization. -func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { - var ( - uid string - err error - ) - if IsSandbox(s) { - uid, err = podUID(s) - if err != nil { - // Skip if we can't get pod UID, because this doesn't work - // for containerd 1.1. - logrus.WithError(err).Error("Can't get pod uid") - return nil - } - } - var updated bool - for k, v := range s.Annotations { - if !isVolumeKey(k) { - continue - } - if volumeFieldName(k) != "type" { - continue - } - volume := volumeName(k) - if uid != "" { - // This is a sandbox - path, err := volumePath(volume, uid) - if err != nil { - return errors.Wrapf(err, "get volume path for %q", volume) - } - s.Annotations[volumeSourceKey(volume)] = path - updated = true - } else { - // This is a container - for i := range s.Mounts { - // An error is returned for sandbox if source annotation - // is not successfully applied, so it is guaranteed that - // the source annotation for sandbox has already been - // successfully applied at this point. - // The volume name is unique inside a pod, so matching without - // podUID is fine here. - // TODO: Pass podUID down to shim for containers to do - // more accurate matching. - if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { - // gVisor requires the container mount type to match - // sandbox mount type. - s.Mounts[i].Type = v - updated = true - } - } - } - } - if !updated { - return nil - } - // Update bundle - b, err := json.Marshal(s) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) -} diff --git a/pkg/v1/utils/volumes_test.go b/pkg/v1/utils/volumes_test.go deleted file mode 100644 index 472cd6e86..000000000 --- a/pkg/v1/utils/volumes_test.go +++ /dev/null @@ -1,311 +0,0 @@ -/* -Copyright 2019 Google LLC. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "testing" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -func TestUpdateVolumeAnnotations(t *testing.T) { - dir, err := ioutil.TempDir("", "test-update-volume-annotations") - if err != nil { - t.Fatalf("create tempdir: %v", err) - } - defer os.RemoveAll(dir) - kubeletPodsDir = dir - - const ( - testPodUID = "testuid" - testVolumeName = "testvolume" - testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID - testLegacyLogDirPath = "/var/log/pods/" + testPodUID - ) - testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) - - if err := os.MkdirAll(testVolumePath, 0755); err != nil { - t.Fatalf("Create test volume: %v", err) - } - - for _, test := range []struct { - desc string - spec *specs.Spec - expected *specs.Spec - expectErr bool - expectUpdate bool - }{ - { - desc: "volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "volume annotations for sandbox with legacy log path", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLegacyLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLegacyLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "tmpfs: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "tmpfs", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "bind: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "should not return error without pod log directory", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - }, - { - desc: "should return error if volume path does not exist", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount.notexist.share": "pod", - "dev.gvisor.spec.mount.notexist.type": "tmpfs", - "dev.gvisor.spec.mount.notexist.options": "ro", - }, - }, - expectErr: true, - }, - { - desc: "no volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - }, - }, - }, - { - desc: "no volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - }, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - bundle, err := ioutil.TempDir(dir, "test-bundle") - if err != nil { - t.Fatalf("Create test bundle: %v", err) - } - err = UpdateVolumeAnnotations(bundle, test.spec) - if test.expectErr { - if err == nil { - t.Fatal("Expected error, but got nil") - } - return - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expected, test.spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) - } - if test.expectUpdate { - b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) - if err != nil { - t.Fatalf("Read spec from bundle: %v", err) - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - t.Fatalf("Unmarshal spec: %v", err) - } - if !reflect.DeepEqual(test.expected, &spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, &spec) - } - } - }) - } -} diff --git a/pkg/v2/epoll.go b/pkg/v2/epoll.go deleted file mode 100644 index 76c7b54d6..000000000 --- a/pkg/v2/epoll.go +++ /dev/null @@ -1,129 +0,0 @@ -// +build linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package v2 - -import ( - "context" - "sync" - - "github.com/containerd/cgroups" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/runtime" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func newOOMEpoller(publisher events.Publisher) (*epoller, error) { - fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) - if err != nil { - return nil, err - } - return &epoller{ - fd: fd, - publisher: publisher, - set: make(map[uintptr]*item), - }, nil -} - -type epoller struct { - mu sync.Mutex - - fd int - publisher events.Publisher - set map[uintptr]*item -} - -type item struct { - id string - cg cgroups.Cgroup -} - -func (e *epoller) Close() error { - return unix.Close(e.fd) -} - -func (e *epoller) run(ctx context.Context) { - var events [128]unix.EpollEvent - for { - select { - case <-ctx.Done(): - e.Close() - return - default: - n, err := unix.EpollWait(e.fd, events[:], -1) - if err != nil { - if err == unix.EINTR { - continue - } - logrus.WithError(err).Error("cgroups: epoll wait") - } - for i := 0; i < n; i++ { - e.process(ctx, uintptr(events[i].Fd)) - } - } - } -} - -func (e *epoller) add(id string, cg cgroups.Cgroup) error { - e.mu.Lock() - defer e.mu.Unlock() - fd, err := cg.OOMEventFD() - if err != nil { - return err - } - e.set[fd] = &item{ - id: id, - cg: cg, - } - event := unix.EpollEvent{ - Fd: int32(fd), - Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, - } - return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) -} - -func (e *epoller) process(ctx context.Context, fd uintptr) { - flush(fd) - e.mu.Lock() - i, ok := e.set[fd] - if !ok { - e.mu.Unlock() - return - } - e.mu.Unlock() - if i.cg.State() == cgroups.Deleted { - e.mu.Lock() - delete(e.set, fd) - e.mu.Unlock() - unix.Close(int(fd)) - return - } - if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ - ContainerID: i.id, - }); err != nil { - logrus.WithError(err).Error("publish OOM event") - } -} - -func flush(fd uintptr) error { - var buf [8]byte - _, err := unix.Read(int(fd), buf[:]) - return err -} diff --git a/pkg/v2/options/options.go b/pkg/v2/options/options.go deleted file mode 100644 index 34b002ac4..000000000 --- a/pkg/v2/options/options.go +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -package options - -const OptionType = "io.containerd.runsc.v1.options" - -// Options is runtime options for io.containerd.runsc.v1. -type Options struct { - // ShimCgroup is the cgroup the shim should be in. - ShimCgroup string `toml:"shim_cgroup"` - // IoUid is the I/O's pipes uid. - IoUid uint32 `toml:"io_uid"` - // IoUid is the I/O's pipes gid. - IoGid uint32 `toml:"io_gid"` - // BinaryName is the binary name of the runsc binary. - BinaryName string `toml:"binary_name"` - // Root is the runsc root directory. - Root string `toml:"root"` - // RunscConfig is a key/value map of all runsc flags. - RunscConfig map[string]string `toml:"runsc_config"` -} diff --git a/pkg/v2/service.go b/pkg/v2/service.go deleted file mode 100644 index f99456f63..000000000 --- a/pkg/v2/service.go +++ /dev/null @@ -1,826 +0,0 @@ -// +build linux - -/* - Copyright The containerd Authors. - Copyright 2018 Google LLC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package v2 - -import ( - "context" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/BurntSushi/toml" - "github.com/containerd/cgroups" - "github.com/containerd/console" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - rproc "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/containerd/runtime/v2/shim" - taskAPI "github.com/containerd/containerd/runtime/v2/task" - runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/proc" - "github.com/google/gvisor-containerd-shim/pkg/v1/utils" - "github.com/google/gvisor-containerd-shim/pkg/v2/options" -) - -var ( - empty = &ptypes.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -var _ = (taskAPI.TaskService)(&service{}) - -// configFile is the default config file name. For containerd 1.2, -// we assume that a config.toml should exist in the runtime root. -const configFile = "config.toml" - -// New returns a new shim service that can be used via GRPC -func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) { - ep, err := newOOMEpoller(publisher) - if err != nil { - return nil, err - } - ctx, cancel := context.WithCancel(ctx) - go ep.run(ctx) - s := &service{ - id: id, - context: ctx, - processes: make(map[string]rproc.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - oomPoller: ep, - cancel: cancel, - } - go s.processExits() - runsc.Monitor = shim.Default - if err := s.initPlatform(); err != nil { - cancel() - return nil, errors.Wrap(err, "failed to initialized platform behavior") - } - go s.forward(publisher) - return s, nil -} - -// service is the shim implementation of a remote shim over GRPC -type service struct { - mu sync.Mutex - - context context.Context - task rproc.Process - processes map[string]rproc.Process - events chan interface{} - platform rproc.Platform - opts options.Options - ec chan proc.Exit - oomPoller *epoller - - id string - bundle string - cancel func() -} - -func newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - self, err := os.Executable() - if err != nil { - return nil, err - } - cwd, err := os.Getwd() - if err != nil { - return nil, err - } - args := []string{ - "-namespace", ns, - "-address", containerdAddress, - "-publish-binary", containerdBinary, - } - cmd := exec.Command(self, args...) - cmd.Dir = cwd - cmd.Env = append(os.Environ(), "GOMAXPROCS=2") - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - return cmd, nil -} - -func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) { - cmd, err := newCommand(ctx, containerdBinary, containerdAddress) - if err != nil { - return "", err - } - address, err := shim.SocketAddress(ctx, id) - if err != nil { - return "", err - } - socket, err := shim.NewSocket(address) - if err != nil { - return "", err - } - defer socket.Close() - f, err := socket.File() - if err != nil { - return "", err - } - defer f.Close() - - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - - if err := cmd.Start(); err != nil { - return "", err - } - defer func() { - if err != nil { - cmd.Process.Kill() - } - }() - // make sure to wait after start - go cmd.Wait() - if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { - return "", err - } - if err := shim.WriteAddress("address", address); err != nil { - return "", err - } - if err := shim.SetScore(cmd.Process.Pid); err != nil { - return "", errors.Wrap(err, "failed to set OOM Score on shim") - } - return address, nil -} - -func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { - path, err := os.Getwd() - if err != nil { - return nil, err - } - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - runtime, err := s.readRuntime(path) - if err != nil { - return nil, err - } - r := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) - if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ - Force: true, - }); err != nil { - logrus.WithError(err).Warn("failed to remove runc container") - } - if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { - logrus.WithError(err).Warn("failed to cleanup rootfs mount") - } - return &taskAPI.DeleteResponse{ - ExitedAt: time.Now(), - ExitStatus: 128 + uint32(unix.SIGKILL), - }, nil -} - -func (s *service) readRuntime(path string) (string, error) { - data, err := ioutil.ReadFile(filepath.Join(path, "runtime")) - if err != nil { - return "", err - } - return string(data), nil -} - -func (s *service) writeRuntime(path, runtime string) error { - return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600) -} - -// Create a new initial process and container with the underlying OCI runtime -func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { - s.mu.Lock() - defer s.mu.Unlock() - - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, errors.Wrap(err, "create namespace") - } - - // Read from root for now. - var opts options.Options - if r.Options != nil { - v, err := typeurl.UnmarshalAny(r.Options) - if err != nil { - return nil, err - } - var path string - switch o := v.(type) { - case *runctypes.CreateOptions: // containerd 1.2.x - opts.IoUid = o.IoUid - opts.IoGid = o.IoGid - opts.ShimCgroup = o.ShimCgroup - case *runctypes.RuncOptions: // containerd 1.2.x - root := proc.RunscRoot - if o.RuntimeRoot != "" { - root = o.RuntimeRoot - } - - opts.BinaryName = o.Runtime - - path = filepath.Join(root, configFile) - if _, err := os.Stat(path); err != nil { - if !os.IsNotExist(err) { - return nil, errors.Wrapf(err, "stat config file %q", path) - } - // A config file in runtime root is not required. - path = "" - } - case *runtimeoptions.Options: // containerd 1.3.x+ - if o.ConfigPath == "" { - break - } - if o.TypeUrl != options.OptionType { - return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl) - } - path = o.ConfigPath - default: - return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl) - } - if path != "" { - if _, err = toml.DecodeFile(path, &opts); err != nil { - return nil, errors.Wrapf(err, "decode config file %q", path) - } - } - } - - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - rootfs := filepath.Join(r.Bundle, "rootfs") - if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: opts.BinaryName, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Options: r.Options, - } - if err := s.writeRuntime(r.Bundle, opts.BinaryName); err != nil { - return nil, err - } - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - logrus.WithError(err2).Warn("failed to cleanup rootfs mount") - } - } - }() - for _, rm := range mounts { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) - } - } - process, err := newInit( - ctx, - r.Bundle, - filepath.Join(r.Bundle, "work"), - ns, - s.platform, - config, - &opts, - rootfs, - ) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - // save the main task id and bundle to the shim for additional requests - s.id = r.ID - s.bundle = r.Bundle - - // Set up OOM notification on the sandbox's cgroup. This is done on sandbox - // create since the sandbox process will be created here. - pid := process.Pid() - if pid > 0 { - cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) - if err != nil { - return nil, errors.Wrapf(err, "loading cgroup for %d", pid) - } - if err := s.oomPoller.add(s.id, cg); err != nil { - return nil, errors.Wrapf(err, "add cg to OOM monitor") - } - } - s.task = process - s.opts = opts - return &taskAPI.CreateTaskResponse{ - Pid: uint32(process.Pid()), - }, nil - -} - -// Start a process -func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - // TODO: Set the cgroup and oom notifications on restore. - // https://github.com/google/gvisor-containerd-shim/issues/58 - return &taskAPI.StartResponse{ - Pid: uint32(p.Pid()), - }, nil -} - -// Delete the initial process and container -func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - isTask := r.ExecID == "" - if !isTask { - s.mu.Lock() - delete(s.processes, r.ExecID) - s.mu.Unlock() - } - if isTask && s.platform != nil { - s.platform.Close() - } - return &taskAPI.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec an additional process inside the container -func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { - s.mu.Lock() - p := s.processes[r.ExecID] - s.mu.Unlock() - if p != nil { - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) - } - p = s.task - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - process, err := p.(*proc.Init).Exec(ctx, s.bundle, &proc.ExecConfig{ - ID: r.ExecID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ExecID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty of a process -func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process -func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &taskAPI.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause the container -func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume the container -func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill a process with the provided signal -func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// Pids returns all pids inside the container -func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &taskAPI.PidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO of a process -func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") - } - } - return empty, nil -} - -// Checkpoint the container -func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Connect returns shim information such as the shim's pid -func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { - var pid int - if s.task != nil { - pid = s.task.Pid() - } - return &taskAPI.ConnectResponse{ - ShimPid: uint32(os.Getpid()), - TaskPid: uint32(pid), - }, nil -} - -func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) { - s.cancel() - os.Exit(0) - return empty, nil -} - -func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { - path, err := os.Getwd() - if err != nil { - return nil, err - } - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - runtime, err := s.readRuntime(path) - if err != nil { - return nil, err - } - rs := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) - stats, err := rs.Stats(ctx, s.id) - if err != nil { - return nil, err - } - - // gvisor currently (as of 2020-03-03) only returns the total memory - // usage and current PID value[0]. However, we copy the common fields here - // so that future updates will propagate correct information. We're - // using the cgroups.Metrics structure so we're returning the same type - // as runc. - // - // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 - data, err := typeurl.MarshalAny(&cgroups.Metrics{ - CPU: &cgroups.CPUStat{ - Usage: &cgroups.CPUUsage{ - Total: stats.Cpu.Usage.Total, - Kernel: stats.Cpu.Usage.Kernel, - User: stats.Cpu.Usage.User, - PerCPU: stats.Cpu.Usage.Percpu, - }, - Throttling: &cgroups.Throttle{ - Periods: stats.Cpu.Throttling.Periods, - ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, - ThrottledTime: stats.Cpu.Throttling.ThrottledTime, - }, - }, - Memory: &cgroups.MemoryStat{ - Cache: stats.Memory.Cache, - Usage: &cgroups.MemoryEntry{ - Limit: stats.Memory.Usage.Limit, - Usage: stats.Memory.Usage.Usage, - Max: stats.Memory.Usage.Max, - Failcnt: stats.Memory.Usage.Failcnt, - }, - Swap: &cgroups.MemoryEntry{ - Limit: stats.Memory.Swap.Limit, - Usage: stats.Memory.Swap.Usage, - Max: stats.Memory.Swap.Max, - Failcnt: stats.Memory.Swap.Failcnt, - }, - Kernel: &cgroups.MemoryEntry{ - Limit: stats.Memory.Kernel.Limit, - Usage: stats.Memory.Kernel.Usage, - Max: stats.Memory.Kernel.Max, - Failcnt: stats.Memory.Kernel.Failcnt, - }, - KernelTCP: &cgroups.MemoryEntry{ - Limit: stats.Memory.KernelTCP.Limit, - Usage: stats.Memory.KernelTCP.Usage, - Max: stats.Memory.KernelTCP.Max, - Failcnt: stats.Memory.KernelTCP.Failcnt, - }, - }, - Pids: &cgroups.PidsStat{ - Current: stats.Pids.Current, - Limit: stats.Pids.Limit, - }, - }) - if err != nil { - return nil, err - } - return &taskAPI.StatsResponse{ - Stats: data, - }, nil -} - -// Update a running container -func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait for a process to exit -func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - p.Wait() - - return &taskAPI.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *service) processExits() { - for e := range s.ec { - s.checkProcesses(e) - } -} - -func (s *service) checkProcesses(e proc.Exit) { - // TODO(random-liu): Add `shouldKillAll` logic if container pid - // namespace is supported. - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed - if err := ip.KillAll(s.context); err != nil { - log.G(s.context).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &eventstypes.TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *service) allProcesses() (o []rproc.Process) { - s.mu.Lock() - defer s.mu.Unlock() - for _, p := range s.processes { - o = append(o, p) - } - if s.task != nil { - o = append(o, s.task) - } - return o -} - -func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - s.mu.Lock() - p := s.task - s.mu.Unlock() - if p == nil { - return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created") - } - ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *service) forward(publisher events.Publisher) { - for e := range s.events { - ctx, cancel := context.WithTimeout(s.context, 5*time.Second) - err := publisher.Publish(ctx, getTopic(e), e) - cancel() - if err != nil { - logrus.WithError(err).Error("post event") - } - } -} - -func (s *service) getProcess(execID string) (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - if execID == "" { - return s.task, nil - } - p := s.processes[execID] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) - } - return p, nil -} - -func getTopic(e interface{}) string { - switch e.(type) { - case *eventstypes.TaskCreate: - return runtime.TaskCreateEventTopic - case *eventstypes.TaskStart: - return runtime.TaskStartEventTopic - case *eventstypes.TaskOOM: - return runtime.TaskOOMEventTopic - case *eventstypes.TaskExit: - return runtime.TaskExitEventTopic - case *eventstypes.TaskDelete: - return runtime.TaskDeleteEventTopic - case *eventstypes.TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *eventstypes.TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - logrus.Warnf("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) { - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, errors.Wrap(err, "read oci spec") - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") - } - runsc.FormatLogPath(r.ID, options.RunscConfig) - runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) - p := proc.New(r.ID, runtime, rproc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(options.IoUid) - p.IoGID = int(options.IoGid) - p.Sandbox = utils.IsSandbox(spec) - p.UserLog = utils.UserLogPath(spec) - p.Monitor = shim.Default - return p, nil -} diff --git a/pkg/v2/service_linux.go b/pkg/v2/service_linux.go deleted file mode 100644 index cbd431537..000000000 --- a/pkg/v2/service_linux.go +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package v2 - -import ( - "context" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" - "github.com/pkg/errors" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, errors.New("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return nil, err - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return errors.New("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return errors.Wrap(err, "failed to initialize epoller") - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/shim/README.md b/shim/README.md new file mode 100644 index 000000000..e446ec970 --- /dev/null +++ b/shim/README.md @@ -0,0 +1,16 @@ +# gvisor-containerd-shim + +gvisor-containerd-shim is a containerd shim. It implements the containerd v1 +shim API. It can be used as a drop-in replacement for +[containerd-shim][containerd-shim] +(though containerd-shim must still be installed). It allows the use of both +gVisor (runsc) and normal containers in the same containerd installation by +deferring to the runc shim if the desired runtime engine is not runsc. + +- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) +- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md) +- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](docs/configure-gvisor-containerd-shim.md) + +[containerd-shim]: https://github.com/containerd/containerd/tree/master/cmd/containerd-shim diff --git a/shim/configure-containerd-shim-runsc-v1.md b/shim/configure-containerd-shim-runsc-v1.md new file mode 100644 index 000000000..977ceacbd --- /dev/null +++ b/shim/configure-containerd-shim-runsc-v1.md @@ -0,0 +1,72 @@ +# Configure containerd-shim-runsc-v1 (Shim V2) + +This document describes how to configure runtime options for +`containerd-shim-runsc-v1`. This is follows on to the instructions of +[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) +and requires containerd 1.3 or later. + +### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`. + +`containerd-shim-runsc-v1` supports a few different configuration options based +on the version of containerd that is used. For versions >= 1.3, it supports a +configurable config path in the containerd runtime configuration. + +```shell +{ # Step 1: Update runtime options for runsc in containerd config.toml +cat < +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) +```shell +{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 + make + sudo make install +} +``` + +### Configure containerd + +1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is + in `${PATH}`. + +[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) +```shell +{ # Step 1: Create containerd config.toml +cat < 106 { + return errors.Errorf("%q: unix socket path too long (> 106)", path) + } + l, err = net.Listen("unix", "\x00"+path) + } + if err != nil { + return err + } + logrus.WithField("socket", path).Debug("serving api on unix socket") + go func() { + defer l.Close() + if err := server.Serve(context.Background(), l); err != nil && + !strings.Contains(err.Error(), "use of closed network connection") { + logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") + } + }() + return nil +} + +// setupSignals creates a new signal handler for all signals and sets the shim as a +// sub-reaper so that the container processes are reparented +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) + // make sure runc is setup to use the monitor + // for waiting on processes + // TODO(random-liu): Move shim/reaper.go to a separate package. + runsc.Monitor = containerdshim.Default + // set the shim as the subreaper for all orphaned processes created by the container + if err := system.SetSubreaper(1); err != nil { + return nil, err + } + return signals, nil +} + +func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { + var ( + termOnce sync.Once + done = make(chan struct{}) + ) + + for { + select { + case <-done: + return nil + case s := <-signals: + switch s { + case unix.SIGCHLD: + if err := containerdshim.Reap(); err != nil { + logger.WithError(err).Error("reap exit status") + } + case unix.SIGTERM, unix.SIGINT: + go termOnce.Do(func() { + ctx := context.TODO() + if err := server.Shutdown(ctx); err != nil { + logger.WithError(err).Error("failed to shutdown server") + } + // Ensure our child is dead if any + sv.Kill(ctx, &shimapi.KillRequest{ + Signal: uint32(syscall.SIGKILL), + All: true, + }) + sv.Delete(context.Background(), &ptypes.Empty{}) + close(done) + }) + case unix.SIGPIPE: + } + } + } +} + +func dumpStacks(logger *logrus.Entry) { + var ( + buf []byte + stackSize int + ) + bufferLen := 16384 + for stackSize == len(buf) { + buf = make([]byte, bufferLen) + stackSize = runtime.Stack(buf, true) + bufferLen *= 2 + } + buf = buf[:stackSize] + logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) +} + +type remoteEventsPublisher struct { + address string +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + c, err := containerdshim.Default.Start(cmd) + if err != nil { + return err + } + status, err := containerdshim.Default.Wait(cmd, c) + if err != nil { + return err + } + if status != 0 { + return errors.New("failed to publish event") + } + return nil +} diff --git a/test/e2e/containerd-install.sh b/test/e2e/containerd-install.sh deleted file mode 100755 index 400819245..000000000 --- a/test/e2e/containerd-install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# A script to install containerd and CNI plugins for e2e testing - -wget -q --https-only \ - https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \ - https://github.com/containernetworking/plugins/releases/download/v0.7.0/cni-plugins-amd64-v0.7.0.tgz - -sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin -sudo tar -xvf cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/ -sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C / - -cat </tmp/containerd-cri.log & diff --git a/test/e2e/crictl-install.sh b/test/e2e/crictl-install.sh deleted file mode 100755 index 1d63c889b..000000000 --- a/test/e2e/crictl-install.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# A sample script for installing crictl. - -set -ex - -{ # Step 1: Download crictl -wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz -tar xf crictl-v1.13.0-linux-amd64.tar.gz -sudo mv crictl /usr/local/bin -} - -{ # Step 2: Configure crictl -cat < /tmp/containerd-cri.log & -} diff --git a/test/e2e/runtime-handler-shim-v2/test.sh b/test/e2e/runtime-handler-shim-v2/test.sh deleted file mode 100755 index e33655ec1..000000000 --- a/test/e2e/runtime-handler-shim-v2/test.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler-shim-v2/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage (the same with runtime-handler) -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh -. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/e2e/runtime-handler-shim-v2/validate.sh b/test/e2e/runtime-handler-shim-v2/validate.sh deleted file mode 100755 index b74a059ef..000000000 --- a/test/e2e/runtime-handler-shim-v2/validate.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# A sample script to validating the running containerd-shim-runsc-v1. - -set -ex - -ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/e2e/runtime-handler/install.sh b/test/e2e/runtime-handler/install.sh deleted file mode 100755 index ebe9d3580..000000000 --- a/test/e2e/runtime-handler/install.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# A sample script for installing and configuring the gvisor-containerd-shim to -# use the containerd runtime handler. - -set -ex - -{ # Step 1: Create containerd config.toml -cat < /tmp/containerd-cri.log & -} diff --git a/test/e2e/runtime-handler/test.sh b/test/e2e/runtime-handler/test.sh deleted file mode 100755 index 99f3565b6..000000000 --- a/test/e2e/runtime-handler/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/e2e/runtime-handler/usage.sh b/test/e2e/runtime-handler/usage.sh deleted file mode 100755 index 350c720c2..000000000 --- a/test/e2e/runtime-handler/usage.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim -# using runtime handler. - -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat </tmp/containerd-cri.log & -} diff --git a/test/e2e/untrusted-workload/test.sh b/test/e2e/untrusted-workload/test.sh deleted file mode 100755 index 6e312cf6d..000000000 --- a/test/e2e/untrusted-workload/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test using the -# untrusted workload extension. This should work on containerd 1.1+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/untrusted-workload/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/untrusted-workload/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/e2e/untrusted-workload/usage.sh b/test/e2e/untrusted-workload/usage.sh deleted file mode 100755 index db8206964..000000000 --- a/test/e2e/untrusted-workload/usage.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim # using untrusted -# workload extension. - -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat </tmp/containerd-cri.log & diff --git a/test/shim/crictl-install.sh b/test/shim/crictl-install.sh new file mode 100755 index 000000000..1d63c889b --- /dev/null +++ b/test/shim/crictl-install.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# A sample script for installing crictl. + +set -ex + +{ # Step 1: Download crictl +wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz +tar xf crictl-v1.13.0-linux-amd64.tar.gz +sudo mv crictl /usr/local/bin +} + +{ # Step 2: Configure crictl +cat < /tmp/containerd-cri.log & +} diff --git a/test/shim/runtime-handler-shim-v2/test.sh b/test/shim/runtime-handler-shim-v2/test.sh new file mode 100755 index 000000000..e33655ec1 --- /dev/null +++ b/test/shim/runtime-handler-shim-v2/test.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler-shim-v2/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage (the same with runtime-handler) +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh +. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/shim/runtime-handler-shim-v2/validate.sh b/test/shim/runtime-handler-shim-v2/validate.sh new file mode 100755 index 000000000..b74a059ef --- /dev/null +++ b/test/shim/runtime-handler-shim-v2/validate.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# A sample script to validating the running containerd-shim-runsc-v1. + +set -ex + +ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/shim/runtime-handler/install.sh b/test/shim/runtime-handler/install.sh new file mode 100755 index 000000000..ebe9d3580 --- /dev/null +++ b/test/shim/runtime-handler/install.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# A sample script for installing and configuring the gvisor-containerd-shim to +# use the containerd runtime handler. + +set -ex + +{ # Step 1: Create containerd config.toml +cat < /tmp/containerd-cri.log & +} diff --git a/test/shim/runtime-handler/test.sh b/test/shim/runtime-handler/test.sh new file mode 100755 index 000000000..99f3565b6 --- /dev/null +++ b/test/shim/runtime-handler/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/shim/runtime-handler/usage.sh b/test/shim/runtime-handler/usage.sh new file mode 100755 index 000000000..350c720c2 --- /dev/null +++ b/test/shim/runtime-handler/usage.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim +# using runtime handler. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat </tmp/containerd-cri.log & +} diff --git a/test/shim/untrusted-workload/test.sh b/test/shim/untrusted-workload/test.sh new file mode 100755 index 000000000..6e312cf6d --- /dev/null +++ b/test/shim/untrusted-workload/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test using the +# untrusted workload extension. This should work on containerd 1.1+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/untrusted-workload/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/untrusted-workload/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/shim/untrusted-workload/usage.sh b/test/shim/untrusted-workload/usage.sh new file mode 100755 index 000000000..db8206964 --- /dev/null +++ b/test/shim/untrusted-workload/usage.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim # using untrusted +# workload extension. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat <