summaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/opencontainers
diff options
context:
space:
mode:
authorLantao Liu <lantaol@google.com>2019-05-10 18:27:49 -0700
committerIan Lewis <ianlewis@google.com>2019-05-11 10:27:49 +0900
commit97875daf63d03cda6ff3c4f393a004a8295a748e (patch)
tree30dce772c1d7bb9d77fab8b224675630e3c10b2d /vendor/github.com/opencontainers
parent14f1de4a45daa75ef016fabb56d86cbd9b902504 (diff)
Port shim fix (#27)
Port shim fixes containerd/containerd#3264, containerd/containerd#3264 Update containerd to newest release/1.2 commit.
Diffstat (limited to 'vendor/github.com/opencontainers')
-rw-r--r--vendor/github.com/opencontainers/runc/README.md5
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c516
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c11
-rw-r--r--vendor/github.com/opencontainers/runc/vendor.conf7
4 files changed, 533 insertions, 6 deletions
diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md
index e755fb7bc..11fa4138b 100644
--- a/vendor/github.com/opencontainers/runc/README.md
+++ b/vendor/github.com/opencontainers/runc/README.md
@@ -16,10 +16,9 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
-### Security
+## Security
-If you wish to report a security issue, please disclose the issue responsibly
-to security@opencontainers.org.
+Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/)
## Building
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c
new file mode 100644
index 000000000..ad10f1406
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2019 SUSE LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/vfs.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/sendfile.h>
+#include <sys/syscall.h>
+
+/* Use our own wrapper for memfd_create. */
+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
+# define SYS_memfd_create __NR_memfd_create
+#endif
+/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
+#ifndef MFD_CLOEXEC
+# define MFD_CLOEXEC 0x0001U
+# define MFD_ALLOW_SEALING 0x0002U
+#endif
+int memfd_create(const char *name, unsigned int flags)
+{
+#ifdef SYS_memfd_create
+ return syscall(SYS_memfd_create, name, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+
+/* This comes directly from <linux/fcntl.h>. */
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE 1024
+#endif
+#ifndef F_ADD_SEALS
+# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+#endif
+#ifndef F_SEAL_SEAL
+# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+# define F_SEAL_GROW 0x0004 /* prevent file from growing */
+# define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY"
+#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
+#define RUNC_MEMFD_SEALS \
+ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
+
+static void *must_realloc(void *ptr, size_t size)
+{
+ void *old = ptr;
+ do {
+ ptr = realloc(old, size);
+ } while(!ptr);
+ return ptr;
+}
+
+/*
+ * Verify whether we are currently in a self-cloned program (namely, is
+ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
+ * for shmem files), and we want to be sure it's actually sealed.
+ */
+static int is_self_cloned(void)
+{
+ int fd, ret, is_cloned = 0;
+ struct stat statbuf = {};
+ struct statfs fsbuf = {};
+
+ fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -ENOTRECOVERABLE;
+
+ /*
+ * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for
+ * this, because you cannot write to a sealed memfd no matter what (so
+ * sharing it isn't a bad thing -- and an admin could bind-mount a sealed
+ * memfd to /usr/bin/runc to allow re-use).
+ */
+ ret = fcntl(fd, F_GET_SEALS);
+ if (ret >= 0) {
+ is_cloned = (ret == RUNC_MEMFD_SEALS);
+ goto out;
+ }
+
+ /*
+ * All other forms require CLONED_BINARY_ENV, since they are potentially
+ * writeable (or we can't tell if they're fully safe) and thus we must
+ * check the environment as an extra layer of defence.
+ */
+ if (!getenv(CLONED_BINARY_ENV)) {
+ is_cloned = false;
+ goto out;
+ }
+
+ /*
+ * Is the binary on a read-only filesystem? We can't detect bind-mounts in
+ * particular (in-kernel they are identical to regular mounts) but we can
+ * at least be sure that it's read-only. In addition, to make sure that
+ * it's *our* bind-mount we check CLONED_BINARY_ENV.
+ */
+ if (fstatfs(fd, &fsbuf) >= 0)
+ is_cloned |= (fsbuf.f_flags & MS_RDONLY);
+
+ /*
+ * Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6
+ * which appears to have a borked backport of F_GET_SEALS. Either way,
+ * having a file which has no hardlinks indicates that we aren't using
+ * a host-side "runc" binary and this is something that a container
+ * cannot fake (because unlinking requires being able to resolve the
+ * path that you want to unlink).
+ */
+ if (fstat(fd, &statbuf) >= 0)
+ is_cloned |= (statbuf.st_nlink == 0);
+
+out:
+ close(fd);
+ return is_cloned;
+}
+
+/* Read a given file into a new buffer, and providing the length. */
+static char *read_file(char *path, size_t *length)
+{
+ int fd;
+ char buf[4096], *copy = NULL;
+
+ if (!length)
+ return NULL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return NULL;
+
+ *length = 0;
+ for (;;) {
+ ssize_t n;
+
+ n = read(fd, buf, sizeof(buf));
+ if (n < 0)
+ goto error;
+ if (!n)
+ break;
+
+ copy = must_realloc(copy, (*length + n) * sizeof(*copy));
+ memcpy(copy + *length, buf, n);
+ *length += n;
+ }
+ close(fd);
+ return copy;
+
+error:
+ close(fd);
+ free(copy);
+ return NULL;
+}
+
+/*
+ * A poor-man's version of "xargs -0". Basically parses a given block of
+ * NUL-delimited data, within the given length and adds a pointer to each entry
+ * to the array of pointers.
+ */
+static int parse_xargs(char *data, int data_length, char ***output)
+{
+ int num = 0;
+ char *cur = data;
+
+ if (!data || *output != NULL)
+ return -1;
+
+ while (cur < data + data_length) {
+ num++;
+ *output = must_realloc(*output, (num + 1) * sizeof(**output));
+ (*output)[num - 1] = cur;
+ cur += strlen(cur) + 1;
+ }
+ (*output)[num] = NULL;
+ return num;
+}
+
+/*
+ * "Parse" out argv from /proc/self/cmdline.
+ * This is necessary because we are running in a context where we don't have a
+ * main() that we can just get the arguments from.
+ */
+static int fetchve(char ***argv)
+{
+ char *cmdline = NULL;
+ size_t cmdline_size;
+
+ cmdline = read_file("/proc/self/cmdline", &cmdline_size);
+ if (!cmdline)
+ goto error;
+
+ if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
+ goto error;
+
+ return 0;
+
+error:
+ free(cmdline);
+ return -EINVAL;
+}
+
+enum {
+ EFD_NONE = 0,
+ EFD_MEMFD,
+ EFD_FILE,
+};
+
+/*
+ * This comes from <linux/fcntl.h>. We can't hard-code __O_TMPFILE because it
+ * changes depending on the architecture. If we don't have O_TMPFILE we always
+ * have the mkostemp(3) fallback.
+ */
+#ifndef O_TMPFILE
+# if defined(__O_TMPFILE) && defined(O_DIRECTORY)
+# define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+# endif
+#endif
+
+static int make_execfd(int *fdtype)
+{
+ int fd = -1;
+ char template[PATH_MAX] = {0};
+ char *prefix = getenv("_LIBCONTAINER_STATEDIR");
+
+ if (!prefix || *prefix != '/')
+ prefix = "/tmp";
+ if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
+ return -1;
+
+ /*
+ * Now try memfd, it's much nicer than actually creating a file in STATEDIR
+ * since it's easily detected thanks to sealing and also doesn't require
+ * assumptions about STATEDIR.
+ */
+ *fdtype = EFD_MEMFD;
+ fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ if (fd >= 0)
+ return fd;
+ if (errno != ENOSYS && errno != EINVAL)
+ goto error;
+
+#ifdef O_TMPFILE
+ /*
+ * Try O_TMPFILE to avoid races where someone might snatch our file. Note
+ * that O_EXCL isn't actually a security measure here (since you can just
+ * fd re-open it and clear O_EXCL).
+ */
+ *fdtype = EFD_FILE;
+ fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
+ if (fd >= 0) {
+ struct stat statbuf = {};
+ bool working_otmpfile = false;
+
+ /*
+ * open(2) ignores unknown O_* flags -- yeah, I was surprised when I
+ * found this out too. As a result we can't check for EINVAL. However,
+ * if we get nlink != 0 (or EISDIR) then we know that this kernel
+ * doesn't support O_TMPFILE.
+ */
+ if (fstat(fd, &statbuf) >= 0)
+ working_otmpfile = (statbuf.st_nlink == 0);
+
+ if (working_otmpfile)
+ return fd;
+
+ /* Pretend that we got EISDIR since O_TMPFILE failed. */
+ close(fd);
+ errno = EISDIR;
+ }
+ if (errno != EISDIR)
+ goto error;
+#endif /* defined(O_TMPFILE) */
+
+ /*
+ * Our final option is to create a temporary file the old-school way, and
+ * then unlink it so that nothing else sees it by accident.
+ */
+ *fdtype = EFD_FILE;
+ fd = mkostemp(template, O_CLOEXEC);
+ if (fd >= 0) {
+ if (unlink(template) >= 0)
+ return fd;
+ close(fd);
+ }
+
+error:
+ *fdtype = EFD_NONE;
+ return -1;
+}
+
+static int seal_execfd(int *fd, int fdtype)
+{
+ switch (fdtype) {
+ case EFD_MEMFD:
+ return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
+ case EFD_FILE: {
+ /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
+ int newfd;
+ char fdpath[PATH_MAX] = {0};
+
+ if (fchmod(*fd, 0100) < 0)
+ return -1;
+
+ if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0)
+ return -1;
+
+ newfd = open(fdpath, O_PATH | O_CLOEXEC);
+ if (newfd < 0)
+ return -1;
+
+ close(*fd);
+ *fd = newfd;
+ return 0;
+ }
+ default:
+ break;
+ }
+ return -1;
+}
+
+static int try_bindfd(void)
+{
+ int fd, ret = -1;
+ char template[PATH_MAX] = {0};
+ char *prefix = getenv("_LIBCONTAINER_STATEDIR");
+
+ if (!prefix || *prefix != '/')
+ prefix = "/tmp";
+ if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0)
+ return ret;
+
+ /*
+ * We need somewhere to mount it, mounting anything over /proc/self is a
+ * BAD idea on the host -- even if we do it temporarily.
+ */
+ fd = mkstemp(template);
+ if (fd < 0)
+ return ret;
+ close(fd);
+
+ /*
+ * For obvious reasons this won't work in rootless mode because we haven't
+ * created a userns+mntns -- but getting that to work will be a bit
+ * complicated and it's only worth doing if someone actually needs it.
+ */
+ ret = -EPERM;
+ if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0)
+ goto out;
+ if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
+ goto out_umount;
+
+
+ /* Get read-only handle that we're sure can't be made read-write. */
+ ret = open(template, O_PATH | O_CLOEXEC);
+
+out_umount:
+ /*
+ * Make sure the MNT_DETACH works, otherwise we could get remounted
+ * read-write and that would be quite bad (the fd would be made read-write
+ * too, invalidating the protection).
+ */
+ if (umount2(template, MNT_DETACH) < 0) {
+ if (ret >= 0)
+ close(ret);
+ ret = -ENOTRECOVERABLE;
+ }
+
+out:
+ /*
+ * We don't care about unlink errors, the worst that happens is that
+ * there's an empty file left around in STATEDIR.
+ */
+ unlink(template);
+ return ret;
+}
+
+static ssize_t fd_to_fd(int outfd, int infd)
+{
+ ssize_t total = 0;
+ char buffer[4096];
+
+ for (;;) {
+ ssize_t nread, nwritten = 0;
+
+ nread = read(infd, buffer, sizeof(buffer));
+ if (nread < 0)
+ return -1;
+ if (!nread)
+ break;
+
+ do {
+ ssize_t n = write(outfd, buffer + nwritten, nread - nwritten);
+ if (n < 0)
+ return -1;
+ nwritten += n;
+ } while(nwritten < nread);
+
+ total += nwritten;
+ }
+
+ return total;
+}
+
+static int clone_binary(void)
+{
+ int binfd, execfd;
+ struct stat statbuf = {};
+ size_t sent = 0;
+ int fdtype = EFD_NONE;
+
+ /*
+ * Before we resort to copying, let's try creating an ro-binfd in one shot
+ * by getting a handle for a read-only bind-mount of the execfd.
+ */
+ execfd = try_bindfd();
+ if (execfd >= 0)
+ return execfd;
+
+ /*
+ * Dammit, that didn't work -- time to copy the binary to a safe place we
+ * can seal the contents.
+ */
+ execfd = make_execfd(&fdtype);
+ if (execfd < 0 || fdtype == EFD_NONE)
+ return -ENOTRECOVERABLE;
+
+ binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
+ if (binfd < 0)
+ goto error;
+
+ if (fstat(binfd, &statbuf) < 0)
+ goto error_binfd;
+
+ while (sent < statbuf.st_size) {
+ int n = sendfile(execfd, binfd, NULL, statbuf.st_size - sent);
+ if (n < 0) {
+ /* sendfile can fail so we fallback to a dumb user-space copy. */
+ n = fd_to_fd(execfd, binfd);
+ if (n < 0)
+ goto error_binfd;
+ }
+ sent += n;
+ }
+ close(binfd);
+ if (sent != statbuf.st_size)
+ goto error;
+
+ if (seal_execfd(&execfd, fdtype) < 0)
+ goto error;
+
+ return execfd;
+
+error_binfd:
+ close(binfd);
+error:
+ close(execfd);
+ return -EIO;
+}
+
+/* Get cheap access to the environment. */
+extern char **environ;
+
+int ensure_cloned_binary(void)
+{
+ int execfd;
+ char **argv = NULL;
+
+ /* Check that we're not self-cloned, and if we are then bail. */
+ int cloned = is_self_cloned();
+ if (cloned > 0 || cloned == -ENOTRECOVERABLE)
+ return cloned;
+
+ if (fetchve(&argv) < 0)
+ return -EINVAL;
+
+ execfd = clone_binary();
+ if (execfd < 0)
+ return -EIO;
+
+ if (putenv(CLONED_BINARY_ENV "=1"))
+ goto error;
+
+ fexecve(execfd, argv, environ);
+error:
+ close(execfd);
+ return -ENOEXEC;
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
index 28269dfc0..7750af35e 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
@@ -534,6 +534,9 @@ void join_namespaces(char *nslist)
free(namespaces);
}
+/* Defined in cloned_binary.c. */
+extern int ensure_cloned_binary(void);
+
void nsexec(void)
{
int pipenum;
@@ -549,6 +552,14 @@ void nsexec(void)
if (pipenum == -1)
return;
+ /*
+ * We need to re-exec if we are not in a cloned binary. This is necessary
+ * to ensure that containers won't be able to access the host binary
+ * through /proc/self/exe. See CVE-2019-5736.
+ */
+ if (ensure_cloned_binary() < 0)
+ bail("could not ensure we are a cloned binary");
+
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
diff --git a/vendor/github.com/opencontainers/runc/vendor.conf b/vendor/github.com/opencontainers/runc/vendor.conf
index fadbe0707..22cba0f1b 100644
--- a/vendor/github.com/opencontainers/runc/vendor.conf
+++ b/vendor/github.com/opencontainers/runc/vendor.conf
@@ -1,10 +1,11 @@
# OCI runtime-spec. When updating this, make sure you use a version tag rather
# than a commit ID so it's much more obvious what version of the spec we are
# using.
-github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294
+github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4
# Core libcontainer functionality.
+github.com/checkpoint-restore/go-criu v3.11
github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
-github.com/opencontainers/selinux v1.0.0-rc1
+github.com/opencontainers/selinux v1.2.2
github.com/seccomp/libseccomp-golang 84e90a91acea0f4e51e62bc1a75de18b1fc0790f
github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac
github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
@@ -18,7 +19,7 @@ github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8
github.com/cyphar/filepath-securejoin v0.2.1
github.com/docker/go-units v0.2.0
github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e
-golang.org/x/sys 7ddbeae9ae08c6a06a59597f0c9edbc5ff2444ce https://github.com/golang/sys
+golang.org/x/sys 41f3e6584952bb034a481797859f6ab34b6803bd https://github.com/golang/sys
# console dependencies
github.com/containerd/console 2748ece16665b45a47f884001d5831ec79703880