diff options
author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700 |
---|---|---|
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400 |
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch) | |
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/kernel/auth | |
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff) |
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'pkg/sentry/kernel/auth')
-rw-r--r-- | pkg/sentry/kernel/auth/BUILD | 73 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/auth.go | 22 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/capability_set.go | 61 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/context.go | 36 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/credentials.go | 227 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/id.go | 121 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/id_map.go | 283 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/id_map_functions.go | 45 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/user_namespace.go | 130 |
9 files changed, 998 insertions, 0 deletions
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD new file mode 100644 index 000000000..7f0680b88 --- /dev/null +++ b/pkg/sentry/kernel/auth/BUILD @@ -0,0 +1,73 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") +load("//tools/go_stateify:defs.bzl", "go_stateify") + +go_stateify( + name = "auth_state", + srcs = [ + "credentials.go", + "id.go", + "id_map_range.go", + "id_map_set.go", + "user_namespace.go", + ], + out = "auth_state.go", + package = "auth", +) + +go_template_instance( + name = "id_map_range", + out = "id_map_range.go", + package = "auth", + prefix = "idMap", + template = "//pkg/segment:generic_range", + types = { + "T": "uint32", + }, +) + +go_template_instance( + name = "id_map_set", + out = "id_map_set.go", + consts = { + "minDegree": "3", + }, + package = "auth", + prefix = "idMap", + template = "//pkg/segment:generic_set", + types = { + "Key": "uint32", + "Range": "idMapRange", + "Value": "uint32", + "Functions": "idMapFunctions", + }, +) + +go_library( + name = "auth", + srcs = [ + "auth.go", + "auth_state.go", + "capability_set.go", + "context.go", + "credentials.go", + "id.go", + "id_map.go", + "id_map_functions.go", + "id_map_range.go", + "id_map_set.go", + "user_namespace.go", + ], + importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth", + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/bits", + "//pkg/log", + "//pkg/sentry/context", + "//pkg/state", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/kernel/auth/auth.go b/pkg/sentry/kernel/auth/auth.go new file mode 100644 index 000000000..c49a6b852 --- /dev/null +++ b/pkg/sentry/kernel/auth/auth.go @@ -0,0 +1,22 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package auth implements an access control model that is a subset of Linux's. +// +// The auth package supports two kinds of access controls: user/group IDs and +// capabilities. Each resource in the security model is associated with a user +// namespace; "privileged" operations check that the operator's credentials +// have the required user/group IDs or capabilities within the user namespace +// of accessed resources. +package auth diff --git a/pkg/sentry/kernel/auth/capability_set.go b/pkg/sentry/kernel/auth/capability_set.go new file mode 100644 index 000000000..5b8164c49 --- /dev/null +++ b/pkg/sentry/kernel/auth/capability_set.go @@ -0,0 +1,61 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/bits" +) + +// A CapabilitySet is a set of capabilities implemented as a bitset. The zero +// value of CapabilitySet is a set containing no capabilities. +type CapabilitySet uint64 + +// AllCapabilities is a CapabilitySet containing all valid capabilities. +var AllCapabilities = CapabilitySetOf(linux.MaxCapability+1) - 1 + +// CapabilitySetOf returns a CapabilitySet containing only the given +// capability. +func CapabilitySetOf(cp linux.Capability) CapabilitySet { + return CapabilitySet(bits.MaskOf64(int(cp))) +} + +// CapabilitySetOfMany returns a CapabilitySet containing the given capabilities. +func CapabilitySetOfMany(cps []linux.Capability) CapabilitySet { + var cs uint64 + for _, cp := range cps { + cs |= bits.MaskOf64(int(cp)) + } + return CapabilitySet(cs) +} + +// TaskCapabilities represents all the capability sets for a task. Each of these +// sets is explained in greater detail in capabilities(7). +type TaskCapabilities struct { + // Permitted is a limiting superset for the effective capabilities that + // the thread may assume. + PermittedCaps CapabilitySet + // Inheritable is a set of capabilities preserved across an execve(2). + InheritableCaps CapabilitySet + // Effective is the set of capabilities used by the kernel to perform + // permission checks for the thread. + EffectiveCaps CapabilitySet + // Bounding is a limiting superset for the capabilities that a thread + // can add to its inheritable set using capset(2). + BoundingCaps CapabilitySet + // Ambient is a set of capabilities that are preserved across an + // execve(2) of a program that is not privileged. + AmbientCaps CapabilitySet +} diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go new file mode 100644 index 000000000..914589b28 --- /dev/null +++ b/pkg/sentry/kernel/auth/context.go @@ -0,0 +1,36 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/context" +) + +// contextID is the auth package's type for context.Context.Value keys. +type contextID int + +const ( + // CtxCredentials is a Context.Value key for Credentials. + CtxCredentials contextID = iota +) + +// CredentialsFromContext returns a copy of the Credentials used by ctx, or a +// set of Credentials with no capabilities if ctx does not have Credentials. +func CredentialsFromContext(ctx context.Context) *Credentials { + if v := ctx.Value(CtxCredentials); v != nil { + return v.(*Credentials) + } + return NewAnonymousCredentials() +} diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go new file mode 100644 index 000000000..b832b28fe --- /dev/null +++ b/pkg/sentry/kernel/auth/credentials.go @@ -0,0 +1,227 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// Credentials contains information required to authorize privileged operations +// in a user namespace. +type Credentials struct { + // Real/effective/saved user/group IDs in the root user namespace. None of + // these should ever be NoID. + RealKUID KUID + EffectiveKUID KUID + SavedKUID KUID + RealKGID KGID + EffectiveKGID KGID + SavedKGID KGID + + // Filesystem user/group IDs are not implemented. "... setfsuid() is + // nowadays unneeded and should be avoided in new applications (likewise + // for setfsgid(2))." - setfsuid(2) + + // Supplementary groups used by set/getgroups. + // + // ExtraKGIDs slices are immutable, allowing multiple Credentials with the + // same ExtraKGIDs to share the same slice. + ExtraKGIDs []KGID + + // The capability sets applicable to this set of credentials. + PermittedCaps CapabilitySet + InheritableCaps CapabilitySet + EffectiveCaps CapabilitySet + BoundingCaps CapabilitySet + // Ambient capabilities are not introduced until Linux 4.3. + + // KeepCaps is the flag for PR_SET_KEEPCAPS which allow capabilities to be + // maintained after a switch from root user to non-root user via setuid(). + KeepCaps bool + + // The user namespace associated with the owner of the credentials. + UserNamespace *UserNamespace +} + +// NewAnonymousCredentials returns a set of credentials with no capabilities in +// any user namespace. +func NewAnonymousCredentials() *Credentials { + // Create a new root user namespace. Since the new namespace's owner is + // KUID 0 and the returned credentials have non-zero KUID/KGID, the + // returned credentials do not have any capabilities in the new namespace. + // Since the new namespace is not part of any existing user namespace + // hierarchy, the returned credentials do not have any capabilities in any + // other namespace. + return &Credentials{ + RealKUID: NobodyKUID, + EffectiveKUID: NobodyKUID, + SavedKUID: NobodyKUID, + RealKGID: NobodyKGID, + EffectiveKGID: NobodyKGID, + SavedKGID: NobodyKGID, + UserNamespace: NewRootUserNamespace(), + } +} + +// NewRootCredentials returns a set of credentials with KUID and KGID 0 (i.e. +// global root) in user namespace ns. +func NewRootCredentials(ns *UserNamespace) *Credentials { + // I can't find documentation for this anywhere, but it's correct for the + // inheritable capability set to be initially empty (the capabilities test + // checks for this property). + return &Credentials{ + RealKUID: RootKUID, + EffectiveKUID: RootKUID, + SavedKUID: RootKUID, + RealKGID: RootKGID, + EffectiveKGID: RootKGID, + SavedKGID: RootKGID, + PermittedCaps: AllCapabilities, + EffectiveCaps: AllCapabilities, + BoundingCaps: AllCapabilities, + UserNamespace: ns, + } +} + +// NewUserCredentials returns a set of credentials based on the given UID, GIDs, +// and capabilities in a given namespace. If all arguments are their zero +// values, this returns the same credentials as NewRootCredentials. +func NewUserCredentials(kuid KUID, kgid KGID, extraKGIDs []KGID, capabilities *TaskCapabilities, ns *UserNamespace) *Credentials { + creds := NewRootCredentials(ns) + + // Set the UID. + uid := kuid + creds.RealKUID = uid + creds.EffectiveKUID = uid + creds.SavedKUID = uid + + // Set GID. + gid := kgid + creds.RealKGID = gid + creds.EffectiveKGID = gid + creds.SavedKGID = gid + + // Set additional GIDs. + creds.ExtraKGIDs = append(creds.ExtraKGIDs, extraKGIDs...) + + // Set capabilities. If capabilities aren't specified, we default to + // all capabilities. + if capabilities != nil { + creds.PermittedCaps = capabilities.PermittedCaps + creds.EffectiveCaps = capabilities.EffectiveCaps + creds.BoundingCaps = capabilities.BoundingCaps + creds.InheritableCaps = capabilities.InheritableCaps + // // TODO: Support ambient capabilities. + } else { + // If no capabilities are specified, grant the same capabilites + // that NewRootCredentials does. + creds.PermittedCaps = AllCapabilities + creds.EffectiveCaps = AllCapabilities + creds.BoundingCaps = AllCapabilities + } + + return creds +} + +// Fork generates an identical copy of a set of credentials. +func (c *Credentials) Fork() *Credentials { + nc := new(Credentials) + *nc = *c // Copy-by-value; this is legal for all fields. + return nc +} + +// InGroup returns true if c is in group kgid. Compare Linux's +// kernel/groups.c:in_group_p(). +func (c *Credentials) InGroup(kgid KGID) bool { + if c.EffectiveKGID == kgid { + return true + } + for _, extraKGID := range c.ExtraKGIDs { + if extraKGID == kgid { + return true + } + } + return false +} + +// HasCapabilityIn returns true if c has capability cp in ns. +func (c *Credentials) HasCapabilityIn(cp linux.Capability, ns *UserNamespace) bool { + for { + // "1. A process has a capability inside a user namespace if it is a member + // of that namespace and it has the capability in its effective capability + // set." - user_namespaces(7) + if c.UserNamespace == ns { + return CapabilitySetOf(cp)&c.EffectiveCaps != 0 + } + // "3. ... A process that resides in the parent of the user namespace and + // whose effective user ID matches the owner of the namespace has all + // capabilities in the namespace." + if c.UserNamespace == ns.parent && c.EffectiveKUID == ns.owner { + return true + } + // "2. If a process has a capability in a user namespace, then it has that + // capability in all child (and further removed descendant) namespaces as + // well." + if ns.parent == nil { + return false + } + ns = ns.parent + } +} + +// HasCapability returns true if c has capability cp in its user namespace. +func (c *Credentials) HasCapability(cp linux.Capability) bool { + return c.HasCapabilityIn(cp, c.UserNamespace) +} + +// UseUID checks that c can use uid in its user namespace, then translates it +// to the root user namespace. +// +// The checks UseUID does are common, but you should verify that it's doing +// exactly what you want. +func (c *Credentials) UseUID(uid UID) (KUID, error) { + // uid must be mapped. + kuid := c.UserNamespace.MapToKUID(uid) + if !kuid.Ok() { + return NoID, syserror.EINVAL + } + // If c has CAP_SETUID, then it can use any UID in its user namespace. + if c.HasCapability(linux.CAP_SETUID) { + return kuid, nil + } + // Otherwise, c must already have the UID as its real, effective, or saved + // set-user-ID. + if kuid == c.RealKUID || kuid == c.EffectiveKUID || kuid == c.SavedKUID { + return kuid, nil + } + return NoID, syserror.EPERM +} + +// UseGID checks that c can use gid in its user namespace, then translates it +// to the root user namespace. +func (c *Credentials) UseGID(gid GID) (KGID, error) { + kgid := c.UserNamespace.MapToKGID(gid) + if !kgid.Ok() { + return NoID, syserror.EINVAL + } + if c.HasCapability(linux.CAP_SETGID) { + return kgid, nil + } + if kgid == c.RealKGID || kgid == c.EffectiveKGID || kgid == c.SavedKGID { + return kgid, nil + } + return NoID, syserror.EPERM +} diff --git a/pkg/sentry/kernel/auth/id.go b/pkg/sentry/kernel/auth/id.go new file mode 100644 index 000000000..37522b018 --- /dev/null +++ b/pkg/sentry/kernel/auth/id.go @@ -0,0 +1,121 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "math" +) + +// UID is a user ID in an unspecified user namespace. +type UID uint32 + +// GID is a group ID in an unspecified user namespace. +type GID uint32 + +// In the root user namespace, user/group IDs have a 1-to-1 relationship with +// the users/groups they represent. In other user namespaces, this is not the +// case; for example, two different unmapped users may both "have" the overflow +// UID. This means that it is generally only valid to compare user and group +// IDs in the root user namespace. We assign distinct types, KUID/KGID, to such +// IDs to emphasize this distinction. ("k" is for "key", as in "unique key". +// Linux also uses the prefix "k", but I think they mean "kernel".) + +// KUID is a user ID in the root user namespace. +type KUID uint32 + +// KGID is a group ID in the root user namespace. +type KGID uint32 + +const ( + // NoID is uint32(-1). -1 is consistently used as a special value, in Linux + // and by extension in the auth package, to mean "no ID": + // + // - ID mapping returns -1 if the ID is not mapped. + // + // - Most set*id() syscalls accept -1 to mean "do not change this ID". + NoID = math.MaxUint32 + + // OverflowUID is the default value of /proc/sys/kernel/overflowuid. The + // "overflow UID" is usually [1] used when translating a user ID between + // namespaces fails because the ID is not mapped. (We don't implement this + // file, so the overflow UID is constant.) + // + // [1] "There is one notable case where unmapped user and group IDs are not + // converted to the corresponding overflow ID value. When viewing a uid_map + // or gid_map file in which there is no mapping for the second field, that + // field is displayed as 4294967295 (-1 as an unsigned integer);" - + // user_namespaces(7) + OverflowUID = UID(65534) + OverflowGID = GID(65534) + + // NobodyKUID is the user ID usually reserved for the least privileged user + // "nobody". + NobodyKUID = KUID(65534) + NobodyKGID = KGID(65534) + + // RootKUID is the user ID usually used for the most privileged user "root". + RootKUID = KUID(0) + RootKGID = KGID(0) + RootUID = UID(0) + RootGID = GID(0) +) + +// Ok returns true if uid is not -1. +func (uid UID) Ok() bool { + return uid != NoID +} + +// Ok returns true if gid is not -1. +func (gid GID) Ok() bool { + return gid != NoID +} + +// Ok returns true if kuid is not -1. +func (kuid KUID) Ok() bool { + return kuid != NoID +} + +// Ok returns true if kgid is not -1. +func (kgid KGID) Ok() bool { + return kgid != NoID +} + +// OrOverflow returns uid if it is valid and the overflow UID otherwise. +func (uid UID) OrOverflow() UID { + if uid.Ok() { + return uid + } + return OverflowUID +} + +// OrOverflow returns gid if it is valid and the overflow GID otherwise. +func (gid GID) OrOverflow() GID { + if gid.Ok() { + return gid + } + return OverflowGID +} + +// In translates kuid into user namespace ns. If kuid is not mapped in ns, In +// returns NoID. +func (kuid KUID) In(ns *UserNamespace) UID { + return ns.MapFromKUID(kuid) +} + +// In translates kgid into user namespace ns. If kgid is not mapped in ns, In +// returns NoID. +func (kgid KGID) In(ns *UserNamespace) GID { + return ns.MapFromKGID(kgid) +} diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go new file mode 100644 index 000000000..6adb33530 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map.go @@ -0,0 +1,283 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns. +func (ns *UserNamespace) MapFromKUID(kuid KUID) UID { + if ns.parent == nil { + return UID(kuid) + } + return UID(ns.mapID(&ns.uidMapFromParent, uint32(ns.parent.MapFromKUID(kuid)))) +} + +// MapFromKGID translates kgid, a GID in the root namespace, to a GID in ns. +func (ns *UserNamespace) MapFromKGID(kgid KGID) GID { + if ns.parent == nil { + return GID(kgid) + } + return GID(ns.mapID(&ns.gidMapFromParent, uint32(ns.parent.MapFromKGID(kgid)))) +} + +// MapToKUID translates uid, a UID in ns, to a UID in the root namespace. +func (ns *UserNamespace) MapToKUID(uid UID) KUID { + if ns.parent == nil { + return KUID(uid) + } + return ns.parent.MapToKUID(UID(ns.mapID(&ns.uidMapToParent, uint32(uid)))) +} + +// MapToKGID translates gid, a GID in ns, to a GID in the root namespace. +func (ns *UserNamespace) MapToKGID(gid GID) KGID { + if ns.parent == nil { + return KGID(gid) + } + return ns.parent.MapToKGID(GID(ns.mapID(&ns.gidMapToParent, uint32(gid)))) +} + +func (ns *UserNamespace) mapID(m *idMapSet, id uint32) uint32 { + if id == NoID { + return NoID + } + ns.mu.Lock() + defer ns.mu.Unlock() + if it := m.FindSegment(id); it.Ok() { + return it.Value() + (id - it.Start()) + } + return NoID +} + +// allIDsMapped returns true if all IDs in the range [start, end) are mapped in +// m. +// +// Preconditions: end >= start. +func (ns *UserNamespace) allIDsMapped(m *idMapSet, start, end uint32) bool { + ns.mu.Lock() + defer ns.mu.Unlock() + return m.SpanRange(idMapRange{start, end}) == end-start +} + +// An IDMapEntry represents a mapping from a range of contiguous IDs in a user +// namespace to an equally-sized range of contiguous IDs in the namespace's +// parent. +type IDMapEntry struct { + // FirstID is the first ID in the range in the namespace. + FirstID uint32 + + // FirstParentID is the first ID in the range in the parent namespace. + FirstParentID uint32 + + // Length is the number of IDs in the range. + Length uint32 +} + +// SetUIDMap instructs ns to translate UIDs as specified by entries. +// +// Note: SetUIDMap does not place an upper bound on the number of entries, but +// Linux does. This restriction is implemented in SetUIDMap's caller, the +// implementation of /proc/[pid]/uid_map. +func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) error { + c := CredentialsFromContext(ctx) + + ns.mu.Lock() + defer ns.mu.Unlock() + // "After the creation of a new user namespace, the uid_map file of *one* + // of the processes in the namespace may be written to *once* to define the + // mapping of user IDs in the new user namespace. An attempt to write more + // than once to a uid_map file in a user namespace fails with the error + // EPERM. Similar rules apply for gid_map files." - user_namespaces(7) + if !ns.uidMapFromParent.IsEmpty() { + return syserror.EPERM + } + // "At least one line must be written to the file." + if len(entries) == 0 { + return syserror.EINVAL + } + // """ + // In order for a process to write to the /proc/[pid]/uid_map + // (/proc/[pid]/gid_map) file, all of the following requirements must be + // met: + // + // 1. The writing process must have the CAP_SETUID (CAP_SETGID) capability + // in the user namespace of the process pid. + // """ + if !c.HasCapabilityIn(linux.CAP_SETUID, ns) { + return syserror.EPERM + } + // "2. The writing process must either be in the user namespace of the process + // pid or be in the parent user namespace of the process pid." + if c.UserNamespace != ns && c.UserNamespace != ns.parent { + return syserror.EPERM + } + // """ + // 3. (see trySetUIDMap) + // + // 4. One of the following two cases applies: + // + // * Either the writing process has the CAP_SETUID (CAP_SETGID) capability + // in the parent user namespace. + // """ + if !c.HasCapabilityIn(linux.CAP_SETUID, ns.parent) { + // """ + // * Or otherwise all of the following restrictions apply: + // + // + The data written to uid_map (gid_map) must consist of a single line + // that maps the writing process' effective user ID (group ID) in the + // parent user namespace to a user ID (group ID) in the user namespace. + // """ + if len(entries) != 1 || ns.parent.MapToKUID(UID(entries[0].FirstParentID)) != c.EffectiveKUID || entries[0].Length != 1 { + return syserror.EPERM + } + // """ + // + The writing process must have the same effective user ID as the + // process that created the user namespace. + // """ + if c.EffectiveKUID != ns.owner { + return syserror.EPERM + } + } + // trySetUIDMap leaves data in maps if it fails. + if err := ns.trySetUIDMap(entries); err != nil { + ns.uidMapFromParent.RemoveAll() + ns.uidMapToParent.RemoveAll() + return err + } + return nil +} + +func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error { + for _, e := range entries { + // Determine upper bounds and check for overflow. This implicitly + // checks for NoID. + lastID := e.FirstID + e.Length + if lastID <= e.FirstID { + return syserror.EINVAL + } + lastParentID := e.FirstParentID + e.Length + if lastParentID <= e.FirstParentID { + return syserror.EINVAL + } + // "3. The mapped user IDs (group IDs) must in turn have a mapping in + // the parent user namespace." + // Only the root namespace has a nil parent, and root is assigned + // mappings when it's created, so SetUIDMap would have returned EPERM + // without reaching this point if ns is root. + if !ns.parent.allIDsMapped(&ns.parent.uidMapToParent, e.FirstParentID, lastParentID) { + return syserror.EPERM + } + // If either of these Adds fail, we have an overlapping range. + if !ns.uidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { + return syserror.EINVAL + } + if !ns.uidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { + return syserror.EINVAL + } + } + return nil +} + +// SetGIDMap instructs ns to translate GIDs as specified by entries. +func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) error { + c := CredentialsFromContext(ctx) + + ns.mu.Lock() + defer ns.mu.Unlock() + if !ns.gidMapFromParent.IsEmpty() { + return syserror.EPERM + } + if len(entries) == 0 { + return syserror.EINVAL + } + if !c.HasCapabilityIn(linux.CAP_SETGID, ns) { + return syserror.EPERM + } + if c.UserNamespace != ns && c.UserNamespace != ns.parent { + return syserror.EPERM + } + if !c.HasCapabilityIn(linux.CAP_SETGID, ns.parent) { + if len(entries) != 1 || ns.parent.MapToKGID(GID(entries[0].FirstParentID)) != c.EffectiveKGID || entries[0].Length != 1 { + return syserror.EPERM + } + // It's correct for this to still be UID. + if c.EffectiveKUID != ns.owner { + return syserror.EPERM + } + // "In the case of gid_map, use of the setgroups(2) system call must + // first be denied by writing "deny" to the /proc/[pid]/setgroups file + // (see below) before writing to gid_map." (This file isn't implemented + // in the version of Linux we're emulating; see comment in + // UserNamespace.) + } + if err := ns.trySetGIDMap(entries); err != nil { + ns.gidMapFromParent.RemoveAll() + ns.gidMapToParent.RemoveAll() + return err + } + return nil +} + +func (ns *UserNamespace) trySetGIDMap(entries []IDMapEntry) error { + for _, e := range entries { + lastID := e.FirstID + e.Length + if lastID <= e.FirstID { + return syserror.EINVAL + } + lastParentID := e.FirstParentID + e.Length + if lastParentID <= e.FirstParentID { + return syserror.EINVAL + } + if !ns.parent.allIDsMapped(&ns.parent.gidMapToParent, e.FirstParentID, lastParentID) { + return syserror.EPERM + } + if !ns.gidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) { + return syserror.EINVAL + } + if !ns.gidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) { + return syserror.EINVAL + } + } + return nil +} + +// UIDMap returns the user ID mappings configured for ns. If no mappings +// have been configured, UIDMap returns nil. +func (ns *UserNamespace) UIDMap() []IDMapEntry { + return ns.getIDMap(&ns.uidMapToParent) +} + +// GIDMap returns the group ID mappings configured for ns. If no mappings +// have been configured, GIDMap returns nil. +func (ns *UserNamespace) GIDMap() []IDMapEntry { + return ns.getIDMap(&ns.gidMapToParent) +} + +func (ns *UserNamespace) getIDMap(m *idMapSet) []IDMapEntry { + ns.mu.Lock() + defer ns.mu.Unlock() + var entries []IDMapEntry + for it := m.FirstSegment(); it.Ok(); it = it.NextSegment() { + entries = append(entries, IDMapEntry{ + FirstID: it.Start(), + FirstParentID: it.Value(), + Length: it.Range().Length(), + }) + } + return entries +} diff --git a/pkg/sentry/kernel/auth/id_map_functions.go b/pkg/sentry/kernel/auth/id_map_functions.go new file mode 100644 index 000000000..889291d96 --- /dev/null +++ b/pkg/sentry/kernel/auth/id_map_functions.go @@ -0,0 +1,45 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +// idMapFunctions "implements" generic interface segment.Functions for +// idMapSet. An idMapSet maps non-overlapping ranges of contiguous IDs in one +// user namespace to non-overlapping ranges of contiguous IDs in another user +// namespace. Each such ID mapping is implemented as a range-to-value mapping +// in the set such that [range.Start(), range.End()) => [value, value + +// range.Length()). +type idMapFunctions struct{} + +func (idMapFunctions) MinKey() uint32 { + return 0 +} + +func (idMapFunctions) MaxKey() uint32 { + return NoID +} + +func (idMapFunctions) ClearValue(*uint32) {} + +func (idMapFunctions) Merge(r1 idMapRange, val1 uint32, r2 idMapRange, val2 uint32) (uint32, bool) { + // Mapped ranges have to be contiguous. + if val1+r1.Length() != val2 { + return 0, false + } + return val1, true +} + +func (idMapFunctions) Split(r idMapRange, val uint32, split uint32) (uint32, uint32) { + return val, val + (split - r.Start) +} diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go new file mode 100644 index 000000000..0980aeadf --- /dev/null +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -0,0 +1,130 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "math" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// A UserNamespace represents a user namespace. See user_namespaces(7) for +// details. +type UserNamespace struct { + // parent is this namespace's parent. If this is the root namespace, parent + // is nil. The parent pointer is immutable. + parent *UserNamespace + + // owner is the effective UID of the namespace's creator in the root + // namespace. owner is immutable. + owner KUID + + // mu protects the following fields. + // + // If mu will be locked in multiple UserNamespaces, it must be locked in + // descendant namespaces before ancestors. + mu sync.Mutex `state:"nosave"` + + // Mappings of user/group IDs between this namespace and its parent. + // + // All ID maps, once set, cannot be changed. This means that successful + // UID/GID translations cannot be racy. + uidMapFromParent idMapSet + uidMapToParent idMapSet + gidMapFromParent idMapSet + gidMapToParent idMapSet + + // TODO: Consider supporting disabling setgroups(2), which "was + // added in Linux 3.19, but was backported to many earlier stable kernel + // series, because it addresses a security issue" - user_namespaces(7). (It + // was not backported to 3.11.10, which we are currently imitating.) +} + +// NewRootUserNamespace returns a UserNamespace that is appropriate for a +// system's root user namespace. +func NewRootUserNamespace() *UserNamespace { + var ns UserNamespace + // """ + // The initial user namespace has no parent namespace, but, for + // consistency, the kernel provides dummy user and group ID mapping files + // for this namespace. Looking at the uid_map file (gid_map is the same) + // from a shell in the initial namespace shows: + // + // $ cat /proc/$$/uid_map + // 0 0 4294967295 + // """ - user_namespaces(7) + for _, m := range []*idMapSet{ + &ns.uidMapFromParent, + &ns.uidMapToParent, + &ns.gidMapFromParent, + &ns.gidMapToParent, + } { + if !m.Add(idMapRange{0, math.MaxUint32}, 0) { + panic("Failed to insert into empty ID map") + } + } + return &ns +} + +// Root returns the root of the user namespace tree containing ns. +func (ns *UserNamespace) Root() *UserNamespace { + for ns.parent != nil { + ns = ns.parent + } + return ns +} + +// "The kernel imposes (since version 3.11) a limit of 32 nested levels of user +// namespaces." - user_namespaces(7) +const maxUserNamespaceDepth = 32 + +func (ns *UserNamespace) depth() int { + var i int + for ns != nil { + i++ + ns = ns.parent + } + return i +} + +// NewChildUserNamespace returns a new user namespace created by a caller with +// credentials c. +func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { + if c.UserNamespace.depth() >= maxUserNamespaceDepth { + // "... Calls to unshare(2) or clone(2) that would cause this limit to + // be exceeded fail with the error EUSERS." - user_namespaces(7) + return nil, syserror.EUSERS + } + // "EPERM: CLONE_NEWUSER was specified in flags, but either the effective + // user ID or the effective group ID of the caller does not have a mapping + // in the parent namespace (see user_namespaces(7))." - clone(2) + // "CLONE_NEWUSER requires that the user ID and group ID of the calling + // process are mapped to user IDs and group IDs in the user namespace of + // the calling process at the time of the call." - unshare(2) + if !c.EffectiveKUID.In(c.UserNamespace).Ok() { + return nil, syserror.EPERM + } + if !c.EffectiveKGID.In(c.UserNamespace).Ok() { + return nil, syserror.EPERM + } + return &UserNamespace{ + parent: c.UserNamespace, + owner: c.EffectiveKUID, + // "When a user namespace is created, it starts without a mapping of + // user IDs (group IDs) to the parent user namespace." - + // user_namespaces(7) + }, nil +} |