diff options
author | Rahat Mahmood <rahat@google.com> | 2018-11-08 11:08:41 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-11-08 11:09:46 -0800 |
commit | 5a0be6fa203273d1e4ab06a206eaffeca5724533 (patch) | |
tree | f73a10689d42ca3b3676cd438dfd569ec1e7b745 /pkg | |
parent | 90e81b2e5c665b9fc149f97dcf15142c190260c6 (diff) |
Create stubs for syscalls up to Linux 4.4.
Create syscall stubs for missing syscalls up to Linux 4.4 and advertise
a kernel version of 4.4.
PiperOrigin-RevId: 220667680
Change-Id: Idbdccde538faabf16debc22f492dd053a8af0ba7
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/fs/inode.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/README.md | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/fds.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/task.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/auth/user_namespace.go | 5 | ||||
-rw-r--r-- | pkg/sentry/kernel/version.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 21 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 3 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_thread.go | 4 |
9 files changed, 24 insertions, 25 deletions
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 95769ccf8..38b140bd2 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -439,10 +439,7 @@ func (i *Inode) CheckOwnership(ctx context.Context) bool { // CheckCapability checks whether `ctx` has capability `cp` with respect to // operations on this Inode. // -// Compare Linux's kernel/capability.c:capable_wrt_inode_uidgid(). Note that -// this function didn't exist in Linux 3.11.10, but was added by upstream -// 23adbe12ef7d "fs,userns: Change inode_capable to capable_wrt_inode_uidgid" -// to fix local privilege escalation CVE-2014-4014. +// Compare Linux's kernel/capability.c:capable_wrt_inode_uidgid(). func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool { uattr, err := i.UnstableAttr(ctx) if err != nil { diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md index e1ed88512..686d40f0c 100644 --- a/pkg/sentry/fs/proc/README.md +++ b/pkg/sentry/fs/proc/README.md @@ -223,7 +223,7 @@ Number of seconds idle | Always zero ```bash $ cat /proc/version -Linux version 3.11.10 #1 SMP Fri Nov 29 10:47:50 PST 2013 +Linux version 4.4 #1 SMP Sun Jan 10 15:06:54 PST 2016 ``` ## Process-specific data diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index 5ebb33703..5acbce75e 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -173,11 +173,6 @@ func (f *fdDir) Check(ctx context.Context, inode *fs.Inode, req fs.PermMask) boo if t := kernel.TaskFromContext(ctx); t != nil { // Allow access if the task trying to access it is in the // thread group corresponding to this directory. - // - // N.B. Technically, in Linux 3.11, this compares what would be - // the equivalent of task pointers. However, this was fixed - // later in 54708d2858e7 ("proc: actually make - // proc_fd_permission() thread-friendly"). 
if f.t.ThreadGroup() == t.ThreadGroup() { return true } diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 404faea0a..9f13ff91c 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -76,7 +76,7 @@ func newTaskDir(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace "gid_map": newGIDMap(t, msrc), // TODO: This is incorrect for /proc/[pid]/task/[tid]/io, i.e. if // showSubtasks is false: - // http://lxr.free-electrons.com/source/fs/proc/base.c?v=3.11#L2980 + // https://elixir.bootlin.com/linux/v4.4/source/fs/proc/base.c#L3154 "io": newIO(t, msrc), "maps": newMaps(t, msrc), "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index 5bb9c44c0..30957bb9a 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -49,10 +49,7 @@ type UserNamespace struct { gidMapFromParent idMapSet gidMapToParent idMapSet - // TODO: Consider supporting disabling setgroups(2), which "was - // added in Linux 3.19, but was backported to many earlier stable kernel - // series, because it addresses a security issue" - user_namespaces(7). (It - // was not backported to 3.11.10, which we are currently imitating.) + // TODO: Support disabling setgroups(2). } // NewRootUserNamespace returns a UserNamespace that is appropriate for a diff --git a/pkg/sentry/kernel/version.go b/pkg/sentry/kernel/version.go index 72bb0f93c..8d2f14209 100644 --- a/pkg/sentry/kernel/version.go +++ b/pkg/sentry/kernel/version.go @@ -19,7 +19,7 @@ type Version struct { // Operating system name (e.g. "Linux"). Sysname string - // Operating system release (e.g. "3.11.10-amd64"). + // Operating system release (e.g. "4.4-amd64"). Release string // Operating system version. 
On Linux this takes the shape diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 11bf81f88..13084c0ef 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -32,15 +32,19 @@ import ( const _AUDIT_ARCH_X86_64 = 0xc000003e // AMD64 is a table of Linux amd64 syscall API with the corresponding syscall -// numbers from Linux 3.11. The entries commented out are those syscalls we +// numbers from Linux 4.4. The entries commented out are those syscalls we // don't currently support. var AMD64 = &kernel.SyscallTable{ OS: abi.Linux, Arch: arch.AMD64, Version: kernel.Version{ + // Version 4.4 is chosen as a stable, longterm version of Linux, which + // guides the interface provided by this syscall table. The build + // version is that for a clean build with default kernel config, at 5 + // minutes after v4.4 was tagged. Sysname: "Linux", - Release: "3.11.10", - Version: "#1 SMP Fri Nov 29 10:47:50 PST 2013", + Release: "4.4", + Version: "#1 SMP Sun Jan 10 15:06:54 PST 2016", }, AuditNumber: _AUDIT_ARCH_X86_64, Table: map[uintptr]kernel.SyscallFn{ @@ -358,9 +362,18 @@ var AMD64 = &kernel.SyscallTable{ // 311: ProcessVmWritev, TODO may require cap_sys_ptrace 312: syscalls.CapError(linux.CAP_SYS_PTRACE), // Kcmp, requires cap_sys_ptrace 313: syscalls.CapError(linux.CAP_SYS_MODULE), // FinitModule, requires cap_sys_module - // "Backports." 
+ // 314: SchedSetattr, TODO, we have no scheduler + // 315: SchedGetattr, TODO, we have no scheduler + // 316: Renameat2, TODO 317: Seccomp, 318: GetRandom, + // 319: MemfdCreate, TODO + 320: syscalls.CapError(linux.CAP_SYS_BOOT), // KexecFileLoad, infeasible to support + 321: syscalls.CapError(linux.CAP_SYS_ADMIN), // Bpf, requires cap_sys_admin for all commands + // 322: Execveat, TODO + // 323: Userfaultfd, TODO + // 324: Membarrier, TODO + 325: syscalls.Error(nil), // Mlock2, TODO }, Emulate: map[usermem.Addr]uintptr{ diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index a70f35be0..89d21dd98 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1140,9 +1140,6 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error { // always enabled, and thus imposes the following restrictions on hard // links. - // Technically Linux is more restrictive in 3.11.10 (requires CAP_FOWNER in - // root user namespace); this is from the later f2ca379642d7 "namei: permit - // linking with CAP_FOWNER in userns". if target.CheckOwnership(t) { // fs/namei.c:may_linkat: "Source inode owner (or CAP_FOWNER) // can hardlink all they like." diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 820ca680e..9eed613a1 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -159,8 +159,8 @@ func clone(t *kernel.Task, flags int, stack usermem.Addr, parentTID usermem.Addr } // Clone implements linux syscall clone(2). -// sys_clone has so many flavors. We implement the default one in the -// current linux 3.11 x86_64: +// sys_clone has so many flavors. We implement the default one in linux 3.11 +// x86_64: // sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, tls_val) func Clone(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { flags := int(args[0].Int()) |