summaryrefslogtreecommitdiffhomepage
path: root/pkg/abi
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/abi')
-rw-r--r--pkg/abi/BUILD13
-rw-r--r--pkg/abi/abi.go45
-rw-r--r--pkg/abi/abi_linux.go20
-rw-r--r--pkg/abi/flag.go85
-rw-r--r--pkg/abi/linux/BUILD85
-rw-r--r--pkg/abi/linux/aio.go20
-rw-r--r--pkg/abi/linux/arch_amd64.go23
-rw-r--r--pkg/abi/linux/audit.go23
-rw-r--r--pkg/abi/linux/bpf.go34
-rw-r--r--pkg/abi/linux/capability.go190
-rw-r--r--pkg/abi/linux/clone.go41
-rw-r--r--pkg/abi/linux/dev.go58
-rw-r--r--pkg/abi/linux/elf.go108
-rw-r--r--pkg/abi/linux/epoll.go62
-rw-r--r--pkg/abi/linux/epoll_amd64.go29
-rw-r--r--pkg/abi/linux/epoll_arm64.go28
-rw-r--r--pkg/abi/linux/errors.go172
-rw-r--r--pkg/abi/linux/eventfd.go22
-rw-r--r--pkg/abi/linux/exec.go18
-rw-r--r--pkg/abi/linux/fcntl.go69
-rw-r--r--pkg/abi/linux/file.go383
-rw-r--r--pkg/abi/linux/file_amd64.go46
-rw-r--r--pkg/abi/linux/file_arm64.go47
-rw-r--r--pkg/abi/linux/fs.go103
-rw-r--r--pkg/abi/linux/futex.go62
-rw-r--r--pkg/abi/linux/inotify.go97
-rw-r--r--pkg/abi/linux/ioctl.go100
-rw-r--r--pkg/abi/linux/ioctl_tun.go29
-rw-r--r--pkg/abi/linux/ip.go151
-rw-r--r--pkg/abi/linux/ipc.go53
-rw-r--r--pkg/abi/linux/limits.go88
-rw-r--r--pkg/abi/linux/linux.go39
-rw-r--r--pkg/abi/linux/mm.go130
-rw-r--r--pkg/abi/linux/netdevice.go86
-rw-r--r--pkg/abi/linux/netfilter.go552
-rw-r--r--pkg/abi/linux/netfilter_test.go46
-rw-r--r--pkg/abi/linux/netlink.go130
-rw-r--r--pkg/abi/linux/netlink_route.go346
-rw-r--r--pkg/abi/linux/poll.go42
-rw-r--r--pkg/abi/linux/prctl.go164
-rw-r--r--pkg/abi/linux/ptrace.go89
-rw-r--r--pkg/abi/linux/ptrace_amd64.go52
-rw-r--r--pkg/abi/linux/ptrace_arm64.go29
-rw-r--r--pkg/abi/linux/rseq.go130
-rw-r--r--pkg/abi/linux/rusage.go46
-rw-r--r--pkg/abi/linux/sched.go36
-rw-r--r--pkg/abi/linux/seccomp.go72
-rw-r--r--pkg/abi/linux/sem.go52
-rw-r--r--pkg/abi/linux/shm.go86
-rw-r--r--pkg/abi/linux/signal.go234
-rw-r--r--pkg/abi/linux/signalfd.go45
-rw-r--r--pkg/abi/linux/socket.go456
-rw-r--r--pkg/abi/linux/splice.go23
-rw-r--r--pkg/abi/linux/tcp.go60
-rw-r--r--pkg/abi/linux/time.go270
-rw-r--r--pkg/abi/linux/timer.go23
-rw-r--r--pkg/abi/linux/tty.go344
-rw-r--r--pkg/abi/linux/uio.go18
-rw-r--r--pkg/abi/linux/utsname.go49
-rw-r--r--pkg/abi/linux/wait.go36
-rw-r--r--pkg/abi/linux/xattr.go28
61 files changed, 6017 insertions, 0 deletions
diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD
new file mode 100644
index 000000000..839f822eb
--- /dev/null
+++ b/pkg/abi/BUILD
@@ -0,0 +1,13 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "abi",
+ srcs = [
+ "abi.go",
+ "abi_linux.go",
+ "flag.go",
+ ],
+ visibility = ["//:sandbox"],
+)
diff --git a/pkg/abi/abi.go b/pkg/abi/abi.go
new file mode 100644
index 000000000..e6be93c3a
--- /dev/null
+++ b/pkg/abi/abi.go
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package abi describes the interface between a kernel and userspace.
+package abi
+
+import (
+ "fmt"
+)
+
+// OS describes the target operating system for an ABI.
+//
+// Note that OS is architecture-independent. The details of the OS ABI will
+// vary between architectures.
+type OS int
+
+const (
+ // Linux is the Linux ABI.
+ Linux OS = iota
+)
+
+// String implements fmt.Stringer. Linux is rendered as "linux"; any other value as "OS(<number>)".
+func (o OS) String() string {
+ switch o {
+ case Linux:
+ return "linux"
+ default:
+ return fmt.Sprintf("OS(%d)", o)
+ }
+}
+
+// ABI is an interface that defines OS-specific interactions.
+type ABI interface {
+}
diff --git a/pkg/abi/abi_linux.go b/pkg/abi/abi_linux.go
new file mode 100644
index 000000000..3059479bd
--- /dev/null
+++ b/pkg/abi/abi_linux.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package abi
+
+// Host specifies the host ABI.
+const Host = Linux
diff --git a/pkg/abi/flag.go b/pkg/abi/flag.go
new file mode 100644
index 000000000..dcdd66d4e
--- /dev/null
+++ b/pkg/abi/flag.go
@@ -0,0 +1,85 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package abi
+
+import (
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+)
+
+// A FlagSet is a slice of bit-flags and their name.
+type FlagSet []struct {
+ Flag uint64
+ Name string
+}
+
+// Parse returns val as a "|"-joined list of the names of the flags whose bits are all set in val.
+// Bits matching no flag are appended as a single hex value; if nothing is produced, "0x0" is returned.
+func (s FlagSet) Parse(val uint64) string {
+ var flags []string
+
+ for _, f := range s {
+ if val&f.Flag == f.Flag {
+ flags = append(flags, f.Name)
+ val &^= f.Flag
+ }
+ }
+
+ if val != 0 {
+ flags = append(flags, "0x"+strconv.FormatUint(val, 16))
+ }
+
+ if len(flags) == 0 {
+ // Prefer "0x0" to an empty string when no flag matched and no bits remain.
+ return "0x0"
+ }
+
+ return strings.Join(flags, "|")
+}
+
+// ValueSet is a map of syscall values to their name. Parse will use the name
+// or the value if unknown.
+type ValueSet map[uint64]string
+
+// Parse returns the name of the value associated with `val`. Unknown values
+// are converted to hex.
+func (s ValueSet) Parse(val uint64) string {
+ if v, ok := s[val]; ok {
+ return v
+ }
+ return fmt.Sprintf("%#x", val)
+}
+
+// ParseDecimal returns the name of the value associated with `val`. Unknown
+// values are converted to decimal.
+func (s ValueSet) ParseDecimal(val uint64) string {
+ if v, ok := s[val]; ok {
+ return v
+ }
+ return fmt.Sprintf("%d", val)
+}
+
+// ParseName returns the value whose name equals 'name', and true. If no entry
+// has that name, it returns math.MaxUint64 and false.
+func (s ValueSet) ParseName(name string) (uint64, bool) {
+ for k, v := range s {
+ if v == name {
+ return k, true
+ }
+ }
+ return math.MaxUint64, false
+}
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
new file mode 100644
index 000000000..114b516e2
--- /dev/null
+++ b/pkg/abi/linux/BUILD
@@ -0,0 +1,85 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+# Package linux contains the constants and types needed to interface with a
+# Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix
+# when the host OS may not be Linux.
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "linux",
+ srcs = [
+ "aio.go",
+ "arch_amd64.go",
+ "audit.go",
+ "bpf.go",
+ "capability.go",
+ "clone.go",
+ "dev.go",
+ "elf.go",
+ "epoll.go",
+ "epoll_amd64.go",
+ "epoll_arm64.go",
+ "errors.go",
+ "eventfd.go",
+ "exec.go",
+ "fcntl.go",
+ "file.go",
+ "file_amd64.go",
+ "file_arm64.go",
+ "fs.go",
+ "futex.go",
+ "inotify.go",
+ "ioctl.go",
+ "ioctl_tun.go",
+ "ip.go",
+ "ipc.go",
+ "limits.go",
+ "linux.go",
+ "mm.go",
+ "netdevice.go",
+ "netfilter.go",
+ "netlink.go",
+ "netlink_route.go",
+ "poll.go",
+ "prctl.go",
+ "ptrace.go",
+ "ptrace_amd64.go",
+ "ptrace_arm64.go",
+ "rseq.go",
+ "rusage.go",
+ "sched.go",
+ "seccomp.go",
+ "sem.go",
+ "shm.go",
+ "signal.go",
+ "signalfd.go",
+ "socket.go",
+ "splice.go",
+ "tcp.go",
+ "time.go",
+ "timer.go",
+ "tty.go",
+ "uio.go",
+ "utsname.go",
+ "wait.go",
+ "xattr.go",
+ ],
+ marshal = True,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/abi",
+ "//pkg/binary",
+ "//pkg/bits",
+ ],
+)
+
+go_test(
+ name = "linux_test",
+ size = "small",
+ srcs = ["netfilter_test.go"],
+ library = ":linux",
+ deps = [
+ "//pkg/binary",
+ ],
+)
diff --git a/pkg/abi/linux/aio.go b/pkg/abi/linux/aio.go
new file mode 100644
index 000000000..3c6e0079d
--- /dev/null
+++ b/pkg/abi/linux/aio.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+const (
+ // AIORingSize is sizeof(struct aio_ring).
+ AIORingSize = 32
+)
diff --git a/pkg/abi/linux/arch_amd64.go b/pkg/abi/linux/arch_amd64.go
new file mode 100644
index 000000000..0be31e755
--- /dev/null
+++ b/pkg/abi/linux/arch_amd64.go
@@ -0,0 +1,23 @@
+// Copyright 2020 The gVisor Authors.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// Start and end addresses of the vsyscall page.
+const (
+ VSyscallStartAddr uint64 = 0xffffffffff600000
+ VSyscallEndAddr uint64 = 0xffffffffff601000
+)
diff --git a/pkg/abi/linux/audit.go b/pkg/abi/linux/audit.go
new file mode 100644
index 000000000..6cca69af9
--- /dev/null
+++ b/pkg/abi/linux/audit.go
@@ -0,0 +1,23 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Audit numbers identify different system call APIs, from <uapi/linux/audit.h>
+const (
+ // AUDIT_ARCH_X86_64 identifies AMD64.
+ AUDIT_ARCH_X86_64 = 0xc000003e
+ // AUDIT_ARCH_AARCH64 identifies ARM64.
+ AUDIT_ARCH_AARCH64 = 0xc00000b7
+)
diff --git a/pkg/abi/linux/bpf.go b/pkg/abi/linux/bpf.go
new file mode 100644
index 000000000..aa3d3ce70
--- /dev/null
+++ b/pkg/abi/linux/bpf.go
@@ -0,0 +1,34 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// BPFInstruction is a raw BPF virtual machine instruction.
+//
+// +stateify savable
+type BPFInstruction struct {
+ // OpCode is the operation to execute.
+ OpCode uint16
+
+ // JumpIfTrue is the number of instructions to skip if OpCode is a
+ // conditional instruction and the condition is true.
+ JumpIfTrue uint8
+
+ // JumpIfFalse is the number of instructions to skip if OpCode is a
+ // conditional instruction and the condition is false.
+ JumpIfFalse uint8
+
+ // K is a constant parameter. The meaning depends on the value of OpCode.
+ K uint32
+}
diff --git a/pkg/abi/linux/capability.go b/pkg/abi/linux/capability.go
new file mode 100644
index 000000000..965f74663
--- /dev/null
+++ b/pkg/abi/linux/capability.go
@@ -0,0 +1,190 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// A Capability represents the ability to perform a privileged operation.
+type Capability int
+
+// Capabilities defined by Linux. Taken from the kernel's
+// include/uapi/linux/capability.h. See capabilities(7) or that file for more
+// detailed capability descriptions.
+const (
+ CAP_CHOWN = Capability(0)
+ CAP_DAC_OVERRIDE = Capability(1)
+ CAP_DAC_READ_SEARCH = Capability(2)
+ CAP_FOWNER = Capability(3)
+ CAP_FSETID = Capability(4)
+ CAP_KILL = Capability(5)
+ CAP_SETGID = Capability(6)
+ CAP_SETUID = Capability(7)
+ CAP_SETPCAP = Capability(8)
+ CAP_LINUX_IMMUTABLE = Capability(9)
+ CAP_NET_BIND_SERVICE = Capability(10)
+ CAP_NET_BROADCAST = Capability(11)
+ CAP_NET_ADMIN = Capability(12)
+ CAP_NET_RAW = Capability(13)
+ CAP_IPC_LOCK = Capability(14)
+ CAP_IPC_OWNER = Capability(15)
+ CAP_SYS_MODULE = Capability(16)
+ CAP_SYS_RAWIO = Capability(17)
+ CAP_SYS_CHROOT = Capability(18)
+ CAP_SYS_PTRACE = Capability(19)
+ CAP_SYS_PACCT = Capability(20)
+ CAP_SYS_ADMIN = Capability(21)
+ CAP_SYS_BOOT = Capability(22)
+ CAP_SYS_NICE = Capability(23)
+ CAP_SYS_RESOURCE = Capability(24)
+ CAP_SYS_TIME = Capability(25)
+ CAP_SYS_TTY_CONFIG = Capability(26)
+ CAP_MKNOD = Capability(27)
+ CAP_LEASE = Capability(28)
+ CAP_AUDIT_WRITE = Capability(29)
+ CAP_AUDIT_CONTROL = Capability(30)
+ CAP_SETFCAP = Capability(31)
+ CAP_MAC_OVERRIDE = Capability(32)
+ CAP_MAC_ADMIN = Capability(33)
+ CAP_SYSLOG = Capability(34)
+ CAP_WAKE_ALARM = Capability(35)
+ CAP_BLOCK_SUSPEND = Capability(36)
+ CAP_AUDIT_READ = Capability(37)
+
+ // CAP_LAST_CAP is the highest-numbered capability.
+ // Search for "CAP_LAST_CAP" to find other places that need to change.
+ CAP_LAST_CAP = CAP_AUDIT_READ
+)
+
+// Ok returns true if cp is a supported capability, i.e. 0 <= cp <= CAP_LAST_CAP.
+func (cp Capability) Ok() bool {
+ return cp >= 0 && cp <= CAP_LAST_CAP
+}
+
+// String returns the capability name, or "UNKNOWN" if cp is not one of the capabilities defined above.
+func (cp Capability) String() string {
+ switch cp {
+ case CAP_CHOWN:
+ return "CAP_CHOWN"
+ case CAP_DAC_OVERRIDE:
+ return "CAP_DAC_OVERRIDE"
+ case CAP_DAC_READ_SEARCH:
+ return "CAP_DAC_READ_SEARCH"
+ case CAP_FOWNER:
+ return "CAP_FOWNER"
+ case CAP_FSETID:
+ return "CAP_FSETID"
+ case CAP_KILL:
+ return "CAP_KILL"
+ case CAP_SETGID:
+ return "CAP_SETGID"
+ case CAP_SETUID:
+ return "CAP_SETUID"
+ case CAP_SETPCAP:
+ return "CAP_SETPCAP"
+ case CAP_LINUX_IMMUTABLE:
+ return "CAP_LINUX_IMMUTABLE"
+ case CAP_NET_BIND_SERVICE:
+ return "CAP_NET_BIND_SERVICE"
+ case CAP_NET_BROADCAST:
+ return "CAP_NET_BROADCAST"
+ case CAP_NET_ADMIN:
+ return "CAP_NET_ADMIN"
+ case CAP_NET_RAW:
+ return "CAP_NET_RAW"
+ case CAP_IPC_LOCK:
+ return "CAP_IPC_LOCK"
+ case CAP_IPC_OWNER:
+ return "CAP_IPC_OWNER"
+ case CAP_SYS_MODULE:
+ return "CAP_SYS_MODULE"
+ case CAP_SYS_RAWIO:
+ return "CAP_SYS_RAWIO"
+ case CAP_SYS_CHROOT:
+ return "CAP_SYS_CHROOT"
+ case CAP_SYS_PTRACE:
+ return "CAP_SYS_PTRACE"
+ case CAP_SYS_PACCT:
+ return "CAP_SYS_PACCT"
+ case CAP_SYS_ADMIN:
+ return "CAP_SYS_ADMIN"
+ case CAP_SYS_BOOT:
+ return "CAP_SYS_BOOT"
+ case CAP_SYS_NICE:
+ return "CAP_SYS_NICE"
+ case CAP_SYS_RESOURCE:
+ return "CAP_SYS_RESOURCE"
+ case CAP_SYS_TIME:
+ return "CAP_SYS_TIME"
+ case CAP_SYS_TTY_CONFIG:
+ return "CAP_SYS_TTY_CONFIG"
+ case CAP_MKNOD:
+ return "CAP_MKNOD"
+ case CAP_LEASE:
+ return "CAP_LEASE"
+ case CAP_AUDIT_WRITE:
+ return "CAP_AUDIT_WRITE"
+ case CAP_AUDIT_CONTROL:
+ return "CAP_AUDIT_CONTROL"
+ case CAP_SETFCAP:
+ return "CAP_SETFCAP"
+ case CAP_MAC_OVERRIDE:
+ return "CAP_MAC_OVERRIDE"
+ case CAP_MAC_ADMIN:
+ return "CAP_MAC_ADMIN"
+ case CAP_SYSLOG:
+ return "CAP_SYSLOG"
+ case CAP_WAKE_ALARM:
+ return "CAP_WAKE_ALARM"
+ case CAP_BLOCK_SUSPEND:
+ return "CAP_BLOCK_SUSPEND"
+ case CAP_AUDIT_READ:
+ return "CAP_AUDIT_READ"
+ default:
+ return "UNKNOWN"
+ }
+}
+
+// Version numbers used by the capget/capset syscalls, defined in Linux's
+// include/uapi/linux/capability.h.
+const (
+ // LINUX_CAPABILITY_VERSION_1 causes the data pointer to be
+ // interpreted as a pointer to a single cap_user_data_t. Since capability
+ // sets are 64 bits and the "capability sets" in cap_user_data_t are 32
+ // bits only, this causes the upper 32 bits to be implicitly 0.
+ LINUX_CAPABILITY_VERSION_1 = 0x19980330
+
+ // LINUX_CAPABILITY_VERSION_2 and LINUX_CAPABILITY_VERSION_3 cause the
+ // data pointer to be interpreted as a pointer to an array of 2
+ // cap_user_data_t, using the second to store the 32 MSB of each capability
+ // set. Versions 2 and 3 are identical, but Linux printk's a warning on use
+ // of version 2 due to a userspace API defect.
+ LINUX_CAPABILITY_VERSION_2 = 0x20071026
+ LINUX_CAPABILITY_VERSION_3 = 0x20080522
+
+ // HighestCapabilityVersion is the highest supported
+ // LINUX_CAPABILITY_VERSION_* version.
+ HighestCapabilityVersion = LINUX_CAPABILITY_VERSION_3
+)
+
+// CapUserHeader is equivalent to Linux's cap_user_header_t.
+type CapUserHeader struct {
+ Version uint32
+ Pid int32
+}
+
+// CapUserData is equivalent to Linux's cap_user_data_t.
+type CapUserData struct {
+ Effective uint32
+ Permitted uint32
+ Inheritable uint32
+}
diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go
new file mode 100644
index 000000000..c2cbfca5e
--- /dev/null
+++ b/pkg/abi/linux/clone.go
@@ -0,0 +1,41 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Clone constants per clone(2).
+const (
+ CLONE_VM = 0x100
+ CLONE_FS = 0x200
+ CLONE_FILES = 0x400
+ CLONE_SIGHAND = 0x800
+ CLONE_PARENT = 0x8000
+ CLONE_PTRACE = 0x2000
+ CLONE_VFORK = 0x4000
+ CLONE_THREAD = 0x10000
+ CLONE_NEWNS = 0x20000
+ CLONE_SYSVSEM = 0x40000
+ CLONE_SETTLS = 0x80000
+ CLONE_PARENT_SETTID = 0x100000
+ CLONE_CHILD_CLEARTID = 0x200000
+ CLONE_DETACHED = 0x400000
+ CLONE_UNTRACED = 0x800000
+ CLONE_CHILD_SETTID = 0x1000000
+ CLONE_NEWUTS = 0x4000000
+ CLONE_NEWIPC = 0x8000000
+ CLONE_NEWUSER = 0x10000000
+ CLONE_NEWPID = 0x20000000
+ CLONE_NEWNET = 0x40000000
+ CLONE_IO = 0x80000000
+)
diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go
new file mode 100644
index 000000000..89f9a793f
--- /dev/null
+++ b/pkg/abi/linux/dev.go
@@ -0,0 +1,58 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// MakeDeviceID encodes a major and minor device number into a single device ID.
+//
+// Format (see linux/kdev_t.h:new_encode_dev):
+//
+// Bits 7:0 - minor bits 7:0
+// Bits 19:8 - major bits 11:0
+// Bits 31:20 - minor bits 19:8
+func MakeDeviceID(major uint16, minor uint32) uint32 {
+ return (minor & 0xff) | ((uint32(major) & 0xfff) << 8) | ((minor >> 8) << 20)
+}
+
+// DecodeDeviceID splits a device ID in the MakeDeviceID format into its major (bits 19:8) and minor (bits 31:20 and 7:0) numbers.
+func DecodeDeviceID(rdev uint32) (uint16, uint32) {
+ major := uint16((rdev >> 8) & 0xfff)
+ minor := (rdev & 0xff) | ((rdev >> 20) << 8)
+ return major, minor
+}
+
+// Character device IDs.
+//
+// See Documentation/devices.txt and uapi/linux/major.h.
+const (
+ // MEM_MAJOR is the major device number for "memory" character devices.
+ MEM_MAJOR = 1
+
+ // TTYAUX_MAJOR is the major device number for alternate TTY devices.
+ TTYAUX_MAJOR = 5
+
+ // UNIX98_PTY_MASTER_MAJOR is the initial major device number for
+ // Unix98 PTY masters.
+ UNIX98_PTY_MASTER_MAJOR = 128
+
+ // UNIX98_PTY_SLAVE_MAJOR is the initial major device number for
+ // Unix98 PTY slaves.
+ UNIX98_PTY_SLAVE_MAJOR = 136
+)
+
+// Minor device numbers for TTYAUX_MAJOR.
+const (
+ // PTMX_MINOR is the minor device number for /dev/ptmx.
+ PTMX_MINOR = 2
+)
diff --git a/pkg/abi/linux/elf.go b/pkg/abi/linux/elf.go
new file mode 100644
index 000000000..7c9a02f20
--- /dev/null
+++ b/pkg/abi/linux/elf.go
@@ -0,0 +1,108 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Linux auxiliary vector entry types.
+const (
+ // AT_NULL is the end of the auxiliary vector.
+ AT_NULL = 0
+
+ // AT_IGNORE should be ignored.
+ AT_IGNORE = 1
+
+ // AT_EXECFD is the file descriptor of the program.
+ AT_EXECFD = 2
+
+ // AT_PHDR points to the program headers.
+ AT_PHDR = 3
+
+ // AT_PHENT is the size of a program header entry.
+ AT_PHENT = 4
+
+ // AT_PHNUM is the number of program headers.
+ AT_PHNUM = 5
+
+ // AT_PAGESZ is the system page size.
+ AT_PAGESZ = 6
+
+ // AT_BASE is the base address of the interpreter.
+ AT_BASE = 7
+
+ // AT_FLAGS are flags.
+ AT_FLAGS = 8
+
+ // AT_ENTRY is the program entry point.
+ AT_ENTRY = 9
+
+ // AT_NOTELF indicates that the program is not an ELF binary.
+ AT_NOTELF = 10
+
+ // AT_UID is the real UID.
+ AT_UID = 11
+
+ // AT_EUID is the effective UID.
+ AT_EUID = 12
+
+ // AT_GID is the real GID.
+ AT_GID = 13
+
+ // AT_EGID is the effective GID.
+ AT_EGID = 14
+
+ // AT_PLATFORM is a string identifying the CPU.
+ AT_PLATFORM = 15
+
+ // AT_HWCAP are arch-dependent CPU capabilities.
+ AT_HWCAP = 16
+
+ // AT_CLKTCK is the frequency used by times(2).
+ AT_CLKTCK = 17
+
+ // AT_SECURE indicates secure mode.
+ AT_SECURE = 23
+
+ // AT_BASE_PLATFORM is a string identifying the "real" platform. It may
+ // differ from AT_PLATFORM.
+ AT_BASE_PLATFORM = 24
+
+ // AT_RANDOM points to 16 bytes of random data.
+ AT_RANDOM = 25
+
+ // AT_HWCAP2 is an extension of AT_HWCAP.
+ AT_HWCAP2 = 26
+
+ // AT_EXECFN is the path used to execute the program.
+ AT_EXECFN = 31
+
+ // AT_SYSINFO_EHDR is the address of the VDSO.
+ AT_SYSINFO_EHDR = 33
+)
+
+// ELF ET_CORE and ptrace GETREGSET/SETREGSET register set types.
+//
+// See include/uapi/linux/elf.h.
+const (
+ // NT_PRSTATUS is for general purpose registers.
+ NT_PRSTATUS = 0x1
+
+ // NT_PRFPREG is for floating point registers.
+ NT_PRFPREG = 0x2
+
+ // NT_X86_XSTATE is for x86 extended state using xsave.
+ NT_X86_XSTATE = 0x202
+
+ // NT_ARM_TLS is for ARM TLS register.
+ NT_ARM_TLS = 0x401
+)
diff --git a/pkg/abi/linux/epoll.go b/pkg/abi/linux/epoll.go
new file mode 100644
index 000000000..1121a1a92
--- /dev/null
+++ b/pkg/abi/linux/epoll.go
@@ -0,0 +1,62 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/binary"
+)
+
+// Event masks.
+const (
+ EPOLLIN = 0x1
+ EPOLLPRI = 0x2
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDNORM = 0x40
+ EPOLLRDBAND = 0x80
+ EPOLLWRNORM = 0x100
+ EPOLLWRBAND = 0x200
+ EPOLLMSG = 0x400
+ EPOLLRDHUP = 0x2000
+)
+
+// Per-file descriptor flags.
+const (
+ EPOLLEXCLUSIVE = 1 << 28
+ EPOLLWAKEUP = 1 << 29
+ EPOLLONESHOT = 1 << 30
+ EPOLLET = 1 << 31
+
+ // EP_PRIVATE_BITS is fs/eventpoll.c:EP_PRIVATE_BITS, the set of all bits
+ // in an epoll event mask that correspond to flags rather than I/O events.
+ EP_PRIVATE_BITS = EPOLLEXCLUSIVE | EPOLLWAKEUP | EPOLLONESHOT | EPOLLET
+)
+
+// Operation flags.
+const (
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_NONBLOCK = 0x800
+)
+
+// Control operations.
+const (
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
+
+// SizeOfEpollEvent is the size of EpollEvent struct.
+var SizeOfEpollEvent = int(binary.Size(EpollEvent{}))
diff --git a/pkg/abi/linux/epoll_amd64.go b/pkg/abi/linux/epoll_amd64.go
new file mode 100644
index 000000000..7e74b1143
--- /dev/null
+++ b/pkg/abi/linux/epoll_amd64.go
@@ -0,0 +1,29 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// EpollEvent is equivalent to struct epoll_event from epoll(2).
+//
+// +marshal slice:EpollEventSlice
+type EpollEvent struct {
+ Events uint32
+ // Linux makes struct epoll_event::data a __u64. We represent it as
+ // [2]int32 because, on amd64, Linux also makes struct epoll_event
+ // __attribute__((packed)), such that there is no padding between Events
+ // and Data.
+ Data [2]int32
+}
diff --git a/pkg/abi/linux/epoll_arm64.go b/pkg/abi/linux/epoll_arm64.go
new file mode 100644
index 000000000..a35939cc9
--- /dev/null
+++ b/pkg/abi/linux/epoll_arm64.go
@@ -0,0 +1,28 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package linux
+
+// EpollEvent is equivalent to struct epoll_event from epoll(2).
+//
+// +marshal slice:EpollEventSlice
+type EpollEvent struct {
+ Events uint32
+ // Linux makes struct epoll_event::data a __u64, necessitating 4 bytes of
+ // padding here.
+ _ int32
+ Data [2]int32
+}
diff --git a/pkg/abi/linux/errors.go b/pkg/abi/linux/errors.go
new file mode 100644
index 000000000..93f85a864
--- /dev/null
+++ b/pkg/abi/linux/errors.go
@@ -0,0 +1,172 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Errno pairs a Linux errno number with its human-readable description.
+type Errno struct {
+ number int
+ name string
+}
+
+// Number returns the numeric errno value of e.
+func (e *Errno) Number() int {
+ return e.number
+}
+
+// String implements fmt.Stringer.String by returning e's description.
+func (e *Errno) String() string {
+ return e.name
+}
+
+// Errno values from include/uapi/asm-generic/errno-base.h.
+var (
+ EPERM = &Errno{1, "operation not permitted"}
+ ENOENT = &Errno{2, "no such file or directory"}
+ ESRCH = &Errno{3, "no such process"}
+ EINTR = &Errno{4, "interrupted system call"}
+ EIO = &Errno{5, "I/O error"}
+ ENXIO = &Errno{6, "no such device or address"}
+ E2BIG = &Errno{7, "argument list too long"}
+ ENOEXEC = &Errno{8, "exec format error"}
+ EBADF = &Errno{9, "bad file number"}
+ ECHILD = &Errno{10, "no child processes"}
+ EAGAIN = &Errno{11, "try again"}
+ ENOMEM = &Errno{12, "out of memory"}
+ EACCES = &Errno{13, "permission denied"}
+ EFAULT = &Errno{14, "bad address"}
+ ENOTBLK = &Errno{15, "block device required"}
+ EBUSY = &Errno{16, "device or resource busy"}
+ EEXIST = &Errno{17, "file exists"}
+ EXDEV = &Errno{18, "cross-device link"}
+ ENODEV = &Errno{19, "no such device"}
+ ENOTDIR = &Errno{20, "not a directory"}
+ EISDIR = &Errno{21, "is a directory"}
+ EINVAL = &Errno{22, "invalid argument"}
+ ENFILE = &Errno{23, "file table overflow"}
+ EMFILE = &Errno{24, "too many open files"}
+ ENOTTY = &Errno{25, "not a typewriter"}
+ ETXTBSY = &Errno{26, "text file busy"}
+ EFBIG = &Errno{27, "file too large"}
+ ENOSPC = &Errno{28, "no space left on device"}
+ ESPIPE = &Errno{29, "illegal seek"}
+ EROFS = &Errno{30, "read-only file system"}
+ EMLINK = &Errno{31, "too many links"}
+ EPIPE = &Errno{32, "broken pipe"}
+ EDOM = &Errno{33, "math argument out of domain of func"}
+ ERANGE = &Errno{34, "math result not representable"}
+)
+
+// Errno values from include/uapi/asm-generic/errno.h.
+var (
+ EDEADLK = &Errno{35, "resource deadlock would occur"}
+ ENAMETOOLONG = &Errno{36, "file name too long"}
+ ENOLCK = &Errno{37, "no record locks available"}
+ ENOSYS = &Errno{38, "invalid system call number"}
+ ENOTEMPTY = &Errno{39, "directory not empty"}
+ ELOOP = &Errno{40, "too many symbolic links encountered"}
+ EWOULDBLOCK = &Errno{EAGAIN.number, "operation would block"}
+ ENOMSG = &Errno{42, "no message of desired type"}
+ EIDRM = &Errno{43, "identifier removed"}
+ ECHRNG = &Errno{44, "channel number out of range"}
+ EL2NSYNC = &Errno{45, "level 2 not synchronized"}
+ EL3HLT = &Errno{46, "level 3 halted"}
+ EL3RST = &Errno{47, "level 3 reset"}
+ ELNRNG = &Errno{48, "link number out of range"}
+ EUNATCH = &Errno{49, "protocol driver not attached"}
+ ENOCSI = &Errno{50, "no CSI structure available"}
+ EL2HLT = &Errno{51, "level 2 halted"}
+ EBADE = &Errno{52, "invalid exchange"}
+ EBADR = &Errno{53, "invalid request descriptor"}
+ EXFULL = &Errno{54, "exchange full"}
+ ENOANO = &Errno{55, "no anode"}
+ EBADRQC = &Errno{56, "invalid request code"}
+ EBADSLT = &Errno{57, "invalid slot"}
+ EDEADLOCK = EDEADLK
+ EBFONT = &Errno{59, "bad font file format"}
+ ENOSTR = &Errno{60, "device not a stream"}
+ ENODATA = &Errno{61, "no data available"}
+ ETIME = &Errno{62, "timer expired"}
+ ENOSR = &Errno{63, "out of streams resources"}
+ ENONET = &Errno{64, "machine is not on the network"}
+ ENOPKG = &Errno{65, "package not installed"}
+ EREMOTE = &Errno{66, "object is remote"}
+ ENOLINK = &Errno{67, "link has been severed"}
+ EADV = &Errno{68, "advertise error"}
+ ESRMNT = &Errno{69, "srmount error"}
+ ECOMM = &Errno{70, "communication error on send"}
+ EPROTO = &Errno{71, "protocol error"}
+ EMULTIHOP = &Errno{72, "multihop attempted"}
+ EDOTDOT = &Errno{73, "RFS specific error"}
+ EBADMSG = &Errno{74, "not a data message"}
+ EOVERFLOW = &Errno{75, "value too large for defined data type"}
+ ENOTUNIQ = &Errno{76, "name not unique on network"}
+ EBADFD = &Errno{77, "file descriptor in bad state"}
+ EREMCHG = &Errno{78, "remote address changed"}
+ ELIBACC = &Errno{79, "can not access a needed shared library"}
+ ELIBBAD = &Errno{80, "accessing a corrupted shared library"}
+ ELIBSCN = &Errno{81, ".lib section in a.out corrupted"}
+ ELIBMAX = &Errno{82, "attempting to link in too many shared libraries"}
+ ELIBEXEC = &Errno{83, "cannot exec a shared library directly"}
+ EILSEQ = &Errno{84, "illegal byte sequence"}
+ ERESTART = &Errno{85, "interrupted system call should be restarted"}
+ ESTRPIPE = &Errno{86, "streams pipe error"}
+ EUSERS = &Errno{87, "too many users"}
+ ENOTSOCK = &Errno{88, "socket operation on non-socket"}
+ EDESTADDRREQ = &Errno{89, "destination address required"}
+ EMSGSIZE = &Errno{90, "message too long"}
+ EPROTOTYPE = &Errno{91, "protocol wrong type for socket"}
+ ENOPROTOOPT = &Errno{92, "protocol not available"}
+ EPROTONOSUPPORT = &Errno{93, "protocol not supported"}
+ ESOCKTNOSUPPORT = &Errno{94, "socket type not supported"}
+ EOPNOTSUPP = &Errno{95, "operation not supported on transport endpoint"}
+ EPFNOSUPPORT = &Errno{96, "protocol family not supported"}
+ EAFNOSUPPORT = &Errno{97, "address family not supported by protocol"}
+ EADDRINUSE = &Errno{98, "address already in use"}
+ EADDRNOTAVAIL = &Errno{99, "cannot assign requested address"}
+ ENETDOWN = &Errno{100, "network is down"}
+ ENETUNREACH = &Errno{101, "network is unreachable"}
+ ENETRESET = &Errno{102, "network dropped connection because of reset"}
+ ECONNABORTED = &Errno{103, "software caused connection abort"}
+ ECONNRESET = &Errno{104, "connection reset by peer"}
+ ENOBUFS = &Errno{105, "no buffer space available"}
+ EISCONN = &Errno{106, "transport endpoint is already connected"}
+ ENOTCONN = &Errno{107, "transport endpoint is not connected"}
+ ESHUTDOWN = &Errno{108, "cannot send after transport endpoint shutdown"}
+ ETOOMANYREFS = &Errno{109, "too many references: cannot splice"}
+ ETIMEDOUT = &Errno{110, "connection timed out"}
+ ECONNREFUSED = &Errno{111, "connection refused"}
+ EHOSTDOWN = &Errno{112, "host is down"}
+ EHOSTUNREACH = &Errno{113, "no route to host"}
+ EALREADY = &Errno{114, "operation already in progress"}
+ EINPROGRESS = &Errno{115, "operation now in progress"}
+ ESTALE = &Errno{116, "stale file handle"}
+ EUCLEAN = &Errno{117, "structure needs cleaning"}
+ ENOTNAM = &Errno{118, "not a XENIX named type file"}
+ ENAVAIL = &Errno{119, "no XENIX semaphores available"}
+ EISNAM = &Errno{120, "is a named type file"}
+ EREMOTEIO = &Errno{121, "remote I/O error"}
+ EDQUOT = &Errno{122, "quota exceeded"}
+ ENOMEDIUM = &Errno{123, "no medium found"}
+ EMEDIUMTYPE = &Errno{124, "wrong medium type"}
+ ECANCELED = &Errno{125, "operation Canceled"}
+ ENOKEY = &Errno{126, "required key not available"}
+ EKEYEXPIRED = &Errno{127, "key has expired"}
+ EKEYREVOKED = &Errno{128, "key has been revoked"}
+ EKEYREJECTED = &Errno{129, "key was rejected by service"}
+ EOWNERDEAD = &Errno{130, "owner died"}
+ ENOTRECOVERABLE = &Errno{131, "state not recoverable"}
+ ERFKILL = &Errno{132, "operation not possible due to RF-kill"}
+ EHWPOISON = &Errno{133, "memory page has hardware error"}
+)
diff --git a/pkg/abi/linux/eventfd.go b/pkg/abi/linux/eventfd.go
new file mode 100644
index 000000000..9c479fc8f
--- /dev/null
+++ b/pkg/abi/linux/eventfd.go
@@ -0,0 +1,22 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Constants for eventfd2(2).
+const (
+ EFD_SEMAPHORE = 0x1
+ EFD_CLOEXEC = O_CLOEXEC
+ EFD_NONBLOCK = O_NONBLOCK
+)
diff --git a/pkg/abi/linux/exec.go b/pkg/abi/linux/exec.go
new file mode 100644
index 000000000..579d46c41
--- /dev/null
+++ b/pkg/abi/linux/exec.go
@@ -0,0 +1,18 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// TASK_COMM_LEN is the task command name length.
+const TASK_COMM_LEN = 16
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go
new file mode 100644
index 000000000..6663a199c
--- /dev/null
+++ b/pkg/abi/linux/fcntl.go
@@ -0,0 +1,69 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Commands from linux/fcntl.h.
+const (
+ F_DUPFD = 0
+ F_GETFD = 1
+ F_SETFD = 2
+ F_GETFL = 3
+ F_SETFL = 4
+ F_SETLK = 6
+ F_SETLKW = 7
+ F_SETOWN = 8
+ F_GETOWN = 9
+ F_SETOWN_EX = 15
+ F_GETOWN_EX = 16
+ F_DUPFD_CLOEXEC = 1024 + 6
+ F_SETPIPE_SZ = 1024 + 7
+ F_GETPIPE_SZ = 1024 + 8
+)
+
+// Lock types for Flock.Type, as used with F_SETLK.
+const (
+ F_RDLCK = 0
+ F_WRLCK = 1
+ F_UNLCK = 2
+)
+
+// Flags for fcntl.
+const (
+ FD_CLOEXEC = 00000001
+)
+
+// Flock is the lock structure for F_SETLK.
+type Flock struct {
+ Type int16
+ Whence int16
+ _ [4]byte
+ Start int64
+ Len int64
+ Pid int32
+ _ [4]byte
+}
+
+// Flags for F_SETOWN_EX and F_GETOWN_EX.
+const (
+ F_OWNER_TID = 0
+ F_OWNER_PID = 1
+ F_OWNER_PGRP = 2
+)
+
+// FOwnerEx is the owner structure for F_SETOWN_EX and F_GETOWN_EX.
+type FOwnerEx struct {
+ Type int32
+ PID int32
+}
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
new file mode 100644
index 000000000..055ac1d7c
--- /dev/null
+++ b/pkg/abi/linux/file.go
@@ -0,0 +1,383 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "fmt"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/binary"
+)
+
+// Constants for open(2).
+const (
+ O_ACCMODE = 000000003
+ O_RDONLY = 000000000
+ O_WRONLY = 000000001
+ O_RDWR = 000000002
+ O_CREAT = 000000100
+ O_EXCL = 000000200
+ O_NOCTTY = 000000400
+ O_TRUNC = 000001000
+ O_APPEND = 000002000
+ O_NONBLOCK = 000004000
+ O_DSYNC = 000010000
+ O_ASYNC = 000020000
+ O_NOATIME = 001000000
+ O_CLOEXEC = 002000000
+ O_SYNC = 004000000 // __O_SYNC in Linux
+ O_PATH = 010000000
+ O_TMPFILE = 020000000 // __O_TMPFILE in Linux
+)
+
+// Constants for fstatat(2).
+const (
+ AT_SYMLINK_NOFOLLOW = 0x100
+)
+
+// Constants for mount(2).
+const (
+ MS_RDONLY = 0x1
+ MS_NOSUID = 0x2
+ MS_NODEV = 0x4
+ MS_NOEXEC = 0x8
+ MS_SYNCHRONOUS = 0x10
+ MS_REMOUNT = 0x20
+ MS_MANDLOCK = 0x40
+ MS_DIRSYNC = 0x80
+ MS_NOATIME = 0x400
+ MS_NODIRATIME = 0x800
+ MS_BIND = 0x1000
+ MS_MOVE = 0x2000
+ MS_REC = 0x4000
+
+ MS_POSIXACL = 0x10000
+ MS_UNBINDABLE = 0x20000
+ MS_PRIVATE = 0x40000
+ MS_SLAVE = 0x80000
+ MS_SHARED = 0x100000
+ MS_RELATIME = 0x200000
+ MS_KERNMOUNT = 0x400000
+ MS_I_VERSION = 0x800000
+ MS_STRICTATIME = 0x1000000
+
+ MS_MGC_VAL = 0xC0ED0000
+ MS_MGC_MSK = 0xffff0000
+)
+
+// Constants for umount2(2).
+const (
+ MNT_FORCE = 0x1
+ MNT_DETACH = 0x2
+ MNT_EXPIRE = 0x4
+ UMOUNT_NOFOLLOW = 0x8
+)
+
+// Constants for unlinkat(2).
+const (
+ AT_REMOVEDIR = 0x200
+)
+
+// Constants for linkat(2) and fchownat(2).
+const (
+ AT_SYMLINK_FOLLOW = 0x400
+ AT_EMPTY_PATH = 0x1000
+)
+
+// Constants for all file-related ...at(2) syscalls.
+const (
+ AT_FDCWD = -100
+)
+
+// Special values for the ns field in utimensat(2).
+const (
+ UTIME_NOW = ((1 << 30) - 1)
+ UTIME_OMIT = ((1 << 30) - 2)
+)
+
+// MaxSymlinkTraversals is the maximum number of links that will be followed by
+// the kernel to resolve a symlink.
+const MaxSymlinkTraversals = 40
+
+// Constants for flock(2).
+const (
+ LOCK_SH = 1 // shared lock
+ LOCK_EX = 2 // exclusive lock
+ LOCK_NB = 4 // or'd with one of the above to prevent blocking
+ LOCK_UN = 8 // remove lock
+)
+
+// Values for mode_t.
+const (
+ S_IFMT = 0170000
+ S_IFSOCK = 0140000
+ S_IFLNK = 0120000
+ S_IFREG = 0100000
+ S_IFBLK = 060000
+ S_IFDIR = 040000
+ S_IFCHR = 020000
+ S_IFIFO = 010000
+
+ FileTypeMask = S_IFMT
+ ModeSocket = S_IFSOCK
+ ModeSymlink = S_IFLNK
+ ModeRegular = S_IFREG
+ ModeBlockDevice = S_IFBLK
+ ModeDirectory = S_IFDIR
+ ModeCharacterDevice = S_IFCHR
+ ModeNamedPipe = S_IFIFO
+
+ S_ISUID = 04000
+ S_ISGID = 02000
+ S_ISVTX = 01000
+
+ ModeSetUID = S_ISUID
+ ModeSetGID = S_ISGID
+ ModeSticky = S_ISVTX
+
+ ModeUserAll = 0700
+ ModeUserRead = 0400
+ ModeUserWrite = 0200
+ ModeUserExec = 0100
+ ModeGroupAll = 0070
+ ModeGroupRead = 0040
+ ModeGroupWrite = 0020
+ ModeGroupExec = 0010
+ ModeOtherAll = 0007
+ ModeOtherRead = 0004
+ ModeOtherWrite = 0002
+ ModeOtherExec = 0001
+ PermissionsMask = 0777
+)
+
+// Values for linux_dirent64.d_type.
+const (
+ DT_UNKNOWN = 0
+ DT_FIFO = 1
+ DT_CHR = 2
+ DT_DIR = 4
+ DT_BLK = 6
+ DT_REG = 8
+ DT_LNK = 10
+ DT_SOCK = 12
+ DT_WHT = 14
+)
+
+// DirentType are the friendly strings for linux_dirent64.d_type.
+var DirentType = abi.ValueSet{
+ DT_UNKNOWN: "DT_UNKNOWN",
+ DT_FIFO: "DT_FIFO",
+ DT_CHR: "DT_CHR",
+ DT_DIR: "DT_DIR",
+ DT_BLK: "DT_BLK",
+ DT_REG: "DT_REG",
+ DT_LNK: "DT_LNK",
+ DT_SOCK: "DT_SOCK",
+ DT_WHT: "DT_WHT",
+}
+
+// Values for preadv2/pwritev2.
+const (
+ // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+ // accepted as a valid flag argument for preadv2/pwritev2.
+ RWF_HIPRI = 0x00000001
+ RWF_DSYNC = 0x00000002
+ RWF_SYNC = 0x00000004
+ RWF_VALID = RWF_HIPRI | RWF_DSYNC | RWF_SYNC
+)
+
+// SizeOfStat is the size of a Stat struct.
+var SizeOfStat = binary.Size(Stat{})
+
+// Flags for statx.
+const (
+ AT_STATX_SYNC_TYPE = 0x6000
+ AT_STATX_SYNC_AS_STAT = 0x0000
+ AT_STATX_FORCE_SYNC = 0x2000
+ AT_STATX_DONT_SYNC = 0x4000
+)
+
+// Mask values for statx.
+const (
+ STATX_TYPE = 0x00000001
+ STATX_MODE = 0x00000002
+ STATX_NLINK = 0x00000004
+ STATX_UID = 0x00000008
+ STATX_GID = 0x00000010
+ STATX_ATIME = 0x00000020
+ STATX_MTIME = 0x00000040
+ STATX_CTIME = 0x00000080
+ STATX_INO = 0x00000100
+ STATX_SIZE = 0x00000200
+ STATX_BLOCKS = 0x00000400
+ STATX_BASIC_STATS = 0x000007ff
+ STATX_BTIME = 0x00000800
+ STATX_ALL = 0x00000fff
+ STATX__RESERVED = 0x80000000
+)
+
+// Bitmasks for Statx.Attributes and Statx.AttributesMask, from
+// include/uapi/linux/stat.h.
+const (
+ STATX_ATTR_COMPRESSED = 0x00000004
+ STATX_ATTR_IMMUTABLE = 0x00000010
+ STATX_ATTR_APPEND = 0x00000020
+ STATX_ATTR_NODUMP = 0x00000040
+ STATX_ATTR_ENCRYPTED = 0x00000800
+ STATX_ATTR_AUTOMOUNT = 0x00001000
+)
+
+// Statx represents struct statx.
+//
+// +marshal
+type Statx struct {
+ Mask uint32
+ Blksize uint32
+ Attributes uint64
+ Nlink uint32
+ UID uint32
+ GID uint32
+ Mode uint16
+ _ uint16
+ Ino uint64
+ Size uint64
+ Blocks uint64
+ AttributesMask uint64
+ Atime StatxTimestamp
+ Btime StatxTimestamp
+ Ctime StatxTimestamp
+ Mtime StatxTimestamp
+ RdevMajor uint32
+ RdevMinor uint32
+ DevMajor uint32
+ DevMinor uint32
+}
+
+// SizeOfStatx is the size of a Statx struct.
+var SizeOfStatx = binary.Size(Statx{})
+
+// FileMode represents a mode_t.
+type FileMode uint16
+
+// Permissions returns just the permission bits.
+func (m FileMode) Permissions() FileMode {
+ return m & PermissionsMask
+}
+
+// FileType returns just the file type bits.
+func (m FileMode) FileType() FileMode {
+ return m & FileTypeMask
+}
+
+// ExtraBits returns everything but the file type and permission bits.
+func (m FileMode) ExtraBits() FileMode {
+ return m &^ (PermissionsMask | FileTypeMask)
+}
+
+// IsDir returns true if file type represents a directory.
+func (m FileMode) IsDir() bool {
+ return m.FileType() == S_IFDIR
+}
+
+// String returns a string representation of m.
+func (m FileMode) String() string {
+ var s []string
+ if ft := m.FileType(); ft != 0 {
+ s = append(s, fileType.Parse(uint64(ft)))
+ }
+ if eb := m.ExtraBits(); eb != 0 {
+ s = append(s, modeExtraBits.Parse(uint64(eb)))
+ }
+ s = append(s, fmt.Sprintf("0o%o", m.Permissions()))
+ return strings.Join(s, "|")
+}
+
+// DirentType maps m's file type to the matching DT_* value for (struct
+// dirent)::d_type, returning DT_UNKNOWN for unrecognized file types.
+func (m FileMode) DirentType() uint8 {
+ switch m.FileType() {
+ case ModeSocket:
+ return DT_SOCK
+ case ModeSymlink:
+ return DT_LNK
+ case ModeRegular:
+ return DT_REG
+ case ModeBlockDevice:
+ return DT_BLK
+ case ModeDirectory:
+ return DT_DIR
+ case ModeCharacterDevice:
+ return DT_CHR
+ case ModeNamedPipe:
+ return DT_FIFO
+ default:
+ return DT_UNKNOWN
+ }
+}
+
+var modeExtraBits = abi.FlagSet{
+ {
+ Flag: ModeSetUID,
+ Name: "S_ISUID",
+ },
+ {
+ Flag: ModeSetGID,
+ Name: "S_ISGID",
+ },
+ {
+ Flag: ModeSticky,
+ Name: "S_ISVTX",
+ },
+}
+
+var fileType = abi.ValueSet{ // Names printed for the S_IF* file type values.
+ ModeSocket: "S_IFSOCK",
+ ModeSymlink: "S_IFLNK", // Spelled like the S_IFLNK constant it names.
+ ModeRegular: "S_IFREG",
+ ModeBlockDevice: "S_IFBLK",
+ ModeDirectory: "S_IFDIR",
+ ModeCharacterDevice: "S_IFCHR",
+ ModeNamedPipe: "S_IFIFO",
+}
+
+// Constants for memfd_create(2). Source: include/uapi/linux/memfd.h
+const (
+ MFD_CLOEXEC = 0x0001
+ MFD_ALLOW_SEALING = 0x0002
+)
+
+// Constants related to file seals. Source: include/uapi/{asm-generic,linux}/fcntl.h
+const (
+ F_LINUX_SPECIFIC_BASE = 1024
+ F_ADD_SEALS = F_LINUX_SPECIFIC_BASE + 9
+ F_GET_SEALS = F_LINUX_SPECIFIC_BASE + 10
+
+ F_SEAL_SEAL = 0x0001 // Prevent further seals from being set.
+ F_SEAL_SHRINK = 0x0002 // Prevent file from shrinking.
+ F_SEAL_GROW = 0x0004 // Prevent file from growing.
+ F_SEAL_WRITE = 0x0008 // Prevent writes.
+)
+
+// Constants related to fallocate(2). Source: include/uapi/linux/falloc.h
+const (
+ FALLOC_FL_KEEP_SIZE = 0x01
+ FALLOC_FL_PUNCH_HOLE = 0x02
+ FALLOC_FL_NO_HIDE_STALE = 0x04
+ FALLOC_FL_COLLAPSE_RANGE = 0x08
+ FALLOC_FL_ZERO_RANGE = 0x10
+ FALLOC_FL_INSERT_RANGE = 0x20
+ FALLOC_FL_UNSHARE_RANGE = 0x40
+)
diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go
new file mode 100644
index 000000000..6b72364ea
--- /dev/null
+++ b/pkg/abi/linux/file_amd64.go
@@ -0,0 +1,46 @@
+// Copyright 2018 The gVisor Authors.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// Constants for open(2).
+const (
+ O_DIRECT = 000040000
+ O_LARGEFILE = 000100000
+ O_DIRECTORY = 000200000
+ O_NOFOLLOW = 000400000
+)
+
+// Stat represents struct stat.
+//
+// +marshal
+type Stat struct {
+ Dev uint64
+ Ino uint64
+ Nlink uint64
+ Mode uint32
+ UID uint32
+ GID uint32
+ _ int32
+ Rdev uint64
+ Size int64
+ Blksize int64
+ Blocks int64
+ ATime Timespec
+ MTime Timespec
+ CTime Timespec
+ _ [3]int64
+}
diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go
new file mode 100644
index 000000000..6492c9038
--- /dev/null
+++ b/pkg/abi/linux/file_arm64.go
@@ -0,0 +1,47 @@
+// Copyright 2019 The gVisor Authors.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package linux
+
+// Constants for open(2).
+const (
+ O_DIRECTORY = 000040000
+ O_NOFOLLOW = 000100000
+ O_DIRECT = 000200000
+ O_LARGEFILE = 000400000
+)
+
+// Stat represents struct stat.
+//
+// +marshal
+type Stat struct {
+ Dev uint64
+ Ino uint64
+ Mode uint32
+ Nlink uint32
+ UID uint32
+ GID uint32
+ Rdev uint64
+ _ uint64
+ Size int64
+ Blksize int32
+ _ int32
+ Blocks int64
+ ATime Timespec
+ MTime Timespec
+ CTime Timespec
+ _ [2]int32
+}
diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go
new file mode 100644
index 000000000..158d2db5b
--- /dev/null
+++ b/pkg/abi/linux/fs.go
@@ -0,0 +1,103 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Filesystem types used in statfs(2).
+//
+// See linux/magic.h.
+const (
+ ANON_INODE_FS_MAGIC = 0x09041934
+ DEVPTS_SUPER_MAGIC = 0x00001cd1
+ EXT_SUPER_MAGIC = 0xef53
+ OVERLAYFS_SUPER_MAGIC = 0x794c7630
+ PIPEFS_MAGIC = 0x50495045
+ PROC_SUPER_MAGIC = 0x9fa0
+ RAMFS_MAGIC = 0x858458f6 // Distinct from ANON_INODE_FS_MAGIC, per linux/magic.h.
+ SOCKFS_MAGIC = 0x534F434B
+ SYSFS_MAGIC = 0x62656572
+ TMPFS_MAGIC = 0x01021994
+ V9FS_MAGIC = 0x01021997
+)
+
+// Filesystem path limits, from uapi/linux/limits.h.
+const (
+ NAME_MAX = 255
+ PATH_MAX = 4096
+)
+
+// Statfs is struct statfs, from uapi/asm-generic/statfs.h.
+//
+// +marshal
+type Statfs struct {
+ // Type is one of the filesystem magic values, defined above.
+ Type uint64
+
+ // BlockSize is the data block size.
+ BlockSize int64
+
+ // Blocks is the total number of data blocks in the filesystem.
+ Blocks uint64
+
+ // BlocksFree is the number of free blocks.
+ BlocksFree uint64
+
+ // BlocksAvailable is the number of blocks free for use by
+ // unprivileged users.
+ BlocksAvailable uint64
+
+ // Files is the total number of file nodes on the filesystem.
+ Files uint64
+
+ // FilesFree is the number of free file nodes on the filesystem.
+ FilesFree uint64
+
+ // FSID is the filesystem ID.
+ FSID [2]int32
+
+ // NameLength is the maximum file name length.
+ NameLength uint64
+
+ // FragmentSize is equivalent to BlockSize.
+ FragmentSize int64
+
+ // Flags is the set of filesystem mount flags.
+ Flags uint64
+
+ // Spare is unused.
+ Spare [4]uint64
+}
+
+// Whence argument to lseek(2), from include/uapi/linux/fs.h.
+const (
+ SEEK_SET = 0
+ SEEK_CUR = 1
+ SEEK_END = 2
+ SEEK_DATA = 3
+ SEEK_HOLE = 4
+)
+
+// Sync_file_range flags, from include/uapi/linux/fs.h
+const (
+ SYNC_FILE_RANGE_WAIT_BEFORE = 1
+ SYNC_FILE_RANGE_WRITE = 2
+ SYNC_FILE_RANGE_WAIT_AFTER = 4
+)
+
+// Flag argument to renameat2(2), from include/uapi/linux/fs.h.
+const (
+ RENAME_NOREPLACE = (1 << 0) // Don't overwrite target.
+ RENAME_EXCHANGE = (1 << 1) // Exchange src and dst.
+ RENAME_WHITEOUT = (1 << 2) // Whiteout src.
+)
diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go
new file mode 100644
index 000000000..08bfde3b5
--- /dev/null
+++ b/pkg/abi/linux/futex.go
@@ -0,0 +1,62 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// From <linux/futex.h> and <sys/time.h>.
+// Flags are used in syscall futex(2).
+const (
+ FUTEX_WAIT = 0
+ FUTEX_WAKE = 1
+ FUTEX_FD = 2
+ FUTEX_REQUEUE = 3
+ FUTEX_CMP_REQUEUE = 4
+ FUTEX_WAKE_OP = 5
+ FUTEX_LOCK_PI = 6
+ FUTEX_UNLOCK_PI = 7
+ FUTEX_TRYLOCK_PI = 8
+ FUTEX_WAIT_BITSET = 9
+ FUTEX_WAKE_BITSET = 10
+ FUTEX_WAIT_REQUEUE_PI = 11
+ FUTEX_CMP_REQUEUE_PI = 12
+
+ FUTEX_PRIVATE_FLAG = 128
+ FUTEX_CLOCK_REALTIME = 256
+)
+
+// These flags are from <linux/futex.h> and are used in FUTEX_WAKE_OP
+// to define the operations.
+const (
+ FUTEX_OP_SET = 0
+ FUTEX_OP_ADD = 1
+ FUTEX_OP_OR = 2
+ FUTEX_OP_ANDN = 3
+ FUTEX_OP_XOR = 4
+ FUTEX_OP_OPARG_SHIFT = 8
+ FUTEX_OP_CMP_EQ = 0
+ FUTEX_OP_CMP_NE = 1
+ FUTEX_OP_CMP_LT = 2
+ FUTEX_OP_CMP_LE = 3
+ FUTEX_OP_CMP_GT = 4
+ FUTEX_OP_CMP_GE = 5
+)
+
+// FUTEX_TID_MASK is the TID portion of a PI futex word.
+const FUTEX_TID_MASK = 0x3fffffff
+
+// Constants used for priority-inheritance futexes.
+const (
+ FUTEX_WAITERS = 0x80000000
+ FUTEX_OWNER_DIED = 0x40000000
+)
diff --git a/pkg/abi/linux/inotify.go b/pkg/abi/linux/inotify.go
new file mode 100644
index 000000000..2d08194ba
--- /dev/null
+++ b/pkg/abi/linux/inotify.go
@@ -0,0 +1,97 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Inotify events observable by userspace. These directly correspond to
+// filesystem operations and there may only be a single of them per inotify
+// event read from an inotify fd.
+const (
+ // IN_ACCESS indicates a file was accessed.
+ IN_ACCESS = 0x00000001
+ // IN_MODIFY indicates a file was modified.
+ IN_MODIFY = 0x00000002
+ // IN_ATTRIB indicates a watch target's metadata changed.
+ IN_ATTRIB = 0x00000004
+ // IN_CLOSE_WRITE indicates a writable file was closed.
+ IN_CLOSE_WRITE = 0x00000008
+ // IN_CLOSE_NOWRITE indicates a non-writable file was closed.
+ IN_CLOSE_NOWRITE = 0x00000010
+ // IN_OPEN indicates a file was opened.
+ IN_OPEN = 0x00000020
+ // IN_MOVED_FROM indicates a file was moved from X.
+ IN_MOVED_FROM = 0x00000040
+ // IN_MOVED_TO indicates a file was moved to Y.
+ IN_MOVED_TO = 0x00000080
+ // IN_CREATE indicates a file was created in a watched directory.
+ IN_CREATE = 0x00000100
+ // IN_DELETE indicates a file was deleted in a watched directory.
+ IN_DELETE = 0x00000200
+ // IN_DELETE_SELF indicates a watch target itself was deleted.
+ IN_DELETE_SELF = 0x00000400
+ // IN_MOVE_SELF indicates a watch target itself was moved.
+ IN_MOVE_SELF = 0x00000800
+ // IN_ALL_EVENTS is a mask for all observable userspace events.
+ IN_ALL_EVENTS = 0x00000fff
+)
+
+// Inotify control events. These may be present in their own events, or ORed
+// with other observable events.
+const (
+ // IN_UNMOUNT indicates the backing filesystem was unmounted.
+ IN_UNMOUNT = 0x00002000
+ // IN_Q_OVERFLOW indicates the event queue overflowed.
+ IN_Q_OVERFLOW = 0x00004000
+ // IN_IGNORED indicates a watch was removed, either implicitly or through
+ // inotify_rm_watch(2).
+ IN_IGNORED = 0x00008000
+ // IN_ISDIR indicates the subject of an event was a directory.
+ IN_ISDIR = 0x40000000
+)
+
+// Feature flags for inotify_add_watch(2).
+const (
+ // IN_ONLYDIR indicates that a path should be watched only if it's a
+ // directory.
+ IN_ONLYDIR = 0x01000000
+ // IN_DONT_FOLLOW indicates that the watch path shouldn't be resolved if
+ // it's a symlink.
+ IN_DONT_FOLLOW = 0x02000000
+ // IN_EXCL_UNLINK indicates events to this watch from unlinked objects
+ // should be filtered out.
+ IN_EXCL_UNLINK = 0x04000000
+ // IN_MASK_ADD indicates the provided mask should be ORed into any existing
+ // watch on the provided path.
+ IN_MASK_ADD = 0x20000000
+ // IN_ONESHOT indicates the watch should be removed after one event.
+ IN_ONESHOT = 0x80000000
+)
+
+// Feature flags for inotify_init1(2).
+const (
+ // IN_CLOEXEC is an alias for O_CLOEXEC. It indicates that the inotify
+ // fd should be closed on exec(2) and friends.
+ IN_CLOEXEC = 0x00080000
+ // IN_NONBLOCK is an alias for O_NONBLOCK. It indicates I/O syscall on the
+ // inotify fd should not block.
+ IN_NONBLOCK = 0x00000800
+)
+
+// ALL_INOTIFY_BITS contains all the bits for all possible inotify events. It's
+// defined in the Linux source at "include/linux/inotify.h".
+const ALL_INOTIFY_BITS = IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE |
+ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE |
+ IN_DELETE | IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | IN_Q_OVERFLOW |
+ IN_IGNORED | IN_ONLYDIR | IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD |
+ IN_ISDIR | IN_ONESHOT
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
new file mode 100644
index 000000000..2062e6a4b
--- /dev/null
+++ b/pkg/abi/linux/ioctl.go
@@ -0,0 +1,100 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// ioctl(2) requests provided by asm-generic/ioctls.h
+//
+// These are mostly ordered by request number (low byte).
+const (
+ TCGETS = 0x00005401
+ TCSETS = 0x00005402
+ TCSETSW = 0x00005403
+ TCSETSF = 0x00005404
+ TCSBRK = 0x00005409
+ TIOCEXCL = 0x0000540c
+ TIOCNXCL = 0x0000540d
+ TIOCSCTTY = 0x0000540e
+ TIOCGPGRP = 0x0000540f
+ TIOCSPGRP = 0x00005410
+ TIOCOUTQ = 0x00005411
+ TIOCSTI = 0x00005412
+ TIOCGWINSZ = 0x00005413
+ TIOCSWINSZ = 0x00005414
+ TIOCMGET = 0x00005415
+ TIOCMBIS = 0x00005416
+ TIOCMBIC = 0x00005417
+ TIOCMSET = 0x00005418
+ TIOCINQ = 0x0000541b
+ FIONREAD = TIOCINQ
+ FIONBIO = 0x00005421
+ TIOCSETD = 0x00005423
+ TIOCNOTTY = 0x00005422
+ TIOCGETD = 0x00005424
+ TCSBRKP = 0x00005425
+ TIOCSBRK = 0x00005427
+ TIOCCBRK = 0x00005428
+ TIOCGSID = 0x00005429
+ TIOCGPTN = 0x80045430
+ TIOCSPTLCK = 0x40045431
+ TIOCGDEV = 0x80045432
+ TIOCVHANGUP = 0x00005437
+ TCFLSH = 0x0000540b
+ TIOCCONS = 0x0000541d
+ TIOCSSERIAL = 0x0000541f
+ TIOCGEXCL = 0x80045440
+ TIOCGPTPEER = 0x80045441
+ TIOCGICOUNT = 0x0000545d
+ FIONCLEX = 0x00005450
+ FIOCLEX = 0x00005451
+ FIOASYNC = 0x00005452
+ FIOSETOWN = 0x00008901
+ SIOCSPGRP = 0x00008902
+ FIOGETOWN = 0x00008903
+ SIOCGPGRP = 0x00008904
+)
+
+// ioctl(2) requests provided by uapi/linux/sockios.h
+const (
+ SIOCGIFMEM = 0x891f
+ SIOCGIFPFLAGS = 0x8935
+ SIOCGMIIPHY = 0x8947
+ SIOCGMIIREG = 0x8948
+)
+
+// ioctl(2) directions. Used to calculate request numbers.
+// Constants from asm-generic/ioctl.h.
+const (
+ _IOC_NONE = 0
+ _IOC_WRITE = 1
+ _IOC_READ = 2
+)
+
+// Constants from asm-generic/ioctl.h.
+const (
+ _IOC_NRBITS = 8
+ _IOC_TYPEBITS = 8
+ _IOC_SIZEBITS = 14
+ _IOC_DIRBITS = 2
+
+ _IOC_NRSHIFT = 0
+ _IOC_TYPESHIFT = _IOC_NRSHIFT + _IOC_NRBITS
+ _IOC_SIZESHIFT = _IOC_TYPESHIFT + _IOC_TYPEBITS
+ _IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS
+)
+
+// IOC builds an ioctl request number from dir, typ, nr and size, mirroring the _IOC macro in asm-generic/ioctl.h.
+func IOC(dir, typ, nr, size uint32) uint32 {
+ return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT
+}
diff --git a/pkg/abi/linux/ioctl_tun.go b/pkg/abi/linux/ioctl_tun.go
new file mode 100644
index 000000000..c59c9c136
--- /dev/null
+++ b/pkg/abi/linux/ioctl_tun.go
@@ -0,0 +1,29 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// ioctl(2) request numbers from linux/if_tun.h
+var (
+ TUNSETIFF = IOC(_IOC_WRITE, 'T', 202, 4)
+ TUNGETIFF = IOC(_IOC_READ, 'T', 210, 4)
+)
+
+// Flags from linux/if_tun.h
+const (
+ IFF_TUN = 0x0001
+ IFF_TAP = 0x0002
+ IFF_NO_PI = 0x1000
+ IFF_NOFILTER = 0x1000
+)
diff --git a/pkg/abi/linux/ip.go b/pkg/abi/linux/ip.go
new file mode 100644
index 000000000..31e56ffa6
--- /dev/null
+++ b/pkg/abi/linux/ip.go
@@ -0,0 +1,151 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// IP protocols
+const (
+ IPPROTO_IP = 0
+ IPPROTO_ICMP = 1
+ IPPROTO_IGMP = 2
+ IPPROTO_IPIP = 4
+ IPPROTO_TCP = 6
+ IPPROTO_EGP = 8
+ IPPROTO_PUP = 12
+ IPPROTO_UDP = 17
+ IPPROTO_IDP = 22
+ IPPROTO_TP = 29
+ IPPROTO_DCCP = 33
+ IPPROTO_IPV6 = 41
+ IPPROTO_RSVP = 46
+ IPPROTO_GRE = 47
+ IPPROTO_ESP = 50
+ IPPROTO_AH = 51
+ IPPROTO_MTP = 92
+ IPPROTO_BEETPH = 94
+ IPPROTO_ENCAP = 98
+ IPPROTO_PIM = 103
+ IPPROTO_COMP = 108
+ IPPROTO_SCTP = 132
+ IPPROTO_UDPLITE = 136
+ IPPROTO_MPLS = 137
+ IPPROTO_RAW = 255
+)
+
+// Socket options from uapi/linux/in.h
+const (
+ IP_TOS = 1
+ IP_TTL = 2
+ IP_HDRINCL = 3
+ IP_OPTIONS = 4
+ IP_ROUTER_ALERT = 5
+ IP_RECVOPTS = 6
+ IP_RETOPTS = 7
+ IP_PKTINFO = 8
+ IP_PKTOPTIONS = 9
+ IP_MTU_DISCOVER = 10
+ IP_RECVERR = 11
+ IP_RECVTTL = 12
+ IP_RECVTOS = 13
+ IP_MTU = 14
+ IP_FREEBIND = 15
+ IP_IPSEC_POLICY = 16
+ IP_XFRM_POLICY = 17
+ IP_PASSSEC = 18
+ IP_TRANSPARENT = 19
+ IP_ORIGDSTADDR = 20
+ IP_RECVORIGDSTADDR = IP_ORIGDSTADDR
+ IP_MINTTL = 21
+ IP_NODEFRAG = 22
+ IP_CHECKSUM = 23
+ IP_BIND_ADDRESS_NO_PORT = 24
+ IP_RECVFRAGSIZE = 25
+ IP_MULTICAST_IF = 32
+ IP_MULTICAST_TTL = 33
+ IP_MULTICAST_LOOP = 34
+ IP_ADD_MEMBERSHIP = 35
+ IP_DROP_MEMBERSHIP = 36
+ IP_UNBLOCK_SOURCE = 37
+ IP_BLOCK_SOURCE = 38
+ IP_ADD_SOURCE_MEMBERSHIP = 39
+ IP_DROP_SOURCE_MEMBERSHIP = 40
+ IP_MSFILTER = 41
+ MCAST_JOIN_GROUP = 42
+ MCAST_BLOCK_SOURCE = 43
+ MCAST_UNBLOCK_SOURCE = 44
+ MCAST_LEAVE_GROUP = 45
+ MCAST_JOIN_SOURCE_GROUP = 46
+ MCAST_LEAVE_SOURCE_GROUP = 47
+ MCAST_MSFILTER = 48
+ IP_MULTICAST_ALL = 49
+ IP_UNICAST_IF = 50
+)
+
+// Socket options from uapi/linux/in6.h
+const (
+ IPV6_ADDRFORM = 1
+ IPV6_2292PKTINFO = 2
+ IPV6_2292HOPOPTS = 3
+ IPV6_2292DSTOPTS = 4
+ IPV6_2292RTHDR = 5
+ IPV6_2292PKTOPTIONS = 6
+ IPV6_CHECKSUM = 7
+ IPV6_2292HOPLIMIT = 8
+ IPV6_NEXTHOP = 9
+ IPV6_FLOWINFO = 11
+ IPV6_UNICAST_HOPS = 16
+ IPV6_MULTICAST_IF = 17
+ IPV6_MULTICAST_HOPS = 18
+ IPV6_MULTICAST_LOOP = 19
+ IPV6_ADD_MEMBERSHIP = 20
+ IPV6_DROP_MEMBERSHIP = 21
+ IPV6_ROUTER_ALERT = 22
+ IPV6_MTU_DISCOVER = 23
+ IPV6_MTU = 24
+ IPV6_RECVERR = 25
+ IPV6_V6ONLY = 26
+ IPV6_JOIN_ANYCAST = 27
+ IPV6_LEAVE_ANYCAST = 28
+ IPV6_MULTICAST_ALL = 29
+ IPV6_FLOWLABEL_MGR = 32
+ IPV6_FLOWINFO_SEND = 33
+ IPV6_IPSEC_POLICY = 34
+ IPV6_XFRM_POLICY = 35
+ IPV6_HDRINCL = 36
+ IPV6_RECVPKTINFO = 49
+ IPV6_PKTINFO = 50
+ IPV6_RECVHOPLIMIT = 51
+ IPV6_HOPLIMIT = 52
+ IPV6_RECVHOPOPTS = 53
+ IPV6_HOPOPTS = 54
+ IPV6_RTHDRDSTOPTS = 55
+ IPV6_RECVRTHDR = 56
+ IPV6_RTHDR = 57
+ IPV6_RECVDSTOPTS = 58
+ IPV6_DSTOPTS = 59
+ IPV6_RECVPATHMTU = 60
+ IPV6_PATHMTU = 61
+ IPV6_DONTFRAG = 62
+ IPV6_RECVTCLASS = 66
+ IPV6_TCLASS = 67
+ IPV6_AUTOFLOWLABEL = 70
+ IPV6_ADDR_PREFERENCES = 72
+ IPV6_MINHOPCOUNT = 73
+ IPV6_ORIGDSTADDR = 74
+ IPV6_RECVORIGDSTADDR = IPV6_ORIGDSTADDR
+ IPV6_TRANSPARENT = 75
+ IPV6_UNICAST_IF = 76
+ IPV6_RECVFRAGSIZE = 77
+ IPV6_FREEBIND = 78
+)
diff --git a/pkg/abi/linux/ipc.go b/pkg/abi/linux/ipc.go
new file mode 100644
index 000000000..22acd2d43
--- /dev/null
+++ b/pkg/abi/linux/ipc.go
@@ -0,0 +1,53 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Control commands used with semctl, shmctl, and msgctl. Source:
+// include/uapi/linux/ipc.h.
+const (
+ IPC_RMID = 0
+ IPC_SET = 1
+ IPC_STAT = 2
+ IPC_INFO = 3
+)
+
+// resource get request flags. Source: include/uapi/linux/ipc.h
+const (
+ IPC_CREAT = 00001000
+ IPC_EXCL = 00002000
+ IPC_NOWAIT = 00004000
+)
+
+const IPC_PRIVATE = 0
+
+// In Linux, amd64 does not enable CONFIG_ARCH_WANT_IPC_PARSE_VERSION, so SysV
+// IPC unconditionally uses the "new" 64-bit structures that are needed for
+// features like 32-bit UIDs.
+
+// IPCPerm is equivalent to struct ipc64_perm.
+type IPCPerm struct {
+ Key uint32
+ UID uint32
+ GID uint32
+ CUID uint32
+ CGID uint32
+ Mode uint16
+ _ uint16
+ Seq uint16
+ _ uint16
+ _ uint32
+ unused1 uint64
+ unused2 uint64
+}
diff --git a/pkg/abi/linux/limits.go b/pkg/abi/linux/limits.go
new file mode 100644
index 000000000..c74dfcd53
--- /dev/null
+++ b/pkg/abi/linux/limits.go
@@ -0,0 +1,88 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Resources for getrlimit(2)/setrlimit(2)/prlimit(2).
+const (
+ RLIMIT_CPU = 0
+ RLIMIT_FSIZE = 1
+ RLIMIT_DATA = 2
+ RLIMIT_STACK = 3
+ RLIMIT_CORE = 4
+ RLIMIT_RSS = 5
+ RLIMIT_NPROC = 6
+ RLIMIT_NOFILE = 7
+ RLIMIT_MEMLOCK = 8
+ RLIMIT_AS = 9
+ RLIMIT_LOCKS = 10
+ RLIMIT_SIGPENDING = 11
+ RLIMIT_MSGQUEUE = 12
+ RLIMIT_NICE = 13
+ RLIMIT_RTPRIO = 14
+ RLIMIT_RTTIME = 15
+)
+
+// RLimit corresponds to Linux's struct rlimit.
+type RLimit struct {
+ // Cur specifies the soft limit.
+ Cur uint64
+ // Max specifies the hard limit.
+ Max uint64
+}
+
+const (
+ // RLimInfinity is RLIM_INFINITY on Linux.
+ RLimInfinity = ^uint64(0)
+
+ // DefaultStackSoftLimit is called _STK_LIM in Linux.
+ DefaultStackSoftLimit = 8 * 1024 * 1024
+
+ // DefaultNprocLimit is defined in kernel/fork.c:set_max_threads, and
+ // called MAX_THREADS / 2 in Linux.
+ DefaultNprocLimit = FUTEX_TID_MASK / 2
+
+ // DefaultNofileSoftLimit is called INR_OPEN_CUR in Linux.
+ DefaultNofileSoftLimit = 1024
+
+ // DefaultNofileHardLimit is called INR_OPEN_MAX in Linux.
+ DefaultNofileHardLimit = 4096
+
+ // DefaultMemlockLimit is called MLOCK_LIMIT in Linux.
+ DefaultMemlockLimit = 64 * 1024
+
+ // DefaultMsgqueueLimit is called MQ_BYTES_MAX in Linux.
+ DefaultMsgqueueLimit = 819200
+)
+
+// InitRLimits is a map of initial rlimits set by Linux in
+// include/asm-generic/resource.h.
+var InitRLimits = map[int]RLimit{
+ RLIMIT_CPU: {RLimInfinity, RLimInfinity},
+ RLIMIT_FSIZE: {RLimInfinity, RLimInfinity},
+ RLIMIT_DATA: {RLimInfinity, RLimInfinity},
+ RLIMIT_STACK: {DefaultStackSoftLimit, RLimInfinity},
+ RLIMIT_CORE: {0, RLimInfinity},
+ RLIMIT_RSS: {RLimInfinity, RLimInfinity},
+ RLIMIT_NPROC: {DefaultNprocLimit, DefaultNprocLimit},
+ RLIMIT_NOFILE: {DefaultNofileSoftLimit, DefaultNofileHardLimit},
+ RLIMIT_MEMLOCK: {DefaultMemlockLimit, DefaultMemlockLimit},
+ RLIMIT_AS: {RLimInfinity, RLimInfinity},
+ RLIMIT_LOCKS: {RLimInfinity, RLimInfinity},
+ RLIMIT_SIGPENDING: {0, 0},
+ RLIMIT_MSGQUEUE: {DefaultMsgqueueLimit, DefaultMsgqueueLimit},
+ RLIMIT_NICE: {0, 0},
+ RLIMIT_RTPRIO: {0, 0},
+ RLIMIT_RTTIME: {RLimInfinity, RLimInfinity},
+}
diff --git a/pkg/abi/linux/linux.go b/pkg/abi/linux/linux.go
new file mode 100644
index 000000000..281acdbde
--- /dev/null
+++ b/pkg/abi/linux/linux.go
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package linux contains the constants and types needed to interface with a Linux kernel.
+package linux
+
+// NumSoftIRQ is the number of software IRQs, exposed via /proc/stat.
+//
+// Defined in linux/interrupt.h.
+const NumSoftIRQ = 10
+
+// Sysinfo is the structure provided by sysinfo on linux versions > 2.3.48.
+type Sysinfo struct {
+ Uptime int64
+ Loads [3]uint64
+ TotalRAM uint64
+ FreeRAM uint64
+ SharedRAM uint64
+ BufferRAM uint64
+ TotalSwap uint64
+ FreeSwap uint64
+ Procs uint16
+ _ [6]byte // Pad Procs to 64bits.
+ TotalHigh uint64
+ FreeHigh uint64
+ Unit uint32
+ /* The _f field in the glibc version of Sysinfo has size 0 on AMD64 */
+}
diff --git a/pkg/abi/linux/mm.go b/pkg/abi/linux/mm.go
new file mode 100644
index 000000000..07cc1895e
--- /dev/null
+++ b/pkg/abi/linux/mm.go
@@ -0,0 +1,130 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Protections for mmap(2).
+const (
+ PROT_NONE = 0
+ PROT_READ = 1 << 0
+ PROT_WRITE = 1 << 1
+ PROT_EXEC = 1 << 2
+ PROT_SEM = 1 << 3
+ PROT_GROWSDOWN = 1 << 24
+ PROT_GROWSUP = 1 << 25
+)
+
+// Flags for mmap(2).
+const (
+ MAP_SHARED = 1 << 0
+ MAP_PRIVATE = 1 << 1
+ MAP_FIXED = 1 << 4
+ MAP_ANONYMOUS = 1 << 5
+ MAP_32BIT = 1 << 6 // arch/x86/include/uapi/asm/mman.h
+ MAP_GROWSDOWN = 1 << 8
+ MAP_DENYWRITE = 1 << 11
+ MAP_EXECUTABLE = 1 << 12
+ MAP_LOCKED = 1 << 13
+ MAP_NORESERVE = 1 << 14
+ MAP_POPULATE = 1 << 15
+ MAP_NONBLOCK = 1 << 16
+ MAP_STACK = 1 << 17
+ MAP_HUGETLB = 1 << 18
+)
+
+// Flags for mremap(2).
+const (
+ MREMAP_MAYMOVE = 1 << 0
+ MREMAP_FIXED = 1 << 1
+)
+
+// Flags for mlock2(2).
+const (
+ MLOCK_ONFAULT = 0x01
+)
+
+// Flags for mlockall(2).
+const (
+ MCL_CURRENT = 1
+ MCL_FUTURE = 2
+ MCL_ONFAULT = 4
+)
+
+// Advice for madvise(2).
+const (
+ MADV_NORMAL = 0
+ MADV_RANDOM = 1
+ MADV_SEQUENTIAL = 2
+ MADV_WILLNEED = 3
+ MADV_DONTNEED = 4
+ MADV_REMOVE = 9
+ MADV_DONTFORK = 10
+ MADV_DOFORK = 11
+ MADV_MERGEABLE = 12
+ MADV_UNMERGEABLE = 13
+ MADV_HUGEPAGE = 14
+ MADV_NOHUGEPAGE = 15
+ MADV_DONTDUMP = 16
+ MADV_DODUMP = 17
+ MADV_HWPOISON = 100
+ MADV_SOFT_OFFLINE = 101
+ MADV_NOMAJFAULT = 200
+ MADV_DONTCHGME = 201
+)
+
+// Flags for msync(2).
+const (
+ MS_ASYNC = 1 << 0
+ MS_INVALIDATE = 1 << 1
+ MS_SYNC = 1 << 2
+)
+
+// NumaPolicy is the NUMA memory policy for a memory range. See numa(7).
+//
+// +marshal
+type NumaPolicy int32
+
+// Policies for get_mempolicy(2)/set_mempolicy(2).
+const (
+ MPOL_DEFAULT NumaPolicy = 0
+ MPOL_PREFERRED NumaPolicy = 1
+ MPOL_BIND NumaPolicy = 2
+ MPOL_INTERLEAVE NumaPolicy = 3
+ MPOL_LOCAL NumaPolicy = 4
+ MPOL_MAX NumaPolicy = 5
+)
+
+// Flags for get_mempolicy(2).
+const (
+ MPOL_F_NODE = 1 << 0
+ MPOL_F_ADDR = 1 << 1
+ MPOL_F_MEMS_ALLOWED = 1 << 2
+)
+
+// Flags for set_mempolicy(2).
+const (
+ MPOL_F_RELATIVE_NODES = 1 << 14
+ MPOL_F_STATIC_NODES = 1 << 15
+
+ MPOL_MODE_FLAGS = (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
+)
+
+// Flags for mbind(2).
+const (
+ MPOL_MF_STRICT = 1 << 0
+ MPOL_MF_MOVE = 1 << 1
+ MPOL_MF_MOVE_ALL = 1 << 2
+
+ MPOL_MF_VALID = MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL
+)
diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go
new file mode 100644
index 000000000..7866352b4
--- /dev/null
+++ b/pkg/abi/linux/netdevice.go
@@ -0,0 +1,86 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import "gvisor.dev/gvisor/pkg/binary"
+
+const (
+ // IFNAMSIZ is the size of the name field for IFReq.
+ IFNAMSIZ = 16
+)
+
+// IFReq is an interface request.
+type IFReq struct {
+ // IFName is an encoded name, normally null-terminated. This should be
+ // accessed via the Name and SetName functions.
+ IFName [IFNAMSIZ]byte
+
+ // Data is the union of the following structures:
+ //
+ // struct sockaddr ifr_addr;
+ // struct sockaddr ifr_dstaddr;
+ // struct sockaddr ifr_broadaddr;
+ // struct sockaddr ifr_netmask;
+ // struct sockaddr ifr_hwaddr;
+ // short ifr_flags;
+ // int ifr_ifindex;
+ // int ifr_metric;
+ // int ifr_mtu;
+ // struct ifmap ifr_map;
+ // char ifr_slave[IFNAMSIZ];
+ // char ifr_newname[IFNAMSIZ];
+ // char *ifr_data;
+ Data [24]byte
+}
+
+// Name returns IFName up to, but not including, its first zero byte, or all of IFName if it contains none.
+func (ifr *IFReq) Name() string {
+ for c := 0; c < len(ifr.IFName); c++ {
+ if ifr.IFName[c] == 0 {
+ return string(ifr.IFName[:c])
+ }
+ }
+ return string(ifr.IFName[:])
+}
+
+// SetName copies name into IFName, truncating it to len(IFName) bytes and zeroing the rest; a name that fills IFName is stored without a terminating zero byte.
+func (ifr *IFReq) SetName(name string) {
+ n := copy(ifr.IFName[:], []byte(name))
+ for i := n; i < len(ifr.IFName); i++ {
+ ifr.IFName[i] = 0
+ }
+}
+
+// SizeOfIFReq is the binary size of an IFReq struct (40 bytes).
+var SizeOfIFReq = binary.Size(IFReq{})
+
+// IFMap contains interface hardware parameters.
+type IFMap struct {
+ MemStart uint64
+ MemEnd uint64
+ BaseAddr int16
+ IRQ byte
+ DMA byte
+ Port byte
+ _ [3]byte // Pad to sizeof(struct ifmap).
+}
+
+// IFConf is used to return a list of interfaces and their addresses. See
+// netdevice(7) and struct ifconf for more detail on its use.
+type IFConf struct {
+ Len int32
+ _ [4]byte // Pad to sizeof(struct ifconf).
+ Ptr uint64
+}
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
new file mode 100644
index 000000000..a8d4f9d69
--- /dev/null
+++ b/pkg/abi/linux/netfilter.go
@@ -0,0 +1,552 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// This file contains structures required to support netfilter, specifically
+// the iptables tool.
+
+// Hooks into the network stack. These correspond to values in
+// include/uapi/linux/netfilter.h.
+const (
+ NF_INET_PRE_ROUTING = 0
+ NF_INET_LOCAL_IN = 1
+ NF_INET_FORWARD = 2
+ NF_INET_LOCAL_OUT = 3
+ NF_INET_POST_ROUTING = 4
+ NF_INET_NUMHOOKS = 5
+)
+
+// Verdicts that can be returned by targets. These correspond to values in
+// include/uapi/linux/netfilter.h
+const (
+ NF_DROP = 0
+ NF_ACCEPT = 1
+ NF_STOLEN = 2
+ NF_QUEUE = 3
+ NF_REPEAT = 4
+ NF_STOP = 5
+ NF_MAX_VERDICT = NF_STOP
+ // NF_RETURN is defined in include/uapi/linux/netfilter/x_tables.h.
+ NF_RETURN = -NF_REPEAT - 1
+)
+
+// VerdictStrings maps int verdicts to the strings they represent. It is used
+// for debugging.
+var VerdictStrings = map[int32]string{
+ -NF_DROP - 1: "DROP",
+ -NF_ACCEPT - 1: "ACCEPT",
+ -NF_QUEUE - 1: "QUEUE",
+ NF_RETURN: "RETURN",
+}
+
+// Socket options. These correspond to values in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+const (
+ IPT_BASE_CTL = 64
+ IPT_SO_SET_REPLACE = IPT_BASE_CTL
+ IPT_SO_SET_ADD_COUNTERS = IPT_BASE_CTL + 1
+ IPT_SO_SET_MAX = IPT_SO_SET_ADD_COUNTERS
+
+ IPT_SO_GET_INFO = IPT_BASE_CTL
+ IPT_SO_GET_ENTRIES = IPT_BASE_CTL + 1
+ IPT_SO_GET_REVISION_MATCH = IPT_BASE_CTL + 2
+ IPT_SO_GET_REVISION_TARGET = IPT_BASE_CTL + 3
+ IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET
+)
+
+// Name lengths. These correspond to values in
+// include/uapi/linux/netfilter/x_tables.h.
+const (
+ XT_FUNCTION_MAXNAMELEN = 30
+ XT_EXTENSION_MAXNAMELEN = 29
+ XT_TABLE_MAXNAMELEN = 32
+)
+
+// IPTEntry is an iptable rule. It corresponds to struct ipt_entry in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+type IPTEntry struct {
+ // IP is used to filter packets based on the IP header.
+ IP IPTIP
+
+ // NFCache relates to kernel-internal caching and isn't used by
+ // userspace.
+ NFCache uint32
+
+ // TargetOffset is the byte offset from the beginning of this IPTEntry
+ // to the start of the entry's target.
+ TargetOffset uint16
+
+ // NextOffset is the byte offset from the beginning of this IPTEntry to
+ // the start of the next entry. It is thus also the size of the entry.
+ NextOffset uint16
+
+ // Comeback is a return pointer. It is not used by userspace.
+ Comeback uint32
+
+ // Counters holds the packet and byte counts for this rule.
+ Counters XTCounters
+
+ // Elems holds the data for all this rule's matches followed by the
+ // target. It is variable length -- users have to iterate over any
+ // matches and use TargetOffset and NextOffset to make sense of the
+ // data.
+ //
+ // Elems is omitted here because it would cause IPTEntry to be an extra
+ // byte larger (see http://www.catb.org/esr/structure-packing/).
+ //
+ // Elems [0]byte
+}
+
+// SizeOfIPTEntry is the size of an IPTEntry.
+const SizeOfIPTEntry = 112
+
+// KernelIPTEntry is identical to IPTEntry, but includes the Elems field. This
+// struct is marshaled via the binary package to write an IPTEntry to userspace.
+type KernelIPTEntry struct {
+ IPTEntry
+
+ // Elems holds the data for all this rule's matches followed by the
+ // target. It is variable length -- users have to iterate over any
+ // matches and use TargetOffset and NextOffset to make sense of the
+ // data.
+ Elems []byte
+}
+
+// IPTIP contains information for matching a packet's IP header.
+// It corresponds to struct ipt_ip in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+type IPTIP struct {
+ // Src is the source IP address.
+ Src InetAddr
+
+ // Dst is the destination IP address.
+ Dst InetAddr
+
+ // SrcMask is the source IP mask.
+ SrcMask InetAddr
+
+ // DstMask is the destination IP mask.
+ DstMask InetAddr
+
+ // InputInterface is the input network interface.
+ InputInterface [IFNAMSIZ]byte
+
+ // OutputInterface is the output network interface.
+ OutputInterface [IFNAMSIZ]byte
+
+ // InputInterfaceMask is the input interface mask.
+ InputInterfaceMask [IFNAMSIZ]byte
+
+ // OutputInterfaceMask is the output interface mask.
+ OutputInterfaceMask [IFNAMSIZ]byte
+
+ // Protocol is the transport protocol.
+ Protocol uint16
+
+ // Flags define matching behavior for the IP header.
+ Flags uint8
+
+ // InverseFlags invert the meaning of fields in struct IPTIP. See the
+ // IPT_INV_* flags.
+ InverseFlags uint8
+}
+
+// Flags in IPTIP.InverseFlags. Corresponding constants are in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+const (
+ // Invert the meaning of InputInterface.
+ IPT_INV_VIA_IN = 0x01
+ // Invert the meaning of OutputInterface.
+ IPT_INV_VIA_OUT = 0x02
+ // Unclear what this is, as no references to it exist in the kernel.
+ IPT_INV_TOS = 0x04
+ // Invert the meaning of Src.
+ IPT_INV_SRCIP = 0x08
+ // Invert the meaning of Dst.
+ IPT_INV_DSTIP = 0x10
+ // Invert the meaning of the IPT_F_FRAG flag.
+ IPT_INV_FRAG = 0x20
+ // Invert the meaning of the Protocol field.
+ IPT_INV_PROTO = 0x40
+ // Enable all flags.
+ IPT_INV_MASK = 0x7F
+)
+
+// SizeOfIPTIP is the size of an IPTIP.
+const SizeOfIPTIP = 84
+
+// XTCounters holds packet and byte counts for a rule. It corresponds to struct
+// xt_counters in include/uapi/linux/netfilter/x_tables.h.
+type XTCounters struct {
+ // Pcnt is the packet count.
+ Pcnt uint64
+
+ // Bcnt is the byte count.
+ Bcnt uint64
+}
+
+// SizeOfXTCounters is the size of an XTCounters.
+const SizeOfXTCounters = 16
+
+// XTEntryMatch holds a match for a rule. For example, a user using the
+// addrtype iptables match extension would put the data for that match into an
+// XTEntryMatch. iptables-extensions(8) has a list of possible matches.
+//
+// XTEntryMatch corresponds to struct xt_entry_match in
+// include/uapi/linux/netfilter/x_tables.h. That struct contains a union
+// exposing different data to the user and kernel, but this struct holds only
+// the user data.
+type XTEntryMatch struct {
+ MatchSize uint16
+ Name ExtensionName
+ Revision uint8
+ // Data is omitted here because it would cause XTEntryMatch to be an
+ // extra byte larger (see http://www.catb.org/esr/structure-packing/).
+ // Data [0]byte
+}
+
+// SizeOfXTEntryMatch is the size of an XTEntryMatch.
+const SizeOfXTEntryMatch = 32
+
+// KernelXTEntryMatch is identical to XTEntryMatch, but contains
+// variable-length Data field.
+type KernelXTEntryMatch struct {
+ XTEntryMatch
+ Data []byte
+}
+
+// XTEntryTarget holds a target for a rule. For example, it can specify that
+// packets matching the rule should DROP, ACCEPT, or use an extension target.
+// iptables-extensions(8) has a list of possible targets.
+//
+// XTEntryTarget corresponds to struct xt_entry_target in
+// include/uapi/linux/netfilter/x_tables.h. That struct contains a union
+// exposing different data to the user and kernel, but this struct holds only
+// the user data.
+type XTEntryTarget struct {
+ TargetSize uint16
+ Name ExtensionName
+ Revision uint8
+ // Data is omitted here because it would cause XTEntryTarget to be an
+ // extra byte larger (see http://www.catb.org/esr/structure-packing/).
+ // Data [0]byte
+}
+
+// SizeOfXTEntryTarget is the size of an XTEntryTarget.
+const SizeOfXTEntryTarget = 32
+
+// XTStandardTarget is a built-in target, one of ACCEPT, DROP, JUMP, QUEUE, or
+// RETURN. It corresponds to struct xt_standard_target in
+// include/uapi/linux/netfilter/x_tables.h.
+type XTStandardTarget struct {
+ Target XTEntryTarget
+ // A positive verdict indicates a jump, and is the offset from the
+ // start of the table to jump to. A negative value means one of the
+ // other built-in targets.
+ Verdict int32
+ _ [4]byte
+}
+
+// SizeOfXTStandardTarget is the size of an XTStandardTarget.
+const SizeOfXTStandardTarget = 40
+
+// XTErrorTarget triggers an error when reached. It is also used to mark the
+// beginning of user-defined chains by putting the name of the chain in
+// ErrorName. It corresponds to struct xt_error_target in
+// include/uapi/linux/netfilter/x_tables.h.
+type XTErrorTarget struct {
+ Target XTEntryTarget
+ Name ErrorName
+ _ [2]byte
+}
+
+// SizeOfXTErrorTarget is the size of an XTErrorTarget.
+const SizeOfXTErrorTarget = 64
+
+// Flag values for NfNATIPV4Range. The values indicate whether to map
+// protocol-specific parts (ports) or IPs. They correspond to values in
+// include/uapi/linux/netfilter/nf_nat.h.
+const (
+ NF_NAT_RANGE_MAP_IPS = 1 << 0
+ NF_NAT_RANGE_PROTO_SPECIFIED = 1 << 1
+ NF_NAT_RANGE_PROTO_RANDOM = 1 << 2
+ NF_NAT_RANGE_PERSISTENT = 1 << 3
+ NF_NAT_RANGE_PROTO_RANDOM_FULLY = 1 << 4
+ NF_NAT_RANGE_PROTO_RANDOM_ALL = (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+ NF_NAT_RANGE_MASK = (NF_NAT_RANGE_MAP_IPS |
+ NF_NAT_RANGE_PROTO_SPECIFIED | NF_NAT_RANGE_PROTO_RANDOM |
+ NF_NAT_RANGE_PERSISTENT | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+)
+
+// NfNATIPV4Range corresponds to struct nf_nat_ipv4_range
+// in include/uapi/linux/netfilter/nf_nat.h. The fields are in
+// network byte order.
+type NfNATIPV4Range struct {
+ Flags uint32
+ MinIP [4]byte
+ MaxIP [4]byte
+ MinPort uint16
+ MaxPort uint16
+}
+
+// NfNATIPV4MultiRangeCompat corresponds to struct
+// nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h.
+type NfNATIPV4MultiRangeCompat struct {
+ RangeSize uint32
+ RangeIPV4 NfNATIPV4Range
+}
+
+// XTRedirectTarget triggers a redirect when reached.
+// Adding 4 bytes of padding to make the struct 8 byte aligned.
+type XTRedirectTarget struct {
+ Target XTEntryTarget
+ NfRange NfNATIPV4MultiRangeCompat
+ _ [4]byte
+}
+
+// SizeOfXTRedirectTarget is the size of an XTRedirectTarget.
+const SizeOfXTRedirectTarget = 56
+
+// IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds
+// to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h.
+type IPTGetinfo struct {
+ Name TableName
+ ValidHooks uint32
+ HookEntry [NF_INET_NUMHOOKS]uint32
+ Underflow [NF_INET_NUMHOOKS]uint32
+ NumEntries uint32
+ Size uint32
+}
+
+// SizeOfIPTGetinfo is the size of an IPTGetinfo.
+const SizeOfIPTGetinfo = 84
+
+// IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It
+// corresponds to struct ipt_get_entries in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+type IPTGetEntries struct {
+ Name TableName
+ Size uint32
+ _ [4]byte
+ // Entrytable is omitted here because it would cause IPTGetEntries to
+ // be an extra byte longer (see
+ // http://www.catb.org/esr/structure-packing/).
+ // Entrytable [0]IPTEntry
+}
+
+// SizeOfIPTGetEntries is the size of an IPTGetEntries.
+const SizeOfIPTGetEntries = 40
+
+// KernelIPTGetEntries is identical to IPTGetEntries, but includes the
+// Entrytable field. This struct is marshaled via the binary package to write a
+// KernelIPTGetEntries to userspace.
+type KernelIPTGetEntries struct {
+ IPTGetEntries
+ Entrytable []KernelIPTEntry
+}
+
+// IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It
+// corresponds to struct ipt_replace in
+// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+type IPTReplace struct {
+ Name TableName
+ ValidHooks uint32
+ NumEntries uint32
+ Size uint32
+ HookEntry [NF_INET_NUMHOOKS]uint32
+ Underflow [NF_INET_NUMHOOKS]uint32
+ NumCounters uint32
+ Counters uint64 // This is really a *XTCounters.
+ // Entries is omitted here because it would cause IPTReplace to be an
+ // extra byte longer (see http://www.catb.org/esr/structure-packing/).
+ // Entries [0]IPTEntry
+}
+
+// KernelIPTReplace is identical to IPTReplace, but includes the Entries field.
+type KernelIPTReplace struct {
+ IPTReplace
+ Entries [0]IPTEntry
+}
+
+// SizeOfIPTReplace is the size of an IPTReplace.
+const SizeOfIPTReplace = 96
+
+// ExtensionName holds the name of a netfilter extension.
+type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (en ExtensionName) String() string {
+ return goString(en[:])
+}
+
+// TableName holds the name of a netfilter table.
+type TableName [XT_TABLE_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (tn TableName) String() string {
+ return goString(tn[:])
+}
+
+// ErrorName holds the name of a netfilter error. These can also hold
+// user-defined chains.
+type ErrorName [XT_FUNCTION_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (en ErrorName) String() string {
+ return goString(en[:])
+}
+
+// goString converts the C-style string held in cstring to a Go string. The
+// result stops just before the first zero byte; when cstring contains no zero
+// byte the whole slice is converted.
+func goString(cstring []byte) string {
+	end := len(cstring)
+	for i := 0; i < end; i++ {
+		if cstring[i] == 0 {
+			// Found the terminator: keep only what precedes it.
+			end = i
+			break
+		}
+	}
+	return string(cstring[:end])
+}
+
+// XTTCP holds data for matching TCP packets. It corresponds to struct xt_tcp
+// in include/uapi/linux/netfilter/xt_tcpudp.h.
+type XTTCP struct {
+ // SourcePortStart specifies the inclusive start of the range of source
+ // ports to which the matcher applies.
+ SourcePortStart uint16
+
+ // SourcePortEnd specifies the inclusive end of the range of source ports
+ // to which the matcher applies.
+ SourcePortEnd uint16
+
+ // DestinationPortStart specifies the start of the destination port
+ // range to which the matcher applies.
+ DestinationPortStart uint16
+
+ // DestinationPortEnd specifies the end of the destination port
+ // range to which the matcher applies.
+ DestinationPortEnd uint16
+
+ // Option specifies that a particular TCP option must be set.
+ Option uint8
+
+ // FlagMask masks TCP flags when comparing to the FlagCompare byte. It allows
+ // for specification of which flags are important to the matcher.
+ FlagMask uint8
+
+ // FlagCompare, in combination with FlagMask, is used to match only packets
+ // that have certain flags set.
+ FlagCompare uint8
+
+ // InverseFlags flips the meaning of certain fields. See the
+ // XT_TCP_INV_* flags.
+ InverseFlags uint8
+}
+
+// SizeOfXTTCP is the size in bytes of an XTTCP (four uint16 fields plus four uint8 fields).
+const SizeOfXTTCP = 12
+
+// Flags in XTTCP.InverseFlags. Corresponding constants are in
+// include/uapi/linux/netfilter/xt_tcpudp.h.
+const (
+ // Invert the meaning of SourcePortStart/End.
+ XT_TCP_INV_SRCPT = 0x01
+ // Invert the meaning of DestinationPortStart/End.
+ XT_TCP_INV_DSTPT = 0x02
+ // Invert the meaning of FlagCompare.
+ XT_TCP_INV_FLAGS = 0x04
+ // Invert the meaning of Option.
+ XT_TCP_INV_OPTION = 0x08
+ // Mask covering all of the flags above (0x01|0x02|0x04|0x08).
+ XT_TCP_INV_MASK = 0x0F
+)
+
+// XTUDP holds data for matching UDP packets. It corresponds to struct xt_udp
+// in include/uapi/linux/netfilter/xt_tcpudp.h.
+type XTUDP struct {
+ // SourcePortStart is the inclusive start of the range of source ports
+ // to which the matcher applies.
+ SourcePortStart uint16
+
+ // SourcePortEnd is the inclusive end of the range of source ports to
+ // which the matcher applies.
+ SourcePortEnd uint16
+
+ // DestinationPortStart is the inclusive start of the destination port
+ // range to which the matcher applies.
+ DestinationPortStart uint16
+
+ // DestinationPortEnd is the inclusive end of the destination port
+ // range to which the matcher applies.
+ DestinationPortEnd uint16
+
+ // InverseFlags flips the meaning of certain fields. See the
+ // XT_UDP_INV_* flags.
+ InverseFlags uint8
+
+ _ uint8 // Unnamed trailing byte; brings the struct to SizeOfXTUDP bytes.
+}
+
+// SizeOfXTUDP is the size in bytes of an XTUDP (four uint16 fields plus two uint8 fields).
+const SizeOfXTUDP = 10
+
+// Flags in XTUDP.InverseFlags. Corresponding constants are in
+// include/uapi/linux/netfilter/xt_tcpudp.h.
+const (
+ // Invert the meaning of SourcePortStart/End.
+ XT_UDP_INV_SRCPT = 0x01
+ // Invert the meaning of DestinationPortStart/End.
+ XT_UDP_INV_DSTPT = 0x02
+ // Mask covering both of the flags above (0x01|0x02).
+ XT_UDP_INV_MASK = 0x03
+)
+
+// IPTOwnerInfo holds data for matching packets by owner. It corresponds
+// to struct ipt_owner_info in libxt_owner.c of the iptables binary.
+type IPTOwnerInfo struct {
+ // UID is user id which created the packet.
+ UID uint32
+
+ // GID is group id which created the packet.
+ GID uint32
+
+ // PID is process id of the process which created the packet.
+ PID uint32
+
+ // SID is session id which created the packet.
+ SID uint32
+
+ // Comm is the command name which created the packet.
+ Comm [16]byte
+
+ // Match is used to match UID/GID of the socket. See the
+ // XT_OWNER_* flags below.
+ Match uint8
+
+ // Invert flips the meaning of Match field.
+ Invert uint8
+}
+
+// SizeOfIPTOwnerInfo is the size in bytes of an IPTOwnerInfo (4*4 + 16 + 2).
+const SizeOfIPTOwnerInfo = 34
+
+// Flags in IPTOwnerInfo.Match. Corresponding constants are in
+// include/uapi/linux/netfilter/xt_owner.h. Each flag is a distinct bit.
+const (
+ // Match the UID of the packet.
+ XT_OWNER_UID = 1 << 0
+ // Match the GID of the packet.
+ XT_OWNER_GID = 1 << 1
+ // Match if the socket exists for the packet. Forwarded
+ // packets do not have an associated socket.
+ XT_OWNER_SOCKET = 1 << 2
+)
diff --git a/pkg/abi/linux/netfilter_test.go b/pkg/abi/linux/netfilter_test.go
new file mode 100644
index 000000000..565dd550e
--- /dev/null
+++ b/pkg/abi/linux/netfilter_test.go
@@ -0,0 +1,46 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/binary"
+)
+
+// TestSizes checks that each hand-written SizeOf* constant agrees with the
+// size the binary package computes for the corresponding struct.
+func TestSizes(t *testing.T) {
+	for _, c := range []struct {
+		value interface{}
+		want  uintptr
+	}{
+		{IPTEntry{}, SizeOfIPTEntry},
+		{IPTGetEntries{}, SizeOfIPTGetEntries},
+		{IPTGetinfo{}, SizeOfIPTGetinfo},
+		{IPTIP{}, SizeOfIPTIP},
+		{IPTOwnerInfo{}, SizeOfIPTOwnerInfo},
+		{IPTReplace{}, SizeOfIPTReplace},
+		{XTCounters{}, SizeOfXTCounters},
+		{XTEntryMatch{}, SizeOfXTEntryMatch},
+		{XTEntryTarget{}, SizeOfXTEntryTarget},
+		{XTErrorTarget{}, SizeOfXTErrorTarget},
+		{XTStandardTarget{}, SizeOfXTStandardTarget},
+	} {
+		got := binary.Size(c.value)
+		if got == c.want {
+			continue
+		}
+		// Report every mismatch rather than stopping at the first one.
+		t.Errorf("%T has a defined size of %d and calculated size of %d", c.value, c.want, got)
+	}
+}
diff --git a/pkg/abi/linux/netlink.go b/pkg/abi/linux/netlink.go
new file mode 100644
index 000000000..0ba086c76
--- /dev/null
+++ b/pkg/abi/linux/netlink.go
@@ -0,0 +1,130 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Netlink protocols, from uapi/linux/netlink.h. No constant with value 17 is defined here.
+const (
+ NETLINK_ROUTE = 0
+ NETLINK_UNUSED = 1
+ NETLINK_USERSOCK = 2
+ NETLINK_FIREWALL = 3
+ NETLINK_SOCK_DIAG = 4
+ NETLINK_NFLOG = 5
+ NETLINK_XFRM = 6
+ NETLINK_SELINUX = 7
+ NETLINK_ISCSI = 8
+ NETLINK_AUDIT = 9
+ NETLINK_FIB_LOOKUP = 10
+ NETLINK_CONNECTOR = 11
+ NETLINK_NETFILTER = 12
+ NETLINK_IP6_FW = 13
+ NETLINK_DNRTMSG = 14
+ NETLINK_KOBJECT_UEVENT = 15
+ NETLINK_GENERIC = 16
+ NETLINK_SCSITRANSPORT = 18
+ NETLINK_ECRYPTFS = 19
+ NETLINK_RDMA = 20
+ NETLINK_CRYPTO = 21
+)
+
+// SockAddrNetlink is struct sockaddr_nl, from uapi/linux/netlink.h.
+type SockAddrNetlink struct {
+ Family uint16
+ _ uint16 // Unnamed field occupying the 2 bytes between Family and PortID.
+ PortID uint32
+ Groups uint32
+}
+
+// SockAddrNetlinkSize is the size in bytes of SockAddrNetlink (2+2+4+4).
+const SockAddrNetlinkSize = 12
+
+// NetlinkMessageHeader is struct nlmsghdr, from uapi/linux/netlink.h. Flags holds NLM_F_* values.
+type NetlinkMessageHeader struct {
+ Length uint32
+ Type uint16
+ Flags uint16
+ Seq uint32
+ PortID uint32
+}
+
+// NetlinkMessageHeaderSize is the size in bytes of NetlinkMessageHeader (4+2+2+4+4).
+const NetlinkMessageHeaderSize = 16
+
+// Netlink message header flags, from uapi/linux/netlink.h. Some bit values are shared by two names.
+const (
+ NLM_F_REQUEST = 0x1
+ NLM_F_MULTI = 0x2
+ NLM_F_ACK = 0x4
+ NLM_F_ECHO = 0x8
+ NLM_F_DUMP_INTR = 0x10
+ NLM_F_ROOT = 0x100
+ NLM_F_MATCH = 0x200
+ NLM_F_ATOMIC = 0x400
+ NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH
+ NLM_F_REPLACE = 0x100 // Same bit as NLM_F_ROOT.
+ NLM_F_EXCL = 0x200 // Same bit as NLM_F_MATCH.
+ NLM_F_CREATE = 0x400 // Same bit as NLM_F_ATOMIC.
+ NLM_F_APPEND = 0x800
+)
+
+// Standard netlink message types, from uapi/linux/netlink.h. All are below NLMSG_MIN_TYPE.
+const (
+ NLMSG_NOOP = 0x1
+ NLMSG_ERROR = 0x2
+ NLMSG_DONE = 0x3
+ NLMSG_OVERRUN = 0x4
+
+ // NLMSG_MIN_TYPE is the first value for protocol-level types.
+ NLMSG_MIN_TYPE = 0x10
+)
+
+// NLMSG_ALIGNTO is the alignment, in bytes, of netlink messages, from
+// uapi/linux/netlink.h.
+const NLMSG_ALIGNTO = 4
+
+// NetlinkAttrHeader is the header of a netlink attribute, followed by payload.
+//
+// This is struct nlattr, from uapi/linux/netlink.h. Its size is NetlinkAttrHeaderSize.
+type NetlinkAttrHeader struct {
+ Length uint16
+ Type uint16
+}
+
+// NetlinkAttrHeaderSize is the size in bytes of NetlinkAttrHeader (2+2).
+const NetlinkAttrHeaderSize = 4
+
+// NLA_ALIGNTO is the alignment, in bytes, of netlink attributes, from
+// uapi/linux/netlink.h.
+const NLA_ALIGNTO = 4
+
+// Socket options, from uapi/linux/netlink.h. No constants with values 6 or 7 are defined here.
+const (
+ NETLINK_ADD_MEMBERSHIP = 1
+ NETLINK_DROP_MEMBERSHIP = 2
+ NETLINK_PKTINFO = 3
+ NETLINK_BROADCAST_ERROR = 4
+ NETLINK_NO_ENOBUFS = 5
+ NETLINK_LISTEN_ALL_NSID = 8
+ NETLINK_LIST_MEMBERSHIPS = 9
+ NETLINK_CAP_ACK = 10
+ NETLINK_EXT_ACK = 11
+ NETLINK_DUMP_STRICT_CHK = 12
+)
+
+// NetlinkErrorMessage is struct nlmsgerr, from uapi/linux/netlink.h: an error code followed by a message header.
+type NetlinkErrorMessage struct {
+ Error int32
+ Header NetlinkMessageHeader
+}
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
new file mode 100644
index 000000000..40bec566c
--- /dev/null
+++ b/pkg/abi/linux/netlink_route.go
@@ -0,0 +1,346 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Netlink message types for NETLINK_ROUTE sockets, from uapi/linux/rtnetlink.h. The first value is 16.
+const (
+ RTM_NEWLINK = 16
+ RTM_DELLINK = 17
+ RTM_GETLINK = 18
+ RTM_SETLINK = 19
+
+ RTM_NEWADDR = 20
+ RTM_DELADDR = 21
+ RTM_GETADDR = 22
+
+ RTM_NEWROUTE = 24
+ RTM_DELROUTE = 25
+ RTM_GETROUTE = 26
+
+ RTM_NEWNEIGH = 28
+ RTM_DELNEIGH = 29
+ RTM_GETNEIGH = 30
+
+ RTM_NEWRULE = 32
+ RTM_DELRULE = 33
+ RTM_GETRULE = 34
+
+ RTM_NEWQDISC = 36
+ RTM_DELQDISC = 37
+ RTM_GETQDISC = 38
+
+ RTM_NEWTCLASS = 40
+ RTM_DELTCLASS = 41
+ RTM_GETTCLASS = 42
+
+ RTM_NEWTFILTER = 44
+ RTM_DELTFILTER = 45
+ RTM_GETTFILTER = 46
+
+ RTM_NEWACTION = 48
+ RTM_DELACTION = 49
+ RTM_GETACTION = 50
+
+ RTM_NEWPREFIX = 52
+
+ RTM_GETMULTICAST = 58
+
+ RTM_GETANYCAST = 62
+
+ RTM_NEWNEIGHTBL = 64
+ RTM_GETNEIGHTBL = 66
+ RTM_SETNEIGHTBL = 67
+
+ RTM_NEWNDUSEROPT = 68
+
+ RTM_NEWADDRLABEL = 72
+ RTM_DELADDRLABEL = 73
+ RTM_GETADDRLABEL = 74
+
+ RTM_GETDCB = 78
+ RTM_SETDCB = 79
+
+ RTM_NEWNETCONF = 80
+ RTM_GETNETCONF = 82
+
+ RTM_NEWMDB = 84
+ RTM_DELMDB = 85
+ RTM_GETMDB = 86
+
+ RTM_NEWNSID = 88
+ RTM_DELNSID = 89
+ RTM_GETNSID = 90
+)
+
+// InterfaceInfoMessage is struct ifinfomsg, from uapi/linux/rtnetlink.h.
+type InterfaceInfoMessage struct {
+ Family uint8
+ _ uint8 // Unnamed field occupying the byte between Family and Type.
+ Type uint16
+ Index int32
+ Flags uint32
+ Change uint32
+}
+
+// Interface flags, from uapi/linux/if.h. Each flag is a distinct bit, from bit 0 through bit 18.
+const (
+ IFF_UP = 1 << 0
+ IFF_BROADCAST = 1 << 1
+ IFF_DEBUG = 1 << 2
+ IFF_LOOPBACK = 1 << 3
+ IFF_POINTOPOINT = 1 << 4
+ IFF_NOTRAILERS = 1 << 5
+ IFF_RUNNING = 1 << 6
+ IFF_NOARP = 1 << 7
+ IFF_PROMISC = 1 << 8
+ IFF_ALLMULTI = 1 << 9
+ IFF_MASTER = 1 << 10
+ IFF_SLAVE = 1 << 11
+ IFF_MULTICAST = 1 << 12
+ IFF_PORTSEL = 1 << 13
+ IFF_AUTOMEDIA = 1 << 14
+ IFF_DYNAMIC = 1 << 15
+ IFF_LOWER_UP = 1 << 16
+ IFF_DORMANT = 1 << 17
+ IFF_ECHO = 1 << 18
+)
+
+// Interface link attributes, from uapi/linux/if_link.h. Values are consecutive, from 0 through 41.
+const (
+ IFLA_UNSPEC = 0
+ IFLA_ADDRESS = 1
+ IFLA_BROADCAST = 2
+ IFLA_IFNAME = 3
+ IFLA_MTU = 4
+ IFLA_LINK = 5
+ IFLA_QDISC = 6
+ IFLA_STATS = 7
+ IFLA_COST = 8
+ IFLA_PRIORITY = 9
+ IFLA_MASTER = 10
+ IFLA_WIRELESS = 11
+ IFLA_PROTINFO = 12
+ IFLA_TXQLEN = 13
+ IFLA_MAP = 14
+ IFLA_WEIGHT = 15
+ IFLA_OPERSTATE = 16
+ IFLA_LINKMODE = 17
+ IFLA_LINKINFO = 18
+ IFLA_NET_NS_PID = 19
+ IFLA_IFALIAS = 20
+ IFLA_NUM_VF = 21
+ IFLA_VFINFO_LIST = 22
+ IFLA_STATS64 = 23
+ IFLA_VF_PORTS = 24
+ IFLA_PORT_SELF = 25
+ IFLA_AF_SPEC = 26
+ IFLA_GROUP = 27
+ IFLA_NET_NS_FD = 28
+ IFLA_EXT_MASK = 29
+ IFLA_PROMISCUITY = 30
+ IFLA_NUM_TX_QUEUES = 31
+ IFLA_NUM_RX_QUEUES = 32
+ IFLA_CARRIER = 33
+ IFLA_PHYS_PORT_ID = 34
+ IFLA_CARRIER_CHANGES = 35
+ IFLA_PHYS_SWITCH_ID = 36
+ IFLA_LINK_NETNSID = 37
+ IFLA_PHYS_PORT_NAME = 38
+ IFLA_PROTO_DOWN = 39
+ IFLA_GSO_MAX_SEGS = 40
+ IFLA_GSO_MAX_SIZE = 41
+)
+
+// InterfaceAddrMessage is struct ifaddrmsg, from uapi/linux/if_addr.h (1+1+1+1+4 = 8 bytes).
+type InterfaceAddrMessage struct {
+ Family uint8
+ PrefixLen uint8
+ Flags uint8
+ Scope uint8
+ Index uint32
+}
+
+// Interface address attributes, from uapi/linux/if_addr.h. Values are consecutive, from 0 through 8.
+const (
+ IFA_UNSPEC = 0
+ IFA_ADDRESS = 1
+ IFA_LOCAL = 2
+ IFA_LABEL = 3
+ IFA_BROADCAST = 4
+ IFA_ANYCAST = 5
+ IFA_CACHEINFO = 6
+ IFA_MULTICAST = 7
+ IFA_FLAGS = 8
+)
+
+// Device types, from uapi/linux/if_arp.h. Only the loopback device type is defined here.
+const (
+ ARPHRD_LOOPBACK = 772
+)
+
+// RouteMessage is struct rtmsg, from uapi/linux/rtnetlink.h: eight uint8 fields followed by a uint32.
+type RouteMessage struct {
+ Family uint8
+ DstLen uint8
+ SrcLen uint8
+ TOS uint8
+
+ Table uint8
+ Protocol uint8
+ Scope uint8
+ Type uint8
+
+ Flags uint32
+}
+
+// SizeOfRouteMessage is the size in bytes of RouteMessage (8*1 + 4).
+const SizeOfRouteMessage = 12
+
+// Route types, from uapi/linux/rtnetlink.h. The values are consecutive, from 0 through 11.
+const (
+ // RTN_UNSPEC represents an unspecified route type.
+ RTN_UNSPEC = 0
+
+ // RTN_UNICAST represents a unicast route.
+ RTN_UNICAST = 1
+
+ // RTN_LOCAL represents a route that is accepted locally.
+ RTN_LOCAL = 2
+
+ // RTN_BROADCAST represents a broadcast route (Traffic is accepted locally
+ // as broadcast, and sent as broadcast).
+ RTN_BROADCAST = 3
+
+ // RTN_ANYCAST represents an anycast route (Traffic is accepted locally as
+ // broadcast but sent as unicast).
+ RTN_ANYCAST = 4
+
+ // RTN_MULTICAST represents a multicast route.
+ RTN_MULTICAST = 5
+
+ // RTN_BLACKHOLE represents a route where all traffic is dropped.
+ RTN_BLACKHOLE = 6
+
+ // RTN_UNREACHABLE represents a route where the destination is unreachable.
+ RTN_UNREACHABLE = 7
+
+ RTN_PROHIBIT = 8 // Administratively prohibited.
+ RTN_THROW = 9 // Not in this table.
+ RTN_NAT = 10 // Translate this address.
+ RTN_XRESOLVE = 11 // Use external resolver.
+)
+
+// Route protocols/origins, from uapi/linux/rtnetlink.h. The values are not contiguous.
+const (
+ RTPROT_UNSPEC = 0
+ RTPROT_REDIRECT = 1
+ RTPROT_KERNEL = 2
+ RTPROT_BOOT = 3
+ RTPROT_STATIC = 4
+ RTPROT_GATED = 8
+ RTPROT_RA = 9
+ RTPROT_MRT = 10
+ RTPROT_ZEBRA = 11
+ RTPROT_BIRD = 12
+ RTPROT_DNROUTED = 13
+ RTPROT_XORP = 14
+ RTPROT_NTK = 15
+ RTPROT_DHCP = 16
+ RTPROT_MROUTED = 17
+ RTPROT_BABEL = 42
+ RTPROT_BGP = 186
+ RTPROT_ISIS = 187
+ RTPROT_OSPF = 188
+ RTPROT_RIP = 189
+ RTPROT_EIGRP = 192
+)
+
+// Route scopes, from uapi/linux/rtnetlink.h. All values fit in a uint8 (0 through 255).
+const (
+ RT_SCOPE_UNIVERSE = 0
+ RT_SCOPE_SITE = 200
+ RT_SCOPE_LINK = 253
+ RT_SCOPE_HOST = 254
+ RT_SCOPE_NOWHERE = 255
+)
+
+// Route flags, from uapi/linux/rtnetlink.h. Each flag is a distinct bit, from 0x100 through 0x2000.
+const (
+ RTM_F_NOTIFY = 0x100
+ RTM_F_CLONED = 0x200
+ RTM_F_EQUALIZE = 0x400
+ RTM_F_PREFIX = 0x800
+ RTM_F_LOOKUP_TABLE = 0x1000
+ RTM_F_FIB_MATCH = 0x2000
+)
+
+// Route tables, from uapi/linux/rtnetlink.h. All values fit in a uint8 (0 through 255).
+const (
+ RT_TABLE_UNSPEC = 0
+ RT_TABLE_COMPAT = 252
+ RT_TABLE_DEFAULT = 253
+ RT_TABLE_MAIN = 254
+ RT_TABLE_LOCAL = 255
+)
+
+// Route attributes, from uapi/linux/rtnetlink.h. Values are consecutive, from 0 through 29.
+const (
+ RTA_UNSPEC = 0
+ RTA_DST = 1
+ RTA_SRC = 2
+ RTA_IIF = 3
+ RTA_OIF = 4
+ RTA_GATEWAY = 5
+ RTA_PRIORITY = 6
+ RTA_PREFSRC = 7
+ RTA_METRICS = 8
+ RTA_MULTIPATH = 9
+ RTA_PROTOINFO = 10
+ RTA_FLOW = 11
+ RTA_CACHEINFO = 12
+ RTA_SESSION = 13
+ RTA_MP_ALGO = 14
+ RTA_TABLE = 15
+ RTA_MARK = 16
+ RTA_MFC_STATS = 17
+ RTA_VIA = 18
+ RTA_NEWDST = 19
+ RTA_PREF = 20
+ RTA_ENCAP_TYPE = 21
+ RTA_ENCAP = 22
+ RTA_EXPIRES = 23
+ RTA_PAD = 24
+ RTA_UID = 25
+ RTA_TTL_PROPAGATE = 26
+ RTA_IP_PROTO = 27
+ RTA_SPORT = 28
+ RTA_DPORT = 29
+)
+
+// Route flags, from include/uapi/linux/route.h. These are distinct from the RTM_F_* flags.
+const (
+ RTF_GATEWAY = 0x2
+ RTF_UP = 0x1 // Listed after RTF_GATEWAY despite being the lower bit.
+)
+
+// RtAttr is the header of optional additional route information, as a netlink
+// attribute. From include/uapi/linux/rtnetlink.h.
+type RtAttr struct {
+ Len uint16
+ Type uint16
+}
+
+// SizeOfRtAttr is the size in bytes of RtAttr (2+2).
+const SizeOfRtAttr = 4
diff --git a/pkg/abi/linux/poll.go b/pkg/abi/linux/poll.go
new file mode 100644
index 000000000..c04d26e4c
--- /dev/null
+++ b/pkg/abi/linux/poll.go
@@ -0,0 +1,42 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// PollFD is struct pollfd, used by poll(2)/ppoll(2), from uapi/asm-generic/poll.h (4+2+2 = 8 bytes).
+type PollFD struct {
+ FD int32
+ Events int16
+ REvents int16
+}
+
+// Poll event flags, used by poll(2)/ppoll(2) and/or
+// epoll_ctl(2)/epoll_wait(2), from uapi/asm-generic/poll.h. Bit 0x0800 is not defined here.
+const (
+ POLLIN = 0x0001
+ POLLPRI = 0x0002
+ POLLOUT = 0x0004
+ POLLERR = 0x0008
+ POLLHUP = 0x0010
+ POLLNVAL = 0x0020
+ POLLRDNORM = 0x0040
+ POLLRDBAND = 0x0080
+ POLLWRNORM = 0x0100
+ POLLWRBAND = 0x0200
+ POLLMSG = 0x0400
+ POLLREMOVE = 0x1000
+ POLLRDHUP = 0x2000
+ POLLFREE = 0x4000
+ POLL_BUSY_LOOP = 0x8000
+)
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go
new file mode 100644
index 000000000..391cfaa1c
--- /dev/null
+++ b/pkg/abi/linux/prctl.go
@@ -0,0 +1,164 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// PR_* flags, from <linux/prctl.h> for prctl(2).
+const (
+ // PR_SET_PDEATHSIG sets the process' death signal.
+ PR_SET_PDEATHSIG = 1
+
+ // PR_GET_PDEATHSIG gets the process' death signal.
+ PR_GET_PDEATHSIG = 2
+
+ // PR_GET_DUMPABLE gets the process' dumpable flag.
+ PR_GET_DUMPABLE = 3
+
+ // PR_SET_DUMPABLE sets the process' dumpable flag.
+ PR_SET_DUMPABLE = 4
+
+ // PR_GET_KEEPCAPS gets the value of the keep capabilities flag.
+ PR_GET_KEEPCAPS = 7
+
+ // PR_SET_KEEPCAPS sets the value of the keep capabilities flag.
+ PR_SET_KEEPCAPS = 8
+
+ // PR_GET_TIMING gets the process' timing method.
+ PR_GET_TIMING = 13
+
+ // PR_SET_TIMING sets the process' timing method.
+ PR_SET_TIMING = 14
+
+ // PR_SET_NAME sets the process' name.
+ PR_SET_NAME = 15
+
+ // PR_GET_NAME gets the process' name.
+ PR_GET_NAME = 16
+
+ // PR_GET_SECCOMP gets a process' seccomp mode.
+ PR_GET_SECCOMP = 21
+
+ // PR_SET_SECCOMP sets a process' seccomp mode.
+ PR_SET_SECCOMP = 22
+
+ // PR_CAPBSET_READ gets the capability bounding set.
+ PR_CAPBSET_READ = 23
+
+ // PR_CAPBSET_DROP sets the capability bounding set.
+ PR_CAPBSET_DROP = 24
+
+ // PR_GET_TSC gets the value of the flag determining whether the
+ // timestamp counter can be read.
+ PR_GET_TSC = 25
+
+ // PR_SET_TSC sets the value of the flag determining whether the
+ // timestamp counter can be read.
+ PR_SET_TSC = 26
+
+ // PR_SET_TIMERSLACK sets the process' timer slack.
+ PR_SET_TIMERSLACK = 29
+
+ // PR_GET_TIMERSLACK gets the process' timer slack.
+ PR_GET_TIMERSLACK = 30
+
+ // PR_TASK_PERF_EVENTS_DISABLE disables all performance counters
+ // attached to the calling process.
+ PR_TASK_PERF_EVENTS_DISABLE = 31
+
+ // PR_TASK_PERF_EVENTS_ENABLE enables all performance counters attached
+ // to the calling process.
+ PR_TASK_PERF_EVENTS_ENABLE = 32
+
+ // PR_MCE_KILL sets the machine check memory corruption kill policy for
+ // the calling thread.
+ PR_MCE_KILL = 33
+
+ // PR_MCE_KILL_GET gets the machine check memory corruption kill policy
+ // for the calling thread.
+ PR_MCE_KILL_GET = 34
+
+ // PR_SET_MM modifies certain kernel memory map descriptor fields of
+ // the calling process. See prctl(2) for more information.
+ PR_SET_MM = 35
+
+ PR_SET_MM_START_CODE = 1 // The PR_SET_MM_* values restart at 1; presumably sub-options of PR_SET_MM (see prctl(2)).
+ PR_SET_MM_END_CODE = 2
+ PR_SET_MM_START_DATA = 3
+ PR_SET_MM_END_DATA = 4
+ PR_SET_MM_START_STACK = 5
+ PR_SET_MM_START_BRK = 6
+ PR_SET_MM_BRK = 7
+ PR_SET_MM_ARG_START = 8
+ PR_SET_MM_ARG_END = 9
+ PR_SET_MM_ENV_START = 10
+ PR_SET_MM_ENV_END = 11
+ PR_SET_MM_AUXV = 12
+ // PR_SET_MM_EXE_FILE supersedes the /proc/pid/exe symbolic link with a
+ // new one pointing to a new executable file identified by the file
+ // descriptor provided in arg3 argument. See prctl(2) for more
+ // information.
+ PR_SET_MM_EXE_FILE = 13
+ PR_SET_MM_MAP = 14
+ PR_SET_MM_MAP_SIZE = 15
+
+ // PR_SET_CHILD_SUBREAPER sets the "child subreaper" attribute of the
+ // calling process.
+ PR_SET_CHILD_SUBREAPER = 36
+
+ // PR_GET_CHILD_SUBREAPER gets the "child subreaper" attribute of the
+ // calling process.
+ PR_GET_CHILD_SUBREAPER = 37
+
+ // PR_SET_NO_NEW_PRIVS sets the calling thread's no_new_privs bit.
+ PR_SET_NO_NEW_PRIVS = 38
+
+ // PR_GET_NO_NEW_PRIVS gets the calling thread's no_new_privs bit.
+ PR_GET_NO_NEW_PRIVS = 39
+
+ // PR_GET_TID_ADDRESS retrieves the clear_child_tid address.
+ PR_GET_TID_ADDRESS = 40
+
+ // PR_SET_THP_DISABLE sets the state of the "THP disable" flag for the
+ // calling thread.
+ PR_SET_THP_DISABLE = 41
+
+ // PR_GET_THP_DISABLE gets the state of the "THP disable" flag for the
+ // calling thread.
+ PR_GET_THP_DISABLE = 42
+
+ // PR_MPX_ENABLE_MANAGEMENT enables kernel management of Memory
+ // Protection eXtensions (MPX) bounds tables.
+ PR_MPX_ENABLE_MANAGEMENT = 43
+
+ // PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory
+ // Protection eXtensions (MPX) bounds tables.
+ PR_MPX_DISABLE_MANAGEMENT = 44
+)
+
+// From <asm/prctl.h>.
+// These flags are used in the arch_prctl(2) syscall.
+const (
+ ARCH_SET_GS = 0x1001
+ ARCH_SET_FS = 0x1002
+ ARCH_GET_FS = 0x1003
+ ARCH_GET_GS = 0x1004
+ ARCH_SET_CPUID = 0x1012
+)
+
+// Values for prctl(PR_SET_DUMPABLE), defined in include/linux/sched/coredump.h.
+const (
+ SUID_DUMP_DISABLE = 0
+ SUID_DUMP_USER = 1
+ SUID_DUMP_ROOT = 2
+)
diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go
new file mode 100644
index 000000000..23e605ab2
--- /dev/null
+++ b/pkg/abi/linux/ptrace.go
@@ -0,0 +1,89 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// ptrace commands from include/uapi/linux/ptrace.h. Values from 0x4200 upward are written in hex.
+const (
+ PTRACE_TRACEME = 0
+ PTRACE_PEEKTEXT = 1
+ PTRACE_PEEKDATA = 2
+ PTRACE_PEEKUSR = 3
+ PTRACE_POKETEXT = 4
+ PTRACE_POKEDATA = 5
+ PTRACE_POKEUSR = 6
+ PTRACE_CONT = 7
+ PTRACE_KILL = 8
+ PTRACE_SINGLESTEP = 9
+ PTRACE_ATTACH = 16
+ PTRACE_DETACH = 17
+ PTRACE_SYSCALL = 24
+ PTRACE_SETOPTIONS = 0x4200
+ PTRACE_GETEVENTMSG = 0x4201
+ PTRACE_GETSIGINFO = 0x4202
+ PTRACE_SETSIGINFO = 0x4203
+ PTRACE_GETREGSET = 0x4204
+ PTRACE_SETREGSET = 0x4205
+ PTRACE_SEIZE = 0x4206
+ PTRACE_INTERRUPT = 0x4207
+ PTRACE_LISTEN = 0x4208
+ PTRACE_PEEKSIGINFO = 0x4209
+ PTRACE_GETSIGMASK = 0x420a
+ PTRACE_SETSIGMASK = 0x420b
+ PTRACE_SECCOMP_GET_FILTER = 0x420c
+ PTRACE_SECCOMP_GET_METADATA = 0x420d
+)
+
+// ptrace commands from arch/x86/include/uapi/asm/ptrace-abi.h. None of these values collide with the generic commands.
+const (
+ PTRACE_GETREGS = 12
+ PTRACE_SETREGS = 13
+ PTRACE_GETFPREGS = 14
+ PTRACE_SETFPREGS = 15
+ PTRACE_GETFPXREGS = 18
+ PTRACE_SETFPXREGS = 19
+ PTRACE_OLDSETOPTIONS = 21
+ PTRACE_GET_THREAD_AREA = 25
+ PTRACE_SET_THREAD_AREA = 26
+ PTRACE_ARCH_PRCTL = 30
+ PTRACE_SYSEMU = 31
+ PTRACE_SYSEMU_SINGLESTEP = 32
+ PTRACE_SINGLEBLOCK = 33
+)
+
+// ptrace event codes from include/uapi/linux/ptrace.h. Codes 1 through 7 are also used as shift counts for PTRACE_O_*.
+const (
+ PTRACE_EVENT_FORK = 1
+ PTRACE_EVENT_VFORK = 2
+ PTRACE_EVENT_CLONE = 3
+ PTRACE_EVENT_EXEC = 4
+ PTRACE_EVENT_VFORK_DONE = 5
+ PTRACE_EVENT_EXIT = 6
+ PTRACE_EVENT_SECCOMP = 7
+ PTRACE_EVENT_STOP = 128
+)
+
+// PTRACE_SETOPTIONS options from include/uapi/linux/ptrace.h. Most are the bit numbered by the matching event code.
+const (
+ PTRACE_O_TRACESYSGOOD = 1
+ PTRACE_O_TRACEFORK = 1 << PTRACE_EVENT_FORK
+ PTRACE_O_TRACEVFORK = 1 << PTRACE_EVENT_VFORK
+ PTRACE_O_TRACECLONE = 1 << PTRACE_EVENT_CLONE
+ PTRACE_O_TRACEEXEC = 1 << PTRACE_EVENT_EXEC
+ PTRACE_O_TRACEVFORKDONE = 1 << PTRACE_EVENT_VFORK_DONE
+ PTRACE_O_TRACEEXIT = 1 << PTRACE_EVENT_EXIT
+ PTRACE_O_TRACESECCOMP = 1 << PTRACE_EVENT_SECCOMP
+ PTRACE_O_EXITKILL = 1 << 20
+ PTRACE_O_SUSPEND_SECCOMP = 1 << 21
+)
diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go
new file mode 100644
index 000000000..ed3881e27
--- /dev/null
+++ b/pkg/abi/linux/ptrace_amd64.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// PtraceRegs is the set of CPU registers exposed by ptrace on amd64 (27 uint64 fields). Source:
+// syscall.PtraceRegs.
+//
+// +marshal
+// +stateify savable
+type PtraceRegs struct {
+ R15 uint64
+ R14 uint64
+ R13 uint64
+ R12 uint64
+ Rbp uint64
+ Rbx uint64
+ R11 uint64
+ R10 uint64
+ R9 uint64
+ R8 uint64
+ Rax uint64
+ Rcx uint64
+ Rdx uint64
+ Rsi uint64
+ Rdi uint64
+ Orig_rax uint64
+ Rip uint64
+ Cs uint64
+ Eflags uint64
+ Rsp uint64
+ Ss uint64
+ Fs_base uint64
+ Gs_base uint64
+ Ds uint64
+ Es uint64
+ Fs uint64
+ Gs uint64
+}
diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go
new file mode 100644
index 000000000..6147738b3
--- /dev/null
+++ b/pkg/abi/linux/ptrace_arm64.go
@@ -0,0 +1,29 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package linux
+
+// PtraceRegs is the set of CPU registers exposed by ptrace on arm64 (34 uint64 values). Source:
+// syscall.PtraceRegs.
+//
+// +marshal
+// +stateify savable
+type PtraceRegs struct {
+ Regs [31]uint64
+ Sp uint64
+ Pc uint64
+ Pstate uint64
+}
diff --git a/pkg/abi/linux/rseq.go b/pkg/abi/linux/rseq.go
new file mode 100644
index 000000000..76253ba30
--- /dev/null
+++ b/pkg/abi/linux/rseq.go
@@ -0,0 +1,130 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Flags passed to rseq(2). Only one flag is defined here.
+//
+// Defined in include/uapi/linux/rseq.h.
+const (
+ // RSEQ_FLAG_UNREGISTER unregisters the current thread.
+ RSEQ_FLAG_UNREGISTER = 1 << 0
+)
+
+// Critical section flags used in RSeqCriticalSection.Flags and RSeq.Flags. Each flag is a distinct bit.
+//
+// Defined in include/uapi/linux/rseq.h.
+const (
+ // RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT inhibits restart on preemption.
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = 1 << 0
+
+ // RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL inhibits restart on signal
+ // delivery.
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = 1 << 1
+
+ // RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE inhibits restart on CPU
+ // migration.
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = 1 << 2
+)
+
+// RSeqCriticalSection describes a restartable sequences critical section. It
+// is equivalent to struct rseq_cs, defined in include/uapi/linux/rseq.h.
+//
+// In userspace, this structure is always aligned to 32 bytes.
+//
+// +marshal
+type RSeqCriticalSection struct {
+ // Version is the version of this structure. Version 0 is defined here.
+ Version uint32
+
+ // Flags are the critical section flags (RSEQ_CS_FLAG_*), defined above.
+ Flags uint32
+
+ // Start is the start address of the critical section.
+ Start uint64
+
+ // PostCommitOffset is the offset from Start of the first instruction
+ // outside of the critical section.
+ PostCommitOffset uint64
+
+ // Abort is the abort address. It must be outside the critical section,
+ // and the 4 bytes prior must match the abort signature.
+ Abort uint64
+}
+
+const (
+ // SizeOfRSeqCriticalSection is the size in bytes of RSeqCriticalSection (4+4+8+8+8).
+ SizeOfRSeqCriticalSection = 32
+
+ // SizeOfRSeqSignature is the size in bytes of the signature immediately
+ // preceding RSeqCriticalSection.Abort.
+ SizeOfRSeqSignature = 4
+)
+
+// Special values for RSeq.CPUID, defined in include/uapi/linux/rseq.h. Both are typed uint32.
+const (
+ // RSEQ_CPU_ID_UNINITIALIZED indicates that this thread has not
+ // performed rseq initialization.
+ RSEQ_CPU_ID_UNINITIALIZED = ^uint32(0) // -1, i.e. 0xFFFFFFFF
+
+ // RSEQ_CPU_ID_REGISTRATION_FAILED indicates that rseq initialization
+ // failed.
+ RSEQ_CPU_ID_REGISTRATION_FAILED = ^uint32(1) // -2, i.e. 0xFFFFFFFE
+)
+
+// RSeq is the thread-local restartable sequences config/status. It
+// is equivalent to struct rseq, defined in include/uapi/linux/rseq.h.
+//
+// In userspace, this structure is always aligned to 32 bytes.
+type RSeq struct {
+ // CPUIDStart contains the current CPU ID if rseq is initialized.
+ //
+ // This field should only be read by the thread which registered this
+ // structure, and must be read atomically.
+ CPUIDStart uint32
+
+ // CPUID contains the current CPU ID or one of the CPU ID special
+ // values (RSEQ_CPU_ID_*) defined above.
+ //
+ // This field should only be read by the thread which registered this
+ // structure, and must be read atomically.
+ CPUID uint32
+
+ // RSeqCriticalSection is a pointer to the current RSeqCriticalSection
+ // block, or NULL. It is reset to NULL by the kernel on restart or
+ // non-restarting preempt/signal.
+ //
+ // This field should only be written by the thread which registered
+ // this structure, and must be written atomically.
+ RSeqCriticalSection uint64
+
+ // Flags are the critical section flags (RSEQ_CS_FLAG_*) that apply to all critical
+ // sections on this thread, defined above.
+ Flags uint32
+}
+
+const (
+ // SizeOfRSeq is the size in bytes of RSeq.
+ //
+ // Note that RSeq is naively 24 bytes. However, it has 32-byte
+ // alignment, which in C increases sizeof to 32. That is the size that
+ // the Linux kernel uses.
+ SizeOfRSeq = 32
+
+ // AlignOfRSeq is the standard alignment, in bytes, of RSeq.
+ AlignOfRSeq = 32
+
+ // OffsetOfRSeqCriticalSection is the byte offset of RSeqCriticalSection in RSeq (after two uint32 fields).
+ OffsetOfRSeqCriticalSection = 8
+)
diff --git a/pkg/abi/linux/rusage.go b/pkg/abi/linux/rusage.go
new file mode 100644
index 000000000..d8302dc85
--- /dev/null
+++ b/pkg/abi/linux/rusage.go
@@ -0,0 +1,46 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Flags that may be used with wait4(2) and getrusage(2).
+const (
+ // RUSAGE_BOTH is used by wait4(2) to aggregate RUSAGE_SELF and RUSAGE_CHILDREN.
+ RUSAGE_BOTH = -0x2
+
+ // The remaining constants are getrusage(2) flags.
+ RUSAGE_CHILDREN = -0x1
+ RUSAGE_SELF = 0x0
+ RUSAGE_THREAD = 0x1
+)
+
+// Rusage represents the Linux struct rusage: two Timeval fields followed by fourteen int64 counters.
+type Rusage struct {
+ UTime Timeval // NOTE(review): presumably ru_utime (user CPU time) — confirm against getrusage(2)
+ STime Timeval // NOTE(review): presumably ru_stime (system CPU time) — confirm against getrusage(2)
+ MaxRSS int64
+ IXRSS int64
+ IDRSS int64
+ ISRSS int64
+ MinFlt int64
+ MajFlt int64
+ NSwap int64
+ InBlock int64
+ OuBlock int64
+ MsgSnd int64
+ MsgRcv int64
+ NSignals int64
+ NVCSw int64
+ NIvCSw int64
+}
diff --git a/pkg/abi/linux/sched.go b/pkg/abi/linux/sched.go
new file mode 100644
index 000000000..70e820823
--- /dev/null
+++ b/pkg/abi/linux/sched.go
@@ -0,0 +1,36 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Scheduling policies, exposed by sched_getscheduler(2)/sched_setscheduler(2).
+const (
+ SCHED_NORMAL = 0
+ SCHED_FIFO = 1
+ SCHED_RR = 2
+ SCHED_BATCH = 3
+ SCHED_IDLE = 5 // value 4 is not defined in this list
+ SCHED_DEADLINE = 6
+ SCHED_MICROQ = 16
+
+ // SCHED_RESET_ON_FORK is a flag that indicates that the process is
+ // reverted back to SCHED_NORMAL on fork.
+ SCHED_RESET_ON_FORK = 0x40000000 // a flag bit rather than a policy number
+)
+
+const (
+ PRIO_PGRP = 0x1 // NOTE(review): presumably the "which" selectors of getpriority(2)/setpriority(2) — confirm
+ PRIO_PROCESS = 0x0
+ PRIO_USER = 0x2
+)
diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go
new file mode 100644
index 000000000..d0607e256
--- /dev/null
+++ b/pkg/abi/linux/seccomp.go
@@ -0,0 +1,72 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import "fmt"
+
+// Seccomp constants taken from <linux/seccomp.h>.
+const (
+ SECCOMP_MODE_NONE = 0
+ SECCOMP_MODE_FILTER = 2
+
+ SECCOMP_RET_ACTION_FULL = 0xffff0000 // upper 16 bits; the mask BPFAction.String switches on
+ SECCOMP_RET_ACTION = 0x7fff0000 // SECCOMP_RET_ACTION_FULL without the top bit
+ SECCOMP_RET_DATA = 0x0000ffff // lower 16 bits; the mask BPFAction.Data applies
+
+ SECCOMP_SET_MODE_FILTER = 1
+ SECCOMP_FILTER_FLAG_TSYNC = 1
+ SECCOMP_GET_ACTION_AVAIL = 2
+)
+
+type BPFAction uint32 // BPFAction is a seccomp filter return value: action in the upper 16 bits, data in the lower 16 bits.
+
+const (
+ SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000 // only this constant is typed BPFAction; the ones below are untyped constants
+ SECCOMP_RET_KILL_THREAD = 0x00000000
+ SECCOMP_RET_TRAP = 0x00030000
+ SECCOMP_RET_ERRNO = 0x00050000
+ SECCOMP_RET_TRACE = 0x7ff00000
+ SECCOMP_RET_ALLOW = 0x7fff0000 // same value as SECCOMP_RET_ACTION
+)
+
+// String returns a human-readable description of the action. Trap, errno and
+// trace actions also include their SECCOMP_RET_DATA value. An action that
+// matches none of the known SECCOMP_RET_* values is reported as
+// "invalid action: 0x...".
+func (a BPFAction) String() string {
+	switch a & SECCOMP_RET_ACTION_FULL {
+	case SECCOMP_RET_KILL_PROCESS:
+		return "kill process"
+	case SECCOMP_RET_KILL_THREAD:
+		return "kill thread"
+	case SECCOMP_RET_TRAP:
+		return fmt.Sprintf("trap (%d)", a.Data())
+	case SECCOMP_RET_ERRNO:
+		return fmt.Sprintf("errno (%d)", a.Data())
+	case SECCOMP_RET_TRACE:
+		return fmt.Sprintf("trace (%d)", a.Data())
+	case SECCOMP_RET_ALLOW:
+		return "allow"
+	}
+	// Format the underlying integer rather than a itself. BPFAction is a
+	// fmt.Stringer, and fmt invokes String for the %x verb, so passing a
+	// here would call this method again and recurse without bound.
+	return fmt.Sprintf("invalid action: %#x", uint32(a))
+}
+
+// Data extracts the low 16 bits of the action, i.e. the part selected by the
+// SECCOMP_RET_DATA mask.
+func (a BPFAction) Data() uint16 {
+	low := a & SECCOMP_RET_DATA
+	return uint16(low)
+}
+
+// SockFprog is sock_fprog taken from <linux/filter.h>.
+type SockFprog struct {
+ Len uint16 // NOTE(review): presumably the number of instructions Filter points to — confirm against <linux/filter.h>
+ pad [6]byte // fills bytes 2-7 so that Filter starts at byte offset 8
+ Filter *BPFInstruction
+}
diff --git a/pkg/abi/linux/sem.go b/pkg/abi/linux/sem.go
new file mode 100644
index 000000000..de422c519
--- /dev/null
+++ b/pkg/abi/linux/sem.go
@@ -0,0 +1,52 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// semctl command definitions; the values run consecutively from 11 to 17. Source: include/uapi/linux/sem.h
+const (
+ GETPID = 11
+ GETVAL = 12
+ GETALL = 13
+ GETNCNT = 14
+ GETZCNT = 15
+ SETVAL = 16
+ SETALL = 17
+)
+
+// ipcs ctl commands; the values continue the semctl command numbering at 18. Source: include/uapi/linux/sem.h
+const (
+ SEM_STAT = 18
+ SEM_INFO = 19
+ SEM_STAT_ANY = 20
+)
+
+const SEM_UNDO = 0x1000 // NOTE(review): presumably a Sembuf.SemFlg bit — confirm against semop(2)
+
+// SemidDS is equivalent to struct semid64_ds.
+type SemidDS struct {
+ SemPerm IPCPerm
+ SemOTime TimeT
+ SemCTime TimeT
+ SemNSems uint64
+ unused3 uint64 // unexported placeholder; occupies 8 bytes of the layout
+ unused4 uint64 // unexported placeholder; occupies 8 bytes of the layout
+}
+
+// Sembuf is equivalent to struct sembuf. It consists of three 16-bit fields (6 bytes in total).
+type Sembuf struct {
+ SemNum uint16
+ SemOp int16
+ SemFlg int16
+}
diff --git a/pkg/abi/linux/shm.go b/pkg/abi/linux/shm.go
new file mode 100644
index 000000000..e45aadb10
--- /dev/null
+++ b/pkg/abi/linux/shm.go
@@ -0,0 +1,86 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import "math"
+
+// shmat(2) flags, written as octal literals. Source: include/uapi/linux/shm.h
+const (
+ SHM_RDONLY = 010000 // Read-only access.
+ SHM_RND = 020000 // Round attach address to SHMLBA boundary.
+ SHM_REMAP = 040000 // Take-over region on attach.
+ SHM_EXEC = 0100000 // Execution access.
+)
+
+// IPCPerm.Mode upper byte flags, written as octal literals. Source: include/linux/shm.h
+const (
+ SHM_DEST = 01000 // Segment will be destroyed on last detach.
+ SHM_LOCKED = 02000 // Segment will not be swapped.
+ SHM_HUGETLB = 04000 // Segment will use huge TLB pages.
+ SHM_NORESERVE = 010000 // Don't check for reservations. (Numerically equal to the shmat flag SHM_RDONLY.)
+)
+
+// Additional Linux-only commands for shmctl(2); values run consecutively from 11 to 14. Source: include/uapi/linux/shm.h
+const (
+ SHM_LOCK = 11
+ SHM_UNLOCK = 12
+ SHM_STAT = 13
+ SHM_INFO = 14
+)
+
+// SHM defaults as specified by Linux. Source: include/uapi/linux/shm.h
+const (
+ SHMMIN = 1
+ SHMMNI = 4096
+ SHMMAX = math.MaxUint64 - 1<<24 // the shift binds tighter than the subtraction: MaxUint64 - (1<<24)
+ SHMALL = math.MaxUint64 - 1<<24 // same value as SHMMAX
+ SHMSEG = 4096
+)
+
+// ShmidDS is equivalent to struct shmid64_ds. Source:
+// include/uapi/asm-generic/shmbuf.h
+type ShmidDS struct {
+ ShmPerm IPCPerm
+ ShmSegsz uint64
+ ShmAtime TimeT
+ ShmDtime TimeT
+ ShmCtime TimeT
+ ShmCpid int32
+ ShmLpid int32 // ShmCpid and ShmLpid together fill one 8-byte slot
+ ShmNattach uint64
+
+ Unused4 uint64 // exported placeholder, unlike the unexported placeholders in SemidDS
+ Unused5 uint64
+}
+
+// ShmParams is equivalent to struct shminfo: five 64-bit limits. Source: include/uapi/linux/shm.h
+type ShmParams struct {
+ ShmMax uint64 // NOTE(review): presumably reported from SHMMAX — verify against the caller
+ ShmMin uint64
+ ShmMni uint64
+ ShmSeg uint64
+ ShmAll uint64
+}
+
+// ShmInfo is equivalent to struct shm_info. Source: include/uapi/linux/shm.h
+type ShmInfo struct {
+ UsedIDs int32 // Number of currently existing segments.
+ _ [4]byte // Padding so that ShmTot starts at byte offset 8.
+ ShmTot uint64 // Total number of shared memory pages.
+ ShmRss uint64 // Number of resident shared memory pages.
+ ShmSwp uint64 // Number of swapped shared memory pages.
+ SwapAttempts uint64 // Unused since Linux 2.4.
+ SwapSuccesses uint64 // Unused since Linux 2.4.
+}
diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go
new file mode 100644
index 000000000..1c330e763
--- /dev/null
+++ b/pkg/abi/linux/signal.go
@@ -0,0 +1,234 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/bits"
+)
+
+const (
+ // SignalMaximum is the highest valid signal number.
+ SignalMaximum = 64
+
+ // FirstStdSignal is the lowest standard signal number.
+ FirstStdSignal = 1
+
+ // LastStdSignal is the highest standard signal number.
+ LastStdSignal = 31
+
+ // FirstRTSignal is the lowest real-time signal number.
+ //
+ // 32 (SIGCANCEL) and 33 (SIGSETXID) are used internally by glibc.
+ FirstRTSignal = 32
+
+ // LastRTSignal is the highest real-time signal number.
+ LastRTSignal = 64 // equal to SignalMaximum
+
+ // NumStdSignals is the number of standard signals.
+ NumStdSignals = LastStdSignal - FirstStdSignal + 1 // 31
+
+ // NumRTSignals is the number of realtime signals.
+ NumRTSignals = LastRTSignal - FirstRTSignal + 1 // 33
+)
+
+// Signal is a signal number. Valid values lie in [1, SignalMaximum]; see IsValid.
+type Signal int
+
+// IsValid reports whether s names a standard or realtime signal, i.e. lies in
+// the range [1, SignalMaximum]. Signal number 0 is rejected; interfaces that
+// give 0 a special meaning must test for it before asserting validity.
+func (s Signal) IsValid() bool {
+	if s <= 0 {
+		return false
+	}
+	return s <= SignalMaximum
+}
+
+// IsStandard reports whether s is a standard signal, i.e. does not exceed
+// LastStdSignal.
+//
+// Preconditions: s.IsValid().
+func (s Signal) IsStandard() bool {
+	return LastStdSignal >= s
+}
+
+// IsRealtime reports whether s is a realtime signal, i.e. is at least
+// FirstRTSignal.
+//
+// Preconditions: s.IsValid().
+func (s Signal) IsRealtime() bool {
+	return FirstRTSignal <= s
+}
+
+// Index maps s to its zero-based position in arrays that cover both standard
+// and realtime signals (e.g. signal masks).
+//
+// Preconditions: s.IsValid().
+func (s Signal) Index() int {
+	// Signal numbers start at 1, array positions at 0.
+	idx := int(s)
+	return idx - 1
+}
+
+// Signals, listed in ascending numeric order. Names that share a number are
+// declared as aliases of the first name carrying that number.
+const (
+	SIGHUP    = Signal(1)
+	SIGINT    = Signal(2)
+	SIGQUIT   = Signal(3)
+	SIGILL    = Signal(4)
+	SIGTRAP   = Signal(5)
+	SIGABRT   = Signal(6)
+	SIGIOT    = SIGABRT
+	SIGBUS    = Signal(7)
+	SIGFPE    = Signal(8)
+	SIGKILL   = Signal(9)
+	SIGUSR1   = Signal(10)
+	SIGSEGV   = Signal(11)
+	SIGUSR2   = Signal(12)
+	SIGPIPE   = Signal(13)
+	SIGALRM   = Signal(14)
+	SIGTERM   = Signal(15)
+	SIGSTKFLT = Signal(16)
+	SIGCHLD   = Signal(17)
+	SIGCLD    = SIGCHLD
+	SIGCONT   = Signal(18)
+	SIGSTOP   = Signal(19)
+	SIGTSTP   = Signal(20)
+	SIGTTIN   = Signal(21)
+	SIGTTOU   = Signal(22)
+	SIGURG    = Signal(23)
+	SIGXCPU   = Signal(24)
+	SIGXFSZ   = Signal(25)
+	SIGVTALRM = Signal(26)
+	SIGPROF   = Signal(27)
+	SIGWINCH  = Signal(28)
+	SIGIO     = Signal(29)
+	SIGPOLL   = SIGIO
+	SIGPWR    = Signal(30)
+	SIGSYS    = Signal(31)
+	SIGUNUSED = SIGSYS
+)
+
+// SignalSet is a signal mask with a bit corresponding to each signal.
+//
+// +marshal
+type SignalSet uint64 // bit Signal.Index() (signal number minus one) represents that signal
+
+// SignalSetSize is the size in bytes of a SignalSet.
+const SignalSetSize = 8 // SignalSet is a uint64
+
+// MakeSignalSet builds a SignalSet in which the bit for every signal in sigs
+// is set.
+func MakeSignalSet(sigs ...Signal) SignalSet {
+	positions := make([]int, 0, len(sigs))
+	for _, s := range sigs {
+		positions = append(positions, s.Index())
+	}
+	return SignalSet(bits.Mask64(positions...))
+}
+
+// SignalSetOf builds a SignalSet containing only sig.
+func SignalSetOf(sig Signal) SignalSet {
+	bit := bits.MaskOf64(sig.Index())
+	return SignalSet(bit)
+}
+
+// ForEachSignal calls f once for every signal whose bit is set in mask.
+func ForEachSignal(mask SignalSet, f func(sig Signal)) {
+	visit := func(bit int) {
+		// Bit positions are zero-based; signal numbers start at 1.
+		f(Signal(bit + 1))
+	}
+	bits.ForEachSetBit64(uint64(mask), visit)
+}
+
+// 'how' values for rt_sigprocmask(2).
+const (
+ // SIG_BLOCK blocks the signals in the set.
+ SIG_BLOCK = 0
+
+ // SIG_UNBLOCK unblocks the signals in the set.
+ SIG_UNBLOCK = 1
+
+ // SIG_SETMASK sets the signal mask to set.
+ SIG_SETMASK = 2
+)
+
+// Signal actions for rt_sigaction(2), from uapi/asm-generic/signal-defs.h.
+const (
+ // SIG_DFL performs the default action.
+ SIG_DFL = 0 // NOTE(review): presumably compared against the handler address — verify against callers
+
+ // SIG_IGN ignores the signal.
+ SIG_IGN = 1
+)
+
+// Signal action flags for rt_sigaction(2), from uapi/asm-generic/signal.h
+const (
+ SA_NOCLDSTOP = 0x00000001
+ SA_NOCLDWAIT = 0x00000002
+ SA_SIGINFO = 0x00000004
+ SA_RESTORER = 0x04000000
+ SA_ONSTACK = 0x08000000
+ SA_RESTART = 0x10000000
+ SA_NODEFER = 0x40000000
+ SA_RESETHAND = 0x80000000
+ SA_NOMASK = SA_NODEFER // alias
+ SA_ONESHOT = SA_RESETHAND // alias
+)
+
+// Signal info types. Each type is a small integer stored in the upper 16 bits.
+const (
+ SI_MASK = 0xffff0000 // selects the upper 16 bits, where the types below live
+ SI_KILL = 0 << 16
+ SI_TIMER = 1 << 16
+ SI_POLL = 2 << 16
+ SI_FAULT = 3 << 16
+ SI_CHLD = 4 << 16
+ SI_RT = 5 << 16
+ SI_MESGQ = 6 << 16
+ SI_SYS = 7 << 16
+)
+
+// SIGPOLL si_codes. Each is SI_POLL combined with a code from 1 to 6 in the low bits.
+const (
+ // POLL_IN indicates that input data is available.
+ POLL_IN = SI_POLL | 1
+
+ // POLL_OUT indicates that output buffers are available.
+ POLL_OUT = SI_POLL | 2
+
+ // POLL_MSG indicates that an input message is available.
+ POLL_MSG = SI_POLL | 3
+
+ // POLL_ERR indicates that there was an I/O error.
+ POLL_ERR = SI_POLL | 4
+
+ // POLL_PRI indicates that high priority input is available.
+ POLL_PRI = SI_POLL | 5
+
+ // POLL_HUP indicates that a device disconnected.
+ POLL_HUP = SI_POLL | 6
+)
+
+// Sigevent represents struct sigevent. The fields below total 64 bytes.
+type Sigevent struct {
+ Value uint64 // union sigval {int, void*}
+ Signo int32
+ Notify int32 // one of the SIGEV_* values
+
+ // struct sigevent here contains 48-byte union _sigev_un. However, only
+ // member _tid is significant to the kernel.
+ Tid int32 // first 4 bytes of the 48-byte union
+ UnRemainder [44]byte // remaining 44 bytes of the 48-byte union
+}
+
+// Possible values for Sigevent.Notify, aka struct sigevent::sigev_notify.
+const (
+ SIGEV_SIGNAL = 0
+ SIGEV_NONE = 1
+ SIGEV_THREAD = 2
+ SIGEV_THREAD_ID = 4 // value 3 is not defined in this list
+)
diff --git a/pkg/abi/linux/signalfd.go b/pkg/abi/linux/signalfd.go
new file mode 100644
index 000000000..85fad9956
--- /dev/null
+++ b/pkg/abi/linux/signalfd.go
@@ -0,0 +1,45 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+const (
+ // SFD_NONBLOCK is a signalfd(2) flag.
+ SFD_NONBLOCK = 00004000 // octal literal
+
+ // SFD_CLOEXEC is a signalfd(2) flag.
+ SFD_CLOEXEC = 02000000 // octal literal
+)
+
+// SignalfdSiginfo is the siginfo encoding for signalfds.
+type SignalfdSiginfo struct {
+ Signo uint32
+ Errno int32
+ Code int32
+ PID uint32
+ UID uint32
+ FD int32
+ TID uint32
+ Band uint32
+ Overrun uint32
+ TrapNo uint32
+ Status int32
+ Int int32
+ Ptr uint64
+ UTime uint64
+ STime uint64
+ Addr uint64
+ AddrLSB uint16 // the named fields end here, at byte 82
+ _ [48]uint8 // NOTE(review): 82 + 48 = 130 packed bytes, but signalfd(2) documents a 128-byte struct — confirm whether this should be 46
+}
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
new file mode 100644
index 000000000..4a14ef691
--- /dev/null
+++ b/pkg/abi/linux/socket.go
@@ -0,0 +1,456 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import "gvisor.dev/gvisor/pkg/binary"
+
+// Address families, from linux/socket.h. The values run consecutively from 0 to 40.
+const (
+ AF_UNSPEC = 0
+ AF_UNIX = 1
+ AF_INET = 2
+ AF_AX25 = 3
+ AF_IPX = 4
+ AF_APPLETALK = 5
+ AF_NETROM = 6
+ AF_BRIDGE = 7
+ AF_ATMPVC = 8
+ AF_X25 = 9
+ AF_INET6 = 10
+ AF_ROSE = 11
+ AF_DECnet = 12
+ AF_NETBEUI = 13
+ AF_SECURITY = 14
+ AF_KEY = 15
+ AF_NETLINK = 16
+ AF_PACKET = 17
+ AF_ASH = 18
+ AF_ECONET = 19
+ AF_ATMSVC = 20
+ AF_RDS = 21
+ AF_SNA = 22
+ AF_IRDA = 23
+ AF_PPPOX = 24
+ AF_WANPIPE = 25
+ AF_LLC = 26
+ AF_IB = 27
+ AF_MPLS = 28
+ AF_CAN = 29
+ AF_TIPC = 30
+ AF_BLUETOOTH = 31
+ AF_IUCV = 32
+ AF_RXRPC = 33
+ AF_ISDN = 34
+ AF_PHONET = 35
+ AF_IEEE802154 = 36
+ AF_CAIF = 37
+ AF_ALG = 38
+ AF_NFC = 39
+ AF_VSOCK = 40
+)
+
+// sendmsg(2)/recvmsg(2) flags, from linux/socket.h. Each flag is a single bit.
+const (
+ MSG_OOB = 0x1
+ MSG_PEEK = 0x2
+ MSG_DONTROUTE = 0x4
+ MSG_TRYHARD = 0x4 // same value as MSG_DONTROUTE
+ MSG_CTRUNC = 0x8
+ MSG_PROBE = 0x10
+ MSG_TRUNC = 0x20
+ MSG_DONTWAIT = 0x40
+ MSG_EOR = 0x80
+ MSG_WAITALL = 0x100
+ MSG_FIN = 0x200
+ MSG_EOF = MSG_FIN // alias
+ MSG_SYN = 0x400
+ MSG_CONFIRM = 0x800
+ MSG_RST = 0x1000
+ MSG_ERRQUEUE = 0x2000
+ MSG_NOSIGNAL = 0x4000
+ MSG_MORE = 0x8000
+ MSG_WAITFORONE = 0x10000
+ MSG_SENDPAGE_NOTLAST = 0x20000
+ MSG_REINJECT = 0x8000000
+ MSG_ZEROCOPY = 0x4000000
+ MSG_FASTOPEN = 0x20000000
+ MSG_CMSG_CLOEXEC = 0x40000000
+)
+
+// Socket option levels for setting and getting socket options, from socket.h.
+const (
+ SOL_IP = 0
+ SOL_SOCKET = 1
+ SOL_TCP = 6
+ SOL_UDP = 17
+ SOL_IPV6 = 41
+ SOL_ICMPV6 = 58
+ SOL_RAW = 255
+ SOL_PACKET = 263
+ SOL_NETLINK = 270
+)
+
+// A SockType is a type (as opposed to family) of sockets. These are enumerated
+// below as SOCK_* constants.
+type SockType int
+
+// Socket types, from linux/net.h.
+const (
+ SOCK_STREAM SockType = 1 // only this constant is typed SockType; the ones below are untyped constants
+ SOCK_DGRAM = 2
+ SOCK_RAW = 3
+ SOCK_RDM = 4
+ SOCK_SEQPACKET = 5
+ SOCK_DCCP = 6
+ SOCK_PACKET = 10 // values 7-9 are not defined in this list
+)
+
+// SOCK_TYPE_MASK covers all of the above socket types. The remaining bits are
+// flags. From linux/net.h.
+const SOCK_TYPE_MASK = 0xf // low 4 bits; the largest SOCK_* type above is 10
+
+// socket(2)/socketpair(2)/accept4(2) flags, from linux/net.h.
+const (
+ SOCK_CLOEXEC = O_CLOEXEC // same value as the open(2) flag
+ SOCK_NONBLOCK = O_NONBLOCK // same value as the open(2) flag
+)
+
+// shutdown(2) 'how' commands, from <linux/net.h>.
+const (
+ SHUT_RD = 0
+ SHUT_WR = 1
+ SHUT_RDWR = 2 // NOTE(review): a distinct value, not the bitwise OR of SHUT_RD and SHUT_WR
+)
+
+// Socket options from socket.h.
+const (
+ SO_DEBUG = 1
+ SO_REUSEADDR = 2
+ SO_TYPE = 3
+ SO_ERROR = 4
+ SO_DONTROUTE = 5
+ SO_BROADCAST = 6
+ SO_SNDBUF = 7
+ SO_RCVBUF = 8
+ SO_KEEPALIVE = 9
+ SO_OOBINLINE = 10
+ SO_NO_CHECK = 11
+ SO_PRIORITY = 12
+ SO_LINGER = 13
+ SO_BSDCOMPAT = 14
+ SO_REUSEPORT = 15
+ SO_PASSCRED = 16
+ SO_PEERCRED = 17
+ SO_RCVLOWAT = 18
+ SO_SNDLOWAT = 19
+ SO_RCVTIMEO = 20
+ SO_SNDTIMEO = 21
+ SO_BINDTODEVICE = 25 // values 22-24 are not defined in this list
+ SO_ATTACH_FILTER = 26
+ SO_DETACH_FILTER = 27
+ SO_GET_FILTER = SO_ATTACH_FILTER // alias
+ SO_PEERNAME = 28
+ SO_TIMESTAMP = 29
+ SO_ACCEPTCONN = 30
+ SO_PEERSEC = 31
+ SO_SNDBUFFORCE = 32
+ SO_RCVBUFFORCE = 33
+ SO_PASSSEC = 34
+ SO_TIMESTAMPNS = 35
+ SO_MARK = 36
+ SO_TIMESTAMPING = 37
+ SO_PROTOCOL = 38
+ SO_DOMAIN = 39
+ SO_RXQ_OVFL = 40
+ SO_WIFI_STATUS = 41
+ SO_PEEK_OFF = 42
+ SO_NOFCS = 43
+ SO_LOCK_FILTER = 44
+ SO_SELECT_ERR_QUEUE = 45
+ SO_BUSY_POLL = 46
+ SO_MAX_PACING_RATE = 47
+ SO_BPF_EXTENSIONS = 48
+ SO_INCOMING_CPU = 49
+ SO_ATTACH_BPF = 50
+ SO_ATTACH_REUSEPORT_CBPF = 51
+ SO_ATTACH_REUSEPORT_EBPF = 52
+ SO_CNX_ADVICE = 53
+ SO_MEMINFO = 55 // value 54 is not defined in this list
+ SO_INCOMING_NAPI_ID = 56
+ SO_COOKIE = 57
+ SO_PEERGROUPS = 59 // value 58 is not defined in this list
+ SO_ZEROCOPY = 60
+ SO_TXTIME = 61
+)
+
+// enum socket_state, from uapi/linux/net.h. The values run consecutively from 0 to 4.
+const (
+ SS_FREE = 0 // Not allocated.
+ SS_UNCONNECTED = 1 // Unconnected to any socket.
+ SS_CONNECTING = 2 // In process of connecting.
+ SS_CONNECTED = 3 // Connected to socket.
+ SS_DISCONNECTING = 4 // In process of disconnecting.
+)
+
+// TCP protocol states, from include/net/tcp_states.h.
+const (
+ TCP_ESTABLISHED uint32 = iota + 1 // 1; the uint32 type and iota expression repeat for every name below
+ TCP_SYN_SENT
+ TCP_SYN_RECV
+ TCP_FIN_WAIT1
+ TCP_FIN_WAIT2
+ TCP_TIME_WAIT
+ TCP_CLOSE
+ TCP_CLOSE_WAIT
+ TCP_LAST_ACK
+ TCP_LISTEN
+ TCP_CLOSING
+ TCP_NEW_SYN_RECV // 12
+)
+
+// SockAddrMax is the maximum size of a struct sockaddr, from
+// uapi/linux/socket.h.
+const SockAddrMax = 128 // in bytes
+
+// InetAddr is struct in_addr, from uapi/linux/in.h.
+type InetAddr [4]byte // four raw address bytes
+
+// SockAddrInet is struct sockaddr_in, from uapi/linux/in.h. The fields total 16 bytes.
+type SockAddrInet struct {
+ Family uint16
+ Port uint16
+ Addr InetAddr
+ Zero [8]uint8 // pad to sizeof(struct sockaddr).
+}
+
+// InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h. It holds two 4-byte addresses.
+type InetMulticastRequest struct {
+ MulticastAddr InetAddr
+ InterfaceAddr InetAddr
+}
+
+// InetMulticastRequestWithNIC is struct ip_mreqn, from uapi/linux/in.h.
+type InetMulticastRequestWithNIC struct {
+ InetMulticastRequest // embedded, so the two addresses come first in the layout
+ InterfaceIndex int32
+}
+
+// SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h. The fields total 28 bytes.
+type SockAddrInet6 struct {
+ Family uint16
+ Port uint16
+ Flowinfo uint32
+ Addr [16]byte
+ Scope_id uint32
+}
+
+// SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h. The fields total 20 bytes.
+type SockAddrLink struct {
+ Family uint16
+ Protocol uint16
+ InterfaceIndex int32
+ ARPHardwareType uint16
+ PacketType byte
+ HardwareAddrLen byte // NOTE(review): presumably the number of meaningful bytes in HardwareAddr — confirm against callers
+ HardwareAddr [8]byte
+}
+
+// UnixPathMax is the maximum length of the path in an AF_UNIX socket.
+//
+// From uapi/linux/un.h.
+const UnixPathMax = 108 // also the array length of SockAddrUnix.Path
+
+// SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h.
+type SockAddrUnix struct {
+ Family uint16
+ Path [UnixPathMax]int8 // signed bytes, not a Go string
+}
+
+// SockAddr represents a union of valid socket address types. This is logically
+// equivalent to struct sockaddr. SockAddr ensures that a well-defined set of
+// types can be used as socket addresses.
+type SockAddr interface {
+ // implementsSockAddr exists purely to allow a type to indicate that they
+ // implement this interface. This method is a no-op and shouldn't be called.
+ implementsSockAddr() // unexported, so only types declared in this package can satisfy SockAddr
+}
+
+// implementsSockAddr marks *SockAddrInet as a SockAddr. It does nothing.
+func (*SockAddrInet) implementsSockAddr() {}
+
+// implementsSockAddr marks *SockAddrInet6 as a SockAddr. It does nothing.
+func (*SockAddrInet6) implementsSockAddr() {}
+
+// implementsSockAddr marks *SockAddrLink as a SockAddr. It does nothing.
+func (*SockAddrLink) implementsSockAddr() {}
+
+// implementsSockAddr marks *SockAddrUnix as a SockAddr. It does nothing.
+func (*SockAddrUnix) implementsSockAddr() {}
+
+// implementsSockAddr marks *SockAddrNetlink as a SockAddr. It does nothing.
+func (*SockAddrNetlink) implementsSockAddr() {}
+
+// Linger is struct linger, from include/linux/socket.h.
+type Linger struct {
+ OnOff int32
+ Linger int32
+}
+
+// SizeOfLinger is the binary size of a Linger struct.
+const SizeOfLinger = 8 // two int32 fields
+
+// TCPInfo is a collection of TCP statistics.
+//
+// From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to
+// the end of this struct or within existing unused space, so its size grows
+// over time. The current iteration is based on linux v4.17. New versions are
+// always backwards compatible.
+type TCPInfo struct {
+ State uint8
+ CaState uint8
+ Retransmits uint8
+ Probes uint8
+ Backoff uint8
+ Options uint8
+ // WindowScale is the combination of snd_wscale (first 4 bits) and rcv_wscale (second 4 bits)
+ WindowScale uint8
+ // DeliveryRateAppLimited is a boolean and only the first bit is meaningful.
+ DeliveryRateAppLimited uint8 // last of the eight leading one-byte fields
+
+ RTO uint32
+ ATO uint32
+ SndMss uint32
+ RcvMss uint32
+
+ Unacked uint32
+ Sacked uint32
+ Lost uint32
+ Retrans uint32
+ Fackets uint32
+
+ // Times.
+ LastDataSent uint32
+ LastAckSent uint32
+ LastDataRecv uint32
+ LastAckRecv uint32
+
+ // Metrics.
+ PMTU uint32
+ RcvSsthresh uint32
+ RTT uint32
+ RTTVar uint32
+ SndSsthresh uint32
+ SndCwnd uint32
+ Advmss uint32
+ Reordering uint32
+
+ RcvRTT uint32
+ RcvSpace uint32
+
+ TotalRetrans uint32
+
+ PacingRate uint64
+ MaxPacingRate uint64
+ // BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked.
+ BytesAcked uint64
+ // BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived.
+ BytesReceived uint64
+ // SegsOut is RFC4898 tcpEStatsPerfSegsOut.
+ SegsOut uint32
+ // SegsIn is RFC4898 tcpEStatsPerfSegsIn.
+ SegsIn uint32
+
+ NotSentBytes uint32
+ MinRTT uint32
+ // DataSegsIn is RFC4898 tcpEStatsDataSegsIn.
+ DataSegsIn uint32
+ // DataSegsOut is RFC4898 tcpEStatsDataSegsOut.
+ DataSegsOut uint32
+
+ DeliveryRate uint64
+
+ // BusyTime is the time in microseconds busy sending data.
+ BusyTime uint64
+ // RwndLimited is the time in microseconds limited by receive window.
+ RwndLimited uint64
+ // SndBufLimited is the time in microseconds limited by send buffer.
+ SndBufLimited uint64
+}
+
+// SizeOfTCPInfo is the binary size of a TCPInfo struct, computed by binary.Size at package initialization.
+var SizeOfTCPInfo = int(binary.Size(TCPInfo{}))
+
+// Control message types, from linux/socket.h.
+const (
+ SCM_CREDENTIALS = 0x2 // payload type: ControlMessageCredentials
+ SCM_RIGHTS = 0x1 // payload type: ControlMessageRights
+)
+
+// A ControlMessageHeader is the header for a socket control message.
+//
+// ControlMessageHeader represents struct cmsghdr from linux/socket.h.
+type ControlMessageHeader struct {
+ Length uint64
+ Level int32
+ Type int32 // the three fields total 16 bytes
+}
+
+// SizeOfControlMessageHeader is the binary size of a ControlMessageHeader
+// struct, computed by binary.Size at package initialization.
+var SizeOfControlMessageHeader = int(binary.Size(ControlMessageHeader{}))
+
+// A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
+//
+// ControlMessageCredentials represents struct ucred from linux/socket.h.
+type ControlMessageCredentials struct {
+ PID int32 // signed, unlike UID and GID
+ UID uint32
+ GID uint32
+}
+
+// A ControlMessageIPPacketInfo is an IP_PKTINFO socket control message.
+//
+// ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h.
+type ControlMessageIPPacketInfo struct {
+ NIC int32
+ LocalAddr InetAddr
+ DestinationAddr InetAddr // the three fields total 12 bytes (SizeOfControlMessageIPPacketInfo)
+}
+
+// SizeOfControlMessageCredentials is the binary size of a
+// ControlMessageCredentials struct, computed by binary.Size at package initialization.
+var SizeOfControlMessageCredentials = int(binary.Size(ControlMessageCredentials{}))
+
+// A ControlMessageRights is an SCM_RIGHTS socket control message.
+type ControlMessageRights []int32
+
+// SizeOfControlMessageRight is the size of a single element in
+// ControlMessageRights.
+const SizeOfControlMessageRight = 4 // one int32
+
+// SizeOfControlMessageInq is the size of a TCP_INQ control message.
+const SizeOfControlMessageInq = 4
+
+// SizeOfControlMessageTOS is the size of an IP_TOS control message.
+const SizeOfControlMessageTOS = 1
+
+// SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message.
+const SizeOfControlMessageTClass = 4
+
+// SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO
+// control message.
+const SizeOfControlMessageIPPacketInfo = 12 // int32 NIC plus two 4-byte InetAddr values
+
+// SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call.
+// From net/scm.h.
+const SCM_MAX_FD = 253 // a count of file descriptors, not a size in bytes
+
+// SO_ACCEPTCON is defined as __SO_ACCEPTCON in
+// include/uapi/linux/net.h, which represents a listening socket
+// state. Note that this is distinct from SO_ACCEPTCONN, which is a
+// socket option for querying whether a socket is in a listening
+// state.
+const SO_ACCEPTCON = 1 << 16 // 0x10000; SO_ACCEPTCONN (two Ns) is 30
diff --git a/pkg/abi/linux/splice.go b/pkg/abi/linux/splice.go
new file mode 100644
index 000000000..650eb87e8
--- /dev/null
+++ b/pkg/abi/linux/splice.go
@@ -0,0 +1,23 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Constants for splice(2), sendfile(2) and tee(2). Each is a single bit produced by 1 << iota.
+const (
+ SPLICE_F_MOVE = 1 << iota // 1
+ SPLICE_F_NONBLOCK // 2
+ SPLICE_F_MORE // 4
+ SPLICE_F_GIFT // 8
+)
diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go
new file mode 100644
index 000000000..174d470e2
--- /dev/null
+++ b/pkg/abi/linux/tcp.go
@@ -0,0 +1,60 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Socket options from uapi/linux/tcp.h.
+const (
+ TCP_NODELAY = 1
+ TCP_MAXSEG = 2
+ TCP_CORK = 3
+ TCP_KEEPIDLE = 4
+ TCP_KEEPINTVL = 5
+ TCP_KEEPCNT = 6
+ TCP_SYNCNT = 7
+ TCP_LINGER2 = 8
+ TCP_DEFER_ACCEPT = 9
+ TCP_WINDOW_CLAMP = 10
+ TCP_INFO = 11
+ TCP_QUICKACK = 12
+ TCP_CONGESTION = 13
+ TCP_MD5SIG = 14
+ TCP_THIN_LINEAR_TIMEOUTS = 16 // value 15 is not defined in this list
+ TCP_THIN_DUPACK = 17
+ TCP_USER_TIMEOUT = 18
+ TCP_REPAIR = 19
+ TCP_REPAIR_QUEUE = 20
+ TCP_QUEUE_SEQ = 21
+ TCP_REPAIR_OPTIONS = 22
+ TCP_FASTOPEN = 23
+ TCP_TIMESTAMP = 24
+ TCP_NOTSENT_LOWAT = 25
+ TCP_CC_INFO = 26
+ TCP_SAVE_SYN = 27
+ TCP_SAVED_SYN = 28
+ TCP_REPAIR_WINDOW = 29
+ TCP_FASTOPEN_CONNECT = 30
+ TCP_ULP = 31
+ TCP_MD5SIG_EXT = 32
+ TCP_FASTOPEN_KEY = 33
+ TCP_FASTOPEN_NO_COOKIE = 34
+ TCP_ZEROCOPY_RECEIVE = 35
+ TCP_INQ = 36
+)
+
+// Socket constants from include/net/tcp.h.
+const (
+ MAX_TCP_KEEPIDLE = 32767 // NOTE(review): presumably the upper bound for TCP_KEEPIDLE, in seconds — confirm
+ MAX_TCP_KEEPINTVL = 32767 // NOTE(review): presumably the upper bound for TCP_KEEPINTVL, in seconds — confirm
+)
diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go
new file mode 100644
index 000000000..e6860ed49
--- /dev/null
+++ b/pkg/abi/linux/time.go
@@ -0,0 +1,270 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "math"
+ "time"
+)
+
+const (
+	// CLOCKS_PER_SEC is the number of ClockTicks per second.
+	//
+	// Linux uses 100 on most architectures, independently of CONFIG_HZ.
+	// Userspace reads the value with sysconf(_SC_CLK_TCK), which takes it
+	// from the AT_CLKTCK auxiliary vector entry when present and falls back
+	// to 100 otherwise (glibc: sysdeps/posix/sysconf.c:__sysconf() =>
+	// sysdeps/unix/sysv/linux/getclktck.c, elf/dl-support.c:_dl_aux_init()).
+	//
+	// This is not the POSIX CLOCKS_PER_SEC used by clock(3): "XSI requires
+	// that [POSIX] CLOCKS_PER_SEC equals 1000000 independent of the actual
+	// resolution" - clock(3).
+	CLOCKS_PER_SEC = 100
+
+	// ClockTick is the duration of one clock tick, the unit used by
+	// times(2) and /proc/[pid]/stat.
+	ClockTick = time.Second / CLOCKS_PER_SEC
+)
+
+// CPU clock types for use with clock_gettime(2) et al. A 32 bit clock ID is
+// laid out as follows:
+//
+//   - the 29 most significant bits are either a PID or a FD;
+//   - bit 2 indicates whether a cpu clock refers to a thread or a process;
+//   - bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+const (
+	CPUCLOCK_PROF = iota
+	CPUCLOCK_VIRT
+	CPUCLOCK_SCHED
+	CPUCLOCK_MAX
+
+	// CLOCKFD shares the value of CPUCLOCK_MAX.
+	CLOCKFD = CPUCLOCK_MAX
+
+	CPUCLOCK_CLOCK_MASK     = 0x3
+	CPUCLOCK_PERTHREAD_MASK = 0x4
+)
+
+// Clock identifiers for use with clock_gettime(2), clock_getres(2),
+// clock_nanosleep(2). The identifiers are consecutive, starting at 0.
+const (
+	CLOCK_REALTIME = iota
+	CLOCK_MONOTONIC
+	CLOCK_PROCESS_CPUTIME_ID
+	CLOCK_THREAD_CPUTIME_ID
+	CLOCK_MONOTONIC_RAW
+	CLOCK_REALTIME_COARSE
+	CLOCK_MONOTONIC_COARSE
+	CLOCK_BOOTTIME
+	CLOCK_REALTIME_ALARM
+	CLOCK_BOOTTIME_ALARM
+)
+
+// Flags for clock_nanosleep(2).
+const (
+ TIMER_ABSTIME = 1
+)
+
+// Flags for timerfd syscalls (timerfd_create(2), timerfd_settime(2)).
+const (
+	// timerfd_create flags, defined as the corresponding O_* constants.
+	TFD_CLOEXEC  = O_CLOEXEC
+	TFD_NONBLOCK = O_NONBLOCK
+
+	// timerfd_settime flag.
+	TFD_TIMER_ABSTIME = 0x1
+)
+
+// maxSecInDuration is the largest number of whole seconds whose nanosecond
+// count still fits in an int64.
+const maxSecInDuration = int64(math.MaxInt64 / time.Second)
+
+// TimeT mirrors time_t from <time.h>: a time expressed in seconds.
+type TimeT int64
+
+// NsecToTimeT converts a nanosecond count to TimeT (whole seconds). The
+// integer division truncates toward zero.
+func NsecToTimeT(nsec int64) TimeT {
+	const nsecPerSec = 1e9
+	return TimeT(nsec / nsecPerSec)
+}
+
+// Timespec represents struct timespec in <time.h>: a second count plus a nanosecond remainder.
+//
+// +marshal
+type Timespec struct {
+ Sec int64 // whole seconds
+ Nsec int64 // nanoseconds; Valid requires 0 <= Nsec < one second
+}
+
+// Unix returns the seconds and nanoseconds fields of ts, in that order.
+func (ts Timespec) Unix() (sec int64, nsec int64) {
+	sec, nsec = ts.Sec, ts.Nsec
+	return sec, nsec
+}
+
+// ToTime converts ts to a Go time.Time by passing its seconds and
+// nanoseconds to time.Unix.
+func (ts Timespec) ToTime() time.Time {
+	sec, nsec := ts.Sec, ts.Nsec
+	return time.Unix(sec, nsec)
+}
+
+// ToNsec returns the nanosecond representation.
+func (ts Timespec) ToNsec() int64 {
+ return int64(ts.Sec)*1e9 + int64(ts.Nsec)
+}
+
+// ToNsecCapped returns ts in nanoseconds, saturating at math.MaxInt64 when the
+// value does not fit in an int64.
+//
+// Bounding Sec alone is not enough: with Sec == maxSecInDuration only the
+// first 854775807 ns still fit, so a Timespec that passes Valid could wrap to
+// a negative result. Nsec is therefore checked against the remaining
+// headroom as well. Negative Sec values are passed through uncapped.
+func (ts Timespec) ToNsecCapped() int64 {
+	if ts.Sec > maxSecInDuration {
+		return math.MaxInt64
+	}
+	// For 0 <= Sec <= maxSecInDuration the product Sec*1e9 cannot overflow,
+	// so the headroom computed here is exact.
+	if ts.Sec >= 0 && ts.Nsec > math.MaxInt64-ts.Sec*1e9 {
+		return math.MaxInt64
+	}
+	return ts.ToNsec()
+}
+
+// ToDuration converts ts to a time.Duration using the capped nanosecond
+// count from ToNsecCapped.
+func (ts Timespec) ToDuration() time.Duration {
+	capped := ts.ToNsecCapped()
+	return time.Duration(capped)
+}
+
+// Valid reports whether ts holds valid values: Sec must be non-negative and
+// Nsec must lie in [0, one second).
+func (ts Timespec) Valid() bool {
+	return ts.Sec >= 0 && ts.Nsec >= 0 && ts.Nsec < int64(time.Second)
+}
+
+// NsecToTimespec splits a nanosecond count into a Timespec: whole seconds in
+// Sec and the remainder in Nsec.
+func NsecToTimespec(nsec int64) (ts Timespec) {
+	return Timespec{
+		Sec:  nsec / 1e9,
+		Nsec: nsec % 1e9,
+	}
+}
+
+// DurationToTimespec converts a time.Duration to a Timespec via its
+// nanosecond count.
+func DurationToTimespec(dur time.Duration) Timespec {
+	nsec := dur.Nanoseconds()
+	return NsecToTimespec(nsec)
+}
+
+// SizeOfTimeval is the size of a Timeval struct in bytes (two int64 fields).
+const SizeOfTimeval = 16
+
+// Timeval represents struct timeval in <sys/time.h>.
+//
+// +marshal
+type Timeval struct {
+ Sec int64 // whole seconds
+ Usec int64 // microseconds
+}
+
+// ToNsecCapped returns tv in nanoseconds, saturating at math.MaxInt64 when the
+// value does not fit in an int64.
+//
+// Bounding Sec alone is not enough: with Sec == maxSecInDuration only the
+// first 854775 microseconds still fit, so Usec is checked against the
+// remaining headroom as well. Negative Sec values are passed through
+// uncapped.
+func (tv Timeval) ToNsecCapped() int64 {
+	if tv.Sec > maxSecInDuration {
+		return math.MaxInt64
+	}
+	// For 0 <= Sec <= maxSecInDuration the product Sec*1e9 cannot overflow.
+	// Dividing the headroom by 1e3 gives the largest Usec that still fits.
+	if tv.Sec >= 0 && tv.Usec > (math.MaxInt64-tv.Sec*1e9)/1e3 {
+		return math.MaxInt64
+	}
+	return tv.Sec*1e9 + tv.Usec*1e3
+}
+
+// ToDuration converts tv to a time.Duration using the capped nanosecond
+// count from ToNsecCapped.
+func (tv Timeval) ToDuration() time.Duration {
+	capped := tv.ToNsecCapped()
+	return time.Duration(capped)
+}
+
+// ToTime converts tv to a Go time.Time, scaling the microsecond field to
+// nanoseconds for time.Unix.
+func (tv Timeval) ToTime() time.Time {
+	nsec := tv.Usec * 1e3
+	return time.Unix(tv.Sec, nsec)
+}
+
+// NsecToTimeval translates nanoseconds to a Timeval, rounding up to the next
+// whole microsecond.
+//
+// Inputs within 999 ns of math.MaxInt64 (for example a duration produced by
+// one of the ToNsecCapped methods) are handled separately, because adding
+// the rounding bias to them would wrap around and produce a negative
+// Timeval. All other inputs are converted exactly as before.
+func NsecToTimeval(nsec int64) (tv Timeval) {
+	if nsec > math.MaxInt64-999 {
+		// Round only the sub-second part. In this range it stays below one
+		// second after rounding, so no carry into Sec is needed.
+		tv.Sec = nsec / 1e9
+		tv.Usec = (nsec%1e9 + 999) / 1e3
+		return
+	}
+	nsec += 999 // round up to microsecond
+	tv.Sec = nsec / 1e9
+	tv.Usec = nsec % 1e9 / 1e3
+	return
+}
+
+// DurationToTimeval converts a time.Duration to a Timeval via its nanosecond
+// count.
+func DurationToTimeval(dur time.Duration) Timeval {
+	nsec := dur.Nanoseconds()
+	return NsecToTimeval(nsec)
+}
+
+// Itimerspec represents struct itimerspec in <time.h>; both fields are Timespec values.
+type Itimerspec struct {
+ Interval Timespec
+ Value Timespec
+}
+
+// ItimerVal mimics the following struct in <sys/time.h>:
+//	struct itimerval {
+//		struct timeval it_interval; /* next value */
+//		struct timeval it_value; /* current value */
+//	};
+type ItimerVal struct {
+ Interval Timeval // it_interval
+ Value Timeval // it_value
+}
+
+// ClockT mirrors the C type clock_t.
+type ClockT int64
+
+// ClockTFromDuration converts d to clock_t by counting whole ClockTicks; a
+// partial tick is dropped by the integer division.
+func ClockTFromDuration(d time.Duration) ClockT {
+	ticks := d / ClockTick
+	return ClockT(ticks)
+}
+
+// Tms represents struct tms, used by times(2). Every field is a ClockT value.
+type Tms struct {
+ UTime ClockT
+ STime ClockT
+ CUTime ClockT
+ CSTime ClockT
+}
+
+// TimerID represents type timer_t, which identifies a POSIX per-process
+// interval timer. It is declared here as a 32-bit signed integer.
+type TimerID int32
+
+// StatxTimestamp represents struct statx_timestamp: seconds plus a 32-bit nanosecond part.
+//
+// +marshal
+type StatxTimestamp struct {
+ Sec int64 // whole seconds
+ Nsec uint32 // nanoseconds
+ _ int32 // unnamed and unused here; presumably the struct's reserved word - confirm
+}
+
+// ToNsec returns the nanosecond representation.
+func (sxts StatxTimestamp) ToNsec() int64 {
+ return int64(sxts.Sec)*1e9 + int64(sxts.Nsec)
+}
+
+// ToNsecCapped returns sxts in nanoseconds, saturating at math.MaxInt64 when
+// the value does not fit in an int64.
+//
+// Bounding Sec alone is not enough: with Sec == maxSecInDuration only the
+// first 854775807 ns still fit, so Nsec is checked against the remaining
+// headroom as well. Negative Sec values are passed through uncapped.
+func (sxts StatxTimestamp) ToNsecCapped() int64 {
+	if sxts.Sec > maxSecInDuration {
+		return math.MaxInt64
+	}
+	// For 0 <= Sec <= maxSecInDuration the product Sec*1e9 cannot overflow,
+	// so the headroom computed here is exact.
+	if sxts.Sec >= 0 && int64(sxts.Nsec) > math.MaxInt64-sxts.Sec*1e9 {
+		return math.MaxInt64
+	}
+	return sxts.ToNsec()
+}
+
+// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp.
+//
+// Nsec is unsigned, so the result is normalised to keep it in [0, 1e9): for a
+// negative nsec the seconds are rounded down and the remainder is made
+// positive (e.g. -1 ns becomes Sec=-1, Nsec=999999999). This keeps
+// ToNsec(NsecToStatxTimestamp(n)) == n for negative n as well.
+func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) {
+	sec, rem := nsec/1e9, nsec%1e9
+	if rem < 0 {
+		// Go's division truncates toward zero; borrow one second so the
+		// remainder becomes non-negative before the unsigned conversion.
+		sec--
+		rem += 1e9
+	}
+	return StatxTimestamp{
+		Sec:  sec,
+		Nsec: uint32(rem),
+	}
+}
+
+// Utime represents struct utimbuf used by utime(2).
+//
+// +marshal
+type Utime struct {
+ Actime int64 // corresponds to utimbuf.actime
+ Modtime int64 // corresponds to utimbuf.modtime
+}
diff --git a/pkg/abi/linux/timer.go b/pkg/abi/linux/timer.go
new file mode 100644
index 000000000..e32d09e10
--- /dev/null
+++ b/pkg/abi/linux/timer.go
@@ -0,0 +1,23 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// itimer types for getitimer(2) and setitimer(2), from
+// include/uapi/linux/time.h. The values are consecutive, starting at 0.
+const (
+	ITIMER_REAL = iota
+	ITIMER_VIRTUAL
+	ITIMER_PROF
+)
diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go
new file mode 100644
index 000000000..8ac02aee8
--- /dev/null
+++ b/pkg/abi/linux/tty.go
@@ -0,0 +1,344 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// NumControlCharacters is the number of control characters in Termios.
+const NumControlCharacters = 19
+
+// disabledChar is the value that marks a control character as disabled.
+const disabledChar = 0
+
+// Winsize is struct winsize, defined in uapi/asm-generic/termios.h. It has four 16-bit fields.
+type Winsize struct {
+ Row uint16
+ Col uint16
+ Xpixel uint16
+ Ypixel uint16
+}
+
+// Termios is struct termios, defined in uapi/asm-generic/termbits.h. It holds the fields KernelTermios shares.
+type Termios struct {
+ InputFlags uint32 // see the "Input flags" constants
+ OutputFlags uint32 // see the "Output flags" constants
+ ControlFlags uint32 // see the "Control flags" constants
+ LocalFlags uint32 // see the "Local flags" constants
+ LineDiscipline uint8
+ ControlCharacters [NumControlCharacters]uint8 // indexed by the V* constants
+}
+
+// KernelTermios is struct ktermios/struct termios2, defined in
+// uapi/asm-generic/termbits.h. It is Termios plus the two speed fields.
+//
+// +stateify savable
+type KernelTermios struct {
+ InputFlags uint32 // tested by IEnabled
+ OutputFlags uint32 // tested by OEnabled
+ ControlFlags uint32 // tested by CEnabled
+ LocalFlags uint32 // tested by LEnabled
+ LineDiscipline uint8
+ ControlCharacters [NumControlCharacters]uint8 // indexed by the V* constants
+ InputSpeed uint32 // not copied by ToTermios/FromTermios
+ OutputSpeed uint32 // not copied by ToTermios/FromTermios
+}
+
+// IEnabled reports whether every bit of flag is set in the termios input
+// flags.
+func (t *KernelTermios) IEnabled(flag uint32) bool {
+	return flag&^t.InputFlags == 0
+}
+
+// OEnabled reports whether every bit of flag is set in the termios output
+// flags.
+func (t *KernelTermios) OEnabled(flag uint32) bool {
+	return flag&^t.OutputFlags == 0
+}
+
+// CEnabled reports whether every bit of flag is set in the termios control
+// flags.
+func (t *KernelTermios) CEnabled(flag uint32) bool {
+	return flag&^t.ControlFlags == 0
+}
+
+// LEnabled reports whether every bit of flag is set in the termios local
+// flags.
+func (t *KernelTermios) LEnabled(flag uint32) bool {
+	return flag&^t.LocalFlags == 0
+}
+
+// ToTermios copies fields that are shared with Termios into a new Termios
+// struct.
+func (t *KernelTermios) ToTermios() Termios {
+ return Termios{
+ InputFlags: t.InputFlags,
+ OutputFlags: t.OutputFlags,
+ ControlFlags: t.ControlFlags,
+ LocalFlags: t.LocalFlags,
+ LineDiscipline: t.LineDiscipline,
+ ControlCharacters: t.ControlCharacters,
+ }
+}
+
+// FromTermios overwrites the fields of t that are shared with Termios using
+// the values in term. InputSpeed and OutputSpeed are left unchanged.
+func (t *KernelTermios) FromTermios(term Termios) {
+	t.InputFlags, t.OutputFlags = term.InputFlags, term.OutputFlags
+	t.ControlFlags, t.LocalFlags = term.ControlFlags, term.LocalFlags
+	t.LineDiscipline = term.LineDiscipline
+	t.ControlCharacters = term.ControlCharacters
+}
+
+// IsTerminating returns whether cBytes holds a single line terminating
+// character. The checks run in this order:
+//
+//   - anything that is not exactly one byte is not a terminator;
+//   - the user-set EOF character terminates;
+//   - the disabled-character value never terminates;
+//   - '\n' and the VEOL character terminate;
+//   - the VEOL2 character terminates only when IEXTEN is enabled.
+func (t *KernelTermios) IsTerminating(cBytes []byte) bool {
+	// All terminating characters are 1 byte.
+	if len(cBytes) != 1 {
+		return false
+	}
+
+	switch c := cBytes[0]; {
+	case t.IsEOF(c):
+		return true
+	case c == disabledChar:
+		return false
+	case c == '\n', c == t.ControlCharacters[VEOL]:
+		return true
+	case c == t.ControlCharacters[VEOL2]:
+		return t.LEnabled(IEXTEN)
+	default:
+		return false
+	}
+}
+
+// IsEOF returns whether c is the EOF character. It is always false while the
+// VEOF slot holds the disabled-character value.
+func (t *KernelTermios) IsEOF(c byte) bool {
+	eof := t.ControlCharacters[VEOF]
+	return eof != disabledChar && c == eof
+}
+
+// Input flags, for the InputFlags field of Termios/KernelTermios. Values are written in octal.
+const (
+ IGNBRK = 0000001
+ BRKINT = 0000002
+ IGNPAR = 0000004
+ PARMRK = 0000010
+ INPCK = 0000020
+ ISTRIP = 0000040
+ INLCR = 0000100
+ IGNCR = 0000200
+ ICRNL = 0000400
+ IUCLC = 0001000
+ IXON = 0002000
+ IXANY = 0004000
+ IXOFF = 0010000
+ IMAXBEL = 0020000
+ IUTF8 = 0040000
+)
+
+// Output flags, for the OutputFlags field of Termios/KernelTermios. Values are written in octal.
+const (
+ OPOST = 0000001
+ OLCUC = 0000002
+ ONLCR = 0000004
+ OCRNL = 0000010
+ ONOCR = 0000020
+ ONLRET = 0000040
+ OFILL = 0000100
+ OFDEL = 0000200
+ NLDLY = 0000400
+ NL0 = 0000000
+ NL1 = 0000400
+ CRDLY = 0003000
+ CR0 = 0000000
+ CR1 = 0001000
+ CR2 = 0002000
+ CR3 = 0003000
+ TABDLY = 0014000
+ TAB0 = 0000000
+ TAB1 = 0004000
+ TAB2 = 0010000
+ TAB3 = 0014000
+ XTABS = 0014000 // same value as TAB3
+ BSDLY = 0020000
+ BS0 = 0000000
+ BS1 = 0020000
+ VTDLY = 0040000
+ VT0 = 0000000
+ VT1 = 0040000
+ FFDLY = 0100000
+ FF0 = 0000000
+ FF1 = 0100000
+)
+
+// Control flags, for the ControlFlags field of Termios/KernelTermios. Values are written in octal.
+const (
+ CBAUD = 0010017
+ B0 = 0000000
+ B50 = 0000001
+ B75 = 0000002
+ B110 = 0000003
+ B134 = 0000004
+ B150 = 0000005
+ B200 = 0000006
+ B300 = 0000007
+ B600 = 0000010
+ B1200 = 0000011
+ B1800 = 0000012
+ B2400 = 0000013
+ B4800 = 0000014
+ B9600 = 0000015
+ B19200 = 0000016
+ B38400 = 0000017
+ EXTA = B19200
+ EXTB = B38400
+ CSIZE = 0000060
+ CS5 = 0000000
+ CS6 = 0000020
+ CS7 = 0000040
+ CS8 = 0000060
+ CSTOPB = 0000100
+ CREAD = 0000200
+ PARENB = 0000400
+ PARODD = 0001000
+ HUPCL = 0002000
+ CLOCAL = 0004000
+ CBAUDEX = 0010000
+ BOTHER = 0010000 // same value as CBAUDEX
+ B57600 = 0010001
+ B115200 = 0010002
+ B230400 = 0010003
+ B460800 = 0010004
+ B500000 = 0010005
+ B576000 = 0010006
+ B921600 = 0010007
+ B1000000 = 0010010
+ B1152000 = 0010011
+ B1500000 = 0010012
+ B2000000 = 0010013
+ B2500000 = 0010014
+ B3000000 = 0010015
+ B3500000 = 0010016
+ B4000000 = 0010017 // same value as CBAUD
+ CIBAUD = 002003600000 // equals CBAUD << IBSHIFT
+ CMSPAR = 010000000000
+ CRTSCTS = 020000000000
+
+ // IBSHIFT is the shift from CBAUD to CIBAUD.
+ IBSHIFT = 16
+)
+
+// Local flags, for the LocalFlags field of Termios/KernelTermios. Values are written in octal.
+const (
+ ISIG = 0000001
+ ICANON = 0000002
+ XCASE = 0000004
+ ECHO = 0000010
+ ECHOE = 0000020
+ ECHOK = 0000040
+ ECHONL = 0000100
+ NOFLSH = 0000200
+ TOSTOP = 0000400
+ ECHOCTL = 0001000
+ ECHOPRT = 0002000
+ ECHOKE = 0004000
+ FLUSHO = 0010000 // NOTE(review): the next bit, 0020000, is not assigned in this list.
+ PENDIN = 0040000
+ IEXTEN = 0100000
+ EXTPROC = 0200000
+)
+
+// Control Character indices into the ControlCharacters array. The indices are
+// consecutive, running from VINTR = 0 to VEOL2 = 16.
+const (
+	VINTR = iota
+	VQUIT
+	VERASE
+	VKILL
+	VEOF
+	VTIME
+	VMIN
+	VSWTC
+	VSTART
+	VSTOP
+	VSUSP
+	VEOL
+	VREPRINT
+	VDISCARD
+	VWERASE
+	VLNEXT
+	VEOL2
+)
+
+// ControlCharacter maps a character to its termios-style control character.
+//
+// For example, ControlCharacter('C') yields the code for Ctrl-C (^C).
+//
+// Standard control characters are ASCII bytes 0 through 31.
+func ControlCharacter(c byte) uint8 {
+	// 'A' maps to 1, 'B' to 2, and so on: subtract the byte just before 'A'.
+	return c - ('A' - 1)
+}
+
+// DefaultControlCharacters is the default set of Termios control characters,
+// keyed by the V* control character indices. Slots beyond VEOL2 are zero.
+var DefaultControlCharacters = [NumControlCharacters]uint8{
+	VINTR:    ControlCharacter('C'),  // ^C
+	VQUIT:    ControlCharacter('\\'), // ^\
+	VERASE:   '\x7f',                 // DEL
+	VKILL:    ControlCharacter('U'),  // ^U
+	VEOF:     ControlCharacter('D'),  // ^D
+	VTIME:    0,
+	VMIN:     1,
+	VSWTC:    0,
+	VSTART:   ControlCharacter('Q'), // ^Q
+	VSTOP:    ControlCharacter('S'), // ^S
+	VSUSP:    ControlCharacter('Z'), // ^Z
+	VEOL:     0,
+	VREPRINT: ControlCharacter('R'), // ^R
+	VDISCARD: ControlCharacter('O'), // ^O
+	VWERASE:  ControlCharacter('W'), // ^W
+	VLNEXT:   ControlCharacter('V'), // ^V
+	VEOL2:    0,
+}
+
+// MasterTermios is the terminal configuration of the master end of a Unix98
+// pseudoterminal. Its input, output and local flags are left at zero.
+var MasterTermios = KernelTermios{
+ ControlFlags: B38400 | CS8 | CREAD,
+ ControlCharacters: DefaultControlCharacters,
+ InputSpeed: 38400,
+ OutputSpeed: 38400,
+}
+
+// DefaultSlaveTermios is the default terminal configuration of the slave end
+// of a Unix98 pseudoterminal. Control flags, characters and speeds match MasterTermios.
+var DefaultSlaveTermios = KernelTermios{
+ InputFlags: ICRNL | IXON,
+ OutputFlags: OPOST | ONLCR,
+ ControlFlags: B38400 | CS8 | CREAD,
+ LocalFlags: ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+ ControlCharacters: DefaultControlCharacters,
+ InputSpeed: 38400,
+ OutputSpeed: 38400,
+}
+
+// WindowSize corresponds to struct winsize defined in
+// include/uapi/asm-generic/termios.h. Only the row and column counts are named.
+//
+// +stateify savable
+type WindowSize struct {
+ Rows uint16
+ Cols uint16
+ _ [4]byte // Padding for 2 unused shorts.
+}
diff --git a/pkg/abi/linux/uio.go b/pkg/abi/linux/uio.go
new file mode 100644
index 000000000..1fd1e9802
--- /dev/null
+++ b/pkg/abi/linux/uio.go
@@ -0,0 +1,18 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// UIO_MAXIOV is the maximum number of struct iovecs in a struct iovec array
+// (1024).
+const UIO_MAXIOV = 1 << 10
diff --git a/pkg/abi/linux/utsname.go b/pkg/abi/linux/utsname.go
new file mode 100644
index 000000000..60f220a67
--- /dev/null
+++ b/pkg/abi/linux/utsname.go
@@ -0,0 +1,49 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "bytes"
+ "fmt"
+)
+
+const (
+ // UTSLen is the maximum length of strings contained in fields of
+ // UtsName. Each field is declared one byte longer than this.
+ UTSLen = 64
+)
+
+// UtsName represents struct utsname, the struct returned by uname(2).
+type UtsName struct {
+ Sysname [UTSLen + 1]byte
+ Nodename [UTSLen + 1]byte
+ Release [UTSLen + 1]byte
+ Version [UTSLen + 1]byte
+ Machine [UTSLen + 1]byte
+ Domainname [UTSLen + 1]byte
+}
+
+// utsNameString converts a UtsName entry to a string without NULs.
+//
+// The entry is treated as a NUL-terminated C string: the result ends just
+// before the first NUL byte, so bytes left over after the terminator never
+// leak into the output. An entry with no NUL at all is returned in full.
+func utsNameString(s [UTSLen + 1]byte) string {
+	// The NUL bytes would remain in a plain cast to string, so cut the
+	// slice at the terminator explicitly.
+	if end := bytes.IndexByte(s[:], 0); end >= 0 {
+		return string(s[:end])
+	}
+	return string(s[:])
+}
+
+func (u UtsName) String() string {
+ return fmt.Sprintf("{Sysname: %s, Nodename: %s, Release: %s, Version: %s, Machine: %s, Domainname: %s}",
+ utsNameString(u.Sysname), utsNameString(u.Nodename), utsNameString(u.Release),
+ utsNameString(u.Version), utsNameString(u.Machine), utsNameString(u.Domainname))
+}
diff --git a/pkg/abi/linux/wait.go b/pkg/abi/linux/wait.go
new file mode 100644
index 000000000..4bdc280d1
--- /dev/null
+++ b/pkg/abi/linux/wait.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Options for waitpid(2), wait4(2), and/or waitid(2), from
+// include/uapi/linux/wait.h. Every option is a single bit; WSTOPPED is an
+// alias of WUNTRACED.
+const (
+	WNOHANG    = 1 << 0
+	WUNTRACED  = 1 << 1
+	WSTOPPED   = WUNTRACED
+	WEXITED    = 1 << 2
+	WCONTINUED = 1 << 3
+	WNOWAIT    = 1 << 24
+	WNOTHREAD  = 1 << 29
+	WALL       = 1 << 30
+	WCLONE     = 1 << 31
+)
+
+// ID types for waitid(2), from include/uapi/linux/wait.h. The values are
+// consecutive, starting at 0.
+const (
+	P_ALL = iota
+	P_PID
+	P_PGID
+)
diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go
new file mode 100644
index 000000000..99180b208
--- /dev/null
+++ b/pkg/abi/linux/xattr.go
@@ -0,0 +1,28 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Constants for extended attributes.
+const (
+	// Size limits: 255 for names, 64 KiB for values and for lists.
+	XATTR_NAME_MAX = 255
+	XATTR_SIZE_MAX = 64 << 10
+	XATTR_LIST_MAX = 64 << 10
+
+	// Flag values.
+	XATTR_CREATE  = 0x1
+	XATTR_REPLACE = 0x2
+
+	// Prefix of the "user." namespace, and its length in bytes.
+	XATTR_USER_PREFIX     = "user."
+	XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX)
+)