diff options
Diffstat (limited to 'pkg/abi')
61 files changed, 6017 insertions, 0 deletions
diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD new file mode 100644 index 000000000..839f822eb --- /dev/null +++ b/pkg/abi/BUILD @@ -0,0 +1,13 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "abi", + srcs = [ + "abi.go", + "abi_linux.go", + "flag.go", + ], + visibility = ["//:sandbox"], +) diff --git a/pkg/abi/abi.go b/pkg/abi/abi.go new file mode 100644 index 000000000..e6be93c3a --- /dev/null +++ b/pkg/abi/abi.go @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package abi describes the interface between a kernel and userspace. +package abi + +import ( + "fmt" +) + +// OS describes the target operating system for an ABI. +// +// Note that OS is architecture-independent. The details of the OS ABI will +// vary between architectures. +type OS int + +const ( + // Linux is the Linux ABI. + Linux OS = iota +) + +// String implements fmt.Stringer. +func (o OS) String() string { + switch o { + case Linux: + return "linux" + default: + return fmt.Sprintf("OS(%d)", o) + } +} + +// ABI is an interface that defines OS-specific interactions. +type ABI interface { +} diff --git a/pkg/abi/abi_linux.go b/pkg/abi/abi_linux.go new file mode 100644 index 000000000..3059479bd --- /dev/null +++ b/pkg/abi/abi_linux.go @@ -0,0 +1,20 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package abi + +// Host specifies the host ABI. +const Host = Linux diff --git a/pkg/abi/flag.go b/pkg/abi/flag.go new file mode 100644 index 000000000..dcdd66d4e --- /dev/null +++ b/pkg/abi/flag.go @@ -0,0 +1,85 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package abi + +import ( + "fmt" + "math" + "strconv" + "strings" +) + +// A FlagSet is a slice of bit-flags and their name. +type FlagSet []struct { + Flag uint64 + Name string +} + +// Parse returns a pretty version of val, using the flag names for known flags. +// Unknown flags remain numeric. 
+func (s FlagSet) Parse(val uint64) string { + var flags []string + + for _, f := range s { + if val&f.Flag == f.Flag { + flags = append(flags, f.Name) + val &^= f.Flag + } + } + + if val != 0 { + flags = append(flags, "0x"+strconv.FormatUint(val, 16)) + } + + if len(flags) == 0 { + // Prefer 0 to an empty string. + return "0x0" + } + + return strings.Join(flags, "|") +} + +// ValueSet is a map of syscall values to their name. Parse will use the name +// or the value if unknown. +type ValueSet map[uint64]string + +// Parse returns the name of the value associated with `val`. Unknown values +// are converted to hex. +func (s ValueSet) Parse(val uint64) string { + if v, ok := s[val]; ok { + return v + } + return fmt.Sprintf("%#x", val) +} + +// ParseDecimal returns the name of the value associated with `val`. Unknown +// values are converted to decimal. +func (s ValueSet) ParseDecimal(val uint64) string { + if v, ok := s[val]; ok { + return v + } + return fmt.Sprintf("%d", val) +} + +// ParseName returns the flag value associated with 'name'. Returns false +// if no value is found. +func (s ValueSet) ParseName(name string) (uint64, bool) { + for k, v := range s { + if v == name { + return k, true + } + } + return math.MaxUint64, false +} diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD new file mode 100644 index 000000000..114b516e2 --- /dev/null +++ b/pkg/abi/linux/BUILD @@ -0,0 +1,85 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +# Package linux contains the constants and types needed to interface with a +# Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix +# when the host OS may not be Linux. 
+ +package(licenses = ["notice"]) + +go_library( + name = "linux", + srcs = [ + "aio.go", + "arch_amd64.go", + "audit.go", + "bpf.go", + "capability.go", + "clone.go", + "dev.go", + "elf.go", + "epoll.go", + "epoll_amd64.go", + "epoll_arm64.go", + "errors.go", + "eventfd.go", + "exec.go", + "fcntl.go", + "file.go", + "file_amd64.go", + "file_arm64.go", + "fs.go", + "futex.go", + "inotify.go", + "ioctl.go", + "ioctl_tun.go", + "ip.go", + "ipc.go", + "limits.go", + "linux.go", + "mm.go", + "netdevice.go", + "netfilter.go", + "netlink.go", + "netlink_route.go", + "poll.go", + "prctl.go", + "ptrace.go", + "ptrace_amd64.go", + "ptrace_arm64.go", + "rseq.go", + "rusage.go", + "sched.go", + "seccomp.go", + "sem.go", + "shm.go", + "signal.go", + "signalfd.go", + "socket.go", + "splice.go", + "tcp.go", + "time.go", + "timer.go", + "tty.go", + "uio.go", + "utsname.go", + "wait.go", + "xattr.go", + ], + marshal = True, + visibility = ["//visibility:public"], + deps = [ + "//pkg/abi", + "//pkg/binary", + "//pkg/bits", + ], +) + +go_test( + name = "linux_test", + size = "small", + srcs = ["netfilter_test.go"], + library = ":linux", + deps = [ + "//pkg/binary", + ], +) diff --git a/pkg/abi/linux/aio.go b/pkg/abi/linux/aio.go new file mode 100644 index 000000000..3c6e0079d --- /dev/null +++ b/pkg/abi/linux/aio.go @@ -0,0 +1,20 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linux + +const ( + // AIORingSize is sizeof(struct aio_ring). + AIORingSize = 32 +) diff --git a/pkg/abi/linux/arch_amd64.go b/pkg/abi/linux/arch_amd64.go new file mode 100644 index 000000000..0be31e755 --- /dev/null +++ b/pkg/abi/linux/arch_amd64.go @@ -0,0 +1,23 @@ +// Copyright 2020 The gVisor Authors. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package linux + +// Start and end addresses of the vsyscall page. +const ( + VSyscallStartAddr uint64 = 0xffffffffff600000 + VSyscallEndAddr uint64 = 0xffffffffff601000 +) diff --git a/pkg/abi/linux/audit.go b/pkg/abi/linux/audit.go new file mode 100644 index 000000000..6cca69af9 --- /dev/null +++ b/pkg/abi/linux/audit.go @@ -0,0 +1,23 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Audit numbers identify different system call APIs, from <uapi/linux/audit.h> +const ( + // AUDIT_ARCH_X86_64 identifies AMD64. 
+ AUDIT_ARCH_X86_64 = 0xc000003e + // AUDIT_ARCH_AARCH64 identifies ARM64. + AUDIT_ARCH_AARCH64 = 0xc00000b7 +) diff --git a/pkg/abi/linux/bpf.go b/pkg/abi/linux/bpf.go new file mode 100644 index 000000000..aa3d3ce70 --- /dev/null +++ b/pkg/abi/linux/bpf.go @@ -0,0 +1,34 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// BPFInstruction is a raw BPF virtual machine instruction. +// +// +stateify savable +type BPFInstruction struct { + // OpCode is the operation to execute. + OpCode uint16 + + // JumpIfTrue is the number of instructions to skip if OpCode is a + // conditional instruction and the condition is true. + JumpIfTrue uint8 + + // JumpIfFalse is the number of instructions to skip if OpCode is a + // conditional instruction and the condition is false. + JumpIfFalse uint8 + + // K is a constant parameter. The meaning depends on the value of OpCode. + K uint32 +} diff --git a/pkg/abi/linux/capability.go b/pkg/abi/linux/capability.go new file mode 100644 index 000000000..965f74663 --- /dev/null +++ b/pkg/abi/linux/capability.go @@ -0,0 +1,190 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// A Capability represents the ability to perform a privileged operation. +type Capability int + +// Capabilities defined by Linux. Taken from the kernel's +// include/uapi/linux/capability.h. See capabilities(7) or that file for more +// detailed capability descriptions. +const ( + CAP_CHOWN = Capability(0) + CAP_DAC_OVERRIDE = Capability(1) + CAP_DAC_READ_SEARCH = Capability(2) + CAP_FOWNER = Capability(3) + CAP_FSETID = Capability(4) + CAP_KILL = Capability(5) + CAP_SETGID = Capability(6) + CAP_SETUID = Capability(7) + CAP_SETPCAP = Capability(8) + CAP_LINUX_IMMUTABLE = Capability(9) + CAP_NET_BIND_SERVICE = Capability(10) + CAP_NET_BROADCAST = Capability(11) + CAP_NET_ADMIN = Capability(12) + CAP_NET_RAW = Capability(13) + CAP_IPC_LOCK = Capability(14) + CAP_IPC_OWNER = Capability(15) + CAP_SYS_MODULE = Capability(16) + CAP_SYS_RAWIO = Capability(17) + CAP_SYS_CHROOT = Capability(18) + CAP_SYS_PTRACE = Capability(19) + CAP_SYS_PACCT = Capability(20) + CAP_SYS_ADMIN = Capability(21) + CAP_SYS_BOOT = Capability(22) + CAP_SYS_NICE = Capability(23) + CAP_SYS_RESOURCE = Capability(24) + CAP_SYS_TIME = Capability(25) + CAP_SYS_TTY_CONFIG = Capability(26) + CAP_MKNOD = Capability(27) + CAP_LEASE = Capability(28) + CAP_AUDIT_WRITE = Capability(29) + CAP_AUDIT_CONTROL = Capability(30) + CAP_SETFCAP = Capability(31) + CAP_MAC_OVERRIDE = Capability(32) + CAP_MAC_ADMIN = Capability(33) + CAP_SYSLOG = Capability(34) + CAP_WAKE_ALARM = Capability(35) + CAP_BLOCK_SUSPEND = Capability(36) + CAP_AUDIT_READ = Capability(37) + + // 
CAP_LAST_CAP is the highest-numbered capability. + // Search for "CAP_LAST_CAP" to find other places that need to change. + CAP_LAST_CAP = CAP_AUDIT_READ +) + +// Ok returns true if cp is a supported capability. +func (cp Capability) Ok() bool { + return cp >= 0 && cp <= CAP_LAST_CAP +} + +// String returns the capability name. +func (cp Capability) String() string { + switch cp { + case CAP_CHOWN: + return "CAP_CHOWN" + case CAP_DAC_OVERRIDE: + return "CAP_DAC_OVERRIDE" + case CAP_DAC_READ_SEARCH: + return "CAP_DAC_READ_SEARCH" + case CAP_FOWNER: + return "CAP_FOWNER" + case CAP_FSETID: + return "CAP_FSETID" + case CAP_KILL: + return "CAP_KILL" + case CAP_SETGID: + return "CAP_SETGID" + case CAP_SETUID: + return "CAP_SETUID" + case CAP_SETPCAP: + return "CAP_SETPCAP" + case CAP_LINUX_IMMUTABLE: + return "CAP_LINUX_IMMUTABLE" + case CAP_NET_BIND_SERVICE: + return "CAP_NET_BIND_SERVICE" + case CAP_NET_BROADCAST: + return "CAP_NET_BROADCAST" + case CAP_NET_ADMIN: + return "CAP_NET_ADMIN" + case CAP_NET_RAW: + return "CAP_NET_RAW" + case CAP_IPC_LOCK: + return "CAP_IPC_LOCK" + case CAP_IPC_OWNER: + return "CAP_IPC_OWNER" + case CAP_SYS_MODULE: + return "CAP_SYS_MODULE" + case CAP_SYS_RAWIO: + return "CAP_SYS_RAWIO" + case CAP_SYS_CHROOT: + return "CAP_SYS_CHROOT" + case CAP_SYS_PTRACE: + return "CAP_SYS_PTRACE" + case CAP_SYS_PACCT: + return "CAP_SYS_PACCT" + case CAP_SYS_ADMIN: + return "CAP_SYS_ADMIN" + case CAP_SYS_BOOT: + return "CAP_SYS_BOOT" + case CAP_SYS_NICE: + return "CAP_SYS_NICE" + case CAP_SYS_RESOURCE: + return "CAP_SYS_RESOURCE" + case CAP_SYS_TIME: + return "CAP_SYS_TIME" + case CAP_SYS_TTY_CONFIG: + return "CAP_SYS_TTY_CONFIG" + case CAP_MKNOD: + return "CAP_MKNOD" + case CAP_LEASE: + return "CAP_LEASE" + case CAP_AUDIT_WRITE: + return "CAP_AUDIT_WRITE" + case CAP_AUDIT_CONTROL: + return "CAP_AUDIT_CONTROL" + case CAP_SETFCAP: + return "CAP_SETFCAP" + case CAP_MAC_OVERRIDE: + return "CAP_MAC_OVERRIDE" + case CAP_MAC_ADMIN: + return "CAP_MAC_ADMIN" + 
case CAP_SYSLOG: + return "CAP_SYSLOG" + case CAP_WAKE_ALARM: + return "CAP_WAKE_ALARM" + case CAP_BLOCK_SUSPEND: + return "CAP_BLOCK_SUSPEND" + case CAP_AUDIT_READ: + return "CAP_AUDIT_READ" + default: + return "UNKNOWN" + } +} + +// Version numbers used by the capget/capset syscalls, defined in Linux's +// include/uapi/linux/capability.h. +const ( + // LINUX_CAPABILITY_VERSION_1 causes the data pointer to be + // interpreted as a pointer to a single cap_user_data_t. Since capability + // sets are 64 bits and the "capability sets" in cap_user_data_t are 32 + // bits only, this causes the upper 32 bits to be implicitly 0. + LINUX_CAPABILITY_VERSION_1 = 0x19980330 + + // LINUX_CAPABILITY_VERSION_2 and LINUX_CAPABILITY_VERSION_3 cause the + // data pointer to be interpreted as a pointer to an array of 2 + // cap_user_data_t, using the second to store the 32 MSB of each capability + // set. Versions 2 and 3 are identical, but Linux printk's a warning on use + // of version 2 due to a userspace API defect. + LINUX_CAPABILITY_VERSION_2 = 0x20071026 + LINUX_CAPABILITY_VERSION_3 = 0x20080522 + + // HighestCapabilityVersion is the highest supported + // LINUX_CAPABILITY_VERSION_* version. + HighestCapabilityVersion = LINUX_CAPABILITY_VERSION_3 +) + +// CapUserHeader is equivalent to Linux's cap_user_header_t. +type CapUserHeader struct { + Version uint32 + Pid int32 +} + +// CapUserData is equivalent to Linux's cap_user_data_t. +type CapUserData struct { + Effective uint32 + Permitted uint32 + Inheritable uint32 +} diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go new file mode 100644 index 000000000..c2cbfca5e --- /dev/null +++ b/pkg/abi/linux/clone.go @@ -0,0 +1,41 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Clone constants per clone(2). +const ( + CLONE_VM = 0x100 + CLONE_FS = 0x200 + CLONE_FILES = 0x400 + CLONE_SIGHAND = 0x800 + CLONE_PARENT = 0x8000 + CLONE_PTRACE = 0x2000 + CLONE_VFORK = 0x4000 + CLONE_THREAD = 0x10000 + CLONE_NEWNS = 0x20000 + CLONE_SYSVSEM = 0x40000 + CLONE_SETTLS = 0x80000 + CLONE_PARENT_SETTID = 0x100000 + CLONE_CHILD_CLEARTID = 0x200000 + CLONE_DETACHED = 0x400000 + CLONE_UNTRACED = 0x800000 + CLONE_CHILD_SETTID = 0x1000000 + CLONE_NEWUTS = 0x4000000 + CLONE_NEWIPC = 0x8000000 + CLONE_NEWUSER = 0x10000000 + CLONE_NEWPID = 0x20000000 + CLONE_NEWNET = 0x40000000 + CLONE_IO = 0x80000000 +) diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go new file mode 100644 index 000000000..89f9a793f --- /dev/null +++ b/pkg/abi/linux/dev.go @@ -0,0 +1,58 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// MakeDeviceID encodes a major and minor device number into a single device ID. 
+// +// Format (see linux/kdev_t.h:new_encode_dev): +// +// Bits 7:0 - minor bits 7:0 +// Bits 19:8 - major bits 11:0 +// Bits 31:20 - minor bits 19:8 +func MakeDeviceID(major uint16, minor uint32) uint32 { + return (minor & 0xff) | ((uint32(major) & 0xfff) << 8) | ((minor >> 8) << 20) +} + +// DecodeDeviceID decodes a device ID into major and minor device numbers. +func DecodeDeviceID(rdev uint32) (uint16, uint32) { + major := uint16((rdev >> 8) & 0xfff) + minor := (rdev & 0xff) | ((rdev >> 20) << 8) + return major, minor +} + +// Character device IDs. +// +// See Documentation/admin-guide/devices.txt and uapi/linux/major.h. +const ( + // MEM_MAJOR is the major device number for "memory" character devices. + MEM_MAJOR = 1 + + // TTYAUX_MAJOR is the major device number for alternate TTY devices. + TTYAUX_MAJOR = 5 + + // UNIX98_PTY_MASTER_MAJOR is the initial major device number for + // Unix98 PTY masters. + UNIX98_PTY_MASTER_MAJOR = 128 + + // UNIX98_PTY_SLAVE_MAJOR is the initial major device number for + // Unix98 PTY slaves. + UNIX98_PTY_SLAVE_MAJOR = 136 +) + +// Minor device numbers for TTYAUX_MAJOR. +const ( + // PTMX_MINOR is the minor device number for /dev/ptmx. + PTMX_MINOR = 2 +) diff --git a/pkg/abi/linux/elf.go b/pkg/abi/linux/elf.go new file mode 100644 index 000000000..7c9a02f20 --- /dev/null +++ b/pkg/abi/linux/elf.go @@ -0,0 +1,108 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linux + +// Linux auxiliary vector entry types. +const ( + // AT_NULL is the end of the auxiliary vector. + AT_NULL = 0 + + // AT_IGNORE should be ignored. + AT_IGNORE = 1 + + // AT_EXECFD is the file descriptor of the program. + AT_EXECFD = 2 + + // AT_PHDR points to the program headers. + AT_PHDR = 3 + + // AT_PHENT is the size of a program header entry. + AT_PHENT = 4 + + // AT_PHNUM is the number of program headers. + AT_PHNUM = 5 + + // AT_PAGESZ is the system page size. + AT_PAGESZ = 6 + + // AT_BASE is the base address of the interpreter. + AT_BASE = 7 + + // AT_FLAGS are flags. + AT_FLAGS = 8 + + // AT_ENTRY is the program entry point. + AT_ENTRY = 9 + + // AT_NOTELF indicates that the program is not an ELF binary. + AT_NOTELF = 10 + + // AT_UID is the real UID. + AT_UID = 11 + + // AT_EUID is the effective UID. + AT_EUID = 12 + + // AT_GID is the real GID. + AT_GID = 13 + + // AT_EGID is the effective GID. + AT_EGID = 14 + + // AT_PLATFORM is a string identifying the CPU. + AT_PLATFORM = 15 + + // AT_HWCAP are arch-dependent CPU capabilities. + AT_HWCAP = 16 + + // AT_CLKTCK is the frequency used by times(2). + AT_CLKTCK = 17 + + // AT_SECURE indicates secure mode. + AT_SECURE = 23 + + // AT_BASE_PLATFORM is a string identifying the "real" platform. It may + // differ from AT_PLATFORM. + AT_BASE_PLATFORM = 24 + + // AT_RANDOM points to 16 bytes of random data. + AT_RANDOM = 25 + + // AT_HWCAP2 is an extension of AT_HWCAP. + AT_HWCAP2 = 26 + + // AT_EXECFN is the path used to execute the program. + AT_EXECFN = 31 + + // AT_SYSINFO_EHDR is the address of the VDSO. + AT_SYSINFO_EHDR = 33 +) + +// ELF ET_CORE and ptrace GETREGSET/SETREGSET register set types. +// +// See include/uapi/linux/elf.h. +const ( + // NT_PRSTATUS is for general-purpose registers. + NT_PRSTATUS = 0x1 + + // NT_PRFPREG is for floating-point registers. + NT_PRFPREG = 0x2 + + // NT_X86_XSTATE is for x86 extended state using xsave. 
+ NT_X86_XSTATE = 0x202 + + // NT_ARM_TLS is for ARM TLS register. + NT_ARM_TLS = 0x401 +) diff --git a/pkg/abi/linux/epoll.go b/pkg/abi/linux/epoll.go new file mode 100644 index 000000000..1121a1a92 --- /dev/null +++ b/pkg/abi/linux/epoll.go @@ -0,0 +1,62 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/binary" +) + +// Event masks. +const ( + EPOLLIN = 0x1 + EPOLLPRI = 0x2 + EPOLLOUT = 0x4 + EPOLLERR = 0x8 + EPOLLHUP = 0x10 + EPOLLRDNORM = 0x40 + EPOLLRDBAND = 0x80 + EPOLLWRNORM = 0x100 + EPOLLWRBAND = 0x200 + EPOLLMSG = 0x400 + EPOLLRDHUP = 0x2000 +) + +// Per-file descriptor flags. +const ( + EPOLLEXCLUSIVE = 1 << 28 + EPOLLWAKEUP = 1 << 29 + EPOLLONESHOT = 1 << 30 + EPOLLET = 1 << 31 + + // EP_PRIVATE_BITS is fs/eventpoll.c:EP_PRIVATE_BITS, the set of all bits + // in an epoll event mask that correspond to flags rather than I/O events. + EP_PRIVATE_BITS = EPOLLEXCLUSIVE | EPOLLWAKEUP | EPOLLONESHOT | EPOLLET +) + +// Operation flags. +const ( + EPOLL_CLOEXEC = 0x80000 + EPOLL_NONBLOCK = 0x800 +) + +// Control operations. +const ( + EPOLL_CTL_ADD = 0x1 + EPOLL_CTL_DEL = 0x2 + EPOLL_CTL_MOD = 0x3 +) + +// SizeOfEpollEvent is the size of EpollEvent struct. 
+var SizeOfEpollEvent = int(binary.Size(EpollEvent{})) diff --git a/pkg/abi/linux/epoll_amd64.go b/pkg/abi/linux/epoll_amd64.go new file mode 100644 index 000000000..7e74b1143 --- /dev/null +++ b/pkg/abi/linux/epoll_amd64.go @@ -0,0 +1,29 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package linux + +// EpollEvent is equivalent to struct epoll_event from epoll(2). +// +// +marshal slice:EpollEventSlice +type EpollEvent struct { + Events uint32 + // Linux makes struct epoll_event::data a __u64. We represent it as + // [2]int32 because, on amd64, Linux also makes struct epoll_event + // __attribute__((packed)), such that there is no padding between Events + // and Data. + Data [2]int32 +} diff --git a/pkg/abi/linux/epoll_arm64.go b/pkg/abi/linux/epoll_arm64.go new file mode 100644 index 000000000..a35939cc9 --- /dev/null +++ b/pkg/abi/linux/epoll_arm64.go @@ -0,0 +1,28 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package linux + +// EpollEvent is equivalent to struct epoll_event from epoll(2). +// +// +marshal slice:EpollEventSlice +type EpollEvent struct { + Events uint32 + // Linux makes struct epoll_event::data a __u64, necessitating 4 bytes of + // padding here. + _ int32 + Data [2]int32 +} diff --git a/pkg/abi/linux/errors.go b/pkg/abi/linux/errors.go new file mode 100644 index 000000000..93f85a864 --- /dev/null +++ b/pkg/abi/linux/errors.go @@ -0,0 +1,172 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Errno represents a Linux errno value. +type Errno struct { + number int + name string +} + +// Number returns the errno number. +func (e *Errno) Number() int { + return e.number +} + +// String implements fmt.Stringer.String. +func (e *Errno) String() string { + return e.name +} + +// Errno values from include/uapi/asm-generic/errno-base.h. 
+var ( + EPERM = &Errno{1, "operation not permitted"} + ENOENT = &Errno{2, "no such file or directory"} + ESRCH = &Errno{3, "no such process"} + EINTR = &Errno{4, "interrupted system call"} + EIO = &Errno{5, "I/O error"} + ENXIO = &Errno{6, "no such device or address"} + E2BIG = &Errno{7, "argument list too long"} + ENOEXEC = &Errno{8, "exec format error"} + EBADF = &Errno{9, "bad file number"} + ECHILD = &Errno{10, "no child processes"} + EAGAIN = &Errno{11, "try again"} + ENOMEM = &Errno{12, "out of memory"} + EACCES = &Errno{13, "permission denied"} + EFAULT = &Errno{14, "bad address"} + ENOTBLK = &Errno{15, "block device required"} + EBUSY = &Errno{16, "device or resource busy"} + EEXIST = &Errno{17, "file exists"} + EXDEV = &Errno{18, "cross-device link"} + ENODEV = &Errno{19, "no such device"} + ENOTDIR = &Errno{20, "not a directory"} + EISDIR = &Errno{21, "is a directory"} + EINVAL = &Errno{22, "invalid argument"} + ENFILE = &Errno{23, "file table overflow"} + EMFILE = &Errno{24, "too many open files"} + ENOTTY = &Errno{25, "not a typewriter"} + ETXTBSY = &Errno{26, "text file busy"} + EFBIG = &Errno{27, "file too large"} + ENOSPC = &Errno{28, "no space left on device"} + ESPIPE = &Errno{29, "illegal seek"} + EROFS = &Errno{30, "read-only file system"} + EMLINK = &Errno{31, "too many links"} + EPIPE = &Errno{32, "broken pipe"} + EDOM = &Errno{33, "math argument out of domain of func"} + ERANGE = &Errno{34, "math result not representable"} +) + +// Errno values from include/uapi/asm-generic/errno.h. 
+var ( + EDEADLK = &Errno{35, "resource deadlock would occur"} + ENAMETOOLONG = &Errno{36, "file name too long"} + ENOLCK = &Errno{37, "no record locks available"} + ENOSYS = &Errno{38, "invalid system call number"} + ENOTEMPTY = &Errno{39, "directory not empty"} + ELOOP = &Errno{40, "too many symbolic links encountered"} + EWOULDBLOCK = &Errno{EAGAIN.number, "operation would block"} + ENOMSG = &Errno{42, "no message of desired type"} + EIDRM = &Errno{43, "identifier removed"} + ECHRNG = &Errno{44, "channel number out of range"} + EL2NSYNC = &Errno{45, "level 2 not synchronized"} + EL3HLT = &Errno{46, "level 3 halted"} + EL3RST = &Errno{47, "level 3 reset"} + ELNRNG = &Errno{48, "link number out of range"} + EUNATCH = &Errno{49, "protocol driver not attached"} + ENOCSI = &Errno{50, "no CSI structure available"} + EL2HLT = &Errno{51, "level 2 halted"} + EBADE = &Errno{52, "invalid exchange"} + EBADR = &Errno{53, "invalid request descriptor"} + EXFULL = &Errno{54, "exchange full"} + ENOANO = &Errno{55, "no anode"} + EBADRQC = &Errno{56, "invalid request code"} + EBADSLT = &Errno{57, "invalid slot"} + EDEADLOCK = EDEADLK + EBFONT = &Errno{59, "bad font file format"} + ENOSTR = &Errno{60, "device not a stream"} + ENODATA = &Errno{61, "no data available"} + ETIME = &Errno{62, "timer expired"} + ENOSR = &Errno{63, "out of streams resources"} + ENONET = &Errno{64, "machine is not on the network"} + ENOPKG = &Errno{65, "package not installed"} + EREMOTE = &Errno{66, "object is remote"} + ENOLINK = &Errno{67, "link has been severed"} + EADV = &Errno{68, "advertise error"} + ESRMNT = &Errno{69, "srmount error"} + ECOMM = &Errno{70, "communication error on send"} + EPROTO = &Errno{71, "protocol error"} + EMULTIHOP = &Errno{72, "multihop attempted"} + EDOTDOT = &Errno{73, "RFS specific error"} + EBADMSG = &Errno{74, "not a data message"} + EOVERFLOW = &Errno{75, "value too large for defined data type"} + ENOTUNIQ = &Errno{76, "name not unique on network"} + EBADFD = &Errno{77, 
"file descriptor in bad state"} + EREMCHG = &Errno{78, "remote address changed"} + ELIBACC = &Errno{79, "can not access a needed shared library"} + ELIBBAD = &Errno{80, "accessing a corrupted shared library"} + ELIBSCN = &Errno{81, ".lib section in a.out corrupted"} + ELIBMAX = &Errno{82, "attempting to link in too many shared libraries"} + ELIBEXEC = &Errno{83, "cannot exec a shared library directly"} + EILSEQ = &Errno{84, "illegal byte sequence"} + ERESTART = &Errno{85, "interrupted system call should be restarted"} + ESTRPIPE = &Errno{86, "streams pipe error"} + EUSERS = &Errno{87, "too many users"} + ENOTSOCK = &Errno{88, "socket operation on non-socket"} + EDESTADDRREQ = &Errno{89, "destination address required"} + EMSGSIZE = &Errno{90, "message too long"} + EPROTOTYPE = &Errno{91, "protocol wrong type for socket"} + ENOPROTOOPT = &Errno{92, "protocol not available"} + EPROTONOSUPPORT = &Errno{93, "protocol not supported"} + ESOCKTNOSUPPORT = &Errno{94, "socket type not supported"} + EOPNOTSUPP = &Errno{95, "operation not supported on transport endpoint"} + EPFNOSUPPORT = &Errno{96, "protocol family not supported"} + EAFNOSUPPORT = &Errno{97, "address family not supported by protocol"} + EADDRINUSE = &Errno{98, "address already in use"} + EADDRNOTAVAIL = &Errno{99, "cannot assign requested address"} + ENETDOWN = &Errno{100, "network is down"} + ENETUNREACH = &Errno{101, "network is unreachable"} + ENETRESET = &Errno{102, "network dropped connection because of reset"} + ECONNABORTED = &Errno{103, "software caused connection abort"} + ECONNRESET = &Errno{104, "connection reset by peer"} + ENOBUFS = &Errno{105, "no buffer space available"} + EISCONN = &Errno{106, "transport endpoint is already connected"} + ENOTCONN = &Errno{107, "transport endpoint is not connected"} + ESHUTDOWN = &Errno{108, "cannot send after transport endpoint shutdown"} + ETOOMANYREFS = &Errno{109, "too many references: cannot splice"} + ETIMEDOUT = &Errno{110, "connection timed out"} + 
ECONNREFUSED = &Errno{111, "connection refused"} + EHOSTDOWN = &Errno{112, "host is down"} + EHOSTUNREACH = &Errno{113, "no route to host"} + EALREADY = &Errno{114, "operation already in progress"} + EINPROGRESS = &Errno{115, "operation now in progress"} + ESTALE = &Errno{116, "stale file handle"} + EUCLEAN = &Errno{117, "structure needs cleaning"} + ENOTNAM = &Errno{118, "not a XENIX named type file"} + ENAVAIL = &Errno{119, "no XENIX semaphores available"} + EISNAM = &Errno{120, "is a named type file"} + EREMOTEIO = &Errno{121, "remote I/O error"} + EDQUOT = &Errno{122, "quota exceeded"} + ENOMEDIUM = &Errno{123, "no medium found"} + EMEDIUMTYPE = &Errno{124, "wrong medium type"} + ECANCELED = &Errno{125, "operation Canceled"} + ENOKEY = &Errno{126, "required key not available"} + EKEYEXPIRED = &Errno{127, "key has expired"} + EKEYREVOKED = &Errno{128, "key has been revoked"} + EKEYREJECTED = &Errno{129, "key was rejected by service"} + EOWNERDEAD = &Errno{130, "owner died"} + ENOTRECOVERABLE = &Errno{131, "state not recoverable"} + ERFKILL = &Errno{132, "operation not possible due to RF-kill"} + EHWPOISON = &Errno{133, "memory page has hardware error"} +) diff --git a/pkg/abi/linux/eventfd.go b/pkg/abi/linux/eventfd.go new file mode 100644 index 000000000..9c479fc8f --- /dev/null +++ b/pkg/abi/linux/eventfd.go @@ -0,0 +1,22 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linux + +// Constants for eventfd2(2). +const ( + EFD_SEMAPHORE = 0x1 + EFD_CLOEXEC = O_CLOEXEC + EFD_NONBLOCK = O_NONBLOCK +) diff --git a/pkg/abi/linux/exec.go b/pkg/abi/linux/exec.go new file mode 100644 index 000000000..579d46c41 --- /dev/null +++ b/pkg/abi/linux/exec.go @@ -0,0 +1,18 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// TASK_COMM_LEN is the task command name length. +const TASK_COMM_LEN = 16 diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go new file mode 100644 index 000000000..6663a199c --- /dev/null +++ b/pkg/abi/linux/fcntl.go @@ -0,0 +1,69 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Commands from linux/fcntl.h. 
+const ( + F_DUPFD = 0 + F_GETFD = 1 + F_SETFD = 2 + F_GETFL = 3 + F_SETFL = 4 + F_SETLK = 6 + F_SETLKW = 7 + F_SETOWN = 8 + F_GETOWN = 9 + F_SETOWN_EX = 15 + F_GETOWN_EX = 16 + F_DUPFD_CLOEXEC = 1024 + 6 + F_SETPIPE_SZ = 1024 + 7 + F_GETPIPE_SZ = 1024 + 8 +) + +// Commands for F_SETLK. +const ( + F_RDLCK = 0 + F_WRLCK = 1 + F_UNLCK = 2 +) + +// Flags for fcntl. +const ( + FD_CLOEXEC = 00000001 +) + +// Flock is the lock structure for F_SETLK. +type Flock struct { + Type int16 + Whence int16 + _ [4]byte + Start int64 + Len int64 + Pid int32 + _ [4]byte +} + +// Flags for F_SETOWN_EX and F_GETOWN_EX. +const ( + F_OWNER_TID = 0 + F_OWNER_PID = 1 + F_OWNER_PGRP = 2 +) + +// FOwnerEx is the owner structure for F_SETOWN_EX and F_GETOWN_EX. +type FOwnerEx struct { + Type int32 + PID int32 +} diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go new file mode 100644 index 000000000..055ac1d7c --- /dev/null +++ b/pkg/abi/linux/file.go @@ -0,0 +1,383 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "fmt" + "strings" + + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/binary" +) + +// Constants for open(2). 
+const ( + O_ACCMODE = 000000003 + O_RDONLY = 000000000 + O_WRONLY = 000000001 + O_RDWR = 000000002 + O_CREAT = 000000100 + O_EXCL = 000000200 + O_NOCTTY = 000000400 + O_TRUNC = 000001000 + O_APPEND = 000002000 + O_NONBLOCK = 000004000 + O_DSYNC = 000010000 + O_ASYNC = 000020000 + O_NOATIME = 001000000 + O_CLOEXEC = 002000000 + O_SYNC = 004000000 // __O_SYNC in Linux + O_PATH = 010000000 + O_TMPFILE = 020000000 // __O_TMPFILE in Linux +) + +// Constants for fstatat(2). +const ( + AT_SYMLINK_NOFOLLOW = 0x100 +) + +// Constants for mount(2). +const ( + MS_RDONLY = 0x1 + MS_NOSUID = 0x2 + MS_NODEV = 0x4 + MS_NOEXEC = 0x8 + MS_SYNCHRONOUS = 0x10 + MS_REMOUNT = 0x20 + MS_MANDLOCK = 0x40 + MS_DIRSYNC = 0x80 + MS_NOATIME = 0x400 + MS_NODIRATIME = 0x800 + MS_BIND = 0x1000 + MS_MOVE = 0x2000 + MS_REC = 0x4000 + + MS_POSIXACL = 0x10000 + MS_UNBINDABLE = 0x20000 + MS_PRIVATE = 0x40000 + MS_SLAVE = 0x80000 + MS_SHARED = 0x100000 + MS_RELATIME = 0x200000 + MS_KERNMOUNT = 0x400000 + MS_I_VERSION = 0x800000 + MS_STRICTATIME = 0x1000000 + + MS_MGC_VAL = 0xC0ED0000 + MS_MGC_MSK = 0xffff0000 +) + +// Constants for umount2(2). +const ( + MNT_FORCE = 0x1 + MNT_DETACH = 0x2 + MNT_EXPIRE = 0x4 + UMOUNT_NOFOLLOW = 0x8 +) + +// Constants for unlinkat(2). +const ( + AT_REMOVEDIR = 0x200 +) + +// Constants for linkat(2) and fchownat(2). +const ( + AT_SYMLINK_FOLLOW = 0x400 + AT_EMPTY_PATH = 0x1000 +) + +// Constants for all file-related ...at(2) syscalls. +const ( + AT_FDCWD = -100 +) + +// Special values for the ns field in utimensat(2). +const ( + UTIME_NOW = ((1 << 30) - 1) + UTIME_OMIT = ((1 << 30) - 2) +) + +// MaxSymlinkTraversals is the maximum number of links that will be followed by +// the kernel to resolve a symlink. +const MaxSymlinkTraversals = 40 + +// Constants for flock(2). +const ( + LOCK_SH = 1 // shared lock + LOCK_EX = 2 // exclusive lock + LOCK_NB = 4 // or'd with one of the above to prevent blocking + LOCK_UN = 8 // remove lock +) + +// Values for mode_t. 
+const ( + S_IFMT = 0170000 + S_IFSOCK = 0140000 + S_IFLNK = 0120000 + S_IFREG = 0100000 + S_IFBLK = 060000 + S_IFDIR = 040000 + S_IFCHR = 020000 + S_IFIFO = 010000 + + FileTypeMask = S_IFMT + ModeSocket = S_IFSOCK + ModeSymlink = S_IFLNK + ModeRegular = S_IFREG + ModeBlockDevice = S_IFBLK + ModeDirectory = S_IFDIR + ModeCharacterDevice = S_IFCHR + ModeNamedPipe = S_IFIFO + + S_ISUID = 04000 + S_ISGID = 02000 + S_ISVTX = 01000 + + ModeSetUID = S_ISUID + ModeSetGID = S_ISGID + ModeSticky = S_ISVTX + + ModeUserAll = 0700 + ModeUserRead = 0400 + ModeUserWrite = 0200 + ModeUserExec = 0100 + ModeGroupAll = 0070 + ModeGroupRead = 0040 + ModeGroupWrite = 0020 + ModeGroupExec = 0010 + ModeOtherAll = 0007 + ModeOtherRead = 0004 + ModeOtherWrite = 0002 + ModeOtherExec = 0001 + PermissionsMask = 0777 +) + +// Values for linux_dirent64.d_type. +const ( + DT_UNKNOWN = 0 + DT_FIFO = 1 + DT_CHR = 2 + DT_DIR = 4 + DT_BLK = 6 + DT_REG = 8 + DT_LNK = 10 + DT_SOCK = 12 + DT_WHT = 14 +) + +// DirentType are the friendly strings for linux_dirent64.d_type. +var DirentType = abi.ValueSet{ + DT_UNKNOWN: "DT_UNKNOWN", + DT_FIFO: "DT_FIFO", + DT_CHR: "DT_CHR", + DT_DIR: "DT_DIR", + DT_BLK: "DT_BLK", + DT_REG: "DT_REG", + DT_LNK: "DT_LNK", + DT_SOCK: "DT_SOCK", + DT_WHT: "DT_WHT", +} + +// Values for preadv2/pwritev2. +const ( + // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is + // accepted as a valid flag argument for preadv2/pwritev2. + RWF_HIPRI = 0x00000001 + RWF_DSYNC = 0x00000002 + RWF_SYNC = 0x00000004 + RWF_VALID = RWF_HIPRI | RWF_DSYNC | RWF_SYNC +) + +// SizeOfStat is the size of a Stat struct. +var SizeOfStat = binary.Size(Stat{}) + +// Flags for statx. +const ( + AT_STATX_SYNC_TYPE = 0x6000 + AT_STATX_SYNC_AS_STAT = 0x0000 + AT_STATX_FORCE_SYNC = 0x2000 + AT_STATX_DONT_SYNC = 0x4000 +) + +// Mask values for statx. 
+const ( + STATX_TYPE = 0x00000001 + STATX_MODE = 0x00000002 + STATX_NLINK = 0x00000004 + STATX_UID = 0x00000008 + STATX_GID = 0x00000010 + STATX_ATIME = 0x00000020 + STATX_MTIME = 0x00000040 + STATX_CTIME = 0x00000080 + STATX_INO = 0x00000100 + STATX_SIZE = 0x00000200 + STATX_BLOCKS = 0x00000400 + STATX_BASIC_STATS = 0x000007ff + STATX_BTIME = 0x00000800 + STATX_ALL = 0x00000fff + STATX__RESERVED = 0x80000000 +) + +// Bitmasks for Statx.Attributes and Statx.AttributesMask, from +// include/uapi/linux/stat.h. +const ( + STATX_ATTR_COMPRESSED = 0x00000004 + STATX_ATTR_IMMUTABLE = 0x00000010 + STATX_ATTR_APPEND = 0x00000020 + STATX_ATTR_NODUMP = 0x00000040 + STATX_ATTR_ENCRYPTED = 0x00000800 + STATX_ATTR_AUTOMOUNT = 0x00001000 +) + +// Statx represents struct statx. +// +// +marshal +type Statx struct { + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + UID uint32 + GID uint32 + Mode uint16 + _ uint16 + Ino uint64 + Size uint64 + Blocks uint64 + AttributesMask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + RdevMajor uint32 + RdevMinor uint32 + DevMajor uint32 + DevMinor uint32 +} + +// SizeOfStatx is the size of a Statx struct. +var SizeOfStatx = binary.Size(Statx{}) + +// FileMode represents a mode_t. +type FileMode uint16 + +// Permissions returns just the permission bits. +func (m FileMode) Permissions() FileMode { + return m & PermissionsMask +} + +// FileType returns just the file type bits. +func (m FileMode) FileType() FileMode { + return m & FileTypeMask +} + +// ExtraBits returns everything but the file type and permission bits. +func (m FileMode) ExtraBits() FileMode { + return m &^ (PermissionsMask | FileTypeMask) +} + +// IsDir returns true if file type represents a directory. +func (m FileMode) IsDir() bool { + return m.FileType() == S_IFDIR +} + +// String returns a string representation of m. 
+func (m FileMode) String() string { + var s []string + if ft := m.FileType(); ft != 0 { + s = append(s, fileType.Parse(uint64(ft))) + } + if eb := m.ExtraBits(); eb != 0 { + s = append(s, modeExtraBits.Parse(uint64(eb))) + } + s = append(s, fmt.Sprintf("0o%o", m.Permissions())) + return strings.Join(s, "|") +} + +// DirentType maps file types to dirent types appropriate for (struct +// dirent)::d_type. +func (m FileMode) DirentType() uint8 { + switch m.FileType() { + case ModeSocket: + return DT_SOCK + case ModeSymlink: + return DT_LNK + case ModeRegular: + return DT_REG + case ModeBlockDevice: + return DT_BLK + case ModeDirectory: + return DT_DIR + case ModeCharacterDevice: + return DT_CHR + case ModeNamedPipe: + return DT_FIFO + default: + return DT_UNKNOWN + } +} + +var modeExtraBits = abi.FlagSet{ + { + Flag: ModeSetUID, + Name: "S_ISUID", + }, + { + Flag: ModeSetGID, + Name: "S_ISGID", + }, + { + Flag: ModeSticky, + Name: "S_ISVTX", + }, +} + +var fileType = abi.ValueSet{ + ModeSocket: "S_IFSOCK", + ModeSymlink: "S_IFLINK", + ModeRegular: "S_IFREG", + ModeBlockDevice: "S_IFBLK", + ModeDirectory: "S_IFDIR", + ModeCharacterDevice: "S_IFCHR", + ModeNamedPipe: "S_IFIFO", +} + +// Constants for memfd_create(2). Source: include/uapi/linux/memfd.h +const ( + MFD_CLOEXEC = 0x0001 + MFD_ALLOW_SEALING = 0x0002 +) + +// Constants related to file seals. Source: include/uapi/{asm-generic,linux}/fcntl.h +const ( + F_LINUX_SPECIFIC_BASE = 1024 + F_ADD_SEALS = F_LINUX_SPECIFIC_BASE + 9 + F_GET_SEALS = F_LINUX_SPECIFIC_BASE + 10 + + F_SEAL_SEAL = 0x0001 // Prevent further seals from being set. + F_SEAL_SHRINK = 0x0002 // Prevent file from shrinking. + F_SEAL_GROW = 0x0004 // Prevent file from growing. + F_SEAL_WRITE = 0x0008 // Prevent writes. +) + +// Constants related to fallocate(2). 
Source: include/uapi/linux/falloc.h +const ( + FALLOC_FL_KEEP_SIZE = 0x01 + FALLOC_FL_PUNCH_HOLE = 0x02 + FALLOC_FL_NO_HIDE_STALE = 0x04 + FALLOC_FL_COLLAPSE_RANGE = 0x08 + FALLOC_FL_ZERO_RANGE = 0x10 + FALLOC_FL_INSERT_RANGE = 0x20 + FALLOC_FL_UNSHARE_RANGE = 0x40 +) diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go new file mode 100644 index 000000000..6b72364ea --- /dev/null +++ b/pkg/abi/linux/file_amd64.go @@ -0,0 +1,46 @@ +// Copyright 2018 The gVisor Authors. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package linux + +// Constants for open(2). +const ( + O_DIRECT = 000040000 + O_LARGEFILE = 000100000 + O_DIRECTORY = 000200000 + O_NOFOLLOW = 000400000 +) + +// Stat represents struct stat. +// +// +marshal +type Stat struct { + Dev uint64 + Ino uint64 + Nlink uint64 + Mode uint32 + UID uint32 + GID uint32 + _ int32 + Rdev uint64 + Size int64 + Blksize int64 + Blocks int64 + ATime Timespec + MTime Timespec + CTime Timespec + _ [3]int64 +} diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go new file mode 100644 index 000000000..6492c9038 --- /dev/null +++ b/pkg/abi/linux/file_arm64.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package linux + +// Constants for open(2). +const ( + O_DIRECTORY = 000040000 + O_NOFOLLOW = 000100000 + O_DIRECT = 000200000 + O_LARGEFILE = 000400000 +) + +// Stat represents struct stat. +// +// +marshal +type Stat struct { + Dev uint64 + Ino uint64 + Mode uint32 + Nlink uint32 + UID uint32 + GID uint32 + Rdev uint64 + _ uint64 + Size int64 + Blksize int32 + _ int32 + Blocks int64 + ATime Timespec + MTime Timespec + CTime Timespec + _ [2]int32 +} diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go new file mode 100644 index 000000000..158d2db5b --- /dev/null +++ b/pkg/abi/linux/fs.go @@ -0,0 +1,103 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Filesystem types used in statfs(2). +// +// See linux/magic.h. 
+const ( + ANON_INODE_FS_MAGIC = 0x09041934 + DEVPTS_SUPER_MAGIC = 0x00001cd1 + EXT_SUPER_MAGIC = 0xef53 + OVERLAYFS_SUPER_MAGIC = 0x794c7630 + PIPEFS_MAGIC = 0x50495045 + PROC_SUPER_MAGIC = 0x9fa0 + RAMFS_MAGIC = 0x09041934 + SOCKFS_MAGIC = 0x534F434B + SYSFS_MAGIC = 0x62656572 + TMPFS_MAGIC = 0x01021994 + V9FS_MAGIC = 0x01021997 +) + +// Filesystem path limits, from uapi/linux/limits.h. +const ( + NAME_MAX = 255 + PATH_MAX = 4096 +) + +// Statfs is struct statfs, from uapi/asm-generic/statfs.h. +// +// +marshal +type Statfs struct { + // Type is one of the filesystem magic values, defined above. + Type uint64 + + // BlockSize is the data block size. + BlockSize int64 + + // Blocks is the number of data blocks in use. + Blocks uint64 + + // BlocksFree is the number of free blocks. + BlocksFree uint64 + + // BlocksAvailable is the number of blocks free for use by + // unprivileged users. + BlocksAvailable uint64 + + // Files is the number of used file nodes on the filesystem. + Files uint64 + + // FilesFree is the number of free file nodes on the filesystem. + FilesFree uint64 + + // FSID is the filesystem ID. + FSID [2]int32 + + // NameLength is the maximum file name length. + NameLength uint64 + + // FragmentSize is equivalent to BlockSize. + FragmentSize int64 + + // Flags is the set of filesystem mount flags. + Flags uint64 + + // Spare is unused. + Spare [4]uint64 +} + +// Whence argument to lseek(2), from include/uapi/linux/fs.h. +const ( + SEEK_SET = 0 + SEEK_CUR = 1 + SEEK_END = 2 + SEEK_DATA = 3 + SEEK_HOLE = 4 +) + +// Sync_file_range flags, from include/uapi/linux/fs.h +const ( + SYNC_FILE_RANGE_WAIT_BEFORE = 1 + SYNC_FILE_RANGE_WRITE = 2 + SYNC_FILE_RANGE_WAIT_AFTER = 4 +) + +// Flag argument to renameat2(2), from include/uapi/linux/fs.h. +const ( + RENAME_NOREPLACE = (1 << 0) // Don't overwrite target. + RENAME_EXCHANGE = (1 << 1) // Exchange src and dst. + RENAME_WHITEOUT = (1 << 2) // Whiteout src.
+) diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go new file mode 100644 index 000000000..08bfde3b5 --- /dev/null +++ b/pkg/abi/linux/futex.go @@ -0,0 +1,62 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// From <linux/futex.h> and <sys/time.h>. +// Flags are used in syscall futex(2). +const ( + FUTEX_WAIT = 0 + FUTEX_WAKE = 1 + FUTEX_FD = 2 + FUTEX_REQUEUE = 3 + FUTEX_CMP_REQUEUE = 4 + FUTEX_WAKE_OP = 5 + FUTEX_LOCK_PI = 6 + FUTEX_UNLOCK_PI = 7 + FUTEX_TRYLOCK_PI = 8 + FUTEX_WAIT_BITSET = 9 + FUTEX_WAKE_BITSET = 10 + FUTEX_WAIT_REQUEUE_PI = 11 + FUTEX_CMP_REQUEUE_PI = 12 + + FUTEX_PRIVATE_FLAG = 128 + FUTEX_CLOCK_REALTIME = 256 +) + +// These flags are from <linux/futex.h> and are used in FUTEX_WAKE_OP +// to define the operations. +const ( + FUTEX_OP_SET = 0 + FUTEX_OP_ADD = 1 + FUTEX_OP_OR = 2 + FUTEX_OP_ANDN = 3 + FUTEX_OP_XOR = 4 + FUTEX_OP_OPARG_SHIFT = 8 + FUTEX_OP_CMP_EQ = 0 + FUTEX_OP_CMP_NE = 1 + FUTEX_OP_CMP_LT = 2 + FUTEX_OP_CMP_LE = 3 + FUTEX_OP_CMP_GT = 4 + FUTEX_OP_CMP_GE = 5 +) + +// FUTEX_TID_MASK is the TID portion of a PI futex word. +const FUTEX_TID_MASK = 0x3fffffff + +// Constants used for priority-inheritance futexes.
+const ( + FUTEX_WAITERS = 0x80000000 + FUTEX_OWNER_DIED = 0x40000000 +) diff --git a/pkg/abi/linux/inotify.go b/pkg/abi/linux/inotify.go new file mode 100644 index 000000000..2d08194ba --- /dev/null +++ b/pkg/abi/linux/inotify.go @@ -0,0 +1,97 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Inotify events observable by userspace. These directly correspond to +// filesystem operations and only one of them may be present per inotify +// event read from an inotify fd. +const ( + // IN_ACCESS indicates a file was accessed. + IN_ACCESS = 0x00000001 + // IN_MODIFY indicates a file was modified. + IN_MODIFY = 0x00000002 + // IN_ATTRIB indicates a watch target's metadata changed. + IN_ATTRIB = 0x00000004 + // IN_CLOSE_WRITE indicates a writable file was closed. + IN_CLOSE_WRITE = 0x00000008 + // IN_CLOSE_NOWRITE indicates a non-writable file was closed. + IN_CLOSE_NOWRITE = 0x00000010 + // IN_OPEN indicates a file was opened. + IN_OPEN = 0x00000020 + // IN_MOVED_FROM indicates a file was moved from X. + IN_MOVED_FROM = 0x00000040 + // IN_MOVED_TO indicates a file was moved to Y. + IN_MOVED_TO = 0x00000080 + // IN_CREATE indicates a file was created in a watched directory. + IN_CREATE = 0x00000100 + // IN_DELETE indicates a file was deleted in a watched directory. + IN_DELETE = 0x00000200 + // IN_DELETE_SELF indicates a watch target itself was deleted.
+ IN_DELETE_SELF = 0x00000400 + // IN_MOVE_SELF indicates a watch target itself was moved. + IN_MOVE_SELF = 0x00000800 + // IN_ALL_EVENTS is a mask for all observable userspace events. + IN_ALL_EVENTS = 0x00000fff +) + +// Inotify control events. These may be present in their own events, or ORed +// with other observable events. +const ( + // IN_UNMOUNT indicates the backing filesystem was unmounted. + IN_UNMOUNT = 0x00002000 + // IN_Q_OVERFLOW indicates the event queue overflowed. + IN_Q_OVERFLOW = 0x00004000 + // IN_IGNORED indicates a watch was removed, either implicitly or through + // inotify_rm_watch(2). + IN_IGNORED = 0x00008000 + // IN_ISDIR indicates the subject of an event was a directory. + IN_ISDIR = 0x40000000 +) + +// Feature flags for inotify_add_watch(2). +const ( + // IN_ONLYDIR indicates that a path should be watched only if it's a + // directory. + IN_ONLYDIR = 0x01000000 + // IN_DONT_FOLLOW indicates that the watch path shouldn't be resolved if + // it's a symlink. + IN_DONT_FOLLOW = 0x02000000 + // IN_EXCL_UNLINK indicates events to this watch from unlinked objects + // should be filtered out. + IN_EXCL_UNLINK = 0x04000000 + // IN_MASK_ADD indicates the provided mask should be ORed into any existing + // watch on the provided path. + IN_MASK_ADD = 0x20000000 + // IN_ONESHOT indicates the watch should be removed after one event. + IN_ONESHOT = 0x80000000 +) + +// Feature flags for inotify_init1(2). +const ( + // IN_CLOEXEC is an alias for O_CLOEXEC. It indicates that the inotify + // fd should be closed on exec(2) and friends. + IN_CLOEXEC = 0x00080000 + // IN_NONBLOCK is an alias for O_NONBLOCK. It indicates that I/O syscalls on + // the inotify fd should not block. + IN_NONBLOCK = 0x00000800 +) + +// ALL_INOTIFY_BITS contains all the bits for all possible inotify events. It's +// defined in the Linux source at "include/linux/inotify.h".
+const ALL_INOTIFY_BITS = IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | + IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | + IN_DELETE | IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | IN_Q_OVERFLOW | + IN_IGNORED | IN_ONLYDIR | IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | + IN_ISDIR | IN_ONESHOT diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go new file mode 100644 index 000000000..2062e6a4b --- /dev/null +++ b/pkg/abi/linux/ioctl.go @@ -0,0 +1,100 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// ioctl(2) requests provided by asm-generic/ioctls.h +// +// These are ordered by request number (low byte). 
+const ( + TCGETS = 0x00005401 + TCSETS = 0x00005402 + TCSETSW = 0x00005403 + TCSETSF = 0x00005404 + TCSBRK = 0x00005409 + TIOCEXCL = 0x0000540c + TIOCNXCL = 0x0000540d + TIOCSCTTY = 0x0000540e + TIOCGPGRP = 0x0000540f + TIOCSPGRP = 0x00005410 + TIOCOUTQ = 0x00005411 + TIOCSTI = 0x00005412 + TIOCGWINSZ = 0x00005413 + TIOCSWINSZ = 0x00005414 + TIOCMGET = 0x00005415 + TIOCMBIS = 0x00005416 + TIOCMBIC = 0x00005417 + TIOCMSET = 0x00005418 + TIOCINQ = 0x0000541b + FIONREAD = TIOCINQ + FIONBIO = 0x00005421 + TIOCSETD = 0x00005423 + TIOCNOTTY = 0x00005422 + TIOCGETD = 0x00005424 + TCSBRKP = 0x00005425 + TIOCSBRK = 0x00005427 + TIOCCBRK = 0x00005428 + TIOCGSID = 0x00005429 + TIOCGPTN = 0x80045430 + TIOCSPTLCK = 0x40045431 + TIOCGDEV = 0x80045432 + TIOCVHANGUP = 0x00005437 + TCFLSH = 0x0000540b + TIOCCONS = 0x0000541d + TIOCSSERIAL = 0x0000541f + TIOCGEXCL = 0x80045440 + TIOCGPTPEER = 0x80045441 + TIOCGICOUNT = 0x0000545d + FIONCLEX = 0x00005450 + FIOCLEX = 0x00005451 + FIOASYNC = 0x00005452 + FIOSETOWN = 0x00008901 + SIOCSPGRP = 0x00008902 + FIOGETOWN = 0x00008903 + SIOCGPGRP = 0x00008904 +) + +// ioctl(2) requests provided by uapi/linux/sockios.h +const ( + SIOCGIFMEM = 0x891f + SIOCGIFPFLAGS = 0x8935 + SIOCGMIIPHY = 0x8947 + SIOCGMIIREG = 0x8948 +) + +// ioctl(2) directions. Used to calculate requests number. +// Constants from asm-generic/ioctl.h. +const ( + _IOC_NONE = 0 + _IOC_WRITE = 1 + _IOC_READ = 2 +) + +// Constants from asm-generic/ioctl.h. +const ( + _IOC_NRBITS = 8 + _IOC_TYPEBITS = 8 + _IOC_SIZEBITS = 14 + _IOC_DIRBITS = 2 + + _IOC_NRSHIFT = 0 + _IOC_TYPESHIFT = _IOC_NRSHIFT + _IOC_NRBITS + _IOC_SIZESHIFT = _IOC_TYPESHIFT + _IOC_TYPEBITS + _IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS +) + +// IOC outputs the result of _IOC macro in asm-generic/ioctl.h. 
+func IOC(dir, typ, nr, size uint32) uint32 { + return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT +} diff --git a/pkg/abi/linux/ioctl_tun.go b/pkg/abi/linux/ioctl_tun.go new file mode 100644 index 000000000..c59c9c136 --- /dev/null +++ b/pkg/abi/linux/ioctl_tun.go @@ -0,0 +1,29 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// ioctl(2) request numbers from linux/if_tun.h +var ( + TUNSETIFF = IOC(_IOC_WRITE, 'T', 202, 4) + TUNGETIFF = IOC(_IOC_READ, 'T', 210, 4) +) + +// Flags from net/if_tun.h +const ( + IFF_TUN = 0x0001 + IFF_TAP = 0x0002 + IFF_NO_PI = 0x1000 + IFF_NOFILTER = 0x1000 +) diff --git a/pkg/abi/linux/ip.go b/pkg/abi/linux/ip.go new file mode 100644 index 000000000..31e56ffa6 --- /dev/null +++ b/pkg/abi/linux/ip.go @@ -0,0 +1,151 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linux + +// IP protocols +const ( + IPPROTO_IP = 0 + IPPROTO_ICMP = 1 + IPPROTO_IGMP = 2 + IPPROTO_IPIP = 4 + IPPROTO_TCP = 6 + IPPROTO_EGP = 8 + IPPROTO_PUP = 12 + IPPROTO_UDP = 17 + IPPROTO_IDP = 22 + IPPROTO_TP = 29 + IPPROTO_DCCP = 33 + IPPROTO_IPV6 = 41 + IPPROTO_RSVP = 46 + IPPROTO_GRE = 47 + IPPROTO_ESP = 50 + IPPROTO_AH = 51 + IPPROTO_MTP = 92 + IPPROTO_BEETPH = 94 + IPPROTO_ENCAP = 98 + IPPROTO_PIM = 103 + IPPROTO_COMP = 108 + IPPROTO_SCTP = 132 + IPPROTO_UDPLITE = 136 + IPPROTO_MPLS = 137 + IPPROTO_RAW = 255 +) + +// Socket options from uapi/linux/in.h +const ( + IP_TOS = 1 + IP_TTL = 2 + IP_HDRINCL = 3 + IP_OPTIONS = 4 + IP_ROUTER_ALERT = 5 + IP_RECVOPTS = 6 + IP_RETOPTS = 7 + IP_PKTINFO = 8 + IP_PKTOPTIONS = 9 + IP_MTU_DISCOVER = 10 + IP_RECVERR = 11 + IP_RECVTTL = 12 + IP_RECVTOS = 13 + IP_MTU = 14 + IP_FREEBIND = 15 + IP_IPSEC_POLICY = 16 + IP_XFRM_POLICY = 17 + IP_PASSSEC = 18 + IP_TRANSPARENT = 19 + IP_ORIGDSTADDR = 20 + IP_RECVORIGDSTADDR = IP_ORIGDSTADDR + IP_MINTTL = 21 + IP_NODEFRAG = 22 + IP_CHECKSUM = 23 + IP_BIND_ADDRESS_NO_PORT = 24 + IP_RECVFRAGSIZE = 25 + IP_MULTICAST_IF = 32 + IP_MULTICAST_TTL = 33 + IP_MULTICAST_LOOP = 34 + IP_ADD_MEMBERSHIP = 35 + IP_DROP_MEMBERSHIP = 36 + IP_UNBLOCK_SOURCE = 37 + IP_BLOCK_SOURCE = 38 + IP_ADD_SOURCE_MEMBERSHIP = 39 + IP_DROP_SOURCE_MEMBERSHIP = 40 + IP_MSFILTER = 41 + MCAST_JOIN_GROUP = 42 + MCAST_BLOCK_SOURCE = 43 + MCAST_UNBLOCK_SOURCE = 44 + MCAST_LEAVE_GROUP = 45 + MCAST_JOIN_SOURCE_GROUP = 46 + MCAST_LEAVE_SOURCE_GROUP = 47 + MCAST_MSFILTER = 48 + IP_MULTICAST_ALL = 49 + IP_UNICAST_IF = 50 +) + +// Socket options from uapi/linux/in6.h +const ( + IPV6_ADDRFORM = 1 + IPV6_2292PKTINFO = 2 + IPV6_2292HOPOPTS = 3 + IPV6_2292DSTOPTS = 4 + IPV6_2292RTHDR = 5 + IPV6_2292PKTOPTIONS = 6 + IPV6_CHECKSUM = 7 + IPV6_2292HOPLIMIT = 8 + IPV6_NEXTHOP = 9 + IPV6_FLOWINFO = 11 + IPV6_UNICAST_HOPS = 16 + IPV6_MULTICAST_IF = 17 + IPV6_MULTICAST_HOPS = 18 + IPV6_MULTICAST_LOOP = 19 + IPV6_ADD_MEMBERSHIP = 
20 + IPV6_DROP_MEMBERSHIP = 21 + IPV6_ROUTER_ALERT = 22 + IPV6_MTU_DISCOVER = 23 + IPV6_MTU = 24 + IPV6_RECVERR = 25 + IPV6_V6ONLY = 26 + IPV6_JOIN_ANYCAST = 27 + IPV6_LEAVE_ANYCAST = 28 + IPV6_MULTICAST_ALL = 29 + IPV6_FLOWLABEL_MGR = 32 + IPV6_FLOWINFO_SEND = 33 + IPV6_IPSEC_POLICY = 34 + IPV6_XFRM_POLICY = 35 + IPV6_HDRINCL = 36 + IPV6_RECVPKTINFO = 49 + IPV6_PKTINFO = 50 + IPV6_RECVHOPLIMIT = 51 + IPV6_HOPLIMIT = 52 + IPV6_RECVHOPOPTS = 53 + IPV6_HOPOPTS = 54 + IPV6_RTHDRDSTOPTS = 55 + IPV6_RECVRTHDR = 56 + IPV6_RTHDR = 57 + IPV6_RECVDSTOPTS = 58 + IPV6_DSTOPTS = 59 + IPV6_RECVPATHMTU = 60 + IPV6_PATHMTU = 61 + IPV6_DONTFRAG = 62 + IPV6_RECVTCLASS = 66 + IPV6_TCLASS = 67 + IPV6_AUTOFLOWLABEL = 70 + IPV6_ADDR_PREFERENCES = 72 + IPV6_MINHOPCOUNT = 73 + IPV6_ORIGDSTADDR = 74 + IPV6_RECVORIGDSTADDR = IPV6_ORIGDSTADDR + IPV6_TRANSPARENT = 75 + IPV6_UNICAST_IF = 76 + IPV6_RECVFRAGSIZE = 77 + IPV6_FREEBIND = 78 +) diff --git a/pkg/abi/linux/ipc.go b/pkg/abi/linux/ipc.go new file mode 100644 index 000000000..22acd2d43 --- /dev/null +++ b/pkg/abi/linux/ipc.go @@ -0,0 +1,53 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Control commands used with semctl, shmctl, and msgctl. Source: +// include/uapi/linux/ipc.h. +const ( + IPC_RMID = 0 + IPC_SET = 1 + IPC_STAT = 2 + IPC_INFO = 3 +) + +// resource get request flags. 
Source: include/uapi/linux/ipc.h +const ( + IPC_CREAT = 00001000 + IPC_EXCL = 00002000 + IPC_NOWAIT = 00004000 +) + +const IPC_PRIVATE = 0 + +// In Linux, amd64 does not enable CONFIG_ARCH_WANT_IPC_PARSE_VERSION, so SysV +// IPC unconditionally uses the "new" 64-bit structures that are needed for +// features like 32-bit UIDs. + +// IPCPerm is equivalent to struct ipc64_perm. +type IPCPerm struct { + Key uint32 + UID uint32 + GID uint32 + CUID uint32 + CGID uint32 + Mode uint16 + _ uint16 + Seq uint16 + _ uint16 + _ uint32 + unused1 uint64 + unused2 uint64 +} diff --git a/pkg/abi/linux/limits.go b/pkg/abi/linux/limits.go new file mode 100644 index 000000000..c74dfcd53 --- /dev/null +++ b/pkg/abi/linux/limits.go @@ -0,0 +1,88 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Resources for getrlimit(2)/setrlimit(2)/prlimit(2). +const ( + RLIMIT_CPU = 0 + RLIMIT_FSIZE = 1 + RLIMIT_DATA = 2 + RLIMIT_STACK = 3 + RLIMIT_CORE = 4 + RLIMIT_RSS = 5 + RLIMIT_NPROC = 6 + RLIMIT_NOFILE = 7 + RLIMIT_MEMLOCK = 8 + RLIMIT_AS = 9 + RLIMIT_LOCKS = 10 + RLIMIT_SIGPENDING = 11 + RLIMIT_MSGQUEUE = 12 + RLIMIT_NICE = 13 + RLIMIT_RTPRIO = 14 + RLIMIT_RTTIME = 15 +) + +// RLimit corresponds to Linux's struct rlimit. +type RLimit struct { + // Cur specifies the soft limit. + Cur uint64 + // Max specifies the hard limit. + Max uint64 +} + +const ( + // RLimInfinity is RLIM_INFINITY on Linux. 
+ RLimInfinity = ^uint64(0) + + // DefaultStackSoftLimit is called _STK_LIM in Linux. + DefaultStackSoftLimit = 8 * 1024 * 1024 + + // DefaultNprocLimit is defined in kernel/fork.c:set_max_threads, and + // called MAX_THREADS / 2 in Linux. + DefaultNprocLimit = FUTEX_TID_MASK / 2 + + // DefaultNofileSoftLimit is called INR_OPEN_CUR in Linux. + DefaultNofileSoftLimit = 1024 + + // DefaultNofileHardLimit is called INR_OPEN_MAX in Linux. + DefaultNofileHardLimit = 4096 + + // DefaultMemlockLimit is called MLOCK_LIMIT in Linux. + DefaultMemlockLimit = 64 * 1024 + + // DefaultMsgqueueLimit is called MQ_BYTES_MAX in Linux. + DefaultMsgqueueLimit = 819200 +) + +// InitRLimits is a map of initial rlimits set by Linux in +// include/asm-generic/resource.h. +var InitRLimits = map[int]RLimit{ + RLIMIT_CPU: {RLimInfinity, RLimInfinity}, + RLIMIT_FSIZE: {RLimInfinity, RLimInfinity}, + RLIMIT_DATA: {RLimInfinity, RLimInfinity}, + RLIMIT_STACK: {DefaultStackSoftLimit, RLimInfinity}, + RLIMIT_CORE: {0, RLimInfinity}, + RLIMIT_RSS: {RLimInfinity, RLimInfinity}, + RLIMIT_NPROC: {DefaultNprocLimit, DefaultNprocLimit}, + RLIMIT_NOFILE: {DefaultNofileSoftLimit, DefaultNofileHardLimit}, + RLIMIT_MEMLOCK: {DefaultMemlockLimit, DefaultMemlockLimit}, + RLIMIT_AS: {RLimInfinity, RLimInfinity}, + RLIMIT_LOCKS: {RLimInfinity, RLimInfinity}, + RLIMIT_SIGPENDING: {0, 0}, + RLIMIT_MSGQUEUE: {DefaultMsgqueueLimit, DefaultMsgqueueLimit}, + RLIMIT_NICE: {0, 0}, + RLIMIT_RTPRIO: {0, 0}, + RLIMIT_RTTIME: {RLimInfinity, RLimInfinity}, +} diff --git a/pkg/abi/linux/linux.go b/pkg/abi/linux/linux.go new file mode 100644 index 000000000..281acdbde --- /dev/null +++ b/pkg/abi/linux/linux.go @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package linux contains the constants and types needed to interface with a Linux kernel. +package linux + +// NumSoftIRQ is the number of software IRQs, exposed via /proc/stat. +// +// Defined in linux/interrupt.h. +const NumSoftIRQ = 10 + +// Sysinfo is the structure provided by sysinfo on linux versions > 2.3.48. +type Sysinfo struct { + Uptime int64 + Loads [3]uint64 + TotalRAM uint64 + FreeRAM uint64 + SharedRAM uint64 + BufferRAM uint64 + TotalSwap uint64 + FreeSwap uint64 + Procs uint16 + _ [6]byte // Pad Procs to 64bits. + TotalHigh uint64 + FreeHigh uint64 + Unit uint32 + /* The _f field in the glibc version of Sysinfo has size 0 on AMD64 */ +} diff --git a/pkg/abi/linux/mm.go b/pkg/abi/linux/mm.go new file mode 100644 index 000000000..07cc1895e --- /dev/null +++ b/pkg/abi/linux/mm.go @@ -0,0 +1,130 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Protections for mmap(2). 
+const ( + PROT_NONE = 0 + PROT_READ = 1 << 0 + PROT_WRITE = 1 << 1 + PROT_EXEC = 1 << 2 + PROT_SEM = 1 << 3 + PROT_GROWSDOWN = 1 << 24 + PROT_GROWSUP = 1 << 25 +) + +// Flags for mmap(2). +const ( + MAP_SHARED = 1 << 0 + MAP_PRIVATE = 1 << 1 + MAP_FIXED = 1 << 4 + MAP_ANONYMOUS = 1 << 5 + MAP_32BIT = 1 << 6 // arch/x86/include/uapi/asm/mman.h + MAP_GROWSDOWN = 1 << 8 + MAP_DENYWRITE = 1 << 11 + MAP_EXECUTABLE = 1 << 12 + MAP_LOCKED = 1 << 13 + MAP_NORESERVE = 1 << 14 + MAP_POPULATE = 1 << 15 + MAP_NONBLOCK = 1 << 16 + MAP_STACK = 1 << 17 + MAP_HUGETLB = 1 << 18 +) + +// Flags for mremap(2). +const ( + MREMAP_MAYMOVE = 1 << 0 + MREMAP_FIXED = 1 << 1 +) + +// Flags for mlock2(2). +const ( + MLOCK_ONFAULT = 0x01 +) + +// Flags for mlockall(2). +const ( + MCL_CURRENT = 1 + MCL_FUTURE = 2 + MCL_ONFAULT = 4 +) + +// Advice for madvise(2). +const ( + MADV_NORMAL = 0 + MADV_RANDOM = 1 + MADV_SEQUENTIAL = 2 + MADV_WILLNEED = 3 + MADV_DONTNEED = 4 + MADV_REMOVE = 9 + MADV_DONTFORK = 10 + MADV_DOFORK = 11 + MADV_MERGEABLE = 12 + MADV_UNMERGEABLE = 13 + MADV_HUGEPAGE = 14 + MADV_NOHUGEPAGE = 15 + MADV_DONTDUMP = 16 + MADV_DODUMP = 17 + MADV_HWPOISON = 100 + MADV_SOFT_OFFLINE = 101 + MADV_NOMAJFAULT = 200 + MADV_DONTCHGME = 201 +) + +// Flags for msync(2). +const ( + MS_ASYNC = 1 << 0 + MS_INVALIDATE = 1 << 1 + MS_SYNC = 1 << 2 +) + +// NumaPolicy is the NUMA memory policy for a memory range. See numa(7). +// +// +marshal +type NumaPolicy int32 + +// Policies for get_mempolicy(2)/set_mempolicy(2). +const ( + MPOL_DEFAULT NumaPolicy = 0 + MPOL_PREFERRED NumaPolicy = 1 + MPOL_BIND NumaPolicy = 2 + MPOL_INTERLEAVE NumaPolicy = 3 + MPOL_LOCAL NumaPolicy = 4 + MPOL_MAX NumaPolicy = 5 +) + +// Flags for get_mempolicy(2). +const ( + MPOL_F_NODE = 1 << 0 + MPOL_F_ADDR = 1 << 1 + MPOL_F_MEMS_ALLOWED = 1 << 2 +) + +// Flags for set_mempolicy(2). 
+const ( + MPOL_F_RELATIVE_NODES = 1 << 14 + MPOL_F_STATIC_NODES = 1 << 15 + + MPOL_MODE_FLAGS = (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES) +) + +// Flags for mbind(2). +const ( + MPOL_MF_STRICT = 1 << 0 + MPOL_MF_MOVE = 1 << 1 + MPOL_MF_MOVE_ALL = 1 << 2 + + MPOL_MF_VALID = MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL +) diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go new file mode 100644 index 000000000..7866352b4 --- /dev/null +++ b/pkg/abi/linux/netdevice.go @@ -0,0 +1,86 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import "gvisor.dev/gvisor/pkg/binary" + +const ( + // IFNAMSIZ is the size of the name field for IFReq. + IFNAMSIZ = 16 +) + +// IFReq is an interface request. +type IFReq struct { + // IFName is an encoded name, normally null-terminated. This should be + // accessed via the Name and SetName functions. + IFName [IFNAMSIZ]byte + + // Data is the union of the following structures: + // + // struct sockaddr ifr_addr; + // struct sockaddr ifr_dstaddr; + // struct sockaddr ifr_broadaddr; + // struct sockaddr ifr_netmask; + // struct sockaddr ifr_hwaddr; + // short ifr_flags; + // int ifr_ifindex; + // int ifr_metric; + // int ifr_mtu; + // struct ifmap ifr_map; + // char ifr_slave[IFNAMSIZ]; + // char ifr_newname[IFNAMSIZ]; + // char *ifr_data; + Data [24]byte +} + +// Name returns the name. 
+func (ifr *IFReq) Name() string { + for c := 0; c < len(ifr.IFName); c++ { + if ifr.IFName[c] == 0 { + return string(ifr.IFName[:c]) + } + } + return string(ifr.IFName[:]) +} + +// SetName sets the name. +func (ifr *IFReq) SetName(name string) { + n := copy(ifr.IFName[:], []byte(name)) + for i := n; i < len(ifr.IFName); i++ { + ifr.IFName[i] = 0 + } +} + +// SizeOfIFReq is the binary size of an IFReq struct (40 bytes). +var SizeOfIFReq = binary.Size(IFReq{}) + +// IFMap contains interface hardware parameters. +type IFMap struct { + MemStart uint64 + MemEnd uint64 + BaseAddr int16 + IRQ byte + DMA byte + Port byte + _ [3]byte // Pad to sizeof(struct ifmap). +} + +// IFConf is used to return a list of interfaces and their addresses. See +// netdevice(7) and struct ifconf for more detail on its use. +type IFConf struct { + Len int32 + _ [4]byte // Pad to sizeof(struct ifconf). + Ptr uint64 +} diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go new file mode 100644 index 000000000..a8d4f9d69 --- /dev/null +++ b/pkg/abi/linux/netfilter.go @@ -0,0 +1,552 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// This file contains structures required to support netfilter, specifically +// the iptables tool. + +// Hooks into the network stack. These correspond to values in +// include/uapi/linux/netfilter.h. 
+const ( + NF_INET_PRE_ROUTING = 0 + NF_INET_LOCAL_IN = 1 + NF_INET_FORWARD = 2 + NF_INET_LOCAL_OUT = 3 + NF_INET_POST_ROUTING = 4 + NF_INET_NUMHOOKS = 5 +) + +// Verdicts that can be returned by targets. These correspond to values in +// include/uapi/linux/netfilter.h +const ( + NF_DROP = 0 + NF_ACCEPT = 1 + NF_STOLEN = 2 + NF_QUEUE = 3 + NF_REPEAT = 4 + NF_STOP = 5 + NF_MAX_VERDICT = NF_STOP + // NF_RETURN is defined in include/uapi/linux/netfilter/x_tables.h. + NF_RETURN = -NF_REPEAT - 1 +) + +// VerdictStrings maps int verdicts to the strings they represent. It is used +// for debugging. +var VerdictStrings = map[int32]string{ + -NF_DROP - 1: "DROP", + -NF_ACCEPT - 1: "ACCEPT", + -NF_QUEUE - 1: "QUEUE", + NF_RETURN: "RETURN", +} + +// Socket options. These correspond to values in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +const ( + IPT_BASE_CTL = 64 + IPT_SO_SET_REPLACE = IPT_BASE_CTL + IPT_SO_SET_ADD_COUNTERS = IPT_BASE_CTL + 1 + IPT_SO_SET_MAX = IPT_SO_SET_ADD_COUNTERS + + IPT_SO_GET_INFO = IPT_BASE_CTL + IPT_SO_GET_ENTRIES = IPT_BASE_CTL + 1 + IPT_SO_GET_REVISION_MATCH = IPT_BASE_CTL + 2 + IPT_SO_GET_REVISION_TARGET = IPT_BASE_CTL + 3 + IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET +) + +// Name lengths. These correspond to values in +// include/uapi/linux/netfilter/x_tables.h. +const ( + XT_FUNCTION_MAXNAMELEN = 30 + XT_EXTENSION_MAXNAMELEN = 29 + XT_TABLE_MAXNAMELEN = 32 +) + +// IPTEntry is an iptable rule. It corresponds to struct ipt_entry in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +type IPTEntry struct { + // IP is used to filter packets based on the IP header. + IP IPTIP + + // NFCache relates to kernel-internal caching and isn't used by + // userspace. + NFCache uint32 + + // TargetOffset is the byte offset from the beginning of this IPTEntry + // to the start of the entry's target. + TargetOffset uint16 + + // NextOffset is the byte offset from the beginning of this IPTEntry to + // the start of the next entry. 
It is thus also the size of the entry. + NextOffset uint16 + + // Comeback is a return pointer. It is not used by userspace. + Comeback uint32 + + // Counters holds the packet and byte counts for this rule. + Counters XTCounters + + // Elems holds the data for all this rule's matches followed by the + // target. It is variable length -- users have to iterate over any + // matches and use TargetOffset and NextOffset to make sense of the + // data. + // + // Elems is omitted here because it would cause IPTEntry to be an extra + // byte larger (see http://www.catb.org/esr/structure-packing/). + // + // Elems [0]byte +} + +// SizeOfIPTEntry is the size of an IPTEntry. +const SizeOfIPTEntry = 112 + +// KernelIPTEntry is identical to IPTEntry, but includes the Elems field. This +// struct is marshaled via the binary package to write an IPTEntry to userspace. +type KernelIPTEntry struct { + IPTEntry + + // Elems holds the data for all this rule's matches followed by the + // target. It is variable length -- users have to iterate over any + // matches and use TargetOffset and NextOffset to make sense of the + // data. + Elems []byte +} + +// IPTIP contains information for matching a packet's IP header. +// It corresponds to struct ipt_ip in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +type IPTIP struct { + // Src is the source IP address. + Src InetAddr + + // Dst is the destination IP address. + Dst InetAddr + + // SrcMask is the source IP mask. + SrcMask InetAddr + + // DstMask is the destination IP mask. + DstMask InetAddr + + // InputInterface is the input network interface. + InputInterface [IFNAMSIZ]byte + + // OutputInterface is the output network interface. + OutputInterface [IFNAMSIZ]byte + + // InputInterfaceMask is the input interface mask. + InputInterfaceMask [IFNAMSIZ]byte + + // OutputInterfaceMask is the output interface mask. + OutputInterfaceMask [IFNAMSIZ]byte + + // Protocol is the transport protocol. 
+ Protocol uint16 + + // Flags define matching behavior for the IP header. + Flags uint8 + + // InverseFlags invert the meaning of fields in struct IPTIP. See the + // IPT_INV_* flags. + InverseFlags uint8 +} + +// Flags in IPTIP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +const ( + // Invert the meaning of InputInterface. + IPT_INV_VIA_IN = 0x01 + // Invert the meaning of OutputInterface. + IPT_INV_VIA_OUT = 0x02 + // Unclear what this is, as no references to it exist in the kernel. + IPT_INV_TOS = 0x04 + // Invert the meaning of Src. + IPT_INV_SRCIP = 0x08 + // Invert the meaning of Dst. + IPT_INV_DSTIP = 0x10 + // Invert the meaning of the IPT_F_FRAG flag. + IPT_INV_FRAG = 0x20 + // Invert the meaning of the Protocol field. + IPT_INV_PROTO = 0x40 + // Enable all flags. + IPT_INV_MASK = 0x7F +) + +// SizeOfIPTIP is the size of an IPTIP. +const SizeOfIPTIP = 84 + +// XTCounters holds packet and byte counts for a rule. It corresponds to struct +// xt_counters in include/uapi/linux/netfilter/x_tables.h. +type XTCounters struct { + // Pcnt is the packet count. + Pcnt uint64 + + // Bcnt is the byte count. + Bcnt uint64 +} + +// SizeOfXTCounters is the size of an XTCounters. +const SizeOfXTCounters = 16 + +// XTEntryMatch holds a match for a rule. For example, a user using the +// addrtype iptables match extension would put the data for that match into an +// XTEntryMatch. iptables-extensions(8) has a list of possible matches. +// +// XTEntryMatch corresponds to struct xt_entry_match in +// include/uapi/linux/netfilter/x_tables.h. That struct contains a union +// exposing different data to the user and kernel, but this struct holds only +// the user data. +type XTEntryMatch struct { + MatchSize uint16 + Name ExtensionName + Revision uint8 + // Data is omitted here because it would cause XTEntryMatch to be an + // extra byte larger (see http://www.catb.org/esr/structure-packing/). 
+ // Data [0]byte +} + +// SizeOfXTEntryMatch is the size of an XTEntryMatch. +const SizeOfXTEntryMatch = 32 + +// KernelXTEntryMatch is identical to XTEntryMatch, but contains a +// variable-length Data field. +type KernelXTEntryMatch struct { + XTEntryMatch + Data []byte +} + +// XTEntryTarget holds a target for a rule. For example, it can specify that +// packets matching the rule should DROP, ACCEPT, or use an extension target. +// iptables-extensions(8) has a list of possible targets. +// +// XTEntryTarget corresponds to struct xt_entry_target in +// include/uapi/linux/netfilter/x_tables.h. That struct contains a union +// exposing different data to the user and kernel, but this struct holds only +// the user data. +type XTEntryTarget struct { + TargetSize uint16 + Name ExtensionName + Revision uint8 + // Data is omitted here because it would cause XTEntryTarget to be an + // extra byte larger (see http://www.catb.org/esr/structure-packing/). + // Data [0]byte +} + +// SizeOfXTEntryTarget is the size of an XTEntryTarget. +const SizeOfXTEntryTarget = 32 + +// XTStandardTarget is a built-in target, one of ACCEPT, DROP, JUMP, QUEUE, or +// RETURN. It corresponds to struct xt_standard_target in +// include/uapi/linux/netfilter/x_tables.h. +type XTStandardTarget struct { + Target XTEntryTarget + // A positive verdict indicates a jump, and is the offset from the + // start of the table to jump to. A negative value means one of the + // other built-in targets. + Verdict int32 + _ [4]byte +} + +// SizeOfXTStandardTarget is the size of an XTStandardTarget. +const SizeOfXTStandardTarget = 40 + +// XTErrorTarget triggers an error when reached. It is also used to mark the +// beginning of user-defined chains by putting the name of the chain in +// ErrorName. It corresponds to struct xt_error_target in +// include/uapi/linux/netfilter/x_tables.h. 
+type XTErrorTarget struct { + Target XTEntryTarget + Name ErrorName + _ [2]byte +} + +// SizeOfXTErrorTarget is the size of an XTErrorTarget. +const SizeOfXTErrorTarget = 64 + +// Flag values for NfNATIPV4Range. The values indicate whether to map +// protocol specific part(ports) or IPs. It corresponds to values in +// include/uapi/linux/netfilter/nf_nat.h. +const ( + NF_NAT_RANGE_MAP_IPS = 1 << 0 + NF_NAT_RANGE_PROTO_SPECIFIED = 1 << 1 + NF_NAT_RANGE_PROTO_RANDOM = 1 << 2 + NF_NAT_RANGE_PERSISTENT = 1 << 3 + NF_NAT_RANGE_PROTO_RANDOM_FULLY = 1 << 4 + NF_NAT_RANGE_PROTO_RANDOM_ALL = (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) + NF_NAT_RANGE_MASK = (NF_NAT_RANGE_MAP_IPS | + NF_NAT_RANGE_PROTO_SPECIFIED | NF_NAT_RANGE_PROTO_RANDOM | + NF_NAT_RANGE_PERSISTENT | NF_NAT_RANGE_PROTO_RANDOM_FULLY) +) + +// NfNATIPV4Range corresponds to struct nf_nat_ipv4_range +// in include/uapi/linux/netfilter/nf_nat.h. The fields are in +// network byte order. +type NfNATIPV4Range struct { + Flags uint32 + MinIP [4]byte + MaxIP [4]byte + MinPort uint16 + MaxPort uint16 +} + +// NfNATIPV4MultiRangeCompat corresponds to struct +// nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h. +type NfNATIPV4MultiRangeCompat struct { + RangeSize uint32 + RangeIPV4 NfNATIPV4Range +} + +// XTRedirectTarget triggers a redirect when reached. +// Adding 4 bytes of padding to make the struct 8 byte aligned. +type XTRedirectTarget struct { + Target XTEntryTarget + NfRange NfNATIPV4MultiRangeCompat + _ [4]byte +} + +// SizeOfXTRedirectTarget is the size of an XTRedirectTarget. +const SizeOfXTRedirectTarget = 56 + +// IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds +// to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h. 
+type IPTGetinfo struct { + Name TableName + ValidHooks uint32 + HookEntry [NF_INET_NUMHOOKS]uint32 + Underflow [NF_INET_NUMHOOKS]uint32 + NumEntries uint32 + Size uint32 +} + +// SizeOfIPTGetinfo is the size of an IPTGetinfo. +const SizeOfIPTGetinfo = 84 + +// IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It +// corresponds to struct ipt_get_entries in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +type IPTGetEntries struct { + Name TableName + Size uint32 + _ [4]byte + // Entrytable is omitted here because it would cause IPTGetEntries to + // be an extra byte longer (see + // http://www.catb.org/esr/structure-packing/). + // Entrytable [0]IPTEntry +} + +// SizeOfIPTGetEntries is the size of an IPTGetEntries. +const SizeOfIPTGetEntries = 40 + +// KernelIPTGetEntries is identical to IPTGetEntries, but includes the +// Entrytable field. This struct is marshaled via the binary package to write a +// KernelIPTGetEntries to userspace. +type KernelIPTGetEntries struct { + IPTGetEntries + Entrytable []KernelIPTEntry +} + +// IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It +// corresponds to struct ipt_replace in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. +type IPTReplace struct { + Name TableName + ValidHooks uint32 + NumEntries uint32 + Size uint32 + HookEntry [NF_INET_NUMHOOKS]uint32 + Underflow [NF_INET_NUMHOOKS]uint32 + NumCounters uint32 + Counters uint64 // This is really a *XTCounters. + // Entries is omitted here because it would cause IPTReplace to be an + // extra byte longer (see http://www.catb.org/esr/structure-packing/). + // Entries [0]IPTEntry +} + +// KernelIPTReplace is identical to IPTReplace, but includes the Entries field. +type KernelIPTReplace struct { + IPTReplace + Entries [0]IPTEntry +} + +// SizeOfIPTReplace is the size of an IPTReplace. +const SizeOfIPTReplace = 96 + +// ExtensionName holds the name of a netfilter extension. 
+type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (en ExtensionName) String() string { + return goString(en[:]) +} + +// TableName holds the name of a netfilter table. +type TableName [XT_TABLE_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (tn TableName) String() string { + return goString(tn[:]) +} + +// ErrorName holds the name of a netfilter error. These can also hold +// user-defined chains. +type ErrorName [XT_FUNCTION_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (en ErrorName) String() string { + return goString(en[:]) +} + +func goString(cstring []byte) string { + for i, c := range cstring { + if c == 0 { + return string(cstring[:i]) + } + } + return string(cstring) +} + +// XTTCP holds data for matching TCP packets. It corresponds to struct xt_tcp +// in include/uapi/linux/netfilter/xt_tcpudp.h. +type XTTCP struct { + // SourcePortStart specifies the inclusive start of the range of source + // ports to which the matcher applies. + SourcePortStart uint16 + + // SourcePortEnd specifies the inclusive end of the range of source ports + // to which the matcher applies. + SourcePortEnd uint16 + + // DestinationPortStart specifies the start of the destination port + // range to which the matcher applies. + DestinationPortStart uint16 + + // DestinationPortEnd specifies the end of the destination port + // range to which the matcher applies. + DestinationPortEnd uint16 + + // Option specifies that a particular TCP option must be set. + Option uint8 + + // FlagMask masks TCP flags when comparing to the FlagCompare byte. It allows + // for specification of which flags are important to the matcher. + FlagMask uint8 + + // FlagCompare, in combination with FlagMask, is used to match only packets + // that have certain flags set. + FlagCompare uint8 + + // InverseFlags flips the meaning of certain fields. See the + // XT_TCP_INV_* flags. 
+ InverseFlags uint8 +} + +// SizeOfXTTCP is the size of an XTTCP. +const SizeOfXTTCP = 12 + +// Flags in XTTCP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter/xt_tcpudp.h. +const ( + // Invert the meaning of SourcePortStart/End. + XT_TCP_INV_SRCPT = 0x01 + // Invert the meaning of DestinationPortStart/End. + XT_TCP_INV_DSTPT = 0x02 + // Invert the meaning of FlagCompare. + XT_TCP_INV_FLAGS = 0x04 + // Invert the meaning of Option. + XT_TCP_INV_OPTION = 0x08 + // Enable all flags. + XT_TCP_INV_MASK = 0x0F +) + +// XTUDP holds data for matching UDP packets. It corresponds to struct xt_udp +// in include/uapi/linux/netfilter/xt_tcpudp.h. +type XTUDP struct { + // SourcePortStart is the inclusive start of the range of source ports + // to which the matcher applies. + SourcePortStart uint16 + + // SourcePortEnd is the inclusive end of the range of source ports to + // which the matcher applies. + SourcePortEnd uint16 + + // DestinationPortStart is the inclusive start of the destination port + // range to which the matcher applies. + DestinationPortStart uint16 + + // DestinationPortEnd is the inclusive end of the destination port + // range to which the matcher applies. + DestinationPortEnd uint16 + + // InverseFlags flips the meaning of certain fields. See the + // XT_UDP_INV_* flags. + InverseFlags uint8 + + _ uint8 +} + +// SizeOfXTUDP is the size of an XTUDP. +const SizeOfXTUDP = 10 + +// Flags in XTUDP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter/xt_tcpudp.h. +const ( + // Invert the meaning of SourcePortStart/End. + XT_UDP_INV_SRCPT = 0x01 + // Invert the meaning of DestinationPortStart/End. + XT_UDP_INV_DSTPT = 0x02 + // Enable all flags. + XT_UDP_INV_MASK = 0x03 +) + +// IPTOwnerInfo holds data for matching packets with owner. It corresponds +// to struct ipt_owner_info in libxt_owner.c of iptables binary. +type IPTOwnerInfo struct { + // UID is user id which created the packet. 
+ UID uint32 + + // GID is group id which created the packet. + GID uint32 + + // PID is process id of the process which created the packet. + PID uint32 + + // SID is session id which created the packet. + SID uint32 + + // Comm is the command name which created the packet. + Comm [16]byte + + // Match is used to match UID/GID of the socket. See the + // XT_OWNER_* flags below. + Match uint8 + + // Invert flips the meaning of Match field. + Invert uint8 +} + +// SizeOfIPTOwnerInfo is the size of an IPTOwnerInfo. +const SizeOfIPTOwnerInfo = 34 + +// Flags in IPTOwnerInfo.Match. Corresponding constants are in +// include/uapi/linux/netfilter/xt_owner.h. +const ( + // Match the UID of the packet. + XT_OWNER_UID = 1 << 0 + // Match the GID of the packet. + XT_OWNER_GID = 1 << 1 + // Match if the socket exists for the packet. Forwarded + // packets do not have an associated socket. + XT_OWNER_SOCKET = 1 << 2 +) diff --git a/pkg/abi/linux/netfilter_test.go b/pkg/abi/linux/netfilter_test.go new file mode 100644 index 000000000..565dd550e --- /dev/null +++ b/pkg/abi/linux/netfilter_test.go @@ -0,0 +1,46 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linux + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/binary" +) + +func TestSizes(t *testing.T) { + testCases := []struct { + typ interface{} + defined uintptr + }{ + {IPTEntry{}, SizeOfIPTEntry}, + {IPTGetEntries{}, SizeOfIPTGetEntries}, + {IPTGetinfo{}, SizeOfIPTGetinfo}, + {IPTIP{}, SizeOfIPTIP}, + {IPTOwnerInfo{}, SizeOfIPTOwnerInfo}, + {IPTReplace{}, SizeOfIPTReplace}, + {XTCounters{}, SizeOfXTCounters}, + {XTEntryMatch{}, SizeOfXTEntryMatch}, + {XTEntryTarget{}, SizeOfXTEntryTarget}, + {XTErrorTarget{}, SizeOfXTErrorTarget}, + {XTStandardTarget{}, SizeOfXTStandardTarget}, + } + + for _, tc := range testCases { + if calculated := binary.Size(tc.typ); calculated != tc.defined { + t.Errorf("%T has a defined size of %d and calculated size of %d", tc.typ, tc.defined, calculated) + } + } +} diff --git a/pkg/abi/linux/netlink.go b/pkg/abi/linux/netlink.go new file mode 100644 index 000000000..0ba086c76 --- /dev/null +++ b/pkg/abi/linux/netlink.go @@ -0,0 +1,130 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Netlink protocols, from uapi/linux/netlink.h. 
+const ( + NETLINK_ROUTE = 0 + NETLINK_UNUSED = 1 + NETLINK_USERSOCK = 2 + NETLINK_FIREWALL = 3 + NETLINK_SOCK_DIAG = 4 + NETLINK_NFLOG = 5 + NETLINK_XFRM = 6 + NETLINK_SELINUX = 7 + NETLINK_ISCSI = 8 + NETLINK_AUDIT = 9 + NETLINK_FIB_LOOKUP = 10 + NETLINK_CONNECTOR = 11 + NETLINK_NETFILTER = 12 + NETLINK_IP6_FW = 13 + NETLINK_DNRTMSG = 14 + NETLINK_KOBJECT_UEVENT = 15 + NETLINK_GENERIC = 16 + NETLINK_SCSITRANSPORT = 18 + NETLINK_ECRYPTFS = 19 + NETLINK_RDMA = 20 + NETLINK_CRYPTO = 21 +) + +// SockAddrNetlink is struct sockaddr_nl, from uapi/linux/netlink.h. +type SockAddrNetlink struct { + Family uint16 + _ uint16 + PortID uint32 + Groups uint32 +} + +// SockAddrNetlinkSize is the size of SockAddrNetlink. +const SockAddrNetlinkSize = 12 + +// NetlinkMessageHeader is struct nlmsghdr, from uapi/linux/netlink.h. +type NetlinkMessageHeader struct { + Length uint32 + Type uint16 + Flags uint16 + Seq uint32 + PortID uint32 +} + +// NetlinkMessageHeaderSize is the size of NetlinkMessageHeader. +const NetlinkMessageHeaderSize = 16 + +// Netlink message header flags, from uapi/linux/netlink.h. +const ( + NLM_F_REQUEST = 0x1 + NLM_F_MULTI = 0x2 + NLM_F_ACK = 0x4 + NLM_F_ECHO = 0x8 + NLM_F_DUMP_INTR = 0x10 + NLM_F_ROOT = 0x100 + NLM_F_MATCH = 0x200 + NLM_F_ATOMIC = 0x400 + NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH + NLM_F_REPLACE = 0x100 + NLM_F_EXCL = 0x200 + NLM_F_CREATE = 0x400 + NLM_F_APPEND = 0x800 +) + +// Standard netlink message types, from uapi/linux/netlink.h. +const ( + NLMSG_NOOP = 0x1 + NLMSG_ERROR = 0x2 + NLMSG_DONE = 0x3 + NLMSG_OVERRUN = 0x4 + + // NLMSG_MIN_TYPE is the first value for protocol-level types. + NLMSG_MIN_TYPE = 0x10 +) + +// NLMSG_ALIGNTO is the alignment of netlink messages, from +// uapi/linux/netlink.h. +const NLMSG_ALIGNTO = 4 + +// NetlinkAttrHeader is the header of a netlink attribute, followed by payload. +// +// This is struct nlattr, from uapi/linux/netlink.h. 
+type NetlinkAttrHeader struct { + Length uint16 + Type uint16 +} + +// NetlinkAttrHeaderSize is the size of NetlinkAttrHeader. +const NetlinkAttrHeaderSize = 4 + +// NLA_ALIGNTO is the alignment of netlink attributes, from +// uapi/linux/netlink.h. +const NLA_ALIGNTO = 4 + +// Socket options, from uapi/linux/netlink.h. +const ( + NETLINK_ADD_MEMBERSHIP = 1 + NETLINK_DROP_MEMBERSHIP = 2 + NETLINK_PKTINFO = 3 + NETLINK_BROADCAST_ERROR = 4 + NETLINK_NO_ENOBUFS = 5 + NETLINK_LISTEN_ALL_NSID = 8 + NETLINK_LIST_MEMBERSHIPS = 9 + NETLINK_CAP_ACK = 10 + NETLINK_EXT_ACK = 11 + NETLINK_DUMP_STRICT_CHK = 12 +) + +// NetlinkErrorMessage is struct nlmsgerr, from uapi/linux/netlink.h. +type NetlinkErrorMessage struct { + Error int32 + Header NetlinkMessageHeader +} diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go new file mode 100644 index 000000000..40bec566c --- /dev/null +++ b/pkg/abi/linux/netlink_route.go @@ -0,0 +1,346 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Netlink message types for NETLINK_ROUTE sockets, from uapi/linux/rtnetlink.h. 
+const ( + RTM_NEWLINK = 16 + RTM_DELLINK = 17 + RTM_GETLINK = 18 + RTM_SETLINK = 19 + + RTM_NEWADDR = 20 + RTM_DELADDR = 21 + RTM_GETADDR = 22 + + RTM_NEWROUTE = 24 + RTM_DELROUTE = 25 + RTM_GETROUTE = 26 + + RTM_NEWNEIGH = 28 + RTM_DELNEIGH = 29 + RTM_GETNEIGH = 30 + + RTM_NEWRULE = 32 + RTM_DELRULE = 33 + RTM_GETRULE = 34 + + RTM_NEWQDISC = 36 + RTM_DELQDISC = 37 + RTM_GETQDISC = 38 + + RTM_NEWTCLASS = 40 + RTM_DELTCLASS = 41 + RTM_GETTCLASS = 42 + + RTM_NEWTFILTER = 44 + RTM_DELTFILTER = 45 + RTM_GETTFILTER = 46 + + RTM_NEWACTION = 48 + RTM_DELACTION = 49 + RTM_GETACTION = 50 + + RTM_NEWPREFIX = 52 + + RTM_GETMULTICAST = 58 + + RTM_GETANYCAST = 62 + + RTM_NEWNEIGHTBL = 64 + RTM_GETNEIGHTBL = 66 + RTM_SETNEIGHTBL = 67 + + RTM_NEWNDUSEROPT = 68 + + RTM_NEWADDRLABEL = 72 + RTM_DELADDRLABEL = 73 + RTM_GETADDRLABEL = 74 + + RTM_GETDCB = 78 + RTM_SETDCB = 79 + + RTM_NEWNETCONF = 80 + RTM_GETNETCONF = 82 + + RTM_NEWMDB = 84 + RTM_DELMDB = 85 + RTM_GETMDB = 86 + + RTM_NEWNSID = 88 + RTM_DELNSID = 89 + RTM_GETNSID = 90 +) + +// InterfaceInfoMessage is struct ifinfomsg, from uapi/linux/rtnetlink.h. +type InterfaceInfoMessage struct { + Family uint8 + _ uint8 + Type uint16 + Index int32 + Flags uint32 + Change uint32 +} + +// Interface flags, from uapi/linux/if.h. +const ( + IFF_UP = 1 << 0 + IFF_BROADCAST = 1 << 1 + IFF_DEBUG = 1 << 2 + IFF_LOOPBACK = 1 << 3 + IFF_POINTOPOINT = 1 << 4 + IFF_NOTRAILERS = 1 << 5 + IFF_RUNNING = 1 << 6 + IFF_NOARP = 1 << 7 + IFF_PROMISC = 1 << 8 + IFF_ALLMULTI = 1 << 9 + IFF_MASTER = 1 << 10 + IFF_SLAVE = 1 << 11 + IFF_MULTICAST = 1 << 12 + IFF_PORTSEL = 1 << 13 + IFF_AUTOMEDIA = 1 << 14 + IFF_DYNAMIC = 1 << 15 + IFF_LOWER_UP = 1 << 16 + IFF_DORMANT = 1 << 17 + IFF_ECHO = 1 << 18 +) + +// Interface link attributes, from uapi/linux/if_link.h. 
const (
	IFLA_UNSPEC          = 0
	IFLA_ADDRESS         = 1
	IFLA_BROADCAST       = 2
	IFLA_IFNAME          = 3
	IFLA_MTU             = 4
	IFLA_LINK            = 5
	IFLA_QDISC           = 6
	IFLA_STATS           = 7
	IFLA_COST            = 8
	IFLA_PRIORITY        = 9
	IFLA_MASTER          = 10
	IFLA_WIRELESS        = 11
	IFLA_PROTINFO        = 12
	IFLA_TXQLEN          = 13
	IFLA_MAP             = 14
	IFLA_WEIGHT          = 15
	IFLA_OPERSTATE       = 16
	IFLA_LINKMODE        = 17
	IFLA_LINKINFO        = 18
	IFLA_NET_NS_PID      = 19
	IFLA_IFALIAS         = 20
	IFLA_NUM_VF          = 21
	IFLA_VFINFO_LIST     = 22
	IFLA_STATS64         = 23
	IFLA_VF_PORTS        = 24
	IFLA_PORT_SELF       = 25
	IFLA_AF_SPEC         = 26
	IFLA_GROUP           = 27
	IFLA_NET_NS_FD       = 28
	IFLA_EXT_MASK        = 29
	IFLA_PROMISCUITY     = 30
	IFLA_NUM_TX_QUEUES   = 31
	IFLA_NUM_RX_QUEUES   = 32
	IFLA_CARRIER         = 33
	IFLA_PHYS_PORT_ID    = 34
	IFLA_CARRIER_CHANGES = 35
	IFLA_PHYS_SWITCH_ID  = 36
	IFLA_LINK_NETNSID    = 37
	IFLA_PHYS_PORT_NAME  = 38
	IFLA_PROTO_DOWN      = 39
	IFLA_GSO_MAX_SEGS    = 40
	IFLA_GSO_MAX_SIZE    = 41
)

// InterfaceAddrMessage is struct ifaddrmsg, from uapi/linux/if_addr.h.
type InterfaceAddrMessage struct {
	Family    uint8
	PrefixLen uint8
	Flags     uint8
	Scope     uint8
	Index     uint32
}

// Interface attributes, from uapi/linux/if_addr.h.
const (
	IFA_UNSPEC    = 0
	IFA_ADDRESS   = 1
	IFA_LOCAL     = 2
	IFA_LABEL     = 3
	IFA_BROADCAST = 4
	IFA_ANYCAST   = 5
	IFA_CACHEINFO = 6
	IFA_MULTICAST = 7
	IFA_FLAGS     = 8
)

// Device types, from uapi/linux/if_arp.h.
const (
	ARPHRD_LOOPBACK = 772
)

// RouteMessage is struct rtmsg, from uapi/linux/rtnetlink.h.
type RouteMessage struct {
	Family uint8
	DstLen uint8
	SrcLen uint8
	TOS    uint8

	Table    uint8
	Protocol uint8
	Scope    uint8
	Type     uint8

	Flags uint32
}

// SizeOfRouteMessage is the size of RouteMessage: eight uint8 fields followed
// by one uint32.
const SizeOfRouteMessage = 12

// Route types, from uapi/linux/rtnetlink.h.
//
// The values form a dense sequence starting at zero; every route type has a
// distinct value.
const (
	// RTN_UNSPEC represents an unspecified route type.
	RTN_UNSPEC = 0

	// RTN_UNICAST represents a unicast route.
	RTN_UNICAST = 1

	// RTN_LOCAL represents a route that is accepted locally.
	RTN_LOCAL = 2

	// RTN_BROADCAST represents a broadcast route (Traffic is accepted locally
	// as broadcast, and sent as broadcast).
	RTN_BROADCAST = 3

	// RTN_ANYCAST represents an anycast route (Traffic is accepted locally as
	// broadcast but sent as unicast).
	//
	// This is 4, the slot between RTN_BROADCAST and RTN_MULTICAST; it must
	// not share a value with RTN_BLACKHOLE.
	RTN_ANYCAST = 4

	// RTN_MULTICAST represents a multicast route.
	RTN_MULTICAST = 5

	// RTN_BLACKHOLE represents a route where all traffic is dropped.
	RTN_BLACKHOLE = 6

	// RTN_UNREACHABLE represents a route where the destination is unreachable.
	RTN_UNREACHABLE = 7

	// The remaining route types continue the sequence above.
	RTN_PROHIBIT = 8
	RTN_THROW    = 9
	RTN_NAT      = 10
	RTN_XRESOLVE = 11
)

// Route protocols/origins, from uapi/linux/rtnetlink.h.
const (
	RTPROT_UNSPEC   = 0
	RTPROT_REDIRECT = 1
	RTPROT_KERNEL   = 2
	RTPROT_BOOT     = 3
	RTPROT_STATIC   = 4
	RTPROT_GATED    = 8
	RTPROT_RA       = 9
	RTPROT_MRT      = 10
	RTPROT_ZEBRA    = 11
	RTPROT_BIRD     = 12
	RTPROT_DNROUTED = 13
	RTPROT_XORP     = 14
	RTPROT_NTK      = 15
	RTPROT_DHCP     = 16
	RTPROT_MROUTED  = 17
	RTPROT_BABEL    = 42
	RTPROT_BGP      = 186
	RTPROT_ISIS     = 187
	RTPROT_OSPF     = 188
	RTPROT_RIP      = 189
	RTPROT_EIGRP    = 192
)

// Route scopes, from uapi/linux/rtnetlink.h.
const (
	RT_SCOPE_UNIVERSE = 0
	RT_SCOPE_SITE     = 200
	RT_SCOPE_LINK     = 253
	RT_SCOPE_HOST     = 254
	RT_SCOPE_NOWHERE  = 255
)

// Route flags, from uapi/linux/rtnetlink.h.
const (
	RTM_F_NOTIFY       = 0x100
	RTM_F_CLONED       = 0x200
	RTM_F_EQUALIZE     = 0x400
	RTM_F_PREFIX       = 0x800
	RTM_F_LOOKUP_TABLE = 0x1000
	RTM_F_FIB_MATCH    = 0x2000
)

// Route tables, from uapi/linux/rtnetlink.h.
const (
	RT_TABLE_UNSPEC  = 0
	RT_TABLE_COMPAT  = 252
	RT_TABLE_DEFAULT = 253
	RT_TABLE_MAIN    = 254
	RT_TABLE_LOCAL   = 255
)

// Route attributes, from uapi/linux/rtnetlink.h.
+const ( + RTA_UNSPEC = 0 + RTA_DST = 1 + RTA_SRC = 2 + RTA_IIF = 3 + RTA_OIF = 4 + RTA_GATEWAY = 5 + RTA_PRIORITY = 6 + RTA_PREFSRC = 7 + RTA_METRICS = 8 + RTA_MULTIPATH = 9 + RTA_PROTOINFO = 10 + RTA_FLOW = 11 + RTA_CACHEINFO = 12 + RTA_SESSION = 13 + RTA_MP_ALGO = 14 + RTA_TABLE = 15 + RTA_MARK = 16 + RTA_MFC_STATS = 17 + RTA_VIA = 18 + RTA_NEWDST = 19 + RTA_PREF = 20 + RTA_ENCAP_TYPE = 21 + RTA_ENCAP = 22 + RTA_EXPIRES = 23 + RTA_PAD = 24 + RTA_UID = 25 + RTA_TTL_PROPAGATE = 26 + RTA_IP_PROTO = 27 + RTA_SPORT = 28 + RTA_DPORT = 29 +) + +// Route flags, from include/uapi/linux/route.h. +const ( + RTF_GATEWAY = 0x2 + RTF_UP = 0x1 +) + +// RtAttr is the header of optional additional route information, as a netlink +// attribute. From include/uapi/linux/rtnetlink.h. +type RtAttr struct { + Len uint16 + Type uint16 +} + +// SizeOfRtAttr is the size of RtAttr. +const SizeOfRtAttr = 4 diff --git a/pkg/abi/linux/poll.go b/pkg/abi/linux/poll.go new file mode 100644 index 000000000..c04d26e4c --- /dev/null +++ b/pkg/abi/linux/poll.go @@ -0,0 +1,42 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// PollFD is struct pollfd, used by poll(2)/ppoll(2), from uapi/asm-generic/poll.h. +type PollFD struct { + FD int32 + Events int16 + REvents int16 +} + +// Poll event flags, used by poll(2)/ppoll(2) and/or +// epoll_ctl(2)/epoll_wait(2), from uapi/asm-generic/poll.h. 
+const ( + POLLIN = 0x0001 + POLLPRI = 0x0002 + POLLOUT = 0x0004 + POLLERR = 0x0008 + POLLHUP = 0x0010 + POLLNVAL = 0x0020 + POLLRDNORM = 0x0040 + POLLRDBAND = 0x0080 + POLLWRNORM = 0x0100 + POLLWRBAND = 0x0200 + POLLMSG = 0x0400 + POLLREMOVE = 0x1000 + POLLRDHUP = 0x2000 + POLLFREE = 0x4000 + POLL_BUSY_LOOP = 0x8000 +) diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go new file mode 100644 index 000000000..391cfaa1c --- /dev/null +++ b/pkg/abi/linux/prctl.go @@ -0,0 +1,164 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// PR_* flags, from <linux/prctl.h> for prctl(2). +const ( + // PR_SET_PDEATHSIG sets the process' death signal. + PR_SET_PDEATHSIG = 1 + + // PR_GET_PDEATHSIG gets the process' death signal. + PR_GET_PDEATHSIG = 2 + + // PR_GET_DUMPABLE gets the process' dumpable flag. + PR_GET_DUMPABLE = 3 + + // PR_SET_DUMPABLE sets the process' dumpable flag. + PR_SET_DUMPABLE = 4 + + // PR_GET_KEEPCAPS gets the value of the keep capabilities flag. + PR_GET_KEEPCAPS = 7 + + // PR_SET_KEEPCAPS sets the value of the keep capabilities flag. + PR_SET_KEEPCAPS = 8 + + // PR_GET_TIMING gets the process' timing method. + PR_GET_TIMING = 13 + + // PR_SET_TIMING sets the process' timing method. + PR_SET_TIMING = 14 + + // PR_SET_NAME sets the process' name. + PR_SET_NAME = 15 + + // PR_GET_NAME gets the process' name. + PR_GET_NAME = 16 + + // PR_GET_SECCOMP gets a process' seccomp mode. 
+ PR_GET_SECCOMP = 21 + + // PR_SET_SECCOMP sets a process' seccomp mode. + PR_SET_SECCOMP = 22 + + // PR_CAPBSET_READ gets the capability bounding set. + PR_CAPBSET_READ = 23 + + // PR_CAPBSET_DROP sets the capability bounding set. + PR_CAPBSET_DROP = 24 + + // PR_GET_TSC gets the value of the flag determining whether the + // timestamp counter can be read. + PR_GET_TSC = 25 + + // PR_SET_TSC sets the value of the flag determining whether the + // timestamp counter can be read. + PR_SET_TSC = 26 + + // PR_SET_TIMERSLACK sets the process' time slack. + PR_SET_TIMERSLACK = 29 + + // PR_GET_TIMERSLACK gets the process' time slack. + PR_GET_TIMERSLACK = 30 + + // PR_TASK_PERF_EVENTS_DISABLE disables all performance counters + // attached to the calling process. + PR_TASK_PERF_EVENTS_DISABLE = 31 + + // PR_TASK_PERF_EVENTS_ENABLE enables all performance counters attached + // to the calling process. + PR_TASK_PERF_EVENTS_ENABLE = 32 + + // PR_MCE_KILL sets the machine check memory corruption kill policy for + // the calling thread. + PR_MCE_KILL = 33 + + // PR_MCE_KILL_GET gets the machine check memory corruption kill policy + // for the calling thread. + PR_MCE_KILL_GET = 34 + + // PR_SET_MM modifies certain kernel memory map descriptor fields of + // the calling process. See prctl(2) for more information. + PR_SET_MM = 35 + + PR_SET_MM_START_CODE = 1 + PR_SET_MM_END_CODE = 2 + PR_SET_MM_START_DATA = 3 + PR_SET_MM_END_DATA = 4 + PR_SET_MM_START_STACK = 5 + PR_SET_MM_START_BRK = 6 + PR_SET_MM_BRK = 7 + PR_SET_MM_ARG_START = 8 + PR_SET_MM_ARG_END = 9 + PR_SET_MM_ENV_START = 10 + PR_SET_MM_ENV_END = 11 + PR_SET_MM_AUXV = 12 + // PR_SET_MM_EXE_FILE supersedes the /proc/pid/exe symbolic link with a + // new one pointing to a new executable file identified by the file + // descriptor provided in arg3 argument. See prctl(2) for more + // information. 
+ PR_SET_MM_EXE_FILE = 13 + PR_SET_MM_MAP = 14 + PR_SET_MM_MAP_SIZE = 15 + + // PR_SET_CHILD_SUBREAPER sets the "child subreaper" attribute of the + // calling process. + PR_SET_CHILD_SUBREAPER = 36 + + // PR_GET_CHILD_SUBREAPER gets the "child subreaper" attribute of the + // calling process. + PR_GET_CHILD_SUBREAPER = 37 + + // PR_SET_NO_NEW_PRIVS sets the calling thread's no_new_privs bit. + PR_SET_NO_NEW_PRIVS = 38 + + // PR_GET_NO_NEW_PRIVS gets the calling thread's no_new_privs bit. + PR_GET_NO_NEW_PRIVS = 39 + + // PR_GET_TID_ADDRESS retrieves the clear_child_tid address. + PR_GET_TID_ADDRESS = 40 + + // PR_SET_THP_DISABLE sets the state of the "THP disable" flag for the + // calling thread. + PR_SET_THP_DISABLE = 41 + + // PR_GET_THP_DISABLE gets the state of the "THP disable" flag for the + // calling thread. + PR_GET_THP_DISABLE = 42 + + // PR_MPX_ENABLE_MANAGEMENT enables kernel management of Memory + // Protection eXtensions (MPX) bounds tables. + PR_MPX_ENABLE_MANAGEMENT = 43 + + // PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory + // Protection eXtensions (MPX) bounds tables. + PR_MPX_DISABLE_MANAGEMENT = 44 +) + +// From <asm/prctl.h> +// Flags are used in syscall arch_prctl(2). +const ( + ARCH_SET_GS = 0x1001 + ARCH_SET_FS = 0x1002 + ARCH_GET_FS = 0x1003 + ARCH_GET_GS = 0x1004 + ARCH_SET_CPUID = 0x1012 +) + +// Flags for prctl(PR_SET_DUMPABLE), defined in include/linux/sched/coredump.h. +const ( + SUID_DUMP_DISABLE = 0 + SUID_DUMP_USER = 1 + SUID_DUMP_ROOT = 2 +) diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go new file mode 100644 index 000000000..23e605ab2 --- /dev/null +++ b/pkg/abi/linux/ptrace.go @@ -0,0 +1,89 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// ptrace commands from include/uapi/linux/ptrace.h. +const ( + PTRACE_TRACEME = 0 + PTRACE_PEEKTEXT = 1 + PTRACE_PEEKDATA = 2 + PTRACE_PEEKUSR = 3 + PTRACE_POKETEXT = 4 + PTRACE_POKEDATA = 5 + PTRACE_POKEUSR = 6 + PTRACE_CONT = 7 + PTRACE_KILL = 8 + PTRACE_SINGLESTEP = 9 + PTRACE_ATTACH = 16 + PTRACE_DETACH = 17 + PTRACE_SYSCALL = 24 + PTRACE_SETOPTIONS = 0x4200 + PTRACE_GETEVENTMSG = 0x4201 + PTRACE_GETSIGINFO = 0x4202 + PTRACE_SETSIGINFO = 0x4203 + PTRACE_GETREGSET = 0x4204 + PTRACE_SETREGSET = 0x4205 + PTRACE_SEIZE = 0x4206 + PTRACE_INTERRUPT = 0x4207 + PTRACE_LISTEN = 0x4208 + PTRACE_PEEKSIGINFO = 0x4209 + PTRACE_GETSIGMASK = 0x420a + PTRACE_SETSIGMASK = 0x420b + PTRACE_SECCOMP_GET_FILTER = 0x420c + PTRACE_SECCOMP_GET_METADATA = 0x420d +) + +// ptrace commands from arch/x86/include/uapi/asm/ptrace-abi.h. +const ( + PTRACE_GETREGS = 12 + PTRACE_SETREGS = 13 + PTRACE_GETFPREGS = 14 + PTRACE_SETFPREGS = 15 + PTRACE_GETFPXREGS = 18 + PTRACE_SETFPXREGS = 19 + PTRACE_OLDSETOPTIONS = 21 + PTRACE_GET_THREAD_AREA = 25 + PTRACE_SET_THREAD_AREA = 26 + PTRACE_ARCH_PRCTL = 30 + PTRACE_SYSEMU = 31 + PTRACE_SYSEMU_SINGLESTEP = 32 + PTRACE_SINGLEBLOCK = 33 +) + +// ptrace event codes from include/uapi/linux/ptrace.h. +const ( + PTRACE_EVENT_FORK = 1 + PTRACE_EVENT_VFORK = 2 + PTRACE_EVENT_CLONE = 3 + PTRACE_EVENT_EXEC = 4 + PTRACE_EVENT_VFORK_DONE = 5 + PTRACE_EVENT_EXIT = 6 + PTRACE_EVENT_SECCOMP = 7 + PTRACE_EVENT_STOP = 128 +) + +// PTRACE_SETOPTIONS options from include/uapi/linux/ptrace.h. 
+const ( + PTRACE_O_TRACESYSGOOD = 1 + PTRACE_O_TRACEFORK = 1 << PTRACE_EVENT_FORK + PTRACE_O_TRACEVFORK = 1 << PTRACE_EVENT_VFORK + PTRACE_O_TRACECLONE = 1 << PTRACE_EVENT_CLONE + PTRACE_O_TRACEEXEC = 1 << PTRACE_EVENT_EXEC + PTRACE_O_TRACEVFORKDONE = 1 << PTRACE_EVENT_VFORK_DONE + PTRACE_O_TRACEEXIT = 1 << PTRACE_EVENT_EXIT + PTRACE_O_TRACESECCOMP = 1 << PTRACE_EVENT_SECCOMP + PTRACE_O_EXITKILL = 1 << 20 + PTRACE_O_SUSPEND_SECCOMP = 1 << 21 +) diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go new file mode 100644 index 000000000..ed3881e27 --- /dev/null +++ b/pkg/abi/linux/ptrace_amd64.go @@ -0,0 +1,52 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package linux + +// PtraceRegs is the set of CPU registers exposed by ptrace. Source: +// syscall.PtraceRegs. 
+// +// +marshal +// +stateify savable +type PtraceRegs struct { + R15 uint64 + R14 uint64 + R13 uint64 + R12 uint64 + Rbp uint64 + Rbx uint64 + R11 uint64 + R10 uint64 + R9 uint64 + R8 uint64 + Rax uint64 + Rcx uint64 + Rdx uint64 + Rsi uint64 + Rdi uint64 + Orig_rax uint64 + Rip uint64 + Cs uint64 + Eflags uint64 + Rsp uint64 + Ss uint64 + Fs_base uint64 + Gs_base uint64 + Ds uint64 + Es uint64 + Fs uint64 + Gs uint64 +} diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go new file mode 100644 index 000000000..6147738b3 --- /dev/null +++ b/pkg/abi/linux/ptrace_arm64.go @@ -0,0 +1,29 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package linux + +// PtraceRegs is the set of CPU registers exposed by ptrace. Source: +// syscall.PtraceRegs. +// +// +marshal +// +stateify savable +type PtraceRegs struct { + Regs [31]uint64 + Sp uint64 + Pc uint64 + Pstate uint64 +} diff --git a/pkg/abi/linux/rseq.go b/pkg/abi/linux/rseq.go new file mode 100644 index 000000000..76253ba30 --- /dev/null +++ b/pkg/abi/linux/rseq.go @@ -0,0 +1,130 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Flags passed to rseq(2). +// +// Defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_FLAG_UNREGISTER unregisters the current thread. + RSEQ_FLAG_UNREGISTER = 1 << 0 +) + +// Critical section flags used in RSeqCriticalSection.Flags and RSeq.Flags. +// +// Defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT inhibits restart on preemption. + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = 1 << 0 + + // RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL inhibits restart on signal + // delivery. + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = 1 << 1 + + // RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE inhibits restart on CPU + // migration. + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = 1 << 2 +) + +// RSeqCriticalSection describes a restartable sequences critical section. It +// is equivalent to struct rseq_cs, defined in include/uapi/linux/rseq.h. +// +// In userspace, this structure is always aligned to 32 bytes. +// +// +marshal +type RSeqCriticalSection struct { + // Version is the version of this structure. Version 0 is defined here. + Version uint32 + + // Flags are the critical section flags, defined above. + Flags uint32 + + // Start is the start address of the critical section. + Start uint64 + + // PostCommitOffset is the offset from Start of the first instruction + // outside of the critical section. + PostCommitOffset uint64 + + // Abort is the abort address. It must be outside the critical section, + // and the 4 bytes prior must match the abort signature. 
+ Abort uint64 +} + +const ( + // SizeOfRSeqCriticalSection is the size of RSeqCriticalSection. + SizeOfRSeqCriticalSection = 32 + + // SizeOfRSeqSignature is the size of the signature immediately + // preceding RSeqCriticalSection.Abort. + SizeOfRSeqSignature = 4 +) + +// Special values for RSeq.CPUID, defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_CPU_ID_UNINITIALIZED indicates that this thread has not + // performed rseq initialization. + RSEQ_CPU_ID_UNINITIALIZED = ^uint32(0) // -1 + + // RSEQ_CPU_ID_REGISTRATION_FAILED indicates that rseq initialization + // failed. + RSEQ_CPU_ID_REGISTRATION_FAILED = ^uint32(1) // -2 +) + +// RSeq is the thread-local restartable sequences config/status. It +// is equivalent to struct rseq, defined in include/uapi/linux/rseq.h. +// +// In userspace, this structure is always aligned to 32 bytes. +type RSeq struct { + // CPUIDStart contains the current CPU ID if rseq is initialized. + // + // This field should only be read by the thread which registered this + // structure, and must be read atomically. + CPUIDStart uint32 + + // CPUID contains the current CPU ID or one of the CPU ID special + // values defined above. + // + // This field should only be read by the thread which registered this + // structure, and must be read atomically. + CPUID uint32 + + // RSeqCriticalSection is a pointer to the current RSeqCriticalSection + // block, or NULL. It is reset to NULL by the kernel on restart or + // non-restarting preempt/signal. + // + // This field should only be written by the thread which registered + // this structure, and must be written atomically. + RSeqCriticalSection uint64 + + // Flags are the critical section flags that apply to all critical + // sections on this thread, defined above. + Flags uint32 +} + +const ( + // SizeOfRSeq is the size of RSeq. + // + // Note that RSeq is naively 24 bytes. However, it has 32-byte + // alignment, which in C increases sizeof to 32. 
That is the size that + // the Linux kernel uses. + SizeOfRSeq = 32 + + // AlignOfRSeq is the standard alignment of RSeq. + AlignOfRSeq = 32 + + // OffsetOfRSeqCriticalSection is the offset of RSeqCriticalSection in RSeq. + OffsetOfRSeqCriticalSection = 8 +) diff --git a/pkg/abi/linux/rusage.go b/pkg/abi/linux/rusage.go new file mode 100644 index 000000000..d8302dc85 --- /dev/null +++ b/pkg/abi/linux/rusage.go @@ -0,0 +1,46 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Flags that may be used with wait4(2) and getrusage(2). +const ( + // wait4(2) uses this to aggregate RUSAGE_SELF and RUSAGE_CHILDREN. + RUSAGE_BOTH = -0x2 + + // getrusage(2) flags. + RUSAGE_CHILDREN = -0x1 + RUSAGE_SELF = 0x0 + RUSAGE_THREAD = 0x1 +) + +// Rusage represents the Linux struct rusage. +type Rusage struct { + UTime Timeval + STime Timeval + MaxRSS int64 + IXRSS int64 + IDRSS int64 + ISRSS int64 + MinFlt int64 + MajFlt int64 + NSwap int64 + InBlock int64 + OuBlock int64 + MsgSnd int64 + MsgRcv int64 + NSignals int64 + NVCSw int64 + NIvCSw int64 +} diff --git a/pkg/abi/linux/sched.go b/pkg/abi/linux/sched.go new file mode 100644 index 000000000..70e820823 --- /dev/null +++ b/pkg/abi/linux/sched.go @@ -0,0 +1,36 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Scheduling policies, exposed by sched_getscheduler(2)/sched_setscheduler(2). +const ( + SCHED_NORMAL = 0 + SCHED_FIFO = 1 + SCHED_RR = 2 + SCHED_BATCH = 3 + SCHED_IDLE = 5 + SCHED_DEADLINE = 6 + SCHED_MICROQ = 16 + + // SCHED_RESET_ON_FORK is a flag that indicates that the process is + // reverted back to SCHED_NORMAL on fork. + SCHED_RESET_ON_FORK = 0x40000000 +) + +const ( + PRIO_PGRP = 0x1 + PRIO_PROCESS = 0x0 + PRIO_USER = 0x2 +) diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go new file mode 100644 index 000000000..d0607e256 --- /dev/null +++ b/pkg/abi/linux/seccomp.go @@ -0,0 +1,72 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import "fmt" + +// Seccomp constants taken from <linux/seccomp.h>. 
+const ( + SECCOMP_MODE_NONE = 0 + SECCOMP_MODE_FILTER = 2 + + SECCOMP_RET_ACTION_FULL = 0xffff0000 + SECCOMP_RET_ACTION = 0x7fff0000 + SECCOMP_RET_DATA = 0x0000ffff + + SECCOMP_SET_MODE_FILTER = 1 + SECCOMP_FILTER_FLAG_TSYNC = 1 + SECCOMP_GET_ACTION_AVAIL = 2 +) + +type BPFAction uint32 + +const ( + SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000 + SECCOMP_RET_KILL_THREAD = 0x00000000 + SECCOMP_RET_TRAP = 0x00030000 + SECCOMP_RET_ERRNO = 0x00050000 + SECCOMP_RET_TRACE = 0x7ff00000 + SECCOMP_RET_ALLOW = 0x7fff0000 +) + +func (a BPFAction) String() string { + switch a & SECCOMP_RET_ACTION_FULL { + case SECCOMP_RET_KILL_PROCESS: + return "kill process" + case SECCOMP_RET_KILL_THREAD: + return "kill thread" + case SECCOMP_RET_TRAP: + return fmt.Sprintf("trap (%d)", a.Data()) + case SECCOMP_RET_ERRNO: + return fmt.Sprintf("errno (%d)", a.Data()) + case SECCOMP_RET_TRACE: + return fmt.Sprintf("trace (%d)", a.Data()) + case SECCOMP_RET_ALLOW: + return "allow" + } + return fmt.Sprintf("invalid action: %#x", a) +} + +// Data returns the SECCOMP_RET_DATA portion of the action. +func (a BPFAction) Data() uint16 { + return uint16(a & SECCOMP_RET_DATA) +} + +// SockFprog is sock_fprog taken from <linux/filter.h>. +type SockFprog struct { + Len uint16 + pad [6]byte + Filter *BPFInstruction +} diff --git a/pkg/abi/linux/sem.go b/pkg/abi/linux/sem.go new file mode 100644 index 000000000..de422c519 --- /dev/null +++ b/pkg/abi/linux/sem.go @@ -0,0 +1,52 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// semctl Command Definitions. Source: include/uapi/linux/sem.h +const ( + GETPID = 11 + GETVAL = 12 + GETALL = 13 + GETNCNT = 14 + GETZCNT = 15 + SETVAL = 16 + SETALL = 17 +) + +// ipcs ctl cmds. Source: include/uapi/linux/sem.h +const ( + SEM_STAT = 18 + SEM_INFO = 19 + SEM_STAT_ANY = 20 +) + +const SEM_UNDO = 0x1000 + +// SemidDS is equivalent to struct semid64_ds. +type SemidDS struct { + SemPerm IPCPerm + SemOTime TimeT + SemCTime TimeT + SemNSems uint64 + unused3 uint64 + unused4 uint64 +} + +// Sembuf is equivalent to struct sembuf. +type Sembuf struct { + SemNum uint16 + SemOp int16 + SemFlg int16 +} diff --git a/pkg/abi/linux/shm.go b/pkg/abi/linux/shm.go new file mode 100644 index 000000000..e45aadb10 --- /dev/null +++ b/pkg/abi/linux/shm.go @@ -0,0 +1,86 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import "math" + +// shmat(2) flags. Source: include/uapi/linux/shm.h +const ( + SHM_RDONLY = 010000 // Read-only access. + SHM_RND = 020000 // Round attach address to SHMLBA boundary. + SHM_REMAP = 040000 // Take-over region on attach. + SHM_EXEC = 0100000 // Execution access. +) + +// IPCPerm.Mode upper byte flags. Source: include/linux/shm.h +const ( + SHM_DEST = 01000 // Segment will be destroyed on last detach. 
+ SHM_LOCKED = 02000 // Segment will not be swapped. + SHM_HUGETLB = 04000 // Segment will use huge TLB pages. + SHM_NORESERVE = 010000 // Don't check for reservations. +) + +// Additional Linux-only flags for shmctl(2). Source: include/uapi/linux/shm.h +const ( + SHM_LOCK = 11 + SHM_UNLOCK = 12 + SHM_STAT = 13 + SHM_INFO = 14 +) + +// SHM defaults as specified by linux. Source: include/uapi/linux/shm.h +const ( + SHMMIN = 1 + SHMMNI = 4096 + SHMMAX = math.MaxUint64 - 1<<24 + SHMALL = math.MaxUint64 - 1<<24 + SHMSEG = 4096 +) + +// ShmidDS is equivalent to struct shmid64_ds. Source: +// include/uapi/asm-generic/shmbuf.h +type ShmidDS struct { + ShmPerm IPCPerm + ShmSegsz uint64 + ShmAtime TimeT + ShmDtime TimeT + ShmCtime TimeT + ShmCpid int32 + ShmLpid int32 + ShmNattach uint64 + + Unused4 uint64 + Unused5 uint64 +} + +// ShmParams is equivalent to struct shminfo. Source: include/uapi/linux/shm.h +type ShmParams struct { + ShmMax uint64 + ShmMin uint64 + ShmMni uint64 + ShmSeg uint64 + ShmAll uint64 +} + +// ShmInfo is equivalent to struct shm_info. Source: include/uapi/linux/shm.h +type ShmInfo struct { + UsedIDs int32 // Number of currently existing segments. + _ [4]byte + ShmTot uint64 // Total number of shared memory pages. + ShmRss uint64 // Number of resident shared memory pages. + ShmSwp uint64 // Number of swapped shared memory pages. + SwapAttempts uint64 // Unused since Linux 2.4. + SwapSuccesses uint64 // Unused since Linux 2.4. +} diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go new file mode 100644 index 000000000..1c330e763 --- /dev/null +++ b/pkg/abi/linux/signal.go @@ -0,0 +1,234 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/bits" +) + +const ( + // SignalMaximum is the highest valid signal number. + SignalMaximum = 64 + + // FirstStdSignal is the lowest standard signal number. + FirstStdSignal = 1 + + // LastStdSignal is the highest standard signal number. + LastStdSignal = 31 + + // FirstRTSignal is the lowest real-time signal number. + // + // 32 (SIGCANCEL) and 33 (SIGSETXID) are used internally by glibc. + FirstRTSignal = 32 + + // LastRTSignal is the highest real-time signal number. + LastRTSignal = 64 + + // NumStdSignals is the number of standard signals. + NumStdSignals = LastStdSignal - FirstStdSignal + 1 + + // NumRTSignals is the number of realtime signals. + NumRTSignals = LastRTSignal - FirstRTSignal + 1 +) + +// Signal is a signal number. +type Signal int + +// IsValid returns true if s is a valid standard or realtime signal. (0 is not +// considered valid; interfaces special-casing signal number 0 should check for +// 0 first before asserting validity.) +func (s Signal) IsValid() bool { + return s > 0 && s <= SignalMaximum +} + +// IsStandard returns true if s is a standard signal. +// +// Preconditions: s.IsValid(). +func (s Signal) IsStandard() bool { + return s <= LastStdSignal +} + +// IsRealtime returns true if s is a realtime signal. +// +// Preconditions: s.IsValid(). +func (s Signal) IsRealtime() bool { + return s >= FirstRTSignal +} + +// Index returns the index for signal s into arrays of both standard and +// realtime signals (e.g. signal masks). +// +// Preconditions: s.IsValid(). 
+func (s Signal) Index() int { + return int(s - 1) +} + +// Signals. +const ( + SIGABRT = Signal(6) + SIGALRM = Signal(14) + SIGBUS = Signal(7) + SIGCHLD = Signal(17) + SIGCLD = Signal(17) + SIGCONT = Signal(18) + SIGFPE = Signal(8) + SIGHUP = Signal(1) + SIGILL = Signal(4) + SIGINT = Signal(2) + SIGIO = Signal(29) + SIGIOT = Signal(6) + SIGKILL = Signal(9) + SIGPIPE = Signal(13) + SIGPOLL = Signal(29) + SIGPROF = Signal(27) + SIGPWR = Signal(30) + SIGQUIT = Signal(3) + SIGSEGV = Signal(11) + SIGSTKFLT = Signal(16) + SIGSTOP = Signal(19) + SIGSYS = Signal(31) + SIGTERM = Signal(15) + SIGTRAP = Signal(5) + SIGTSTP = Signal(20) + SIGTTIN = Signal(21) + SIGTTOU = Signal(22) + SIGUNUSED = Signal(31) + SIGURG = Signal(23) + SIGUSR1 = Signal(10) + SIGUSR2 = Signal(12) + SIGVTALRM = Signal(26) + SIGWINCH = Signal(28) + SIGXCPU = Signal(24) + SIGXFSZ = Signal(25) +) + +// SignalSet is a signal mask with a bit corresponding to each signal. +// +// +marshal +type SignalSet uint64 + +// SignalSetSize is the size in bytes of a SignalSet. +const SignalSetSize = 8 + +// MakeSignalSet returns SignalSet with the bit corresponding to each of the +// given signals set. +func MakeSignalSet(sigs ...Signal) SignalSet { + indices := make([]int, len(sigs)) + for i, sig := range sigs { + indices[i] = sig.Index() + } + return SignalSet(bits.Mask64(indices...)) +} + +// SignalSetOf returns a SignalSet with a single signal set. +func SignalSetOf(sig Signal) SignalSet { + return SignalSet(bits.MaskOf64(sig.Index())) +} + +// ForEachSignal invokes f for each signal set in the given mask. +func ForEachSignal(mask SignalSet, f func(sig Signal)) { + bits.ForEachSetBit64(uint64(mask), func(i int) { + f(Signal(i + 1)) + }) +} + +// 'how' values for rt_sigprocmask(2). +const ( + // SIG_BLOCK blocks the signals in the set. + SIG_BLOCK = 0 + + // SIG_UNBLOCK unblocks the signals in the set. + SIG_UNBLOCK = 1 + + // SIG_SETMASK sets the signal mask to set. 
+ SIG_SETMASK = 2 +) + +// Signal actions for rt_sigaction(2), from uapi/asm-generic/signal-defs.h. +const ( + // SIG_DFL performs the default action. + SIG_DFL = 0 + + // SIG_IGN ignores the signal. + SIG_IGN = 1 +) + +// Signal action flags for rt_sigaction(2), from uapi/asm-generic/signal.h +const ( + SA_NOCLDSTOP = 0x00000001 + SA_NOCLDWAIT = 0x00000002 + SA_SIGINFO = 0x00000004 + SA_RESTORER = 0x04000000 + SA_ONSTACK = 0x08000000 + SA_RESTART = 0x10000000 + SA_NODEFER = 0x40000000 + SA_RESETHAND = 0x80000000 + SA_NOMASK = SA_NODEFER + SA_ONESHOT = SA_RESETHAND +) + +// Signal info types. +const ( + SI_MASK = 0xffff0000 + SI_KILL = 0 << 16 + SI_TIMER = 1 << 16 + SI_POLL = 2 << 16 + SI_FAULT = 3 << 16 + SI_CHLD = 4 << 16 + SI_RT = 5 << 16 + SI_MESGQ = 6 << 16 + SI_SYS = 7 << 16 +) + +// SIGPOLL si_codes. +const ( + // POLL_IN indicates that data input available. + POLL_IN = SI_POLL | 1 + + // POLL_OUT indicates that output buffers available. + POLL_OUT = SI_POLL | 2 + + // POLL_MSG indicates that an input message available. + POLL_MSG = SI_POLL | 3 + + // POLL_ERR indicates that there was an i/o error. + POLL_ERR = SI_POLL | 4 + + // POLL_PRI indicates that a high priority input available. + POLL_PRI = SI_POLL | 5 + + // POLL_HUP indicates that a device disconnected. + POLL_HUP = SI_POLL | 6 +) + +// Sigevent represents struct sigevent. +type Sigevent struct { + Value uint64 // union sigval {int, void*} + Signo int32 + Notify int32 + + // struct sigevent here contains 48-byte union _sigev_un. However, only + // member _tid is significant to the kernel. + Tid int32 + UnRemainder [44]byte +} + +// Possible values for Sigevent.Notify, aka struct sigevent::sigev_notify. 
+const ( + SIGEV_SIGNAL = 0 + SIGEV_NONE = 1 + SIGEV_THREAD = 2 + SIGEV_THREAD_ID = 4 +) diff --git a/pkg/abi/linux/signalfd.go b/pkg/abi/linux/signalfd.go new file mode 100644 index 000000000..85fad9956 --- /dev/null +++ b/pkg/abi/linux/signalfd.go @@ -0,0 +1,45 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +const ( + // SFD_NONBLOCK is a signalfd(2) flag. + SFD_NONBLOCK = 00004000 + + // SFD_CLOEXEC is a signalfd(2) flag. + SFD_CLOEXEC = 02000000 +) + +// SignalfdSiginfo is the siginfo encoding for signalfds. +type SignalfdSiginfo struct { + Signo uint32 + Errno int32 + Code int32 + PID uint32 + UID uint32 + FD int32 + TID uint32 + Band uint32 + Overrun uint32 + TrapNo uint32 + Status int32 + Int int32 + Ptr uint64 + UTime uint64 + STime uint64 + Addr uint64 + AddrLSB uint16 + _ [48]uint8 +} diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go new file mode 100644 index 000000000..4a14ef691 --- /dev/null +++ b/pkg/abi/linux/socket.go @@ -0,0 +1,456 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import "gvisor.dev/gvisor/pkg/binary" + +// Address families, from linux/socket.h. +const ( + AF_UNSPEC = 0 + AF_UNIX = 1 + AF_INET = 2 + AF_AX25 = 3 + AF_IPX = 4 + AF_APPLETALK = 5 + AF_NETROM = 6 + AF_BRIDGE = 7 + AF_ATMPVC = 8 + AF_X25 = 9 + AF_INET6 = 10 + AF_ROSE = 11 + AF_DECnet = 12 + AF_NETBEUI = 13 + AF_SECURITY = 14 + AF_KEY = 15 + AF_NETLINK = 16 + AF_PACKET = 17 + AF_ASH = 18 + AF_ECONET = 19 + AF_ATMSVC = 20 + AF_RDS = 21 + AF_SNA = 22 + AF_IRDA = 23 + AF_PPPOX = 24 + AF_WANPIPE = 25 + AF_LLC = 26 + AF_IB = 27 + AF_MPLS = 28 + AF_CAN = 29 + AF_TIPC = 30 + AF_BLUETOOTH = 31 + AF_IUCV = 32 + AF_RXRPC = 33 + AF_ISDN = 34 + AF_PHONET = 35 + AF_IEEE802154 = 36 + AF_CAIF = 37 + AF_ALG = 38 + AF_NFC = 39 + AF_VSOCK = 40 +) + +// sendmsg(2)/recvmsg(2) flags, from linux/socket.h. +const ( + MSG_OOB = 0x1 + MSG_PEEK = 0x2 + MSG_DONTROUTE = 0x4 + MSG_TRYHARD = 0x4 + MSG_CTRUNC = 0x8 + MSG_PROBE = 0x10 + MSG_TRUNC = 0x20 + MSG_DONTWAIT = 0x40 + MSG_EOR = 0x80 + MSG_WAITALL = 0x100 + MSG_FIN = 0x200 + MSG_EOF = MSG_FIN + MSG_SYN = 0x400 + MSG_CONFIRM = 0x800 + MSG_RST = 0x1000 + MSG_ERRQUEUE = 0x2000 + MSG_NOSIGNAL = 0x4000 + MSG_MORE = 0x8000 + MSG_WAITFORONE = 0x10000 + MSG_SENDPAGE_NOTLAST = 0x20000 + MSG_REINJECT = 0x8000000 + MSG_ZEROCOPY = 0x4000000 + MSG_FASTOPEN = 0x20000000 + MSG_CMSG_CLOEXEC = 0x40000000 +) + +// Set/get socket option levels, from socket.h. 
+const ( + SOL_IP = 0 + SOL_SOCKET = 1 + SOL_TCP = 6 + SOL_UDP = 17 + SOL_IPV6 = 41 + SOL_ICMPV6 = 58 + SOL_RAW = 255 + SOL_PACKET = 263 + SOL_NETLINK = 270 +) + +// A SockType is a type (as opposed to family) of sockets. These are enumerated +// below as SOCK_* constants. +type SockType int + +// Socket types, from linux/net.h. +const ( + SOCK_STREAM SockType = 1 + SOCK_DGRAM = 2 + SOCK_RAW = 3 + SOCK_RDM = 4 + SOCK_SEQPACKET = 5 + SOCK_DCCP = 6 + SOCK_PACKET = 10 +) + +// SOCK_TYPE_MASK covers all of the above socket types. The remaining bits are +// flags. From linux/net.h. +const SOCK_TYPE_MASK = 0xf + +// socket(2)/socketpair(2)/accept4(2) flags, from linux/net.h. +const ( + SOCK_CLOEXEC = O_CLOEXEC + SOCK_NONBLOCK = O_NONBLOCK +) + +// shutdown(2) how commands, from <linux/net.h>. +const ( + SHUT_RD = 0 + SHUT_WR = 1 + SHUT_RDWR = 2 +) + +// Socket options from socket.h. +const ( + SO_DEBUG = 1 + SO_REUSEADDR = 2 + SO_TYPE = 3 + SO_ERROR = 4 + SO_DONTROUTE = 5 + SO_BROADCAST = 6 + SO_SNDBUF = 7 + SO_RCVBUF = 8 + SO_KEEPALIVE = 9 + SO_OOBINLINE = 10 + SO_NO_CHECK = 11 + SO_PRIORITY = 12 + SO_LINGER = 13 + SO_BSDCOMPAT = 14 + SO_REUSEPORT = 15 + SO_PASSCRED = 16 + SO_PEERCRED = 17 + SO_RCVLOWAT = 18 + SO_SNDLOWAT = 19 + SO_RCVTIMEO = 20 + SO_SNDTIMEO = 21 + SO_BINDTODEVICE = 25 + SO_ATTACH_FILTER = 26 + SO_DETACH_FILTER = 27 + SO_GET_FILTER = SO_ATTACH_FILTER + SO_PEERNAME = 28 + SO_TIMESTAMP = 29 + SO_ACCEPTCONN = 30 + SO_PEERSEC = 31 + SO_SNDBUFFORCE = 32 + SO_RCVBUFFORCE = 33 + SO_PASSSEC = 34 + SO_TIMESTAMPNS = 35 + SO_MARK = 36 + SO_TIMESTAMPING = 37 + SO_PROTOCOL = 38 + SO_DOMAIN = 39 + SO_RXQ_OVFL = 40 + SO_WIFI_STATUS = 41 + SO_PEEK_OFF = 42 + SO_NOFCS = 43 + SO_LOCK_FILTER = 44 + SO_SELECT_ERR_QUEUE = 45 + SO_BUSY_POLL = 46 + SO_MAX_PACING_RATE = 47 + SO_BPF_EXTENSIONS = 48 + SO_INCOMING_CPU = 49 + SO_ATTACH_BPF = 50 + SO_ATTACH_REUSEPORT_CBPF = 51 + SO_ATTACH_REUSEPORT_EBPF = 52 + SO_CNX_ADVICE = 53 + SO_MEMINFO = 55 + SO_INCOMING_NAPI_ID = 56 + 
SO_COOKIE = 57 + SO_PEERGROUPS = 59 + SO_ZEROCOPY = 60 + SO_TXTIME = 61 +) + +// enum socket_state, from uapi/linux/net.h. +const ( + SS_FREE = 0 // Not allocated. + SS_UNCONNECTED = 1 // Unconnected to any socket. + SS_CONNECTING = 2 // In process of connecting. + SS_CONNECTED = 3 // Connected to socket. + SS_DISCONNECTING = 4 // In process of disconnecting. +) + +// TCP protocol states, from include/net/tcp_states.h. +const ( + TCP_ESTABLISHED uint32 = iota + 1 + TCP_SYN_SENT + TCP_SYN_RECV + TCP_FIN_WAIT1 + TCP_FIN_WAIT2 + TCP_TIME_WAIT + TCP_CLOSE + TCP_CLOSE_WAIT + TCP_LAST_ACK + TCP_LISTEN + TCP_CLOSING + TCP_NEW_SYN_RECV +) + +// SockAddrMax is the maximum size of a struct sockaddr, from +// uapi/linux/socket.h. +const SockAddrMax = 128 + +// InetAddr is struct in_addr, from uapi/linux/in.h. +type InetAddr [4]byte + +// SockAddrInet is struct sockaddr_in, from uapi/linux/in.h. +type SockAddrInet struct { + Family uint16 + Port uint16 + Addr InetAddr + Zero [8]uint8 // pad to sizeof(struct sockaddr). +} + +// InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h. +type InetMulticastRequest struct { + MulticastAddr InetAddr + InterfaceAddr InetAddr +} + +// InetMulticastRequestWithNIC is struct ip_mreqn, from uapi/linux/in.h. +type InetMulticastRequestWithNIC struct { + InetMulticastRequest + InterfaceIndex int32 +} + +// SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h. +type SockAddrInet6 struct { + Family uint16 + Port uint16 + Flowinfo uint32 + Addr [16]byte + Scope_id uint32 +} + +// SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h. +type SockAddrLink struct { + Family uint16 + Protocol uint16 + InterfaceIndex int32 + ARPHardwareType uint16 + PacketType byte + HardwareAddrLen byte + HardwareAddr [8]byte +} + +// UnixPathMax is the maximum length of the path in an AF_UNIX socket. +// +// From uapi/linux/un.h. +const UnixPathMax = 108 + +// SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h. 
+type SockAddrUnix struct { + Family uint16 + Path [UnixPathMax]int8 +} + +// SockAddr represents a union of valid socket address types. This is logically +// equivalent to struct sockaddr. SockAddr ensures that a well-defined set of +// types can be used as socket addresses. +type SockAddr interface { + // implementsSockAddr exists purely to allow a type to indicate that they + // implement this interface. This method is a no-op and shouldn't be called. + implementsSockAddr() +} + +func (s *SockAddrInet) implementsSockAddr() {} +func (s *SockAddrInet6) implementsSockAddr() {} +func (s *SockAddrLink) implementsSockAddr() {} +func (s *SockAddrUnix) implementsSockAddr() {} +func (s *SockAddrNetlink) implementsSockAddr() {} + +// Linger is struct linger, from include/linux/socket.h. +type Linger struct { + OnOff int32 + Linger int32 +} + +// SizeOfLinger is the binary size of a Linger struct. +const SizeOfLinger = 8 + +// TCPInfo is a collection of TCP statistics. +// +// From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to +// the end of this struct or within existing unused space, so its size grows +// over time. The current iteration is based on linux v4.17. New versions are +// always backwards compatible. +type TCPInfo struct { + State uint8 + CaState uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + // WindowScale is the combination of snd_wscale (first 4 bits) and rcv_wscale (second 4 bits) + WindowScale uint8 + // DeliveryRateAppLimited is a boolean and only the first bit is meaningful. + DeliveryRateAppLimited uint8 + + RTO uint32 + ATO uint32 + SndMss uint32 + RcvMss uint32 + + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + + // Times. + LastDataSent uint32 + LastAckSent uint32 + LastDataRecv uint32 + LastAckRecv uint32 + + // Metrics. 
+ PMTU uint32 + RcvSsthresh uint32 + RTT uint32 + RTTVar uint32 + SndSsthresh uint32 + SndCwnd uint32 + Advmss uint32 + Reordering uint32 + + RcvRTT uint32 + RcvSpace uint32 + + TotalRetrans uint32 + + PacingRate uint64 + MaxPacingRate uint64 + // BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked. + BytesAcked uint64 + // BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived. + BytesReceived uint64 + // SegsOut is RFC4898 tcpEStatsPerfSegsOut. + SegsOut uint32 + // SegsIn is RFC4898 tcpEStatsPerfSegsIn. + SegsIn uint32 + + NotSentBytes uint32 + MinRTT uint32 + // DataSegsIn is RFC4898 tcpEStatsDataSegsIn. + DataSegsIn uint32 + // DataSegsOut is RFC4898 tcpEStatsDataSegsOut. + DataSegsOut uint32 + + DeliveryRate uint64 + + // BusyTime is the time in microseconds busy sending data. + BusyTime uint64 + // RwndLimited is the time in microseconds limited by receive window. + RwndLimited uint64 + // SndBufLimited is the time in microseconds limited by send buffer. + SndBufLimited uint64 +} + +// SizeOfTCPInfo is the binary size of a TCPInfo struct. +var SizeOfTCPInfo = int(binary.Size(TCPInfo{})) + +// Control message types, from linux/socket.h. +const ( + SCM_CREDENTIALS = 0x2 + SCM_RIGHTS = 0x1 +) + +// A ControlMessageHeader is the header for a socket control message. +// +// ControlMessageHeader represents struct cmsghdr from linux/socket.h. +type ControlMessageHeader struct { + Length uint64 + Level int32 + Type int32 +} + +// SizeOfControlMessageHeader is the binary size of a ControlMessageHeader +// struct. +var SizeOfControlMessageHeader = int(binary.Size(ControlMessageHeader{})) + +// A ControlMessageCredentials is an SCM_CREDENTIALS socket control message. +// +// ControlMessageCredentials represents struct ucred from linux/socket.h. +type ControlMessageCredentials struct { + PID int32 + UID uint32 + GID uint32 +} + +// A ControlMessageIPPacketInfo is IP_PKTINFO socket control message. 
+// +// ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h. +type ControlMessageIPPacketInfo struct { + NIC int32 + LocalAddr InetAddr + DestinationAddr InetAddr +} + +// SizeOfControlMessageCredentials is the binary size of a +// ControlMessageCredentials struct. +var SizeOfControlMessageCredentials = int(binary.Size(ControlMessageCredentials{})) + +// A ControlMessageRights is an SCM_RIGHTS socket control message. +type ControlMessageRights []int32 + +// SizeOfControlMessageRight is the size of a single element in +// ControlMessageRights. +const SizeOfControlMessageRight = 4 + +// SizeOfControlMessageInq is the size of a TCP_INQ control message. +const SizeOfControlMessageInq = 4 + +// SizeOfControlMessageTOS is the size of an IP_TOS control message. +const SizeOfControlMessageTOS = 1 + +// SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message. +const SizeOfControlMessageTClass = 4 + +// SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO +// control message. +const SizeOfControlMessageIPPacketInfo = 12 + +// SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call. +// From net/scm.h. +const SCM_MAX_FD = 253 + +// SO_ACCEPTCON is defined as __SO_ACCEPTCON in +// include/uapi/linux/net.h, which represents a listening socket +// state. Note that this is distinct from SO_ACCEPTCONN, which is a +// socket option for querying whether a socket is in a listening +// state. +const SO_ACCEPTCON = 1 << 16 diff --git a/pkg/abi/linux/splice.go b/pkg/abi/linux/splice.go new file mode 100644 index 000000000..650eb87e8 --- /dev/null +++ b/pkg/abi/linux/splice.go @@ -0,0 +1,23 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Constants for splice(2), sendfile(2) and tee(2). +const ( + SPLICE_F_MOVE = 1 << iota + SPLICE_F_NONBLOCK + SPLICE_F_MORE + SPLICE_F_GIFT +) diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go new file mode 100644 index 000000000..174d470e2 --- /dev/null +++ b/pkg/abi/linux/tcp.go @@ -0,0 +1,60 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Socket options from uapi/linux/tcp.h. 
+const ( + TCP_NODELAY = 1 + TCP_MAXSEG = 2 + TCP_CORK = 3 + TCP_KEEPIDLE = 4 + TCP_KEEPINTVL = 5 + TCP_KEEPCNT = 6 + TCP_SYNCNT = 7 + TCP_LINGER2 = 8 + TCP_DEFER_ACCEPT = 9 + TCP_WINDOW_CLAMP = 10 + TCP_INFO = 11 + TCP_QUICKACK = 12 + TCP_CONGESTION = 13 + TCP_MD5SIG = 14 + TCP_THIN_LINEAR_TIMEOUTS = 16 + TCP_THIN_DUPACK = 17 + TCP_USER_TIMEOUT = 18 + TCP_REPAIR = 19 + TCP_REPAIR_QUEUE = 20 + TCP_QUEUE_SEQ = 21 + TCP_REPAIR_OPTIONS = 22 + TCP_FASTOPEN = 23 + TCP_TIMESTAMP = 24 + TCP_NOTSENT_LOWAT = 25 + TCP_CC_INFO = 26 + TCP_SAVE_SYN = 27 + TCP_SAVED_SYN = 28 + TCP_REPAIR_WINDOW = 29 + TCP_FASTOPEN_CONNECT = 30 + TCP_ULP = 31 + TCP_MD5SIG_EXT = 32 + TCP_FASTOPEN_KEY = 33 + TCP_FASTOPEN_NO_COOKIE = 34 + TCP_ZEROCOPY_RECEIVE = 35 + TCP_INQ = 36 +) + +// Socket constants from include/net/tcp.h. +const ( + MAX_TCP_KEEPIDLE = 32767 + MAX_TCP_KEEPINTVL = 32767 +) diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go new file mode 100644 index 000000000..e6860ed49 --- /dev/null +++ b/pkg/abi/linux/time.go @@ -0,0 +1,270 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "math" + "time" +) + +const ( + // ClockTick is the length of time represented by a single clock tick, as + // used by times(2) and /proc/[pid]/stat. + ClockTick = time.Second / CLOCKS_PER_SEC + + // CLOCKS_PER_SEC is the number of ClockTicks per second. 
+ // + // Linux defines this to be 100 on most architectures, irrespective of + // CONFIG_HZ. Userspace obtains the value through sysconf(_SC_CLK_TCK), + // which uses the AT_CLKTCK entry in the auxiliary vector if one is + // provided, and assumes 100 otherwise (glibc: + // sysdeps/posix/sysconf.c:__sysconf() => + // sysdeps/unix/sysv/linux/getclktck.c, elf/dl-support.c:_dl_aux_init()). + // + // Not to be confused with POSIX CLOCKS_PER_SEC, as used by clock(3); "XSI + // requires that [POSIX] CLOCKS_PER_SEC equals 1000000 independent of the + // actual resolution" - clock(3). + CLOCKS_PER_SEC = 100 +) + +// CPU clock types for use with clock_gettime(2) et al. +// +// The 29 most significant bits of a 32 bit clock ID are either a PID or a FD. +// +// Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. +// +// Bit 2 indicates whether a cpu clock refers to a thread or a process. +const ( + CPUCLOCK_PROF = 0 + CPUCLOCK_VIRT = 1 + CPUCLOCK_SCHED = 2 + CPUCLOCK_MAX = 3 + CLOCKFD = CPUCLOCK_MAX + + CPUCLOCK_CLOCK_MASK = 3 + CPUCLOCK_PERTHREAD_MASK = 4 +) + +// Clock identifiers for use with clock_gettime(2), clock_getres(2), +// clock_nanosleep(2). +const ( + CLOCK_REALTIME = 0 + CLOCK_MONOTONIC = 1 + CLOCK_PROCESS_CPUTIME_ID = 2 + CLOCK_THREAD_CPUTIME_ID = 3 + CLOCK_MONOTONIC_RAW = 4 + CLOCK_REALTIME_COARSE = 5 + CLOCK_MONOTONIC_COARSE = 6 + CLOCK_BOOTTIME = 7 + CLOCK_REALTIME_ALARM = 8 + CLOCK_BOOTTIME_ALARM = 9 +) + +// Flags for clock_nanosleep(2). +const ( + TIMER_ABSTIME = 1 +) + +// Flags for timerfd syscalls (timerfd_create(2), timerfd_settime(2)). +const ( + // TFD_CLOEXEC is a timerfd_create flag. + TFD_CLOEXEC = O_CLOEXEC + + // TFD_NONBLOCK is a timerfd_create flag. + TFD_NONBLOCK = O_NONBLOCK + + // TFD_TIMER_ABSTIME is a timerfd_settime flag. + TFD_TIMER_ABSTIME = 1 +) + +// The safe number of seconds you can represent by int64. +const maxSecInDuration = math.MaxInt64 / int64(time.Second) + +// TimeT represents time_t in <time.h>. 
It represents time in seconds. +type TimeT int64 + +// NsecToTimeT translates nanoseconds to TimeT (seconds). +func NsecToTimeT(nsec int64) TimeT { + return TimeT(nsec / 1e9) +} + +// Timespec represents struct timespec in <time.h>. +// +// +marshal +type Timespec struct { + Sec int64 + Nsec int64 +} + +// Unix returns the second and nanosecond. +func (ts Timespec) Unix() (sec int64, nsec int64) { + return int64(ts.Sec), int64(ts.Nsec) +} + +// ToTime returns the Go time.Time representation. +func (ts Timespec) ToTime() time.Time { + return time.Unix(ts.Sec, ts.Nsec) +} + +// ToNsec returns the nanosecond representation. +func (ts Timespec) ToNsec() int64 { + return int64(ts.Sec)*1e9 + int64(ts.Nsec) +} + +// ToNsecCapped returns the safe nanosecond representation. +func (ts Timespec) ToNsecCapped() int64 { + if ts.Sec > maxSecInDuration { + return math.MaxInt64 + } + return ts.ToNsec() +} + +// ToDuration returns the safe nanosecond representation as time.Duration. +func (ts Timespec) ToDuration() time.Duration { + return time.Duration(ts.ToNsecCapped()) +} + +// Valid returns whether the timespec contains valid values. +func (ts Timespec) Valid() bool { + return !(ts.Sec < 0 || ts.Nsec < 0 || ts.Nsec >= int64(time.Second)) +} + +// NsecToTimespec translates nanoseconds to Timespec. +func NsecToTimespec(nsec int64) (ts Timespec) { + ts.Sec = nsec / 1e9 + ts.Nsec = nsec % 1e9 + return +} + +// DurationToTimespec translates time.Duration to Timespec. +func DurationToTimespec(dur time.Duration) Timespec { + return NsecToTimespec(dur.Nanoseconds()) +} + +// SizeOfTimeval is the size of a Timeval struct in bytes. +const SizeOfTimeval = 16 + +// Timeval represents struct timeval in <time.h>. +// +// +marshal +type Timeval struct { + Sec int64 + Usec int64 +} + +// ToNsecCapped returns the safe nanosecond representation. 
+func (tv Timeval) ToNsecCapped() int64 { + if tv.Sec > maxSecInDuration { + return math.MaxInt64 + } + return int64(tv.Sec)*1e9 + int64(tv.Usec)*1e3 +} + +// ToDuration returns the safe nanosecond representation as a time.Duration. +func (tv Timeval) ToDuration() time.Duration { + return time.Duration(tv.ToNsecCapped()) +} + +// ToTime returns the Go time.Time representation. +func (tv Timeval) ToTime() time.Time { + return time.Unix(tv.Sec, tv.Usec*1e3) +} + +// NsecToTimeval translates nanosecond to Timeval. +func NsecToTimeval(nsec int64) (tv Timeval) { + nsec += 999 // round up to microsecond + tv.Sec = nsec / 1e9 + tv.Usec = nsec % 1e9 / 1e3 + return +} + +// DurationToTimeval translates time.Duration to Timeval. +func DurationToTimeval(dur time.Duration) Timeval { + return NsecToTimeval(dur.Nanoseconds()) +} + +// Itimerspec represents struct itimerspec in <time.h>. +type Itimerspec struct { + Interval Timespec + Value Timespec +} + +// ItimerVal mimics the following struct in <sys/time.h> +// struct itimerval { +// struct timeval it_interval; /* next value */ +// struct timeval it_value; /* current value */ +// }; +type ItimerVal struct { + Interval Timeval + Value Timeval +} + +// ClockT represents type clock_t. +type ClockT int64 + +// ClockTFromDuration converts time.Duration to clock_t. +func ClockTFromDuration(d time.Duration) ClockT { + return ClockT(d / ClockTick) +} + +// Tms represents struct tms, used by times(2). +type Tms struct { + UTime ClockT + STime ClockT + CUTime ClockT + CSTime ClockT +} + +// TimerID represents type timer_t, which identifies a POSIX per-process +// interval timer. +type TimerID int32 + +// StatxTimestamp represents struct statx_timestamp. +// +// +marshal +type StatxTimestamp struct { + Sec int64 + Nsec uint32 + _ int32 +} + +// ToNsec returns the nanosecond representation. 
+func (sxts StatxTimestamp) ToNsec() int64 { + return int64(sxts.Sec)*1e9 + int64(sxts.Nsec) +} + +// ToNsecCapped returns the safe nanosecond representation. +func (sxts StatxTimestamp) ToNsecCapped() int64 { + if sxts.Sec > maxSecInDuration { + return math.MaxInt64 + } + return sxts.ToNsec() +} + +// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp. +func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) { + return StatxTimestamp{ + Sec: nsec / 1e9, + Nsec: uint32(nsec % 1e9), + } +} + +// Utime represents struct utimbuf used by utimes(2). +// +// +marshal +type Utime struct { + Actime int64 + Modtime int64 +} diff --git a/pkg/abi/linux/timer.go b/pkg/abi/linux/timer.go new file mode 100644 index 000000000..e32d09e10 --- /dev/null +++ b/pkg/abi/linux/timer.go @@ -0,0 +1,23 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// itimer types for getitimer(2) and setitimer(2), from +// include/uapi/linux/time.h. +const ( + ITIMER_REAL = 0 + ITIMER_VIRTUAL = 1 + ITIMER_PROF = 2 +) diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go new file mode 100644 index 000000000..8ac02aee8 --- /dev/null +++ b/pkg/abi/linux/tty.go @@ -0,0 +1,344 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +const ( + // NumControlCharacters is the number of control characters in Termios. + NumControlCharacters = 19 + // disabledChar is used to indicate that a control character is + // disabled. + disabledChar = 0 +) + +// Winsize is struct winsize, defined in uapi/asm-generic/termios.h. +type Winsize struct { + Row uint16 + Col uint16 + Xpixel uint16 + Ypixel uint16 +} + +// Termios is struct termios, defined in uapi/asm-generic/termbits.h. +type Termios struct { + InputFlags uint32 + OutputFlags uint32 + ControlFlags uint32 + LocalFlags uint32 + LineDiscipline uint8 + ControlCharacters [NumControlCharacters]uint8 +} + +// KernelTermios is struct ktermios/struct termios2, defined in +// uapi/asm-generic/termbits.h. +// +// +stateify savable +type KernelTermios struct { + InputFlags uint32 + OutputFlags uint32 + ControlFlags uint32 + LocalFlags uint32 + LineDiscipline uint8 + ControlCharacters [NumControlCharacters]uint8 + InputSpeed uint32 + OutputSpeed uint32 +} + +// IEnabled returns whether flag is enabled in termios input flags. +func (t *KernelTermios) IEnabled(flag uint32) bool { + return t.InputFlags&flag == flag +} + +// OEnabled returns whether flag is enabled in termios output flags. +func (t *KernelTermios) OEnabled(flag uint32) bool { + return t.OutputFlags&flag == flag +} + +// CEnabled returns whether flag is enabled in termios control flags. +func (t *KernelTermios) CEnabled(flag uint32) bool { + return t.ControlFlags&flag == flag +} + +// LEnabled returns whether flag is enabled in termios local flags. 
+func (t *KernelTermios) LEnabled(flag uint32) bool { + return t.LocalFlags&flag == flag +} + +// ToTermios copies fields that are shared with Termios into a new Termios +// struct. +func (t *KernelTermios) ToTermios() Termios { + return Termios{ + InputFlags: t.InputFlags, + OutputFlags: t.OutputFlags, + ControlFlags: t.ControlFlags, + LocalFlags: t.LocalFlags, + LineDiscipline: t.LineDiscipline, + ControlCharacters: t.ControlCharacters, + } +} + +// FromTermios copies fields that are shared with Termios into this +// KernelTermios struct. +func (t *KernelTermios) FromTermios(term Termios) { + t.InputFlags = term.InputFlags + t.OutputFlags = term.OutputFlags + t.ControlFlags = term.ControlFlags + t.LocalFlags = term.LocalFlags + t.LineDiscipline = term.LineDiscipline + t.ControlCharacters = term.ControlCharacters +} + +// IsTerminating returns whether c is a line terminating character. +func (t *KernelTermios) IsTerminating(cBytes []byte) bool { + // All terminating characters are 1 byte. + if len(cBytes) != 1 { + return false + } + c := cBytes[0] + + // Is this the user-set EOF character? + if t.IsEOF(c) { + return true + } + + switch c { + case disabledChar: + return false + case '\n', t.ControlCharacters[VEOL]: + return true + case t.ControlCharacters[VEOL2]: + return t.LEnabled(IEXTEN) + } + return false +} + +// IsEOF returns whether c is the EOF character. +func (t *KernelTermios) IsEOF(c byte) bool { + return c == t.ControlCharacters[VEOF] && t.ControlCharacters[VEOF] != disabledChar +} + +// Input flags. +const ( + IGNBRK = 0000001 + BRKINT = 0000002 + IGNPAR = 0000004 + PARMRK = 0000010 + INPCK = 0000020 + ISTRIP = 0000040 + INLCR = 0000100 + IGNCR = 0000200 + ICRNL = 0000400 + IUCLC = 0001000 + IXON = 0002000 + IXANY = 0004000 + IXOFF = 0010000 + IMAXBEL = 0020000 + IUTF8 = 0040000 +) + +// Output flags. 
const (
	OPOST  = 0000001
	OLCUC  = 0000002
	ONLCR  = 0000004
	OCRNL  = 0000010
	ONOCR  = 0000020
	ONLRET = 0000040
	OFILL  = 0000100
	OFDEL  = 0000200
	NLDLY  = 0000400
	NL0    = 0000000
	NL1    = 0000400
	CRDLY  = 0003000
	CR0    = 0000000
	CR1    = 0001000
	CR2    = 0002000
	CR3    = 0003000
	TABDLY = 0014000
	TAB0   = 0000000
	TAB1   = 0004000
	TAB2   = 0010000
	TAB3   = 0014000
	XTABS  = 0014000
	BSDLY  = 0020000
	BS0    = 0000000
	BS1    = 0020000
	VTDLY  = 0040000
	VT0    = 0000000
	VT1    = 0040000
	FFDLY  = 0100000
	FF0    = 0000000
	FF1    = 0100000
)

// Control flags.
//
// Values are octal. The B* constants are baud-rate selectors that live in
// the CBAUD bits of the control flags.
const (
	CBAUD    = 0010017
	B0       = 0000000
	B50      = 0000001
	B75      = 0000002
	B110     = 0000003
	B134     = 0000004
	B150     = 0000005
	B200     = 0000006
	B300     = 0000007
	B600     = 0000010
	B1200    = 0000011
	B1800    = 0000012
	B2400    = 0000013
	B4800    = 0000014
	B9600    = 0000015
	B19200   = 0000016
	B38400   = 0000017
	EXTA     = B19200
	EXTB     = B38400
	CSIZE    = 0000060
	CS5      = 0000000
	CS6      = 0000020
	CS7      = 0000040
	CS8      = 0000060
	CSTOPB   = 0000100
	CREAD    = 0000200
	PARENB   = 0000400
	PARODD   = 0001000
	HUPCL    = 0002000
	CLOCAL   = 0004000
	CBAUDEX  = 0010000
	BOTHER   = 0010000
	B57600   = 0010001
	B115200  = 0010002
	B230400  = 0010003
	B460800  = 0010004
	B500000  = 0010005
	B576000  = 0010006
	B921600  = 0010007
	B1000000 = 0010010
	B1152000 = 0010011
	B1500000 = 0010012
	B2000000 = 0010013
	B2500000 = 0010014
	B3000000 = 0010015
	B3500000 = 0010016
	B4000000 = 0010017
	CIBAUD   = 002003600000
	CMSPAR   = 010000000000
	CRTSCTS  = 020000000000

	// IBSHIFT is the shift from CBAUD to CIBAUD.
	IBSHIFT = 16
)

// Local flags.
const (
	ISIG    = 0000001
	ICANON  = 0000002
	XCASE   = 0000004
	ECHO    = 0000010
	ECHOE   = 0000020
	ECHOK   = 0000040
	ECHONL  = 0000100
	NOFLSH  = 0000200
	TOSTOP  = 0000400
	ECHOCTL = 0001000
	ECHOPRT = 0002000
	ECHOKE  = 0004000
	FLUSHO  = 0010000
	PENDIN  = 0040000
	IEXTEN  = 0100000
	EXTPROC = 0200000
)

// Control Character indices.
+const ( + VINTR = 0 + VQUIT = 1 + VERASE = 2 + VKILL = 3 + VEOF = 4 + VTIME = 5 + VMIN = 6 + VSWTC = 7 + VSTART = 8 + VSTOP = 9 + VSUSP = 10 + VEOL = 11 + VREPRINT = 12 + VDISCARD = 13 + VWERASE = 14 + VLNEXT = 15 + VEOL2 = 16 +) + +// ControlCharacter returns the termios-style control character for the passed +// character. +// +// e.g., for Ctrl-C, i.e., ^C, call ControlCharacter('C'). +// +// Standard control characters are ASCII bytes 0 through 31. +func ControlCharacter(c byte) uint8 { + // A is 1, B is 2, etc. + return uint8(c - 'A' + 1) +} + +// DefaultControlCharacters is the default set of Termios control characters. +var DefaultControlCharacters = [NumControlCharacters]uint8{ + ControlCharacter('C'), // VINTR = ^C + ControlCharacter('\\'), // VQUIT = ^\ + '\x7f', // VERASE = DEL + ControlCharacter('U'), // VKILL = ^U + ControlCharacter('D'), // VEOF = ^D + 0, // VTIME + 1, // VMIN + 0, // VSWTC + ControlCharacter('Q'), // VSTART = ^Q + ControlCharacter('S'), // VSTOP = ^S + ControlCharacter('Z'), // VSUSP = ^Z + 0, // VEOL + ControlCharacter('R'), // VREPRINT = ^R + ControlCharacter('O'), // VDISCARD = ^O + ControlCharacter('W'), // VWERASE = ^W + ControlCharacter('V'), // VLNEXT = ^V + 0, // VEOL2 +} + +// MasterTermios is the terminal configuration of the master end of a Unix98 +// pseudoterminal. +var MasterTermios = KernelTermios{ + ControlFlags: B38400 | CS8 | CREAD, + ControlCharacters: DefaultControlCharacters, + InputSpeed: 38400, + OutputSpeed: 38400, +} + +// DefaultSlaveTermios is the default terminal configuration of the slave end +// of a Unix98 pseudoterminal. 
+var DefaultSlaveTermios = KernelTermios{ + InputFlags: ICRNL | IXON, + OutputFlags: OPOST | ONLCR, + ControlFlags: B38400 | CS8 | CREAD, + LocalFlags: ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN, + ControlCharacters: DefaultControlCharacters, + InputSpeed: 38400, + OutputSpeed: 38400, +} + +// WindowSize corresponds to struct winsize defined in +// include/uapi/asm-generic/termios.h. +// +// +stateify savable +type WindowSize struct { + Rows uint16 + Cols uint16 + _ [4]byte // Padding for 2 unused shorts. +} diff --git a/pkg/abi/linux/uio.go b/pkg/abi/linux/uio.go new file mode 100644 index 000000000..1fd1e9802 --- /dev/null +++ b/pkg/abi/linux/uio.go @@ -0,0 +1,18 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// UIO_MAXIOV is the maximum number of struct iovecs in a struct iovec array. +const UIO_MAXIOV = 1024 diff --git a/pkg/abi/linux/utsname.go b/pkg/abi/linux/utsname.go new file mode 100644 index 000000000..60f220a67 --- /dev/null +++ b/pkg/abi/linux/utsname.go @@ -0,0 +1,49 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// package linux (clause kept as a comment: this listing concatenates several
// files of the same package).

import (
	"bytes"
	"fmt"
)

const (
	// UTSLen is the maximum length of strings contained in fields of
	// UtsName.
	UTSLen = 64
)

// UtsName represents struct utsname, the struct returned by uname(2).
//
// Each field has room for UTSLen bytes plus a terminating NUL.
type UtsName struct {
	Sysname    [UTSLen + 1]byte
	Nodename   [UTSLen + 1]byte
	Release    [UTSLen + 1]byte
	Version    [UTSLen + 1]byte
	Machine    [UTSLen + 1]byte
	Domainname [UTSLen + 1]byte
}

// utsNameString converts a UtsName entry to a string without NULs.
//
// The entry is treated as a NUL-terminated string: the result ends at the
// first NUL byte, so bytes left behind after the terminator are not included.
// If the entry contains no NUL at all, the whole array is returned.
func utsNameString(s [UTSLen + 1]byte) string {
	// The NUL bytes will remain even in a cast to string. We must
	// explicitly strip them.
	if end := bytes.IndexByte(s[:], 0); end >= 0 {
		return string(s[:end])
	}
	return string(s[:])
}

// String implements fmt.Stringer. It renders every field with its NUL
// terminator and anything after it removed.
func (u UtsName) String() string {
	return fmt.Sprintf("{Sysname: %s, Nodename: %s, Release: %s, Version: %s, Machine: %s, Domainname: %s}",
		utsNameString(u.Sysname), utsNameString(u.Nodename), utsNameString(u.Release),
		utsNameString(u.Version), utsNameString(u.Machine), utsNameString(u.Domainname))
}

// diff --git a/pkg/abi/linux/wait.go b/pkg/abi/linux/wait.go
// new file mode 100644
// index 000000000..4bdc280d1
// --- /dev/null
// +++ b/pkg/abi/linux/wait.go
// @@ -0,0 +1,36 @@

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// package linux (clause kept as a comment: this listing concatenates several
// files of the same package).

// Options for waitpid(2), wait4(2), and/or waitid(2), from
// include/uapi/linux/wait.h.
const (
	WNOHANG    = 0x00000001
	WUNTRACED  = 0x00000002
	WSTOPPED   = WUNTRACED
	WEXITED    = 0x00000004
	WCONTINUED = 0x00000008
	WNOWAIT    = 0x01000000
	WNOTHREAD  = 0x20000000
	WALL       = 0x40000000
	WCLONE     = 0x80000000
)

// ID types for waitid(2), from include/uapi/linux/wait.h.
const (
	P_ALL  = 0x0
	P_PID  = 0x1
	P_PGID = 0x2
)

// diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go
// new file mode 100644
// index 000000000..99180b208
// --- /dev/null
// +++ b/pkg/abi/linux/xattr.go
// @@ -0,0 +1,28 @@

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// package linux (clause kept as a comment: this listing concatenates several
// files of the same package).

// Constants for extended attributes.
const (
	XATTR_NAME_MAX = 255
	XATTR_SIZE_MAX = 65536
	XATTR_LIST_MAX = 65536

	XATTR_CREATE  = 1
	XATTR_REPLACE = 2

	XATTR_USER_PREFIX = "user."
	// XATTR_USER_PREFIX_LEN is the byte length of XATTR_USER_PREFIX.
	XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX)
)