summaryrefslogtreecommitdiffhomepage
path: root/pkg/abi
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/abi')
-rw-r--r--pkg/abi/BUILD3
-rw-r--r--pkg/abi/abi.go4
-rw-r--r--pkg/abi/linux/BUILD7
-rw-r--r--pkg/abi/linux/dev.go3
-rw-r--r--pkg/abi/linux/epoll.go10
-rw-r--r--pkg/abi/linux/file.go51
-rw-r--r--pkg/abi/linux/file_amd64.go8
-rw-r--r--pkg/abi/linux/file_arm64.go8
-rw-r--r--pkg/abi/linux/netfilter.go89
-rw-r--r--pkg/abi/linux/netlink_route.go15
-rw-r--r--pkg/abi/linux/rseq.go130
-rw-r--r--pkg/abi/linux/time.go13
12 files changed, 278 insertions, 63 deletions
diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD
index f5c08ea06..839f822eb 100644
--- a/pkg/abi/BUILD
+++ b/pkg/abi/BUILD
@@ -1,4 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -9,6 +9,5 @@ go_library(
"abi_linux.go",
"flag.go",
],
- importpath = "gvisor.dev/gvisor/pkg/abi",
visibility = ["//:sandbox"],
)
diff --git a/pkg/abi/abi.go b/pkg/abi/abi.go
index d56c481c9..e6be93c3a 100644
--- a/pkg/abi/abi.go
+++ b/pkg/abi/abi.go
@@ -39,3 +39,7 @@ func (o OS) String() string {
return fmt.Sprintf("OS(%d)", o)
}
}
+
+// ABI is an interface that defines OS-specific interactions.
+type ABI interface {
+}
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 9553f164d..1f3c0c687 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -1,5 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
# Package linux contains the constants and types needed to interface with a
# Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix
@@ -41,6 +40,7 @@ go_library(
"poll.go",
"prctl.go",
"ptrace.go",
+ "rseq.go",
"rusage.go",
"sched.go",
"seccomp.go",
@@ -59,7 +59,6 @@ go_library(
"wait.go",
"xattr.go",
],
- importpath = "gvisor.dev/gvisor/pkg/abi/linux",
visibility = ["//visibility:public"],
deps = [
"//pkg/abi",
@@ -72,7 +71,7 @@ go_test(
name = "linux_test",
size = "small",
srcs = ["netfilter_test.go"],
- embed = [":linux"],
+ library = ":linux",
deps = [
"//pkg/binary",
],
diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go
index 421e11256..89f9a793f 100644
--- a/pkg/abi/linux/dev.go
+++ b/pkg/abi/linux/dev.go
@@ -36,6 +36,9 @@ func DecodeDeviceID(rdev uint32) (uint16, uint32) {
//
// See Documentations/devices.txt and uapi/linux/major.h.
const (
+ // MEM_MAJOR is the major device number for "memory" character devices.
+ MEM_MAJOR = 1
+
// TTYAUX_MAJOR is the major device number for alternate TTY devices.
TTYAUX_MAJOR = 5
diff --git a/pkg/abi/linux/epoll.go b/pkg/abi/linux/epoll.go
index 72083b604..0e881aa3c 100644
--- a/pkg/abi/linux/epoll.go
+++ b/pkg/abi/linux/epoll.go
@@ -38,8 +38,14 @@ const (
// Per-file descriptor flags.
const (
- EPOLLET = 0x80000000
- EPOLLONESHOT = 0x40000000
+ EPOLLEXCLUSIVE = 1 << 28
+ EPOLLWAKEUP = 1 << 29
+ EPOLLONESHOT = 1 << 30
+ EPOLLET = 1 << 31
+
+ // EP_PRIVATE_BITS is fs/eventpoll.c:EP_PRIVATE_BITS, the set of all bits
+ // in an epoll event mask that correspond to flags rather than I/O events.
+ EP_PRIVATE_BITS = EPOLLEXCLUSIVE | EPOLLWAKEUP | EPOLLONESHOT | EPOLLET
)
// Operation flags.
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 16791d03e..c3ab15a4f 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -24,27 +24,23 @@ import (
// Constants for open(2).
const (
- O_ACCMODE = 000000003
- O_RDONLY = 000000000
- O_WRONLY = 000000001
- O_RDWR = 000000002
- O_CREAT = 000000100
- O_EXCL = 000000200
- O_NOCTTY = 000000400
- O_TRUNC = 000001000
- O_APPEND = 000002000
- O_NONBLOCK = 000004000
- O_DSYNC = 000010000
- O_ASYNC = 000020000
- O_DIRECT = 000040000
- O_LARGEFILE = 000100000
- O_DIRECTORY = 000200000
- O_NOFOLLOW = 000400000
- O_NOATIME = 001000000
- O_CLOEXEC = 002000000
- O_SYNC = 004000000 // __O_SYNC in Linux
- O_PATH = 010000000
- O_TMPFILE = 020000000 // __O_TMPFILE in Linux
+ O_ACCMODE = 000000003
+ O_RDONLY = 000000000
+ O_WRONLY = 000000001
+ O_RDWR = 000000002
+ O_CREAT = 000000100
+ O_EXCL = 000000200
+ O_NOCTTY = 000000400
+ O_TRUNC = 000001000
+ O_APPEND = 000002000
+ O_NONBLOCK = 000004000
+ O_DSYNC = 000010000
+ O_ASYNC = 000020000
+ O_NOATIME = 001000000
+ O_CLOEXEC = 002000000
+ O_SYNC = 004000000 // __O_SYNC in Linux
+ O_PATH = 010000000
+ O_TMPFILE = 020000000 // __O_TMPFILE in Linux
)
// Constants for fstatat(2).
@@ -180,6 +176,19 @@ const (
DT_WHT = 14
)
+// DirentType holds the friendly strings for linux_dirent64.d_type values.
+var DirentType = abi.ValueSet{
+ DT_UNKNOWN: "DT_UNKNOWN",
+ DT_FIFO: "DT_FIFO",
+ DT_CHR: "DT_CHR",
+ DT_DIR: "DT_DIR",
+ DT_BLK: "DT_BLK",
+ DT_REG: "DT_REG",
+ DT_LNK: "DT_LNK",
+ DT_SOCK: "DT_SOCK",
+ DT_WHT: "DT_WHT",
+}
+
// Values for preadv2/pwritev2.
const (
// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
diff --git a/pkg/abi/linux/file_amd64.go b/pkg/abi/linux/file_amd64.go
index 74c554be6..9d307e840 100644
--- a/pkg/abi/linux/file_amd64.go
+++ b/pkg/abi/linux/file_amd64.go
@@ -14,6 +14,14 @@
package linux
+// Constants for open(2).
+const (
+ O_DIRECT = 000040000
+ O_LARGEFILE = 000100000
+ O_DIRECTORY = 000200000
+ O_NOFOLLOW = 000400000
+)
+
// Stat represents struct stat.
type Stat struct {
Dev uint64
diff --git a/pkg/abi/linux/file_arm64.go b/pkg/abi/linux/file_arm64.go
index f16c07589..26a54f416 100644
--- a/pkg/abi/linux/file_arm64.go
+++ b/pkg/abi/linux/file_arm64.go
@@ -14,6 +14,14 @@
package linux
+// Constants for open(2).
+const (
+ O_DIRECTORY = 000040000
+ O_NOFOLLOW = 000100000
+ O_DIRECT = 000200000
+ O_LARGEFILE = 000400000
+)
+
// Stat represents struct stat.
type Stat struct {
Dev uint64
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index 269ba5567..33fcc6c95 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -42,6 +42,15 @@ const (
NF_RETURN = -NF_REPEAT - 1
)
+// VerdictStrings maps int verdicts to the strings they represent. It is used
+// for debugging.
+var VerdictStrings = map[int32]string{
+ -NF_DROP - 1: "DROP",
+ -NF_ACCEPT - 1: "ACCEPT",
+ -NF_QUEUE - 1: "QUEUE",
+ NF_RETURN: "RETURN",
+}
+
// Socket options. These correspond to values in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
const (
@@ -179,7 +188,7 @@ const SizeOfXTCounters = 16
// the user data.
type XTEntryMatch struct {
MatchSize uint16
- Name [XT_EXTENSION_MAXNAMELEN]byte
+ Name ExtensionName
Revision uint8
// Data is omitted here because it would cause XTEntryMatch to be an
// extra byte larger (see http://www.catb.org/esr/structure-packing/).
@@ -199,7 +208,7 @@ const SizeOfXTEntryMatch = 32
// the user data.
type XTEntryTarget struct {
TargetSize uint16
- Name [XT_EXTENSION_MAXNAMELEN]byte
+ Name ExtensionName
Revision uint8
// Data is omitted here because it would cause XTEntryTarget to be an
// extra byte larger (see http://www.catb.org/esr/structure-packing/).
@@ -226,9 +235,9 @@ const SizeOfXTStandardTarget = 40
// ErrorName. It corresponds to struct xt_error_target in
// include/uapi/linux/netfilter/x_tables.h.
type XTErrorTarget struct {
- Target XTEntryTarget
- ErrorName [XT_FUNCTION_MAXNAMELEN]byte
- _ [2]byte
+ Target XTEntryTarget
+ Name ErrorName
+ _ [2]byte
}
// SizeOfXTErrorTarget is the size of an XTErrorTarget.
@@ -237,7 +246,7 @@ const SizeOfXTErrorTarget = 64
// IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds
// to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h.
type IPTGetinfo struct {
- Name [XT_TABLE_MAXNAMELEN]byte
+ Name TableName
ValidHooks uint32
HookEntry [NF_INET_NUMHOOKS]uint32
Underflow [NF_INET_NUMHOOKS]uint32
@@ -248,16 +257,11 @@ type IPTGetinfo struct {
// SizeOfIPTGetinfo is the size of an IPTGetinfo.
const SizeOfIPTGetinfo = 84
-// TableName returns the table name.
-func (info *IPTGetinfo) TableName() string {
- return tableName(info.Name[:])
-}
-
// IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It
// corresponds to struct ipt_get_entries in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
type IPTGetEntries struct {
- Name [XT_TABLE_MAXNAMELEN]byte
+ Name TableName
Size uint32
_ [4]byte
// Entrytable is omitted here because it would cause IPTGetEntries to
@@ -266,34 +270,22 @@ type IPTGetEntries struct {
// Entrytable [0]IPTEntry
}
-// TableName returns the entries' table name.
-func (entries *IPTGetEntries) TableName() string {
- return tableName(entries.Name[:])
-}
-
// SizeOfIPTGetEntries is the size of an IPTGetEntries.
const SizeOfIPTGetEntries = 40
-// KernelIPTGetEntries is identical to IPTEntry, but includes the Elems field.
-// This struct marshaled via the binary package to write an KernelIPTGetEntries
-// to userspace.
+// KernelIPTGetEntries is identical to IPTGetEntries, but includes the
+// Entrytable field. This struct is marshaled via the binary package to write a
+// KernelIPTGetEntries to userspace.
type KernelIPTGetEntries struct {
- Name [XT_TABLE_MAXNAMELEN]byte
- Size uint32
- _ [4]byte
+ IPTGetEntries
Entrytable []KernelIPTEntry
}
-// TableName returns the entries' table name.
-func (entries *KernelIPTGetEntries) TableName() string {
- return tableName(entries.Name[:])
-}
-
// IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It
// corresponds to struct ipt_replace in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
type IPTReplace struct {
- Name [XT_TABLE_MAXNAMELEN]byte
+ Name TableName
ValidHooks uint32
NumEntries uint32
Size uint32
@@ -306,14 +298,45 @@ type IPTReplace struct {
// Entries [0]IPTEntry
}
+// KernelIPTReplace is identical to IPTReplace, but includes the Entries field.
+type KernelIPTReplace struct {
+ IPTReplace
+ Entries [0]IPTEntry
+}
+
// SizeOfIPTReplace is the size of an IPTReplace.
const SizeOfIPTReplace = 96
-func tableName(name []byte) string {
- for i, c := range name {
+// ExtensionName holds the name of a netfilter extension.
+type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (en ExtensionName) String() string {
+ return goString(en[:])
+}
+
+// TableName holds the name of a netfilter table.
+type TableName [XT_TABLE_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (tn TableName) String() string {
+ return goString(tn[:])
+}
+
+// ErrorName holds the name of a netfilter error. These can also hold
+// user-defined chains.
+type ErrorName [XT_FUNCTION_MAXNAMELEN]byte
+
+// String implements fmt.Stringer.
+func (en ErrorName) String() string {
+ return goString(en[:])
+}
+
+func goString(cstring []byte) string {
+ for i, c := range cstring {
if c == 0 {
- return string(name[:i])
+ return string(cstring[:i])
}
}
- return string(name)
+ return string(cstring)
}
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
index 3898d2314..40bec566c 100644
--- a/pkg/abi/linux/netlink_route.go
+++ b/pkg/abi/linux/netlink_route.go
@@ -190,7 +190,7 @@ const (
ARPHRD_LOOPBACK = 772
)
-// RouteMessage struct rtmsg, from uapi/linux/rtnetlink.h.
+// RouteMessage is struct rtmsg, from uapi/linux/rtnetlink.h.
type RouteMessage struct {
Family uint8
DstLen uint8
@@ -205,6 +205,9 @@ type RouteMessage struct {
Flags uint32
}
+// SizeOfRouteMessage is the size of RouteMessage.
+const SizeOfRouteMessage = 12
+
// Route types, from uapi/linux/rtnetlink.h.
const (
// RTN_UNSPEC represents an unspecified route type.
@@ -331,3 +334,13 @@ const (
RTF_GATEWAY = 0x2
RTF_UP = 0x1
)
+
+// RtAttr is the header of optional additional route information, as a netlink
+// attribute. From include/uapi/linux/rtnetlink.h.
+type RtAttr struct {
+ Len uint16
+ Type uint16
+}
+
+// SizeOfRtAttr is the size of RtAttr.
+const SizeOfRtAttr = 4
diff --git a/pkg/abi/linux/rseq.go b/pkg/abi/linux/rseq.go
new file mode 100644
index 000000000..76253ba30
--- /dev/null
+++ b/pkg/abi/linux/rseq.go
@@ -0,0 +1,130 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Flags passed to rseq(2).
+//
+// Defined in include/uapi/linux/rseq.h.
+const (
+ // RSEQ_FLAG_UNREGISTER unregisters the current thread.
+ RSEQ_FLAG_UNREGISTER = 1 << 0
+)
+
+// Critical section flags used in RSeqCriticalSection.Flags and RSeq.Flags.
+//
+// Defined in include/uapi/linux/rseq.h.
+const (
+ // RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT inhibits restart on preemption.
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = 1 << 0
+
+ // RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL inhibits restart on signal
+ // delivery.
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = 1 << 1
+
+ // RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE inhibits restart on CPU
+ // migration.
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = 1 << 2
+)
+
+// RSeqCriticalSection describes a restartable sequences critical section. It
+// is equivalent to struct rseq_cs, defined in include/uapi/linux/rseq.h.
+//
+// In userspace, this structure is always aligned to 32 bytes.
+//
+// +marshal
+type RSeqCriticalSection struct {
+ // Version is the version of this structure. Version 0 is defined here.
+ Version uint32
+
+ // Flags are the critical section flags, defined above.
+ Flags uint32
+
+ // Start is the start address of the critical section.
+ Start uint64
+
+ // PostCommitOffset is the offset from Start of the first instruction
+ // outside of the critical section.
+ PostCommitOffset uint64
+
+ // Abort is the abort address. It must be outside the critical section,
+ // and the 4 bytes prior must match the abort signature.
+ Abort uint64
+}
+
+const (
+ // SizeOfRSeqCriticalSection is the size of RSeqCriticalSection.
+ SizeOfRSeqCriticalSection = 32
+
+ // SizeOfRSeqSignature is the size of the signature immediately
+ // preceding RSeqCriticalSection.Abort.
+ SizeOfRSeqSignature = 4
+)
+
+// Special values for RSeq.CPUID, defined in include/uapi/linux/rseq.h.
+const (
+ // RSEQ_CPU_ID_UNINITIALIZED indicates that this thread has not
+ // performed rseq initialization.
+ RSEQ_CPU_ID_UNINITIALIZED = ^uint32(0) // -1
+
+ // RSEQ_CPU_ID_REGISTRATION_FAILED indicates that rseq initialization
+ // failed.
+ RSEQ_CPU_ID_REGISTRATION_FAILED = ^uint32(1) // -2
+)
+
+// RSeq is the thread-local restartable sequences config/status. It
+// is equivalent to struct rseq, defined in include/uapi/linux/rseq.h.
+//
+// In userspace, this structure is always aligned to 32 bytes.
+type RSeq struct {
+ // CPUIDStart contains the current CPU ID if rseq is initialized.
+ //
+ // This field should only be read by the thread which registered this
+ // structure, and must be read atomically.
+ CPUIDStart uint32
+
+ // CPUID contains the current CPU ID or one of the CPU ID special
+ // values defined above.
+ //
+ // This field should only be read by the thread which registered this
+ // structure, and must be read atomically.
+ CPUID uint32
+
+ // RSeqCriticalSection is a pointer to the current RSeqCriticalSection
+ // block, or NULL. It is reset to NULL by the kernel on restart or
+ // non-restarting preempt/signal.
+ //
+ // This field should only be written by the thread which registered
+ // this structure, and must be written atomically.
+ RSeqCriticalSection uint64
+
+ // Flags are the critical section flags that apply to all critical
+ // sections on this thread, defined above.
+ Flags uint32
+}
+
+const (
+ // SizeOfRSeq is the size of RSeq.
+ //
+ // Note that RSeq is naively 24 bytes. However, it has 32-byte
+ // alignment, which in C increases sizeof to 32. That is the size that
+ // the Linux kernel uses.
+ SizeOfRSeq = 32
+
+ // AlignOfRSeq is the standard alignment of RSeq.
+ AlignOfRSeq = 32
+
+ // OffsetOfRSeqCriticalSection is the offset of RSeqCriticalSection in RSeq.
+ OffsetOfRSeqCriticalSection = 8
+)
diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go
index 546668bca..5c5a58cd4 100644
--- a/pkg/abi/linux/time.go
+++ b/pkg/abi/linux/time.go
@@ -234,6 +234,19 @@ type StatxTimestamp struct {
_ int32
}
+// ToNsec returns the nanosecond representation.
+func (sxts StatxTimestamp) ToNsec() int64 {
+ return int64(sxts.Sec)*1e9 + int64(sxts.Nsec)
+}
+
+// ToNsecCapped returns the nanosecond representation, capped at math.MaxInt64
+// when Sec exceeds maxSecInDuration.
+func (sxts StatxTimestamp) ToNsecCapped() int64 {
+ if sxts.Sec > maxSecInDuration {
+ return math.MaxInt64
+ }
+ return sxts.ToNsec()
+}
+
// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp.
func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) {
return StatxTimestamp{