summaryrefslogtreecommitdiffhomepage
path: root/pkg/abi
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/abi')
-rw-r--r--pkg/abi/linux/BUILD9
-rw-r--r--pkg/abi/linux/aio.go63
-rw-r--r--pkg/abi/linux/bpf.go1
-rw-r--r--pkg/abi/linux/capability.go4
-rw-r--r--pkg/abi/linux/dev.go10
-rw-r--r--pkg/abi/linux/fadvise.go24
-rw-r--r--pkg/abi/linux/fcntl.go6
-rw-r--r--pkg/abi/linux/file.go5
-rw-r--r--pkg/abi/linux/fs.go12
-rw-r--r--pkg/abi/linux/fuse.go873
-rw-r--r--pkg/abi/linux/futex.go18
-rw-r--r--pkg/abi/linux/ioctl.go80
-rw-r--r--pkg/abi/linux/ipc.go2
-rw-r--r--pkg/abi/linux/linux.go9
-rw-r--r--pkg/abi/linux/membarrier.go34
-rw-r--r--pkg/abi/linux/netdevice.go4
-rw-r--r--pkg/abi/linux/netfilter.go175
-rw-r--r--pkg/abi/linux/netfilter_ipv6.go336
-rw-r--r--pkg/abi/linux/netfilter_test.go3
-rw-r--r--pkg/abi/linux/netlink.go2
-rw-r--r--pkg/abi/linux/netlink_route.go2
-rw-r--r--pkg/abi/linux/poll.go2
-rw-r--r--pkg/abi/linux/rusage.go2
-rw-r--r--pkg/abi/linux/seccomp.go42
-rw-r--r--pkg/abi/linux/sem.go12
-rw-r--r--pkg/abi/linux/sem_amd64.go33
-rw-r--r--pkg/abi/linux/sem_arm64.go31
-rw-r--r--pkg/abi/linux/shm.go6
-rw-r--r--pkg/abi/linux/signal.go2
-rw-r--r--pkg/abi/linux/signalfd.go4
-rw-r--r--pkg/abi/linux/socket.go40
-rw-r--r--pkg/abi/linux/tcp.go1
-rw-r--r--pkg/abi/linux/time.go16
-rw-r--r--pkg/abi/linux/tty.go11
-rw-r--r--pkg/abi/linux/utsname.go2
-rw-r--r--pkg/abi/linux/xattr.go3
36 files changed, 1822 insertions, 57 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 114b516e2..a0654df2f 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -23,11 +23,13 @@ go_library(
"errors.go",
"eventfd.go",
"exec.go",
+ "fadvise.go",
"fcntl.go",
"file.go",
"file_amd64.go",
"file_arm64.go",
"fs.go",
+ "fuse.go",
"futex.go",
"inotify.go",
"ioctl.go",
@@ -36,9 +38,11 @@ go_library(
"ipc.go",
"limits.go",
"linux.go",
+ "membarrier.go",
"mm.go",
"netdevice.go",
"netfilter.go",
+ "netfilter_ipv6.go",
"netlink.go",
"netlink_route.go",
"poll.go",
@@ -51,6 +55,8 @@ go_library(
"sched.go",
"seccomp.go",
"sem.go",
+ "sem_amd64.go",
+ "sem_arm64.go",
"shm.go",
"signal.go",
"signalfd.go",
@@ -71,6 +77,9 @@ go_library(
"//pkg/abi",
"//pkg/binary",
"//pkg/bits",
+ "//pkg/marshal",
+ "//pkg/marshal/primitive",
+ "//pkg/usermem",
],
)
diff --git a/pkg/abi/linux/aio.go b/pkg/abi/linux/aio.go
index 3c6e0079d..5fc099892 100644
--- a/pkg/abi/linux/aio.go
+++ b/pkg/abi/linux/aio.go
@@ -14,7 +14,66 @@
package linux
+import "encoding/binary"
+
+// AIORingSize is sizeof(struct aio_ring).
+const AIORingSize = 32
+
+// I/O commands.
+const (
+ IOCB_CMD_PREAD = 0
+ IOCB_CMD_PWRITE = 1
+ IOCB_CMD_FSYNC = 2
+ IOCB_CMD_FDSYNC = 3
+ // 4 was the experimental IOCB_CMD_PREADX.
+ IOCB_CMD_POLL = 5
+ IOCB_CMD_NOOP = 6
+ IOCB_CMD_PREADV = 7
+ IOCB_CMD_PWRITEV = 8
+)
+
+// I/O flags.
const (
- // AIORingSize is sizeof(struct aio_ring).
- AIORingSize = 32
+ IOCB_FLAG_RESFD = 1
+ IOCB_FLAG_IOPRIO = 2
)
+
+// IOCallback describes an I/O request.
+//
+// The priority field is currently ignored in the implementation below. Also
+// note that the IOCB_FLAG_RESFD feature is not supported.
+//
+// +marshal
+type IOCallback struct {
+ Data uint64
+ Key uint32
+ _ uint32
+
+ OpCode uint16
+ ReqPrio int16
+ FD int32
+
+ Buf uint64
+ Bytes uint64
+ Offset int64
+
+ Reserved2 uint64
+ Flags uint32
+
+ // eventfd to signal if IOCB_FLAG_RESFD is set in flags.
+ ResFD int32
+}
+
+// IOEvent describes an I/O result.
+//
+// +marshal
+// +stateify savable
+type IOEvent struct {
+ Data uint64
+ Obj uint64
+ Result int64
+ Result2 int64
+}
+
+// IOEventSize is the size of an ioEvent encoded.
+var IOEventSize = binary.Size(IOEvent{})
diff --git a/pkg/abi/linux/bpf.go b/pkg/abi/linux/bpf.go
index aa3d3ce70..9422fcf69 100644
--- a/pkg/abi/linux/bpf.go
+++ b/pkg/abi/linux/bpf.go
@@ -16,6 +16,7 @@ package linux
// BPFInstruction is a raw BPF virtual machine instruction.
//
+// +marshal slice:BPFInstructionSlice
// +stateify savable
type BPFInstruction struct {
// OpCode is the operation to execute.
diff --git a/pkg/abi/linux/capability.go b/pkg/abi/linux/capability.go
index 965f74663..afd16cc27 100644
--- a/pkg/abi/linux/capability.go
+++ b/pkg/abi/linux/capability.go
@@ -177,12 +177,16 @@ const (
)
// CapUserHeader is equivalent to Linux's cap_user_header_t.
+//
+// +marshal
type CapUserHeader struct {
Version uint32
Pid int32
}
// CapUserData is equivalent to Linux's cap_user_data_t.
+//
+// +marshal slice:CapUserDataSlice
type CapUserData struct {
Effective uint32
Permitted uint32
diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go
index fa3ae5f18..7771650b3 100644
--- a/pkg/abi/linux/dev.go
+++ b/pkg/abi/linux/dev.go
@@ -46,13 +46,17 @@ const (
// TTYAUX_MAJOR is the major device number for alternate TTY devices.
TTYAUX_MAJOR = 5
+ // MISC_MAJOR is the major device number for non-serial mice, misc feature
+ // devices.
+ MISC_MAJOR = 10
+
// UNIX98_PTY_MASTER_MAJOR is the initial major device number for
// Unix98 PTY masters.
UNIX98_PTY_MASTER_MAJOR = 128
- // UNIX98_PTY_SLAVE_MAJOR is the initial major device number for
- // Unix98 PTY slaves.
- UNIX98_PTY_SLAVE_MAJOR = 136
+ // UNIX98_PTY_REPLICA_MAJOR is the initial major device number for
+ // Unix98 PTY replicas.
+ UNIX98_PTY_REPLICA_MAJOR = 136
)
// Minor device numbers for TTYAUX_MAJOR.
diff --git a/pkg/abi/linux/fadvise.go b/pkg/abi/linux/fadvise.go
new file mode 100644
index 000000000..b06ff9964
--- /dev/null
+++ b/pkg/abi/linux/fadvise.go
@@ -0,0 +1,24 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+const (
+ POSIX_FADV_NORMAL = 0
+ POSIX_FADV_RANDOM = 1
+ POSIX_FADV_SEQUENTIAL = 2
+ POSIX_FADV_WILLNEED = 3
+ POSIX_FADV_DONTNEED = 4
+ POSIX_FADV_NOREUSE = 5
+)
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go
index 6663a199c..cc3571fad 100644
--- a/pkg/abi/linux/fcntl.go
+++ b/pkg/abi/linux/fcntl.go
@@ -45,6 +45,8 @@ const (
)
// Flock is the lock structure for F_SETLK.
+//
+// +marshal
type Flock struct {
Type int16
Whence int16
@@ -55,7 +57,7 @@ type Flock struct {
_ [4]byte
}
-// Flags for F_SETOWN_EX and F_GETOWN_EX.
+// Owner types for F_SETOWN_EX and F_GETOWN_EX.
const (
F_OWNER_TID = 0
F_OWNER_PID = 1
@@ -63,6 +65,8 @@ const (
)
// FOwnerEx is the owner structure for F_SETOWN_EX and F_GETOWN_EX.
+//
+// +marshal
type FOwnerEx struct {
Type int32
PID int32
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 055ac1d7c..e11ca2d62 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -191,8 +191,9 @@ var DirentType = abi.ValueSet{
// Values for preadv2/pwritev2.
const (
- // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
- // accepted as a valid flag argument for preadv2/pwritev2.
+ // NOTE(b/120162627): gVisor does not implement the RWF_HIPRI feature, but
+ // the flag is accepted as a valid flag argument for preadv2/pwritev2 and
+ // silently ignored.
RWF_HIPRI = 0x00000001
RWF_DSYNC = 0x00000002
RWF_SYNC = 0x00000004
diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go
index 158d2db5b..0d921ed6f 100644
--- a/pkg/abi/linux/fs.go
+++ b/pkg/abi/linux/fs.go
@@ -29,6 +29,7 @@ const (
SYSFS_MAGIC = 0x62656572
TMPFS_MAGIC = 0x01021994
V9FS_MAGIC = 0x01021997
+ FUSE_SUPER_MAGIC = 0x65735546
)
// Filesystem path limits, from uapi/linux/limits.h.
@@ -44,17 +45,18 @@ type Statfs struct {
// Type is one of the filesystem magic values, defined above.
Type uint64
- // BlockSize is the data block size.
+ // BlockSize is the optimal transfer block size in bytes.
BlockSize int64
- // Blocks is the number of data blocks in use.
+ // Blocks is the maximum number of data blocks the filesystem may store, in
+ // units of BlockSize.
Blocks uint64
- // BlocksFree is the number of free blocks.
+ // BlocksFree is the number of free data blocks, in units of BlockSize.
BlocksFree uint64
- // BlocksAvailable is the number of blocks free for use by
- // unprivileged users.
+ // BlocksAvailable is the number of data blocks free for use by
+ // unprivileged users, in units of BlockSize.
BlocksAvailable uint64
// Files is the number of used file nodes on the filesystem.
diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
new file mode 100644
index 000000000..d91c97a64
--- /dev/null
+++ b/pkg/abi/linux/fuse.go
@@ -0,0 +1,873 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+)
+
+// +marshal
+type FUSEOpcode uint32
+
+// +marshal
+type FUSEOpID uint64
+
+// FUSE_ROOT_ID is the id of root inode.
+const FUSE_ROOT_ID = 1
+
+// Opcodes for FUSE operations. Analogous to the opcodes in include/linux/fuse.h.
+const (
+ FUSE_LOOKUP FUSEOpcode = 1
+ FUSE_FORGET = 2 /* no reply */
+ FUSE_GETATTR = 3
+ FUSE_SETATTR = 4
+ FUSE_READLINK = 5
+ FUSE_SYMLINK = 6
+ _
+ FUSE_MKNOD = 8
+ FUSE_MKDIR = 9
+ FUSE_UNLINK = 10
+ FUSE_RMDIR = 11
+ FUSE_RENAME = 12
+ FUSE_LINK = 13
+ FUSE_OPEN = 14
+ FUSE_READ = 15
+ FUSE_WRITE = 16
+ FUSE_STATFS = 17
+ FUSE_RELEASE = 18
+ _
+ FUSE_FSYNC = 20
+ FUSE_SETXATTR = 21
+ FUSE_GETXATTR = 22
+ FUSE_LISTXATTR = 23
+ FUSE_REMOVEXATTR = 24
+ FUSE_FLUSH = 25
+ FUSE_INIT = 26
+ FUSE_OPENDIR = 27
+ FUSE_READDIR = 28
+ FUSE_RELEASEDIR = 29
+ FUSE_FSYNCDIR = 30
+ FUSE_GETLK = 31
+ FUSE_SETLK = 32
+ FUSE_SETLKW = 33
+ FUSE_ACCESS = 34
+ FUSE_CREATE = 35
+ FUSE_INTERRUPT = 36
+ FUSE_BMAP = 37
+ FUSE_DESTROY = 38
+ FUSE_IOCTL = 39
+ FUSE_POLL = 40
+ FUSE_NOTIFY_REPLY = 41
+ FUSE_BATCH_FORGET = 42
+)
+
+const (
+ // FUSE_MIN_READ_BUFFER is the minimum size the read can be for any FUSE filesystem.
+ // This is the minimum size Linux supports. See linux.fuse.h.
+ FUSE_MIN_READ_BUFFER uint32 = 8192
+)
+
+// FUSEHeaderIn is the header read by the daemon with each request.
+//
+// +marshal
+type FUSEHeaderIn struct {
+ // Len specifies the total length of the data, including this header.
+ Len uint32
+
+ // Opcode specifies the kind of operation of the request.
+ Opcode FUSEOpcode
+
+ // Unique specifies the unique identifier for this request.
+ Unique FUSEOpID
+
+ // NodeID is the ID of the filesystem object being operated on.
+ NodeID uint64
+
+ // UID is the UID of the requesting process.
+ UID uint32
+
+ // GID is the GID of the requesting process.
+ GID uint32
+
+ // PID is the PID of the requesting process.
+ PID uint32
+
+ _ uint32
+}
+
+// FUSEHeaderOut is the header written by the daemon when it processes
+// a request and wants to send a reply (almost all operations require a
+// reply; if they do not, this will be explicitly documented).
+//
+// +marshal
+type FUSEHeaderOut struct {
+ // Len specifies the total length of the data, including this header.
+ Len uint32
+
+ // Error specifies the error that occurred (0 if none).
+ Error int32
+
+ // Unique specifies the unique identifier of the corresponding request.
+ Unique FUSEOpID
+}
+
+// FUSE_INIT flags, consistent with the ones in include/uapi/linux/fuse.h.
+// Our taget version is 7.23 but we have few implemented in advance.
+const (
+ FUSE_ASYNC_READ = 1 << 0
+ FUSE_POSIX_LOCKS = 1 << 1
+ FUSE_FILE_OPS = 1 << 2
+ FUSE_ATOMIC_O_TRUNC = 1 << 3
+ FUSE_EXPORT_SUPPORT = 1 << 4
+ FUSE_BIG_WRITES = 1 << 5
+ FUSE_DONT_MASK = 1 << 6
+ FUSE_SPLICE_WRITE = 1 << 7
+ FUSE_SPLICE_MOVE = 1 << 8
+ FUSE_SPLICE_READ = 1 << 9
+ FUSE_FLOCK_LOCKS = 1 << 10
+ FUSE_HAS_IOCTL_DIR = 1 << 11
+ FUSE_AUTO_INVAL_DATA = 1 << 12
+ FUSE_DO_READDIRPLUS = 1 << 13
+ FUSE_READDIRPLUS_AUTO = 1 << 14
+ FUSE_ASYNC_DIO = 1 << 15
+ FUSE_WRITEBACK_CACHE = 1 << 16
+ FUSE_NO_OPEN_SUPPORT = 1 << 17
+ FUSE_MAX_PAGES = 1 << 22 // From FUSE 7.28
+)
+
+// currently supported FUSE protocol version numbers.
+const (
+ FUSE_KERNEL_VERSION = 7
+ FUSE_KERNEL_MINOR_VERSION = 31
+)
+
+// Constants relevant to FUSE operations.
+const (
+ FUSE_NAME_MAX = 1024
+ FUSE_PAGE_SIZE = 4096
+ FUSE_DIRENT_ALIGN = 8
+)
+
+// FUSEInitIn is the request sent by the kernel to the daemon,
+// to negotiate the version and flags.
+//
+// +marshal
+type FUSEInitIn struct {
+ // Major version supported by kernel.
+ Major uint32
+
+ // Minor version supported by the kernel.
+ Minor uint32
+
+ // MaxReadahead is the maximum number of bytes to read-ahead
+ // decided by the kernel.
+ MaxReadahead uint32
+
+ // Flags of this init request.
+ Flags uint32
+}
+
+// FUSEInitOut is the reply sent by the daemon to the kernel
+// for FUSEInitIn. We target FUSE 7.23; this struct supports 7.28.
+//
+// +marshal
+type FUSEInitOut struct {
+ // Major version supported by daemon.
+ Major uint32
+
+ // Minor version supported by daemon.
+ Minor uint32
+
+ // MaxReadahead is the maximum number of bytes to read-ahead.
+ // Decided by the daemon, after receiving the value from kernel.
+ MaxReadahead uint32
+
+ // Flags of this init reply.
+ Flags uint32
+
+ // MaxBackground is the maximum number of pending background requests
+ // that the daemon wants.
+ MaxBackground uint16
+
+ // CongestionThreshold is the daemon-decided threshold for
+ // the number of the pending background requests.
+ CongestionThreshold uint16
+
+ // MaxWrite is the daemon's maximum size of a write buffer.
+ // Kernel adjusts it to the minimum (fuse/init.go:fuseMinMaxWrite).
+ // if the value from daemon is too small.
+ MaxWrite uint32
+
+ // TimeGran is the daemon's time granularity for mtime and ctime metadata.
+ // The unit is nanosecond.
+ // Value should be power of 10.
+ // 1 indicates full nanosecond granularity support.
+ TimeGran uint32
+
+ // MaxPages is the daemon's maximum number of pages for one write operation.
+ // Kernel adjusts it to the maximum (fuse/init.go:FUSE_MAX_MAX_PAGES).
+ // if the value from daemon is too large.
+ MaxPages uint16
+
+ _ uint16
+
+ _ [8]uint32
+}
+
+// FUSE_GETATTR_FH is currently the only flag of FUSEGetAttrIn.GetAttrFlags.
+// If it is set, the file handle (FUSEGetAttrIn.Fh) is used to indicate the
+// object instead of the node id attribute in the request header.
+const FUSE_GETATTR_FH = (1 << 0)
+
+// FUSEGetAttrIn is the request sent by the kernel to the daemon,
+// to get the attribute of a inode.
+//
+// +marshal
+type FUSEGetAttrIn struct {
+ // GetAttrFlags specifies whether getattr request is sent with a nodeid or
+ // with a file handle.
+ GetAttrFlags uint32
+
+ _ uint32
+
+ // Fh is the file handler when GetAttrFlags has FUSE_GETATTR_FH bit. If
+ // used, the operation is analogous to fstat(2).
+ Fh uint64
+}
+
+// FUSEAttr is the struct used in the reponse FUSEGetAttrOut.
+//
+// +marshal
+type FUSEAttr struct {
+ // Ino is the inode number of this file.
+ Ino uint64
+
+ // Size is the size of this file.
+ Size uint64
+
+ // Blocks is the number of the 512B blocks allocated by this file.
+ Blocks uint64
+
+ // Atime is the time of last access.
+ Atime uint64
+
+ // Mtime is the time of last modification.
+ Mtime uint64
+
+ // Ctime is the time of last status change.
+ Ctime uint64
+
+ // AtimeNsec is the nano second part of Atime.
+ AtimeNsec uint32
+
+ // MtimeNsec is the nano second part of Mtime.
+ MtimeNsec uint32
+
+ // CtimeNsec is the nano second part of Ctime.
+ CtimeNsec uint32
+
+ // Mode contains the file type and mode.
+ Mode uint32
+
+ // Nlink is the number of the hard links.
+ Nlink uint32
+
+ // UID is user ID of the owner.
+ UID uint32
+
+ // GID is group ID of the owner.
+ GID uint32
+
+ // Rdev is the device ID if this is a special file.
+ Rdev uint32
+
+ // BlkSize is the block size for filesystem I/O.
+ BlkSize uint32
+
+ _ uint32
+}
+
+// FUSEGetAttrOut is the reply sent by the daemon to the kernel
+// for FUSEGetAttrIn.
+//
+// +marshal
+type FUSEGetAttrOut struct {
+ // AttrValid and AttrValidNsec describe the attribute cache duration
+ AttrValid uint64
+
+ // AttrValidNsec is the nanosecond part of the attribute cache duration
+ AttrValidNsec uint32
+
+ _ uint32
+
+ // Attr contains the metadata returned from the FUSE server
+ Attr FUSEAttr
+}
+
+// FUSEEntryOut is the reply sent by the daemon to the kernel
+// for FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and
+// FUSE_LOOKUP.
+//
+// +marshal
+type FUSEEntryOut struct {
+ // NodeID is the ID for current inode.
+ NodeID uint64
+
+ // Generation is the generation number of inode.
+ // Used to identify an inode that have different ID at different time.
+ Generation uint64
+
+ // EntryValid indicates timeout for an entry.
+ EntryValid uint64
+
+ // AttrValid indicates timeout for an entry's attributes.
+ AttrValid uint64
+
+ // EntryValidNsec indicates timeout for an entry in nanosecond.
+ EntryValidNSec uint32
+
+ // AttrValidNsec indicates timeout for an entry's attributes in nanosecond.
+ AttrValidNSec uint32
+
+ // Attr contains the attributes of an entry.
+ Attr FUSEAttr
+}
+
+// FUSELookupIn is the request sent by the kernel to the daemon
+// to look up a file name.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSELookupIn struct {
+ marshal.StubMarshallable
+
+ // Name is a file name to be looked up.
+ Name string
+}
+
+// MarshalBytes serializes r.name to the dst buffer.
+func (r *FUSELookupIn) MarshalBytes(buf []byte) {
+ copy(buf, r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSELookupIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSELookupIn) SizeBytes() int {
+ return len(r.Name) + 1
+}
+
+// MAX_NON_LFS indicates the maximum offset without large file support.
+const MAX_NON_LFS = ((1 << 31) - 1)
+
+// flags returned by OPEN request.
+const (
+ // FOPEN_DIRECT_IO indicates bypassing page cache for this opened file.
+ FOPEN_DIRECT_IO = 1 << 0
+ // FOPEN_KEEP_CACHE avoids invalidate of data cache on open.
+ FOPEN_KEEP_CACHE = 1 << 1
+ // FOPEN_NONSEEKABLE indicates the file cannot be seeked.
+ FOPEN_NONSEEKABLE = 1 << 2
+)
+
+// FUSEOpenIn is the request sent by the kernel to the daemon,
+// to negotiate flags and get file handle.
+//
+// +marshal
+type FUSEOpenIn struct {
+ // Flags of this open request.
+ Flags uint32
+
+ _ uint32
+}
+
+// FUSEOpenOut is the reply sent by the daemon to the kernel
+// for FUSEOpenIn.
+//
+// +marshal
+type FUSEOpenOut struct {
+ // Fh is the file handler for opened file.
+ Fh uint64
+
+ // OpenFlag for the opened file.
+ OpenFlag uint32
+
+ _ uint32
+}
+
+// FUSE_READ flags, consistent with the ones in include/uapi/linux/fuse.h.
+const (
+ FUSE_READ_LOCKOWNER = 1 << 1
+)
+
+// FUSEReadIn is the request sent by the kernel to the daemon
+// for FUSE_READ.
+//
+// +marshal
+type FUSEReadIn struct {
+ // Fh is the file handle in userspace.
+ Fh uint64
+
+ // Offset is the read offset.
+ Offset uint64
+
+ // Size is the number of bytes to read.
+ Size uint32
+
+ // ReadFlags for this FUSE_READ request.
+ // Currently only contains FUSE_READ_LOCKOWNER.
+ ReadFlags uint32
+
+ // LockOwner is the id of the lock owner if there is one.
+ LockOwner uint64
+
+ // Flags for the underlying file.
+ Flags uint32
+
+ _ uint32
+}
+
+// FUSEWriteIn is the first part of the payload of the
+// request sent by the kernel to the daemon
+// for FUSE_WRITE (struct for FUSE version >= 7.9).
+//
+// The second part of the payload is the
+// binary bytes of the data to be written.
+//
+// +marshal
+type FUSEWriteIn struct {
+ // Fh is the file handle in userspace.
+ Fh uint64
+
+ // Offset is the write offset.
+ Offset uint64
+
+ // Size is the number of bytes to write.
+ Size uint32
+
+ // ReadFlags for this FUSE_WRITE request.
+ WriteFlags uint32
+
+ // LockOwner is the id of the lock owner if there is one.
+ LockOwner uint64
+
+ // Flags for the underlying file.
+ Flags uint32
+
+ _ uint32
+}
+
+// FUSEWriteOut is the payload of the reply sent by the daemon to the kernel
+// for a FUSE_WRITE request.
+//
+// +marshal
+type FUSEWriteOut struct {
+ // Size is the number of bytes written.
+ Size uint32
+
+ _ uint32
+}
+
+// FUSEReleaseIn is the request sent by the kernel to the daemon
+// when there is no more reference to a file.
+//
+// +marshal
+type FUSEReleaseIn struct {
+ // Fh is the file handler for the file to be released.
+ Fh uint64
+
+ // Flags of the file.
+ Flags uint32
+
+ // ReleaseFlags of this release request.
+ ReleaseFlags uint32
+
+ // LockOwner is the id of the lock owner if there is one.
+ LockOwner uint64
+}
+
+// FUSECreateMeta contains all the static fields of FUSECreateIn,
+// which is used for FUSE_CREATE.
+//
+// +marshal
+type FUSECreateMeta struct {
+ // Flags of the creating file.
+ Flags uint32
+
+ // Mode is the mode of the creating file.
+ Mode uint32
+
+ // Umask is the current file mode creation mask.
+ Umask uint32
+ _ uint32
+}
+
+// FUSECreateIn contains all the arguments sent by the kernel to the daemon, to
+// atomically create and open a new regular file.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSECreateIn struct {
+ marshal.StubMarshallable
+
+ // CreateMeta contains mode, rdev and umash field for FUSE_MKNODS.
+ CreateMeta FUSECreateMeta
+
+ // Name is the name of the node to create.
+ Name string
+}
+
+// MarshalBytes serializes r.CreateMeta and r.Name to the dst buffer.
+func (r *FUSECreateIn) MarshalBytes(buf []byte) {
+ r.CreateMeta.MarshalBytes(buf[:r.CreateMeta.SizeBytes()])
+ copy(buf[r.CreateMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSECreateIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSECreateIn) SizeBytes() int {
+ return r.CreateMeta.SizeBytes() + len(r.Name) + 1
+}
+
+// FUSEMknodMeta contains all the static fields of FUSEMknodIn,
+// which is used for FUSE_MKNOD.
+//
+// +marshal
+type FUSEMknodMeta struct {
+ // Mode of the inode to create.
+ Mode uint32
+
+ // Rdev encodes device major and minor information.
+ Rdev uint32
+
+ // Umask is the current file mode creation mask.
+ Umask uint32
+
+ _ uint32
+}
+
+// FUSEMknodIn contains all the arguments sent by the kernel
+// to the daemon, to create a new file node.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEMknodIn struct {
+ marshal.StubMarshallable
+
+ // MknodMeta contains mode, rdev and umash field for FUSE_MKNODS.
+ MknodMeta FUSEMknodMeta
+
+ // Name is the name of the node to create.
+ Name string
+}
+
+// MarshalBytes serializes r.MknodMeta and r.Name to the dst buffer.
+func (r *FUSEMknodIn) MarshalBytes(buf []byte) {
+ r.MknodMeta.MarshalBytes(buf[:r.MknodMeta.SizeBytes()])
+ copy(buf[r.MknodMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEMknodIn.
+// 1 extra byte for null-terminated string.
+func (r *FUSEMknodIn) SizeBytes() int {
+ return r.MknodMeta.SizeBytes() + len(r.Name) + 1
+}
+
+// FUSESymLinkIn is the request sent by the kernel to the daemon,
+// to create a symbolic link.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSESymLinkIn struct {
+ marshal.StubMarshallable
+
+ // Name of symlink to create.
+ Name string
+
+ // Target of the symlink.
+ Target string
+}
+
+// MarshalBytes serializes r.Name and r.Target to the dst buffer.
+// Left null-termination at end of r.Name and r.Target.
+func (r *FUSESymLinkIn) MarshalBytes(buf []byte) {
+ copy(buf, r.Name)
+ copy(buf[len(r.Name)+1:], r.Target)
+}
+
+// SizeBytes is the size of the memory representation of FUSESymLinkIn.
+// 2 extra bytes for null-terminated string.
+func (r *FUSESymLinkIn) SizeBytes() int {
+ return len(r.Name) + len(r.Target) + 2
+}
+
+// FUSEEmptyIn is used by operations without request body.
+type FUSEEmptyIn struct{ marshal.StubMarshallable }
+
+// MarshalBytes do nothing for marshal.
+func (r *FUSEEmptyIn) MarshalBytes(buf []byte) {}
+
+// SizeBytes is 0 for empty request.
+func (r *FUSEEmptyIn) SizeBytes() int {
+ return 0
+}
+
+// FUSEMkdirMeta contains all the static fields of FUSEMkdirIn,
+// which is used for FUSE_MKDIR.
+//
+// +marshal
+type FUSEMkdirMeta struct {
+ // Mode of the directory of create.
+ Mode uint32
+
+ // Umask is the user file creation mask.
+ Umask uint32
+}
+
+// FUSEMkdirIn contains all the arguments sent by the kernel
+// to the daemon, to create a new directory.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEMkdirIn struct {
+ marshal.StubMarshallable
+
+ // MkdirMeta contains Mode and Umask of the directory to create.
+ MkdirMeta FUSEMkdirMeta
+
+ // Name of the directory to create.
+ Name string
+}
+
+// MarshalBytes serializes r.MkdirMeta and r.Name to the dst buffer.
+func (r *FUSEMkdirIn) MarshalBytes(buf []byte) {
+ r.MkdirMeta.MarshalBytes(buf[:r.MkdirMeta.SizeBytes()])
+ copy(buf[r.MkdirMeta.SizeBytes():], r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEMkdirIn.
+// 1 extra byte for null-terminated Name string.
+func (r *FUSEMkdirIn) SizeBytes() int {
+ return r.MkdirMeta.SizeBytes() + len(r.Name) + 1
+}
+
+// FUSERmDirIn is the request sent by the kernel to the daemon
+// when trying to remove a directory.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSERmDirIn struct {
+ marshal.StubMarshallable
+
+ // Name is a directory name to be removed.
+ Name string
+}
+
+// MarshalBytes serializes r.name to the dst buffer.
+func (r *FUSERmDirIn) MarshalBytes(buf []byte) {
+ copy(buf, r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSERmDirIn.
+func (r *FUSERmDirIn) SizeBytes() int {
+ return len(r.Name) + 1
+}
+
+// FUSEDirents is a list of Dirents received from the FUSE daemon server.
+// It is used for FUSE_READDIR.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEDirents struct {
+ marshal.StubMarshallable
+
+ Dirents []*FUSEDirent
+}
+
+// FUSEDirent is a Dirent received from the FUSE daemon server.
+// It is used for FUSE_READDIR.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEDirent struct {
+ marshal.StubMarshallable
+
+ // Meta contains all the static fields of FUSEDirent.
+ Meta FUSEDirentMeta
+
+ // Name is the filename of the dirent.
+ Name string
+}
+
+// FUSEDirentMeta contains all the static fields of FUSEDirent.
+// It is used for FUSE_READDIR.
+//
+// +marshal
+type FUSEDirentMeta struct {
+ // Inode of the dirent.
+ Ino uint64
+
+ // Offset of the dirent.
+ Off uint64
+
+ // NameLen is the length of the dirent name.
+ NameLen uint32
+
+ // Type of the dirent.
+ Type uint32
+}
+
+// SizeBytes is the size of the memory representation of FUSEDirents.
+func (r *FUSEDirents) SizeBytes() int {
+ var sizeBytes int
+ for _, dirent := range r.Dirents {
+ sizeBytes += dirent.SizeBytes()
+ }
+
+ return sizeBytes
+}
+
+// UnmarshalBytes deserializes FUSEDirents from the src buffer.
+func (r *FUSEDirents) UnmarshalBytes(src []byte) {
+ for {
+ if len(src) <= (*FUSEDirentMeta)(nil).SizeBytes() {
+ break
+ }
+
+ // Its unclear how many dirents there are in src. Each dirent is dynamically
+ // sized and so we can't make assumptions about how many dirents we can allocate.
+ if r.Dirents == nil {
+ r.Dirents = make([]*FUSEDirent, 0)
+ }
+
+ // We have to allocate a struct for each dirent - there must be a better way
+ // to do this. Linux allocates 1 page to store all the dirents and then
+ // simply reads them from the page.
+ var dirent FUSEDirent
+ dirent.UnmarshalBytes(src)
+ r.Dirents = append(r.Dirents, &dirent)
+
+ src = src[dirent.SizeBytes():]
+ }
+}
+
+// SizeBytes is the size of the memory representation of FUSEDirent.
+func (r *FUSEDirent) SizeBytes() int {
+ dataSize := r.Meta.SizeBytes() + len(r.Name)
+
+ // Each Dirent must be padded such that its size is a multiple
+ // of FUSE_DIRENT_ALIGN. Similar to the fuse dirent alignment
+ // in linux/fuse.h.
+ return (dataSize + (FUSE_DIRENT_ALIGN - 1)) & ^(FUSE_DIRENT_ALIGN - 1)
+}
+
+// UnmarshalBytes deserializes FUSEDirent from the src buffer.
+func (r *FUSEDirent) UnmarshalBytes(src []byte) {
+ r.Meta.UnmarshalBytes(src)
+ src = src[r.Meta.SizeBytes():]
+
+ if r.Meta.NameLen > FUSE_NAME_MAX {
+ // The name is too long and therefore invalid. We don't
+ // need to unmarshal the name since it'll be thrown away.
+ return
+ }
+
+ buf := make([]byte, r.Meta.NameLen)
+ name := primitive.ByteSlice(buf)
+ name.UnmarshalBytes(src[:r.Meta.NameLen])
+ r.Name = string(name)
+}
+
+// FATTR_* consts are the attribute flags defined in include/uapi/linux/fuse.h.
+// These should be or-ed together for setattr to know what has been changed.
+const (
+ FATTR_MODE = (1 << 0)
+ FATTR_UID = (1 << 1)
+ FATTR_GID = (1 << 2)
+ FATTR_SIZE = (1 << 3)
+ FATTR_ATIME = (1 << 4)
+ FATTR_MTIME = (1 << 5)
+ FATTR_FH = (1 << 6)
+ FATTR_ATIME_NOW = (1 << 7)
+ FATTR_MTIME_NOW = (1 << 8)
+ FATTR_LOCKOWNER = (1 << 9)
+ FATTR_CTIME = (1 << 10)
+)
+
+// FUSESetAttrIn is the request sent by the kernel to the daemon,
+// to set the attribute(s) of a file.
+//
+// +marshal
+type FUSESetAttrIn struct {
+ // Valid indicates which attributes are modified by this request.
+ Valid uint32
+
+ _ uint32
+
+ // Fh is used to identify the file if FATTR_FH is set in Valid.
+ Fh uint64
+
+ // Size is the size that the request wants to change to.
+ Size uint64
+
+ // LockOwner is the owner of the lock that the request wants to change to.
+ LockOwner uint64
+
+ // Atime is the access time that the request wants to change to.
+ Atime uint64
+
+ // Mtime is the modification time that the request wants to change to.
+ Mtime uint64
+
+ // Ctime is the status change time that the request wants to change to.
+ Ctime uint64
+
+ // AtimeNsec is the nano second part of Atime.
+ AtimeNsec uint32
+
+ // MtimeNsec is the nano second part of Mtime.
+ MtimeNsec uint32
+
+ // CtimeNsec is the nano second part of Ctime.
+ CtimeNsec uint32
+
+ // Mode is the file mode that the request wants to change to.
+ Mode uint32
+
+ _ uint32
+
+ // UID is the user ID of the owner that the request wants to change to.
+ UID uint32
+
+ // GID is the group ID of the owner that the request wants to change to.
+ GID uint32
+
+ _ uint32
+}
+
+// FUSEUnlinkIn is the request sent by the kernel to the daemon
+// when trying to unlink a node.
+//
+// Dynamically-sized objects cannot be marshalled.
+type FUSEUnlinkIn struct {
+ marshal.StubMarshallable
+
+ // Name of the node to unlink.
+ Name string
+}
+
+// MarshalBytes serializes r.name to the dst buffer, which should
+// have size len(r.Name) + 1 and last byte set to 0.
+func (r *FUSEUnlinkIn) MarshalBytes(buf []byte) {
+ copy(buf, r.Name)
+}
+
+// SizeBytes is the size of the memory representation of FUSEUnlinkIn.
+// 1 extra byte for null-terminated Name string.
+func (r *FUSEUnlinkIn) SizeBytes() int {
+ return len(r.Name) + 1
+}
diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go
index 08bfde3b5..8138088a6 100644
--- a/pkg/abi/linux/futex.go
+++ b/pkg/abi/linux/futex.go
@@ -60,3 +60,21 @@ const (
FUTEX_WAITERS = 0x80000000
FUTEX_OWNER_DIED = 0x40000000
)
+
+// FUTEX_BITSET_MATCH_ANY has all bits set.
+const FUTEX_BITSET_MATCH_ANY = 0xffffffff
+
+// ROBUST_LIST_LIMIT protects against a deliberately circular list.
+const ROBUST_LIST_LIMIT = 2048
+
+// RobustListHead corresponds to Linux's struct robust_list_head.
+//
+// +marshal
+type RobustListHead struct {
+ List uint64
+ FutexOffset uint64
+ ListOpPending uint64
+}
+
+// SizeOfRobustListHead is the size of a RobustListHead struct.
+var SizeOfRobustListHead = (*RobustListHead)(nil).SizeBytes()
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 2062e6a4b..006b5a525 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -67,10 +67,29 @@ const (
// ioctl(2) requests provided by uapi/linux/sockios.h
const (
- SIOCGIFMEM = 0x891f
- SIOCGIFPFLAGS = 0x8935
- SIOCGMIIPHY = 0x8947
- SIOCGMIIREG = 0x8948
+ SIOCGIFNAME = 0x8910
+ SIOCGIFCONF = 0x8912
+ SIOCGIFFLAGS = 0x8913
+ SIOCGIFADDR = 0x8915
+ SIOCGIFDSTADDR = 0x8917
+ SIOCGIFBRDADDR = 0x8919
+ SIOCGIFNETMASK = 0x891b
+ SIOCGIFMETRIC = 0x891d
+ SIOCGIFMTU = 0x8921
+ SIOCGIFMEM = 0x891f
+ SIOCGIFHWADDR = 0x8927
+ SIOCGIFINDEX = 0x8933
+ SIOCGIFPFLAGS = 0x8935
+ SIOCGIFTXQLEN = 0x8942
+ SIOCETHTOOL = 0x8946
+ SIOCGMIIPHY = 0x8947
+ SIOCGMIIREG = 0x8948
+ SIOCGIFMAP = 0x8970
+)
+
+// ioctl(2) requests provided by uapi/asm-generic/sockios.h
+const (
+ SIOCGSTAMP = 0x8906
)
// ioctl(2) directions. Used to calculate requests number.
@@ -94,7 +113,60 @@ const (
_IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS
)
+// Constants from uapi/linux/fs.h.
+const (
+ FS_IOC_GETFLAGS = 2148034049
+ FS_VERITY_FL = 1048576
+)
+
+// Constants from uapi/linux/fsverity.h.
+const (
+ FS_VERITY_HASH_ALG_SHA256 = 1
+ FS_VERITY_HASH_ALG_SHA512 = 2
+
+ FS_IOC_ENABLE_VERITY = 1082156677
+ FS_IOC_MEASURE_VERITY = 3221513862
+)
+
+// DigestMetadata is a helper struct for VerityDigest.
+//
+// +marshal
+type DigestMetadata struct {
+ DigestAlgorithm uint16
+ DigestSize uint16
+}
+
+// SizeOfDigestMetadata is the size of struct DigestMetadata.
+const SizeOfDigestMetadata = 4
+
+// VerityDigest is struct from uapi/linux/fsverity.h.
+type VerityDigest struct {
+ Metadata DigestMetadata
+ Digest []byte
+}
+
// IOC outputs the result of _IOC macro in asm-generic/ioctl.h.
func IOC(dir, typ, nr, size uint32) uint32 {
return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT
}
+
+// Kcov ioctls from kernel/kcov.h.
+var (
+ KCOV_INIT_TRACE = IOC(_IOC_READ, 'c', 1, 8)
+ KCOV_ENABLE = IOC(_IOC_NONE, 'c', 100, 0)
+ KCOV_DISABLE = IOC(_IOC_NONE, 'c', 101, 0)
+)
+
+// Kcov trace types from kernel/kcov.h.
+const (
+ KCOV_TRACE_PC = 0
+ KCOV_TRACE_CMP = 1
+)
+
+// Kcov state constants from kernel/kcov.h.
+const (
+ KCOV_MODE_DISABLED = 0
+ KCOV_MODE_INIT = 1
+ KCOV_MODE_TRACE_PC = 2
+ KCOV_MODE_TRACE_CMP = 3
+)
diff --git a/pkg/abi/linux/ipc.go b/pkg/abi/linux/ipc.go
index 22acd2d43..c6e65df62 100644
--- a/pkg/abi/linux/ipc.go
+++ b/pkg/abi/linux/ipc.go
@@ -37,6 +37,8 @@ const IPC_PRIVATE = 0
// features like 32-bit UIDs.
// IPCPerm is equivalent to struct ipc64_perm.
+//
+// +marshal
type IPCPerm struct {
Key uint32
UID uint32
diff --git a/pkg/abi/linux/linux.go b/pkg/abi/linux/linux.go
index 281acdbde..3b4abece1 100644
--- a/pkg/abi/linux/linux.go
+++ b/pkg/abi/linux/linux.go
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package linux contains the constants and types needed to interface with a Linux kernel.
+// Package linux contains the constants and types needed to interface with a
+// Linux kernel.
package linux
// NumSoftIRQ is the number of software IRQs, exposed via /proc/stat.
@@ -21,6 +22,8 @@ package linux
const NumSoftIRQ = 10
// Sysinfo is the structure provided by sysinfo on linux versions > 2.3.48.
+//
+// +marshal
type Sysinfo struct {
Uptime int64
Loads [3]uint64
@@ -34,6 +37,6 @@ type Sysinfo struct {
_ [6]byte // Pad Procs to 64bits.
TotalHigh uint64
FreeHigh uint64
- Unit uint32
- /* The _f field in the glibc version of Sysinfo has size 0 on AMD64 */
+ Unit uint32 `marshal:"unaligned"` // Struct ends mid-64-bit-word.
+ // The _f field in the glibc version of Sysinfo has size 0 on AMD64.
}
diff --git a/pkg/abi/linux/membarrier.go b/pkg/abi/linux/membarrier.go
new file mode 100644
index 000000000..4f6021a1d
--- /dev/null
+++ b/pkg/abi/linux/membarrier.go
@@ -0,0 +1,34 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// membarrier(2) commands, from include/uapi/linux/membarrier.h.
+const (
+ MEMBARRIER_CMD_QUERY = 0
+ MEMBARRIER_CMD_GLOBAL = (1 << 0)
+ MEMBARRIER_CMD_GLOBAL_EXPEDITED = (1 << 1)
+ MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED = (1 << 2)
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3)
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4)
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 5)
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 6)
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7)
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8)
+)
+
+// membarrier(2) flags, from include/uapi/linux/membarrier.h.
+const (
+ MEMBARRIER_CMD_FLAG_CPU = (1 << 0)
+)
diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go
index 7866352b4..0faf015c7 100644
--- a/pkg/abi/linux/netdevice.go
+++ b/pkg/abi/linux/netdevice.go
@@ -22,6 +22,8 @@ const (
)
// IFReq is an interface request.
+//
+// +marshal
type IFReq struct {
// IFName is an encoded name, normally null-terminated. This should be
// accessed via the Name and SetName functions.
@@ -79,6 +81,8 @@ type IFMap struct {
// IFConf is used to return a list of interfaces and their addresses. See
// netdevice(7) and struct ifconf for more detail on its use.
+//
+// +marshal
type IFConf struct {
Len int32
_ [4]byte // Pad to sizeof(struct ifconf).
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index 46d8b0b42..b521144d9 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -14,6 +14,14 @@
package linux
+import (
+ "io"
+
+ "gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
// This file contains structures required to support netfilter, specifically
// the iptables tool.
@@ -51,7 +59,7 @@ var VerdictStrings = map[int32]string{
NF_RETURN: "RETURN",
}
-// Socket options. These correspond to values in
+// Socket options for SOL_SOCKET. These correspond to values in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
const (
IPT_BASE_CTL = 64
@@ -66,6 +74,12 @@ const (
IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET
)
+// Socket option for SOL_IP. This corresponds to the value in
+// include/uapi/linux/netfilter_ipv4.h.
+const (
+ SO_ORIGINAL_DST = 80
+)
+
// Name lengths. These correspond to values in
// include/uapi/linux/netfilter/x_tables.h.
const (
@@ -76,6 +90,8 @@ const (
// IPTEntry is an iptable rule. It corresponds to struct ipt_entry in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
type IPTEntry struct {
// IP is used to filter packets based on the IP header.
IP IPTIP
@@ -112,21 +128,41 @@ type IPTEntry struct {
// SizeOfIPTEntry is the size of an IPTEntry.
const SizeOfIPTEntry = 112
-// KernelIPTEntry is identical to IPTEntry, but includes the Elems field. This
-// struct marshaled via the binary package to write an IPTEntry to userspace.
+// KernelIPTEntry is identical to IPTEntry, but includes the Elems field.
+// KernelIPTEntry itself is not Marshallable but it implements some methods of
+// marshal.Marshallable that help in other implementations of Marshallable.
type KernelIPTEntry struct {
- IPTEntry
+ Entry IPTEntry
// Elems holds the data for all this rule's matches followed by the
// target. It is variable length -- users have to iterate over any
// matches and use TargetOffset and NextOffset to make sense of the
// data.
- Elems []byte
+ Elems primitive.ByteSlice
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIPTEntry) SizeBytes() int {
+ return ke.Entry.SizeBytes() + ke.Elems.SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIPTEntry) MarshalBytes(dst []byte) {
+ ke.Entry.MarshalBytes(dst)
+ ke.Elems.MarshalBytes(dst[ke.Entry.SizeBytes():])
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIPTEntry) UnmarshalBytes(src []byte) {
+ ke.Entry.UnmarshalBytes(src)
+ ke.Elems.UnmarshalBytes(src[ke.Entry.SizeBytes():])
}
// IPTIP contains information for matching a packet's IP header.
// It corresponds to struct ipt_ip in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
type IPTIP struct {
// Src is the source IP address.
Src InetAddr
@@ -189,6 +225,8 @@ const SizeOfIPTIP = 84
// XTCounters holds packet and byte counts for a rule. It corresponds to struct
// xt_counters in include/uapi/linux/netfilter/x_tables.h.
+//
+// +marshal
type XTCounters struct {
// Pcnt is the packet count.
Pcnt uint64
@@ -227,6 +265,18 @@ type KernelXTEntryMatch struct {
Data []byte
}
+// XTGetRevision corresponds to xt_get_revision in
+// include/uapi/linux/netfilter/x_tables.h
+//
+// +marshal
+type XTGetRevision struct {
+ Name ExtensionName
+ Revision uint8
+}
+
+// SizeOfXTGetRevision is the size of an XTGetRevision.
+const SizeOfXTGetRevision = 30
+
// XTEntryTarget holds a target for a rule. For example, it can specify that
// packets matching the rule should DROP, ACCEPT, or use an extension target.
// iptables-extension(8) has a list of possible targets.
@@ -247,6 +297,13 @@ type XTEntryTarget struct {
// SizeOfXTEntryTarget is the size of an XTEntryTarget.
const SizeOfXTEntryTarget = 32
+// KernelXTEntryTarget is identical to XTEntryTarget, but contains a
+// variable-length Data field.
+type KernelXTEntryTarget struct {
+ XTEntryTarget
+ Data []byte
+}
+
// XTStandardTarget is a built-in target, one of ACCEPT, DROP, JUMP, QUEUE,
// RETURN, or jump. It corresponds to struct xt_standard_target in
// include/uapi/linux/netfilter/x_tables.h.
@@ -321,6 +378,8 @@ const SizeOfXTRedirectTarget = 56
// IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds
// to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
type IPTGetinfo struct {
Name TableName
ValidHooks uint32
@@ -336,6 +395,8 @@ const SizeOfIPTGetinfo = 84
// IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It
// corresponds to struct ipt_get_entries in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
type IPTGetEntries struct {
Name TableName
Size uint32
@@ -350,13 +411,103 @@ type IPTGetEntries struct {
const SizeOfIPTGetEntries = 40
// KernelIPTGetEntries is identical to IPTGetEntries, but includes the
-// Entrytable field. This struct marshaled via the binary package to write an
-// KernelIPTGetEntries to userspace.
+// Entrytable field. This has been manually made marshal.Marshallable since it
+// is dynamically sized.
type KernelIPTGetEntries struct {
IPTGetEntries
Entrytable []KernelIPTEntry
}
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIPTGetEntries) SizeBytes() int {
+ res := ke.IPTGetEntries.SizeBytes()
+ for _, entry := range ke.Entrytable {
+ res += entry.SizeBytes()
+ }
+ return res
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIPTGetEntries) MarshalBytes(dst []byte) {
+ ke.IPTGetEntries.MarshalBytes(dst)
+ marshalledUntil := ke.IPTGetEntries.SizeBytes()
+ for i := range ke.Entrytable {
+ ke.Entrytable[i].MarshalBytes(dst[marshalledUntil:])
+ marshalledUntil += ke.Entrytable[i].SizeBytes()
+ }
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIPTGetEntries) UnmarshalBytes(src []byte) {
+ ke.IPTGetEntries.UnmarshalBytes(src)
+ unmarshalledUntil := ke.IPTGetEntries.SizeBytes()
+ for i := range ke.Entrytable {
+ ke.Entrytable[i].UnmarshalBytes(src[unmarshalledUntil:])
+ unmarshalledUntil += ke.Entrytable[i].SizeBytes()
+ }
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (ke *KernelIPTGetEntries) Packed() bool {
+ // KernelIPTGetEntries isn't packed because the ke.Entrytable contains an
+ // indirection to the actual data we want to marshal (the slice data
+ // pointer), and the memory for KernelIPTGetEntries contains the slice
+ // header which we don't want to marshal.
+ return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (ke *KernelIPTGetEntries) MarshalUnsafe(dst []byte) {
+ // Fall back to safe Marshal because the type in not packed.
+ ke.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (ke *KernelIPTGetEntries) UnmarshalUnsafe(src []byte) {
+ // Fall back to safe Unmarshal because the type in not packed.
+ ke.UnmarshalBytes(src)
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+func (ke *KernelIPTGetEntries) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ buf := cc.CopyScratchBuffer(ke.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ ke.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+func (ke *KernelIPTGetEntries) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ // Type KernelIPTGetEntries doesn't have a packed layout in memory, fall
+ // back to MarshalBytes.
+ return cc.CopyOutBytes(addr, ke.marshalAll(cc))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+func (ke *KernelIPTGetEntries) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
+ // Type KernelIPTGetEntries doesn't have a packed layout in memory, fall
+ // back to MarshalBytes.
+ return cc.CopyOutBytes(addr, ke.marshalAll(cc)[:limit])
+}
+
+func (ke *KernelIPTGetEntries) marshalAll(cc marshal.CopyContext) []byte {
+ buf := cc.CopyScratchBuffer(ke.SizeBytes())
+ ke.MarshalBytes(buf)
+ return buf
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (ke *KernelIPTGetEntries) WriteTo(w io.Writer) (int64, error) {
+ buf := make([]byte, ke.SizeBytes())
+ ke.MarshalBytes(buf)
+ length, err := w.Write(buf)
+ return int64(length), err
+}
+
+var _ marshal.Marshallable = (*KernelIPTGetEntries)(nil)
+
// IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It
// corresponds to struct ipt_replace in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
@@ -374,16 +525,12 @@ type IPTReplace struct {
// Entries [0]IPTEntry
}
-// KernelIPTReplace is identical to IPTReplace, but includes the Entries field.
-type KernelIPTReplace struct {
- IPTReplace
- Entries [0]IPTEntry
-}
-
// SizeOfIPTReplace is the size of an IPTReplace.
const SizeOfIPTReplace = 96
// ExtensionName holds the name of a netfilter extension.
+//
+// +marshal
type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte
// String implements fmt.Stringer.
@@ -392,6 +539,8 @@ func (en ExtensionName) String() string {
}
// TableName holds the name of a netfilter table.
+//
+// +marshal
type TableName [XT_TABLE_MAXNAMELEN]byte
// String implements fmt.Stringer.
diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go
new file mode 100644
index 000000000..6d31eb5e3
--- /dev/null
+++ b/pkg/abi/linux/netfilter_ipv6.go
@@ -0,0 +1,336 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "io"
+
+ "gvisor.dev/gvisor/pkg/marshal"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// This file contains structures required to support IPv6 netfilter and
+// ip6tables. Some constants and structs are equal to their IPv4 analogues, and
+// are only distinguished by context (e.g. whether used on an IPv4 of IPv6
+// socket).
+
+// Socket options for SOL_SOCLET. These correspond to values in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+const (
+ IP6T_BASE_CTL = 64
+ IP6T_SO_SET_REPLACE = IPT_BASE_CTL
+ IP6T_SO_SET_ADD_COUNTERS = IPT_BASE_CTL + 1
+ IP6T_SO_SET_MAX = IPT_SO_SET_ADD_COUNTERS
+
+ IP6T_SO_GET_INFO = IPT_BASE_CTL
+ IP6T_SO_GET_ENTRIES = IPT_BASE_CTL + 1
+ IP6T_SO_GET_REVISION_MATCH = IPT_BASE_CTL + 4
+ IP6T_SO_GET_REVISION_TARGET = IPT_BASE_CTL + 5
+ IP6T_SO_GET_MAX = IP6T_SO_GET_REVISION_TARGET
+)
+
+// IP6T_ORIGINAL_DST is the ip6tables SOL_IPV6 socket option. Corresponds to
+// the value in include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+// TODO(gvisor.dev/issue/3549): Support IPv6 original destination.
+const IP6T_ORIGINAL_DST = 80
+
+// IP6TReplace is the argument for the IP6T_SO_SET_REPLACE sockopt. It
+// corresponds to struct ip6t_replace in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+//
+// +marshal
+type IP6TReplace struct {
+ Name TableName
+ ValidHooks uint32
+ NumEntries uint32
+ Size uint32
+ HookEntry [NF_INET_NUMHOOKS]uint32
+ Underflow [NF_INET_NUMHOOKS]uint32
+ NumCounters uint32
+ Counters uint64 // This is really a *XTCounters.
+ // Entries is omitted here because it would cause IP6TReplace to be an
+ // extra byte longer (see http://www.catb.org/esr/structure-packing/).
+ // Entries [0]IP6TEntry
+}
+
+// SizeOfIP6TReplace is the size of an IP6TReplace.
+const SizeOfIP6TReplace = 96
+
+// KernelIP6TGetEntries is identical to IP6TGetEntries, but includes the
+// Entrytable field. This has been manually made marshal.Marshallable since it
+// is dynamically sized.
+type KernelIP6TGetEntries struct {
+ IPTGetEntries
+ Entrytable []KernelIP6TEntry
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIP6TGetEntries) SizeBytes() int {
+ res := ke.IPTGetEntries.SizeBytes()
+ for _, entry := range ke.Entrytable {
+ res += entry.SizeBytes()
+ }
+ return res
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIP6TGetEntries) MarshalBytes(dst []byte) {
+ ke.IPTGetEntries.MarshalBytes(dst)
+ marshalledUntil := ke.IPTGetEntries.SizeBytes()
+ for i := range ke.Entrytable {
+ ke.Entrytable[i].MarshalBytes(dst[marshalledUntil:])
+ marshalledUntil += ke.Entrytable[i].SizeBytes()
+ }
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIP6TGetEntries) UnmarshalBytes(src []byte) {
+ ke.IPTGetEntries.UnmarshalBytes(src)
+ unmarshalledUntil := ke.IPTGetEntries.SizeBytes()
+ for i := range ke.Entrytable {
+ ke.Entrytable[i].UnmarshalBytes(src[unmarshalledUntil:])
+ unmarshalledUntil += ke.Entrytable[i].SizeBytes()
+ }
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (ke *KernelIP6TGetEntries) Packed() bool {
+ // KernelIP6TGetEntries isn't packed because the ke.Entrytable contains
+ // an indirection to the actual data we want to marshal (the slice data
+ // pointer), and the memory for KernelIP6TGetEntries contains the slice
+ // header which we don't want to marshal.
+ return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (ke *KernelIP6TGetEntries) MarshalUnsafe(dst []byte) {
+ // Fall back to safe Marshal because the type in not packed.
+ ke.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (ke *KernelIP6TGetEntries) UnmarshalUnsafe(src []byte) {
+ // Fall back to safe Unmarshal because the type in not packed.
+ ke.UnmarshalBytes(src)
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+func (ke *KernelIP6TGetEntries) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ buf := cc.CopyScratchBuffer(ke.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results
+ // in a partially unmarshalled struct.
+ ke.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+func (ke *KernelIP6TGetEntries) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+ // Type KernelIP6TGetEntries doesn't have a packed layout in memory,
+ // fall back to MarshalBytes.
+ return cc.CopyOutBytes(addr, ke.marshalAll(cc))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+func (ke *KernelIP6TGetEntries) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
+ // Type KernelIP6TGetEntries doesn't have a packed layout in memory, fall
+ // back to MarshalBytes.
+ return cc.CopyOutBytes(addr, ke.marshalAll(cc)[:limit])
+}
+
+func (ke *KernelIP6TGetEntries) marshalAll(cc marshal.CopyContext) []byte {
+ buf := cc.CopyScratchBuffer(ke.SizeBytes())
+ ke.MarshalBytes(buf)
+ return buf
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (ke *KernelIP6TGetEntries) WriteTo(w io.Writer) (int64, error) {
+ buf := make([]byte, ke.SizeBytes())
+ ke.MarshalBytes(buf)
+ length, err := w.Write(buf)
+ return int64(length), err
+}
+
+var _ marshal.Marshallable = (*KernelIP6TGetEntries)(nil)
+
+// IP6TEntry is an iptables rule. It corresponds to struct ip6t_entry in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+//
+// +marshal
+type IP6TEntry struct {
+ // IPv6 is used to filter packets based on the IPv6 header.
+ IPv6 IP6TIP
+
+ // NFCache relates to kernel-internal caching and isn't used by
+ // userspace.
+ NFCache uint32
+
+ // TargetOffset is the byte offset from the beginning of this IPTEntry
+ // to the start of the entry's target.
+ TargetOffset uint16
+
+ // NextOffset is the byte offset from the beginning of this IPTEntry to
+ // the start of the next entry. It is thus also the size of the entry.
+ NextOffset uint16
+
+ // Comeback is a return pointer. It is not used by userspace.
+ Comeback uint32
+
+ _ [4]byte
+
+ // Counters holds the packet and byte counts for this rule.
+ Counters XTCounters
+
+ // Elems holds the data for all this rule's matches followed by the
+ // target. It is variable length -- users have to iterate over any
+ // matches and use TargetOffset and NextOffset to make sense of the
+ // data.
+ //
+ // Elems is omitted here because it would cause IPTEntry to be an extra
+ // byte larger (see http://www.catb.org/esr/structure-packing/).
+ //
+ // Elems [0]byte
+}
+
+// SizeOfIP6TEntry is the size of an IP6TEntry.
+const SizeOfIP6TEntry = 168
+
+// KernelIP6TEntry is identical to IP6TEntry, but includes the Elems field.
+// KernelIP6TEntry itself is not Marshallable but it implements some methods of
+// marshal.Marshallable that help in other implementations of Marshallable.
+type KernelIP6TEntry struct {
+ Entry IP6TEntry
+
+ // Elems holds the data for all this rule's matches followed by the
+ // target. It is variable length -- users have to iterate over any
+ // matches and use TargetOffset and NextOffset to make sense of the
+ // data.
+ Elems primitive.ByteSlice
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIP6TEntry) SizeBytes() int {
+ return ke.Entry.SizeBytes() + ke.Elems.SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIP6TEntry) MarshalBytes(dst []byte) {
+ ke.Entry.MarshalBytes(dst)
+ ke.Elems.MarshalBytes(dst[ke.Entry.SizeBytes():])
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIP6TEntry) UnmarshalBytes(src []byte) {
+ ke.Entry.UnmarshalBytes(src)
+ ke.Elems.UnmarshalBytes(src[ke.Entry.SizeBytes():])
+}
+
+// IP6TIP contains information for matching a packet's IP header.
+// It corresponds to struct ip6t_ip6 in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+//
+// +marshal
+type IP6TIP struct {
+ // Src is the source IP address.
+ Src Inet6Addr
+
+ // Dst is the destination IP address.
+ Dst Inet6Addr
+
+ // SrcMask is the source IP mask.
+ SrcMask Inet6Addr
+
+ // DstMask is the destination IP mask.
+ DstMask Inet6Addr
+
+ // InputInterface is the input network interface.
+ InputInterface [IFNAMSIZ]byte
+
+ // OutputInterface is the output network interface.
+ OutputInterface [IFNAMSIZ]byte
+
+ // InputInterfaceMask is the input interface mask.
+ InputInterfaceMask [IFNAMSIZ]byte
+
+ // OuputInterfaceMask is the output interface mask.
+ OutputInterfaceMask [IFNAMSIZ]byte
+
+ // Protocol is the transport protocol.
+ Protocol uint16
+
+ // TOS matches TOS flags when Flags indicates filtering by TOS.
+ TOS uint8
+
+ // Flags define matching behavior for the IP header.
+ Flags uint8
+
+ // InverseFlags invert the meaning of fields in struct IPTIP. See the
+ // IP6T_INV_* flags.
+ InverseFlags uint8
+
+ // Linux defines in6_addr (Inet6Addr for us) as the union of a
+ // 16-element byte array and a 4-element 32-bit integer array, so the
+ // whole struct is 4-byte aligned.
+ _ [3]byte
+}
+
+const SizeOfIP6TIP = 136
+
+// Flags in IP6TIP.Flags. Corresponding constants are in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+const (
+ // Whether to check the Protocol field.
+ IP6T_F_PROTO = 0x01
+ // Whether to match the TOS field.
+ IP6T_F_TOS = 0x02
+ // Indicates that the jump target is an aboslute GOTO, not an offset.
+ IP6T_F_GOTO = 0x04
+ // Enables all flags.
+ IP6T_F_MASK = 0x07
+)
+
+// Flags in IP6TIP.InverseFlags. Corresponding constants are in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+const (
+ // Invert the meaning of InputInterface.
+ IP6T_INV_VIA_IN = 0x01
+ // Invert the meaning of OutputInterface.
+ IP6T_INV_VIA_OUT = 0x02
+ // Invert the meaning of TOS.
+ IP6T_INV_TOS = 0x04
+ // Invert the meaning of Src.
+ IP6T_INV_SRCIP = 0x08
+ // Invert the meaning of Dst.
+ IP6T_INV_DSTIP = 0x10
+ // Invert the meaning of the IPT_F_FRAG flag.
+ IP6T_INV_FRAG = 0x20
+ // Enable all flags.
+ IP6T_INV_MASK = 0x7F
+)
+
+// NFNATRange corresponds to struct nf_nat_range in
+// include/uapi/linux/netfilter/nf_nat.h.
+type NFNATRange struct {
+ Flags uint32
+ MinAddr Inet6Addr
+ MaxAddr Inet6Addr
+ MinProto uint16 // Network byte order.
+ MaxProto uint16 // Network byte order.
+}
+
+// SizeOfNFNATRange is the size of NFNATRange.
+const SizeOfNFNATRange = 40
diff --git a/pkg/abi/linux/netfilter_test.go b/pkg/abi/linux/netfilter_test.go
index 565dd550e..bf73271c6 100644
--- a/pkg/abi/linux/netfilter_test.go
+++ b/pkg/abi/linux/netfilter_test.go
@@ -36,6 +36,9 @@ func TestSizes(t *testing.T) {
{XTEntryTarget{}, SizeOfXTEntryTarget},
{XTErrorTarget{}, SizeOfXTErrorTarget},
{XTStandardTarget{}, SizeOfXTStandardTarget},
+ {IP6TReplace{}, SizeOfIP6TReplace},
+ {IP6TEntry{}, SizeOfIP6TEntry},
+ {IP6TIP{}, SizeOfIP6TIP},
}
for _, tc := range testCases {
diff --git a/pkg/abi/linux/netlink.go b/pkg/abi/linux/netlink.go
index 0ba086c76..b41f94a69 100644
--- a/pkg/abi/linux/netlink.go
+++ b/pkg/abi/linux/netlink.go
@@ -40,6 +40,8 @@ const (
)
// SockAddrNetlink is struct sockaddr_nl, from uapi/linux/netlink.h.
+//
+// +marshal
type SockAddrNetlink struct {
Family uint16
_ uint16
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
index 40bec566c..ceda0a8d3 100644
--- a/pkg/abi/linux/netlink_route.go
+++ b/pkg/abi/linux/netlink_route.go
@@ -187,6 +187,8 @@ const (
// Device types, from uapi/linux/if_arp.h.
const (
+ ARPHRD_NONE = 65534
+ ARPHRD_ETHER = 1
ARPHRD_LOOPBACK = 772
)
diff --git a/pkg/abi/linux/poll.go b/pkg/abi/linux/poll.go
index c04d26e4c..3443a5768 100644
--- a/pkg/abi/linux/poll.go
+++ b/pkg/abi/linux/poll.go
@@ -15,6 +15,8 @@
package linux
// PollFD is struct pollfd, used by poll(2)/ppoll(2), from uapi/asm-generic/poll.h.
+//
+// +marshal slice:PollFDSlice
type PollFD struct {
FD int32
Events int16
diff --git a/pkg/abi/linux/rusage.go b/pkg/abi/linux/rusage.go
index d8302dc85..e29d0ac7e 100644
--- a/pkg/abi/linux/rusage.go
+++ b/pkg/abi/linux/rusage.go
@@ -26,6 +26,8 @@ const (
)
// Rusage represents the Linux struct rusage.
+//
+// +marshal
type Rusage struct {
UTime Timeval
STime Timeval
diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go
index d0607e256..5be3f10f9 100644
--- a/pkg/abi/linux/seccomp.go
+++ b/pkg/abi/linux/seccomp.go
@@ -34,11 +34,11 @@ type BPFAction uint32
const (
SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000
- SECCOMP_RET_KILL_THREAD = 0x00000000
- SECCOMP_RET_TRAP = 0x00030000
- SECCOMP_RET_ERRNO = 0x00050000
- SECCOMP_RET_TRACE = 0x7ff00000
- SECCOMP_RET_ALLOW = 0x7fff0000
+ SECCOMP_RET_KILL_THREAD BPFAction = 0x00000000
+ SECCOMP_RET_TRAP BPFAction = 0x00030000
+ SECCOMP_RET_ERRNO BPFAction = 0x00050000
+ SECCOMP_RET_TRACE BPFAction = 0x7ff00000
+ SECCOMP_RET_ALLOW BPFAction = 0x7fff0000
)
func (a BPFAction) String() string {
@@ -64,9 +64,41 @@ func (a BPFAction) Data() uint16 {
return uint16(a & SECCOMP_RET_DATA)
}
+// WithReturnCode sets the lower 16 bits of the SECCOMP_RET_ERRNO or
+// SECCOMP_RET_TRACE actions to the provided return code, overwriting the previous
+// action, and returns a new BPFAction. If not SECCOMP_RET_ERRNO or
+// SECCOMP_RET_TRACE then this panics.
+func (a BPFAction) WithReturnCode(code uint16) BPFAction {
+ // mask out the previous return value
+ baseAction := a & SECCOMP_RET_ACTION_FULL
+ if baseAction == SECCOMP_RET_ERRNO || baseAction == SECCOMP_RET_TRACE {
+ return BPFAction(uint32(baseAction) | uint32(code))
+ }
+ panic("WithReturnCode only valid for SECCOMP_RET_ERRNO and SECCOMP_RET_TRACE")
+}
+
// SockFprog is sock_fprog taken from <linux/filter.h>.
type SockFprog struct {
Len uint16
pad [6]byte
Filter *BPFInstruction
}
+
+// SeccompData is equivalent to struct seccomp_data, which contains the data
+// passed to seccomp-bpf filters.
+//
+// +marshal
+type SeccompData struct {
+ // Nr is the system call number.
+ Nr int32
+
+ // Arch is an AUDIT_ARCH_* value indicating the system call convention.
+ Arch uint32
+
+ // InstructionPointer is the value of the instruction pointer at the time
+ // of the system call.
+ InstructionPointer uint64
+
+ // Args contains the first 6 system call arguments.
+ Args [6]uint64
+}
diff --git a/pkg/abi/linux/sem.go b/pkg/abi/linux/sem.go
index de422c519..1b2f76c0b 100644
--- a/pkg/abi/linux/sem.go
+++ b/pkg/abi/linux/sem.go
@@ -34,17 +34,9 @@ const (
const SEM_UNDO = 0x1000
-// SemidDS is equivalent to struct semid64_ds.
-type SemidDS struct {
- SemPerm IPCPerm
- SemOTime TimeT
- SemCTime TimeT
- SemNSems uint64
- unused3 uint64
- unused4 uint64
-}
-
// Sembuf is equivalent to struct sembuf.
+//
+// +marshal slice:SembufSlice
type Sembuf struct {
SemNum uint16
SemOp int16
diff --git a/pkg/abi/linux/sem_amd64.go b/pkg/abi/linux/sem_amd64.go
new file mode 100644
index 000000000..ab980cb4f
--- /dev/null
+++ b/pkg/abi/linux/sem_amd64.go
@@ -0,0 +1,33 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// SemidDS is equivalent to struct semid64_ds.
+//
+// Source: arch/x86/include/uapi/asm/sembuf.h
+//
+// +marshal
+type SemidDS struct {
+ SemPerm IPCPerm
+ SemOTime TimeT
+ unused1 uint64
+ SemCTime TimeT
+ unused2 uint64
+ SemNSems uint64
+ unused3 uint64
+ unused4 uint64
+}
diff --git a/pkg/abi/linux/sem_arm64.go b/pkg/abi/linux/sem_arm64.go
new file mode 100644
index 000000000..521468fb1
--- /dev/null
+++ b/pkg/abi/linux/sem_arm64.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package linux
+
+// SemidDS is equivalent to struct semid64_ds.
+//
+// Source: include/uapi/asm-generic/sembuf.h
+//
+// +marshal
+type SemidDS struct {
+ SemPerm IPCPerm
+ SemOTime TimeT
+ SemCTime TimeT
+ SemNSems uint64
+ unused3 uint64
+ unused4 uint64
+}
diff --git a/pkg/abi/linux/shm.go b/pkg/abi/linux/shm.go
index e45aadb10..274b1e847 100644
--- a/pkg/abi/linux/shm.go
+++ b/pkg/abi/linux/shm.go
@@ -51,6 +51,8 @@ const (
// ShmidDS is equivalent to struct shmid64_ds. Source:
// include/uapi/asm-generic/shmbuf.h
+//
+// +marshal
type ShmidDS struct {
ShmPerm IPCPerm
ShmSegsz uint64
@@ -66,6 +68,8 @@ type ShmidDS struct {
}
// ShmParams is equivalent to struct shminfo. Source: include/uapi/linux/shm.h
+//
+// +marshal
type ShmParams struct {
ShmMax uint64
ShmMin uint64
@@ -75,6 +79,8 @@ type ShmParams struct {
}
// ShmInfo is equivalent to struct shm_info. Source: include/uapi/linux/shm.h
+//
+// +marshal
type ShmInfo struct {
UsedIDs int32 // Number of currently existing segments.
_ [4]byte
diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go
index 1c330e763..6ca57ffbb 100644
--- a/pkg/abi/linux/signal.go
+++ b/pkg/abi/linux/signal.go
@@ -214,6 +214,8 @@ const (
)
// Sigevent represents struct sigevent.
+//
+// +marshal
type Sigevent struct {
Value uint64 // union sigval {int, void*}
Signo int32
diff --git a/pkg/abi/linux/signalfd.go b/pkg/abi/linux/signalfd.go
index 85fad9956..468c6a387 100644
--- a/pkg/abi/linux/signalfd.go
+++ b/pkg/abi/linux/signalfd.go
@@ -23,6 +23,8 @@ const (
)
// SignalfdSiginfo is the siginfo encoding for signalfds.
+//
+// +marshal
type SignalfdSiginfo struct {
Signo uint32
Errno int32
@@ -41,5 +43,5 @@ type SignalfdSiginfo struct {
STime uint64
Addr uint64
AddrLSB uint16
- _ [48]uint8
+ _ [48]uint8 `marshal:"unaligned"`
}
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index 4a14ef691..d156d41e4 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -14,7 +14,10 @@
package linux
-import "gvisor.dev/gvisor/pkg/binary"
+import (
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/marshal"
+)
// Address families, from linux/socket.h.
const (
@@ -83,7 +86,6 @@ const (
MSG_MORE = 0x8000
MSG_WAITFORONE = 0x10000
MSG_SENDPAGE_NOTLAST = 0x20000
- MSG_REINJECT = 0x8000000
MSG_ZEROCOPY = 0x4000000
MSG_FASTOPEN = 0x20000000
MSG_CMSG_CLOEXEC = 0x40000000
@@ -134,6 +136,15 @@ const (
SHUT_RDWR = 2
)
+// Packet types from <linux/if_packet.h>
+const (
+ PACKET_HOST = 0 // To us
+ PACKET_BROADCAST = 1 // To all
+ PACKET_MULTICAST = 2 // To group
+ PACKET_OTHERHOST = 3 // To someone else
+ PACKET_OUTGOING = 4 // Outgoing of any type
+)
+
// Socket options from socket.h.
const (
SO_DEBUG = 1
@@ -225,14 +236,18 @@ const (
const SockAddrMax = 128
// InetAddr is struct in_addr, from uapi/linux/in.h.
+//
+// +marshal
type InetAddr [4]byte
// SockAddrInet is struct sockaddr_in, from uapi/linux/in.h.
+//
+// +marshal
type SockAddrInet struct {
Family uint16
Port uint16
Addr InetAddr
- Zero [8]uint8 // pad to sizeof(struct sockaddr).
+ _ [8]uint8 // pad to sizeof(struct sockaddr).
}
// InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h.
@@ -247,7 +262,14 @@ type InetMulticastRequestWithNIC struct {
InterfaceIndex int32
}
+// Inet6Addr is struct in6_addr, from uapi/linux/in6.h.
+//
+// +marshal
+type Inet6Addr [16]byte
+
// SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h.
+//
+// +marshal
type SockAddrInet6 struct {
Family uint16
Port uint16
@@ -257,6 +279,8 @@ type SockAddrInet6 struct {
}
// SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h.
+//
+// +marshal
type SockAddrLink struct {
Family uint16
Protocol uint16
@@ -273,6 +297,8 @@ type SockAddrLink struct {
const UnixPathMax = 108
// SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h.
+//
+// +marshal
type SockAddrUnix struct {
Family uint16
Path [UnixPathMax]int8
@@ -282,6 +308,8 @@ type SockAddrUnix struct {
// equivalent to struct sockaddr. SockAddr ensures that a well-defined set of
// types can be used as socket addresses.
type SockAddr interface {
+ marshal.Marshallable
+
// implementsSockAddr exists purely to allow a type to indicate that they
// implement this interface. This method is a no-op and shouldn't be called.
implementsSockAddr()
@@ -294,6 +322,8 @@ func (s *SockAddrUnix) implementsSockAddr() {}
func (s *SockAddrNetlink) implementsSockAddr() {}
// Linger is struct linger, from include/linux/socket.h.
+//
+// +marshal
type Linger struct {
OnOff int32
Linger int32
@@ -308,6 +338,8 @@ const SizeOfLinger = 8
// the end of this struct or within existing unusued space, so its size grows
// over time. The current iteration is based on linux v4.17. New versions are
// always backwards compatible.
+//
+// +marshal
type TCPInfo struct {
State uint8
CaState uint8
@@ -405,6 +437,8 @@ var SizeOfControlMessageHeader = int(binary.Size(ControlMessageHeader{}))
// A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
//
// ControlMessageCredentials represents struct ucred from linux/socket.h.
+//
+// +marshal
type ControlMessageCredentials struct {
PID int32
UID uint32
diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go
index 174d470e2..2a8d4708b 100644
--- a/pkg/abi/linux/tcp.go
+++ b/pkg/abi/linux/tcp.go
@@ -57,4 +57,5 @@ const (
const (
MAX_TCP_KEEPIDLE = 32767
MAX_TCP_KEEPINTVL = 32767
+ MAX_TCP_KEEPCNT = 127
)
diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go
index e6860ed49..206f5af7e 100644
--- a/pkg/abi/linux/time.go
+++ b/pkg/abi/linux/time.go
@@ -93,6 +93,8 @@ const (
const maxSecInDuration = math.MaxInt64 / int64(time.Second)
// TimeT represents time_t in <time.h>. It represents time in seconds.
+//
+// +marshal
type TimeT int64
// NsecToTimeT translates nanoseconds to TimeT (seconds).
@@ -102,7 +104,7 @@ func NsecToTimeT(nsec int64) TimeT {
// Timespec represents struct timespec in <time.h>.
//
-// +marshal
+// +marshal slice:TimespecSlice
type Timespec struct {
Sec int64
Nsec int64
@@ -158,7 +160,7 @@ const SizeOfTimeval = 16
// Timeval represents struct timeval in <time.h>.
//
-// +marshal
+// +marshal slice:TimevalSlice
type Timeval struct {
Sec int64
Usec int64
@@ -196,6 +198,8 @@ func DurationToTimeval(dur time.Duration) Timeval {
}
// Itimerspec represents struct itimerspec in <time.h>.
+//
+// +marshal
type Itimerspec struct {
Interval Timespec
Value Timespec
@@ -206,12 +210,16 @@ type Itimerspec struct {
// struct timeval it_interval; /* next value */
// struct timeval it_value; /* current value */
// };
+//
+// +marshal
type ItimerVal struct {
Interval Timeval
Value Timeval
}
// ClockT represents type clock_t.
+//
+// +marshal
type ClockT int64
// ClockTFromDuration converts time.Duration to clock_t.
@@ -220,6 +228,8 @@ func ClockTFromDuration(d time.Duration) ClockT {
}
// Tms represents struct tms, used by times(2).
+//
+// +marshal
type Tms struct {
UTime ClockT
STime ClockT
@@ -229,6 +239,8 @@ type Tms struct {
// TimerID represents type timer_t, which identifies a POSIX per-process
// interval timer.
+//
+// +marshal
type TimerID int32
// StatxTimestamp represents struct statx_timestamp.
diff --git a/pkg/abi/linux/tty.go b/pkg/abi/linux/tty.go
index 8ac02aee8..47e65d9fb 100644
--- a/pkg/abi/linux/tty.go
+++ b/pkg/abi/linux/tty.go
@@ -23,6 +23,8 @@ const (
)
// Winsize is struct winsize, defined in uapi/asm-generic/termios.h.
+//
+// +marshal
type Winsize struct {
Row uint16
Col uint16
@@ -31,6 +33,8 @@ type Winsize struct {
}
// Termios is struct termios, defined in uapi/asm-generic/termbits.h.
+//
+// +marshal
type Termios struct {
InputFlags uint32
OutputFlags uint32
@@ -321,9 +325,9 @@ var MasterTermios = KernelTermios{
OutputSpeed: 38400,
}
-// DefaultSlaveTermios is the default terminal configuration of the slave end
-// of a Unix98 pseudoterminal.
-var DefaultSlaveTermios = KernelTermios{
+// DefaultReplicaTermios is the default terminal configuration of the replica
+// end of a Unix98 pseudoterminal.
+var DefaultReplicaTermios = KernelTermios{
InputFlags: ICRNL | IXON,
OutputFlags: OPOST | ONLCR,
ControlFlags: B38400 | CS8 | CREAD,
@@ -337,6 +341,7 @@ var DefaultSlaveTermios = KernelTermios{
// include/uapi/asm-generic/termios.h.
//
// +stateify savable
+// +marshal
type WindowSize struct {
Rows uint16
Cols uint16
diff --git a/pkg/abi/linux/utsname.go b/pkg/abi/linux/utsname.go
index 60f220a67..cb7c95437 100644
--- a/pkg/abi/linux/utsname.go
+++ b/pkg/abi/linux/utsname.go
@@ -26,6 +26,8 @@ const (
)
// UtsName represents struct utsname, the struct returned by uname(2).
+//
+// +marshal
type UtsName struct {
Sysname [UTSLen + 1]byte
Nodename [UTSLen + 1]byte
diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go
index 99180b208..8ef837f27 100644
--- a/pkg/abi/linux/xattr.go
+++ b/pkg/abi/linux/xattr.go
@@ -23,6 +23,9 @@ const (
XATTR_CREATE = 1
XATTR_REPLACE = 2
+ XATTR_TRUSTED_PREFIX = "trusted."
+ XATTR_TRUSTED_PREFIX_LEN = len(XATTR_TRUSTED_PREFIX)
+
XATTR_USER_PREFIX = "user."
XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX)
)