diff options
Diffstat (limited to 'pkg/abi/linux')
-rw-r--r-- | pkg/abi/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/abi/linux/netfilter.go | 89 | ||||
-rw-r--r-- | pkg/abi/linux/rseq.go | 130 | ||||
-rw-r--r-- | pkg/abi/linux/time.go | 13 |
4 files changed, 200 insertions, 33 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 9553f164d..716ff22d2 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -41,6 +41,7 @@ go_library( "poll.go", "prctl.go", "ptrace.go", + "rseq.go", "rusage.go", "sched.go", "seccomp.go", diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index 269ba5567..33fcc6c95 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -42,6 +42,15 @@ const ( NF_RETURN = -NF_REPEAT - 1 ) +// VerdictStrings maps int verdicts to the strings they represent. It is used +// for debugging. +var VerdictStrings = map[int32]string{ + -NF_DROP - 1: "DROP", + -NF_ACCEPT - 1: "ACCEPT", + -NF_QUEUE - 1: "QUEUE", + NF_RETURN: "RETURN", +} + // Socket options. These correspond to values in // include/uapi/linux/netfilter_ipv4/ip_tables.h. const ( @@ -179,7 +188,7 @@ const SizeOfXTCounters = 16 // the user data. type XTEntryMatch struct { MatchSize uint16 - Name [XT_EXTENSION_MAXNAMELEN]byte + Name ExtensionName Revision uint8 // Data is omitted here because it would cause XTEntryMatch to be an // extra byte larger (see http://www.catb.org/esr/structure-packing/). @@ -199,7 +208,7 @@ const SizeOfXTEntryMatch = 32 // the user data. type XTEntryTarget struct { TargetSize uint16 - Name [XT_EXTENSION_MAXNAMELEN]byte + Name ExtensionName Revision uint8 // Data is omitted here because it would cause XTEntryTarget to be an // extra byte larger (see http://www.catb.org/esr/structure-packing/). @@ -226,9 +235,9 @@ const SizeOfXTStandardTarget = 40 // ErrorName. It corresponds to struct xt_error_target in // include/uapi/linux/netfilter/x_tables.h. type XTErrorTarget struct { - Target XTEntryTarget - ErrorName [XT_FUNCTION_MAXNAMELEN]byte - _ [2]byte + Target XTEntryTarget + Name ErrorName + _ [2]byte } // SizeOfXTErrorTarget is the size of an XTErrorTarget. @@ -237,7 +246,7 @@ const SizeOfXTErrorTarget = 64 // IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds // to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetinfo struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName ValidHooks uint32 HookEntry [NF_INET_NUMHOOKS]uint32 Underflow [NF_INET_NUMHOOKS]uint32 @@ -248,16 +257,11 @@ type IPTGetinfo struct { // SizeOfIPTGetinfo is the size of an IPTGetinfo. const SizeOfIPTGetinfo = 84 -// TableName returns the table name. -func (info *IPTGetinfo) TableName() string { - return tableName(info.Name[:]) -} - // IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It // corresponds to struct ipt_get_entries in // include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetEntries struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName Size uint32 _ [4]byte // Entrytable is omitted here because it would cause IPTGetEntries to @@ -266,34 +270,22 @@ type IPTGetEntries struct { // Entrytable [0]IPTEntry } -// TableName returns the entries' table name. -func (entries *IPTGetEntries) TableName() string { - return tableName(entries.Name[:]) -} - // SizeOfIPTGetEntries is the size of an IPTGetEntries. const SizeOfIPTGetEntries = 40 -// KernelIPTGetEntries is identical to IPTEntry, but includes the Elems field. -// This struct marshaled via the binary package to write an KernelIPTGetEntries -// to userspace. +// KernelIPTGetEntries is identical to IPTGetEntries, but includes the +// Entrytable field. This struct marshaled via the binary package to write an +// KernelIPTGetEntries to userspace. type KernelIPTGetEntries struct { - Name [XT_TABLE_MAXNAMELEN]byte - Size uint32 - _ [4]byte + IPTGetEntries Entrytable []KernelIPTEntry } -// TableName returns the entries' table name. -func (entries *KernelIPTGetEntries) TableName() string { - return tableName(entries.Name[:]) -} - // IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It // corresponds to struct ipt_replace in // include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTReplace struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName ValidHooks uint32 NumEntries uint32 Size uint32 @@ -306,14 +298,45 @@ type IPTReplace struct { // Entries [0]IPTEntry } +// KernelIPTReplace is identical to IPTReplace, but includes the Entries field. +type KernelIPTReplace struct { + IPTReplace + Entries [0]IPTEntry +} + // SizeOfIPTReplace is the size of an IPTReplace. const SizeOfIPTReplace = 96 -func tableName(name []byte) string { - for i, c := range name { +// ExtensionName holds the name of a netfilter extension. +type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (en ExtensionName) String() string { + return goString(en[:]) +} + +// TableName holds the name of a netfilter table. +type TableName [XT_TABLE_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (tn TableName) String() string { + return goString(tn[:]) +} + +// ErrorName holds the name of a netfilter error. These can also hold +// user-defined chains. +type ErrorName [XT_FUNCTION_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (en ErrorName) String() string { + return goString(en[:]) +} + +func goString(cstring []byte) string { + for i, c := range cstring { if c == 0 { - return string(name[:i]) + return string(cstring[:i]) } } - return string(name) + return string(cstring) } diff --git a/pkg/abi/linux/rseq.go b/pkg/abi/linux/rseq.go new file mode 100644 index 000000000..76253ba30 --- /dev/null +++ b/pkg/abi/linux/rseq.go @@ -0,0 +1,130 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Flags passed to rseq(2). +// +// Defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_FLAG_UNREGISTER unregisters the current thread. + RSEQ_FLAG_UNREGISTER = 1 << 0 +) + +// Critical section flags used in RSeqCriticalSection.Flags and RSeq.Flags. +// +// Defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT inhibits restart on preemption. + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = 1 << 0 + + // RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL inhibits restart on signal + // delivery. + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = 1 << 1 + + // RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE inhibits restart on CPU + // migration. + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = 1 << 2 +) + +// RSeqCriticalSection describes a restartable sequences critical section. It +// is equivalent to struct rseq_cs, defined in include/uapi/linux/rseq.h. +// +// In userspace, this structure is always aligned to 32 bytes. +// +// +marshal +type RSeqCriticalSection struct { + // Version is the version of this structure. Version 0 is defined here. + Version uint32 + + // Flags are the critical section flags, defined above. + Flags uint32 + + // Start is the start address of the critical section. + Start uint64 + + // PostCommitOffset is the offset from Start of the first instruction + // outside of the critical section. + PostCommitOffset uint64 + + // Abort is the abort address. It must be outside the critical section, + // and the 4 bytes prior must match the abort signature. + Abort uint64 +} + +const ( + // SizeOfRSeqCriticalSection is the size of RSeqCriticalSection. + SizeOfRSeqCriticalSection = 32 + + // SizeOfRSeqSignature is the size of the signature immediately + // preceding RSeqCriticalSection.Abort. + SizeOfRSeqSignature = 4 +) + +// Special values for RSeq.CPUID, defined in include/uapi/linux/rseq.h. +const ( + // RSEQ_CPU_ID_UNINITIALIZED indicates that this thread has not + // performed rseq initialization. + RSEQ_CPU_ID_UNINITIALIZED = ^uint32(0) // -1 + + // RSEQ_CPU_ID_REGISTRATION_FAILED indicates that rseq initialization + // failed. + RSEQ_CPU_ID_REGISTRATION_FAILED = ^uint32(1) // -2 +) + +// RSeq is the thread-local restartable sequences config/status. It +// is equivalent to struct rseq, defined in include/uapi/linux/rseq.h. +// +// In userspace, this structure is always aligned to 32 bytes. +type RSeq struct { + // CPUIDStart contains the current CPU ID if rseq is initialized. + // + // This field should only be read by the thread which registered this + // structure, and must be read atomically. + CPUIDStart uint32 + + // CPUID contains the current CPU ID or one of the CPU ID special + // values defined above. + // + // This field should only be read by the thread which registered this + // structure, and must be read atomically. + CPUID uint32 + + // RSeqCriticalSection is a pointer to the current RSeqCriticalSection + // block, or NULL. It is reset to NULL by the kernel on restart or + // non-restarting preempt/signal. + // + // This field should only be written by the thread which registered + // this structure, and must be written atomically. + RSeqCriticalSection uint64 + + // Flags are the critical section flags that apply to all critical + // sections on this thread, defined above. + Flags uint32 +} + +const ( + // SizeOfRSeq is the size of RSeq. + // + // Note that RSeq is naively 24 bytes. However, it has 32-byte + // alignment, which in C increases sizeof to 32. That is the size that + // the Linux kernel uses. + SizeOfRSeq = 32 + + // AlignOfRSeq is the standard alignment of RSeq. + AlignOfRSeq = 32 + + // OffsetOfRSeqCriticalSection is the offset of RSeqCriticalSection in RSeq. + OffsetOfRSeqCriticalSection = 8 +) diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go index 546668bca..5c5a58cd4 100644 --- a/pkg/abi/linux/time.go +++ b/pkg/abi/linux/time.go @@ -234,6 +234,19 @@ type StatxTimestamp struct { _ int32 } +// ToNsec returns the nanosecond representation. +func (sxts StatxTimestamp) ToNsec() int64 { + return int64(sxts.Sec)*1e9 + int64(sxts.Nsec) +} + +// ToNsecCapped returns the safe nanosecond representation. +func (sxts StatxTimestamp) ToNsecCapped() int64 { + if sxts.Sec > maxSecInDuration { + return math.MaxInt64 + } + return sxts.ToNsec() +} + // NsecToStatxTimestamp translates nanoseconds to StatxTimestamp. func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) { return StatxTimestamp{ |