summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/mm/procfs.go92
-rwxr-xr-xpkg/sentry/platform/ring0/defs_impl.go4
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go45
-rw-r--r--pkg/sentry/socket/epsocket/stack.go4
-rw-r--r--pkg/sentry/socket/hostinet/stack.go8
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go254
-rw-r--r--pkg/sentry/socket/unix/unix.go2
-rw-r--r--pkg/sentry/strace/socket.go2
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go5
-rw-r--r--pkg/tcpip/tcpip.go3
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go10
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go10
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go10
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go6
14 files changed, 382 insertions, 73 deletions
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index a8819aa84..8c2246bb4 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -58,6 +58,34 @@ func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
return true
}
+// ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var start usermem.Addr
+
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ mm.appendVMAMapsEntryLocked(ctx, vseg, buf)
+ }
+
+ // We always emulate vsyscall, so advertise it here. Everything about a
+ // vsyscall region is static, so just hard code the maps entry since we
+ // don't have a real vma backing it. The vsyscall region is at the end of
+ // the virtual address space so nothing should be mapped after it (if
+ // something is really mapped in the tiny ~10 MiB segment afterwards, we'll
+ // get the sorting on the maps file wrong at worst; but that's not possible
+ // on any current platform).
+ //
+ // Artifically adjust the seqfile handle so we only output vsyscall entry once.
+ if start != vsyscallEnd {
+ // FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+ buf.WriteString(vsyscallMapsEntry)
+ }
+}
+
// ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
// implement /proc/[pid]/maps.
func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -151,6 +179,27 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI
b.WriteString("\n")
}
+// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var start usermem.Addr
+
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
+ }
+
+ // We always emulate vsyscall, so advertise it here. See
+ // ReadMapsSeqFileData for additional commentary.
+ if start != vsyscallEnd {
+ // FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+ buf.WriteString(vsyscallSmapsEntry)
+ }
+}
+
// ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
// implement /proc/[pid]/smaps.
func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -190,7 +239,12 @@ func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfil
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
var b bytes.Buffer
- mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+ mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b)
+ return b.Bytes()
+}
+
+func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
+ mm.appendVMAMapsEntryLocked(ctx, vseg, b)
vma := vseg.ValuePtr()
// We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of
@@ -211,40 +265,40 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
}
mm.activeMu.RUnlock()
- fmt.Fprintf(&b, "Size: %8d kB\n", vseg.Range().Length()/1024)
- fmt.Fprintf(&b, "Rss: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Size: %8d kB\n", vseg.Range().Length()/1024)
+ fmt.Fprintf(b, "Rss: %8d kB\n", rss/1024)
// Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
// is only mapped by that pma. This avoids having to query memmap.Mappables
// for reference count information on each page. As a corollary, all pages
// are accounted as "private" whether or not the vma is private; compare
// Linux's fs/proc/task_mmu.c:smaps_account().
- fmt.Fprintf(&b, "Pss: %8d kB\n", rss/1024)
- fmt.Fprintf(&b, "Shared_Clean: %8d kB\n", 0)
- fmt.Fprintf(&b, "Shared_Dirty: %8d kB\n", 0)
+ fmt.Fprintf(b, "Pss: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Shared_Clean: %8d kB\n", 0)
+ fmt.Fprintf(b, "Shared_Dirty: %8d kB\n", 0)
// Pretend that all pages are dirty if the vma is writable, and clean otherwise.
clean := rss
if vma.effectivePerms.Write {
clean = 0
}
- fmt.Fprintf(&b, "Private_Clean: %8d kB\n", clean/1024)
- fmt.Fprintf(&b, "Private_Dirty: %8d kB\n", (rss-clean)/1024)
+ fmt.Fprintf(b, "Private_Clean: %8d kB\n", clean/1024)
+ fmt.Fprintf(b, "Private_Dirty: %8d kB\n", (rss-clean)/1024)
// Pretend that all pages are "referenced" (recently touched).
- fmt.Fprintf(&b, "Referenced: %8d kB\n", rss/1024)
- fmt.Fprintf(&b, "Anonymous: %8d kB\n", anon/1024)
+ fmt.Fprintf(b, "Referenced: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Anonymous: %8d kB\n", anon/1024)
// Hugepages (hugetlb and THP) are not implemented.
- fmt.Fprintf(&b, "AnonHugePages: %8d kB\n", 0)
- fmt.Fprintf(&b, "Shared_Hugetlb: %8d kB\n", 0)
- fmt.Fprintf(&b, "Private_Hugetlb: %7d kB\n", 0)
+ fmt.Fprintf(b, "AnonHugePages: %8d kB\n", 0)
+ fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0)
+ fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0)
// Swap is not implemented.
- fmt.Fprintf(&b, "Swap: %8d kB\n", 0)
- fmt.Fprintf(&b, "SwapPss: %8d kB\n", 0)
- fmt.Fprintf(&b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
- fmt.Fprintf(&b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(b, "Swap: %8d kB\n", 0)
+ fmt.Fprintf(b, "SwapPss: %8d kB\n", 0)
+ fmt.Fprintf(b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
locked := rss
if vma.mlockMode == memmap.MLockNone {
locked = 0
}
- fmt.Fprintf(&b, "Locked: %8d kB\n", locked/1024)
+ fmt.Fprintf(b, "Locked: %8d kB\n", locked/1024)
b.WriteString("VmFlags: ")
if vma.realPerms.Read {
@@ -284,6 +338,4 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
b.WriteString("ac ")
}
b.WriteString("\n")
-
- return b.Bytes()
}
diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go
index a30a9dd4a..8efc3825f 100755
--- a/pkg/sentry/platform/ring0/defs_impl.go
+++ b/pkg/sentry/platform/ring0/defs_impl.go
@@ -1,13 +1,13 @@
package ring0
import (
+ "fmt"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"io"
"reflect"
"syscall"
- "fmt"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/usermem"
)
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 4d8a5ac22..635042263 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -291,18 +291,22 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
return tcpip.Address(addr)
}
-// GetAddress reads an sockaddr struct from the given address and converts it
-// to the FullAddress format. It supports AF_UNIX, AF_INET and AF_INET6
-// addresses.
-func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syserr.Error) {
+// AddressAndFamily reads an sockaddr struct from the given address and
+// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
+// AF_INET6 addresses.
+//
+// strict indicates whether addresses with the AF_UNSPEC family are accepted of not.
+//
+// AddressAndFamily returns an address, its family.
+func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
// Make sure we have at least 2 bytes for the address family.
if len(addr) < 2 {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
}
family := usermem.ByteOrder.Uint16(addr)
if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) {
- return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+ return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported
}
// Get the rest of the fields based on the address family.
@@ -310,7 +314,7 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
case linux.AF_UNIX:
path := addr[2:]
if len(path) > linux.UnixPathMax {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
// Drop the terminating NUL (if one exists) and everything after
// it for filesystem (non-abstract) addresses.
@@ -321,12 +325,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
}
return tcpip.FullAddress{
Addr: tcpip.Address(path),
- }, nil
+ }, family, nil
case linux.AF_INET:
var a linux.SockAddrInet
if len(addr) < sockAddrInetSize {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
@@ -334,12 +338,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
Addr: bytesToIPAddress(a.Addr[:]),
Port: ntohs(a.Port),
}
- return out, nil
+ return out, family, nil
case linux.AF_INET6:
var a linux.SockAddrInet6
if len(addr) < sockAddrInet6Size {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
@@ -350,13 +354,13 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
if isLinkLocal(out.Addr) {
out.NIC = tcpip.NICID(a.Scope_id)
}
- return out, nil
+ return out, family, nil
case linux.AF_UNSPEC:
- return tcpip.FullAddress{}, nil
+ return tcpip.FullAddress{}, family, nil
default:
- return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+ return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
}
}
@@ -482,11 +486,18 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
// Connect implements the linux syscall connect(2) for sockets backed by
// tpcip.Endpoint.
func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr, false /* strict */)
+ addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */)
if err != nil {
return err
}
+ if family == linux.AF_UNSPEC {
+ err := s.Endpoint.Disconnect()
+ if err == tcpip.ErrNotSupported {
+ return syserr.ErrAddressFamilyNotSupported
+ }
+ return syserr.TranslateNetstackError(err)
+ }
// Always return right away in the non-blocking case.
if !blocking {
return syserr.TranslateNetstackError(s.Endpoint.Connect(addr))
@@ -515,7 +526,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
// Bind implements the linux syscall bind(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr, true /* strict */)
+ addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */)
if err != nil {
return err
}
@@ -2023,7 +2034,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
var addr *tcpip.FullAddress
if len(to) > 0 {
- addrBuf, err := GetAddress(s.family, to, true /* strict */)
+ addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */)
if err != nil {
return 0, err
}
diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/epsocket/stack.go
index 8f1572bf4..1b11f4b2d 100644
--- a/pkg/sentry/socket/epsocket/stack.go
+++ b/pkg/sentry/socket/epsocket/stack.go
@@ -198,8 +198,8 @@ func (s *Stack) IPTables() (iptables.IPTables, error) {
// FillDefaultIPTables sets the stack's iptables to the default tables, which
// allow and do not modify all traffic.
-func (s *Stack) FillDefaultIPTables() error {
- return netfilter.FillDefaultIPTables(s.Stack)
+func (s *Stack) FillDefaultIPTables() {
+ netfilter.FillDefaultIPTables(s.Stack)
}
// Resume implements inet.Stack.Resume.
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 1902fe155..3a4fdec47 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -203,8 +203,14 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error)
inetRoute.DstAddr = attr.Value
case syscall.RTA_SRC:
inetRoute.SrcAddr = attr.Value
- case syscall.RTA_OIF:
+ case syscall.RTA_GATEWAY:
inetRoute.GatewayAddr = attr.Value
+ case syscall.RTA_OIF:
+ expected := int(binary.Size(inetRoute.OutputInterface))
+ if len(attr.Value) != expected {
+ return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected)
+ }
+ binary.Unmarshal(attr.Value, usermem.ByteOrder, &inetRoute.OutputInterface)
}
}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index efdb42903..9f87c32f1 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,7 +17,10 @@
package netfilter
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/syserr"
@@ -26,21 +29,258 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
+// errorTargetName is used to mark targets as error targets. Error targets
+// shouldn't be reached - an error has occurred if we fall through to one.
+const errorTargetName = "ERROR"
+
+// metadata is opaque to netstack. It holds data that we need to translate
+// between Linux's and netstack's iptables representations.
+type metadata struct {
+ HookEntry [linux.NF_INET_NUMHOOKS]uint32
+ Underflow [linux.NF_INET_NUMHOOKS]uint32
+ NumEntries uint32
+ Size uint32
+}
+
// GetInfo returns information about iptables.
func GetInfo(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
- // TODO(b/129292233): Implement.
- return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
+ // Read in the struct and table name.
+ var info linux.IPTGetinfo
+ if _, err := t.CopyIn(outPtr, &info); err != nil {
+ return linux.IPTGetinfo{}, syserr.FromError(err)
+ }
+
+ // Find the appropriate table.
+ table, err := findTable(ep, info.TableName())
+ if err != nil {
+ return linux.IPTGetinfo{}, err
+ }
+
+ // Get the hooks that apply to this table.
+ info.ValidHooks = table.ValidHooks()
+
+ // Grab the metadata struct, which is used to store information (e.g.
+ // the number of entries) that applies to the user's encoding of
+ // iptables, but not netstack's.
+ metadata := table.Metadata().(metadata)
+
+ // Set values from metadata.
+ info.HookEntry = metadata.HookEntry
+ info.Underflow = metadata.Underflow
+ info.NumEntries = metadata.NumEntries
+ info.Size = metadata.Size
+
+ return info, nil
}
// GetEntries returns netstack's iptables rules encoded for the iptables tool.
func GetEntries(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
- // TODO(b/129292233): Implement.
- return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+ // Read in the struct and table name.
+ var userEntries linux.IPTGetEntries
+ if _, err := t.CopyIn(outPtr, &userEntries); err != nil {
+ return linux.KernelIPTGetEntries{}, syserr.FromError(err)
+ }
+
+ // Find the appropriate table.
+ table, err := findTable(ep, userEntries.TableName())
+ if err != nil {
+ return linux.KernelIPTGetEntries{}, err
+ }
+
+ // Convert netstack's iptables rules to something that the iptables
+ // tool can understand.
+ entries, _, err := convertNetstackToBinary(userEntries.TableName(), table)
+ if err != nil {
+ return linux.KernelIPTGetEntries{}, err
+ }
+ if binary.Size(entries) > uintptr(outLen) {
+ return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+ }
+
+ return entries, nil
+}
+
+func findTable(ep tcpip.Endpoint, tableName string) (iptables.Table, *syserr.Error) {
+ ipt, err := ep.IPTables()
+ if err != nil {
+ return iptables.Table{}, syserr.FromError(err)
+ }
+ table, ok := ipt.Tables[tableName]
+ if !ok {
+ return iptables.Table{}, syserr.ErrInvalidArgument
+ }
+ return table, nil
}
// FillDefaultIPTables sets stack's IPTables to the default tables and
// populates them with metadata.
-func FillDefaultIPTables(stack *stack.Stack) error {
- stack.SetIPTables(iptables.DefaultTables())
- return nil
+func FillDefaultIPTables(stack *stack.Stack) {
+ ipt := iptables.DefaultTables()
+
+ // In order to fill in the metadata, we have to translate ipt from its
+ // netstack format to Linux's giant-binary-blob format.
+ for name, table := range ipt.Tables {
+ _, metadata, err := convertNetstackToBinary(name, table)
+ if err != nil {
+ panic(fmt.Errorf("Unable to set default IP tables: %v", err))
+ }
+ table.SetMetadata(metadata)
+ ipt.Tables[name] = table
+ }
+
+ stack.SetIPTables(ipt)
+}
+
+// convertNetstackToBinary converts the iptables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a bit, reading some offsets,
+// jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) {
+ // Return values.
+ var entries linux.KernelIPTGetEntries
+ var meta metadata
+
+ // The table name has to fit in the struct.
+ if linux.XT_TABLE_MAXNAMELEN < len(name) {
+ return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+ }
+ copy(entries.Name[:], name)
+
+ // Deal with the built in chains first (INPUT, OUTPUT, etc.). Each of
+ // these chains ends with an unconditional policy entry.
+ for hook := iptables.Prerouting; hook < iptables.NumHooks; hook++ {
+ chain, ok := table.BuiltinChains[hook]
+ if !ok {
+ // This table doesn't support this hook.
+ continue
+ }
+
+ // Sanity check.
+ if len(chain.Rules) < 1 {
+ return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+ }
+
+ for ruleIdx, rule := range chain.Rules {
+ // If this is the first rule of a builtin chain, set
+ // the metadata hook entry point.
+ if ruleIdx == 0 {
+ meta.HookEntry[hook] = entries.Size
+ }
+
+ // Each rule corresponds to an entry.
+ entry := linux.KernelIPTEntry{
+ IPTEntry: linux.IPTEntry{
+ NextOffset: linux.SizeOfIPTEntry,
+ TargetOffset: linux.SizeOfIPTEntry,
+ },
+ }
+
+ for _, matcher := range rule.Matchers {
+ // Serialize the matcher and add it to the
+ // entry.
+ serialized := marshalMatcher(matcher)
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.NextOffset += uint16(len(serialized))
+ entry.TargetOffset += uint16(len(serialized))
+ }
+
+ // Serialize and append the target.
+ serialized := marshalTarget(rule.Target)
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.NextOffset += uint16(len(serialized))
+
+ // The underflow rule is the last rule in the chain,
+ // and is an unconditional rule (i.e. it matches any
+ // packet). This is enforced when saving iptables.
+ if ruleIdx == len(chain.Rules)-1 {
+ meta.Underflow[hook] = entries.Size
+ }
+
+ entries.Size += uint32(entry.NextOffset)
+ entries.Entrytable = append(entries.Entrytable, entry)
+ meta.NumEntries++
+ }
+
+ }
+
+ // TODO(gvisor.dev/issue/170): Deal with the user chains here. Each of
+ // these starts with an error node holding the chain's name and ends
+ // with an unconditional return.
+
+ // Lastly, each table ends with an unconditional error target rule as
+ // its final entry.
+ errorEntry := linux.KernelIPTEntry{
+ IPTEntry: linux.IPTEntry{
+ NextOffset: linux.SizeOfIPTEntry,
+ TargetOffset: linux.SizeOfIPTEntry,
+ },
+ }
+ var errorTarget linux.XTErrorTarget
+ errorTarget.Target.TargetSize = linux.SizeOfXTErrorTarget
+ copy(errorTarget.ErrorName[:], errorTargetName)
+ copy(errorTarget.Target.Name[:], errorTargetName)
+
+ // Serialize and add it to the list of entries.
+ errorTargetBuf := make([]byte, 0, linux.SizeOfXTErrorTarget)
+ serializedErrorTarget := binary.Marshal(errorTargetBuf, usermem.ByteOrder, errorTarget)
+ errorEntry.Elems = append(errorEntry.Elems, serializedErrorTarget...)
+ errorEntry.NextOffset += uint16(len(serializedErrorTarget))
+
+ entries.Size += uint32(errorEntry.NextOffset)
+ entries.Entrytable = append(entries.Entrytable, errorEntry)
+ meta.NumEntries++
+ meta.Size = entries.Size
+
+ return entries, meta, nil
+}
+
+func marshalMatcher(matcher iptables.Matcher) []byte {
+ switch matcher.(type) {
+ default:
+ // TODO(gvisor.dev/issue/170): We don't support any matchers yet, so
+ // any call to marshalMatcher will panic.
+ panic(fmt.Errorf("unknown matcher of type %T", matcher))
+ }
+}
+
+func marshalTarget(target iptables.Target) []byte {
+ switch target.(type) {
+ case iptables.UnconditionalAcceptTarget:
+ return marshalUnconditionalAcceptTarget()
+ default:
+ panic(fmt.Errorf("unknown target of type %T", target))
+ }
+}
+
+func marshalUnconditionalAcceptTarget() []byte {
+ // The target's name will be the empty string.
+ target := linux.XTStandardTarget{
+ Target: linux.XTEntryTarget{
+ TargetSize: linux.SizeOfXTStandardTarget,
+ },
+ Verdict: translateStandardVerdict(iptables.Accept),
+ }
+
+ ret := make([]byte, 0, linux.SizeOfXTStandardTarget)
+ return binary.Marshal(ret, usermem.ByteOrder, target)
+}
+
+// translateStandardVerdict translates verdicts the same way as the iptables
+// tool.
+func translateStandardVerdict(verdict iptables.Verdict) int32 {
+ switch verdict {
+ case iptables.Accept:
+ return -linux.NF_ACCEPT - 1
+ case iptables.Drop:
+ return -linux.NF_DROP - 1
+ case iptables.Queue:
+ return -linux.NF_QUEUE - 1
+ case iptables.Return:
+ return linux.NF_RETURN
+ case iptables.Jump:
+ // TODO(gvisor.dev/issue/170): Support Jump.
+ panic("Jump isn't supported yet")
+ default:
+ panic(fmt.Sprintf("unknown standard verdict: %d", verdict))
+ }
}
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 8a3f65236..0d0cb68df 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -116,7 +116,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint {
// extractPath extracts and validates the address.
func extractPath(sockaddr []byte) (string, *syserr.Error) {
- addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr, true /* strict */)
+ addr, _, err := epsocket.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */)
if err != nil {
return "", err
}
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index 386b40af7..f779186ad 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
switch family {
case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX:
- fa, err := epsocket.GetAddress(int(family), b, true /* strict */)
+ fa, _, err := epsocket.AddressAndFamily(int(family), b, true /* strict */)
if err != nil {
return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err)
}
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index fc584c6a4..36c8c46fc 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -360,10 +360,9 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize {
srcPort = udp.SourcePort()
dstPort = udp.DestinationPort()
+ details = fmt.Sprintf("xsum: 0x%x", udp.Checksum())
+ size -= header.UDPMinimumSize
}
- size -= header.UDPMinimumSize
-
- details = fmt.Sprintf("xsum: 0x%x", udp.Checksum())
case header.TCPProtocolNumber:
transName = "tcp"
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index ba2dd85b8..29a6025d9 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -358,6 +358,9 @@ type Endpoint interface {
// ErrAddressFamilyNotSupported must be returned.
Connect(address FullAddress) *Error
+ // Disconnect disconnects the endpoint from its peer.
+ Disconnect() *Error
+
// Shutdown closes the read and/or write end of the endpoint connection
// to its peer.
Shutdown(flags ShutdownFlags) *Error
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 2e8d5d4bf..451d3880e 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -428,16 +428,16 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
return netProto, nil
}
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
// Connect connects the endpoint to its peer. Specifying a NIC is optional.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
e.mu.Lock()
defer e.mu.Unlock()
- if addr.Addr == "" {
- // AF_UNSPEC isn't supported.
- return tcpip.ErrAddressFamilyNotSupported
- }
-
nicid := addr.NIC
localPort := uint16(0)
switch e.state {
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 53c9515a4..13e17e2a6 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -349,16 +349,16 @@ func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error)
return 0, tcpip.ControlMessages{}, nil
}
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
// Connect implements tcpip.Endpoint.Connect.
func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
ep.mu.Lock()
defer ep.mu.Unlock()
- if addr.Addr == "" {
- // AF_UNSPEC isn't supported.
- return tcpip.ErrAddressFamilyNotSupported
- }
-
if ep.closed {
return tcpip.ErrInvalidEndpointState
}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index e5f835c20..24b32e4af 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1362,13 +1362,13 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol
return netProto, nil
}
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
// Connect connects the endpoint to its peer.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
- if addr.Addr == "" && addr.Port == 0 {
- // AF_UNSPEC isn't supported.
- return tcpip.ErrAddressFamilyNotSupported
- }
-
return e.connect(addr, true, true)
}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 640bb8667..8b3356406 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -711,7 +711,8 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
return netProto, nil
}
-func (e *endpoint) disconnect() *tcpip.Error {
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (e *endpoint) Disconnect() *tcpip.Error {
e.mu.Lock()
defer e.mu.Unlock()
@@ -750,9 +751,6 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
if err != nil {
return err
}
- if addr.Addr == "" {
- return e.disconnect()
- }
if addr.Port == 0 {
// We don't support connecting to port zero.
return tcpip.ErrInvalidEndpointState