diff options
Diffstat (limited to 'pkg/sentry/socket')
-rw-r--r-- | pkg/sentry/socket/control/control.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/netfilter.go | 408 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/port/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/port/port.go | 3 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/socket.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 152 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/conn/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/conn/conn.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/notifier/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/notifier/notifier.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/transport/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/transport/connectioned.go | 3 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/transport/queue.go | 3 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/transport/unix.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 5 |
18 files changed, 469 insertions, 122 deletions
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 4301b697c..1684dfc24 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte { } // PackTOS packs an IP_TOS socket control message. -func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte { +func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IP, diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index 5eb06bbf4..b70047d81 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -14,6 +14,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/log", "//pkg/sentry/kernel", "//pkg/sentry/usermem", "//pkg/syserr", diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 9f87c32f1..a9cfc1749 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" @@ -35,6 +36,7 @@ const errorTargetName = "ERROR" // metadata is opaque to netstack. It holds data that we need to translate // between Linux's and netstack's iptables representations. +// TODO(gvisor.dev/issue/170): This might be removable. type metadata struct { HookEntry [linux.NF_INET_NUMHOOKS]uint32 Underflow [linux.NF_INET_NUMHOOKS]uint32 @@ -51,7 +53,7 @@ func GetInfo(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr) (linux.IPTG } // Find the appropriate table. - table, err := findTable(ep, info.TableName()) + table, err := findTable(ep, info.Name) if err != nil { return linux.IPTGetinfo{}, err } @@ -82,30 +84,31 @@ func GetEntries(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr, outLen i } // Find the appropriate table. - table, err := findTable(ep, userEntries.TableName()) + table, err := findTable(ep, userEntries.Name) if err != nil { return linux.KernelIPTGetEntries{}, err } // Convert netstack's iptables rules to something that the iptables // tool can understand. - entries, _, err := convertNetstackToBinary(userEntries.TableName(), table) + entries, _, err := convertNetstackToBinary(userEntries.Name.String(), table) if err != nil { return linux.KernelIPTGetEntries{}, err } if binary.Size(entries) > uintptr(outLen) { + log.Warningf("Insufficient GetEntries output size: %d", uintptr(outLen)) return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument } return entries, nil } -func findTable(ep tcpip.Endpoint, tableName string) (iptables.Table, *syserr.Error) { +func findTable(ep tcpip.Endpoint, tablename linux.TableName) (iptables.Table, *syserr.Error) { ipt, err := ep.IPTables() if err != nil { return iptables.Table{}, syserr.FromError(err) } - table, ok := ipt.Tables[tableName] + table, ok := ipt.Tables[tablename.String()] if !ok { return iptables.Table{}, syserr.ErrInvalidArgument } @@ -135,110 +138,68 @@ func FillDefaultIPTables(stack *stack.Stack) { // format expected by the iptables tool. Linux stores each table as a binary // blob that can only be traversed by parsing a bit, reading some offsets, // jumping to those offsets, parsing again, etc. -func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) { +func convertNetstackToBinary(tablename string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) { // Return values. var entries linux.KernelIPTGetEntries var meta metadata // The table name has to fit in the struct. - if linux.XT_TABLE_MAXNAMELEN < len(name) { + if linux.XT_TABLE_MAXNAMELEN < len(tablename) { + log.Warningf("Table name %q too long.", tablename) return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument } - copy(entries.Name[:], name) - - // Deal with the built in chains first (INPUT, OUTPUT, etc.). Each of - // these chains ends with an unconditional policy entry. - for hook := iptables.Prerouting; hook < iptables.NumHooks; hook++ { - chain, ok := table.BuiltinChains[hook] - if !ok { - // This table doesn't support this hook. - continue - } - - // Sanity check. - if len(chain.Rules) < 1 { - return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument - } + copy(entries.Name[:], tablename) - for ruleIdx, rule := range chain.Rules { - // If this is the first rule of a builtin chain, set - // the metadata hook entry point. - if ruleIdx == 0 { + for ruleIdx, rule := range table.Rules { + // Is this a chain entry point? + for hook, hookRuleIdx := range table.BuiltinChains { + if hookRuleIdx == ruleIdx { meta.HookEntry[hook] = entries.Size } - - // Each rule corresponds to an entry. - entry := linux.KernelIPTEntry{ - IPTEntry: linux.IPTEntry{ - NextOffset: linux.SizeOfIPTEntry, - TargetOffset: linux.SizeOfIPTEntry, - }, + } + // Is this a chain underflow point? + for underflow, underflowRuleIdx := range table.Underflows { + if underflowRuleIdx == ruleIdx { + meta.Underflow[underflow] = entries.Size } + } - for _, matcher := range rule.Matchers { - // Serialize the matcher and add it to the - // entry. - serialized := marshalMatcher(matcher) - entry.Elems = append(entry.Elems, serialized...) - entry.NextOffset += uint16(len(serialized)) - entry.TargetOffset += uint16(len(serialized)) - } + // Each rule corresponds to an entry. + entry := linux.KernelIPTEntry{ + IPTEntry: linux.IPTEntry{ + NextOffset: linux.SizeOfIPTEntry, + TargetOffset: linux.SizeOfIPTEntry, + }, + } - // Serialize and append the target. - serialized := marshalTarget(rule.Target) + for _, matcher := range rule.Matchers { + // Serialize the matcher and add it to the + // entry. + serialized := marshalMatcher(matcher) entry.Elems = append(entry.Elems, serialized...) entry.NextOffset += uint16(len(serialized)) - - // The underflow rule is the last rule in the chain, - // and is an unconditional rule (i.e. it matches any - // packet). This is enforced when saving iptables. - if ruleIdx == len(chain.Rules)-1 { - meta.Underflow[hook] = entries.Size - } - - entries.Size += uint32(entry.NextOffset) - entries.Entrytable = append(entries.Entrytable, entry) - meta.NumEntries++ + entry.TargetOffset += uint16(len(serialized)) } - } - - // TODO(gvisor.dev/issue/170): Deal with the user chains here. Each of - // these starts with an error node holding the chain's name and ends - // with an unconditional return. + // Serialize and append the target. + serialized := marshalTarget(rule.Target) + entry.Elems = append(entry.Elems, serialized...) + entry.NextOffset += uint16(len(serialized)) - // Lastly, each table ends with an unconditional error target rule as - // its final entry. - errorEntry := linux.KernelIPTEntry{ - IPTEntry: linux.IPTEntry{ - NextOffset: linux.SizeOfIPTEntry, - TargetOffset: linux.SizeOfIPTEntry, - }, + entries.Size += uint32(entry.NextOffset) + entries.Entrytable = append(entries.Entrytable, entry) + meta.NumEntries++ } - var errorTarget linux.XTErrorTarget - errorTarget.Target.TargetSize = linux.SizeOfXTErrorTarget - copy(errorTarget.ErrorName[:], errorTargetName) - copy(errorTarget.Target.Name[:], errorTargetName) - - // Serialize and add it to the list of entries. - errorTargetBuf := make([]byte, 0, linux.SizeOfXTErrorTarget) - serializedErrorTarget := binary.Marshal(errorTargetBuf, usermem.ByteOrder, errorTarget) - errorEntry.Elems = append(errorEntry.Elems, serializedErrorTarget...) - errorEntry.NextOffset += uint16(len(serializedErrorTarget)) - - entries.Size += uint32(errorEntry.NextOffset) - entries.Entrytable = append(entries.Entrytable, errorEntry) - meta.NumEntries++ - meta.Size = entries.Size + meta.Size = entries.Size return entries, meta, nil } func marshalMatcher(matcher iptables.Matcher) []byte { switch matcher.(type) { default: - // TODO(gvisor.dev/issue/170): We don't support any matchers yet, so - // any call to marshalMatcher will panic. + // TODO(gvisor.dev/issue/170): We don't support any matchers + // yet, so any call to marshalMatcher will panic. panic(fmt.Errorf("unknown matcher of type %T", matcher)) } } @@ -246,28 +207,46 @@ func marshalMatcher(matcher iptables.Matcher) []byte { func marshalTarget(target iptables.Target) []byte { switch target.(type) { case iptables.UnconditionalAcceptTarget: - return marshalUnconditionalAcceptTarget() + return marshalStandardTarget(iptables.Accept) + case iptables.UnconditionalDropTarget: + return marshalStandardTarget(iptables.Drop) + case iptables.ErrorTarget: + return marshalErrorTarget() default: panic(fmt.Errorf("unknown target of type %T", target)) } } -func marshalUnconditionalAcceptTarget() []byte { +func marshalStandardTarget(verdict iptables.Verdict) []byte { // The target's name will be the empty string. target := linux.XTStandardTarget{ Target: linux.XTEntryTarget{ TargetSize: linux.SizeOfXTStandardTarget, }, - Verdict: translateStandardVerdict(iptables.Accept), + Verdict: translateFromStandardVerdict(verdict), } ret := make([]byte, 0, linux.SizeOfXTStandardTarget) return binary.Marshal(ret, usermem.ByteOrder, target) } -// translateStandardVerdict translates verdicts the same way as the iptables +func marshalErrorTarget() []byte { + // This is an error target named error + target := linux.XTErrorTarget{ + Target: linux.XTEntryTarget{ + TargetSize: linux.SizeOfXTErrorTarget, + }, + } + copy(target.Name[:], errorTargetName) + copy(target.Target.Name[:], errorTargetName) + + ret := make([]byte, 0, linux.SizeOfXTErrorTarget) + return binary.Marshal(ret, usermem.ByteOrder, target) +} + +// translateFromStandardVerdict translates verdicts the same way as the iptables // tool. -func translateStandardVerdict(verdict iptables.Verdict) int32 { +func translateFromStandardVerdict(verdict iptables.Verdict) int32 { switch verdict { case iptables.Accept: return -linux.NF_ACCEPT - 1 @@ -280,7 +259,258 @@ func translateStandardVerdict(verdict iptables.Verdict) int32 { case iptables.Jump: // TODO(gvisor.dev/issue/170): Support Jump. panic("Jump isn't supported yet") + } + panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) +} + +// translateToStandardVerdict translates from the value in a +// linux.XTStandardTarget to an iptables.Verdict. +func translateToStandardVerdict(val int32) (iptables.Verdict, *syserr.Error) { + // TODO(gvisor.dev/issue/170): Support other verdicts. + switch val { + case -linux.NF_ACCEPT - 1: + return iptables.Accept, nil + case -linux.NF_DROP - 1: + return iptables.Drop, nil + case -linux.NF_QUEUE - 1: + log.Warningf("Unsupported iptables verdict QUEUE.") + case linux.NF_RETURN: + log.Warningf("Unsupported iptables verdict RETURN.") + default: + log.Warningf("Unknown iptables verdict %d.", val) + } + return iptables.Invalid, syserr.ErrInvalidArgument +} + +// SetEntries sets iptables rules for a single table. See +// net/ipv4/netfilter/ip_tables.c:translate_table for reference. +func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { + printReplace(optVal) + + // Get the basic rules data (struct ipt_replace). + if len(optVal) < linux.SizeOfIPTReplace { + log.Warningf("netfilter.SetEntries: optVal has insufficient size for replace %d", len(optVal)) + return syserr.ErrInvalidArgument + } + var replace linux.IPTReplace + replaceBuf := optVal[:linux.SizeOfIPTReplace] + optVal = optVal[linux.SizeOfIPTReplace:] + binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) + + // TODO(gvisor.dev/issue/170): Support other tables. + var table iptables.Table + switch replace.Name.String() { + case iptables.TablenameFilter: + table = iptables.EmptyFilterTable() default: - panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) + log.Warningf("We don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String()) + return syserr.ErrInvalidArgument + } + + // Convert input into a list of rules and their offsets. + var offset uint32 + var offsets []uint32 + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + // Get the struct ipt_entry. + if len(optVal) < linux.SizeOfIPTEntry { + log.Warningf("netfilter: optVal has insufficient size for entry %d", len(optVal)) + return syserr.ErrInvalidArgument + } + var entry linux.IPTEntry + buf := optVal[:linux.SizeOfIPTEntry] + optVal = optVal[linux.SizeOfIPTEntry:] + binary.Unmarshal(buf, usermem.ByteOrder, &entry) + if entry.TargetOffset != linux.SizeOfIPTEntry { + // TODO(gvisor.dev/issue/170): Support matchers. + return syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): We should support IPTIP + // filtering. We reject any nonzero IPTIP values for now. + emptyIPTIP := linux.IPTIP{} + if entry.IP != emptyIPTIP { + log.Warningf("netfilter: non-empty struct iptip found") + return syserr.ErrInvalidArgument + } + + // Get the target of the rule. + target, consumed, err := parseTarget(optVal) + if err != nil { + return err + } + optVal = optVal[consumed:] + + table.Rules = append(table.Rules, iptables.Rule{Target: target}) + offsets = append(offsets, offset) + offset += linux.SizeOfIPTEntry + consumed + } + + // Go through the list of supported hooks for this table and, for each + // one, set the rule it corresponds to. + for hook, _ := range replace.HookEntry { + if table.ValidHooks()&uint32(hook) != 0 { + hk := hookFromLinux(hook) + for ruleIdx, offset := range offsets { + if offset == replace.HookEntry[hook] { + table.BuiltinChains[hk] = ruleIdx + } + if offset == replace.Underflow[hook] { + table.Underflows[hk] = ruleIdx + } + } + if ruleIdx := table.BuiltinChains[hk]; ruleIdx == iptables.HookUnset { + log.Warningf("Hook %v is unset.", hk) + return syserr.ErrInvalidArgument + } + if ruleIdx := table.Underflows[hk]; ruleIdx == iptables.HookUnset { + log.Warningf("Underflow %v is unset.", hk) + return syserr.ErrInvalidArgument + } + } + } + + ipt := stack.IPTables() + table.SetMetadata(metadata{ + HookEntry: replace.HookEntry, + Underflow: replace.Underflow, + NumEntries: replace.NumEntries, + Size: replace.Size, + }) + ipt.Tables[replace.Name.String()] = table + stack.SetIPTables(ipt) + + return nil +} + +// parseTarget parses a target from the start of optVal and returns the target +// along with the number of bytes it occupies in optVal. +func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { + if len(optVal) < linux.SizeOfXTEntryTarget { + log.Warningf("netfilter: optVal has insufficient size for entry target %d", len(optVal)) + return nil, 0, syserr.ErrInvalidArgument + } + var target linux.XTEntryTarget + buf := optVal[:linux.SizeOfXTEntryTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &target) + switch target.Name.String() { + case "": + // Standard target. + if len(optVal) < linux.SizeOfXTStandardTarget { + log.Warningf("netfilter.SetEntries: optVal has insufficient size for standard target %d", len(optVal)) + return nil, 0, syserr.ErrInvalidArgument + } + var standardTarget linux.XTStandardTarget + buf = optVal[:linux.SizeOfXTStandardTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &standardTarget) + + verdict, err := translateToStandardVerdict(standardTarget.Verdict) + if err != nil { + return nil, 0, err + } + switch verdict { + case iptables.Accept: + return iptables.UnconditionalAcceptTarget{}, linux.SizeOfXTStandardTarget, nil + case iptables.Drop: + // TODO(gvisor.dev/issue/170): Return an + // iptables.UnconditionalDropTarget to support DROP. + log.Infof("netfilter DROP is not supported yet.") + return nil, 0, syserr.ErrInvalidArgument + default: + panic(fmt.Sprintf("Unknown verdict: %v", verdict)) + } + + case errorTargetName: + // Error target. + if len(optVal) < linux.SizeOfXTErrorTarget { + log.Infof("netfilter.SetEntries: optVal has insufficient size for error target %d", len(optVal)) + return nil, 0, syserr.ErrInvalidArgument + } + var errorTarget linux.XTErrorTarget + buf = optVal[:linux.SizeOfXTErrorTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &errorTarget) + + // Error targets are used in 2 cases: + // * An actual error case. These rules have an error + // named errorTargetName. The last entry of the table + // is usually an error case to catch any packets that + // somehow fall through every rule. + // * To mark the start of a user defined chain. These + // rules have an error with the name of the chain. + switch errorTarget.Name.String() { + case errorTargetName: + return iptables.ErrorTarget{}, linux.SizeOfXTErrorTarget, nil + default: + log.Infof("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String()) + return nil, 0, syserr.ErrInvalidArgument + } + } + + // Unknown target. + log.Infof("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String()) + return nil, 0, syserr.ErrInvalidArgument +} + +func hookFromLinux(hook int) iptables.Hook { + switch hook { + case linux.NF_INET_PRE_ROUTING: + return iptables.Prerouting + case linux.NF_INET_LOCAL_IN: + return iptables.Input + case linux.NF_INET_FORWARD: + return iptables.Forward + case linux.NF_INET_LOCAL_OUT: + return iptables.Output + case linux.NF_INET_POST_ROUTING: + return iptables.Postrouting + } + panic(fmt.Sprintf("Unknown hook %d does not correspond to a builtin chain", hook)) +} + +// printReplace prints information about the struct ipt_replace in optVal. It +// is only for debugging. +func printReplace(optVal []byte) { + // Basic replace info. + var replace linux.IPTReplace + replaceBuf := optVal[:linux.SizeOfIPTReplace] + optVal = optVal[linux.SizeOfIPTReplace:] + binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) + log.Infof("Replacing table %q: %+v", replace.Name.String(), replace) + + // Read in the list of entries at the end of replace. + var totalOffset uint16 + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + var entry linux.IPTEntry + entryBuf := optVal[:linux.SizeOfIPTEntry] + binary.Unmarshal(entryBuf, usermem.ByteOrder, &entry) + log.Infof("Entry %d (total offset %d): %+v", entryIdx, totalOffset, entry) + + totalOffset += entry.NextOffset + if entry.TargetOffset == linux.SizeOfIPTEntry { + log.Infof("Entry has no matches.") + } else { + log.Infof("Entry has matches.") + } + + var target linux.XTEntryTarget + targetBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTEntryTarget] + binary.Unmarshal(targetBuf, usermem.ByteOrder, &target) + log.Infof("Target named %q: %+v", target.Name.String(), target) + + switch target.Name.String() { + case "": + var standardTarget linux.XTStandardTarget + stBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTStandardTarget] + binary.Unmarshal(stBuf, usermem.ByteOrder, &standardTarget) + log.Infof("Standard target with verdict %q (%d).", linux.VerdictStrings[standardTarget.Verdict], standardTarget.Verdict) + case errorTargetName: + var errorTarget linux.XTErrorTarget + etBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTErrorTarget] + binary.Unmarshal(etBuf, usermem.ByteOrder, &errorTarget) + log.Infof("Error target with name %q.", errorTarget.Name.String()) + default: + log.Infof("Unknown target type.") + } + + optVal = optVal[entry.NextOffset:] } } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 136821963..103933144 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD index 463544c1a..2d9f4ba9b 100644 --- a/pkg/sentry/socket/netlink/port/BUILD +++ b/pkg/sentry/socket/netlink/port/BUILD @@ -8,6 +8,7 @@ go_library( srcs = ["port.go"], importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", visibility = ["//pkg/sentry:internal"], + deps = ["//pkg/sync"], ) go_test( diff --git a/pkg/sentry/socket/netlink/port/port.go b/pkg/sentry/socket/netlink/port/port.go index e9d3275b1..2cd3afc22 100644 --- a/pkg/sentry/socket/netlink/port/port.go +++ b/pkg/sentry/socket/netlink/port/port.go @@ -24,7 +24,8 @@ import ( "fmt" "math" "math/rand" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // maxPorts is a sanity limit on the maximum number of ports to allocate per diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index d2e3644a6..cea56f4ed 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -17,7 +17,6 @@ package netlink import ( "math" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" @@ -34,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index e414d8055..f78784569 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9e0d69046..d2f7e987d 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -29,7 +29,6 @@ import ( "io" "math" "reflect" - "sync" "syscall" "time" @@ -49,6 +48,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -324,22 +324,15 @@ func bytesToIPAddress(addr []byte) tcpip.Address { // converts it to the FullAddress format. It supports AF_UNIX, AF_INET, // AF_INET6, and AF_PACKET addresses. // -// strict indicates whether addresses with the AF_UNSPEC family are accepted of not. -// // AddressAndFamily returns an address and its family. -func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) { +func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { // Make sure we have at least 2 bytes for the address family. if len(addr) < 2 { return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument } - family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { - return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported - } - // Get the rest of the fields based on the address family. - switch family { + switch family := usermem.ByteOrder.Uint16(addr); family { case linux.AF_UNIX: path := addr[2:] if len(path) > linux.UnixPathMax { @@ -638,10 +631,40 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { return r } +func (s *SocketOperations) checkFamily(family uint16, exact bool) *syserr.Error { + if family == uint16(s.family) { + return nil + } + if !exact && family == linux.AF_INET && s.family == linux.AF_INET6 { + v, err := s.Endpoint.GetSockOptBool(tcpip.V6OnlyOption) + if err != nil { + return syserr.TranslateNetstackError(err) + } + if !v { + return nil + } + } + return syserr.ErrInvalidArgument +} + +// mapFamily maps the AF_INET ANY address to the IPv4-mapped IPv6 ANY if the +// receiver's family is AF_INET6. +// +// This is a hack to work around the fact that both IPv4 and IPv6 ANY are +// represented by the empty string. +// +// TODO(gvisor.dev/issues/1556): remove this function. +func (s *SocketOperations) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { + if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { + addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" + } + return addr +} + // Connect implements the linux syscall connect(2) for sockets backed by // tpcip.Endpoint. func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } @@ -653,6 +676,12 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo } return syserr.TranslateNetstackError(err) } + + if err := s.checkFamily(family, false /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) + // Always return right away in the non-blocking case. if !blocking { return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) @@ -681,10 +710,14 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } + if err := s.checkFamily(family, true /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) // Issue the bind request to the endpoint. return syserr.TranslateNetstackError(s.Endpoint.Bind(addr)) @@ -985,13 +1018,23 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family if err := ep.GetSockOpt(&v); err != nil { return nil, syserr.TranslateNetstackError(err) } - if len(v) == 0 { + if v == 0 { return []byte{}, nil } if outLen < linux.IFNAMSIZ { return nil, syserr.ErrInvalidArgument } - return append([]byte(v), 0), nil + s := t.NetworkContext() + if s == nil { + return nil, syserr.ErrNoDevice + } + nic, ok := s.Interfaces()[int32(v)] + if !ok { + // The NICID no longer indicates a valid interface, probably because that + // interface was removed. + return nil, syserr.ErrUnknownDevice + } + return append([]byte(nic.Name), 0), nil case linux.SO_BROADCAST: if outLen < sizeOfInt32 { @@ -1225,11 +1268,11 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf if err != nil { return nil, syserr.TranslateNetstackError(err) } - var o uint32 + var o int32 if v { o = 1 } - return int32(o), nil + return o, nil case linux.IPV6_PATHMTU: t.Kernel().EmitUnimplementedEvent(t) @@ -1334,6 +1377,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return int32(v), nil + case linux.IP_RECVTOS: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + var o int32 + if v { + o = 1 + } + return o, nil + default: emitUnimplementedEventIP(t, name) } @@ -1367,6 +1425,26 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa return nil } + if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP { + switch name { + case linux.IPT_SO_SET_REPLACE: + if len(optVal) < linux.SizeOfIPTReplace { + return syserr.ErrInvalidArgument + } + + stack := inet.StackFromContext(t) + if stack == nil { + return syserr.ErrNoDevice + } + // Stack must be a netstack stack. + return netfilter.SetEntries(stack.(*Stack).Stack, optVal) + + case linux.IPT_SO_SET_ADD_COUNTERS: + // TODO(gvisor.dev/issue/170): Counter support. + return nil + } + } + return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } @@ -1438,7 +1516,20 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i if n == -1 { n = len(optVal) } - return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(optVal[:n]))) + name := string(optVal[:n]) + if name == "" { + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(0))) + } + s := t.NetworkContext() + if s == nil { + return syserr.ErrNoDevice + } + for nicID, nic := range s.Interfaces() { + if nic.Name == name { + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(nicID))) + } + } + return syserr.ErrUnknownDevice case linux.SO_BROADCAST: if len(optVal) < sizeOfInt32 { @@ -1819,6 +1910,13 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v))) + case linux.IP_RECVTOS: + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTOSOption, v != 0)) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1839,7 +1937,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, linux.IP_RECVORIGDSTADDR, - linux.IP_RECVTOS, linux.IP_RECVTTL, linux.IP_RETOPTS, linux.IP_TRANSPARENT, @@ -2037,8 +2134,8 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) case linux.AF_INET6: var out linux.SockAddrInet6 - if len(addr.Addr) == 4 { - // Copy address is v4-mapped format. + if len(addr.Addr) == header.IPv4AddressSize { + // Copy address in v4-mapped format. copy(out.Addr[12:], addr.Addr) out.Addr[10] = 0xff out.Addr[11] = 0xff @@ -2259,7 +2356,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } func (s *SocketOperations) controlMessages() socket.ControlMessages { - return socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, Timestamp: s.readCM.Timestamp}} + return socket.ControlMessages{ + IP: tcpip.ControlMessages{ + HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, + Timestamp: s.readCM.Timestamp, + HasTOS: s.readCM.HasTOS, + TOS: s.readCM.TOS, + }, + } } // updateTimestamp sets the timestamp for SIOCGSTAMP. It should be called after @@ -2352,10 +2456,14 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] var addr *tcpip.FullAddress if len(to) > 0 { - addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */) + addrBuf, family, err := AddressAndFamily(to) if err != nil { return 0, err } + if err := s.checkFamily(family, false /* exact */); err != nil { + return 0, err + } + addrBuf = s.mapFamily(addrBuf, family) addr = &addrBuf } diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD index 23eadcb1b..b2677c659 100644 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ b/pkg/sentry/socket/rpcinet/conn/BUILD @@ -10,6 +10,7 @@ go_library( deps = [ "//pkg/binary", "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", + "//pkg/sync", "//pkg/syserr", "//pkg/unet", "@com_github_golang_protobuf//proto:go_default_library", diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go index 356adad99..02f39c767 100644 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ b/pkg/sentry/socket/rpcinet/conn/conn.go @@ -17,12 +17,12 @@ package conn import ( "fmt" - "sync" "sync/atomic" "syscall" "github.com/golang/protobuf/proto" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/unet" diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD index a3585e10d..a5954f22b 100644 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ b/pkg/sentry/socket/rpcinet/notifier/BUILD @@ -10,6 +10,7 @@ go_library( deps = [ "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", "//pkg/sentry/socket/rpcinet/conn", + "//pkg/sync", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go index 7efe4301f..82b75d6dd 100644 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go @@ -17,12 +17,12 @@ package notifier import ( "fmt" - "sync" "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index 788ad70d2..d7ba95dff 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/ilist", "//pkg/refs", "//pkg/sentry/context", + "//pkg/sync", "//pkg/syserr", "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index dea11e253..9e6fbc111 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -15,10 +15,9 @@ package transport import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go index e27b1c714..5dcd3d95e 100644 --- a/pkg/sentry/socket/unix/transport/queue.go +++ b/pkg/sentry/socket/unix/transport/queue.go @@ -15,9 +15,8 @@ package transport import ( - "sync" - "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 37c7ac3c1..fcc0da332 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -16,11 +16,11 @@ package transport import ( - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 91effe89a..7f49ba864 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -116,13 +116,16 @@ func (s *SocketOperations) Endpoint() transport.Endpoint { // extractPath extracts and validates the address. func extractPath(sockaddr []byte) (string, *syserr.Error) { - addr, _, err := netstack.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */) + addr, family, err := netstack.AddressAndFamily(sockaddr) if err != nil { if err == syserr.ErrAddressFamilyNotSupported { err = syserr.ErrInvalidArgument } return "", err } + if family != linux.AF_UNIX { + return "", syserr.ErrInvalidArgument + } // The address is trimmed by GetAddress. p := string(addr.Addr) |