diff options
Diffstat (limited to 'pkg/sentry/socket')
21 files changed, 430 insertions, 143 deletions
diff --git a/pkg/sentry/socket/control/control_vfs2.go b/pkg/sentry/socket/control/control_vfs2.go index d9621968c..37d02948f 100644 --- a/pkg/sentry/socket/control/control_vfs2.go +++ b/pkg/sentry/socket/control/control_vfs2.go @@ -24,6 +24,8 @@ import ( ) // SCMRightsVFS2 represents a SCM_RIGHTS socket control message. +// +// +stateify savable type SCMRightsVFS2 interface { transport.RightsControlMessage @@ -34,9 +36,11 @@ type SCMRightsVFS2 interface { Files(ctx context.Context, max int) (rf RightsFilesVFS2, truncated bool) } -// RightsFiles represents a SCM_RIGHTS socket control message. A reference is -// maintained for each vfs.FileDescription and is release either when an FD is created or -// when the Release method is called. +// RightsFilesVFS2 represents a SCM_RIGHTS socket control message. A reference +// is maintained for each vfs.FileDescription and is release either when an FD +// is created or when the Release method is called. +// +// +stateify savable type RightsFilesVFS2 []*vfs.FileDescription // NewSCMRightsVFS2 creates a new SCM_RIGHTS socket control message diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index 163af329b..9a2cac40b 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -33,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// +stateify savable type socketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl @@ -51,7 +52,7 @@ var _ = socket.SocketVFS2(&socketVFS2{}) func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol int, fd int, flags uint32) (*vfs.FileDescription, *syserr.Error) { mnt := t.Kernel().SocketMount() - d := sockfs.NewDentry(t.Credentials(), mnt) + d := sockfs.NewDentry(t, mnt) defer d.DecRef(t) s := &socketVFS2{ diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index faa61160e..7e7857ac3 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -324,7 +324,12 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { } // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. -func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { +func (s *Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error { + return syserror.EACCES +} + +// RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. +func (s *Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error { return syserror.EACCES } @@ -359,7 +364,7 @@ func (s *Stack) TCPSACKEnabled() (bool, error) { } // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. -func (s *Stack) SetTCPSACKEnabled(enabled bool) error { +func (s *Stack) SetTCPSACKEnabled(bool) error { return syserror.EACCES } @@ -369,7 +374,7 @@ func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { } // SetTCPRecovery implements inet.Stack.SetTCPRecovery. -func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error { +func (s *Stack) SetTCPRecovery(inet.TCPLossRecovery) error { return syserror.EACCES } @@ -430,18 +435,18 @@ func (s *Stack) Statistics(stat interface{}, arg string) error { } if rawLine == "" { - return fmt.Errorf("Failed to get raw line") + return fmt.Errorf("failed to get raw line") } parts := strings.SplitN(rawLine, ":", 2) if len(parts) != 2 { - return fmt.Errorf("Failed to get prefix from: %q", rawLine) + return fmt.Errorf("failed to get prefix from: %q", rawLine) } sliceStat = toSlice(stat) fields := strings.Fields(strings.TrimSpace(parts[1])) if len(fields) != len(sliceStat) { - return fmt.Errorf("Failed to parse fields: %q", rawLine) + return fmt.Errorf("failed to parse fields: %q", rawLine) } if _, ok := stat.(*inet.StatSNMPTCP); ok { snmpTCP = true @@ -457,7 +462,7 @@ func (s *Stack) Statistics(stat interface{}, arg string) error { sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64) } if err != nil { - return fmt.Errorf("Failed to parse field %d from: %q, %v", i, rawLine, err) + return fmt.Errorf("failed to parse field %d from: %q, %v", i, rawLine, err) } } @@ -495,6 +500,6 @@ func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool { } // SetForwarding implements inet.Stack.SetForwarding. -func (s *Stack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error { +func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { return syserror.EACCES } diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go index 549787955..e0976fed0 100644 --- a/pkg/sentry/socket/netfilter/extensions.go +++ b/pkg/sentry/socket/netfilter/extensions.go @@ -100,24 +100,43 @@ func unmarshalMatcher(match linux.XTEntryMatch, filter stack.IPHeaderFilter, buf // marshalTarget and unmarshalTarget can be used. type targetMaker interface { // id uniquely identifies the target. - id() stack.TargetID + id() targetID - // marshal converts from a stack.Target to an ABI struct. - marshal(target stack.Target) []byte + // marshal converts from a target to an ABI struct. + marshal(target target) []byte - // unmarshal converts from the ABI matcher struct to a stack.Target. - unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Target, *syserr.Error) + // unmarshal converts from the ABI matcher struct to a target. + unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) } -// targetMakers maps the TargetID of supported targets to the targetMaker that +// A targetID uniquely identifies a target. +type targetID struct { + // name is the target name as stored in the xt_entry_target struct. + name string + + // networkProtocol is the protocol to which the target applies. + networkProtocol tcpip.NetworkProtocolNumber + + // revision is the version of the target. + revision uint8 +} + +// target extends a stack.Target, allowing it to be used with the extension +// system. The sentry only uses targets, never stack.Targets directly. +type target interface { + stack.Target + id() targetID +} + +// targetMakers maps the targetID of supported targets to the targetMaker that // marshals and unmarshals it. It is immutable after package initialization. -var targetMakers = map[stack.TargetID]targetMaker{} +var targetMakers = map[targetID]targetMaker{} func targetRevision(name string, netProto tcpip.NetworkProtocolNumber, rev uint8) (uint8, bool) { - tid := stack.TargetID{ - Name: name, - NetworkProtocol: netProto, - Revision: rev, + tid := targetID{ + name: name, + networkProtocol: netProto, + revision: rev, } if _, ok := targetMakers[tid]; !ok { return 0, false @@ -126,8 +145,8 @@ func targetRevision(name string, netProto tcpip.NetworkProtocolNumber, rev uint8 // Return the highest supported revision unless rev is higher. for _, other := range targetMakers { otherID := other.id() - if name == otherID.Name && netProto == otherID.NetworkProtocol && otherID.Revision > rev { - rev = uint8(otherID.Revision) + if name == otherID.name && netProto == otherID.networkProtocol && otherID.revision > rev { + rev = uint8(otherID.revision) } } return rev, true @@ -142,19 +161,21 @@ func registerTargetMaker(tm targetMaker) { targetMakers[tm.id()] = tm } -func marshalTarget(target stack.Target) []byte { - targetMaker, ok := targetMakers[target.ID()] +func marshalTarget(tgt stack.Target) []byte { + // The sentry only uses targets, never stack.Targets directly. + target := tgt.(target) + targetMaker, ok := targetMakers[target.id()] if !ok { - panic(fmt.Sprintf("unknown target of type %T with id %+v.", target, target.ID())) + panic(fmt.Sprintf("unknown target of type %T with id %+v.", target, target.id())) } return targetMaker.marshal(target) } -func unmarshalTarget(target linux.XTEntryTarget, filter stack.IPHeaderFilter, buf []byte) (stack.Target, *syserr.Error) { - tid := stack.TargetID{ - Name: target.Name.String(), - NetworkProtocol: filter.NetworkProtocol(), - Revision: target.Revision, +func unmarshalTarget(target linux.XTEntryTarget, filter stack.IPHeaderFilter, buf []byte) (target, *syserr.Error) { + tid := targetID{ + name: target.Name.String(), + networkProtocol: filter.NetworkProtocol(), + revision: target.Revision, } targetMaker, ok := targetMakers[tid] if !ok { diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go index b560fae0d..70c561cce 100644 --- a/pkg/sentry/socket/netfilter/ipv4.go +++ b/pkg/sentry/socket/netfilter/ipv4.go @@ -46,13 +46,13 @@ func convertNetstackToBinary4(stk *stack.Stack, tablename linux.TableName) (linu return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) } - table, ok := stk.IPTables().GetTable(tablename.String(), false) + id, ok := nameToID[tablename.String()] if !ok { return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) } // Setup the info struct. - entries, info := getEntries4(table, tablename) + entries, info := getEntries4(stk.IPTables().GetTable(id, false), tablename) return entries, info, nil } diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go index 4253f7bf4..5dbb604f0 100644 --- a/pkg/sentry/socket/netfilter/ipv6.go +++ b/pkg/sentry/socket/netfilter/ipv6.go @@ -46,13 +46,13 @@ func convertNetstackToBinary6(stk *stack.Stack, tablename linux.TableName) (linu return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) } - table, ok := stk.IPTables().GetTable(tablename.String(), true) + id, ok := nameToID[tablename.String()] if !ok { return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) } // Setup the info struct, which is the same in IPv4 and IPv6. - entries, info := getEntries6(table, tablename) + entries, info := getEntries6(stk.IPTables().GetTable(id, true), tablename) return entries, info, nil } diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 904a12e38..b283d7229 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -42,6 +42,45 @@ func nflog(format string, args ...interface{}) { } } +// Table names. +const ( + natTable = "nat" + mangleTable = "mangle" + filterTable = "filter" +) + +// nameToID is immutable. +var nameToID = map[string]stack.TableID{ + natTable: stack.NATID, + mangleTable: stack.MangleID, + filterTable: stack.FilterID, +} + +// DefaultLinuxTables returns the rules of stack.DefaultTables() wrapped for +// compatibility with netfilter extensions. +func DefaultLinuxTables() *stack.IPTables { + tables := stack.DefaultTables() + tables.VisitTargets(func(oldTarget stack.Target) stack.Target { + switch val := oldTarget.(type) { + case *stack.AcceptTarget: + return &acceptTarget{AcceptTarget: *val} + case *stack.DropTarget: + return &dropTarget{DropTarget: *val} + case *stack.ErrorTarget: + return &errorTarget{ErrorTarget: *val} + case *stack.UserChainTarget: + return &userChainTarget{UserChainTarget: *val} + case *stack.ReturnTarget: + return &returnTarget{ReturnTarget: *val} + case *stack.RedirectTarget: + return &redirectTarget{RedirectTarget: *val} + default: + panic(fmt.Sprintf("Unknown rule in default iptables of type %T", val)) + } + }) + return tables +} + // GetInfo returns information about iptables. func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) { // Read in the struct and table name. @@ -144,9 +183,9 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { // TODO(gvisor.dev/issue/170): Support other tables. var table stack.Table switch replace.Name.String() { - case stack.FilterTable: + case filterTable: table = stack.EmptyFilterTable() - case stack.NATTable: + case natTable: table = stack.EmptyNATTable() default: nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String()) @@ -177,7 +216,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { } if offset == replace.Underflow[hook] { if !validUnderflow(table.Rules[ruleIdx], ipv6) { - nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx) + nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP: %+v", ruleIdx) return syserr.ErrInvalidArgument } table.Underflows[hk] = ruleIdx @@ -253,8 +292,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { // - There are no chains without an unconditional final rule. // - There are no chains without an unconditional underflow rule. - return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table, ipv6)) - + return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(nameToID[replace.Name.String()], table, ipv6)) } // parseMatchers parses 0 or more matchers from optVal. optVal should contain @@ -308,7 +346,7 @@ func validUnderflow(rule stack.Rule, ipv6 bool) bool { return false } switch rule.Target.(type) { - case *stack.AcceptTarget, *stack.DropTarget: + case *acceptTarget, *dropTarget: return true default: return false @@ -319,7 +357,7 @@ func isUnconditionalAccept(rule stack.Rule, ipv6 bool) bool { if !validUnderflow(rule, ipv6) { return false } - _, ok := rule.Target.(*stack.AcceptTarget) + _, ok := rule.Target.(*acceptTarget) return ok } diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go index 0e14447fe..f2653d523 100644 --- a/pkg/sentry/socket/netfilter/targets.go +++ b/pkg/sentry/socket/netfilter/targets.go @@ -26,6 +26,15 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// ErrorTargetName is used to mark targets as error targets. Error targets +// shouldn't be reached - an error has occurred if we fall through to one. +const ErrorTargetName = "ERROR" + +// RedirectTargetName is used to mark targets as redirect targets. Redirect +// targets should be reached for only NAT and Mangle tables. These targets will +// change the destination port and/or IP for packets. +const RedirectTargetName = "REDIRECT" + func init() { // Standard targets include ACCEPT, DROP, RETURN, and JUMP. registerTargetMaker(&standardTargetMaker{ @@ -52,25 +61,96 @@ func init() { }) } +// The stack package provides some basic, useful targets for us. The following +// types wrap them for compatibility with the extension system. + +type acceptTarget struct { + stack.AcceptTarget +} + +func (at *acceptTarget) id() targetID { + return targetID{ + networkProtocol: at.NetworkProtocol, + } +} + +type dropTarget struct { + stack.DropTarget +} + +func (dt *dropTarget) id() targetID { + return targetID{ + networkProtocol: dt.NetworkProtocol, + } +} + +type errorTarget struct { + stack.ErrorTarget +} + +func (et *errorTarget) id() targetID { + return targetID{ + name: ErrorTargetName, + networkProtocol: et.NetworkProtocol, + } +} + +type userChainTarget struct { + stack.UserChainTarget +} + +func (uc *userChainTarget) id() targetID { + return targetID{ + name: ErrorTargetName, + networkProtocol: uc.NetworkProtocol, + } +} + +type returnTarget struct { + stack.ReturnTarget +} + +func (rt *returnTarget) id() targetID { + return targetID{ + networkProtocol: rt.NetworkProtocol, + } +} + +type redirectTarget struct { + stack.RedirectTarget + + // addr must be (un)marshalled when reading and writing the target to + // userspace, but does not affect behavior. + addr tcpip.Address +} + +func (rt *redirectTarget) id() targetID { + return targetID{ + name: RedirectTargetName, + networkProtocol: rt.NetworkProtocol, + } +} + type standardTargetMaker struct { NetworkProtocol tcpip.NetworkProtocolNumber } -func (sm *standardTargetMaker) id() stack.TargetID { +func (sm *standardTargetMaker) id() targetID { // Standard targets have the empty string as a name and no revisions. - return stack.TargetID{ - NetworkProtocol: sm.NetworkProtocol, + return targetID{ + networkProtocol: sm.NetworkProtocol, } } -func (*standardTargetMaker) marshal(target stack.Target) []byte { + +func (*standardTargetMaker) marshal(target target) []byte { // Translate verdicts the same way as the iptables tool. var verdict int32 switch tg := target.(type) { - case *stack.AcceptTarget: + case *acceptTarget: verdict = -linux.NF_ACCEPT - 1 - case *stack.DropTarget: + case *dropTarget: verdict = -linux.NF_DROP - 1 - case *stack.ReturnTarget: + case *returnTarget: verdict = linux.NF_RETURN case *JumpTarget: verdict = int32(tg.Offset) @@ -90,7 +170,7 @@ func (*standardTargetMaker) marshal(target stack.Target) []byte { return binary.Marshal(ret, usermem.ByteOrder, xt) } -func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Target, *syserr.Error) { +func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { if len(buf) != linux.SizeOfXTStandardTarget { nflog("buf has wrong size for standard target %d", len(buf)) return nil, syserr.ErrInvalidArgument @@ -114,20 +194,20 @@ type errorTargetMaker struct { NetworkProtocol tcpip.NetworkProtocolNumber } -func (em *errorTargetMaker) id() stack.TargetID { +func (em *errorTargetMaker) id() targetID { // Error targets have no revision. - return stack.TargetID{ - Name: stack.ErrorTargetName, - NetworkProtocol: em.NetworkProtocol, + return targetID{ + name: ErrorTargetName, + networkProtocol: em.NetworkProtocol, } } -func (*errorTargetMaker) marshal(target stack.Target) []byte { +func (*errorTargetMaker) marshal(target target) []byte { var errorName string switch tg := target.(type) { - case *stack.ErrorTarget: - errorName = stack.ErrorTargetName - case *stack.UserChainTarget: + case *errorTarget: + errorName = ErrorTargetName + case *userChainTarget: errorName = tg.Name default: panic(fmt.Sprintf("errorMakerTarget cannot marshal unknown type %T", target)) @@ -140,37 +220,38 @@ func (*errorTargetMaker) marshal(target stack.Target) []byte { }, } copy(xt.Name[:], errorName) - copy(xt.Target.Name[:], stack.ErrorTargetName) + copy(xt.Target.Name[:], ErrorTargetName) ret := make([]byte, 0, linux.SizeOfXTErrorTarget) return binary.Marshal(ret, usermem.ByteOrder, xt) } -func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Target, *syserr.Error) { +func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { if len(buf) != linux.SizeOfXTErrorTarget { nflog("buf has insufficient size for error target %d", len(buf)) return nil, syserr.ErrInvalidArgument } - var errorTarget linux.XTErrorTarget + var errTgt linux.XTErrorTarget buf = buf[:linux.SizeOfXTErrorTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &errorTarget) + binary.Unmarshal(buf, usermem.ByteOrder, &errTgt) // Error targets are used in 2 cases: - // * An actual error case. These rules have an error - // named stack.ErrorTargetName. The last entry of the table - // is usually an error case to catch any packets that - // somehow fall through every rule. + // * An actual error case. These rules have an error named + // ErrorTargetName. The last entry of the table is usually an error + // case to catch any packets that somehow fall through every rule. // * To mark the start of a user defined chain. These // rules have an error with the name of the chain. - switch name := errorTarget.Name.String(); name { - case stack.ErrorTargetName: - return &stack.ErrorTarget{NetworkProtocol: filter.NetworkProtocol()}, nil + switch name := errTgt.Name.String(); name { + case ErrorTargetName: + return &errorTarget{stack.ErrorTarget{ + NetworkProtocol: filter.NetworkProtocol(), + }}, nil default: // User defined chain. - return &stack.UserChainTarget{ + return &userChainTarget{stack.UserChainTarget{ Name: name, NetworkProtocol: filter.NetworkProtocol(), - }, nil + }}, nil } } @@ -178,22 +259,22 @@ type redirectTargetMaker struct { NetworkProtocol tcpip.NetworkProtocolNumber } -func (rm *redirectTargetMaker) id() stack.TargetID { - return stack.TargetID{ - Name: stack.RedirectTargetName, - NetworkProtocol: rm.NetworkProtocol, +func (rm *redirectTargetMaker) id() targetID { + return targetID{ + name: RedirectTargetName, + networkProtocol: rm.NetworkProtocol, } } -func (*redirectTargetMaker) marshal(target stack.Target) []byte { - rt := target.(*stack.RedirectTarget) +func (*redirectTargetMaker) marshal(target target) []byte { + rt := target.(*redirectTarget) // This is a redirect target named redirect xt := linux.XTRedirectTarget{ Target: linux.XTEntryTarget{ TargetSize: linux.SizeOfXTRedirectTarget, }, } - copy(xt.Target.Name[:], stack.RedirectTargetName) + copy(xt.Target.Name[:], RedirectTargetName) ret := make([]byte, 0, linux.SizeOfXTRedirectTarget) xt.NfRange.RangeSize = 1 @@ -203,7 +284,7 @@ func (*redirectTargetMaker) marshal(target stack.Target) []byte { return binary.Marshal(ret, usermem.ByteOrder, xt) } -func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Target, *syserr.Error) { +func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { if len(buf) < linux.SizeOfXTRedirectTarget { nflog("redirectTargetMaker: buf has insufficient size for redirect target %d", len(buf)) return nil, syserr.ErrInvalidArgument @@ -214,15 +295,17 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) ( return nil, syserr.ErrInvalidArgument } - var redirectTarget linux.XTRedirectTarget + var rt linux.XTRedirectTarget buf = buf[:linux.SizeOfXTRedirectTarget] - binary.Unmarshal(buf, usermem.ByteOrder, &redirectTarget) + binary.Unmarshal(buf, usermem.ByteOrder, &rt) // Copy linux.XTRedirectTarget to stack.RedirectTarget. - target := stack.RedirectTarget{NetworkProtocol: filter.NetworkProtocol()} + target := redirectTarget{RedirectTarget: stack.RedirectTarget{ + NetworkProtocol: filter.NetworkProtocol(), + }} // RangeSize should be 1. - nfRange := redirectTarget.NfRange + nfRange := rt.NfRange if nfRange.RangeSize != 1 { nflog("redirectTargetMaker: bad rangesize %d", nfRange.RangeSize) return nil, syserr.ErrInvalidArgument @@ -247,7 +330,7 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) ( return nil, syserr.ErrInvalidArgument } - target.Addr = tcpip.Address(nfRange.RangeIPV4.MinIP[:]) + target.addr = tcpip.Address(nfRange.RangeIPV4.MinIP[:]) target.Port = ntohs(nfRange.RangeIPV4.MinPort) return &target, nil @@ -264,15 +347,15 @@ type nfNATTargetMaker struct { NetworkProtocol tcpip.NetworkProtocolNumber } -func (rm *nfNATTargetMaker) id() stack.TargetID { - return stack.TargetID{ - Name: stack.RedirectTargetName, - NetworkProtocol: rm.NetworkProtocol, +func (rm *nfNATTargetMaker) id() targetID { + return targetID{ + name: RedirectTargetName, + networkProtocol: rm.NetworkProtocol, } } -func (*nfNATTargetMaker) marshal(target stack.Target) []byte { - rt := target.(*stack.RedirectTarget) +func (*nfNATTargetMaker) marshal(target target) []byte { + rt := target.(*redirectTarget) nt := nfNATTarget{ Target: linux.XTEntryTarget{ TargetSize: nfNATMarhsalledSize, @@ -281,9 +364,9 @@ func (*nfNATTargetMaker) marshal(target stack.Target) []byte { Flags: linux.NF_NAT_RANGE_PROTO_SPECIFIED, }, } - copy(nt.Target.Name[:], stack.RedirectTargetName) - copy(nt.Range.MinAddr[:], rt.Addr) - copy(nt.Range.MaxAddr[:], rt.Addr) + copy(nt.Target.Name[:], RedirectTargetName) + copy(nt.Range.MinAddr[:], rt.addr) + copy(nt.Range.MaxAddr[:], rt.addr) nt.Range.MinProto = htons(rt.Port) nt.Range.MaxProto = nt.Range.MinProto @@ -292,7 +375,7 @@ func (*nfNATTargetMaker) marshal(target stack.Target) []byte { return binary.Marshal(ret, usermem.ByteOrder, nt) } -func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Target, *syserr.Error) { +func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) { if size := nfNATMarhsalledSize; len(buf) < size { nflog("nfNATTargetMaker: buf has insufficient size (%d) for nfNAT target (%d)", len(buf), size) return nil, syserr.ErrInvalidArgument @@ -324,10 +407,12 @@ func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (sta return nil, syserr.ErrInvalidArgument } - target := stack.RedirectTarget{ - NetworkProtocol: filter.NetworkProtocol(), - Addr: tcpip.Address(natRange.MinAddr[:]), - Port: ntohs(natRange.MinProto), + target := redirectTarget{ + RedirectTarget: stack.RedirectTarget{ + NetworkProtocol: filter.NetworkProtocol(), + Port: ntohs(natRange.MinProto), + }, + addr: tcpip.Address(natRange.MinAddr[:]), } return &target, nil @@ -335,18 +420,24 @@ func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (sta // translateToStandardTarget translates from the value in a // linux.XTStandardTarget to an stack.Verdict. -func translateToStandardTarget(val int32, netProto tcpip.NetworkProtocolNumber) (stack.Target, *syserr.Error) { +func translateToStandardTarget(val int32, netProto tcpip.NetworkProtocolNumber) (target, *syserr.Error) { // TODO(gvisor.dev/issue/170): Support other verdicts. switch val { case -linux.NF_ACCEPT - 1: - return &stack.AcceptTarget{NetworkProtocol: netProto}, nil + return &acceptTarget{stack.AcceptTarget{ + NetworkProtocol: netProto, + }}, nil case -linux.NF_DROP - 1: - return &stack.DropTarget{NetworkProtocol: netProto}, nil + return &dropTarget{stack.DropTarget{ + NetworkProtocol: netProto, + }}, nil case -linux.NF_QUEUE - 1: nflog("unsupported iptables verdict QUEUE") return nil, syserr.ErrInvalidArgument case linux.NF_RETURN: - return &stack.ReturnTarget{NetworkProtocol: netProto}, nil + return &returnTarget{stack.ReturnTarget{ + NetworkProtocol: netProto, + }}, nil default: nflog("unknown iptables verdict %d", val) return nil, syserr.ErrInvalidArgument @@ -382,9 +473,9 @@ type JumpTarget struct { } // ID implements Target.ID. -func (jt *JumpTarget) ID() stack.TargetID { - return stack.TargetID{ - NetworkProtocol: jt.NetworkProtocol, +func (jt *JumpTarget) id() targetID { + return targetID{ + networkProtocol: jt.NetworkProtocol, } } diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go index 844acfede..352c51390 100644 --- a/pkg/sentry/socket/netfilter/tcp_matcher.go +++ b/pkg/sentry/socket/netfilter/tcp_matcher.go @@ -71,7 +71,7 @@ func (tcpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma } if filter.Protocol != header.TCPProtocolNumber { - return nil, fmt.Errorf("TCP matching is only valid for protocol %d.", header.TCPProtocolNumber) + return nil, fmt.Errorf("TCP matching is only valid for protocol %d", header.TCPProtocolNumber) } return &TCPMatcher{ diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go index 63201201c..c88d8268d 100644 --- a/pkg/sentry/socket/netfilter/udp_matcher.go +++ b/pkg/sentry/socket/netfilter/udp_matcher.go @@ -68,7 +68,7 @@ func (udpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma } if filter.Protocol != header.UDPProtocolNumber { - return nil, fmt.Errorf("UDP matching is only valid for protocol %d.", header.UDPProtocolNumber) + return nil, fmt.Errorf("UDP matching is only valid for protocol %d", header.UDPProtocolNumber) } return &UDPMatcher{ diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go index e8930f031..f061c5d62 100644 --- a/pkg/sentry/socket/netlink/provider_vfs2.go +++ b/pkg/sentry/socket/netlink/provider_vfs2.go @@ -51,7 +51,7 @@ func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol vfsfd := &s.vfsfd mnt := t.Kernel().SocketMount() - d := sockfs.NewDentry(t.Credentials(), mnt) + d := sockfs.NewDentry(t, mnt) defer d.DecRef(t) if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index c84d8bd7c..f4d034c13 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -36,9 +36,9 @@ type commandKind int const ( kindNew commandKind = 0x0 - kindDel = 0x1 - kindGet = 0x2 - kindSet = 0x3 + kindDel commandKind = 0x1 + kindGet commandKind = 0x2 + kindSet commandKind = 0x3 ) func typeKind(typ uint16) commandKind { @@ -423,6 +423,11 @@ func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlin } attrs = rest + // NOTE: A netlink message will contain multiple header attributes. + // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent + // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the + // local interface address. We add the local interface address here + // and ignore the IFA_ADDRESS. switch ahdr.Type { case linux.IFA_LOCAL: err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ @@ -439,11 +444,60 @@ func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlin } else if err != nil { return syserr.ErrInvalidArgument } + case linux.IFA_ADDRESS: + default: + return syserr.ErrNotSupported } } return nil } +// delAddr handles RTM_DELADDR requests. +func (p *Protocol) delAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network stack. + return syserr.ErrProtocolNotSupported + } + + var ifa linux.InterfaceAddrMessage + attrs, ok := msg.GetData(&ifa) + if !ok { + return syserr.ErrInvalidArgument + } + + for !attrs.Empty() { + ahdr, value, rest, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument + } + attrs = rest + + // NOTE: A netlink message will contain multiple header attributes. + // Both the IFA_ADDRESS and IFA_LOCAL attributes are typically sent + // with IFA_ADDRESS being a prefix address and IFA_LOCAL being the + // local interface address. We use the local interface address to + // remove the address and ignore the IFA_ADDRESS. + switch ahdr.Type { + case linux.IFA_LOCAL: + err := stack.RemoveInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ + Family: ifa.Family, + PrefixLen: ifa.PrefixLen, + Flags: ifa.Flags, + Addr: value, + }) + if err != nil { + return syserr.ErrBadLocalAddress + } + case linux.IFA_ADDRESS: + default: + return syserr.ErrNotSupported + } + } + + return nil +} + // ProcessMessage implements netlink.Protocol.ProcessMessage. func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { hdr := msg.Header() @@ -485,6 +539,8 @@ func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms return p.dumpRoutes(ctx, msg, ms) case linux.RTM_NEWADDR: return p.newAddr(ctx, msg, ms) + case linux.RTM_DELADDR: + return p.delAddr(ctx, msg, ms) default: return syserr.ErrNotSupported } diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index c83b23242..461d524e5 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -37,6 +37,8 @@ import ( // to/from the kernel. // // SocketVFS2 implements socket.SocketVFS2 and transport.Credentialer. +// +// +stateify savable type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 211f07947..86c634715 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1244,6 +1244,18 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam vP := primitive.Int32(boolToInt32(v)) return &vP, nil + case linux.SO_ACCEPTCONN: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.AcceptConnOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + vP := primitive.Int32(boolToInt32(v)) + return &vP, nil + default: socket.GetSockOptEmitUnimplementedEvent(t, name) } diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 4c6791fff..b0d9e4d9e 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -35,6 +35,8 @@ import ( // SocketVFS2 encapsulates all the state needed to represent a network stack // endpoint in the kernel context. +// +// +stateify savable type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl @@ -55,7 +57,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu } mnt := t.Kernel().SocketMount() - d := sockfs.NewDentry(t.Credentials(), mnt) + d := sockfs.NewDentry(t, mnt) defer d.DecRef(t) s := &SocketVFS2{ diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 1028d2a6e..fa9ac9059 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -100,56 +100,101 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { return nicAddrs } -// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. -func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { +// convertAddr converts an InterfaceAddr to a ProtocolAddress. +func convertAddr(addr inet.InterfaceAddr) (tcpip.ProtocolAddress, error) { var ( - protocol tcpip.NetworkProtocolNumber - address tcpip.Address + protocol tcpip.NetworkProtocolNumber + address tcpip.Address + protocolAddress tcpip.ProtocolAddress ) switch addr.Family { case linux.AF_INET: - if len(addr.Addr) < header.IPv4AddressSize { - return syserror.EINVAL + if len(addr.Addr) != header.IPv4AddressSize { + return protocolAddress, syserror.EINVAL } if addr.PrefixLen > header.IPv4AddressSize*8 { - return syserror.EINVAL + return protocolAddress, syserror.EINVAL } protocol = ipv4.ProtocolNumber - address = tcpip.Address(addr.Addr[:header.IPv4AddressSize]) - + address = tcpip.Address(addr.Addr) case linux.AF_INET6: - if len(addr.Addr) < header.IPv6AddressSize { - return syserror.EINVAL + if len(addr.Addr) != header.IPv6AddressSize { + return protocolAddress, syserror.EINVAL } if addr.PrefixLen > header.IPv6AddressSize*8 { - return syserror.EINVAL + return protocolAddress, syserror.EINVAL } protocol = ipv6.ProtocolNumber - address = tcpip.Address(addr.Addr[:header.IPv6AddressSize]) - + address = tcpip.Address(addr.Addr) default: - return syserror.ENOTSUP + return protocolAddress, syserror.ENOTSUP } - protocolAddress := tcpip.ProtocolAddress{ + protocolAddress = tcpip.ProtocolAddress{ Protocol: protocol, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: address, PrefixLen: int(addr.PrefixLen), }, } + return protocolAddress, nil +} + +// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. +func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + protocolAddress, err := convertAddr(addr) + if err != nil { + return err + } // Attach address to interface. - if err := s.Stack.AddProtocolAddressWithOptions(tcpip.NICID(idx), protocolAddress, stack.CanBePrimaryEndpoint); err != nil { + nicID := tcpip.NICID(idx) + if err := s.Stack.AddProtocolAddressWithOptions(nicID, protocolAddress, stack.CanBePrimaryEndpoint); err != nil { + return syserr.TranslateNetstackError(err).ToError() + } + + // Add route for local network if it doesn't exist already. + localRoute := tcpip.Route{ + Destination: protocolAddress.AddressWithPrefix.Subnet(), + Gateway: "", // No gateway for local network. + NIC: nicID, + } + + for _, rt := range s.Stack.GetRouteTable() { + if rt.Equal(localRoute) { + return nil + } + } + + // Local route does not exist yet. Add it. + s.Stack.AddRoute(localRoute) + + return nil +} + +// RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. +func (s *Stack) RemoveInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + protocolAddress, err := convertAddr(addr) + if err != nil { + return err + } + + // Remove addresses matching the address and prefix. + nicID := tcpip.NICID(idx) + if err := s.Stack.RemoveAddress(nicID, protocolAddress.AddressWithPrefix.Address); err != nil { return syserr.TranslateNetstackError(err).ToError() } - // Add route for local network. - s.Stack.AddRoute(tcpip.Route{ + // Remove the corresponding local network route if it exists. + localRoute := tcpip.Route{ Destination: protocolAddress.AddressWithPrefix.Subnet(), Gateway: "", // No gateway for local network. - NIC: tcpip.NICID(idx), + NIC: nicID, + } + s.Stack.RemoveRoutes(func(rt tcpip.Route) bool { + return rt.Equal(localRoute) }) + return nil } diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index cc7408698..cce0acc33 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -8,7 +8,7 @@ go_template_instance( out = "socket_refs.go", package = "unix", prefix = "socketOperations", - template = "//pkg/refs_vfs2:refs_template", + template = "//pkg/refsvfs2:refs_template", types = { "T": "SocketOperations", }, @@ -19,7 +19,7 @@ go_template_instance( out = "socket_vfs2_refs.go", package = "unix", prefix = "socketVFS2", - template = "//pkg/refs_vfs2:refs_template", + template = "//pkg/refsvfs2:refs_template", types = { "T": "SocketVFS2", }, @@ -43,6 +43,7 @@ go_library( "//pkg/log", "//pkg/marshal", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/device", diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index 26c3a51b9..3ebbd28b0 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -20,7 +20,7 @@ go_template_instance( out = "queue_refs.go", package = "transport", prefix = "queue", - template = "//pkg/refs_vfs2:refs_template", + template = "//pkg/refsvfs2:refs_template", types = { "T": "queue", }, @@ -44,6 +44,7 @@ go_library( "//pkg/ilist", "//pkg/log", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/sync", "//pkg/syserr", "//pkg/tcpip", diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index d6fc03520..b648273a4 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -32,6 +32,8 @@ import ( const initialLimit = 16 * 1024 // A RightsControlMessage is a control message containing FDs. +// +// +stateify savable type RightsControlMessage interface { // Clone returns a copy of the RightsControlMessage. Clone() RightsControlMessage @@ -336,7 +338,7 @@ type Receiver interface { RecvMaxQueueSize() int64 // Release releases any resources owned by the Receiver. It should be - // called before droping all references to a Receiver. + // called before dropping all references to a Receiver. Release(ctx context.Context) } @@ -487,7 +489,7 @@ func (q *streamQueueReceiver) Recv(ctx context.Context, data [][]byte, wantCreds c := q.control.Clone() // Don't consume data since we are peeking. - copied, data, _ = vecCopy(data, q.buffer) + copied, _, _ = vecCopy(data, q.buffer) return copied, copied, c, false, q.addr, notify, nil } @@ -572,6 +574,12 @@ func (q *streamQueueReceiver) Recv(ctx context.Context, data [][]byte, wantCreds return copied, copied, c, cmTruncated, q.addr, notify, nil } +// Release implements Receiver.Release. +func (q *streamQueueReceiver) Release(ctx context.Context) { + q.queueReceiver.Release(ctx) + q.control.Release(ctx) +} + // A ConnectedEndpoint is an Endpoint that can be used to send Messages. type ConnectedEndpoint interface { // Passcred implements Endpoint.Passcred. @@ -619,7 +627,7 @@ type ConnectedEndpoint interface { SendMaxQueueSize() int64 // Release releases any resources owned by the ConnectedEndpoint. It should - // be called before droping all references to a ConnectedEndpoint. + // be called before dropping all references to a ConnectedEndpoint. Release(ctx context.Context) // CloseUnread sets the fact that this end is closed with unread data to @@ -879,7 +887,7 @@ func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { func (e *baseEndpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { switch opt { - case tcpip.KeepaliveEnabledOption: + case tcpip.KeepaliveEnabledOption, tcpip.AcceptConnOption: return false, nil case tcpip.PasscredOption: diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index a4a76d0a3..adad485a9 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -81,7 +81,6 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty }, } s.EnableLeakCheck() - return fs.NewFile(ctx, d, flags, &s) } diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 678355fb9..7a78444dc 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -55,7 +55,7 @@ var _ = socket.SocketVFS2(&SocketVFS2{}) // returns a corresponding file description. func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) { mnt := t.Kernel().SocketMount() - d := sockfs.NewDentry(t.Credentials(), mnt) + d := sockfs.NewDentry(t, mnt) defer d.DecRef(t) fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{}) @@ -80,6 +80,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3 stype: stype, }, } + sock.EnableLeakCheck() sock.LockFD.Init(locks) vfsfd := &sock.vfsfd if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{ |