summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot/network.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot/network.go')
-rw-r--r--runsc/boot/network.go341
1 files changed, 341 insertions, 0 deletions
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
new file mode 100644
index 000000000..14d2f56a5
--- /dev/null
+++ b/runsc/boot/network.go
@@ -0,0 +1,341 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "fmt"
+ "net"
+ "runtime"
+ "strings"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+var (
+ // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and
+ // "::1/8" on "lo" interface.
+ DefaultLoopbackLink = LoopbackLink{
+ Name: "lo",
+ Addresses: []net.IP{
+ net.IP("\x7f\x00\x00\x01"),
+ net.IPv6loopback,
+ },
+ Routes: []Route{
+ {
+ Destination: net.IPNet{
+ IP: net.IPv4(0x7f, 0, 0, 0),
+ Mask: net.IPv4Mask(0xff, 0, 0, 0),
+ },
+ },
+ {
+ Destination: net.IPNet{
+ IP: net.IPv6loopback,
+ Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
+ },
+ },
+ },
+ }
+)
+
+// Network exposes methods that can be used to configure a network stack.
+type Network struct {
+ Stack *stack.Stack
+}
+
+// Route represents a route in the network stack.
+type Route struct {
+ Destination net.IPNet
+ Gateway net.IP
+}
+
+// DefaultRoute represents a catch all route to the default gateway.
+type DefaultRoute struct {
+ Route Route
+ Name string
+}
+
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a give FDBasedLink.
+type QueueingDiscipline int
+
+const (
+ // QDiscNone disables any queueing for the underlying FD.
+ QDiscNone QueueingDiscipline = iota
+
+ // QDiscFIFO applies a simple fifo based queue to the underlying
+ // FD.
+ QDiscFIFO
+)
+
+// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s
+// else returns an error.
+func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
+ switch s {
+ case "none":
+ return QDiscNone, nil
+ case "fifo":
+ return QDiscFIFO, nil
+ default:
+ return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
+ }
+}
+
+// String implements fmt.Stringer.
+func (q QueueingDiscipline) String() string {
+ switch q {
+ case QDiscNone:
+ return "none"
+ case QDiscFIFO:
+ return "fifo"
+ default:
+ panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
+ }
+}
+
+// FDBasedLink configures an fd-based link.
+type FDBasedLink struct {
+ Name string
+ MTU int
+ Addresses []net.IP
+ Routes []Route
+ GSOMaxSize uint32
+ SoftwareGSOEnabled bool
+ TXChecksumOffload bool
+ RXChecksumOffload bool
+ LinkAddress net.HardwareAddr
+ QDisc QueueingDiscipline
+
+ // NumChannels controls how many underlying FD's are to be used to
+ // create this endpoint.
+ NumChannels int
+}
+
+// LoopbackLink configures a loopback li nk.
+type LoopbackLink struct {
+ Name string
+ Addresses []net.IP
+ Routes []Route
+}
+
+// CreateLinksAndRoutesArgs are arguments to CreateLinkAndRoutes.
+type CreateLinksAndRoutesArgs struct {
+ // FilePayload contains the fds associated with the FDBasedLinks. The
+ // number of fd's should match the sum of the NumChannels field of the
+ // FDBasedLink entries below.
+ urpc.FilePayload
+
+ LoopbackLinks []LoopbackLink
+ FDBasedLinks []FDBasedLink
+
+ Defaultv4Gateway DefaultRoute
+ Defaultv6Gateway DefaultRoute
+}
+
+// Empty returns true if route hasn't been set.
+func (r *Route) Empty() bool {
+ return r.Destination.IP == nil && r.Destination.Mask == nil && r.Gateway == nil
+}
+
+func (r *Route) toTcpipRoute(id tcpip.NICID) (tcpip.Route, error) {
+ subnet, err := tcpip.NewSubnet(ipToAddress(r.Destination.IP), ipMaskToAddressMask(r.Destination.Mask))
+ if err != nil {
+ return tcpip.Route{}, err
+ }
+ return tcpip.Route{
+ Destination: subnet,
+ Gateway: ipToAddress(r.Gateway),
+ NIC: id,
+ }, nil
+}
+
+// CreateLinksAndRoutes creates links and routes in a network stack. It should
+// only be called once.
+func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error {
+ wantFDs := 0
+ for _, l := range args.FDBasedLinks {
+ wantFDs += l.NumChannels
+ }
+ if got := len(args.FilePayload.Files); got != wantFDs {
+ return fmt.Errorf("args.FilePayload.Files has %d FD's but we need %d entries based on FDBasedLinks", got, wantFDs)
+ }
+
+ var nicID tcpip.NICID
+ nicids := make(map[string]tcpip.NICID)
+
+ // Collect routes from all links.
+ var routes []tcpip.Route
+
+ // Loopback normally appear before other interfaces.
+ for _, link := range args.LoopbackLinks {
+ nicID++
+ nicids[link.Name] = nicID
+
+ linkEP := loopback.New()
+
+ log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
+ return err
+ }
+
+ // Collect the routes from this link.
+ for _, r := range link.Routes {
+ route, err := r.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+ }
+
+ fdOffset := 0
+ for _, link := range args.FDBasedLinks {
+ nicID++
+ nicids[link.Name] = nicID
+
+ FDs := []int{}
+ for j := 0; j < link.NumChannels; j++ {
+ // Copy the underlying FD.
+ oldFD := args.FilePayload.Files[fdOffset].Fd()
+ newFD, err := syscall.Dup(int(oldFD))
+ if err != nil {
+ return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
+ }
+ FDs = append(FDs, newFD)
+ fdOffset++
+ }
+
+ mac := tcpip.LinkAddress(link.LinkAddress)
+ log.Infof("gso max size is: %d", link.GSOMaxSize)
+
+ linkEP, err := fdbased.New(&fdbased.Options{
+ FDs: FDs,
+ MTU: uint32(link.MTU),
+ EthernetHeader: true,
+ Address: mac,
+ PacketDispatchMode: fdbased.RecvMMsg,
+ GSOMaxSize: link.GSOMaxSize,
+ SoftwareGSOEnabled: link.SoftwareGSOEnabled,
+ TXChecksumOffload: link.TXChecksumOffload,
+ RXChecksumOffload: link.RXChecksumOffload,
+ })
+ if err != nil {
+ return err
+ }
+
+ switch link.QDisc {
+ case QDiscNone:
+ case QDiscFIFO:
+ log.Infof("Enabling FIFO QDisc on %q", link.Name)
+ linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
+ }
+
+ log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
+ if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
+ return err
+ }
+
+ // Collect the routes from this link.
+ for _, r := range link.Routes {
+ route, err := r.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+ }
+
+ if !args.Defaultv4Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv4Gateway.Name]
+ if !ok {
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name)
+ }
+ route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+
+ if !args.Defaultv6Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv6Gateway.Name]
+ if !ok {
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name)
+ }
+ route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+
+ log.Infof("Setting routes %+v", routes)
+ n.Stack.SetRouteTable(routes)
+ return nil
+}
+
+// createNICWithAddrs creates a NIC in the network stack and adds the given
+// addresses.
+func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP) error {
+ opts := stack.NICOptions{Name: name}
+ if err := n.Stack.CreateNICWithOptions(id, sniffer.New(ep), opts); err != nil {
+ return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err)
+ }
+
+ // Always start with an arp address for the NIC.
+ if err := n.Stack.AddAddress(id, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
+ return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, arp.ProtocolNumber, arp.ProtocolAddress, err)
+ }
+
+ for _, addr := range addrs {
+ proto, tcpipAddr := ipToAddressAndProto(addr)
+ if err := n.Stack.AddAddress(id, proto, tcpipAddr); err != nil {
+ return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, proto, tcpipAddr, err)
+ }
+ }
+ return nil
+}
+
+// ipToAddressAndProto converts IP to tcpip.Address and a protocol number.
+//
+// Note: don't use 'len(ip)' to determine IP version because length is always 16.
+func ipToAddressAndProto(ip net.IP) (tcpip.NetworkProtocolNumber, tcpip.Address) {
+ if i4 := ip.To4(); i4 != nil {
+ return ipv4.ProtocolNumber, tcpip.Address(i4)
+ }
+ return ipv6.ProtocolNumber, tcpip.Address(ip)
+}
+
+// ipToAddress converts IP to tcpip.Address, ignoring the protocol.
+func ipToAddress(ip net.IP) tcpip.Address {
+ _, addr := ipToAddressAndProto(ip)
+ return addr
+}
+
+// ipMaskToAddressMask converts IPMask to tcpip.AddressMask, ignoring the
+// protocol.
+func ipMaskToAddressMask(ipMask net.IPMask) tcpip.AddressMask {
+ return tcpip.AddressMask(ipToAddress(net.IP(ipMask)))
+}