diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/socket/epsocket/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/epsocket/provider.go | 19 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_socket.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/network/arp/BUILD | 1 | ||||
-rw-r--r-- | pkg/tcpip/network/arp/arp_test.go | 3 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/BUILD | 17 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/icmp.go | 190 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/icmp_test.go | 124 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/BUILD | 50 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/endpoint.go | 665 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/endpoint_state.go | 61 | ||||
-rw-r--r-- | pkg/tcpip/transport/ping/protocol.go | 106 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint_state.go | 2 |
13 files changed, 911 insertions, 330 deletions
diff --git a/pkg/sentry/socket/epsocket/BUILD b/pkg/sentry/socket/epsocket/BUILD index 0e463a92a..8430886cb 100644 --- a/pkg/sentry/socket/epsocket/BUILD +++ b/pkg/sentry/socket/epsocket/BUILD @@ -50,6 +50,7 @@ go_library( "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/network/ipv6", "//pkg/tcpip/stack", diff --git a/pkg/sentry/socket/epsocket/provider.go b/pkg/sentry/socket/epsocket/provider.go index 5616435b3..6c1e3b6b9 100644 --- a/pkg/sentry/socket/epsocket/provider.go +++ b/pkg/sentry/socket/epsocket/provider.go @@ -23,6 +23,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/socket" "gvisor.googlesource.com/gvisor/pkg/syserr" "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" @@ -37,8 +38,8 @@ type provider struct { netProto tcpip.NetworkProtocolNumber } -// GetTransportProtocol figures out transport protocol. Currently only TCP and -// UDP are supported. +// GetTransportProtocol figures out transport protocol. Currently only TCP, +// UDP, and ICMP are supported. func GetTransportProtocol(stype unix.SockType, protocol int) (tcpip.TransportProtocolNumber, *syserr.Error) { switch stype { case linux.SOCK_STREAM: @@ -48,14 +49,16 @@ func GetTransportProtocol(stype unix.SockType, protocol int) (tcpip.TransportPro return tcp.ProtocolNumber, nil case linux.SOCK_DGRAM: - if protocol != 0 && protocol != syscall.IPPROTO_UDP { - return 0, syserr.ErrInvalidArgument + switch protocol { + case 0, syscall.IPPROTO_UDP: + return udp.ProtocolNumber, nil + case syscall.IPPROTO_ICMP: + return header.ICMPv4ProtocolNumber, nil + case syscall.IPPROTO_ICMPV6: + return header.ICMPv6ProtocolNumber, nil } - return udp.ProtocolNumber, nil - - default: - return 0, syserr.ErrInvalidArgument } + return 0, syserr.ErrInvalidArgument } // Socket creates a new socket object for the AF_INET or AF_INET6 family. diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 6258a1539..d6d5dba8a 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -791,7 +791,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f } // Reject flags that we don't handle yet. - if flags & ^(linux.MSG_DONTWAIT|linux.MSG_NOSIGNAL|linux.MSG_PEEK|linux.MSG_TRUNC) != 0 { + if flags & ^(linux.MSG_DONTWAIT|linux.MSG_NOSIGNAL|linux.MSG_PEEK|linux.MSG_TRUNC|linux.MSG_CONFIRM) != 0 { return 0, syscall.EINVAL } diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index e6d0899a9..58d174965 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -30,5 +30,6 @@ go_test( "//pkg/tcpip/link/sniffer", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/stack", + "//pkg/tcpip/transport/ping", ], ) diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 47b10e64e..6d61ff1d7 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -16,6 +16,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/ping" ) const ( @@ -32,7 +33,7 @@ type testContext struct { } func newTestContext(t *testing.T) *testContext { - s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, arp.ProtocolName}, []string{ipv4.PingProtocolName}) + s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName, arp.ProtocolName}, []string{ping.ProtocolName4}) const defaultMTU = 65536 id, linkEP := channel.New(256, defaultMTU, stackLinkAddr) diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 9df113df1..02d55355c 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -1,6 +1,6 @@ package(licenses = ["notice"]) # BSD -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "ipv4", @@ -19,20 +19,5 @@ go_library( "//pkg/tcpip/network/fragmentation", "//pkg/tcpip/network/hash", "//pkg/tcpip/stack", - "//pkg/waiter", - ], -) - -go_test( - name = "ipv4_test", - size = "small", - srcs = ["icmp_test.go"], - deps = [ - ":ipv4", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/tcpip/link/channel", - "//pkg/tcpip/link/sniffer", - "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index ffd761350..3c382fdc2 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -5,26 +5,14 @@ package ipv4 import ( - "context" "encoding/binary" - "time" "gvisor.googlesource.com/gvisor/pkg/tcpip" "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" "gvisor.googlesource.com/gvisor/pkg/tcpip/header" "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" ) -// PingProtocolName is a pseudo transport protocol used to handle ping replies. -// Use it when constructing a stack that intends to use ipv4.Ping. -const PingProtocolName = "icmpv4ping" - -// pingProtocolNumber is a fake transport protocol used to -// deliver incoming ICMP echo replies. The ICMP identifier -// number is used as a port number for multiplexing. -const pingProtocolNumber tcpip.TransportProtocolNumber = 256 + 11 - // handleControl handles the case when an ICMP packet contains the headers of // the original packet that caused the ICMP one to be sent. This information is // used to find out which transport endpoint must be notified about the ICMP @@ -78,7 +66,10 @@ func (e *endpoint) handleICMP(r *stack.Route, vv *buffer.VectorisedView) { } case header.ICMPv4EchoReply: - e.dispatcher.DeliverTransportPacket(r, pingProtocolNumber, vv) + if len(v) < header.ICMPv4EchoMinimumSize { + return + } + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, vv) case header.ICMPv4DstUnreachable: if len(v) < header.ICMPv4DstUnreachableMinimumSize { @@ -104,179 +95,20 @@ type echoRequest struct { func (e *endpoint) echoReplier() { for req := range e.echoRequests { - sendICMPv4(&req.r, header.ICMPv4EchoReply, 0, req.v) + sendPing4(&req.r, 0, req.v) req.r.Release() } } -func sendICMPv4(r *stack.Route, typ header.ICMPv4Type, code byte, data buffer.View) *tcpip.Error { - hdr := buffer.NewPrependable(header.ICMPv4MinimumSize + int(r.MaxHeaderLength())) +func sendPing4(r *stack.Route, code byte, data buffer.View) *tcpip.Error { + hdr := buffer.NewPrependable(header.ICMPv4EchoMinimumSize + int(r.MaxHeaderLength())) - icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) - icmpv4.SetType(typ) + icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4EchoMinimumSize)) + icmpv4.SetType(header.ICMPv4EchoReply) icmpv4.SetCode(code) + copy(icmpv4[header.ICMPv4MinimumSize:], data) + data = data[header.ICMPv4EchoMinimumSize-header.ICMPv4MinimumSize:] icmpv4.SetChecksum(^header.Checksum(icmpv4, header.Checksum(data, 0))) return r.WritePacket(&hdr, data, header.ICMPv4ProtocolNumber) } - -// A Pinger can send echo requests to an address. -type Pinger struct { - Stack *stack.Stack - NICID tcpip.NICID - Addr tcpip.Address - LocalAddr tcpip.Address // optional - Wait time.Duration // if zero, defaults to 1 second - Count uint16 // if zero, defaults to MaxUint16 -} - -// Ping sends echo requests to an ICMPv4 endpoint. -// Responses are streamed to the channel ch. -func (p *Pinger) Ping(ctx context.Context, ch chan<- PingReply) *tcpip.Error { - count := p.Count - if count == 0 { - count = 1<<16 - 1 - } - wait := p.Wait - if wait == 0 { - wait = 1 * time.Second - } - - r, err := p.Stack.FindRoute(p.NICID, p.LocalAddr, p.Addr, ProtocolNumber) - if err != nil { - return err - } - - netProtos := []tcpip.NetworkProtocolNumber{ProtocolNumber} - ep := &pingEndpoint{ - stack: p.Stack, - pktCh: make(chan buffer.View, 1), - } - id := stack.TransportEndpointID{ - LocalAddress: r.LocalAddress, - RemoteAddress: p.Addr, - } - - _, err = p.Stack.PickEphemeralPort(func(port uint16) (bool, *tcpip.Error) { - id.LocalPort = port - err := p.Stack.RegisterTransportEndpoint(p.NICID, netProtos, pingProtocolNumber, id, ep) - switch err { - case nil: - return true, nil - case tcpip.ErrPortInUse: - return false, nil - default: - return false, err - } - }) - if err != nil { - return err - } - defer p.Stack.UnregisterTransportEndpoint(p.NICID, netProtos, pingProtocolNumber, id) - - v := buffer.NewView(4) - binary.BigEndian.PutUint16(v[0:], id.LocalPort) - - start := time.Now() - - done := make(chan struct{}) - go func(count int) { - loop: - for ; count > 0; count-- { - select { - case v := <-ep.pktCh: - seq := binary.BigEndian.Uint16(v[header.ICMPv4MinimumSize+2:]) - ch <- PingReply{ - Duration: time.Since(start) - time.Duration(seq)*wait, - SeqNumber: seq, - } - case <-ctx.Done(): - break loop - } - } - close(done) - }(int(count)) - defer func() { <-done }() - - t := time.NewTicker(wait) - defer t.Stop() - for seq := uint16(0); seq < count; seq++ { - select { - case <-t.C: - case <-ctx.Done(): - return nil - } - binary.BigEndian.PutUint16(v[2:], seq) - sent := time.Now() - if err := sendICMPv4(&r, header.ICMPv4Echo, 0, v); err != nil { - ch <- PingReply{ - Error: err, - Duration: time.Since(sent), - SeqNumber: seq, - } - } - } - return nil -} - -// PingReply summarizes an ICMP echo reply. -type PingReply struct { - Error *tcpip.Error // reports any errors sending a ping request - Duration time.Duration - SeqNumber uint16 -} - -type pingProtocol struct{} - -func (*pingProtocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { - return nil, tcpip.ErrNotSupported // endpoints are created directly -} - -func (*pingProtocol) Number() tcpip.TransportProtocolNumber { return pingProtocolNumber } - -func (*pingProtocol) MinimumPacketSize() int { return header.ICMPv4EchoMinimumSize } - -func (*pingProtocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { - ident := binary.BigEndian.Uint16(v[4:]) - return 0, ident, nil -} - -func (*pingProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *buffer.VectorisedView) bool { - return true -} - -// SetOption implements TransportProtocol.SetOption. -func (p *pingProtocol) SetOption(option interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption -} - -// Option implements TransportProtocol.Option. -func (p *pingProtocol) Option(option interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption -} - -func init() { - stack.RegisterTransportProtocolFactory(PingProtocolName, func() stack.TransportProtocol { - return &pingProtocol{} - }) -} - -type pingEndpoint struct { - stack *stack.Stack - pktCh chan buffer.View -} - -func (e *pingEndpoint) Close() { - close(e.pktCh) -} - -func (e *pingEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv *buffer.VectorisedView) { - select { - case e.pktCh <- vv.ToView(): - default: - } -} - -// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *pingEndpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv *buffer.VectorisedView) { -} diff --git a/pkg/tcpip/network/ipv4/icmp_test.go b/pkg/tcpip/network/ipv4/icmp_test.go deleted file mode 100644 index c55aa1835..000000000 --- a/pkg/tcpip/network/ipv4/icmp_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2016 The Netstack Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package ipv4_test - -import ( - "context" - "testing" - "time" - - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" -) - -const stackAddr = "\x0a\x00\x00\x01" - -type testContext struct { - t *testing.T - linkEP *channel.Endpoint - s *stack.Stack -} - -func newTestContext(t *testing.T) *testContext { - s := stack.New(&tcpip.StdClock{}, []string{ipv4.ProtocolName}, []string{ipv4.PingProtocolName}) - - const defaultMTU = 65536 - id, linkEP := channel.New(256, defaultMTU, "") - if testing.Verbose() { - id = sniffer.New(id) - } - if err := s.CreateNIC(1, id); err != nil { - t.Fatalf("CreateNIC failed: %v", err) - } - - if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr); err != nil { - t.Fatalf("AddAddress failed: %v", err) - } - - s.SetRouteTable([]tcpip.Route{{ - Destination: "\x00\x00\x00\x00", - Mask: "\x00\x00\x00\x00", - Gateway: "", - NIC: 1, - }}) - - return &testContext{ - t: t, - s: s, - linkEP: linkEP, - } -} - -func (c *testContext) cleanup() { - close(c.linkEP.C) -} - -func (c *testContext) loopback() { - go func() { - for pkt := range c.linkEP.C { - v := make(buffer.View, len(pkt.Header)+len(pkt.Payload)) - copy(v, pkt.Header) - copy(v[len(pkt.Header):], pkt.Payload) - vv := v.ToVectorisedView([1]buffer.View{}) - c.linkEP.Inject(pkt.Proto, &vv) - } - }() -} - -func TestEcho(t *testing.T) { - c := newTestContext(t) - defer c.cleanup() - c.loopback() - - ch := make(chan ipv4.PingReply, 1) - p := ipv4.Pinger{ - Stack: c.s, - NICID: 1, - Addr: stackAddr, - Wait: 10 * time.Millisecond, - Count: 1, // one ping only - } - if err := p.Ping(context.Background(), ch); err != nil { - t.Fatalf("icmp.Ping failed: %v", err) - } - - ping := <-ch - if ping.Error != nil { - t.Errorf("bad ping response: %v", ping.Error) - } -} - -func TestEchoSequence(t *testing.T) { - c := newTestContext(t) - defer c.cleanup() - c.loopback() - - const numPings = 3 - ch := make(chan ipv4.PingReply, numPings) - p := ipv4.Pinger{ - Stack: c.s, - NICID: 1, - Addr: stackAddr, - Wait: 10 * time.Millisecond, - Count: numPings, - } - if err := p.Ping(context.Background(), ch); err != nil { - t.Fatalf("icmp.Ping failed: %v", err) - } - - for i := uint16(0); i < numPings; i++ { - ping := <-ch - if ping.Error != nil { - t.Errorf("i=%d bad ping response: %v", i, ping.Error) - } - if ping.SeqNumber != i { - t.Errorf("SeqNumber=%d, want %d", ping.SeqNumber, i) - } - } -} diff --git a/pkg/tcpip/transport/ping/BUILD b/pkg/tcpip/transport/ping/BUILD new file mode 100644 index 000000000..a39a887b6 --- /dev/null +++ b/pkg/tcpip/transport/ping/BUILD @@ -0,0 +1,50 @@ +package(licenses = ["notice"]) # BSD + +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") +load("//tools/go_stateify:defs.bzl", "go_stateify") + +go_stateify( + name = "ping_state", + srcs = [ + "endpoint.go", + "endpoint_state.go", + "ping_packet_list.go", + ], + out = "ping_state.go", + imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + package = "ping", +) + +go_template_instance( + name = "ping_packet_list", + out = "ping_packet_list.go", + package = "ping", + prefix = "pingPacket", + template = "//pkg/ilist:generic_list", + types = { + "Linker": "*pingPacket", + }, +) + +go_library( + name = "ping", + srcs = [ + "endpoint.go", + "endpoint_state.go", + "ping_packet_list.go", + "ping_state.go", + "protocol.go", + ], + importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/ping", + visibility = ["//visibility:public"], + deps = [ + "//pkg/sleep", + "//pkg/state", + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//pkg/tcpip/stack", + "//pkg/waiter", + ], +) diff --git a/pkg/tcpip/transport/ping/endpoint.go b/pkg/tcpip/transport/ping/endpoint.go new file mode 100644 index 000000000..609e7d947 --- /dev/null +++ b/pkg/tcpip/transport/ping/endpoint.go @@ -0,0 +1,665 @@ +// Copyright 2016 The Netstack Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ping + +import ( + "encoding/binary" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/sleep" + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +type pingPacket struct { + pingPacketEntry + senderAddress tcpip.FullAddress + data buffer.VectorisedView `state:".(buffer.VectorisedView)"` + timestamp int64 + hasTimestamp bool + // views is used as buffer for data when its length is large + // enough to store a VectorisedView. + views [8]buffer.View `state:"nosave"` +} + +type endpointState int + +const ( + stateInitial endpointState = iota + stateBound + stateConnected + stateClosed +) + +// endpoint represents a ping endpoint. This struct serves as the interface +// between users of the endpoint and the protocol implementation; it is legal to +// have concurrent goroutines make calls into the endpoint, they are properly +// synchronized. +type endpoint struct { + // The following fields are initialized at creation time and do not + // change throughout the lifetime of the endpoint. + stack *stack.Stack `state:"manual"` + netProto tcpip.NetworkProtocolNumber + waiterQueue *waiter.Queue + + // The following fields are used to manage the receive queue, and are + // protected by rcvMu. + rcvMu sync.Mutex `state:"nosave"` + rcvReady bool + rcvList pingPacketList + rcvBufSizeMax int + rcvBufSize int + rcvClosed bool + rcvTimestamp bool + + // The following fields are protected by the mu mutex. + mu sync.RWMutex `state:"nosave"` + sndBufSize int + id stack.TransportEndpointID + state endpointState + bindNICID tcpip.NICID + bindAddr tcpip.Address + regNICID tcpip.NICID + route stack.Route `state:"manual"` +} + +func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint { + return &endpoint{ + stack: stack, + netProto: netProto, + waiterQueue: waiterQueue, + rcvBufSizeMax: 32 * 1024, + sndBufSize: 32 * 1024, + } +} + +// Close puts the endpoint in a closed state and frees all resources +// associated with it. +func (e *endpoint) Close() { + e.mu.Lock() + defer e.mu.Unlock() + + switch e.state { + case stateBound, stateConnected: + e.stack.UnregisterTransportEndpoint(e.regNICID, []tcpip.NetworkProtocolNumber{e.netProto}, ProtocolNumber4, e.id) + } + + // Close the receive list and drain it. + e.rcvMu.Lock() + e.rcvClosed = true + e.rcvBufSize = 0 + for !e.rcvList.Empty() { + p := e.rcvList.Front() + e.rcvList.Remove(p) + } + e.rcvMu.Unlock() + + e.route.Release() + + // Update the state. + e.state = stateClosed +} + +// Read reads data from the endpoint. This method does not block if +// there is no data pending. +func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { + e.rcvMu.Lock() + + if e.rcvList.Empty() { + err := tcpip.ErrWouldBlock + if e.rcvClosed { + err = tcpip.ErrClosedForReceive + } + e.rcvMu.Unlock() + return buffer.View{}, tcpip.ControlMessages{}, err + } + + p := e.rcvList.Front() + e.rcvList.Remove(p) + e.rcvBufSize -= p.data.Size() + ts := e.rcvTimestamp + + e.rcvMu.Unlock() + + if addr != nil { + *addr = p.senderAddress + } + + if ts && !p.hasTimestamp { + // Linux uses the current time. + p.timestamp = e.stack.NowNanoseconds() + } + + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: ts, Timestamp: p.timestamp}, nil +} + +// prepareForWrite prepares the endpoint for sending data. In particular, it +// binds it if it's still in the initial state. To do so, it must first +// reacquire the mutex in exclusive mode. +// +// Returns true for retry if preparation should be retried. +func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpip.Error) { + switch e.state { + case stateInitial: + case stateConnected: + return false, nil + + case stateBound: + if to == nil { + return false, tcpip.ErrDestinationRequired + } + return false, nil + default: + return false, tcpip.ErrInvalidEndpointState + } + + e.mu.RUnlock() + defer e.mu.RLock() + + e.mu.Lock() + defer e.mu.Unlock() + + // The state changed when we released the shared locked and re-acquired + // it in exclusive mode. Try again. + if e.state != stateInitial { + return true, nil + } + + // The state is still 'initial', so try to bind the endpoint. + if err := e.bindLocked(tcpip.FullAddress{}, nil); err != nil { + return false, err + } + + return true, nil +} + +// Write writes data to the endpoint's peer. This method does not block +// if the data cannot be written. +func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, *tcpip.Error) { + // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) + if opts.More { + return 0, tcpip.ErrInvalidOptionValue + } + + to := opts.To + + e.mu.RLock() + defer e.mu.RUnlock() + + // Prepare for write. + for { + retry, err := e.prepareForWrite(to) + if err != nil { + return 0, err + } + + if !retry { + break + } + } + + var route *stack.Route + if to == nil { + route = &e.route + + if route.IsResolutionRequired() { + // Promote lock to exclusive if using a shared route, given that it may + // need to change in Route.Resolve() call below. + e.mu.RUnlock() + defer e.mu.RLock() + + e.mu.Lock() + defer e.mu.Unlock() + + // Recheck state after lock was re-acquired. + if e.state != stateConnected { + return 0, tcpip.ErrInvalidEndpointState + } + } + } else { + // Reject destination address if it goes through a different + // NIC than the endpoint was bound to. + nicid := to.NIC + if e.bindNICID != 0 { + if nicid != 0 && nicid != e.bindNICID { + return 0, tcpip.ErrNoRoute + } + + nicid = e.bindNICID + } + + toCopy := *to + to = &toCopy + netProto, err := e.checkV4Mapped(to, true) + if err != nil { + return 0, err + } + + // Find the enpoint. + r, err := e.stack.FindRoute(nicid, e.bindAddr, to.Addr, netProto) + if err != nil { + return 0, err + } + defer r.Release() + + route = &r + } + + if route.IsResolutionRequired() { + waker := &sleep.Waker{} + if err := route.Resolve(waker); err != nil { + if err == tcpip.ErrWouldBlock { + // Link address needs to be resolved. Resolution was triggered the + // background. Better luck next time. + // + // TODO: queue up the request and send after link address + // is resolved. + route.RemoveWaker(waker) + return 0, tcpip.ErrNoLinkAddress + } + return 0, err + } + } + + v, err := p.Get(p.Size()) + if err != nil { + return 0, err + } + + switch e.netProto { + case header.IPv4ProtocolNumber: + err = sendPing4(route, e.id.LocalPort, v) + + case header.IPv6ProtocolNumber: + // TODO: Support IPv6. + } + + return uintptr(len(v)), err +} + +// Peek only returns data from a single datagram, so do nothing here. +func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) { + return 0, tcpip.ControlMessages{}, nil +} + +// SetSockOpt sets a socket option. Currently not supported. +func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { + switch v := opt.(type) { + case tcpip.TimestampOption: + e.rcvMu.Lock() + e.rcvTimestamp = v != 0 + e.rcvMu.Unlock() + } + return nil +} + +// GetSockOpt implements tcpip.Endpoint.GetSockOpt. +func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { + switch o := opt.(type) { + case tcpip.ErrorOption: + return nil + + case *tcpip.SendBufferSizeOption: + e.mu.Lock() + *o = tcpip.SendBufferSizeOption(e.sndBufSize) + e.mu.Unlock() + return nil + + case *tcpip.ReceiveBufferSizeOption: + e.rcvMu.Lock() + *o = tcpip.ReceiveBufferSizeOption(e.rcvBufSizeMax) + e.rcvMu.Unlock() + return nil + + case *tcpip.ReceiveQueueSizeOption: + e.rcvMu.Lock() + if e.rcvList.Empty() { + *o = 0 + } else { + p := e.rcvList.Front() + *o = tcpip.ReceiveQueueSizeOption(p.data.Size()) + } + e.rcvMu.Unlock() + return nil + + case *tcpip.TimestampOption: + e.rcvMu.Lock() + *o = 0 + if e.rcvTimestamp { + *o = 1 + } + e.rcvMu.Unlock() + } + + return tcpip.ErrUnknownProtocolOption +} + +func sendPing4(r *stack.Route, ident uint16, data buffer.View) *tcpip.Error { + if len(data) < header.ICMPv4EchoMinimumSize { + return tcpip.ErrInvalidEndpointState + } + + // Set the ident. Sequence number is provided by the user. + binary.BigEndian.PutUint16(data[header.ICMPv4MinimumSize:], ident) + + hdr := buffer.NewPrependable(header.ICMPv4EchoMinimumSize + int(r.MaxHeaderLength())) + + icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4EchoMinimumSize)) + copy(icmpv4, data) + data = data[header.ICMPv4EchoMinimumSize:] + + // Linux performs these basic checks. + if icmpv4.Type() != header.ICMPv4Echo || icmpv4.Code() != 0 { + return tcpip.ErrInvalidEndpointState + } + + icmpv4.SetChecksum(0) + icmpv4.SetChecksum(^header.Checksum(icmpv4, header.Checksum(data, 0))) + + return r.WritePacket(&hdr, data, header.ICMPv4ProtocolNumber) +} + +func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (tcpip.NetworkProtocolNumber, *tcpip.Error) { + netProto := e.netProto + if header.IsV4MappedAddress(addr.Addr) { + return 0, tcpip.ErrNoRoute + } + + // Fail if we're bound to an address length different from the one we're + // checking. + if l := len(e.id.LocalAddress); !allowMismatch && l != 0 && l != len(addr.Addr) { + return 0, tcpip.ErrInvalidEndpointState + } + + return netProto, nil +} + +// Connect connects the endpoint to its peer. Specifying a NIC is optional. +func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + nicid := addr.NIC + localPort := uint16(0) + switch e.state { + case stateBound, stateConnected: + localPort = e.id.LocalPort + if e.bindNICID == 0 { + break + } + + if nicid != 0 && nicid != e.bindNICID { + return tcpip.ErrInvalidEndpointState + } + + nicid = e.bindNICID + default: + return tcpip.ErrInvalidEndpointState + } + + netProto, err := e.checkV4Mapped(&addr, false) + if err != nil { + return err + } + + // Find a route to the desired destination. + r, err := e.stack.FindRoute(nicid, e.bindAddr, addr.Addr, netProto) + if err != nil { + return err + } + defer r.Release() + + id := stack.TransportEndpointID{ + LocalAddress: r.LocalAddress, + LocalPort: localPort, + RemoteAddress: r.RemoteAddress, + } + + // Even if we're connected, this endpoint can still be used to send + // packets on a different network protocol, so we register both even if + // v6only is set to false and this is an ipv6 endpoint. + netProtos := []tcpip.NetworkProtocolNumber{netProto} + + id, err = e.registerWithStack(nicid, netProtos, id) + if err != nil { + return err + } + + e.id = id + e.route = r.Clone() + e.regNICID = nicid + + e.state = stateConnected + + e.rcvMu.Lock() + e.rcvReady = true + e.rcvMu.Unlock() + + return nil +} + +// ConnectEndpoint is not supported. +func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error { + return tcpip.ErrInvalidEndpointState +} + +// Shutdown closes the read and/or write end of the endpoint connection +// to its peer. +func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { + e.mu.RLock() + defer e.mu.RUnlock() + + if e.state != stateConnected { + return tcpip.ErrNotConnected + } + + if flags&tcpip.ShutdownRead != 0 { + e.rcvMu.Lock() + wasClosed := e.rcvClosed + e.rcvClosed = true + e.rcvMu.Unlock() + + if !wasClosed { + e.waiterQueue.Notify(waiter.EventIn) + } + } + + return nil +} + +// Listen is not supported by UDP, it just fails. +func (*endpoint) Listen(int) *tcpip.Error { + return tcpip.ErrNotSupported +} + +// Accept is not supported by UDP, it just fails. +func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { + return nil, nil, tcpip.ErrNotSupported +} + +func (e *endpoint) registerWithStack(nicid tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, *tcpip.Error) { + if id.LocalPort != 0 { + // The endpoint already has a local port, just attempt to + // register it. + err := e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber4, id, e) + return id, err + } + + // We need to find a port for the endpoint. + _, err := e.stack.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { + id.LocalPort = p + err := e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber4, id, e) + switch err { + case nil: + return true, nil + case tcpip.ErrPortInUse: + return false, nil + default: + return false, err + } + }) + + return id, err +} + +func (e *endpoint) bindLocked(addr tcpip.FullAddress, commit func() *tcpip.Error) *tcpip.Error { + // Don't allow binding once endpoint is not in the initial state + // anymore. + if e.state != stateInitial { + return tcpip.ErrInvalidEndpointState + } + + netProto, err := e.checkV4Mapped(&addr, false) + if err != nil { + return err + } + + // Expand netProtos to include v4 and v6 if the caller is binding to a + // wildcard (empty) address, and this is an IPv6 endpoint with v6only + // set to false. + netProtos := []tcpip.NetworkProtocolNumber{netProto} + + if len(addr.Addr) != 0 { + // A local address was specified, verify that it's valid. + if e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) == 0 { + return tcpip.ErrBadLocalAddress + } + } + + id := stack.TransportEndpointID{ + LocalPort: addr.Port, + LocalAddress: addr.Addr, + } + id, err = e.registerWithStack(addr.NIC, netProtos, id) + if err != nil { + return err + } + if commit != nil { + if err := commit(); err != nil { + // Unregister, the commit failed. + e.stack.UnregisterTransportEndpoint(addr.NIC, netProtos, ProtocolNumber4, id) + return err + } + } + + e.id = id + e.regNICID = addr.NIC + + // Mark endpoint as bound. + e.state = stateBound + + e.rcvMu.Lock() + e.rcvReady = true + e.rcvMu.Unlock() + + return nil +} + +// Bind binds the endpoint to a specific local address and port. +// Specifying a NIC is optional. +func (e *endpoint) Bind(addr tcpip.FullAddress, commit func() *tcpip.Error) *tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + err := e.bindLocked(addr, commit) + if err != nil { + return err + } + + e.bindNICID = addr.NIC + e.bindAddr = addr.Addr + + return nil +} + +// GetLocalAddress returns the address to which the endpoint is bound. +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { + e.mu.RLock() + defer e.mu.RUnlock() + + return tcpip.FullAddress{ + NIC: e.regNICID, + Addr: e.id.LocalAddress, + Port: e.id.LocalPort, + }, nil +} + +// GetRemoteAddress returns the address to which the endpoint is connected. +func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { + e.mu.RLock() + defer e.mu.RUnlock() + + if e.state != stateConnected { + return tcpip.FullAddress{}, tcpip.ErrNotConnected + } + + return tcpip.FullAddress{ + NIC: e.regNICID, + Addr: e.id.RemoteAddress, + Port: e.id.RemotePort, + }, nil +} + +// Readiness returns the current readiness of the endpoint. For example, if +// waiter.EventIn is set, the endpoint is immediately readable. +func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { + // The endpoint is always writable. + result := waiter.EventOut & mask + + // Determine if the endpoint is readable if requested. + if (mask & waiter.EventIn) != 0 { + e.rcvMu.Lock() + if !e.rcvList.Empty() || e.rcvClosed { + result |= waiter.EventIn + } + e.rcvMu.Unlock() + } + + return result +} + +// HandlePacket is called by the stack when new packets arrive to this transport +// endpoint. +func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv *buffer.VectorisedView) { + e.rcvMu.Lock() + + // Drop the packet if our buffer is currently full. + if !e.rcvReady || e.rcvClosed || e.rcvBufSize >= e.rcvBufSizeMax { + e.rcvMu.Unlock() + return + } + + wasEmpty := e.rcvBufSize == 0 + + // Push new packet into receive list and increment the buffer size. + pkt := &pingPacket{ + senderAddress: tcpip.FullAddress{ + NIC: r.NICID(), + Addr: id.RemoteAddress, + }, + } + pkt.data = vv.Clone(pkt.views[:]) + e.rcvList.PushBack(pkt) + e.rcvBufSize += vv.Size() + + if e.rcvTimestamp { + pkt.timestamp = e.stack.NowNanoseconds() + pkt.hasTimestamp = true + } + + e.rcvMu.Unlock() + + // Notify any waiters that there's data to be read now. + if wasEmpty { + e.waiterQueue.Notify(waiter.EventIn) + } +} + +// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. +func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv *buffer.VectorisedView) { +} diff --git a/pkg/tcpip/transport/ping/endpoint_state.go b/pkg/tcpip/transport/ping/endpoint_state.go new file mode 100644 index 000000000..e1664f049 --- /dev/null +++ b/pkg/tcpip/transport/ping/endpoint_state.go @@ -0,0 +1,61 @@ +// Copyright 2016 The Netstack Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ping + +import ( + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// saveData saves pingPacket.data field. +func (p *pingPacket) saveData() buffer.VectorisedView { + // We cannot save p.data directly as p.data.views may alias to p.views, + // which is not allowed by state framework (in-struct pointer). + return p.data.Clone(nil) +} + +// loadData loads pingPacket.data field. +func (p *pingPacket) loadData(data buffer.VectorisedView) { + // NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization + // here because data.views is not guaranteed to be loaded by now. Plus, + // data.views will be allocated anyway so there really is little point + // of utilizing p.views for data.views. + p.data = data +} + +// beforeSave is invoked by stateify. +func (e *endpoint) beforeSave() { + // Stop incoming packets from being handled (and mutate endpoint state). + e.rcvMu.Lock() +} + +// afterLoad is invoked by stateify. +func (e *endpoint) afterLoad() { + e.stack = stack.StackFromEnv + + if e.state != stateBound && e.state != stateConnected { + return + } + + var err *tcpip.Error + if e.state == stateConnected { + e.route, err = e.stack.FindRoute(e.regNICID, e.bindAddr, e.id.RemoteAddress, e.netProto) + if err != nil { + panic(*err) + } + + e.id.LocalAddress = e.route.LocalAddress + } else if len(e.id.LocalAddress) != 0 { // stateBound + if e.stack.CheckLocalAddress(e.regNICID, e.netProto, e.id.LocalAddress) == 0 { + panic(tcpip.ErrBadLocalAddress) + } + } + + e.id, err = e.registerWithStack(e.regNICID, []tcpip.NetworkProtocolNumber{e.netProto}, e.id) + if err != nil { + panic(*err) + } +} diff --git a/pkg/tcpip/transport/ping/protocol.go b/pkg/tcpip/transport/ping/protocol.go new file mode 100644 index 000000000..1459b4d60 --- /dev/null +++ b/pkg/tcpip/transport/ping/protocol.go @@ -0,0 +1,106 @@ +// Copyright 2016 The Netstack Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ping contains the implementation of the ICMP and IPv6-ICMP transport +// protocols for use in ping. To use it in the networking stack, this package +// must be added to the project, and +// activated on the stack by passing ping.ProtocolName (or "ping") and/or +// ping.ProtocolName6 (or "ping6") as one of the transport protocols when +// calling stack.New(). Then endpoints can be created by passing +// ping.ProtocolNumber or ping.ProtocolNumber6 as the transport protocol number +// when calling Stack.NewEndpoint(). +package ping + +import ( + "encoding/binary" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +const ( + // ProtocolName4 is the string representation of the ping protocol name. + ProtocolName4 = "ping4" + + // ProtocolNumber4 is the ICMP protocol number. + ProtocolNumber4 = header.ICMPv4ProtocolNumber + + // ProtocolName6 is the string representation of the ping protocol name. + ProtocolName6 = "ping6" + + // ProtocolNumber6 is the IPv6-ICMP protocol number. + ProtocolNumber6 = header.ICMPv6ProtocolNumber +) + +type protocol struct { + number tcpip.TransportProtocolNumber +} + +// Number returns the ICMP protocol number. +func (p *protocol) Number() tcpip.TransportProtocolNumber { + return p.number +} + +func (p *protocol) netProto() tcpip.NetworkProtocolNumber { + switch p.number { + case ProtocolNumber4: + return header.IPv4ProtocolNumber + case ProtocolNumber6: + return header.IPv6ProtocolNumber + } + panic(fmt.Sprint("unknown protocol number: ", p.number)) +} + +// NewEndpoint creates a new ping endpoint. +func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { + if netProto != p.netProto() { + return nil, tcpip.ErrUnknownProtocol + } + return newEndpoint(stack, netProto, waiterQueue), nil +} + +// MinimumPacketSize returns the minimum valid ping packet size. +func (p *protocol) MinimumPacketSize() int { + switch p.number { + case ProtocolNumber4: + return header.ICMPv4EchoMinimumSize + case ProtocolNumber6: + return header.ICMPv6EchoMinimumSize + } + panic(fmt.Sprint("unknown protocol number: ", p.number)) +} + +// ParsePorts returns the source and destination ports stored in the given udp +// packet. +func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { + return 0, binary.BigEndian.Uint16(v[header.ICMPv4MinimumSize:]), nil +} + +// HandleUnknownDestinationPacket handles packets targeted at this protocol but +// that don't match any existing endpoint. +func (p *protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *buffer.VectorisedView) bool { + return true +} + +// SetOption implements TransportProtocol.SetOption. +func (p *protocol) SetOption(option interface{}) *tcpip.Error { + return tcpip.ErrUnknownProtocolOption +} + +// Option implements TransportProtocol.Option. +func (p *protocol) Option(option interface{}) *tcpip.Error { + return tcpip.ErrUnknownProtocolOption +} + +func init() { + stack.RegisterTransportProtocolFactory(ProtocolName4, func() stack.TransportProtocol { + return &protocol{ProtocolNumber4} + }) + + // TODO: Support IPv6. +} diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index 41b98424a..e20d59ca3 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -13,7 +13,7 @@ import ( // saveData saves udpPacket.data field. func (u *udpPacket) saveData() buffer.VectorisedView { - // We canoot save u.data directly as u.data.views may alias to u.views, + // We cannot save u.data directly as u.data.views may alias to u.views, // which is not allowed by state framework (in-struct pointer). return u.data.Clone(nil) } |