diff options
Diffstat (limited to 'pkg/tcpip/adapters/gonet/gonet.go')
-rwxr-xr-x | pkg/tcpip/adapters/gonet/gonet.go | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go new file mode 100755 index 000000000..cd6ce930a --- /dev/null +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -0,0 +1,722 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gonet provides a Go net package compatible wrapper for a tcpip stack. +package gonet + +import ( + "context" + "errors" + "io" + "net" + "sync" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" +) + +var ( + errCanceled = errors.New("operation canceled") + errWouldBlock = errors.New("operation would block") +) + +// timeoutError is how the net package reports timeouts. +type timeoutError struct{} + +func (e *timeoutError) Error() string { return "i/o timeout" } +func (e *timeoutError) Timeout() bool { return true } +func (e *timeoutError) Temporary() bool { return true } + +// A Listener is a wrapper around a tcpip endpoint that implements +// net.Listener. +type Listener struct { + stack *stack.Stack + ep tcpip.Endpoint + wq *waiter.Queue + cancel chan struct{} +} + +// NewListener creates a new Listener. +func NewListener(s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*Listener, error) { + // Create TCP endpoint, bind it, then start listening. + var wq waiter.Queue + ep, err := s.NewEndpoint(tcp.ProtocolNumber, network, &wq) + if err != nil { + return nil, errors.New(err.String()) + } + + if err := ep.Bind(addr); err != nil { + ep.Close() + return nil, &net.OpError{ + Op: "bind", + Net: "tcp", + Addr: fullToTCPAddr(addr), + Err: errors.New(err.String()), + } + } + + if err := ep.Listen(10); err != nil { + ep.Close() + return nil, &net.OpError{ + Op: "listen", + Net: "tcp", + Addr: fullToTCPAddr(addr), + Err: errors.New(err.String()), + } + } + + return &Listener{ + stack: s, + ep: ep, + wq: &wq, + cancel: make(chan struct{}), + }, nil +} + +// Close implements net.Listener.Close. +func (l *Listener) Close() error { + l.ep.Close() + return nil +} + +// Shutdown stops the HTTP server. +func (l *Listener) Shutdown() { + l.ep.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead) + close(l.cancel) // broadcast cancellation +} + +// Addr implements net.Listener.Addr. +func (l *Listener) Addr() net.Addr { + a, err := l.ep.GetLocalAddress() + if err != nil { + return nil + } + return fullToTCPAddr(a) +} + +type deadlineTimer struct { + // mu protects the fields below. + mu sync.Mutex + + readTimer *time.Timer + readCancelCh chan struct{} + writeTimer *time.Timer + writeCancelCh chan struct{} +} + +func (d *deadlineTimer) init() { + d.readCancelCh = make(chan struct{}) + d.writeCancelCh = make(chan struct{}) +} + +func (d *deadlineTimer) readCancel() <-chan struct{} { + d.mu.Lock() + c := d.readCancelCh + d.mu.Unlock() + return c +} +func (d *deadlineTimer) writeCancel() <-chan struct{} { + d.mu.Lock() + c := d.writeCancelCh + d.mu.Unlock() + return c +} + +// setDeadline contains the shared logic for setting a deadline. +// +// cancelCh and timer must be pointers to deadlineTimer.readCancelCh and +// deadlineTimer.readTimer or deadlineTimer.writeCancelCh and +// deadlineTimer.writeTimer. +// +// setDeadline must only be called while holding d.mu. +func (d *deadlineTimer) setDeadline(cancelCh *chan struct{}, timer **time.Timer, t time.Time) { + if *timer != nil && !(*timer).Stop() { + *cancelCh = make(chan struct{}) + } + + // Create a new channel if we already closed it due to setting an already + // expired time. We won't race with the timer because we already handled + // that above. + select { + case <-*cancelCh: + *cancelCh = make(chan struct{}) + default: + } + + // "A zero value for t means I/O operations will not time out." + // - net.Conn.SetDeadline + if t.IsZero() { + return + } + + timeout := t.Sub(time.Now()) + if timeout <= 0 { + close(*cancelCh) + return + } + + // Timer.Stop returns whether or not the AfterFunc has started, but + // does not indicate whether or not it has completed. Make a copy of + // the cancel channel to prevent this code from racing with the next + // call of setDeadline replacing *cancelCh. + ch := *cancelCh + *timer = time.AfterFunc(timeout, func() { + close(ch) + }) +} + +// SetReadDeadline implements net.Conn.SetReadDeadline and +// net.PacketConn.SetReadDeadline. +func (d *deadlineTimer) SetReadDeadline(t time.Time) error { + d.mu.Lock() + d.setDeadline(&d.readCancelCh, &d.readTimer, t) + d.mu.Unlock() + return nil +} + +// SetWriteDeadline implements net.Conn.SetWriteDeadline and +// net.PacketConn.SetWriteDeadline. +func (d *deadlineTimer) SetWriteDeadline(t time.Time) error { + d.mu.Lock() + d.setDeadline(&d.writeCancelCh, &d.writeTimer, t) + d.mu.Unlock() + return nil +} + +// SetDeadline implements net.Conn.SetDeadline and net.PacketConn.SetDeadline. +func (d *deadlineTimer) SetDeadline(t time.Time) error { + d.mu.Lock() + d.setDeadline(&d.readCancelCh, &d.readTimer, t) + d.setDeadline(&d.writeCancelCh, &d.writeTimer, t) + d.mu.Unlock() + return nil +} + +// A Conn is a wrapper around a tcpip.Endpoint that implements the net.Conn +// interface. +type Conn struct { + deadlineTimer + + wq *waiter.Queue + ep tcpip.Endpoint + + // readMu serializes reads and implicitly protects read. + // + // Lock ordering: + // If both readMu and deadlineTimer.mu are to be used in a single + // request, readMu must be acquired before deadlineTimer.mu. + readMu sync.Mutex + + // read contains bytes that have been read from the endpoint, + // but haven't yet been returned. + read buffer.View +} + +// NewConn creates a new Conn. +func NewConn(wq *waiter.Queue, ep tcpip.Endpoint) *Conn { + c := &Conn{ + wq: wq, + ep: ep, + } + c.deadlineTimer.init() + return c +} + +// Accept implements net.Conn.Accept. +func (l *Listener) Accept() (net.Conn, error) { + n, wq, err := l.ep.Accept() + + if err == tcpip.ErrWouldBlock { + // Create wait queue entry that notifies a channel. + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + l.wq.EventRegister(&waitEntry, waiter.EventIn) + defer l.wq.EventUnregister(&waitEntry) + + for { + n, wq, err = l.ep.Accept() + + if err != tcpip.ErrWouldBlock { + break + } + + select { + case <-l.cancel: + return nil, errCanceled + case <-notifyCh: + } + } + } + + if err != nil { + return nil, &net.OpError{ + Op: "accept", + Net: "tcp", + Addr: l.Addr(), + Err: errors.New(err.String()), + } + } + + return NewConn(wq, n), nil +} + +type opErrorer interface { + newOpError(op string, err error) *net.OpError +} + +// commonRead implements the common logic between net.Conn.Read and +// net.PacketConn.ReadFrom. +func commonRead(ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan struct{}, addr *tcpip.FullAddress, errorer opErrorer, dontWait bool) ([]byte, error) { + select { + case <-deadline: + return nil, errorer.newOpError("read", &timeoutError{}) + default: + } + + read, _, err := ep.Read(addr) + + if err == tcpip.ErrWouldBlock { + if dontWait { + return nil, errWouldBlock + } + // Create wait queue entry that notifies a channel. + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + wq.EventRegister(&waitEntry, waiter.EventIn) + defer wq.EventUnregister(&waitEntry) + for { + read, _, err = ep.Read(addr) + if err != tcpip.ErrWouldBlock { + break + } + select { + case <-deadline: + return nil, errorer.newOpError("read", &timeoutError{}) + case <-notifyCh: + } + } + } + + if err == tcpip.ErrClosedForReceive { + return nil, io.EOF + } + + if err != nil { + return nil, errorer.newOpError("read", errors.New(err.String())) + } + + return read, nil +} + +// Read implements net.Conn.Read. +func (c *Conn) Read(b []byte) (int, error) { + c.readMu.Lock() + defer c.readMu.Unlock() + + deadline := c.readCancel() + + numRead := 0 + for numRead != len(b) { + if len(c.read) == 0 { + var err error + c.read, err = commonRead(c.ep, c.wq, deadline, nil, c, numRead != 0) + if err != nil { + if numRead != 0 { + return numRead, nil + } + return numRead, err + } + } + n := copy(b[numRead:], c.read) + c.read.TrimFront(n) + numRead += n + if len(c.read) == 0 { + c.read = nil + } + } + return numRead, nil +} + +// Write implements net.Conn.Write. +func (c *Conn) Write(b []byte) (int, error) { + deadline := c.writeCancel() + + // Check if deadlineTimer has already expired. + select { + case <-deadline: + return 0, c.newOpError("write", &timeoutError{}) + default: + } + + v := buffer.NewViewFromBytes(b) + + // We must handle two soft failure conditions simultaneously: + // 1. Write may write nothing and return tcpip.ErrWouldBlock. + // If this happens, we need to register for notifications if we have + // not already and wait to try again. + // 2. Write may write fewer than the full number of bytes and return + // without error. In this case we need to try writing the remaining + // bytes again. I do not need to register for notifications. + // + // What is more, these two soft failure conditions can be interspersed. + // There is no guarantee that all of the condition #1s will occur before + // all of the condition #2s or visa-versa. + var ( + err *tcpip.Error + nbytes int + reg bool + notifyCh chan struct{} + ) + for nbytes < len(b) && (err == tcpip.ErrWouldBlock || err == nil) { + if err == tcpip.ErrWouldBlock { + if !reg { + // Only register once. + reg = true + + // Create wait queue entry that notifies a channel. + var waitEntry waiter.Entry + waitEntry, notifyCh = waiter.NewChannelEntry(nil) + c.wq.EventRegister(&waitEntry, waiter.EventOut) + defer c.wq.EventUnregister(&waitEntry) + } else { + // Don't wait immediately after registration in case more data + // became available between when we last checked and when we setup + // the notification. + select { + case <-deadline: + return nbytes, c.newOpError("write", &timeoutError{}) + case <-notifyCh: + } + } + } + + var n int64 + var resCh <-chan struct{} + n, resCh, err = c.ep.Write(tcpip.SlicePayload(v), tcpip.WriteOptions{}) + nbytes += int(n) + v.TrimFront(int(n)) + + if resCh != nil { + select { + case <-deadline: + return nbytes, c.newOpError("write", &timeoutError{}) + case <-resCh: + } + + n, _, err = c.ep.Write(tcpip.SlicePayload(v), tcpip.WriteOptions{}) + nbytes += int(n) + v.TrimFront(int(n)) + } + } + + if err == nil { + return nbytes, nil + } + + return nbytes, c.newOpError("write", errors.New(err.String())) +} + +// Close implements net.Conn.Close. +func (c *Conn) Close() error { + c.ep.Close() + return nil +} + +// CloseRead shuts down the reading side of the TCP connection. Most callers +// should just use Close. +// +// A TCP Half-Close is performed the same as CloseRead for *net.TCPConn. +func (c *Conn) CloseRead() error { + if terr := c.ep.Shutdown(tcpip.ShutdownRead); terr != nil { + return c.newOpError("close", errors.New(terr.String())) + } + return nil +} + +// CloseWrite shuts down the writing side of the TCP connection. Most callers +// should just use Close. +// +// A TCP Half-Close is performed the same as CloseWrite for *net.TCPConn. +func (c *Conn) CloseWrite() error { + if terr := c.ep.Shutdown(tcpip.ShutdownWrite); terr != nil { + return c.newOpError("close", errors.New(terr.String())) + } + return nil +} + +// LocalAddr implements net.Conn.LocalAddr. +func (c *Conn) LocalAddr() net.Addr { + a, err := c.ep.GetLocalAddress() + if err != nil { + return nil + } + return fullToTCPAddr(a) +} + +// RemoteAddr implements net.Conn.RemoteAddr. +func (c *Conn) RemoteAddr() net.Addr { + a, err := c.ep.GetRemoteAddress() + if err != nil { + return nil + } + return fullToTCPAddr(a) +} + +func (c *Conn) newOpError(op string, err error) *net.OpError { + return &net.OpError{ + Op: op, + Net: "tcp", + Source: c.LocalAddr(), + Addr: c.RemoteAddr(), + Err: err, + } +} + +func fullToTCPAddr(addr tcpip.FullAddress) *net.TCPAddr { + return &net.TCPAddr{IP: net.IP(addr.Addr), Port: int(addr.Port)} +} + +func fullToUDPAddr(addr tcpip.FullAddress) *net.UDPAddr { + return &net.UDPAddr{IP: net.IP(addr.Addr), Port: int(addr.Port)} +} + +// DialTCP creates a new TCP Conn connected to the specified address. +func DialTCP(s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*Conn, error) { + return DialContextTCP(context.Background(), s, addr, network) +} + +// DialContextTCP creates a new TCP Conn connected to the specified address +// with the option of adding cancellation and timeouts. +func DialContextTCP(ctx context.Context, s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*Conn, error) { + // Create TCP endpoint, then connect. + var wq waiter.Queue + ep, err := s.NewEndpoint(tcp.ProtocolNumber, network, &wq) + if err != nil { + return nil, errors.New(err.String()) + } + + // Create wait queue entry that notifies a channel. + // + // We do this unconditionally as Connect will always return an error. + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + wq.EventRegister(&waitEntry, waiter.EventOut) + defer wq.EventUnregister(&waitEntry) + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + err = ep.Connect(addr) + if err == tcpip.ErrConnectStarted { + select { + case <-ctx.Done(): + ep.Close() + return nil, ctx.Err() + case <-notifyCh: + } + + err = ep.GetSockOpt(tcpip.ErrorOption{}) + } + if err != nil { + ep.Close() + return nil, &net.OpError{ + Op: "connect", + Net: "tcp", + Addr: fullToTCPAddr(addr), + Err: errors.New(err.String()), + } + } + + return NewConn(&wq, ep), nil +} + +// A PacketConn is a wrapper around a tcpip endpoint that implements +// net.PacketConn. +type PacketConn struct { + deadlineTimer + + stack *stack.Stack + ep tcpip.Endpoint + wq *waiter.Queue +} + +// DialUDP creates a new PacketConn. +// +// If laddr is nil, a local address is automatically chosen. +// +// If raddr is nil, the PacketConn is left unconnected. +func DialUDP(s *stack.Stack, laddr, raddr *tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*PacketConn, error) { + var wq waiter.Queue + ep, err := s.NewEndpoint(udp.ProtocolNumber, network, &wq) + if err != nil { + return nil, errors.New(err.String()) + } + + if laddr != nil { + if err := ep.Bind(*laddr); err != nil { + ep.Close() + return nil, &net.OpError{ + Op: "bind", + Net: "udp", + Addr: fullToUDPAddr(*laddr), + Err: errors.New(err.String()), + } + } + } + + c := PacketConn{ + stack: s, + ep: ep, + wq: &wq, + } + c.deadlineTimer.init() + + if raddr != nil { + if err := c.ep.Connect(*raddr); err != nil { + c.ep.Close() + return nil, &net.OpError{ + Op: "connect", + Net: "udp", + Addr: fullToUDPAddr(*raddr), + Err: errors.New(err.String()), + } + } + } + + return &c, nil +} + +func (c *PacketConn) newOpError(op string, err error) *net.OpError { + return c.newRemoteOpError(op, nil, err) +} + +func (c *PacketConn) newRemoteOpError(op string, remote net.Addr, err error) *net.OpError { + return &net.OpError{ + Op: op, + Net: "udp", + Source: c.LocalAddr(), + Addr: remote, + Err: err, + } +} + +// RemoteAddr implements net.Conn.RemoteAddr. +func (c *PacketConn) RemoteAddr() net.Addr { + a, err := c.ep.GetRemoteAddress() + if err != nil { + return nil + } + return fullToTCPAddr(a) +} + +// Read implements net.Conn.Read +func (c *PacketConn) Read(b []byte) (int, error) { + bytesRead, _, err := c.ReadFrom(b) + return bytesRead, err +} + +// ReadFrom implements net.PacketConn.ReadFrom. +func (c *PacketConn) ReadFrom(b []byte) (int, net.Addr, error) { + deadline := c.readCancel() + + var addr tcpip.FullAddress + read, err := commonRead(c.ep, c.wq, deadline, &addr, c, false) + if err != nil { + return 0, nil, err + } + + return copy(b, read), fullToUDPAddr(addr), nil +} + +func (c *PacketConn) Write(b []byte) (int, error) { + return c.WriteTo(b, nil) +} + +// WriteTo implements net.PacketConn.WriteTo. +func (c *PacketConn) WriteTo(b []byte, addr net.Addr) (int, error) { + deadline := c.writeCancel() + + // Check if deadline has already expired. + select { + case <-deadline: + return 0, c.newRemoteOpError("write", addr, &timeoutError{}) + default: + } + + // If we're being called by Write, there is no addr + wopts := tcpip.WriteOptions{} + if addr != nil { + ua := addr.(*net.UDPAddr) + wopts.To = &tcpip.FullAddress{Addr: tcpip.Address(ua.IP), Port: uint16(ua.Port)} + } + + v := buffer.NewView(len(b)) + copy(v, b) + + n, resCh, err := c.ep.Write(tcpip.SlicePayload(v), wopts) + if resCh != nil { + select { + case <-deadline: + return int(n), c.newRemoteOpError("write", addr, &timeoutError{}) + case <-resCh: + } + + n, _, err = c.ep.Write(tcpip.SlicePayload(v), wopts) + } + + if err == tcpip.ErrWouldBlock { + // Create wait queue entry that notifies a channel. + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + c.wq.EventRegister(&waitEntry, waiter.EventOut) + defer c.wq.EventUnregister(&waitEntry) + for { + select { + case <-deadline: + return int(n), c.newRemoteOpError("write", addr, &timeoutError{}) + case <-notifyCh: + } + + n, _, err = c.ep.Write(tcpip.SlicePayload(v), wopts) + if err != tcpip.ErrWouldBlock { + break + } + } + } + + if err == nil { + return int(n), nil + } + + return int(n), c.newRemoteOpError("write", addr, errors.New(err.String())) +} + +// Close implements net.PacketConn.Close. +func (c *PacketConn) Close() error { + c.ep.Close() + return nil +} + +// LocalAddr implements net.PacketConn.LocalAddr. +func (c *PacketConn) LocalAddr() net.Addr { + a, err := c.ep.GetLocalAddress() + if err != nil { + return nil + } + return fullToUDPAddr(a) +} |