diff options
| field | value | date |
|---|---|---|
| author | Michael Pratt <mpratt@google.com> | 2018-10-10 14:09:24 -0700 |
| committer | Shentubot <shentubot@google.com> | 2018-10-10 14:10:17 -0700 |
| commit | ddb34b3690c07f6c8efe2b96f89166145c4a7d3c (patch) | |
| tree | 781361c955c356d26b484f572bc4ad41a250ab72 /pkg/tcpip/transport/queue | |
| parent | b78552d30e0af4122710e01bc86cbde6bb412686 (diff) | |
Enforce message size limits and avoid host calls with too many iovecs
Currently, in the face of FileMem fragmentation and a large sendmsg or
recvmsg call, host sockets may pass > 1024 iovecs to the host, which
will immediately cause the host to return EMSGSIZE.
When we detect this case, use a single intermediate buffer to pass to
the kernel, copying to/from the src/dst buffer.
To avoid creating unbounded intermediate buffers, enforce message size
checks and truncation w.r.t. the send buffer size. The same
functionality is added to netstack unix sockets for feature parity.
PiperOrigin-RevId: 216590198
Change-Id: I719a32e71c7b1098d5097f35e6daf7dd5190eff7
Diffstat (limited to 'pkg/tcpip/transport/queue')
-rw-r--r-- | pkg/tcpip/transport/queue/queue.go | 69 |
1 files changed, 58 insertions, 11 deletions
```diff
diff --git a/pkg/tcpip/transport/queue/queue.go b/pkg/tcpip/transport/queue/queue.go
index eb9ee8a3f..b3d2ea68b 100644
--- a/pkg/tcpip/transport/queue/queue.go
+++ b/pkg/tcpip/transport/queue/queue.go
@@ -24,12 +24,23 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/waiter"
 )
 
-// Entry implements Linker interface and has both Length and Release methods.
+// Entry implements Linker interface and has additional required methods.
 type Entry interface {
 	ilist.Linker
+
+	// Length returns the number of bytes stored in the entry.
 	Length() int64
+
+	// Release releases any resources held by the entry.
 	Release()
+
+	// Peek returns a copy of the entry. It must be Released separately.
 	Peek() Entry
+
+	// Truncate reduces the number of bytes stored in the entry to n bytes.
+	//
+	// Preconditions: n <= Length().
+	Truncate(n int64)
 }
 
 // Queue is a buffer queue.
@@ -52,7 +63,7 @@ func New(ReaderQueue *waiter.Queue, WriterQueue *waiter.Queue, limit int64) *Que
 }
 
 // Close closes q for reading and writing. It is immediately not writable and
-// will become unreadble will no more data is pending.
+// will become unreadable when no more data is pending.
 //
 // Both the read and write queues must be notified after closing:
 // q.ReaderQueue.Notify(waiter.EventIn)
@@ -86,38 +97,74 @@ func (q *Queue) IsReadable() bool {
 	return q.closed || q.dataList.Front() != nil
 }
 
+// bufWritable returns true if there is space for writing.
+//
+// N.B. Linux only considers a unix socket "writable" if >75% of the buffer is
+// free.
+//
+// See net/unix/af_unix.c:unix_writeable.
+func (q *Queue) bufWritable() bool {
+	return 4*q.used < q.limit
+}
+
 // IsWritable determines if q is currently writable.
 func (q *Queue) IsWritable() bool {
 	q.mu.Lock()
 	defer q.mu.Unlock()
 
-	return q.closed || q.used < q.limit
+	return q.closed || q.bufWritable()
 }
 
 // Enqueue adds an entry to the data queue if room is available.
 //
+// If truncate is true, Enqueue may truncate the message beforing enqueuing it.
+// Otherwise, the entire message must fit. If n < e.Length(), err indicates why.
+//
 // If notify is true, ReaderQueue.Notify must be called:
 // q.ReaderQueue.Notify(waiter.EventIn)
-func (q *Queue) Enqueue(e Entry) (notify bool, err *tcpip.Error) {
+func (q *Queue) Enqueue(e Entry, truncate bool) (l int64, notify bool, err *tcpip.Error) {
 	q.mu.Lock()
 
 	if q.closed {
 		q.mu.Unlock()
-		return false, tcpip.ErrClosedForSend
+		return 0, false, tcpip.ErrClosedForSend
+	}
+
+	free := q.limit - q.used
+
+	l = e.Length()
+
+	if l > free && truncate {
+		if free == 0 {
+			// Message can't fit right now.
+			q.mu.Unlock()
+			return 0, false, tcpip.ErrWouldBlock
+		}
+
+		e.Truncate(free)
+		l = e.Length()
+		err = tcpip.ErrWouldBlock
+	}
+
+	if l > q.limit {
+		// Message is too big to ever fit.
+		q.mu.Unlock()
+		return 0, false, tcpip.ErrMessageTooLong
 	}
 
-	if q.used >= q.limit {
+	if l > free {
+		// Message can't fit right now.
 		q.mu.Unlock()
-		return false, tcpip.ErrWouldBlock
+		return 0, false, tcpip.ErrWouldBlock
 	}
 
 	notify = q.dataList.Front() == nil
-	q.used += e.Length()
+	q.used += l
 	q.dataList.PushBack(e)
 
 	q.mu.Unlock()
 
-	return notify, nil
+	return l, notify, err
 }
 
 // Dequeue removes the first entry in the data queue, if one exists.
@@ -137,13 +184,13 @@ func (q *Queue) Dequeue() (e Entry, notify bool, err *tcpip.Error) {
 		return nil, false, err
 	}
 
-	notify = q.used >= q.limit
+	notify = !q.bufWritable()
 
 	e = q.dataList.Front().(Entry)
 	q.dataList.Remove(e)
 	q.used -= e.Length()
 
-	notify = notify && q.used < q.limit
+	notify = notify && q.bufWritable()
 
 	q.mu.Unlock()
 
```