diff options
author | Bhasker Hariharan <bhaskerh@google.com> | 2019-08-14 14:33:11 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-08-14 14:34:27 -0700 |
commit | 570fb1db6b4e01be37386a379fea4d63e5a3cdc2 (patch) | |
tree | 44cd127ffaa546b3f0c9bc1b642fe8980e69bbd9 /pkg/sentry | |
parent | cee044c2ab009c9faae154e1751eef93430fc141 (diff) |
Improve SendMsg performance.
SendMsg before this change would copy all the data over into a
new slice even if the underlying socket could only accept a
small amount of data. This is really inefficient with non-blocking
sockets and under high throughput where large writes could get
ErrWouldBlock or if there was say a timeout associated with the sendmsg()
syscall.
With this change we delay copying bytes in till they are needed and only
copy what can be potentially sent/held in the socket buffer. Reducing
the need to repeatedly copy data over.
Also a minor fix to change state FIN-WAIT-1 when shutdown(..., SHUT_WR) is called
instead of when we transmit the actual FIN. Otherwise the socket could remain in
CONNECTED state even though the user has called shutdown() on the socket.
Updates #627
PiperOrigin-RevId: 263430505
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 8cb5c823f..0f2cd05fc 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -429,6 +429,11 @@ func (i *ioSequencePayload) Size() int { return int(i.src.NumBytes()) } +// DropFirst drops the first n bytes from underlying src. +func (i *ioSequencePayload) DropFirst(n int) { + i.src = i.src.DropFirst(int(n)) +} + // Write implements fs.FileOperations.Write. func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { f := &ioSequencePayload{ctx: ctx, src: src} @@ -2026,28 +2031,22 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] addr = &addrBuf } - v := buffer.NewView(int(src.NumBytes())) - - // Copy all the data into the buffer. - if _, err := src.CopyIn(t, v); err != nil { - return 0, syserr.FromError(err) - } - opts := tcpip.WriteOptions{ To: addr, More: flags&linux.MSG_MORE != 0, EndOfRecord: flags&linux.MSG_EOR != 0, } - n, resCh, err := s.Endpoint.Write(tcpip.SlicePayload(v), opts) + v := &ioSequencePayload{t, src} + n, resCh, err := s.Endpoint.Write(v, opts) if resCh != nil { if err := t.Block(resCh); err != nil { return 0, syserr.FromError(err) } - n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts) + n, _, err = s.Endpoint.Write(v, opts) } dontWait := flags&linux.MSG_DONTWAIT != 0 - if err == nil && (n >= uintptr(len(v)) || dontWait) { + if err == nil && (n >= uintptr(v.Size()) || dontWait) { // Complete write. return int(n), nil } @@ -2061,18 +2060,18 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] s.EventRegister(&e, waiter.EventOut) defer s.EventUnregister(&e) - v.TrimFront(int(n)) + v.DropFirst(int(n)) total := n for { - n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts) - v.TrimFront(int(n)) + n, _, err = s.Endpoint.Write(v, opts) + v.DropFirst(int(n)) total += n if err != nil && err != tcpip.ErrWouldBlock && total == 0 { return 0, syserr.TranslateNetstackError(err) } - if err == nil && len(v) == 0 || err != nil && err != tcpip.ErrWouldBlock { + if err == nil && v.Size() == 0 || err != nil && err != tcpip.ErrWouldBlock { return int(total), nil } |