summaryrefslogtreecommitdiffhomepage
path: root/pkg/flipcall/endpoint_unsafe.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/flipcall/endpoint_unsafe.go')
-rw-r--r--pkg/flipcall/endpoint_unsafe.go238
1 files changed, 238 insertions, 0 deletions
diff --git a/pkg/flipcall/endpoint_unsafe.go b/pkg/flipcall/endpoint_unsafe.go
new file mode 100644
index 000000000..8319955e0
--- /dev/null
+++ b/pkg/flipcall/endpoint_unsafe.go
@@ -0,0 +1,238 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "sync/atomic"
+ "syscall"
+ "unsafe"
+)
+
+// An Endpoint provides the ability to synchronously transfer data and control
+// to a connected peer Endpoint, which may be in another process.
+//
+// Since the Endpoint control transfer model is synchronous, at any given time
+// one Endpoint "has control" (designated the *active* Endpoint), and the other
+// is "waiting for control" (designated the *inactive* Endpoint). Users of the
+// flipcall package arbitrarily designate one Endpoint as initially-active, and
+// the other as initially-inactive; in a client/server protocol, the client
+// Endpoint is usually initially-active (able to send a request) and the server
+// Endpoint is usually initially-inactive (waiting for a request). The
+// initially-active Endpoint writes data to be sent to Endpoint.Data(), and
+// then synchronously transfers control to the inactive Endpoint by calling
+// Endpoint.SendRecv(), becoming the inactive Endpoint in the process. The
+// initially-inactive Endpoint waits for control by calling
+// Endpoint.RecvFirst(); receiving control causes it to become the active
+// Endpoint. After this, the protocol is symmetric: the active Endpoint reads
+// data sent by the peer by reading from Endpoint.Data(), writes data to be
+// sent to the peer into Endpoint.Data(), and then calls Endpoint.SendRecv() to
+// exchange roles with the peer, which blocks until the peer has done the same.
+type Endpoint struct {
+ // shutdown is non-zero if Endpoint.Shutdown() has been called. shutdown is
+ // accessed using atomic memory operations.
+ shutdown uint32
+
+ // dataCap is the size of the datagram part of the packet window in bytes.
+ // dataCap is immutable.
+ dataCap uint32
+
+ // packet is the beginning of the packet window. packet is immutable.
+ packet unsafe.Pointer
+
+ ctrl endpointControlState
+}
+
+// Init must be called on zero-value Endpoints before first use. If it
+// succeeds, Destroy() must be called once the Endpoint is no longer in use.
+//
+// ctrlMode specifies how connected Endpoints will exchange control. Both
+// connected Endpoints must specify the same value for ctrlMode.
+//
+// pwd represents the packet window used to exchange data with the peer
+// Endpoint. FD may differ between Endpoints if they are in different
+// processes, but must represent the same file. The packet window must
+// initially be filled with zero bytes.
+func (ep *Endpoint) Init(ctrlMode ControlMode, pwd PacketWindowDescriptor) error {
+ if pwd.Length < pageSize {
+ return fmt.Errorf("packet window size (%d) less than minimum (%d)", pwd.Length, pageSize)
+ }
+ if pwd.Length > math.MaxUint32 {
+ return fmt.Errorf("packet window size (%d) exceeds maximum (%d)", pwd.Length, math.MaxUint32)
+ }
+ m, _, e := syscall.Syscall6(syscall.SYS_MMAP, 0, uintptr(pwd.Length), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset))
+ if e != 0 {
+ return fmt.Errorf("failed to mmap packet window: %v", e)
+ }
+ ep.dataCap = uint32(pwd.Length) - uint32(packetHeaderBytes)
+ ep.packet = (unsafe.Pointer)(m)
+ if err := ep.initControlState(ctrlMode); err != nil {
+ ep.unmapPacket()
+ return err
+ }
+ return nil
+}
+
+// NewEndpoint is a convenience function that returns an initialized Endpoint
+// allocated on the heap.
+func NewEndpoint(ctrlMode ControlMode, pwd PacketWindowDescriptor) (*Endpoint, error) {
+ var ep Endpoint
+ if err := ep.Init(ctrlMode, pwd); err != nil {
+ return nil, err
+ }
+ return &ep, nil
+}
+
+func (ep *Endpoint) unmapPacket() {
+ syscall.Syscall(syscall.SYS_MUNMAP, uintptr(ep.packet), uintptr(ep.dataCap)+packetHeaderBytes, 0)
+ ep.dataCap = 0
+ ep.packet = nil
+}
+
+// Destroy releases resources owned by ep. No other Endpoint methods may be
+// called after Destroy.
+func (ep *Endpoint) Destroy() {
+ ep.unmapPacket()
+}
+
+// Packets consist of an 8-byte header followed by an arbitrarily-sized
+// datagram. The header consists of:
+//
+// - A 4-byte native-endian sequence number, which is incremented by the active
+// Endpoint after it finishes writing to the packet window. The sequence number
+// is needed to handle spurious wakeups.
+//
+// - A 4-byte native-endian datagram length in bytes.
+const (
+ sizeofUint32 = unsafe.Sizeof(uint32(0))
+ packetHeaderBytes = 2 * sizeofUint32
+)
+
+func (ep *Endpoint) seq() *uint32 {
+ return (*uint32)(ep.packet)
+}
+
+func (ep *Endpoint) dataLen() *uint32 {
+ return (*uint32)((unsafe.Pointer)(uintptr(ep.packet) + sizeofUint32))
+}
+
+// DataCap returns the maximum datagram size supported by ep in bytes.
+func (ep *Endpoint) DataCap() uint32 {
+ return ep.dataCap
+}
+
+func (ep *Endpoint) data() unsafe.Pointer {
+ return unsafe.Pointer(uintptr(ep.packet) + packetHeaderBytes)
+}
+
+// Data returns the datagram part of ep's packet window as a byte slice.
+//
+// Note that the packet window is shared with the potentially-untrusted peer
+// Endpoint, which may concurrently mutate the contents of the packet window.
+// Thus:
+//
+// - Readers must not assume that two reads of the same byte in Data() will
+// return the same result. In other words, readers should read any given byte
+// in Data() at most once.
+//
+// - Writers must not assume that they will read back the same data that they
+// have written. In other words, writers should avoid reading from Data() at
+// all.
+func (ep *Endpoint) Data() []byte {
+ var bs []byte
+ bsReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&bs))
+ bsReflect.Data = uintptr(ep.data())
+ bsReflect.Len = int(ep.DataCap())
+ bsReflect.Cap = bsReflect.Len
+ return bs
+}
+
+// SendRecv transfers control to the peer Endpoint, causing its call to
+// Endpoint.SendRecv() or Endpoint.RecvFirst() to return with the given
+// datagram length, then blocks until the peer Endpoint calls
+// Endpoint.SendRecv() or Endpoint.SendLast().
+//
+// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has
+// returned an error. ep.SendLast() has never been called.
+func (ep *Endpoint) SendRecv(dataLen uint32) (uint32, error) {
+ dataCap := ep.DataCap()
+ if dataLen > dataCap {
+ return 0, fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap)
+ }
+ atomic.StoreUint32(ep.dataLen(), dataLen)
+ if err := ep.doRoundTrip(); err != nil {
+ return 0, err
+ }
+ recvDataLen := atomic.LoadUint32(ep.dataLen())
+ if recvDataLen > dataCap {
+ return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap)
+ }
+ return recvDataLen, nil
+}
+
+// RecvFirst blocks until the peer Endpoint calls Endpoint.SendRecv(), then
+// returns the datagram length specified by that call.
+//
+// Preconditions: ep.SendRecv(), ep.RecvFirst(), and ep.SendLast() have never
+// been called.
+func (ep *Endpoint) RecvFirst() (uint32, error) {
+ if err := ep.doWaitFirst(); err != nil {
+ return 0, err
+ }
+ recvDataLen := atomic.LoadUint32(ep.dataLen())
+ if dataCap := ep.DataCap(); recvDataLen > dataCap {
+ return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap)
+ }
+ return recvDataLen, nil
+}
+
+// SendLast causes the peer Endpoint's call to Endpoint.SendRecv() or
+// Endpoint.RecvFirst() to return with the given datagram length.
+//
+// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has
+// returned an error. ep.SendLast() has never been called.
+func (ep *Endpoint) SendLast(dataLen uint32) error {
+ dataCap := ep.DataCap()
+ if dataLen > dataCap {
+ return fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap)
+ }
+ atomic.StoreUint32(ep.dataLen(), dataLen)
+ if err := ep.doNotifyLast(); err != nil {
+ return err
+ }
+ return nil
+}
+
+// Shutdown causes concurrent and future calls to ep.SendRecv(),
+// ep.RecvFirst(), and ep.SendLast() to unblock and return errors. It does not
+// wait for concurrent calls to return.
+func (ep *Endpoint) Shutdown() {
+ if atomic.SwapUint32(&ep.shutdown, 1) == 0 {
+ ep.interruptForShutdown()
+ }
+}
+
+func (ep *Endpoint) isShutdown() bool {
+ return atomic.LoadUint32(&ep.shutdown) != 0
+}
+
+type endpointShutdownError struct{}
+
+// Error implements error.Error.
+func (endpointShutdownError) Error() string {
+ return "Endpoint.Shutdown() has been called"
+}