diff options
Diffstat (limited to 'pkg/p9')
-rw-r--r-- | pkg/p9/buffer.go | 263 | ||||
-rw-r--r-- | pkg/p9/client.go | 307 | ||||
-rw-r--r-- | pkg/p9/client_file.go | 632 | ||||
-rw-r--r-- | pkg/p9/file.go | 256 | ||||
-rw-r--r-- | pkg/p9/handlers.go | 1291 | ||||
-rw-r--r-- | pkg/p9/messages.go | 2359 | ||||
-rw-r--r-- | pkg/p9/p9.go | 1141 | ||||
-rwxr-xr-x | pkg/p9/p9_state_autogen.go | 4 | ||||
-rw-r--r-- | pkg/p9/path_tree.go | 109 | ||||
-rw-r--r-- | pkg/p9/pool.go | 68 | ||||
-rw-r--r-- | pkg/p9/server.go | 575 | ||||
-rw-r--r-- | pkg/p9/transport.go | 342 | ||||
-rw-r--r-- | pkg/p9/version.go | 150 |
13 files changed, 7497 insertions, 0 deletions
diff --git a/pkg/p9/buffer.go b/pkg/p9/buffer.go new file mode 100644 index 000000000..249536d8a --- /dev/null +++ b/pkg/p9/buffer.go @@ -0,0 +1,263 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "encoding/binary" +) + +// encoder is used for messages and 9P primitives. +type encoder interface { + // Decode decodes from the given buffer. Decode may be called more than once + // to reuse the instance. It must clear any previous state. + // + // This may not fail, exhaustion will be recorded in the buffer. + Decode(b *buffer) + + // Encode encodes to the given buffer. + // + // This may not fail. + Encode(b *buffer) +} + +// order is the byte order used for encoding. +var order = binary.LittleEndian + +// buffer is a slice that is consumed. +// +// This is passed to the encoder methods. +type buffer struct { + // data is the underlying data. This may grow during Encode. + data []byte + + // overflow indicates whether an overflow has occurred. + overflow bool +} + +// append appends n bytes to the buffer and returns a slice pointing to the +// newly appended bytes. +func (b *buffer) append(n int) []byte { + b.data = append(b.data, make([]byte, n)...) + return b.data[len(b.data)-n:] +} + +// consume consumes n bytes from the buffer. 
+func (b *buffer) consume(n int) ([]byte, bool) { + if !b.has(n) { + b.markOverrun() + return nil, false + } + rval := b.data[:n] + b.data = b.data[n:] + return rval, true +} + +// has returns true if n bytes are available. +func (b *buffer) has(n int) bool { + return len(b.data) >= n +} + +// markOverrun immediately marks this buffer as overrun. +// +// This is used by ReadString, since some invalid data implies the rest of the +// buffer is no longer valid either. +func (b *buffer) markOverrun() { + b.overflow = true +} + +// isOverrun returns true if this buffer has run past the end. +func (b *buffer) isOverrun() bool { + return b.overflow +} + +// Read8 reads a byte from the buffer. +func (b *buffer) Read8() uint8 { + v, ok := b.consume(1) + if !ok { + return 0 + } + return uint8(v[0]) +} + +// Read16 reads a 16-bit value from the buffer. +func (b *buffer) Read16() uint16 { + v, ok := b.consume(2) + if !ok { + return 0 + } + return order.Uint16(v) +} + +// Read32 reads a 32-bit value from the buffer. +func (b *buffer) Read32() uint32 { + v, ok := b.consume(4) + if !ok { + return 0 + } + return order.Uint32(v) +} + +// Read64 reads a 64-bit value from the buffer. +func (b *buffer) Read64() uint64 { + v, ok := b.consume(8) + if !ok { + return 0 + } + return order.Uint64(v) +} + +// ReadQIDType reads a QIDType value. +func (b *buffer) ReadQIDType() QIDType { + return QIDType(b.Read8()) +} + +// ReadTag reads a Tag value. +func (b *buffer) ReadTag() Tag { + return Tag(b.Read16()) +} + +// ReadFID reads a FID value. +func (b *buffer) ReadFID() FID { + return FID(b.Read32()) +} + +// ReadUID reads a UID value. +func (b *buffer) ReadUID() UID { + return UID(b.Read32()) +} + +// ReadGID reads a GID value. +func (b *buffer) ReadGID() GID { + return GID(b.Read32()) +} + +// ReadPermissions reads a file mode value and applies the mask for permissions. 
+func (b *buffer) ReadPermissions() FileMode { + return b.ReadFileMode() & permissionsMask +} + +// ReadFileMode reads a file mode value. +func (b *buffer) ReadFileMode() FileMode { + return FileMode(b.Read32()) +} + +// ReadOpenFlags reads an OpenFlags. +func (b *buffer) ReadOpenFlags() OpenFlags { + return OpenFlags(b.Read32()) +} + +// ReadConnectFlags reads a ConnectFlags. +func (b *buffer) ReadConnectFlags() ConnectFlags { + return ConnectFlags(b.Read32()) +} + +// ReadMsgType writes a MsgType. +func (b *buffer) ReadMsgType() MsgType { + return MsgType(b.Read8()) +} + +// ReadString deserializes a string. +func (b *buffer) ReadString() string { + l := b.Read16() + if !b.has(int(l)) { + // Mark the buffer as corrupted. + b.markOverrun() + return "" + } + + bs := make([]byte, l) + for i := 0; i < int(l); i++ { + bs[i] = byte(b.Read8()) + } + return string(bs) +} + +// Write8 writes a byte to the buffer. +func (b *buffer) Write8(v uint8) { + b.append(1)[0] = byte(v) +} + +// Write16 writes a 16-bit value to the buffer. +func (b *buffer) Write16(v uint16) { + order.PutUint16(b.append(2), v) +} + +// Write32 writes a 32-bit value to the buffer. +func (b *buffer) Write32(v uint32) { + order.PutUint32(b.append(4), v) +} + +// Write64 writes a 64-bit value to the buffer. +func (b *buffer) Write64(v uint64) { + order.PutUint64(b.append(8), v) +} + +// WriteQIDType writes a QIDType value. +func (b *buffer) WriteQIDType(qidType QIDType) { + b.Write8(uint8(qidType)) +} + +// WriteTag writes a Tag value. +func (b *buffer) WriteTag(tag Tag) { + b.Write16(uint16(tag)) +} + +// WriteFID writes a FID value. +func (b *buffer) WriteFID(fid FID) { + b.Write32(uint32(fid)) +} + +// WriteUID writes a UID value. +func (b *buffer) WriteUID(uid UID) { + b.Write32(uint32(uid)) +} + +// WriteGID writes a GID value. +func (b *buffer) WriteGID(gid GID) { + b.Write32(uint32(gid)) +} + +// WritePermissions applies a permissions mask and writes the FileMode. 
+func (b *buffer) WritePermissions(perm FileMode) { + b.WriteFileMode(perm & permissionsMask) +} + +// WriteFileMode writes a FileMode. +func (b *buffer) WriteFileMode(mode FileMode) { + b.Write32(uint32(mode)) +} + +// WriteOpenFlags writes an OpenFlags. +func (b *buffer) WriteOpenFlags(flags OpenFlags) { + b.Write32(uint32(flags)) +} + +// WriteConnectFlags writes a ConnectFlags. +func (b *buffer) WriteConnectFlags(flags ConnectFlags) { + b.Write32(uint32(flags)) +} + +// WriteMsgType writes a MsgType. +func (b *buffer) WriteMsgType(t MsgType) { + b.Write8(uint8(t)) +} + +// WriteString serializes the given string. +func (b *buffer) WriteString(s string) { + b.Write16(uint16(len(s))) + for i := 0; i < len(s); i++ { + b.Write8(byte(s[i])) + } +} diff --git a/pkg/p9/client.go b/pkg/p9/client.go new file mode 100644 index 000000000..56587e2cf --- /dev/null +++ b/pkg/p9/client.go @@ -0,0 +1,307 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "errors" + "fmt" + "sync" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/unet" +) + +// ErrOutOfTags indicates no tags are available. +var ErrOutOfTags = errors.New("out of tags -- messages lost?") + +// ErrOutOfFIDs indicates no more FIDs are available. +var ErrOutOfFIDs = errors.New("out of FIDs -- messages lost?") + +// ErrUnexpectedTag indicates a response with an unexpected tag was received. 
+var ErrUnexpectedTag = errors.New("unexpected tag in response") + +// ErrVersionsExhausted indicates that all versions to negotiate have been exhausted. +var ErrVersionsExhausted = errors.New("exhausted all versions to negotiate") + +// ErrBadVersionString indicates that the version string is malformed or unsupported. +var ErrBadVersionString = errors.New("bad version string") + +// ErrBadResponse indicates the response didn't match the request. +type ErrBadResponse struct { + Got MsgType + Want MsgType +} + +// Error returns a highly descriptive error. +func (e *ErrBadResponse) Error() string { + return fmt.Sprintf("unexpected message type: got %v, want %v", e.Got, e.Want) +} + +// response is the asynchronous return from recv. +// +// This is used in the pending map below. +type response struct { + r message + done chan error +} + +var responsePool = sync.Pool{ + New: func() interface{} { + return &response{ + done: make(chan error, 1), + } + }, +} + +// Client is at least a 9P2000.L client. +type Client struct { + // socket is the connected socket. + socket *unet.Socket + + // tagPool is the collection of available tags. + tagPool pool + + // fidPool is the collection of available fids. + fidPool pool + + // pending is the set of pending messages. + pending map[Tag]*response + pendingMu sync.Mutex + + // sendMu is the lock for sending a request. + sendMu sync.Mutex + + // recvr is essentially a mutex for calling recv. + // + // Whoever writes to this channel is permitted to call recv. When + // finished calling recv, this channel should be emptied. + recvr chan bool + + // messageSize is the maximum total size of a message. + messageSize uint32 + + // payloadSize is the maximum payload size of a read or write + // request. For large reads and writes this means that the + // read or write is broken up into buffer-size/payloadSize + // requests. + payloadSize uint32 + + // version is the agreed upon version X of 9P2000.L.Google.X. + // version 0 implies 9P2000.L. 
+ version uint32 +} + +// NewClient creates a new client. It performs a Tversion exchange with +// the server to assert that messageSize is ok to use. +// +// You should not use the same socket for multiple clients. +func NewClient(socket *unet.Socket, messageSize uint32, version string) (*Client, error) { + // Need at least one byte of payload. + if messageSize <= msgRegistry.largestFixedSize { + return nil, &ErrMessageTooLarge{ + size: messageSize, + msize: msgRegistry.largestFixedSize, + } + } + + // Compute a payload size and round to 512 (normal block size) + // if it's larger than a single block. + payloadSize := messageSize - msgRegistry.largestFixedSize + if payloadSize > 512 && payloadSize%512 != 0 { + payloadSize -= (payloadSize % 512) + } + c := &Client{ + socket: socket, + tagPool: pool{start: 1, limit: uint64(NoTag)}, + fidPool: pool{start: 1, limit: uint64(NoFID)}, + pending: make(map[Tag]*response), + recvr: make(chan bool, 1), + messageSize: messageSize, + payloadSize: payloadSize, + } + // Agree upon a version. + requested, ok := parseVersion(version) + if !ok { + return nil, ErrBadVersionString + } + for { + rversion := Rversion{} + err := c.sendRecv(&Tversion{Version: versionString(requested), MSize: messageSize}, &rversion) + + // The server told us to try again with a lower version. + if err == syscall.EAGAIN { + if requested == lowestSupportedVersion { + return nil, ErrVersionsExhausted + } + requested-- + continue + } + + // We requested an impossible version or our other parameters were bogus. + if err != nil { + return nil, err + } + + // Parse the version. + version, ok := parseVersion(rversion.Version) + if !ok { + // The server gave us a bad version. We return a generically worrisome error. + log.Warningf("server returned bad version string %q", rversion.Version) + return nil, ErrBadVersionString + } + c.version = version + break + } + return c, nil +} + +// handleOne handles a single incoming message. 
//
// This should only be called with the token from recvr. Note that the received
// tag will automatically be cleared from pending.
func (c *Client) handleOne() {
	// recv is given a lookup callback that selects the message to decode
	// into, based on the tag and type found in the incoming header.
	tag, r, err := recv(c.socket, c.messageSize, func(tag Tag, t MsgType) (message, error) {
		c.pendingMu.Lock()
		resp := c.pending[tag]
		c.pendingMu.Unlock()

		// Not expecting this message?
		if resp == nil {
			log.Warningf("client received unexpected tag %v, ignoring", tag)
			return nil, ErrUnexpectedTag
		}

		// Is it an error? We specifically allow this to
		// go through, and then we deserialize below.
		if t == MsgRlerror {
			return &Rlerror{}, nil
		}

		// Does it match expectations?
		if t != resp.r.Type() {
			return nil, &ErrBadResponse{Got: t, Want: resp.r.Type()}
		}

		// Return the response.
		return resp.r, nil
	})

	if err != nil {
		// No tag was extracted (probably a socket error).
		//
		// Likely catastrophic. Notify all waiters and clear pending.
		//
		// NOTE(review): errors returned by the lookup callback above
		// presumably also arrive here through recv -- confirm.
		c.pendingMu.Lock()
		for _, resp := range c.pending {
			resp.done <- err
		}
		c.pending = make(map[Tag]*response)
		c.pendingMu.Unlock()
	} else {
		// Process the tag.
		//
		// We know that it is contained in the map because our lookup function
		// above must have succeeded (found the tag) to return nil err.
		c.pendingMu.Lock()
		resp := c.pending[tag]
		delete(c.pending, tag)
		c.pendingMu.Unlock()
		// r is either the waiter's own message or an *Rlerror (see the
		// lookup callback); err is nil on this branch.
		resp.r = r
		resp.done <- err
	}
}

// waitAndRecv co-ordinates with other receivers to handle responses.
//
// It returns the error delivered on done. Until that arrives it competes for
// the recvr token and, while holding it, processes one incoming message at a
// time on behalf of whichever request that message belongs to.
func (c *Client) waitAndRecv(done chan error) error {
	for {
		select {
		case err := <-done:
			return err
		case c.recvr <- true:
			// We hold the receive token now.
			select {
			case err := <-done:
				// It's possible that we got the token, despite
				// done also being available. Check for that.
				<-c.recvr
				return err
			default:
				// Handle receiving one tag.
				c.handleOne()

				// Return the token.
				<-c.recvr
			}
		}
	}
}

// sendRecv performs a roundtrip message exchange.
//
// This is called by internal functions.
+func (c *Client) sendRecv(t message, r message) error { + tag, ok := c.tagPool.Get() + if !ok { + return ErrOutOfTags + } + defer c.tagPool.Put(tag) + + // Indicate we're expecting a response. + // + // Note that the tag will be cleared from pending + // automatically (see handleOne for details). + resp := responsePool.Get().(*response) + defer responsePool.Put(resp) + resp.r = r + c.pendingMu.Lock() + c.pending[Tag(tag)] = resp + c.pendingMu.Unlock() + + // Send the request over the wire. + c.sendMu.Lock() + err := send(c.socket, Tag(tag), t) + c.sendMu.Unlock() + if err != nil { + return err + } + + // Co-ordinate with other receivers. + if err := c.waitAndRecv(resp.done); err != nil { + return err + } + + // Is it an error message? + // + // For convenience, we transform these directly + // into errors. Handlers need not handle this case. + if rlerr, ok := resp.r.(*Rlerror); ok { + return syscall.Errno(rlerr.Error) + } + + // At this point, we know it matches. + // + // Per recv call above, we will only allow a type + // match (and give our r) or an instance of Rlerror. + return nil +} + +// Version returns the negotiated 9P2000.L.Google version number. +func (c *Client) Version() uint32 { + return c.version +} + +// Close closes the underlying socket. +func (c *Client) Close() error { + return c.socket.Close() +} diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go new file mode 100644 index 000000000..258080f67 --- /dev/null +++ b/pkg/p9/client_file.go @@ -0,0 +1,632 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "fmt" + "io" + "runtime" + "sync/atomic" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +// Attach attaches to a server. +// +// Note that authentication is not currently supported. +func (c *Client) Attach(name string) (File, error) { + fid, ok := c.fidPool.Get() + if !ok { + return nil, ErrOutOfFIDs + } + + rattach := Rattach{} + if err := c.sendRecv(&Tattach{FID: FID(fid), Auth: Tauth{AttachName: name, AuthenticationFID: NoFID, UID: NoUID}}, &rattach); err != nil { + c.fidPool.Put(fid) + return nil, err + } + + return c.newFile(FID(fid)), nil +} + +// newFile returns a new client file. +func (c *Client) newFile(fid FID) *clientFile { + cf := &clientFile{ + client: c, + fid: fid, + } + + // Make sure the file is closed. + runtime.SetFinalizer(cf, (*clientFile).Close) + + return cf +} + +// clientFile is provided to clients. +// +// This proxies all of the interfaces found in file.go. +type clientFile struct { + // client is the originating client. + client *Client + + // fid is the FID for this file. + fid FID + + // closed indicates whether this file has been closed. + closed uint32 +} + +// Walk implements File.Walk. 
+func (c *clientFile) Walk(names []string) ([]QID, File, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, nil, syscall.EBADF + } + + fid, ok := c.client.fidPool.Get() + if !ok { + return nil, nil, ErrOutOfFIDs + } + + rwalk := Rwalk{} + if err := c.client.sendRecv(&Twalk{FID: c.fid, NewFID: FID(fid), Names: names}, &rwalk); err != nil { + c.client.fidPool.Put(fid) + return nil, nil, err + } + + // Return a new client file. + return rwalk.QIDs, c.client.newFile(FID(fid)), nil +} + +// WalkGetAttr implements File.WalkGetAttr. +func (c *clientFile) WalkGetAttr(components []string) ([]QID, File, AttrMask, Attr, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, nil, AttrMask{}, Attr{}, syscall.EBADF + } + + if !versionSupportsTwalkgetattr(c.client.version) { + qids, file, err := c.Walk(components) + if err != nil { + return nil, nil, AttrMask{}, Attr{}, err + } + _, valid, attr, err := file.GetAttr(AttrMaskAll()) + if err != nil { + file.Close() + return nil, nil, AttrMask{}, Attr{}, err + } + return qids, file, valid, attr, nil + } + + fid, ok := c.client.fidPool.Get() + if !ok { + return nil, nil, AttrMask{}, Attr{}, ErrOutOfFIDs + } + + rwalkgetattr := Rwalkgetattr{} + if err := c.client.sendRecv(&Twalkgetattr{FID: c.fid, NewFID: FID(fid), Names: components}, &rwalkgetattr); err != nil { + c.client.fidPool.Put(fid) + return nil, nil, AttrMask{}, Attr{}, err + } + + // Return a new client file. + return rwalkgetattr.QIDs, c.client.newFile(FID(fid)), rwalkgetattr.Valid, rwalkgetattr.Attr, nil +} + +// StatFS implements File.StatFS. +func (c *clientFile) StatFS() (FSStat, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return FSStat{}, syscall.EBADF + } + + rstatfs := Rstatfs{} + if err := c.client.sendRecv(&Tstatfs{FID: c.fid}, &rstatfs); err != nil { + return FSStat{}, err + } + + return rstatfs.FSStat, nil +} + +// FSync implements File.FSync. 
+func (c *clientFile) FSync() error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + return c.client.sendRecv(&Tfsync{FID: c.fid}, &Rfsync{}) +} + +// GetAttr implements File.GetAttr. +func (c *clientFile) GetAttr(req AttrMask) (QID, AttrMask, Attr, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return QID{}, AttrMask{}, Attr{}, syscall.EBADF + } + + rgetattr := Rgetattr{} + if err := c.client.sendRecv(&Tgetattr{FID: c.fid, AttrMask: req}, &rgetattr); err != nil { + return QID{}, AttrMask{}, Attr{}, err + } + + return rgetattr.QID, rgetattr.Valid, rgetattr.Attr, nil +} + +// SetAttr implements File.SetAttr. +func (c *clientFile) SetAttr(valid SetAttrMask, attr SetAttr) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + return c.client.sendRecv(&Tsetattr{FID: c.fid, Valid: valid, SetAttr: attr}, &Rsetattr{}) +} + +// Allocate implements File.Allocate. +func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + if !versionSupportsTallocate(c.client.version) { + return syscall.EOPNOTSUPP + } + + return c.client.sendRecv(&Tallocate{FID: c.fid, Mode: mode, Offset: offset, Length: length}, &Rallocate{}) +} + +// Remove implements File.Remove. +// +// N.B. This method is no longer part of the file interface and should be +// considered deprecated. +func (c *clientFile) Remove() error { + // Avoid double close. + if !atomic.CompareAndSwapUint32(&c.closed, 0, 1) { + return syscall.EBADF + } + runtime.SetFinalizer(c, nil) + + // Send the remove message. + if err := c.client.sendRecv(&Tremove{FID: c.fid}, &Rremove{}); err != nil { + return err + } + + // "It is correct to consider remove to be a clunk with the side effect + // of removing the file if permissions allow." + // https://swtch.com/plan9port/man/man9/remove.html + + // Return the FID to the pool. 
+ c.client.fidPool.Put(uint64(c.fid)) + return nil +} + +// Close implements File.Close. +func (c *clientFile) Close() error { + // Avoid double close. + if !atomic.CompareAndSwapUint32(&c.closed, 0, 1) { + return syscall.EBADF + } + runtime.SetFinalizer(c, nil) + + // Send the close message. + if err := c.client.sendRecv(&Tclunk{FID: c.fid}, &Rclunk{}); err != nil { + // If an error occurred, we toss away the FID. This isn't ideal, + // but I'm not sure what else makes sense in this context. + log.Warningf("Tclunk failed, losing FID %v: %v", c.fid, err) + return err + } + + // Return the FID to the pool. + c.client.fidPool.Put(uint64(c.fid)) + return nil +} + +// Open implements File.Open. +func (c *clientFile) Open(flags OpenFlags) (*fd.FD, QID, uint32, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, QID{}, 0, syscall.EBADF + } + + rlopen := Rlopen{} + if err := c.client.sendRecv(&Tlopen{FID: c.fid, Flags: flags}, &rlopen); err != nil { + return nil, QID{}, 0, err + } + + return rlopen.File, rlopen.QID, rlopen.IoUnit, nil +} + +// Connect implements File.Connect. +func (c *clientFile) Connect(flags ConnectFlags) (*fd.FD, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, syscall.EBADF + } + + if !VersionSupportsConnect(c.client.version) { + return nil, syscall.ECONNREFUSED + } + + rlconnect := Rlconnect{} + if err := c.client.sendRecv(&Tlconnect{FID: c.fid, Flags: flags}, &rlconnect); err != nil { + return nil, err + } + + return rlconnect.File, nil +} + +// chunk applies fn to p in chunkSize-sized chunks until fn returns a partial result, p is +// exhausted, or an error is encountered (which may be io.EOF). +func chunk(chunkSize uint32, fn func([]byte, uint64) (int, error), p []byte, offset uint64) (int, error) { + // Some p9.Clients depend on executing fn on zero-byte buffers. Handle this + // as a special case (normally it is fine to short-circuit and return (0, nil)). 
+ if len(p) == 0 { + return fn(p, offset) + } + + // total is the cumulative bytes processed. + var total int + for { + var n int + var err error + + // We're done, don't bother trying to do anything more. + if total == len(p) { + return total, nil + } + + // Apply fn to a chunkSize-sized (or less) chunk of p. + if len(p) < total+int(chunkSize) { + n, err = fn(p[total:], offset) + } else { + n, err = fn(p[total:total+int(chunkSize)], offset) + } + total += n + offset += uint64(n) + + // Return whatever we have processed if we encounter an error. This error + // could be io.EOF. + if err != nil { + return total, err + } + + // Did we get a partial result? If so, return it immediately. + if n < int(chunkSize) { + return total, nil + } + + // If we received more bytes than we ever requested, this is a problem. + if total > len(p) { + panic(fmt.Sprintf("bytes completed (%d)) > requested (%d)", total, len(p))) + } + } +} + +// ReadAt proxies File.ReadAt. +func (c *clientFile) ReadAt(p []byte, offset uint64) (int, error) { + return chunk(c.client.payloadSize, c.readAt, p, offset) +} + +func (c *clientFile) readAt(p []byte, offset uint64) (int, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return 0, syscall.EBADF + } + + rread := Rread{Data: p} + if err := c.client.sendRecv(&Tread{FID: c.fid, Offset: offset, Count: uint32(len(p))}, &rread); err != nil { + return 0, err + } + + // The message may have been truncated, or for some reason a new buffer + // allocated. This isn't the common path, but we make sure that if the + // payload has changed we copy it. See transport.go for more information. + if len(p) > 0 && len(rread.Data) > 0 && &rread.Data[0] != &p[0] { + copy(p, rread.Data) + } + + // io.EOF is not an error that a p9 server can return. Use POSIX semantics to + // return io.EOF manually: zero bytes were returned and a non-zero buffer was used. 
+ if len(rread.Data) == 0 && len(p) > 0 { + return 0, io.EOF + } + + return len(rread.Data), nil +} + +// WriteAt proxies File.WriteAt. +func (c *clientFile) WriteAt(p []byte, offset uint64) (int, error) { + return chunk(c.client.payloadSize, c.writeAt, p, offset) +} + +func (c *clientFile) writeAt(p []byte, offset uint64) (int, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return 0, syscall.EBADF + } + + rwrite := Rwrite{} + if err := c.client.sendRecv(&Twrite{FID: c.fid, Offset: offset, Data: p}, &rwrite); err != nil { + return 0, err + } + + return int(rwrite.Count), nil +} + +// ReadWriterFile wraps a File and implements io.ReadWriter, io.ReaderAt, and io.WriterAt. +type ReadWriterFile struct { + File File + Offset uint64 +} + +// Read implements part of the io.ReadWriter interface. +func (r *ReadWriterFile) Read(p []byte) (int, error) { + n, err := r.File.ReadAt(p, r.Offset) + r.Offset += uint64(n) + if err != nil { + return n, err + } + if n == 0 && len(p) > 0 { + return n, io.EOF + } + return n, nil +} + +// ReadAt implements the io.ReaderAt interface. +func (r *ReadWriterFile) ReadAt(p []byte, offset int64) (int, error) { + n, err := r.File.ReadAt(p, uint64(offset)) + if err != nil { + return 0, err + } + if n == 0 && len(p) > 0 { + return n, io.EOF + } + return n, nil +} + +// Write implements part of the io.ReadWriter interface. +func (r *ReadWriterFile) Write(p []byte) (int, error) { + n, err := r.File.WriteAt(p, r.Offset) + r.Offset += uint64(n) + if err != nil { + return n, err + } + if n < len(p) { + return n, io.ErrShortWrite + } + return n, nil +} + +// WriteAt implements the io.WriteAt interface. +func (r *ReadWriterFile) WriteAt(p []byte, offset int64) (int, error) { + n, err := r.File.WriteAt(p, uint64(offset)) + if err != nil { + return n, err + } + if n < len(p) { + return n, io.ErrShortWrite + } + return n, nil +} + +// Rename implements File.Rename. 
+func (c *clientFile) Rename(dir File, name string) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + clientDir, ok := dir.(*clientFile) + if !ok { + return syscall.EBADF + } + + return c.client.sendRecv(&Trename{FID: c.fid, Directory: clientDir.fid, Name: name}, &Rrename{}) +} + +// Create implements File.Create. +func (c *clientFile) Create(name string, openFlags OpenFlags, permissions FileMode, uid UID, gid GID) (*fd.FD, File, QID, uint32, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, nil, QID{}, 0, syscall.EBADF + } + + msg := Tlcreate{ + FID: c.fid, + Name: name, + OpenFlags: openFlags, + Permissions: permissions, + GID: NoGID, + } + + if versionSupportsTucreation(c.client.version) { + msg.GID = gid + rucreate := Rucreate{} + if err := c.client.sendRecv(&Tucreate{Tlcreate: msg, UID: uid}, &rucreate); err != nil { + return nil, nil, QID{}, 0, err + } + return rucreate.File, c, rucreate.QID, rucreate.IoUnit, nil + } + + rlcreate := Rlcreate{} + if err := c.client.sendRecv(&msg, &rlcreate); err != nil { + return nil, nil, QID{}, 0, err + } + + return rlcreate.File, c, rlcreate.QID, rlcreate.IoUnit, nil +} + +// Mkdir implements File.Mkdir. +func (c *clientFile) Mkdir(name string, permissions FileMode, uid UID, gid GID) (QID, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return QID{}, syscall.EBADF + } + + msg := Tmkdir{ + Directory: c.fid, + Name: name, + Permissions: permissions, + GID: NoGID, + } + + if versionSupportsTucreation(c.client.version) { + msg.GID = gid + rumkdir := Rumkdir{} + if err := c.client.sendRecv(&Tumkdir{Tmkdir: msg, UID: uid}, &rumkdir); err != nil { + return QID{}, err + } + return rumkdir.QID, nil + } + + rmkdir := Rmkdir{} + if err := c.client.sendRecv(&msg, &rmkdir); err != nil { + return QID{}, err + } + + return rmkdir.QID, nil +} + +// Symlink implements File.Symlink. 
+func (c *clientFile) Symlink(oldname string, newname string, uid UID, gid GID) (QID, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return QID{}, syscall.EBADF + } + + msg := Tsymlink{ + Directory: c.fid, + Name: newname, + Target: oldname, + GID: NoGID, + } + + if versionSupportsTucreation(c.client.version) { + msg.GID = gid + rusymlink := Rusymlink{} + if err := c.client.sendRecv(&Tusymlink{Tsymlink: msg, UID: uid}, &rusymlink); err != nil { + return QID{}, err + } + return rusymlink.QID, nil + } + + rsymlink := Rsymlink{} + if err := c.client.sendRecv(&msg, &rsymlink); err != nil { + return QID{}, err + } + + return rsymlink.QID, nil +} + +// Link implements File.Link. +func (c *clientFile) Link(target File, newname string) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + targetFile, ok := target.(*clientFile) + if !ok { + return syscall.EBADF + } + + return c.client.sendRecv(&Tlink{Directory: c.fid, Name: newname, Target: targetFile.fid}, &Rlink{}) +} + +// Mknod implements File.Mknod. +func (c *clientFile) Mknod(name string, mode FileMode, major uint32, minor uint32, uid UID, gid GID) (QID, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return QID{}, syscall.EBADF + } + + msg := Tmknod{ + Directory: c.fid, + Name: name, + Mode: mode, + Major: major, + Minor: minor, + GID: NoGID, + } + + if versionSupportsTucreation(c.client.version) { + msg.GID = gid + rumknod := Rumknod{} + if err := c.client.sendRecv(&Tumknod{Tmknod: msg, UID: uid}, &rumknod); err != nil { + return QID{}, err + } + return rumknod.QID, nil + } + + rmknod := Rmknod{} + if err := c.client.sendRecv(&msg, &rmknod); err != nil { + return QID{}, err + } + + return rmknod.QID, nil +} + +// RenameAt implements File.RenameAt. 
+func (c *clientFile) RenameAt(oldname string, newdir File, newname string) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + clientNewDir, ok := newdir.(*clientFile) + if !ok { + return syscall.EBADF + } + + return c.client.sendRecv(&Trenameat{OldDirectory: c.fid, OldName: oldname, NewDirectory: clientNewDir.fid, NewName: newname}, &Rrenameat{}) +} + +// UnlinkAt implements File.UnlinkAt. +func (c *clientFile) UnlinkAt(name string, flags uint32) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + return c.client.sendRecv(&Tunlinkat{Directory: c.fid, Name: name, Flags: flags}, &Runlinkat{}) +} + +// Readdir implements File.Readdir. +func (c *clientFile) Readdir(offset uint64, count uint32) ([]Dirent, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, syscall.EBADF + } + + rreaddir := Rreaddir{} + if err := c.client.sendRecv(&Treaddir{Directory: c.fid, Offset: offset, Count: count}, &rreaddir); err != nil { + return nil, err + } + + return rreaddir.Entries, nil +} + +// Readlink implements File.Readlink. +func (c *clientFile) Readlink() (string, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return "", syscall.EBADF + } + + rreadlink := Rreadlink{} + if err := c.client.sendRecv(&Treadlink{FID: c.fid}, &rreadlink); err != nil { + return "", err + } + + return rreadlink.Target, nil +} + +// Flush implements File.Flush. +func (c *clientFile) Flush() error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + if !VersionSupportsTflushf(c.client.version) { + return nil + } + + return c.client.sendRecv(&Tflushf{FID: c.fid}, &Rflushf{}) +} + +// Renamed implements File.Renamed. +func (c *clientFile) Renamed(newDir File, newName string) {} diff --git a/pkg/p9/file.go b/pkg/p9/file.go new file mode 100644 index 000000000..a456e8b3d --- /dev/null +++ b/pkg/p9/file.go @@ -0,0 +1,256 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/fd" +) + +// Attacher is provided by the server. +type Attacher interface { + // Attach returns a new File. + // + // The client-side attach will be translate to a series of walks from + // the file returned by this Attach call. + Attach() (File, error) +} + +// File is a set of operations corresponding to a single node. +// +// Note that on the server side, the server logic places constraints on +// concurrent operations to make things easier. This may reduce the need for +// complex, error-prone locking and logic in the backend. These are documented +// for each method. +// +// There are three different types of guarantees provided: +// +// none: There is no concurrency guarantee. The method may be invoked +// concurrently with any other method on any other file. +// +// read: The method is guaranteed to be exclusive of any write or global +// operation that is mutating the state of the directory tree starting at this +// node. For example, this means creating new files, symlinks, directories or +// renaming a directory entry (or renaming in to this target), but the method +// may be called concurrently with other read methods. +// +// write: The method is guaranteed to be exclusive of any read, write or global +// operation that is mutating the state of the directory tree starting at this +// node, as described in read above. 
There may however, be other write +// operations executing concurrently on other components in the directory tree. +// +// global: The method is guaranteed to be exclusive of any read, write or +// global operation. +type File interface { + // Walk walks to the path components given in names. + // + // Walk returns QIDs in the same order that the names were passed in. + // + // An empty list of arguments should return a copy of the current file. + // + // On the server, Walk has a read concurrency guarantee. + Walk(names []string) ([]QID, File, error) + + // WalkGetAttr walks to the next file and returns its maximal set of + // attributes. + // + // Server-side p9.Files may return syscall.ENOSYS to indicate that Walk + // and GetAttr should be used separately to satisfy this request. + // + // On the server, WalkGetAttr has a read concurrency guarantee. + WalkGetAttr([]string) ([]QID, File, AttrMask, Attr, error) + + // StatFS returns information about the file system associated with + // this file. + // + // On the server, StatFS has no concurrency guarantee. + StatFS() (FSStat, error) + + // GetAttr returns attributes of this node. + // + // On the server, GetAttr has a read concurrency guarantee. + GetAttr(req AttrMask) (QID, AttrMask, Attr, error) + + // SetAttr sets attributes on this node. + // + // On the server, SetAttr has a write concurrency guarantee. + SetAttr(valid SetAttrMask, attr SetAttr) error + + // Allocate allows the caller to directly manipulate the allocated disk space + // for the file. See fallocate(2) for more details. + Allocate(mode AllocateMode, offset, length uint64) error + + // Close is called when all references are dropped on the server side, + // and Close should be called by the client to drop all references. + // + // For server-side implementations of Close, the error is ignored. + // + // Close must be called even when Open has not been called. + // + // On the server, Close has no concurrency guarantee. 
+ Close() error + + // Open must be called prior to using Read, Write or Readdir. Once Open + // is called, some operations, such as Walk, will no longer work. + // + // On the client, Open should be called only once. The fd return is + // optional, and may be nil. + // + // On the server, Open has a read concurrency guarantee. If an *fd.FD + // is provided, ownership now belongs to the caller. Open is guaranteed + // to be called only once. + // + // N.B. The server must resolve any lazy paths when open is called. + // After this point, read and write may be called on files with no + // deletion check, so resolving in the data path is not viable. + Open(mode OpenFlags) (*fd.FD, QID, uint32, error) + + // Read reads from this file. Open must be called first. + // + // This may return io.EOF in addition to syscall.Errno values. + // + // On the server, ReadAt has a read concurrency guarantee. See Open for + // additional requirements regarding lazy path resolution. + ReadAt(p []byte, offset uint64) (int, error) + + // Write writes to this file. Open must be called first. + // + // This may return io.EOF in addition to syscall.Errno values. + // + // On the server, WriteAt has a read concurrency guarantee. See Open + // for additional requirements regarding lazy path resolution. + WriteAt(p []byte, offset uint64) (int, error) + + // FSync syncs this node. Open must be called first. + // + // On the server, FSync has a read concurrency guarantee. + FSync() error + + // Create creates a new regular file and opens it according to the + // flags given. This file is already Open. + // + // N.B. On the client, the returned file is a reference to the current + // file, which now represents the created file. This is not the case on + // the server. These semantics are very subtle and can easily lead to + // bugs, but are a consequence of the 9P create operation. + // + // See p9.File.Open for a description of *fd.FD. 
+ // + // On the server, Create has a write concurrency guarantee. + Create(name string, flags OpenFlags, permissions FileMode, uid UID, gid GID) (*fd.FD, File, QID, uint32, error) + + // Mkdir creates a subdirectory. + // + // On the server, Mkdir has a write concurrency guarantee. + Mkdir(name string, permissions FileMode, uid UID, gid GID) (QID, error) + + // Symlink makes a new symbolic link. + // + // On the server, Symlink has a write concurrency guarantee. + Symlink(oldName string, newName string, uid UID, gid GID) (QID, error) + + // Link makes a new hard link. + // + // On the server, Link has a write concurrency guarantee. + Link(target File, newName string) error + + // Mknod makes a new device node. + // + // On the server, Mknod has a write concurrency guarantee. + Mknod(name string, mode FileMode, major uint32, minor uint32, uid UID, gid GID) (QID, error) + + // Rename renames the file. + // + // Rename will never be called on the server, and RenameAt will always + // be used instead. + Rename(newDir File, newName string) error + + // RenameAt renames a given file to a new name in a potentially new + // directory. + // + // oldName must be a name relative to this file, which must be a + // directory. newName is a name relative to newDir. + // + // On the server, RenameAt has a global concurrency guarantee. + RenameAt(oldName string, newDir File, newName string) error + + // UnlinkAt the given named file. + // + // name must be a file relative to this directory. + // + // Flags are implementation-specific (e.g. O_DIRECTORY), but are + // generally Linux unlinkat(2) flags. + // + // On the server, UnlinkAt has a write concurrency guarantee. + UnlinkAt(name string, flags uint32) error + + // Readdir reads directory entries. + // + // This may return io.EOF in addition to syscall.Errno values. + // + // On the server, Readdir has a read concurrency guarantee. + Readdir(offset uint64, count uint32) ([]Dirent, error) + + // Readlink reads the link target. 
+ // + // On the server, Readlink has a read concurrency guarantee. + Readlink() (string, error) + + // Flush is called prior to Close. + // + // Whereas Close drops all references to the file, Flush cleans up the + // file state. Behavior is implementation-specific. + // + // Flush is not related to flush(9p). Flush is an extension to 9P2000.L, + // see version.go. + // + // On the server, Flush has a read concurrency guarantee. + Flush() error + + // Connect establishes a new host-socket backed connection with a + // socket. A File does not need to be opened before it can be connected + // and it can be connected to multiple times resulting in a unique + // *fd.FD each time. In addition, the lifetime of the *fd.FD is + // independent from the lifetime of the p9.File and must be managed by + // the caller. + // + // The returned FD must be non-blocking. + // + // Flags indicates the requested type of socket. + // + // On the server, Connect has a read concurrency guarantee. + Connect(flags ConnectFlags) (*fd.FD, error) + + // Renamed is called when this node is renamed. + // + // This may not fail. The file will hold a reference to its parent + // within the p9 package, and is therefore safe to use for the lifetime + // of this File (until Close is called). + // + // This method should not be called by clients, who should use the + // relevant Rename methods. (Although the method will be a no-op.) + // + // On the server, Renamed has a global concurrency guarantee. + Renamed(newDir File, newName string) +} + +// DefaultWalkGetAttr implements File.WalkGetAttr to return ENOSYS for server-side Files. +type DefaultWalkGetAttr struct{} + +// WalkGetAttr implements File.WalkGetAttr. 
+func (DefaultWalkGetAttr) WalkGetAttr([]string) ([]QID, File, AttrMask, Attr, error) { + return nil, nil, AttrMask{}, Attr{}, syscall.ENOSYS +} diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go new file mode 100644 index 000000000..f32368763 --- /dev/null +++ b/pkg/p9/handlers.go @@ -0,0 +1,1291 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "fmt" + "io" + "os" + "path" + "strings" + "sync/atomic" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +// ExtractErrno extracts a syscall.Errno from a error, best effort. +func ExtractErrno(err error) syscall.Errno { + switch err { + case os.ErrNotExist: + return syscall.ENOENT + case os.ErrExist: + return syscall.EEXIST + case os.ErrPermission: + return syscall.EACCES + case os.ErrInvalid: + return syscall.EINVAL + } + + // Attempt to unwrap. + switch e := err.(type) { + case syscall.Errno: + return e + case *os.PathError: + return ExtractErrno(e.Err) + case *os.SyscallError: + return ExtractErrno(e.Err) + } + + // Default case. + log.Warningf("unknown error: %v", err) + return syscall.EIO +} + +// newErr returns a new error message from an error. +func newErr(err error) *Rlerror { + return &Rlerror{Error: uint32(ExtractErrno(err))} +} + +// handler is implemented for server-handled messages. +// +// See server.go for call information. 
+type handler interface { + // Handle handles the given message. + // + // This may modify the server state. The handle function must return a + // message which will be sent back to the client. It may be useful to + // use newErr to automatically extract an error message. + handle(cs *connState) message +} + +// handle implements handler.handle. +func (t *Tversion) handle(cs *connState) message { + if t.MSize == 0 { + return newErr(syscall.EINVAL) + } + if t.MSize > maximumLength { + return newErr(syscall.EINVAL) + } + atomic.StoreUint32(&cs.messageSize, t.MSize) + requested, ok := parseVersion(t.Version) + if !ok { + return newErr(syscall.EINVAL) + } + // The server cannot support newer versions that it doesn't know about. In this + // case we return EAGAIN to tell the client to try again with a lower version. + if requested > highestSupportedVersion { + return newErr(syscall.EAGAIN) + } + // From Tversion(9P): "The server may respond with the client’s version + // string, or a version string identifying an earlier defined protocol version". + atomic.StoreUint32(&cs.version, requested) + return &Rversion{ + MSize: t.MSize, + Version: t.Version, + } +} + +// handle implements handler.handle. +func (t *Tflush) handle(cs *connState) message { + cs.WaitTag(t.OldTag) + return &Rflush{} +} + +// checkSafeName validates the name and returns nil or returns an error. +func checkSafeName(name string) error { + if name != "" && !strings.Contains(name, "/") && name != "." && name != ".." { + return nil + } + return syscall.EINVAL +} + +// handle implements handler.handle. +func (t *Tclunk) handle(cs *connState) message { + if !cs.DeleteFID(t.FID) { + return newErr(syscall.EBADF) + } + return &Rclunk{} +} + +// handle implements handler.handle. 
+func (t *Tremove) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Frustratingly, because we can't be guaranteed that a rename is not + // occurring simultaneously with this removal, we need to acquire the + // global rename lock for this kind of remove operation to ensure that + // ref.parent does not change out from underneath us. + // + // This is why Tremove is a bad idea, and clients should generally use + // Tunlinkat. All p9 clients will use Tunlinkat. + err := ref.safelyGlobal(func() error { + // Is this a root? Can't remove that. + if ref.isRoot() { + return syscall.EINVAL + } + + // N.B. this remove operation is permitted, even if the file is open. + // See also rename below for reasoning. + + // Is this file already deleted? + if ref.isDeleted() { + return syscall.EINVAL + } + + // Retrieve the file's proper name. + name := ref.parent.pathNode.nameFor(ref) + + // Attempt the removal. + if err := ref.parent.file.UnlinkAt(name, 0); err != nil { + return err + } + + // Mark all relevant fids as deleted. We don't need to lock any + // individual nodes because we already hold the global lock. + ref.parent.markChildDeleted(name) + return nil + }) + + // "The remove request asks the file server both to remove the file + // represented by fid and to clunk the fid, even if the remove fails." + // + // "It is correct to consider remove to be a clunk with the side effect + // of removing the file if permissions allow." + // https://swtch.com/plan9port/man/man9/remove.html + if !cs.DeleteFID(t.FID) { + return newErr(syscall.EBADF) + } + if err != nil { + return newErr(err) + } + + return &Rremove{} +} + +// handle implements handler.handle. +// +// We don't support authentication, so this just returns ENOSYS. +func (t *Tauth) handle(cs *connState) message { + return newErr(syscall.ENOSYS) +} + +// handle implements handler.handle. 
+func (t *Tattach) handle(cs *connState) message { + // Ensure no authentication FID is provided. + if t.Auth.AuthenticationFID != NoFID { + return newErr(syscall.EINVAL) + } + + // Must provide an absolute path. + if path.IsAbs(t.Auth.AttachName) { + // Trim off the leading / if the path is absolute. We always + // treat attach paths as absolute and call attach with the root + // argument on the server file for clarity. + t.Auth.AttachName = t.Auth.AttachName[1:] + } + + // Do the attach on the root. + sf, err := cs.server.attacher.Attach() + if err != nil { + return newErr(err) + } + qid, valid, attr, err := sf.GetAttr(AttrMaskAll()) + if err != nil { + sf.Close() // Drop file. + return newErr(err) + } + if !valid.Mode { + sf.Close() // Drop file. + return newErr(syscall.EINVAL) + } + + // Build a transient reference. + root := &fidRef{ + server: cs.server, + parent: nil, + file: sf, + refs: 1, + mode: attr.Mode.FileType(), + pathNode: &cs.server.pathTree, + } + defer root.DecRef() + + // Attach the root? + if len(t.Auth.AttachName) == 0 { + cs.InsertFID(t.FID, root) + return &Rattach{QID: qid} + } + + // We want the same traversal checks to apply on attach, so always + // attach at the root and use the regular walk paths. + names := strings.Split(t.Auth.AttachName, "/") + _, newRef, _, _, err := doWalk(cs, root, names, false) + if err != nil { + return newErr(err) + } + defer newRef.DecRef() + + // Insert the FID. + cs.InsertFID(t.FID, newRef) + return &Rattach{QID: qid} +} + +// CanOpen returns whether this file open can be opened, read and written to. +// +// This includes everything except symlinks and sockets. +func CanOpen(mode FileMode) bool { + return mode.IsRegular() || mode.IsDir() || mode.IsNamedPipe() || mode.IsBlockDevice() || mode.IsCharacterDevice() +} + +// handle implements handler.handle. +func (t *Tlopen) handle(cs *connState) message { + // Lookup the FID. 
+ ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + ref.openedMu.Lock() + defer ref.openedMu.Unlock() + + // Has it been opened already? + if ref.opened || !CanOpen(ref.mode) { + return newErr(syscall.EINVAL) + } + + // Are flags valid? + flags := t.Flags &^ OpenFlagsIgnoreMask + if flags&^OpenFlagsModeMask != 0 { + return newErr(syscall.EINVAL) + } + + // Is this an attempt to open a directory as writable? Don't accept. + if ref.mode.IsDir() && flags != ReadOnly { + return newErr(syscall.EINVAL) + } + + var ( + qid QID + ioUnit uint32 + osFile *fd.FD + ) + if err := ref.safelyRead(func() (err error) { + // Has it been deleted already? + if ref.isDeleted() { + return syscall.EINVAL + } + + // Do the open. + osFile, qid, ioUnit, err = ref.file.Open(t.Flags) + return err + }); err != nil { + return newErr(err) + } + + // Mark file as opened and set open mode. + ref.opened = true + ref.openFlags = t.Flags + + return &Rlopen{QID: qid, IoUnit: ioUnit, File: osFile} +} + +func (t *Tlcreate) do(cs *connState, uid UID) (*Rlcreate, error) { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return nil, err + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return nil, syscall.EBADF + } + defer ref.DecRef() + + var ( + osFile *fd.FD + nsf File + qid QID + ioUnit uint32 + newRef *fidRef + ) + if err := ref.safelyWrite(func() (err error) { + // Don't allow creation from non-directories or deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Do the create. 
+ osFile, nsf, qid, ioUnit, err = ref.file.Create(t.Name, t.OpenFlags, t.Permissions, uid, t.GID) + if err != nil { + return err + } + + newRef = &fidRef{ + server: cs.server, + parent: ref, + file: nsf, + opened: true, + openFlags: t.OpenFlags, + mode: ModeRegular, + pathNode: ref.pathNode.pathNodeFor(t.Name), + } + ref.pathNode.addChild(newRef, t.Name) + ref.IncRef() // Acquire parent reference. + return nil + }); err != nil { + return nil, err + } + + // Replace the FID reference. + cs.InsertFID(t.FID, newRef) + + return &Rlcreate{Rlopen: Rlopen{QID: qid, IoUnit: ioUnit, File: osFile}}, nil +} + +// handle implements handler.handle. +func (t *Tlcreate) handle(cs *connState) message { + rlcreate, err := t.do(cs, NoUID) + if err != nil { + return newErr(err) + } + return rlcreate +} + +// handle implements handler.handle. +func (t *Tsymlink) handle(cs *connState) message { + rsymlink, err := t.do(cs, NoUID) + if err != nil { + return newErr(err) + } + return rsymlink +} + +func (t *Tsymlink) do(cs *connState, uid UID) (*Rsymlink, error) { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return nil, err + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.Directory) + if !ok { + return nil, syscall.EBADF + } + defer ref.DecRef() + + var qid QID + if err := ref.safelyWrite(func() (err error) { + // Don't allow symlinks from non-directories or deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Do the symlink. + qid, err = ref.file.Symlink(t.Target, t.Name, uid, t.GID) + return err + }); err != nil { + return nil, err + } + + return &Rsymlink{QID: qid}, nil +} + +// handle implements handler.handle. +func (t *Tlink) handle(cs *connState) message { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return newErr(err) + } + + // Lookup the FID. 
+ ref, ok := cs.LookupFID(t.Directory) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Lookup the other FID. + refTarget, ok := cs.LookupFID(t.Target) + if !ok { + return newErr(syscall.EBADF) + } + defer refTarget.DecRef() + + if err := ref.safelyWrite(func() (err error) { + // Don't allow create links from non-directories or deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Do the link. + return ref.file.Link(refTarget.file, t.Name) + }); err != nil { + return newErr(err) + } + + return &Rlink{} +} + +// handle implements handler.handle. +func (t *Trenameat) handle(cs *connState) message { + // Don't allow complex names. + if err := checkSafeName(t.OldName); err != nil { + return newErr(err) + } + if err := checkSafeName(t.NewName); err != nil { + return newErr(err) + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.OldDirectory) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Lookup the other FID. + refTarget, ok := cs.LookupFID(t.NewDirectory) + if !ok { + return newErr(syscall.EBADF) + } + defer refTarget.DecRef() + + // Perform the rename holding the global lock. + if err := ref.safelyGlobal(func() (err error) { + // Don't allow renaming across deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() || refTarget.isDeleted() || !refTarget.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Is this the same file? If yes, short-circuit and return success. + if ref.pathNode == refTarget.pathNode && t.OldName == t.NewName { + return nil + } + + // Attempt the actual rename. + if err := ref.file.RenameAt(t.OldName, refTarget.file, t.NewName); err != nil { + return err + } + + // Update the path tree. 
+ ref.renameChildTo(t.OldName, refTarget, t.NewName) + return nil + }); err != nil { + return newErr(err) + } + + return &Rrenameat{} +} + +// handle implements handler.handle. +func (t *Tunlinkat) handle(cs *connState) message { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return newErr(err) + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.Directory) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyWrite(func() (err error) { + // Don't allow deletion from non-directories or deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Before we do the unlink itself, we need to ensure that there + // are no operations in flight on associated path node. The + // child's path node lock must be held to ensure that the + // unlink at marking the child deleted below is atomic with + // respect to any other read or write operations. + // + // This is one case where we have a lock ordering issue, but + // since we always acquire deeper in the hierarchy, we know + // that we are free of lock cycles. + childPathNode := ref.pathNode.pathNodeFor(t.Name) + childPathNode.mu.Lock() + defer childPathNode.mu.Unlock() + + // Do the unlink. + err = ref.file.UnlinkAt(t.Name, t.Flags) + if err != nil { + return err + } + + // Mark the path as deleted. + ref.markChildDeleted(t.Name) + return nil + }); err != nil { + return newErr(err) + } + + return &Runlinkat{} +} + +// handle implements handler.handle. +func (t *Trename) handle(cs *connState) message { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return newErr(err) + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Lookup the target. 
+ refTarget, ok := cs.LookupFID(t.Directory) + if !ok { + return newErr(syscall.EBADF) + } + defer refTarget.DecRef() + + if err := ref.safelyGlobal(func() (err error) { + // Don't allow a root rename. + if ref.isRoot() { + return syscall.EINVAL + } + + // Don't allow renaming deleting entries, or target non-directories. + if ref.isDeleted() || refTarget.isDeleted() || !refTarget.mode.IsDir() { + return syscall.EINVAL + } + + // If the parent is deleted, but we not, something is seriously wrong. + // It's fail to die at this point with an assertion failure. + if ref.parent.isDeleted() { + panic(fmt.Sprintf("parent %+v deleted, child %+v is not", ref.parent, ref)) + } + + // N.B. The rename operation is allowed to proceed on open files. It + // does impact the state of its parent, but this is merely a sanity + // check in any case, and the operation is safe. There may be other + // files corresponding to the same path that are renamed anyways. + + // Check for the exact same file and short-circuit. + oldName := ref.parent.pathNode.nameFor(ref) + if ref.parent.pathNode == refTarget.pathNode && oldName == t.Name { + return nil + } + + // Call the rename method on the parent. + if err := ref.parent.file.RenameAt(oldName, refTarget.file, t.Name); err != nil { + return err + } + + // Update the path tree. + ref.parent.renameChildTo(oldName, refTarget, t.Name) + return nil + }); err != nil { + return newErr(err) + } + + return &Rrename{} +} + +// handle implements handler.handle. +func (t *Treadlink) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + var target string + if err := ref.safelyRead(func() (err error) { + // Don't allow readlink on deleted files. There is no need to + // check if this file is opened because symlinks cannot be + // opened. + if ref.isDeleted() || !ref.mode.IsSymlink() { + return syscall.EINVAL + } + + // Do the read. 
+ target, err = ref.file.Readlink() + return err + }); err != nil { + return newErr(err) + } + + return &Rreadlink{target} +} + +// handle implements handler.handle. +func (t *Tread) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Constrain the size of the read buffer. + if int(t.Count) > int(maximumLength) { + return newErr(syscall.ENOBUFS) + } + + var ( + data = make([]byte, t.Count) + n int + ) + if err := ref.safelyRead(func() (err error) { + // Has it been opened already? + openFlags, opened := ref.OpenFlags() + if !opened { + return syscall.EINVAL + } + + // Can it be read? Check permissions. + if openFlags&OpenFlagsModeMask == WriteOnly { + return syscall.EPERM + } + + n, err = ref.file.ReadAt(data, t.Offset) + return err + }); err != nil && err != io.EOF { + return newErr(err) + } + + return &Rread{Data: data[:n]} +} + +// handle implements handler.handle. +func (t *Twrite) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + var n int + if err := ref.safelyRead(func() (err error) { + // Has it been opened already? + openFlags, opened := ref.OpenFlags() + if !opened { + return syscall.EINVAL + } + + // Can it be written? Check permissions. + if openFlags&OpenFlagsModeMask == ReadOnly { + return syscall.EPERM + } + + n, err = ref.file.WriteAt(t.Data, t.Offset) + return err + }); err != nil { + return newErr(err) + } + + return &Rwrite{Count: uint32(n)} +} + +// handle implements handler.handle. +func (t *Tmknod) handle(cs *connState) message { + rmknod, err := t.do(cs, NoUID) + if err != nil { + return newErr(err) + } + return rmknod +} + +func (t *Tmknod) do(cs *connState, uid UID) (*Rmknod, error) { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return nil, err + } + + // Lookup the FID. 
+ ref, ok := cs.LookupFID(t.Directory) + if !ok { + return nil, syscall.EBADF + } + defer ref.DecRef() + + var qid QID + if err := ref.safelyWrite(func() (err error) { + // Don't allow mknod on deleted files. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Do the mknod. + qid, err = ref.file.Mknod(t.Name, t.Mode, t.Major, t.Minor, uid, t.GID) + return err + }); err != nil { + return nil, err + } + + return &Rmknod{QID: qid}, nil +} + +// handle implements handler.handle. +func (t *Tmkdir) handle(cs *connState) message { + rmkdir, err := t.do(cs, NoUID) + if err != nil { + return newErr(err) + } + return rmkdir +} + +func (t *Tmkdir) do(cs *connState, uid UID) (*Rmkdir, error) { + // Don't allow complex names. + if err := checkSafeName(t.Name); err != nil { + return nil, err + } + + // Lookup the FID. + ref, ok := cs.LookupFID(t.Directory) + if !ok { + return nil, syscall.EBADF + } + defer ref.DecRef() + + var qid QID + if err := ref.safelyWrite(func() (err error) { + // Don't allow mkdir on deleted files. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Not allowed on open directories. + if _, opened := ref.OpenFlags(); opened { + return syscall.EINVAL + } + + // Do the mkdir. + qid, err = ref.file.Mkdir(t.Name, t.Permissions, uid, t.GID) + return err + }); err != nil { + return nil, err + } + + return &Rmkdir{QID: qid}, nil +} + +// handle implements handler.handle. +func (t *Tgetattr) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // We allow getattr on deleted files. Depending on the backing + // implementation, it's possible that races exist that might allow + // fetching attributes of other files. 
But we need to generally allow + // refreshing attributes and this is a minor leak, if at all. + + var ( + qid QID + valid AttrMask + attr Attr + ) + if err := ref.safelyRead(func() (err error) { + qid, valid, attr, err = ref.file.GetAttr(t.AttrMask) + return err + }); err != nil { + return newErr(err) + } + + return &Rgetattr{QID: qid, Valid: valid, Attr: attr} +} + +// handle implements handler.handle. +func (t *Tsetattr) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyWrite(func() error { + // We don't allow setattr on files that have been deleted. + // This might be technically incorrect, as it's possible that + // there were multiple links and you can still change the + // corresponding inode information. + if ref.isDeleted() { + return syscall.EINVAL + } + + // Set the attributes. + return ref.file.SetAttr(t.Valid, t.SetAttr) + }); err != nil { + return newErr(err) + } + + return &Rsetattr{} +} + +// handle implements handler.handle. +func (t *Tallocate) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyWrite(func() error { + // Has it been opened already? + openFlags, opened := ref.OpenFlags() + if !opened { + return syscall.EINVAL + } + + // Can it be written? Check permissions. + if openFlags&OpenFlagsModeMask == ReadOnly { + return syscall.EBADF + } + + // We don't allow allocate on files that have been deleted. + if ref.isDeleted() { + return syscall.EINVAL + } + + return ref.file.Allocate(t.Mode, t.Offset, t.Length) + }); err != nil { + return newErr(err) + } + + return &Rallocate{} +} + +// handle implements handler.handle. +func (t *Txattrwalk) handle(cs *connState) message { + // Lookup the FID. 
+ ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // We don't support extended attributes. + return newErr(syscall.ENODATA) +} + +// handle implements handler.handle. +func (t *Txattrcreate) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // We don't support extended attributes. + return newErr(syscall.ENOSYS) +} + +// handle implements handler.handle. +func (t *Treaddir) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.Directory) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + var entries []Dirent + if err := ref.safelyRead(func() (err error) { + // Don't allow reading deleted directories. + if ref.isDeleted() || !ref.mode.IsDir() { + return syscall.EINVAL + } + + // Has it been opened already? + if _, opened := ref.OpenFlags(); !opened { + return syscall.EINVAL + } + + // Read the entries. + entries, err = ref.file.Readdir(t.Offset, t.Count) + if err != nil && err != io.EOF { + return err + } + return nil + }); err != nil { + return newErr(err) + } + + return &Rreaddir{Count: t.Count, Entries: entries} +} + +// handle implements handler.handle. +func (t *Tfsync) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyRead(func() (err error) { + // Has it been opened already? + if _, opened := ref.OpenFlags(); !opened { + return syscall.EINVAL + } + + // Perform the sync. + return ref.file.FSync() + }); err != nil { + return newErr(err) + } + + return &Rfsync{} +} + +// handle implements handler.handle. +func (t *Tstatfs) handle(cs *connState) message { + // Lookup the FID. 
+ ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + st, err := ref.file.StatFS() + if err != nil { + return newErr(err) + } + + return &Rstatfs{st} +} + +// handle implements handler.handle. +func (t *Tflushf) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.safelyRead(ref.file.Flush); err != nil { + return newErr(err) + } + + return &Rflushf{} +} + +// walkOne walks zero or one path elements. +// +// The slice passed as qids is append and returned. +func walkOne(qids []QID, from File, names []string, getattr bool) ([]QID, File, AttrMask, Attr, error) { + if len(names) > 1 { + // We require exactly zero or one elements. + return nil, nil, AttrMask{}, Attr{}, syscall.EINVAL + } + var ( + localQIDs []QID + sf File + valid AttrMask + attr Attr + err error + ) + switch { + case getattr: + localQIDs, sf, valid, attr, err = from.WalkGetAttr(names) + // Can't put fallthrough in the if because Go. + if err != syscall.ENOSYS { + break + } + fallthrough + default: + localQIDs, sf, err = from.Walk(names) + if err != nil { + // No way to walk this element. + break + } + if getattr { + _, valid, attr, err = sf.GetAttr(AttrMaskAll()) + if err != nil { + // Don't leak the file. + sf.Close() + } + } + } + if err != nil { + // Error walking, don't return anything. + return nil, nil, AttrMask{}, Attr{}, err + } + if len(localQIDs) != 1 { + // Expected a single QID. + sf.Close() + return nil, nil, AttrMask{}, Attr{}, syscall.EINVAL + } + return append(qids, localQIDs...), sf, valid, attr, nil +} + +// doWalk walks from a given fidRef. +// +// This enforces that all intermediate nodes are walkable (directories). The +// fidRef returned (newRef) has a reference associated with it that is now +// owned by the caller and must be handled appropriately. 
+func doWalk(cs *connState, ref *fidRef, names []string, getattr bool) (qids []QID, newRef *fidRef, valid AttrMask, attr Attr, err error) { + // Check the names. + for _, name := range names { + err = checkSafeName(name) + if err != nil { + return + } + } + + // Has it been opened already? + if _, opened := ref.OpenFlags(); opened { + err = syscall.EBUSY + return + } + + // Is this an empty list? Handle specially. We don't actually need to + // validate anything since this is always permitted. + if len(names) == 0 { + var sf File // Temporary. + if err := ref.maybeParent().safelyRead(func() (err error) { + // Clone the single element. + qids, sf, valid, attr, err = walkOne(nil, ref.file, nil, getattr) + if err != nil { + return err + } + + newRef = &fidRef{ + server: cs.server, + parent: ref.parent, + file: sf, + mode: ref.mode, + pathNode: ref.pathNode, + + // For the clone case, the cloned fid must + // preserve the deleted property of the + // original FID. + deleted: ref.deleted, + } + if !ref.isRoot() { + if !newRef.isDeleted() { + // Add only if a non-root node; the same node. + ref.parent.pathNode.addChild(newRef, ref.parent.pathNode.nameFor(ref)) + } + ref.parent.IncRef() // Acquire parent reference. + } + // doWalk returns a reference. + newRef.IncRef() + return nil + }); err != nil { + return nil, nil, AttrMask{}, Attr{}, err + } + // Do not return the new QID. + return nil, newRef, valid, attr, nil + } + + // Do the walk, one element at a time. + walkRef := ref + walkRef.IncRef() + for i := 0; i < len(names); i++ { + // We won't allow beyond past symlinks; stop here if this isn't + // a proper directory and we have additional paths to walk. + if !walkRef.mode.IsDir() { + walkRef.DecRef() // Drop walk reference; no lock required. + return nil, nil, AttrMask{}, Attr{}, syscall.EINVAL + } + + var sf File // Temporary. + if err := walkRef.safelyRead(func() (err error) { + // Pass getattr = true to walkOne since we need the file type for + // newRef. 
+ qids, sf, valid, attr, err = walkOne(qids, walkRef.file, names[i:i+1], true) + if err != nil { + return err + } + + // Note that we don't need to acquire a lock on any of + // these individual instances. That's because they are + // not actually addressable via a FID. They are + // anonymous. They exist in the tree for tracking + // purposes. + newRef := &fidRef{ + server: cs.server, + parent: walkRef, + file: sf, + mode: attr.Mode.FileType(), + pathNode: walkRef.pathNode.pathNodeFor(names[i]), + } + walkRef.pathNode.addChild(newRef, names[i]) + // We allow our walk reference to become the new parent + // reference here and so we don't IncRef. Instead, just + // set walkRef to the newRef above and acquire a new + // walk reference. + walkRef = newRef + walkRef.IncRef() + return nil + }); err != nil { + walkRef.DecRef() // Drop the old walkRef. + return nil, nil, AttrMask{}, Attr{}, err + } + } + + // Success. + return qids, walkRef, valid, attr, nil +} + +// handle implements handler.handle. +func (t *Twalk) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Do the walk. + qids, newRef, _, _, err := doWalk(cs, ref, t.Names, false) + if err != nil { + return newErr(err) + } + defer newRef.DecRef() + + // Install the new FID. + cs.InsertFID(t.NewFID, newRef) + return &Rwalk{QIDs: qids} +} + +// handle implements handler.handle. +func (t *Twalkgetattr) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + // Do the walk. + qids, newRef, valid, attr, err := doWalk(cs, ref, t.Names, true) + if err != nil { + return newErr(err) + } + defer newRef.DecRef() + + // Install the new FID. + cs.InsertFID(t.NewFID, newRef) + return &Rwalkgetattr{QIDs: qids, Valid: valid, Attr: attr} +} + +// handle implements handler.handle. 
+func (t *Tucreate) handle(cs *connState) message { + rlcreate, err := t.Tlcreate.do(cs, t.UID) + if err != nil { + return newErr(err) + } + return &Rucreate{*rlcreate} +} + +// handle implements handler.handle. +func (t *Tumkdir) handle(cs *connState) message { + rmkdir, err := t.Tmkdir.do(cs, t.UID) + if err != nil { + return newErr(err) + } + return &Rumkdir{*rmkdir} +} + +// handle implements handler.handle. +func (t *Tusymlink) handle(cs *connState) message { + rsymlink, err := t.Tsymlink.do(cs, t.UID) + if err != nil { + return newErr(err) + } + return &Rusymlink{*rsymlink} +} + +// handle implements handler.handle. +func (t *Tumknod) handle(cs *connState) message { + rmknod, err := t.Tmknod.do(cs, t.UID) + if err != nil { + return newErr(err) + } + return &Rumknod{*rmknod} +} + +// handle implements handler.handle. +func (t *Tlconnect) handle(cs *connState) message { + // Lookup the FID. + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + var osFile *fd.FD + if err := ref.safelyRead(func() (err error) { + // Don't allow connecting to deleted files. + if ref.isDeleted() || !ref.mode.IsSocket() { + return syscall.EINVAL + } + + // Do the connect. + osFile, err = ref.file.Connect(t.Flags) + return err + }); err != nil { + return newErr(err) + } + + return &Rlconnect{File: osFile} +} diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go new file mode 100644 index 000000000..75d6bc832 --- /dev/null +++ b/pkg/p9/messages.go @@ -0,0 +1,2359 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package p9 + +import ( + "fmt" + "math" + + "gvisor.googlesource.com/gvisor/pkg/fd" +) + +// ErrInvalidMsgType is returned when an unsupported message type is found. +type ErrInvalidMsgType struct { + MsgType +} + +// Error returns a useful string. +func (e *ErrInvalidMsgType) Error() string { + return fmt.Sprintf("invalid message type: %d", e.MsgType) +} + +// message is a generic 9P message. +type message interface { + encoder + fmt.Stringer + + // Type returns the message type number. + Type() MsgType +} + +// payloader is a special message which may include an inline payload. +type payloader interface { + // FixedSize returns the size of the fixed portion of this message. + FixedSize() uint32 + + // Payload returns the payload for sending. + Payload() []byte + + // SetPayload returns the decoded message. + // + // This is going to be total message size - FixedSize. But this should + // be validated during Decode, which will be called after SetPayload. + SetPayload([]byte) +} + +// filer is a message capable of passing a file. +type filer interface { + // FilePayload returns the file payload. + FilePayload() *fd.FD + + // SetFilePayload sets the file payload. + SetFilePayload(*fd.FD) +} + +// Tversion is a version request. +type Tversion struct { + // MSize is the message size to use. + MSize uint32 + + // Version is the version string. + // + // For this implementation, this must be 9P2000.L. + Version string +} + +// Decode implements encoder.Decode. 
+func (t *Tversion) Decode(b *buffer) { + t.MSize = b.Read32() + t.Version = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Tversion) Encode(b *buffer) { + b.Write32(t.MSize) + b.WriteString(t.Version) +} + +// Type implements message.Type. +func (*Tversion) Type() MsgType { + return MsgTversion +} + +// String implements fmt.Stringer. +func (t *Tversion) String() string { + return fmt.Sprintf("Tversion{MSize: %d, Version: %s}", t.MSize, t.Version) +} + +// Rversion is a version response. +type Rversion struct { + // MSize is the negotiated size. + MSize uint32 + + // Version is the negotiated version. + Version string +} + +// Decode implements encoder.Decode. +func (r *Rversion) Decode(b *buffer) { + r.MSize = b.Read32() + r.Version = b.ReadString() +} + +// Encode implements encoder.Encode. +func (r *Rversion) Encode(b *buffer) { + b.Write32(r.MSize) + b.WriteString(r.Version) +} + +// Type implements message.Type. +func (*Rversion) Type() MsgType { + return MsgRversion +} + +// String implements fmt.Stringer. +func (r *Rversion) String() string { + return fmt.Sprintf("Rversion{MSize: %d, Version: %s}", r.MSize, r.Version) +} + +// Tflush is a flush request. +type Tflush struct { + // OldTag is the tag to wait on. + OldTag Tag +} + +// Decode implements encoder.Decode. +func (t *Tflush) Decode(b *buffer) { + t.OldTag = b.ReadTag() +} + +// Encode implements encoder.Encode. +func (t *Tflush) Encode(b *buffer) { + b.WriteTag(t.OldTag) +} + +// Type implements message.Type. +func (*Tflush) Type() MsgType { + return MsgTflush +} + +// String implements fmt.Stringer. +func (t *Tflush) String() string { + return fmt.Sprintf("Tflush{OldTag: %d}", t.OldTag) +} + +// Rflush is a flush response. +type Rflush struct { +} + +// Decode implements encoder.Decode. +func (*Rflush) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rflush) Encode(b *buffer) { +} + +// Type implements message.Type. 
+func (*Rflush) Type() MsgType { + return MsgRflush +} + +// String implements fmt.Stringer. +func (r *Rflush) String() string { + return fmt.Sprintf("RFlush{}") +} + +// Twalk is a walk request. +type Twalk struct { + // FID is the FID to be walked. + FID FID + + // NewFID is the resulting FID. + NewFID FID + + // Names are the set of names to be walked. + Names []string +} + +// Decode implements encoder.Decode. +func (t *Twalk) Decode(b *buffer) { + t.FID = b.ReadFID() + t.NewFID = b.ReadFID() + n := b.Read16() + t.Names = t.Names[:0] + for i := 0; i < int(n); i++ { + t.Names = append(t.Names, b.ReadString()) + } +} + +// Encode implements encoder.Encode. +func (t *Twalk) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteFID(t.NewFID) + b.Write16(uint16(len(t.Names))) + for _, name := range t.Names { + b.WriteString(name) + } +} + +// Type implements message.Type. +func (*Twalk) Type() MsgType { + return MsgTwalk +} + +// String implements fmt.Stringer. +func (t *Twalk) String() string { + return fmt.Sprintf("Twalk{FID: %d, NewFID: %d, Names: %v}", t.FID, t.NewFID, t.Names) +} + +// Rwalk is a walk response. +type Rwalk struct { + // QIDs are the set of QIDs returned. + QIDs []QID +} + +// Decode implements encoder.Decode. +func (r *Rwalk) Decode(b *buffer) { + n := b.Read16() + r.QIDs = r.QIDs[:0] + for i := 0; i < int(n); i++ { + var q QID + q.Decode(b) + r.QIDs = append(r.QIDs, q) + } +} + +// Encode implements encoder.Encode. +func (r *Rwalk) Encode(b *buffer) { + b.Write16(uint16(len(r.QIDs))) + for _, q := range r.QIDs { + q.Encode(b) + } +} + +// Type implements message.Type. +func (*Rwalk) Type() MsgType { + return MsgRwalk +} + +// String implements fmt.Stringer. +func (r *Rwalk) String() string { + return fmt.Sprintf("Rwalk{QIDs: %v}", r.QIDs) +} + +// Tclunk is a close request. +type Tclunk struct { + // FID is the FID to be closed. + FID FID +} + +// Decode implements encoder.Decode. 
+func (t *Tclunk) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Tclunk) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Tclunk) Type() MsgType { + return MsgTclunk +} + +// String implements fmt.Stringer. +func (t *Tclunk) String() string { + return fmt.Sprintf("Tclunk{FID: %d}", t.FID) +} + +// Rclunk is a close response. +type Rclunk struct { +} + +// Decode implements encoder.Decode. +func (*Rclunk) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rclunk) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rclunk) Type() MsgType { + return MsgRclunk +} + +// String implements fmt.Stringer. +func (r *Rclunk) String() string { + return fmt.Sprintf("Rclunk{}") +} + +// Tremove is a remove request. +// +// This will eventually be replaced by Tunlinkat. +type Tremove struct { + // FID is the FID to be removed. + FID FID +} + +// Decode implements encoder.Decode. +func (t *Tremove) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Tremove) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Tremove) Type() MsgType { + return MsgTremove +} + +// String implements fmt.Stringer. +func (t *Tremove) String() string { + return fmt.Sprintf("Tremove{FID: %d}", t.FID) +} + +// Rremove is a remove response. +type Rremove struct { +} + +// Decode implements encoder.Decode. +func (*Rremove) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rremove) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rremove) Type() MsgType { + return MsgRremove +} + +// String implements fmt.Stringer. +func (r *Rremove) String() string { + return fmt.Sprintf("Rremove{}") +} + +// Rlerror is an error response. +// +// Note that this replaces the error code used in 9p. +type Rlerror struct { + Error uint32 +} + +// Decode implements encoder.Decode. 
+func (r *Rlerror) Decode(b *buffer) { + r.Error = b.Read32() +} + +// Encode implements encoder.Encode. +func (r *Rlerror) Encode(b *buffer) { + b.Write32(r.Error) +} + +// Type implements message.Type. +func (*Rlerror) Type() MsgType { + return MsgRlerror +} + +// String implements fmt.Stringer. +func (r *Rlerror) String() string { + return fmt.Sprintf("Rlerror{Error: %d}", r.Error) +} + +// Tauth is an authentication request. +type Tauth struct { + // AuthenticationFID is the FID to attach the authentication result. + AuthenticationFID FID + + // UserName is the user to attach. + UserName string + + // AttachName is the attach name. + AttachName string + + // UserID is the numeric identifier for UserName. + UID UID +} + +// Decode implements encoder.Decode. +func (t *Tauth) Decode(b *buffer) { + t.AuthenticationFID = b.ReadFID() + t.UserName = b.ReadString() + t.AttachName = b.ReadString() + t.UID = b.ReadUID() +} + +// Encode implements encoder.Encode. +func (t *Tauth) Encode(b *buffer) { + b.WriteFID(t.AuthenticationFID) + b.WriteString(t.UserName) + b.WriteString(t.AttachName) + b.WriteUID(t.UID) +} + +// Type implements message.Type. +func (*Tauth) Type() MsgType { + return MsgTauth +} + +// String implements fmt.Stringer. +func (t *Tauth) String() string { + return fmt.Sprintf("Tauth{AuthFID: %d, UserName: %s, AttachName: %s, UID: %d", t.AuthenticationFID, t.UserName, t.AttachName, t.UID) +} + +// Rauth is an authentication response. +// +// Encode, Decode and Length are inherited directly from QID. +type Rauth struct { + QID +} + +// Type implements message.Type. +func (*Rauth) Type() MsgType { + return MsgRauth +} + +// String implements fmt.Stringer. +func (r *Rauth) String() string { + return fmt.Sprintf("Rauth{QID: %s}", r.QID) +} + +// Tattach is an attach request. +type Tattach struct { + // FID is the FID to be attached. + FID FID + + // Auth is the embedded authentication request. 
+ // + // See client.Attach for information regarding authentication. + Auth Tauth +} + +// Decode implements encoder.Decode. +func (t *Tattach) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Auth.Decode(b) +} + +// Encode implements encoder.Encode. +func (t *Tattach) Encode(b *buffer) { + b.WriteFID(t.FID) + t.Auth.Encode(b) +} + +// Type implements message.Type. +func (*Tattach) Type() MsgType { + return MsgTattach +} + +// String implements fmt.Stringer. +func (t *Tattach) String() string { + return fmt.Sprintf("Tattach{FID: %d, AuthFID: %d, UserName: %s, AttachName: %s, UID: %d}", t.FID, t.Auth.AuthenticationFID, t.Auth.UserName, t.Auth.AttachName, t.Auth.UID) +} + +// Rattach is an attach response. +type Rattach struct { + QID +} + +// Type implements message.Type. +func (*Rattach) Type() MsgType { + return MsgRattach +} + +// String implements fmt.Stringer. +func (r *Rattach) String() string { + return fmt.Sprintf("Rattach{QID: %s}", r.QID) +} + +// Tlopen is an open request. +type Tlopen struct { + // FID is the FID to be opened. + FID FID + + // Flags are the open flags. + Flags OpenFlags +} + +// Decode implements encoder.Decode. +func (t *Tlopen) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Flags = b.ReadOpenFlags() +} + +// Encode implements encoder.Encode. +func (t *Tlopen) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteOpenFlags(t.Flags) +} + +// Type implements message.Type. +func (*Tlopen) Type() MsgType { + return MsgTlopen +} + +// String implements fmt.Stringer. +func (t *Tlopen) String() string { + return fmt.Sprintf("Tlopen{FID: %d, Flags: %v}", t.FID, t.Flags) +} + +// Rlopen is a open response. +type Rlopen struct { + // QID is the file's QID. + QID QID + + // IoUnit is the recommended I/O unit. + IoUnit uint32 + + // File may be attached via the socket. + // + // This is an extension specific to this package. + File *fd.FD +} + +// Decode implements encoder.Decode. 
+func (r *Rlopen) Decode(b *buffer) { + r.QID.Decode(b) + r.IoUnit = b.Read32() +} + +// Encode implements encoder.Encode. +func (r *Rlopen) Encode(b *buffer) { + r.QID.Encode(b) + b.Write32(r.IoUnit) +} + +// Type implements message.Type. +func (*Rlopen) Type() MsgType { + return MsgRlopen +} + +// FilePayload returns the file payload. +func (r *Rlopen) FilePayload() *fd.FD { + return r.File +} + +// SetFilePayload sets the received file. +func (r *Rlopen) SetFilePayload(file *fd.FD) { + r.File = file +} + +// String implements fmt.Stringer. +func (r *Rlopen) String() string { + return fmt.Sprintf("Rlopen{QID: %s, IoUnit: %d, File: %v}", r.QID, r.IoUnit, r.File) +} + +// Tlcreate is a create request. +type Tlcreate struct { + // FID is the parent FID. + // + // This becomes the new file. + FID FID + + // Name is the file name to create. + Name string + + // Mode is the open mode (O_RDWR, etc.). + // + // Note that flags like O_TRUNC are ignored, as is O_EXCL. All + // create operations are exclusive. + OpenFlags OpenFlags + + // Permissions is the set of permission bits. + Permissions FileMode + + // GID is the group ID to use for creating the file. + GID GID +} + +// Decode implements encoder.Decode. +func (t *Tlcreate) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() + t.OpenFlags = b.ReadOpenFlags() + t.Permissions = b.ReadPermissions() + t.GID = b.ReadGID() +} + +// Encode implements encoder.Encode. +func (t *Tlcreate) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) + b.WriteOpenFlags(t.OpenFlags) + b.WritePermissions(t.Permissions) + b.WriteGID(t.GID) +} + +// Type implements message.Type. +func (*Tlcreate) Type() MsgType { + return MsgTlcreate +} + +// String implements fmt.Stringer. +func (t *Tlcreate) String() string { + return fmt.Sprintf("Tlcreate{FID: %d, Name: %s, OpenFlags: %s, Permissions: 0o%o, GID: %d}", t.FID, t.Name, t.OpenFlags, t.Permissions, t.GID) +} + +// Rlcreate is a create response. 
+// +// The Encode, Decode, etc. methods are inherited from Rlopen. +type Rlcreate struct { + Rlopen +} + +// Type implements message.Type. +func (*Rlcreate) Type() MsgType { + return MsgRlcreate +} + +// String implements fmt.Stringer. +func (r *Rlcreate) String() string { + return fmt.Sprintf("Rlcreate{QID: %s, IoUnit: %d, File: %v}", r.QID, r.IoUnit, r.File) +} + +// Tsymlink is a symlink request. +type Tsymlink struct { + // Directory is the directory FID. + Directory FID + + // Name is the new in the directory. + Name string + + // Target is the symlink target. + Target string + + // GID is the owning group. + GID GID +} + +// Decode implements encoder.Decode. +func (t *Tsymlink) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Name = b.ReadString() + t.Target = b.ReadString() + t.GID = b.ReadGID() +} + +// Encode implements encoder.Encode. +func (t *Tsymlink) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.WriteString(t.Name) + b.WriteString(t.Target) + b.WriteGID(t.GID) +} + +// Type implements message.Type. +func (*Tsymlink) Type() MsgType { + return MsgTsymlink +} + +// String implements fmt.Stringer. +func (t *Tsymlink) String() string { + return fmt.Sprintf("Tsymlink{DirectoryFID: %d, Name: %s, Target: %s, GID: %d}", t.Directory, t.Name, t.Target, t.GID) +} + +// Rsymlink is a symlink response. +type Rsymlink struct { + // QID is the new symlink's QID. + QID QID +} + +// Decode implements encoder.Decode. +func (r *Rsymlink) Decode(b *buffer) { + r.QID.Decode(b) +} + +// Encode implements encoder.Encode. +func (r *Rsymlink) Encode(b *buffer) { + r.QID.Encode(b) +} + +// Type implements message.Type. +func (*Rsymlink) Type() MsgType { + return MsgRsymlink +} + +// String implements fmt.Stringer. +func (r *Rsymlink) String() string { + return fmt.Sprintf("Rsymlink{QID: %s}", r.QID) +} + +// Tlink is a link request. +type Tlink struct { + // Directory is the directory to contain the link. + Directory FID + + // FID is the target. 
+ Target FID + + // Name is the new source name. + Name string +} + +// Decode implements encoder.Decode. +func (t *Tlink) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Target = b.ReadFID() + t.Name = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Tlink) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.WriteFID(t.Target) + b.WriteString(t.Name) +} + +// Type implements message.Type. +func (*Tlink) Type() MsgType { + return MsgTlink +} + +// String implements fmt.Stringer. +func (t *Tlink) String() string { + return fmt.Sprintf("Tlink{DirectoryFID: %d, TargetFID: %d, Name: %s}", t.Directory, t.Target, t.Name) +} + +// Rlink is a link response. +type Rlink struct { +} + +// Type implements message.Type. +func (*Rlink) Type() MsgType { + return MsgRlink +} + +// Decode implements encoder.Decode. +func (*Rlink) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rlink) Encode(b *buffer) { +} + +// String implements fmt.Stringer. +func (r *Rlink) String() string { + return fmt.Sprintf("Rlink{}") +} + +// Trenameat is a rename request. +type Trenameat struct { + // OldDirectory is the source directory. + OldDirectory FID + + // OldName is the source file name. + OldName string + + // NewDirectory is the target directory. + NewDirectory FID + + // NewName is the new file name. + NewName string +} + +// Decode implements encoder.Decode. +func (t *Trenameat) Decode(b *buffer) { + t.OldDirectory = b.ReadFID() + t.OldName = b.ReadString() + t.NewDirectory = b.ReadFID() + t.NewName = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Trenameat) Encode(b *buffer) { + b.WriteFID(t.OldDirectory) + b.WriteString(t.OldName) + b.WriteFID(t.NewDirectory) + b.WriteString(t.NewName) +} + +// Type implements message.Type. +func (*Trenameat) Type() MsgType { + return MsgTrenameat +} + +// String implements fmt.Stringer. 
+func (t *Trenameat) String() string { + return fmt.Sprintf("TrenameAt{OldDirectoryFID: %d, OldName: %s, NewDirectoryFID: %d, NewName: %s}", t.OldDirectory, t.OldName, t.NewDirectory, t.NewName) +} + +// Rrenameat is a rename response. +type Rrenameat struct { +} + +// Decode implements encoder.Decode. +func (*Rrenameat) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rrenameat) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rrenameat) Type() MsgType { + return MsgRrenameat +} + +// String implements fmt.Stringer. +func (r *Rrenameat) String() string { + return fmt.Sprintf("Rrenameat{}") +} + +// Tunlinkat is an unlink request. +type Tunlinkat struct { + // Directory is the originating directory. + Directory FID + + // Name is the name of the entry to unlink. + Name string + + // Flags are extra flags (e.g. O_DIRECTORY). These are not interpreted by p9. + Flags uint32 +} + +// Decode implements encoder.Decode. +func (t *Tunlinkat) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Name = b.ReadString() + t.Flags = b.Read32() +} + +// Encode implements encoder.Encode. +func (t *Tunlinkat) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.WriteString(t.Name) + b.Write32(t.Flags) +} + +// Type implements message.Type. +func (*Tunlinkat) Type() MsgType { + return MsgTunlinkat +} + +// String implements fmt.Stringer. +func (t *Tunlinkat) String() string { + return fmt.Sprintf("Tunlinkat{DirectoryFID: %d, Name: %s, Flags: 0x%X}", t.Directory, t.Name, t.Flags) +} + +// Runlinkat is an unlink response. +type Runlinkat struct { +} + +// Decode implements encoder.Decode. +func (*Runlinkat) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Runlinkat) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Runlinkat) Type() MsgType { + return MsgRunlinkat +} + +// String implements fmt.Stringer. 
+func (r *Runlinkat) String() string { + return fmt.Sprintf("Runlinkat{}") +} + +// Trename is a rename request. +// +// Note that this generally isn't used anymore, and ideally all rename calls +// should Trenameat below. +type Trename struct { + // FID is the FID to rename. + FID FID + + // Directory is the target directory. + Directory FID + + // Name is the new file name. + Name string +} + +// Decode implements encoder.Decode. +func (t *Trename) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Directory = b.ReadFID() + t.Name = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Trename) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteFID(t.Directory) + b.WriteString(t.Name) +} + +// Type implements message.Type. +func (*Trename) Type() MsgType { + return MsgTrename +} + +// String implements fmt.Stringer. +func (t *Trename) String() string { + return fmt.Sprintf("Trename{FID: %d, DirectoryFID: %d, Name: %s}", t.FID, t.Directory, t.Name) +} + +// Rrename is a rename response. +type Rrename struct { +} + +// Decode implements encoder.Decode. +func (*Rrename) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rrename) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rrename) Type() MsgType { + return MsgRrename +} + +// String implements fmt.Stringer. +func (r *Rrename) String() string { + return fmt.Sprintf("Rrename{}") +} + +// Treadlink is a readlink request. +type Treadlink struct { + // FID is the symlink. + FID FID +} + +// Decode implements encoder.Decode. +func (t *Treadlink) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Treadlink) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Treadlink) Type() MsgType { + return MsgTreadlink +} + +// String implements fmt.Stringer. +func (t *Treadlink) String() string { + return fmt.Sprintf("Treadlink{FID: %d}", t.FID) +} + +// Rreadlink is a readlink response. 
+type Rreadlink struct { + // Target is the symlink target. + Target string +} + +// Decode implements encoder.Decode. +func (r *Rreadlink) Decode(b *buffer) { + r.Target = b.ReadString() +} + +// Encode implements encoder.Encode. +func (r *Rreadlink) Encode(b *buffer) { + b.WriteString(r.Target) +} + +// Type implements message.Type. +func (*Rreadlink) Type() MsgType { + return MsgRreadlink +} + +// String implements fmt.Stringer. +func (r *Rreadlink) String() string { + return fmt.Sprintf("Rreadlink{Target: %s}", r.Target) +} + +// Tread is a read request. +type Tread struct { + // FID is the FID to read. + FID FID + + // Offset indicates the file offset. + Offset uint64 + + // Count indicates the number of bytes to read. + Count uint32 +} + +// Decode implements encoder.Decode. +func (t *Tread) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Offset = b.Read64() + t.Count = b.Read32() +} + +// Encode implements encoder.Encode. +func (t *Tread) Encode(b *buffer) { + b.WriteFID(t.FID) + b.Write64(t.Offset) + b.Write32(t.Count) +} + +// Type implements message.Type. +func (*Tread) Type() MsgType { + return MsgTread +} + +// String implements fmt.Stringer. +func (t *Tread) String() string { + return fmt.Sprintf("Tread{FID: %d, Offset: %d, Count: %d}", t.FID, t.Offset, t.Count) +} + +// Rread is the response for a Tread. +type Rread struct { + // Data is the resulting data. + Data []byte +} + +// Decode implements encoder.Decode. +// +// Data is automatically decoded via Payload. +func (r *Rread) Decode(b *buffer) { + count := b.Read32() + if count != uint32(len(r.Data)) { + b.markOverrun() + } +} + +// Encode implements encoder.Encode. +// +// Data is automatically encoded via Payload. +func (r *Rread) Encode(b *buffer) { + b.Write32(uint32(len(r.Data))) +} + +// Type implements message.Type. +func (*Rread) Type() MsgType { + return MsgRread +} + +// FixedSize implements payloader.FixedSize. 
+func (*Rread) FixedSize() uint32 { + return 4 +} + +// Payload implements payloader.Payload. +func (r *Rread) Payload() []byte { + return r.Data +} + +// SetPayload implements payloader.SetPayload. +func (r *Rread) SetPayload(p []byte) { + r.Data = p +} + +// String implements fmt.Stringer. +func (r *Rread) String() string { + return fmt.Sprintf("Rread{len(Data): %d}", len(r.Data)) +} + +// Twrite is a write request. +type Twrite struct { + // FID is the FID to read. + FID FID + + // Offset indicates the file offset. + Offset uint64 + + // Data is the data to be written. + Data []byte +} + +// Decode implements encoder.Decode. +func (t *Twrite) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Offset = b.Read64() + count := b.Read32() + if count != uint32(len(t.Data)) { + b.markOverrun() + } +} + +// Encode implements encoder.Encode. +// +// This uses the buffer payload to avoid a copy. +func (t *Twrite) Encode(b *buffer) { + b.WriteFID(t.FID) + b.Write64(t.Offset) + b.Write32(uint32(len(t.Data))) +} + +// Type implements message.Type. +func (*Twrite) Type() MsgType { + return MsgTwrite +} + +// FixedSize implements payloader.FixedSize. +func (*Twrite) FixedSize() uint32 { + return 16 +} + +// Payload implements payloader.Payload. +func (t *Twrite) Payload() []byte { + return t.Data +} + +// SetPayload implements payloader.SetPayload. +func (t *Twrite) SetPayload(p []byte) { + t.Data = p +} + +// String implements fmt.Stringer. +func (t *Twrite) String() string { + return fmt.Sprintf("Twrite{FID: %v, Offset %d, len(Data): %d}", t.FID, t.Offset, len(t.Data)) +} + +// Rwrite is the response for a Twrite. +type Rwrite struct { + // Count indicates the number of bytes successfully written. + Count uint32 +} + +// Decode implements encoder.Decode. +func (r *Rwrite) Decode(b *buffer) { + r.Count = b.Read32() +} + +// Encode implements encoder.Encode. +func (r *Rwrite) Encode(b *buffer) { + b.Write32(r.Count) +} + +// Type implements message.Type. 
+func (*Rwrite) Type() MsgType { + return MsgRwrite +} + +// String implements fmt.Stringer. +func (r *Rwrite) String() string { + return fmt.Sprintf("Rwrite{Count: %d}", r.Count) +} + +// Tmknod is a mknod request. +type Tmknod struct { + // Directory is the parent directory. + Directory FID + + // Name is the device name. + Name string + + // Mode is the device mode and permissions. + Mode FileMode + + // Major is the device major number. + Major uint32 + + // Minor is the device minor number. + Minor uint32 + + // GID is the device GID. + GID GID +} + +// Decode implements encoder.Decode. +func (t *Tmknod) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Name = b.ReadString() + t.Mode = b.ReadFileMode() + t.Major = b.Read32() + t.Minor = b.Read32() + t.GID = b.ReadGID() +} + +// Encode implements encoder.Encode. +func (t *Tmknod) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.WriteString(t.Name) + b.WriteFileMode(t.Mode) + b.Write32(t.Major) + b.Write32(t.Minor) + b.WriteGID(t.GID) +} + +// Type implements message.Type. +func (*Tmknod) Type() MsgType { + return MsgTmknod +} + +// String implements fmt.Stringer. +func (t *Tmknod) String() string { + return fmt.Sprintf("Tmknod{DirectoryFID: %d, Name: %s, Mode: 0o%o, Major: %d, Minor: %d, GID: %d}", t.Directory, t.Name, t.Mode, t.Major, t.Minor, t.GID) +} + +// Rmknod is a mknod response. +type Rmknod struct { + // QID is the resulting QID. + QID QID +} + +// Decode implements encoder.Decode. +func (r *Rmknod) Decode(b *buffer) { + r.QID.Decode(b) +} + +// Encode implements encoder.Encode. +func (r *Rmknod) Encode(b *buffer) { + r.QID.Encode(b) +} + +// Type implements message.Type. +func (*Rmknod) Type() MsgType { + return MsgRmknod +} + +// String implements fmt.Stringer. +func (r *Rmknod) String() string { + return fmt.Sprintf("Rmknod{QID: %s}", r.QID) +} + +// Tmkdir is a mkdir request. +type Tmkdir struct { + // Directory is the parent directory. + Directory FID + + // Name is the new directory name. 
+ Name string + + // Permissions is the set of permission bits. + Permissions FileMode + + // GID is the owning group. + GID GID +} + +// Decode implements encoder.Decode. +func (t *Tmkdir) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Name = b.ReadString() + t.Permissions = b.ReadPermissions() + t.GID = b.ReadGID() +} + +// Encode implements encoder.Encode. +func (t *Tmkdir) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.WriteString(t.Name) + b.WritePermissions(t.Permissions) + b.WriteGID(t.GID) +} + +// Type implements message.Type. +func (*Tmkdir) Type() MsgType { + return MsgTmkdir +} + +// String implements fmt.Stringer. +func (t *Tmkdir) String() string { + return fmt.Sprintf("Tmkdir{DirectoryFID: %d, Name: %s, Permissions: 0o%o, GID: %d}", t.Directory, t.Name, t.Permissions, t.GID) +} + +// Rmkdir is a mkdir response. +type Rmkdir struct { + // QID is the resulting QID. + QID QID +} + +// Decode implements encoder.Decode. +func (r *Rmkdir) Decode(b *buffer) { + r.QID.Decode(b) +} + +// Encode implements encoder.Encode. +func (r *Rmkdir) Encode(b *buffer) { + r.QID.Encode(b) +} + +// Type implements message.Type. +func (*Rmkdir) Type() MsgType { + return MsgRmkdir +} + +// String implements fmt.Stringer. +func (r *Rmkdir) String() string { + return fmt.Sprintf("Rmkdir{QID: %s}", r.QID) +} + +// Tgetattr is a getattr request. +type Tgetattr struct { + // FID is the FID to get attributes for. + FID FID + + // AttrMask is the set of attributes to get. + AttrMask AttrMask +} + +// Decode implements encoder.Decode. +func (t *Tgetattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.AttrMask.Decode(b) +} + +// Encode implements encoder.Encode. +func (t *Tgetattr) Encode(b *buffer) { + b.WriteFID(t.FID) + t.AttrMask.Encode(b) +} + +// Type implements message.Type. +func (*Tgetattr) Type() MsgType { + return MsgTgetattr +} + +// String implements fmt.Stringer. 
+func (t *Tgetattr) String() string { + return fmt.Sprintf("Tgetattr{FID: %d, AttrMask: %s}", t.FID, t.AttrMask) +} + +// Rgetattr is a getattr response. +type Rgetattr struct { + // Valid indicates which fields are valid. + Valid AttrMask + + // QID is the QID for this file. + QID + + // Attr is the set of attributes. + Attr Attr +} + +// Decode implements encoder.Decode. +func (r *Rgetattr) Decode(b *buffer) { + r.Valid.Decode(b) + r.QID.Decode(b) + r.Attr.Decode(b) +} + +// Encode implements encoder.Encode. +func (r *Rgetattr) Encode(b *buffer) { + r.Valid.Encode(b) + r.QID.Encode(b) + r.Attr.Encode(b) +} + +// Type implements message.Type. +func (*Rgetattr) Type() MsgType { + return MsgRgetattr +} + +// String implements fmt.Stringer. +func (r *Rgetattr) String() string { + return fmt.Sprintf("Rgetattr{Valid: %v, QID: %s, Attr: %s}", r.Valid, r.QID, r.Attr) +} + +// Tsetattr is a setattr request. +type Tsetattr struct { + // FID is the FID to change. + FID FID + + // Valid is the set of bits which will be used. + Valid SetAttrMask + + // SetAttr is the set request. + SetAttr SetAttr +} + +// Decode implements encoder.Decode. +func (t *Tsetattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Valid.Decode(b) + t.SetAttr.Decode(b) +} + +// Encode implements encoder.Encode. +func (t *Tsetattr) Encode(b *buffer) { + b.WriteFID(t.FID) + t.Valid.Encode(b) + t.SetAttr.Encode(b) +} + +// Type implements message.Type. +func (*Tsetattr) Type() MsgType { + return MsgTsetattr +} + +// String implements fmt.Stringer. +func (t *Tsetattr) String() string { + return fmt.Sprintf("Tsetattr{FID: %d, Valid: %v, SetAttr: %s}", t.FID, t.Valid, t.SetAttr) +} + +// Rsetattr is a setattr response. +type Rsetattr struct { +} + +// Decode implements encoder.Decode. +func (*Rsetattr) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rsetattr) Encode(b *buffer) { +} + +// Type implements message.Type. 
+func (*Rsetattr) Type() MsgType { + return MsgRsetattr +} + +// String implements fmt.Stringer. +func (r *Rsetattr) String() string { + return fmt.Sprintf("Rsetattr{}") +} + +// Tallocate is an allocate request. This is an extension to 9P protocol, not +// present in the 9P2000.L standard. +type Tallocate struct { + FID FID + Mode AllocateMode + Offset uint64 + Length uint64 +} + +// Decode implements encoder.Decode. +func (t *Tallocate) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Mode.Decode(b) + t.Offset = b.Read64() + t.Length = b.Read64() +} + +// Encode implements encoder.Encode. +func (t *Tallocate) Encode(b *buffer) { + b.WriteFID(t.FID) + t.Mode.Encode(b) + b.Write64(t.Offset) + b.Write64(t.Length) +} + +// Type implements message.Type. +func (*Tallocate) Type() MsgType { + return MsgTallocate +} + +// String implements fmt.Stringer. +func (t *Tallocate) String() string { + return fmt.Sprintf("Tallocate{FID: %d, Offset: %d, Length: %d}", t.FID, t.Offset, t.Length) +} + +// Rallocate is an allocate response. +type Rallocate struct { +} + +// Decode implements encoder.Decode. +func (*Rallocate) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rallocate) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rallocate) Type() MsgType { + return MsgRallocate +} + +// String implements fmt.Stringer. +func (r *Rallocate) String() string { + return fmt.Sprintf("Rallocate{}") +} + +// Txattrwalk walks extended attributes. +type Txattrwalk struct { + // FID is the FID to check for attributes. + FID FID + + // NewFID is the new FID associated with the attributes. + NewFID FID + + // Name is the attribute name. + Name string +} + +// Decode implements encoder.Decode. +func (t *Txattrwalk) Decode(b *buffer) { + t.FID = b.ReadFID() + t.NewFID = b.ReadFID() + t.Name = b.ReadString() +} + +// Encode implements encoder.Encode. 
+func (t *Txattrwalk) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteFID(t.NewFID) + b.WriteString(t.Name) +} + +// Type implements message.Type. +func (*Txattrwalk) Type() MsgType { + return MsgTxattrwalk +} + +// String implements fmt.Stringer. +func (t *Txattrwalk) String() string { + return fmt.Sprintf("Txattrwalk{FID: %d, NewFID: %d, Name: %s}", t.FID, t.NewFID, t.Name) +} + +// Rxattrwalk is a xattrwalk response. +type Rxattrwalk struct { + // Size is the size of the extended attribute. + Size uint64 +} + +// Decode implements encoder.Decode. +func (r *Rxattrwalk) Decode(b *buffer) { + r.Size = b.Read64() +} + +// Encode implements encoder.Encode. +func (r *Rxattrwalk) Encode(b *buffer) { + b.Write64(r.Size) +} + +// Type implements message.Type. +func (*Rxattrwalk) Type() MsgType { + return MsgRxattrwalk +} + +// String implements fmt.Stringer. +func (r *Rxattrwalk) String() string { + return fmt.Sprintf("Rxattrwalk{Size: %d}", r.Size) +} + +// Txattrcreate prepare to set extended attributes. +type Txattrcreate struct { + // FID is input/output parameter, it identifies the file on which + // extended attributes will be set but after successful Rxattrcreate + // it is used to write the extended attribute value. + FID FID + + // Name is the attribute name. + Name string + + // Size of the attribute value. When the FID is clunked it has to match + // the number of bytes written to the FID. + AttrSize uint64 + + // Linux setxattr(2) flags. + Flags uint32 +} + +// Decode implements encoder.Decode. +func (t *Txattrcreate) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() + t.AttrSize = b.Read64() + t.Flags = b.Read32() +} + +// Encode implements encoder.Encode. +func (t *Txattrcreate) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) + b.Write64(t.AttrSize) + b.Write32(t.Flags) +} + +// Type implements message.Type. +func (*Txattrcreate) Type() MsgType { + return MsgTxattrcreate +} + +// String implements fmt.Stringer. 
+func (t *Txattrcreate) String() string { + return fmt.Sprintf("Txattrcreate{FID: %d, Name: %s, AttrSize: %d, Flags: %d}", t.FID, t.Name, t.AttrSize, t.Flags) +} + +// Rxattrcreate is a xattrcreate response. +type Rxattrcreate struct { +} + +// Decode implements encoder.Decode. +func (r *Rxattrcreate) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (r *Rxattrcreate) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rxattrcreate) Type() MsgType { + return MsgRxattrcreate +} + +// String implements fmt.Stringer. +func (r *Rxattrcreate) String() string { + return fmt.Sprintf("Rxattrcreate{}") +} + +// Treaddir is a readdir request. +type Treaddir struct { + // Directory is the directory FID to read. + Directory FID + + // Offset is the offset to read at. + Offset uint64 + + // Count is the number of bytes to read. + Count uint32 +} + +// Decode implements encoder.Decode. +func (t *Treaddir) Decode(b *buffer) { + t.Directory = b.ReadFID() + t.Offset = b.Read64() + t.Count = b.Read32() +} + +// Encode implements encoder.Encode. +func (t *Treaddir) Encode(b *buffer) { + b.WriteFID(t.Directory) + b.Write64(t.Offset) + b.Write32(t.Count) +} + +// Type implements message.Type. +func (*Treaddir) Type() MsgType { + return MsgTreaddir +} + +// String implements fmt.Stringer. +func (t *Treaddir) String() string { + return fmt.Sprintf("Treaddir{DirectoryFID: %d, Offset: %d, Count: %d}", t.Directory, t.Offset, t.Count) +} + +// Rreaddir is a readdir response. +type Rreaddir struct { + // Count is the byte limit. + // + // This should always be set from the Treaddir request. + Count uint32 + + // Entries are the resulting entries. + // + // This may be constructed in decode. + Entries []Dirent + + // payload is the encoded payload. + // + // This is constructed by encode. + payload []byte +} + +// Decode implements encoder.Decode. 
+func (r *Rreaddir) Decode(b *buffer) { + r.Count = b.Read32() + entriesBuf := buffer{data: r.payload} + r.Entries = r.Entries[:0] + for { + var d Dirent + d.Decode(&entriesBuf) + if entriesBuf.isOverrun() { + // Couldn't decode a complete entry. + break + } + r.Entries = append(r.Entries, d) + } +} + +// Encode implements encoder.Encode. +func (r *Rreaddir) Encode(b *buffer) { + entriesBuf := buffer{} + for _, d := range r.Entries { + d.Encode(&entriesBuf) + if len(entriesBuf.data) >= int(r.Count) { + break + } + } + if len(entriesBuf.data) < int(r.Count) { + r.Count = uint32(len(entriesBuf.data)) + r.payload = entriesBuf.data + } else { + r.payload = entriesBuf.data[:r.Count] + } + b.Write32(uint32(r.Count)) +} + +// Type implements message.Type. +func (*Rreaddir) Type() MsgType { + return MsgRreaddir +} + +// FixedSize implements payloader.FixedSize. +func (*Rreaddir) FixedSize() uint32 { + return 4 +} + +// Payload implements payloader.Payload. +func (r *Rreaddir) Payload() []byte { + return r.payload +} + +// SetPayload implements payloader.SetPayload. +func (r *Rreaddir) SetPayload(p []byte) { + r.payload = p +} + +// String implements fmt.Stringer. +func (r *Rreaddir) String() string { + return fmt.Sprintf("Rreaddir{Count: %d, Entries: %s}", r.Count, r.Entries) +} + +// Tfsync is an fsync request. +type Tfsync struct { + // FID is the fid to sync. + FID FID +} + +// Decode implements encoder.Decode. +func (t *Tfsync) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Tfsync) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Tfsync) Type() MsgType { + return MsgTfsync +} + +// String implements fmt.Stringer. +func (t *Tfsync) String() string { + return fmt.Sprintf("Tfsync{FID: %d}", t.FID) +} + +// Rfsync is an fsync response. +type Rfsync struct { +} + +// Decode implements encoder.Decode. +func (*Rfsync) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. 
+func (*Rfsync) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rfsync) Type() MsgType { + return MsgRfsync +} + +// String implements fmt.Stringer. +func (r *Rfsync) String() string { + return fmt.Sprintf("Rfsync{}") +} + +// Tstatfs is a stat request. +type Tstatfs struct { + // FID is the root. + FID FID +} + +// Decode implements encoder.Decode. +func (t *Tstatfs) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Tstatfs) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Tstatfs) Type() MsgType { + return MsgTstatfs +} + +// String implements fmt.Stringer. +func (t *Tstatfs) String() string { + return fmt.Sprintf("Tstatfs{FID: %d}", t.FID) +} + +// Rstatfs is the response for a Tstatfs. +type Rstatfs struct { + // FSStat is the stat result. + FSStat FSStat +} + +// Decode implements encoder.Decode. +func (r *Rstatfs) Decode(b *buffer) { + r.FSStat.Decode(b) +} + +// Encode implements encoder.Encode. +func (r *Rstatfs) Encode(b *buffer) { + r.FSStat.Encode(b) +} + +// Type implements message.Type. +func (*Rstatfs) Type() MsgType { + return MsgRstatfs +} + +// String implements fmt.Stringer. +func (r *Rstatfs) String() string { + return fmt.Sprintf("Rstatfs{FSStat: %v}", r.FSStat) +} + +// Tflushf is a flush file request, not to be confused with Tflush. +type Tflushf struct { + // FID is the FID to be flushed. + FID FID +} + +// Decode implements encoder.Decode. +func (t *Tflushf) Decode(b *buffer) { + t.FID = b.ReadFID() +} + +// Encode implements encoder.Encode. +func (t *Tflushf) Encode(b *buffer) { + b.WriteFID(t.FID) +} + +// Type implements message.Type. +func (*Tflushf) Type() MsgType { + return MsgTflushf +} + +// String implements fmt.Stringer. +func (t *Tflushf) String() string { + return fmt.Sprintf("Tflushf{FID: %d}", t.FID) +} + +// Rflushf is a flush file response. +type Rflushf struct { +} + +// Decode implements encoder.Decode. 
+func (*Rflushf) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (*Rflushf) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rflushf) Type() MsgType { + return MsgRflushf +} + +// String implements fmt.Stringer. +func (*Rflushf) String() string { + return fmt.Sprintf("Rflushf{}") +} + +// Twalkgetattr is a walk request. +type Twalkgetattr struct { + // FID is the FID to be walked. + FID FID + + // NewFID is the resulting FID. + NewFID FID + + // Names are the set of names to be walked. + Names []string +} + +// Decode implements encoder.Decode. +func (t *Twalkgetattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.NewFID = b.ReadFID() + n := b.Read16() + t.Names = t.Names[:0] + for i := 0; i < int(n); i++ { + t.Names = append(t.Names, b.ReadString()) + } +} + +// Encode implements encoder.Encode. +func (t *Twalkgetattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteFID(t.NewFID) + b.Write16(uint16(len(t.Names))) + for _, name := range t.Names { + b.WriteString(name) + } +} + +// Type implements message.Type. +func (*Twalkgetattr) Type() MsgType { + return MsgTwalkgetattr +} + +// String implements fmt.Stringer. +func (t *Twalkgetattr) String() string { + return fmt.Sprintf("Twalkgetattr{FID: %d, NewFID: %d, Names: %v}", t.FID, t.NewFID, t.Names) +} + +// Rwalkgetattr is a walk response. +type Rwalkgetattr struct { + // Valid indicates which fields are valid in the Attr below. + Valid AttrMask + + // Attr is the set of attributes for the last QID (the file walked to). + Attr Attr + + // QIDs are the set of QIDs returned. + QIDs []QID +} + +// Decode implements encoder.Decode. +func (r *Rwalkgetattr) Decode(b *buffer) { + r.Valid.Decode(b) + r.Attr.Decode(b) + n := b.Read16() + r.QIDs = r.QIDs[:0] + for i := 0; i < int(n); i++ { + var q QID + q.Decode(b) + r.QIDs = append(r.QIDs, q) + } +} + +// Encode implements encoder.Encode. 
+func (r *Rwalkgetattr) Encode(b *buffer) { + r.Valid.Encode(b) + r.Attr.Encode(b) + b.Write16(uint16(len(r.QIDs))) + for _, q := range r.QIDs { + q.Encode(b) + } +} + +// Type implements message.Type. +func (*Rwalkgetattr) Type() MsgType { + return MsgRwalkgetattr +} + +// String implements fmt.Stringer. +func (r *Rwalkgetattr) String() string { + return fmt.Sprintf("Rwalkgetattr{Valid: %s, Attr: %s, QIDs: %v}", r.Valid, r.Attr, r.QIDs) +} + +// Tucreate is a Tlcreate message that includes a UID. +type Tucreate struct { + Tlcreate + + // UID is the UID to use as the effective UID in creation messages. + UID UID +} + +// Decode implements encoder.Decode. +func (t *Tucreate) Decode(b *buffer) { + t.Tlcreate.Decode(b) + t.UID = b.ReadUID() +} + +// Encode implements encoder.Encode. +func (t *Tucreate) Encode(b *buffer) { + t.Tlcreate.Encode(b) + b.WriteUID(t.UID) +} + +// Type implements message.Type. +func (t *Tucreate) Type() MsgType { + return MsgTucreate +} + +// String implements fmt.Stringer. +func (t *Tucreate) String() string { + return fmt.Sprintf("Tucreate{Tlcreate: %v, UID: %d}", &t.Tlcreate, t.UID) +} + +// Rucreate is a file creation response. +type Rucreate struct { + Rlcreate +} + +// Type implements message.Type. +func (*Rucreate) Type() MsgType { + return MsgRucreate +} + +// String implements fmt.Stringer. +func (r *Rucreate) String() string { + return fmt.Sprintf("Rucreate{%v}", &r.Rlcreate) +} + +// Tumkdir is a Tmkdir message that includes a UID. +type Tumkdir struct { + Tmkdir + + // UID is the UID to use as the effective UID in creation messages. + UID UID +} + +// Decode implements encoder.Decode. +func (t *Tumkdir) Decode(b *buffer) { + t.Tmkdir.Decode(b) + t.UID = b.ReadUID() +} + +// Encode implements encoder.Encode. +func (t *Tumkdir) Encode(b *buffer) { + t.Tmkdir.Encode(b) + b.WriteUID(t.UID) +} + +// Type implements message.Type. +func (t *Tumkdir) Type() MsgType { + return MsgTumkdir +} + +// String implements fmt.Stringer. 
+func (t *Tumkdir) String() string { + return fmt.Sprintf("Tumkdir{Tmkdir: %v, UID: %d}", &t.Tmkdir, t.UID) +} + +// Rumkdir is a umkdir response. +type Rumkdir struct { + Rmkdir +} + +// Type implements message.Type. +func (*Rumkdir) Type() MsgType { + return MsgRumkdir +} + +// String implements fmt.Stringer. +func (r *Rumkdir) String() string { + return fmt.Sprintf("Rumkdir{%v}", &r.Rmkdir) +} + +// Tumknod is a Tmknod message that includes a UID. +type Tumknod struct { + Tmknod + + // UID is the UID to use as the effective UID in creation messages. + UID UID +} + +// Decode implements encoder.Decode. +func (t *Tumknod) Decode(b *buffer) { + t.Tmknod.Decode(b) + t.UID = b.ReadUID() +} + +// Encode implements encoder.Encode. +func (t *Tumknod) Encode(b *buffer) { + t.Tmknod.Encode(b) + b.WriteUID(t.UID) +} + +// Type implements message.Type. +func (t *Tumknod) Type() MsgType { + return MsgTumknod +} + +// String implements fmt.Stringer. +func (t *Tumknod) String() string { + return fmt.Sprintf("Tumknod{Tmknod: %v, UID: %d}", &t.Tmknod, t.UID) +} + +// Rumknod is a umknod response. +type Rumknod struct { + Rmknod +} + +// Type implements message.Type. +func (*Rumknod) Type() MsgType { + return MsgRumknod +} + +// String implements fmt.Stringer. +func (r *Rumknod) String() string { + return fmt.Sprintf("Rumknod{%v}", &r.Rmknod) +} + +// Tusymlink is a Tsymlink message that includes a UID. +type Tusymlink struct { + Tsymlink + + // UID is the UID to use as the effective UID in creation messages. + UID UID +} + +// Decode implements encoder.Decode. +func (t *Tusymlink) Decode(b *buffer) { + t.Tsymlink.Decode(b) + t.UID = b.ReadUID() +} + +// Encode implements encoder.Encode. +func (t *Tusymlink) Encode(b *buffer) { + t.Tsymlink.Encode(b) + b.WriteUID(t.UID) +} + +// Type implements message.Type. +func (t *Tusymlink) Type() MsgType { + return MsgTusymlink +} + +// String implements fmt.Stringer. 
+func (t *Tusymlink) String() string { + return fmt.Sprintf("Tusymlink{Tsymlink: %v, UID: %d}", &t.Tsymlink, t.UID) +} + +// Rusymlink is a usymlink response. +type Rusymlink struct { + Rsymlink +} + +// Type implements message.Type. +func (*Rusymlink) Type() MsgType { + return MsgRusymlink +} + +// String implements fmt.Stringer. +func (r *Rusymlink) String() string { + return fmt.Sprintf("Rusymlink{%v}", &r.Rsymlink) +} + +// Tlconnect is a connect request. +type Tlconnect struct { + // FID is the FID to be connected. + FID FID + + // Flags are the connect flags. + Flags ConnectFlags +} + +// Decode implements encoder.Decode. +func (t *Tlconnect) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Flags = b.ReadConnectFlags() +} + +// Encode implements encoder.Encode. +func (t *Tlconnect) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteConnectFlags(t.Flags) +} + +// Type implements message.Type. +func (*Tlconnect) Type() MsgType { + return MsgTlconnect +} + +// String implements fmt.Stringer. +func (t *Tlconnect) String() string { + return fmt.Sprintf("Tlconnect{FID: %d, Flags: %v}", t.FID, t.Flags) +} + +// Rlconnect is a connect response. +type Rlconnect struct { + // File is a host socket. + File *fd.FD +} + +// Decode implements encoder.Decode. +func (r *Rlconnect) Decode(*buffer) {} + +// Encode implements encoder.Encode. +func (r *Rlconnect) Encode(*buffer) {} + +// Type implements message.Type. +func (*Rlconnect) Type() MsgType { + return MsgRlconnect +} + +// FilePayload returns the file payload. +func (r *Rlconnect) FilePayload() *fd.FD { + return r.File +} + +// SetFilePayload sets the received file. +func (r *Rlconnect) SetFilePayload(file *fd.FD) { + r.File = file +} + +// String implements fmt.Stringer. +func (r *Rlconnect) String() string { + return fmt.Sprintf("Rlconnect{File: %v}", r.File) +} + +const maxCacheSize = 3 + +// msgFactory is used to reduce allocations by caching messages for reuse. 
+type msgFactory struct { + create func() message + cache chan message +} + +// msgRegistry indexes all message factories by type. +var msgRegistry registry + +type registry struct { + factories [math.MaxUint8]msgFactory + + // largestFixedSize is computed so that given some message size M, you can + // compute the maximum payload size (e.g. for Twrite, Rread) with + // M-largestFixedSize. You could do this individual on a per-message basis, + // but it's easier to compute a single maximum safe payload. + largestFixedSize uint32 +} + +// get returns a new message by type. +// +// An error is returned in the case of an unknown message. +// +// This takes, and ignores, a message tag so that it may be used directly as a +// lookupTagAndType function for recv (by design). +func (r *registry) get(_ Tag, t MsgType) (message, error) { + entry := &r.factories[t] + if entry.create == nil { + return nil, &ErrInvalidMsgType{t} + } + + select { + case msg := <-entry.cache: + return msg, nil + default: + return entry.create(), nil + } +} + +func (r *registry) put(msg message) { + if p, ok := msg.(payloader); ok { + p.SetPayload(nil) + } + if f, ok := msg.(filer); ok { + f.SetFilePayload(nil) + } + + entry := &r.factories[msg.Type()] + select { + case entry.cache <- msg: + default: + } +} + +// register registers the given message type. +// +// This may cause panic on failure and should only be used from init. +func (r *registry) register(t MsgType, fn func() message) { + if int(t) >= len(r.factories) { + panic(fmt.Sprintf("message type %d is too large. 
It must be smaller than %d", t, len(r.factories))) + } + if r.factories[t].create != nil { + panic(fmt.Sprintf("duplicate message type %d: first is %T, second is %T", t, r.factories[t].create(), fn())) + } + r.factories[t] = msgFactory{ + create: fn, + cache: make(chan message, maxCacheSize), + } + + if size := calculateSize(fn()); size > r.largestFixedSize { + r.largestFixedSize = size + } +} + +func calculateSize(m message) uint32 { + if p, ok := m.(payloader); ok { + return p.FixedSize() + } + var dataBuf buffer + m.Encode(&dataBuf) + return uint32(len(dataBuf.data)) +} + +func init() { + msgRegistry.register(MsgRlerror, func() message { return &Rlerror{} }) + msgRegistry.register(MsgTstatfs, func() message { return &Tstatfs{} }) + msgRegistry.register(MsgRstatfs, func() message { return &Rstatfs{} }) + msgRegistry.register(MsgTlopen, func() message { return &Tlopen{} }) + msgRegistry.register(MsgRlopen, func() message { return &Rlopen{} }) + msgRegistry.register(MsgTlcreate, func() message { return &Tlcreate{} }) + msgRegistry.register(MsgRlcreate, func() message { return &Rlcreate{} }) + msgRegistry.register(MsgTsymlink, func() message { return &Tsymlink{} }) + msgRegistry.register(MsgRsymlink, func() message { return &Rsymlink{} }) + msgRegistry.register(MsgTmknod, func() message { return &Tmknod{} }) + msgRegistry.register(MsgRmknod, func() message { return &Rmknod{} }) + msgRegistry.register(MsgTrename, func() message { return &Trename{} }) + msgRegistry.register(MsgRrename, func() message { return &Rrename{} }) + msgRegistry.register(MsgTreadlink, func() message { return &Treadlink{} }) + msgRegistry.register(MsgRreadlink, func() message { return &Rreadlink{} }) + msgRegistry.register(MsgTgetattr, func() message { return &Tgetattr{} }) + msgRegistry.register(MsgRgetattr, func() message { return &Rgetattr{} }) + msgRegistry.register(MsgTsetattr, func() message { return &Tsetattr{} }) + msgRegistry.register(MsgRsetattr, func() message { return &Rsetattr{} }) 
+ msgRegistry.register(MsgTxattrwalk, func() message { return &Txattrwalk{} }) + msgRegistry.register(MsgRxattrwalk, func() message { return &Rxattrwalk{} }) + msgRegistry.register(MsgTxattrcreate, func() message { return &Txattrcreate{} }) + msgRegistry.register(MsgRxattrcreate, func() message { return &Rxattrcreate{} }) + msgRegistry.register(MsgTreaddir, func() message { return &Treaddir{} }) + msgRegistry.register(MsgRreaddir, func() message { return &Rreaddir{} }) + msgRegistry.register(MsgTfsync, func() message { return &Tfsync{} }) + msgRegistry.register(MsgRfsync, func() message { return &Rfsync{} }) + msgRegistry.register(MsgTlink, func() message { return &Tlink{} }) + msgRegistry.register(MsgRlink, func() message { return &Rlink{} }) + msgRegistry.register(MsgTmkdir, func() message { return &Tmkdir{} }) + msgRegistry.register(MsgRmkdir, func() message { return &Rmkdir{} }) + msgRegistry.register(MsgTrenameat, func() message { return &Trenameat{} }) + msgRegistry.register(MsgRrenameat, func() message { return &Rrenameat{} }) + msgRegistry.register(MsgTunlinkat, func() message { return &Tunlinkat{} }) + msgRegistry.register(MsgRunlinkat, func() message { return &Runlinkat{} }) + msgRegistry.register(MsgTversion, func() message { return &Tversion{} }) + msgRegistry.register(MsgRversion, func() message { return &Rversion{} }) + msgRegistry.register(MsgTauth, func() message { return &Tauth{} }) + msgRegistry.register(MsgRauth, func() message { return &Rauth{} }) + msgRegistry.register(MsgTattach, func() message { return &Tattach{} }) + msgRegistry.register(MsgRattach, func() message { return &Rattach{} }) + msgRegistry.register(MsgTflush, func() message { return &Tflush{} }) + msgRegistry.register(MsgRflush, func() message { return &Rflush{} }) + msgRegistry.register(MsgTwalk, func() message { return &Twalk{} }) + msgRegistry.register(MsgRwalk, func() message { return &Rwalk{} }) + msgRegistry.register(MsgTread, func() message { return &Tread{} }) + 
msgRegistry.register(MsgRread, func() message { return &Rread{} }) + msgRegistry.register(MsgTwrite, func() message { return &Twrite{} }) + msgRegistry.register(MsgRwrite, func() message { return &Rwrite{} }) + msgRegistry.register(MsgTclunk, func() message { return &Tclunk{} }) + msgRegistry.register(MsgRclunk, func() message { return &Rclunk{} }) + msgRegistry.register(MsgTremove, func() message { return &Tremove{} }) + msgRegistry.register(MsgRremove, func() message { return &Rremove{} }) + msgRegistry.register(MsgTflushf, func() message { return &Tflushf{} }) + msgRegistry.register(MsgRflushf, func() message { return &Rflushf{} }) + msgRegistry.register(MsgTwalkgetattr, func() message { return &Twalkgetattr{} }) + msgRegistry.register(MsgRwalkgetattr, func() message { return &Rwalkgetattr{} }) + msgRegistry.register(MsgTucreate, func() message { return &Tucreate{} }) + msgRegistry.register(MsgRucreate, func() message { return &Rucreate{} }) + msgRegistry.register(MsgTumkdir, func() message { return &Tumkdir{} }) + msgRegistry.register(MsgRumkdir, func() message { return &Rumkdir{} }) + msgRegistry.register(MsgTumknod, func() message { return &Tumknod{} }) + msgRegistry.register(MsgRumknod, func() message { return &Rumknod{} }) + msgRegistry.register(MsgTusymlink, func() message { return &Tusymlink{} }) + msgRegistry.register(MsgRusymlink, func() message { return &Rusymlink{} }) + msgRegistry.register(MsgTlconnect, func() message { return &Tlconnect{} }) + msgRegistry.register(MsgRlconnect, func() message { return &Rlconnect{} }) + msgRegistry.register(MsgTallocate, func() message { return &Tallocate{} }) + msgRegistry.register(MsgRallocate, func() message { return &Rallocate{} }) +} diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go new file mode 100644 index 000000000..4039862e6 --- /dev/null +++ b/pkg/p9/p9.go @@ -0,0 +1,1141 @@ +// Copyright 2018 The gVisor Authors. 
// OpenFlags is the mode passed to Open and Create operations.
//
// These correspond to bits sent over the wire.
type OpenFlags uint32

const (
	// ReadOnly is a Topen and Tcreate flag indicating read-only mode.
	ReadOnly OpenFlags = 0

	// WriteOnly is a Topen and Tcreate flag indicating write-only mode.
	WriteOnly OpenFlags = 1

	// ReadWrite is a Topen flag indicating read-write mode.
	ReadWrite OpenFlags = 2

	// OpenFlagsModeMask is a mask of valid OpenFlags mode bits.
	OpenFlagsModeMask OpenFlags = 3

	// OpenFlagsIgnoreMask is the set of OpenFlags bits that are ignored for
	// Tlopen.
	//
	// Note that syscall.O_LARGEFILE is set to zero, so the literal 0100000
	// (the value from Linux fcntl.h) is used in its place.
	OpenFlagsIgnoreMask OpenFlags = syscall.O_DIRECTORY | syscall.O_NOATIME | 0100000
)
// Tag is a message tag.
type Tag uint16
+ Exec FileMode = 01 + + // AllPermissions is a mask with rwx bits set for user, group and others. + AllPermissions FileMode = 0777 + + // Sticky is a mode bit indicating sticky directories. + Sticky FileMode = 01000 + + // permissionsMask is the mask to apply to FileModes for permissions. It + // includes rwx bits for user, group and others, and sticky bit. + permissionsMask FileMode = 01777 +) + +// QIDType is the most significant byte of the FileMode word, to be used as the +// Type field of p9.QID. +func (m FileMode) QIDType() QIDType { + switch { + case m.IsDir(): + return TypeDir + case m.IsSocket(), m.IsNamedPipe(), m.IsCharacterDevice(): + // Best approximation. + return TypeAppendOnly + case m.IsSymlink(): + return TypeSymlink + default: + return TypeRegular + } +} + +// FileType returns the file mode without the permission bits. +func (m FileMode) FileType() FileMode { + return m & FileModeMask +} + +// Permissions returns just the permission bits of the mode. +func (m FileMode) Permissions() FileMode { + return m & permissionsMask +} + +// Writable returns the mode with write bits added. +func (m FileMode) Writable() FileMode { + return m | 0222 +} + +// IsReadable returns true if m represents a file that can be read. +func (m FileMode) IsReadable() bool { + return m&0444 != 0 +} + +// IsWritable returns true if m represents a file that can be written to. +func (m FileMode) IsWritable() bool { + return m&0222 != 0 +} + +// IsExecutable returns true if m represents a file that can be executed. +func (m FileMode) IsExecutable() bool { + return m&0111 != 0 +} + +// IsRegular returns true if m is a regular file. +func (m FileMode) IsRegular() bool { + return m&FileModeMask == ModeRegular +} + +// IsDir returns true if m represents a directory. +func (m FileMode) IsDir() bool { + return m&FileModeMask == ModeDirectory +} + +// IsNamedPipe returns true if m represents a named pipe. 
+func (m FileMode) IsNamedPipe() bool { + return m&FileModeMask == ModeNamedPipe +} + +// IsCharacterDevice returns true if m represents a character device. +func (m FileMode) IsCharacterDevice() bool { + return m&FileModeMask == ModeCharacterDevice +} + +// IsBlockDevice returns true if m represents a character device. +func (m FileMode) IsBlockDevice() bool { + return m&FileModeMask == ModeBlockDevice +} + +// IsSocket returns true if m represents a socket. +func (m FileMode) IsSocket() bool { + return m&FileModeMask == ModeSocket +} + +// IsSymlink returns true if m represents a symlink. +func (m FileMode) IsSymlink() bool { + return m&FileModeMask == ModeSymlink +} + +// ModeFromOS returns a FileMode from an os.FileMode. +func ModeFromOS(mode os.FileMode) FileMode { + m := FileMode(mode.Perm()) + switch { + case mode.IsDir(): + m |= ModeDirectory + case mode&os.ModeSymlink != 0: + m |= ModeSymlink + case mode&os.ModeSocket != 0: + m |= ModeSocket + case mode&os.ModeNamedPipe != 0: + m |= ModeNamedPipe + case mode&os.ModeCharDevice != 0: + m |= ModeCharacterDevice + case mode&os.ModeDevice != 0: + m |= ModeBlockDevice + default: + m |= ModeRegular + } + return m +} + +// OSMode converts a p9.FileMode to an os.FileMode. +func (m FileMode) OSMode() os.FileMode { + var osMode os.FileMode + osMode |= os.FileMode(m.Permissions()) + switch { + case m.IsDir(): + osMode |= os.ModeDir + case m.IsSymlink(): + osMode |= os.ModeSymlink + case m.IsSocket(): + osMode |= os.ModeSocket + case m.IsNamedPipe(): + osMode |= os.ModeNamedPipe + case m.IsCharacterDevice(): + osMode |= os.ModeCharDevice | os.ModeDevice + case m.IsBlockDevice(): + osMode |= os.ModeDevice + } + return osMode +} + +// UID represents a user ID. +type UID uint32 + +// Ok returns true if uid is not NoUID. +func (uid UID) Ok() bool { + return uid != NoUID +} + +// GID represents a group ID. +type GID uint32 + +// Ok returns true if gid is not NoGID. 
// MsgType is a type identifier.
type MsgType uint8

// MsgType declarations.
//
// Every constant carries the MsgType type explicitly. A constant in a const
// block only inherits the type of the preceding line when its own expression
// is omitted; because each entry here has an explicit value, leaving the type
// off would make it an untyped integer constant instead of a MsgType.
const (
	MsgTlerror      MsgType = 6
	MsgRlerror      MsgType = 7
	MsgTstatfs      MsgType = 8
	MsgRstatfs      MsgType = 9
	MsgTlopen       MsgType = 12
	MsgRlopen       MsgType = 13
	MsgTlcreate     MsgType = 14
	MsgRlcreate     MsgType = 15
	MsgTsymlink     MsgType = 16
	MsgRsymlink     MsgType = 17
	MsgTmknod       MsgType = 18
	MsgRmknod       MsgType = 19
	MsgTrename      MsgType = 20
	MsgRrename      MsgType = 21
	MsgTreadlink    MsgType = 22
	MsgRreadlink    MsgType = 23
	MsgTgetattr     MsgType = 24
	MsgRgetattr     MsgType = 25
	MsgTsetattr     MsgType = 26
	MsgRsetattr     MsgType = 27
	MsgTxattrwalk   MsgType = 30
	MsgRxattrwalk   MsgType = 31
	MsgTxattrcreate MsgType = 32
	MsgRxattrcreate MsgType = 33
	MsgTreaddir     MsgType = 40
	MsgRreaddir     MsgType = 41
	MsgTfsync       MsgType = 50
	MsgRfsync       MsgType = 51
	MsgTlink        MsgType = 70
	MsgRlink        MsgType = 71
	MsgTmkdir       MsgType = 72
	MsgRmkdir       MsgType = 73
	MsgTrenameat    MsgType = 74
	MsgRrenameat    MsgType = 75
	MsgTunlinkat    MsgType = 76
	MsgRunlinkat    MsgType = 77
	MsgTversion     MsgType = 100
	MsgRversion     MsgType = 101
	MsgTauth        MsgType = 102
	MsgRauth        MsgType = 103
	MsgTattach      MsgType = 104
	MsgRattach      MsgType = 105
	MsgTflush       MsgType = 108
	MsgRflush       MsgType = 109
	MsgTwalk        MsgType = 110
	MsgRwalk        MsgType = 111
	MsgTread        MsgType = 116
	MsgRread        MsgType = 117
	MsgTwrite       MsgType = 118
	MsgRwrite       MsgType = 119
	MsgTclunk       MsgType = 120
	MsgRclunk       MsgType = 121
	MsgTremove      MsgType = 122
	MsgRremove      MsgType = 123
	MsgTflushf      MsgType = 124
	MsgRflushf      MsgType = 125
	MsgTwalkgetattr MsgType = 126
	MsgRwalkgetattr MsgType = 127
	MsgTucreate     MsgType = 128
	MsgRucreate     MsgType = 129
	MsgTumkdir      MsgType = 130
	MsgRumkdir      MsgType = 131
	MsgTumknod      MsgType = 132
	MsgRumknod      MsgType = 133
	MsgTusymlink    MsgType = 134
	MsgRusymlink    MsgType = 135
	MsgTlconnect    MsgType = 136
	MsgRlconnect    MsgType = 137
	MsgTallocate    MsgType = 138
	MsgRallocate    MsgType = 139
)
+type QIDType uint8 + +const ( + // TypeDir represents a directory type. + TypeDir QIDType = 0x80 + + // TypeAppendOnly represents an append only file. + TypeAppendOnly QIDType = 0x40 + + // TypeExclusive represents an exclusive-use file. + TypeExclusive QIDType = 0x20 + + // TypeMount represents a mounted channel. + TypeMount QIDType = 0x10 + + // TypeAuth represents an authentication file. + TypeAuth QIDType = 0x08 + + // TypeTemporary represents a temporary file. + TypeTemporary QIDType = 0x04 + + // TypeSymlink represents a symlink. + TypeSymlink QIDType = 0x02 + + // TypeLink represents a hard link. + TypeLink QIDType = 0x01 + + // TypeRegular represents a regular file. + TypeRegular QIDType = 0x00 +) + +// QID is a unique file identifier. +// +// This may be embedded in other requests and responses. +type QID struct { + // Type is the highest order byte of the file mode. + Type QIDType + + // Version is an arbitrary server version number. + Version uint32 + + // Path is a unique server identifier for this path (e.g. inode). + Path uint64 +} + +// String implements fmt.Stringer. +func (q QID) String() string { + return fmt.Sprintf("QID{Type: %d, Version: %d, Path: %d}", q.Type, q.Version, q.Path) +} + +// Decode implements encoder.Decode. +func (q *QID) Decode(b *buffer) { + q.Type = b.ReadQIDType() + q.Version = b.Read32() + q.Path = b.Read64() +} + +// Encode implements encoder.Encode. +func (q *QID) Encode(b *buffer) { + b.WriteQIDType(q.Type) + b.Write32(q.Version) + b.Write64(q.Path) +} + +// QIDGenerator is a simple generator for QIDs that atomically increments Path +// values. +type QIDGenerator struct { + // uids is an ever increasing value that can be atomically incremented + // to provide unique Path values for QIDs. + uids uint64 +} + +// Get returns a new 9P unique ID with a unique Path given a QID type. 
+// +// While the 9P spec allows Version to be incremented every time the file is +// modified, we currently do not use the Version member for anything. Hence, +// it is set to 0. +func (q *QIDGenerator) Get(t QIDType) QID { + return QID{ + Type: t, + Version: 0, + Path: atomic.AddUint64(&q.uids, 1), + } +} + +// FSStat is used by statfs. +type FSStat struct { + // Type is the filesystem type. + Type uint32 + + // BlockSize is the blocksize. + BlockSize uint32 + + // Blocks is the number of blocks. + Blocks uint64 + + // BlocksFree is the number of free blocks. + BlocksFree uint64 + + // BlocksAvailable is the number of blocks *available*. + BlocksAvailable uint64 + + // Files is the number of files available. + Files uint64 + + // FilesFree is the number of free file nodes. + FilesFree uint64 + + // FSID is the filesystem ID. + FSID uint64 + + // NameLength is the maximum name length. + NameLength uint32 +} + +// Decode implements encoder.Decode. +func (f *FSStat) Decode(b *buffer) { + f.Type = b.Read32() + f.BlockSize = b.Read32() + f.Blocks = b.Read64() + f.BlocksFree = b.Read64() + f.BlocksAvailable = b.Read64() + f.Files = b.Read64() + f.FilesFree = b.Read64() + f.FSID = b.Read64() + f.NameLength = b.Read32() +} + +// Encode implements encoder.Encode. +func (f *FSStat) Encode(b *buffer) { + b.Write32(f.Type) + b.Write32(f.BlockSize) + b.Write64(f.Blocks) + b.Write64(f.BlocksFree) + b.Write64(f.BlocksAvailable) + b.Write64(f.Files) + b.Write64(f.FilesFree) + b.Write64(f.FSID) + b.Write32(f.NameLength) +} + +// AttrMask is a mask of attributes for getattr. +type AttrMask struct { + Mode bool + NLink bool + UID bool + GID bool + RDev bool + ATime bool + MTime bool + CTime bool + INo bool + Size bool + Blocks bool + BTime bool + Gen bool + DataVersion bool +} + +// Contains returns true if a contains all of the attributes masked as b. 
+func (a AttrMask) Contains(b AttrMask) bool { + if b.Mode && !a.Mode { + return false + } + if b.NLink && !a.NLink { + return false + } + if b.UID && !a.UID { + return false + } + if b.GID && !a.GID { + return false + } + if b.RDev && !a.RDev { + return false + } + if b.ATime && !a.ATime { + return false + } + if b.MTime && !a.MTime { + return false + } + if b.CTime && !a.CTime { + return false + } + if b.INo && !a.INo { + return false + } + if b.Size && !a.Size { + return false + } + if b.Blocks && !a.Blocks { + return false + } + if b.BTime && !a.BTime { + return false + } + if b.Gen && !a.Gen { + return false + } + if b.DataVersion && !a.DataVersion { + return false + } + return true +} + +// Empty returns true if no fields are masked. +func (a AttrMask) Empty() bool { + return !a.Mode && !a.NLink && !a.UID && !a.GID && !a.RDev && !a.ATime && !a.MTime && !a.CTime && !a.INo && !a.Size && !a.Blocks && !a.BTime && !a.Gen && !a.DataVersion +} + +// AttrMaskAll returns an AttrMask with all fields masked. +func AttrMaskAll() AttrMask { + return AttrMask{ + Mode: true, + NLink: true, + UID: true, + GID: true, + RDev: true, + ATime: true, + MTime: true, + CTime: true, + INo: true, + Size: true, + Blocks: true, + BTime: true, + Gen: true, + DataVersion: true, + } +} + +// String implements fmt.Stringer. 
+func (a AttrMask) String() string { + var masks []string + if a.Mode { + masks = append(masks, "Mode") + } + if a.NLink { + masks = append(masks, "NLink") + } + if a.UID { + masks = append(masks, "UID") + } + if a.GID { + masks = append(masks, "GID") + } + if a.RDev { + masks = append(masks, "RDev") + } + if a.ATime { + masks = append(masks, "ATime") + } + if a.MTime { + masks = append(masks, "MTime") + } + if a.CTime { + masks = append(masks, "CTime") + } + if a.INo { + masks = append(masks, "INo") + } + if a.Size { + masks = append(masks, "Size") + } + if a.Blocks { + masks = append(masks, "Blocks") + } + if a.BTime { + masks = append(masks, "BTime") + } + if a.Gen { + masks = append(masks, "Gen") + } + if a.DataVersion { + masks = append(masks, "DataVersion") + } + return fmt.Sprintf("AttrMask{with: %s}", strings.Join(masks, " ")) +} + +// Decode implements encoder.Decode. +func (a *AttrMask) Decode(b *buffer) { + mask := b.Read64() + a.Mode = mask&0x00000001 != 0 + a.NLink = mask&0x00000002 != 0 + a.UID = mask&0x00000004 != 0 + a.GID = mask&0x00000008 != 0 + a.RDev = mask&0x00000010 != 0 + a.ATime = mask&0x00000020 != 0 + a.MTime = mask&0x00000040 != 0 + a.CTime = mask&0x00000080 != 0 + a.INo = mask&0x00000100 != 0 + a.Size = mask&0x00000200 != 0 + a.Blocks = mask&0x00000400 != 0 + a.BTime = mask&0x00000800 != 0 + a.Gen = mask&0x00001000 != 0 + a.DataVersion = mask&0x00002000 != 0 +} + +// Encode implements encoder.Encode. 
+func (a *AttrMask) Encode(b *buffer) { + var mask uint64 + if a.Mode { + mask |= 0x00000001 + } + if a.NLink { + mask |= 0x00000002 + } + if a.UID { + mask |= 0x00000004 + } + if a.GID { + mask |= 0x00000008 + } + if a.RDev { + mask |= 0x00000010 + } + if a.ATime { + mask |= 0x00000020 + } + if a.MTime { + mask |= 0x00000040 + } + if a.CTime { + mask |= 0x00000080 + } + if a.INo { + mask |= 0x00000100 + } + if a.Size { + mask |= 0x00000200 + } + if a.Blocks { + mask |= 0x00000400 + } + if a.BTime { + mask |= 0x00000800 + } + if a.Gen { + mask |= 0x00001000 + } + if a.DataVersion { + mask |= 0x00002000 + } + b.Write64(mask) +} + +// Attr is a set of attributes for getattr. +type Attr struct { + Mode FileMode + UID UID + GID GID + NLink uint64 + RDev uint64 + Size uint64 + BlockSize uint64 + Blocks uint64 + ATimeSeconds uint64 + ATimeNanoSeconds uint64 + MTimeSeconds uint64 + MTimeNanoSeconds uint64 + CTimeSeconds uint64 + CTimeNanoSeconds uint64 + BTimeSeconds uint64 + BTimeNanoSeconds uint64 + Gen uint64 + DataVersion uint64 +} + +// String implements fmt.Stringer. +func (a Attr) String() string { + return fmt.Sprintf("Attr{Mode: 0o%o, UID: %d, GID: %d, NLink: %d, RDev: %d, Size: %d, BlockSize: %d, Blocks: %d, ATime: {Sec: %d, NanoSec: %d}, MTime: {Sec: %d, NanoSec: %d}, CTime: {Sec: %d, NanoSec: %d}, BTime: {Sec: %d, NanoSec: %d}, Gen: %d, DataVersion: %d}", + a.Mode, a.UID, a.GID, a.NLink, a.RDev, a.Size, a.BlockSize, a.Blocks, a.ATimeSeconds, a.ATimeNanoSeconds, a.MTimeSeconds, a.MTimeNanoSeconds, a.CTimeSeconds, a.CTimeNanoSeconds, a.BTimeSeconds, a.BTimeNanoSeconds, a.Gen, a.DataVersion) +} + +// Encode implements encoder.Encode. 
+func (a *Attr) Encode(b *buffer) { + b.WriteFileMode(a.Mode) + b.WriteUID(a.UID) + b.WriteGID(a.GID) + b.Write64(a.NLink) + b.Write64(a.RDev) + b.Write64(a.Size) + b.Write64(a.BlockSize) + b.Write64(a.Blocks) + b.Write64(a.ATimeSeconds) + b.Write64(a.ATimeNanoSeconds) + b.Write64(a.MTimeSeconds) + b.Write64(a.MTimeNanoSeconds) + b.Write64(a.CTimeSeconds) + b.Write64(a.CTimeNanoSeconds) + b.Write64(a.BTimeSeconds) + b.Write64(a.BTimeNanoSeconds) + b.Write64(a.Gen) + b.Write64(a.DataVersion) +} + +// Decode implements encoder.Decode. +func (a *Attr) Decode(b *buffer) { + a.Mode = b.ReadFileMode() + a.UID = b.ReadUID() + a.GID = b.ReadGID() + a.NLink = b.Read64() + a.RDev = b.Read64() + a.Size = b.Read64() + a.BlockSize = b.Read64() + a.Blocks = b.Read64() + a.ATimeSeconds = b.Read64() + a.ATimeNanoSeconds = b.Read64() + a.MTimeSeconds = b.Read64() + a.MTimeNanoSeconds = b.Read64() + a.CTimeSeconds = b.Read64() + a.CTimeNanoSeconds = b.Read64() + a.BTimeSeconds = b.Read64() + a.BTimeNanoSeconds = b.Read64() + a.Gen = b.Read64() + a.DataVersion = b.Read64() +} + +// StatToAttr converts a Linux syscall stat structure to an Attr. +func StatToAttr(s *syscall.Stat_t, req AttrMask) (Attr, AttrMask) { + attr := Attr{ + UID: NoUID, + GID: NoGID, + } + if req.Mode { + // p9.FileMode corresponds to Linux mode_t. 
+ attr.Mode = FileMode(s.Mode) + } + if req.NLink { + attr.NLink = s.Nlink + } + if req.UID { + attr.UID = UID(s.Uid) + } + if req.GID { + attr.GID = GID(s.Gid) + } + if req.RDev { + attr.RDev = s.Dev + } + if req.ATime { + attr.ATimeSeconds = uint64(s.Atim.Sec) + attr.ATimeNanoSeconds = uint64(s.Atim.Nsec) + } + if req.MTime { + attr.MTimeSeconds = uint64(s.Mtim.Sec) + attr.MTimeNanoSeconds = uint64(s.Mtim.Nsec) + } + if req.CTime { + attr.CTimeSeconds = uint64(s.Ctim.Sec) + attr.CTimeNanoSeconds = uint64(s.Ctim.Nsec) + } + if req.Size { + attr.Size = uint64(s.Size) + } + if req.Blocks { + attr.BlockSize = uint64(s.Blksize) + attr.Blocks = uint64(s.Blocks) + } + + // Use the req field because we already have it. + req.BTime = false + req.Gen = false + req.DataVersion = false + + return attr, req +} + +// SetAttrMask specifies a valid mask for setattr. +type SetAttrMask struct { + Permissions bool + UID bool + GID bool + Size bool + ATime bool + MTime bool + CTime bool + ATimeNotSystemTime bool + MTimeNotSystemTime bool +} + +// IsSubsetOf returns whether s is a subset of m. +func (s SetAttrMask) IsSubsetOf(m SetAttrMask) bool { + sb := s.bitmask() + sm := m.bitmask() + return sm|sb == sm +} + +// String implements fmt.Stringer. +func (s SetAttrMask) String() string { + var masks []string + if s.Permissions { + masks = append(masks, "Permissions") + } + if s.UID { + masks = append(masks, "UID") + } + if s.GID { + masks = append(masks, "GID") + } + if s.Size { + masks = append(masks, "Size") + } + if s.ATime { + masks = append(masks, "ATime") + } + if s.MTime { + masks = append(masks, "MTime") + } + if s.CTime { + masks = append(masks, "CTime") + } + if s.ATimeNotSystemTime { + masks = append(masks, "ATimeNotSystemTime") + } + if s.MTimeNotSystemTime { + masks = append(masks, "MTimeNotSystemTime") + } + return fmt.Sprintf("SetAttrMask{with: %s}", strings.Join(masks, " ")) +} + +// Empty returns true if no fields are masked. 
+func (s SetAttrMask) Empty() bool { + return !s.Permissions && !s.UID && !s.GID && !s.Size && !s.ATime && !s.MTime && !s.CTime && !s.ATimeNotSystemTime && !s.MTimeNotSystemTime +} + +// Decode implements encoder.Decode. +func (s *SetAttrMask) Decode(b *buffer) { + mask := b.Read32() + s.Permissions = mask&0x00000001 != 0 + s.UID = mask&0x00000002 != 0 + s.GID = mask&0x00000004 != 0 + s.Size = mask&0x00000008 != 0 + s.ATime = mask&0x00000010 != 0 + s.MTime = mask&0x00000020 != 0 + s.CTime = mask&0x00000040 != 0 + s.ATimeNotSystemTime = mask&0x00000080 != 0 + s.MTimeNotSystemTime = mask&0x00000100 != 0 +} + +func (s SetAttrMask) bitmask() uint32 { + var mask uint32 + if s.Permissions { + mask |= 0x00000001 + } + if s.UID { + mask |= 0x00000002 + } + if s.GID { + mask |= 0x00000004 + } + if s.Size { + mask |= 0x00000008 + } + if s.ATime { + mask |= 0x00000010 + } + if s.MTime { + mask |= 0x00000020 + } + if s.CTime { + mask |= 0x00000040 + } + if s.ATimeNotSystemTime { + mask |= 0x00000080 + } + if s.MTimeNotSystemTime { + mask |= 0x00000100 + } + return mask +} + +// Encode implements encoder.Encode. +func (s *SetAttrMask) Encode(b *buffer) { + b.Write32(s.bitmask()) +} + +// SetAttr specifies a set of attributes for a setattr. +type SetAttr struct { + Permissions FileMode + UID UID + GID GID + Size uint64 + ATimeSeconds uint64 + ATimeNanoSeconds uint64 + MTimeSeconds uint64 + MTimeNanoSeconds uint64 +} + +// String implements fmt.Stringer. +func (s SetAttr) String() string { + return fmt.Sprintf("SetAttr{Permissions: 0o%o, UID: %d, GID: %d, Size: %d, ATime: {Sec: %d, NanoSec: %d}, MTime: {Sec: %d, NanoSec: %d}}", s.Permissions, s.UID, s.GID, s.Size, s.ATimeSeconds, s.ATimeNanoSeconds, s.MTimeSeconds, s.MTimeNanoSeconds) +} + +// Decode implements encoder.Decode. 
+func (s *SetAttr) Decode(b *buffer) { + s.Permissions = b.ReadPermissions() + s.UID = b.ReadUID() + s.GID = b.ReadGID() + s.Size = b.Read64() + s.ATimeSeconds = b.Read64() + s.ATimeNanoSeconds = b.Read64() + s.MTimeSeconds = b.Read64() + s.MTimeNanoSeconds = b.Read64() +} + +// Encode implements encoder.Encode. +func (s *SetAttr) Encode(b *buffer) { + b.WritePermissions(s.Permissions) + b.WriteUID(s.UID) + b.WriteGID(s.GID) + b.Write64(s.Size) + b.Write64(s.ATimeSeconds) + b.Write64(s.ATimeNanoSeconds) + b.Write64(s.MTimeSeconds) + b.Write64(s.MTimeNanoSeconds) +} + +// Apply applies this to the given Attr. +func (a *Attr) Apply(mask SetAttrMask, attr SetAttr) { + if mask.Permissions { + a.Mode = a.Mode&^permissionsMask | (attr.Permissions & permissionsMask) + } + if mask.UID { + a.UID = attr.UID + } + if mask.GID { + a.GID = attr.GID + } + if mask.Size { + a.Size = attr.Size + } + if mask.ATime { + a.ATimeSeconds = attr.ATimeSeconds + a.ATimeNanoSeconds = attr.ATimeNanoSeconds + } + if mask.MTime { + a.MTimeSeconds = attr.MTimeSeconds + a.MTimeNanoSeconds = attr.MTimeNanoSeconds + } +} + +// Dirent is used for readdir. +type Dirent struct { + // QID is the entry QID. + QID QID + + // Offset is the offset in the directory. + // + // This will be communicated back the original caller. + Offset uint64 + + // Type is the 9P type. + Type QIDType + + // Name is the name of the entry (i.e. basename). + Name string +} + +// String implements fmt.Stringer. +func (d Dirent) String() string { + return fmt.Sprintf("Dirent{QID: %d, Offset: %d, Type: 0x%X, Name: %s}", d.QID, d.Offset, d.Type, d.Name) +} + +// Decode implements encoder.Decode. +func (d *Dirent) Decode(b *buffer) { + d.QID.Decode(b) + d.Offset = b.Read64() + d.Type = b.ReadQIDType() + d.Name = b.ReadString() +} + +// Encode implements encoder.Encode. 
+func (d *Dirent) Encode(b *buffer) { + d.QID.Encode(b) + b.Write64(d.Offset) + b.WriteQIDType(d.Type) + b.WriteString(d.Name) +} + +// AllocateMode are possible modes to p9.File.Allocate(). +type AllocateMode struct { + KeepSize bool + PunchHole bool + NoHideStale bool + CollapseRange bool + ZeroRange bool + InsertRange bool + Unshare bool +} + +// ToLinux converts to a value compatible with fallocate(2)'s mode. +func (a *AllocateMode) ToLinux() uint32 { + rv := uint32(0) + if a.KeepSize { + rv |= unix.FALLOC_FL_KEEP_SIZE + } + if a.PunchHole { + rv |= unix.FALLOC_FL_PUNCH_HOLE + } + if a.NoHideStale { + rv |= unix.FALLOC_FL_NO_HIDE_STALE + } + if a.CollapseRange { + rv |= unix.FALLOC_FL_COLLAPSE_RANGE + } + if a.ZeroRange { + rv |= unix.FALLOC_FL_ZERO_RANGE + } + if a.InsertRange { + rv |= unix.FALLOC_FL_INSERT_RANGE + } + if a.Unshare { + rv |= unix.FALLOC_FL_UNSHARE_RANGE + } + return rv +} + +// Decode implements encoder.Decode. +func (a *AllocateMode) Decode(b *buffer) { + mask := b.Read32() + a.KeepSize = mask&0x01 != 0 + a.PunchHole = mask&0x02 != 0 + a.NoHideStale = mask&0x04 != 0 + a.CollapseRange = mask&0x08 != 0 + a.ZeroRange = mask&0x10 != 0 + a.InsertRange = mask&0x20 != 0 + a.Unshare = mask&0x40 != 0 +} + +// Encode implements encoder.Encode. +func (a *AllocateMode) Encode(b *buffer) { + mask := uint32(0) + if a.KeepSize { + mask |= 0x01 + } + if a.PunchHole { + mask |= 0x02 + } + if a.NoHideStale { + mask |= 0x04 + } + if a.CollapseRange { + mask |= 0x08 + } + if a.ZeroRange { + mask |= 0x10 + } + if a.InsertRange { + mask |= 0x20 + } + if a.Unshare { + mask |= 0x40 + } + b.Write32(mask) +} diff --git a/pkg/p9/p9_state_autogen.go b/pkg/p9/p9_state_autogen.go new file mode 100755 index 000000000..0b9556862 --- /dev/null +++ b/pkg/p9/p9_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. 
// pathNode is a single node in a path traversal.
//
// These are shared by all fidRefs that point to the same path.
//
// These are not synchronized because we allow certain operations (file walk)
// to proceed without having to acquire a write lock. The lock in this
// structure exists to synchronize high-level, semantic operations, such as the
// simultaneous creation and deletion of a file.
//
// (+) below is the path component string.
type pathNode struct {
	mu       sync.RWMutex // See above.
	fidRefs  sync.Map     // => map[*fidRef]string(+)
	children sync.Map     // => map[string(+)]*pathNode
	count    int64
}

// pathNodeFor returns the child path node registered under name, creating and
// registering a new one if none exists yet.
//
// Precondition: mu must be held in a readable fashion.
func (p *pathNode) pathNodeFor(name string) *pathNode {
	child, found := p.children.Load(name)
	if !found {
		// Nothing registered yet. LoadOrStore returns whichever node ends up
		// in the map, so a concurrent creator and this call agree on it.
		child, _ = p.children.LoadOrStore(name, new(pathNode))
	}
	return child.(*pathNode)
}
+func (p *pathNode) nameFor(ref *fidRef) string { + if s, ok := p.fidRefs.Load(ref); ok { + return s.(string) + } + + // This should not happen, don't proceed. + panic(fmt.Sprintf("expected name for %+v, none found", ref)) +} + +// addChild adds a child to the given pathNode. +// +// This applies only to an individual fidRef. +// +// Precondition: mu must be held in a writable fashion. +func (p *pathNode) addChild(ref *fidRef, name string) { + if s, ok := p.fidRefs.Load(ref); ok { + // This should not happen, don't proceed. + panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, s, name)) + } + + p.fidRefs.Store(ref, name) +} + +// removeChild removes the given child. +// +// This applies only to an individual fidRef. +// +// Precondition: mu must be held in a writable fashion. +func (p *pathNode) removeChild(ref *fidRef) { + p.fidRefs.Delete(ref) +} + +// removeWithName removes all references with the given name. +// +// The original pathNode is returned by this function, and removed from this +// pathNode. Any operations on the removed tree must use this value. +// +// The provided function is executed after removal. +// +// Precondition: mu must be held in a writable fashion. +func (p *pathNode) removeWithName(name string, fn func(ref *fidRef)) *pathNode { + p.fidRefs.Range(func(key, value interface{}) bool { + if value.(string) == name { + p.fidRefs.Delete(key) + fn(key.(*fidRef)) + } + return true + }) + + // Return the original path node. + origPathNode := p.pathNodeFor(name) + p.children.Delete(name) + return origPathNode +} diff --git a/pkg/p9/pool.go b/pkg/p9/pool.go new file mode 100644 index 000000000..52de889e1 --- /dev/null +++ b/pkg/p9/pool.go @@ -0,0 +1,68 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// pool is a simple allocator.
//
// It is used for both tags and FIDs.
type pool struct {
	mu sync.Mutex

	// cache holds values that were handed back via Put. Get reuses them in
	// last-in, first-out order before issuing anything new.
	cache []uint64

	// start is the next fresh value that Get will issue.
	start uint64

	// max is the current maximum issued. Neither Get nor Put touches it.
	max uint64

	// limit is the upper limit. Once start reaches it, no new values are
	// issued.
	limit uint64
}

// Get gets a value from the pool.
//
// The second result is false when no returned value is cached and start has
// reached limit; the first result is zero in that case.
func (p *pool) Get() (uint64, bool) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Prefer a previously returned value.
	if n := len(p.cache); n > 0 {
		v := p.cache[n-1]
		p.cache = p.cache[:n-1]
		return v, true
	}

	// Otherwise issue a fresh one, if any are left.
	if p.start != p.limit {
		v := p.start
		p.start++
		return v, true
	}
	return 0, false
}

// Put returns a value to the pool.
func (p *pool) Put(v uint64) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.cache = append(p.cache, v)
}
// See the License for the specific language governing permissions and
// limitations under the License.

// Server is a 9p2000.L server.
type Server struct {
	// attacher provides the attach function.
	attacher Attacher

	// pathTree is the full set of paths opened on this server.
	//
	// These may be across different connections, but rename operations
	// must be serialized globally for safety. There is a single pathTree
	// for the entire server, and not per connection.
	pathTree pathNode

	// renameMu is a global lock protecting rename operations. With this
	// lock, we can be certain that any given rename operation can safely
	// acquire two path nodes in any order, as all other concurrent
	// operations acquire at most a single node.
	renameMu sync.RWMutex
}

// NewServer returns a new server that uses the given attacher.
func NewServer(attacher Attacher) *Server {
	return &Server{
		attacher: attacher,
	}
}

// connState is the state for a single connection.
type connState struct {
	// server is the backing server.
	server *Server

	// sendMu is the send lock.
	sendMu sync.Mutex

	// conn is the connection.
	conn *unet.Socket

	// fids is the set of active FIDs.
	//
	// This is used to find FIDs for files. fidMu guards the map.
	fidMu sync.Mutex
	fids  map[FID]*fidRef

	// tags is the set of active tags.
	//
	// The given channel is closed when the
	// tag is finished with processing. tagMu guards the map.
	tagMu sync.Mutex
	tags  map[Tag]chan struct{}

	// messageSize is the maximum message size. The server does not
	// do automatic splitting of messages.
	messageSize uint32

	// version is the agreed upon version X of 9P2000.L.Google.X.
	// version 0 implies 9P2000.L.
	version uint32

	// recvOkay indicates that a receive may start.
	recvOkay chan bool

	// recvDone is signalled when a message is received.
	recvDone chan error

	// sendDone is signalled when a send is finished.
	sendDone chan error
}

// fidRef wraps a node and tracks references.
type fidRef struct {
	// server is the associated server.
	server *Server

	// file is the associated File.
	file File

	// refs is an active reference count, manipulated atomically by IncRef
	// and DecRef.
	//
	// The file above will be closed only when refs reaches zero.
	refs int64

	// openedMu protects opened and openFlags.
	openedMu sync.Mutex

	// opened indicates whether this has been opened already.
	//
	// This is updated in handlers.go.
	opened bool

	// mode is the fidRef's mode from the walk. Only the type bits are
	// valid, the permissions may change. This is used to sanity check
	// operations on this element, and prevent walks across
	// non-directories.
	mode FileMode

	// openFlags is the mode used in the open.
	//
	// This is updated in handlers.go.
	openFlags OpenFlags

	// pathNode is the current pathNode for this FID.
	pathNode *pathNode

	// parent is the parent fidRef. We hold on to a parent reference to
	// ensure that hooks, such as Renamed, can be executed safely by the
	// server code.
	//
	// Note that parent cannot be changed without holding both the global
	// rename lock and a writable lock on the associated pathNode for this
	// fidRef. Holding either of these locks is sufficient to examine
	// parent safely.
	//
	// The parent will be nil for root fidRefs, and non-nil otherwise. The
	// method maybeParent can be used to return a cyclical reference, and
	// isRoot should be used to check for root over looking at parent
	// directly.
	parent *fidRef

	// deleted indicates that the backing file has been deleted. We stop
	// many operations at the API level if they are incompatible with a
	// file that has already been unlinked. It is accessed atomically;
	// non-zero means deleted.
	deleted uint32
}

// OpenFlags returns the flags the file was opened with and true iff the fid
// was opened previously. Both values are read under openedMu.
func (f *fidRef) OpenFlags() (OpenFlags, bool) {
	f.openedMu.Lock()
	defer f.openedMu.Unlock()
	return f.openFlags, f.opened
}

// IncRef increases the references on a fid.
func (f *fidRef) IncRef() {
	atomic.AddInt64(&f.refs, 1)
}

// DecRef should be called when you're finished with a fid.
//
// When the count drops to zero the file is closed, the fidRef is removed
// from its parent's path node, and the reference held on the parent is
// released (which may cascade upwards).
func (f *fidRef) DecRef() {
	if atomic.AddInt64(&f.refs, -1) == 0 {
		f.file.Close()

		// Drop the parent reference.
		//
		// Since this fidRef is guaranteed to be non-discoverable when
		// the references reach zero, we don't need to worry about
		// clearing the parent.
		if f.parent != nil {
			// If we've been previously deleted, removing this
			// ref is a no-op. That's expected.
			f.parent.pathNode.removeChild(f)
			f.parent.DecRef()
		}
	}
}

// isDeleted returns true if this fidRef has been deleted.
func (f *fidRef) isDeleted() bool {
	return atomic.LoadUint32(&f.deleted) != 0
}

// isRoot indicates whether this is a root fid, i.e. it has no parent.
func (f *fidRef) isRoot() bool {
	return f.parent == nil
}

// maybeParent returns a cyclic reference for roots, and the parent otherwise.
func (f *fidRef) maybeParent() *fidRef {
	if f.parent != nil {
		return f.parent
	}
	return f // Root has itself.
}

// notifyDelete marks all fidRefs in the given pathNode, and recursively in
// all of its children, as deleted.
//
// Precondition: the write lock must be held on the given pathNode.
func notifyDelete(pn *pathNode) {
	// Call on all local references.
	pn.fidRefs.Range(func(key, _ interface{}) bool {
		ref := key.(*fidRef)
		atomic.StoreUint32(&ref.deleted, 1)
		return true
	})

	// Call on all subtrees.
	pn.children.Range(func(_, value interface{}) bool {
		notifyDelete(value.(*pathNode))
		return true
	})
}

// markChildDeleted marks all children below the given name as deleted.
//
// Precondition: this must be called via safelyWrite or safelyGlobal.
+func (f *fidRef) markChildDeleted(name string) { + origPathNode := f.pathNode.removeWithName(name, func(ref *fidRef) { + atomic.StoreUint32(&ref.deleted, 1) + }) + + // Mark everything below as deleted. + notifyDelete(origPathNode) +} + +// notifyNameChange calls the relevant Renamed method on all nodes in the path, +// recursively. Note that this applies only for subtrees, as these +// notifications do not apply to the actual file whose name has changed. +// +// Precondition: the write lock must be held on the given pathNode. +func notifyNameChange(pn *pathNode) { + // Call on all local references. + pn.fidRefs.Range(func(key, value interface{}) bool { + ref := key.(*fidRef) + name := value.(string) + ref.file.Renamed(ref.parent.file, name) + return true + }) + + // Call on all subtrees. + pn.children.Range(func(_, value interface{}) bool { + notifyNameChange(value.(*pathNode)) + return true + }) +} + +// renameChildTo renames the given child to the target. +// +// Precondition: this must be called via safelyGlobal. +func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) { + target.markChildDeleted(newName) + origPathNode := f.pathNode.removeWithName(oldName, func(ref *fidRef) { + ref.parent.DecRef() // Drop original reference. + ref.parent = target // Change parent. + ref.parent.IncRef() // Acquire new one. + target.pathNode.addChild(ref, newName) + ref.file.Renamed(target.file, newName) + }) + + // Replace the previous (now deleted) path node. + f.pathNode.children.Store(newName, origPathNode) + + // Call Renamed on everything above. + notifyNameChange(origPathNode) +} + +// safelyRead executes the given operation with the local path node locked. +// This implies that paths will not change during the operation. 
+func (f *fidRef) safelyRead(fn func() error) (err error) { + f.server.renameMu.RLock() + defer f.server.renameMu.RUnlock() + f.pathNode.mu.RLock() + defer f.pathNode.mu.RUnlock() + return fn() +} + +// safelyWrite executes the given operation with the local path node locked in +// a writable fashion. This implies some paths may change. +func (f *fidRef) safelyWrite(fn func() error) (err error) { + f.server.renameMu.RLock() + defer f.server.renameMu.RUnlock() + f.pathNode.mu.Lock() + defer f.pathNode.mu.Unlock() + return fn() +} + +// safelyGlobal executes the given operation with the global path lock held. +func (f *fidRef) safelyGlobal(fn func() error) (err error) { + f.server.renameMu.Lock() + defer f.server.renameMu.Unlock() + return fn() +} + +// LookupFID finds the given FID. +// +// You should call fid.DecRef when you are finished using the fid. +func (cs *connState) LookupFID(fid FID) (*fidRef, bool) { + cs.fidMu.Lock() + defer cs.fidMu.Unlock() + fidRef, ok := cs.fids[fid] + if ok { + fidRef.IncRef() + return fidRef, true + } + return nil, false +} + +// InsertFID installs the given FID. +// +// This fid starts with a reference count of one. If a FID exists in +// the slot already it is closed, per the specification. +func (cs *connState) InsertFID(fid FID, newRef *fidRef) { + cs.fidMu.Lock() + defer cs.fidMu.Unlock() + origRef, ok := cs.fids[fid] + if ok { + defer origRef.DecRef() + } + newRef.IncRef() + cs.fids[fid] = newRef +} + +// DeleteFID removes the given FID. +// +// This simply removes it from the map and drops a reference. +func (cs *connState) DeleteFID(fid FID) bool { + cs.fidMu.Lock() + defer cs.fidMu.Unlock() + fidRef, ok := cs.fids[fid] + if !ok { + return false + } + delete(cs.fids, fid) + fidRef.DecRef() + return true +} + +// StartTag starts handling the tag. +// +// False is returned if this tag is already active. 
func (cs *connState) StartTag(t Tag) bool {
	cs.tagMu.Lock()
	defer cs.tagMu.Unlock()
	_, ok := cs.tags[t]
	if ok {
		return false
	}
	// The channel is closed by ClearTag; WaitTag blocks on it.
	cs.tags[t] = make(chan struct{})
	return true
}

// ClearTag finishes handling a tag.
//
// The tag is removed from the active set and its channel is closed, waking
// any WaitTag callers. Clearing a tag that is not active panics.
func (cs *connState) ClearTag(t Tag) {
	cs.tagMu.Lock()
	defer cs.tagMu.Unlock()
	ch, ok := cs.tags[t]
	if !ok {
		// Should never happen.
		panic("unused tag cleared")
	}
	delete(cs.tags, t)

	// Notify.
	close(ch)
}

// WaitTag waits for a tag to finish.
//
// It returns immediately if the tag is not currently active.
func (cs *connState) WaitTag(t Tag) {
	cs.tagMu.Lock()
	ch, ok := cs.tags[t]
	cs.tagMu.Unlock()
	if !ok {
		return
	}

	// Wait for close.
	<-ch
}

// handleRequest handles a single request.
//
// The recvDone channel is signaled when recv is done (with an error if
// necessary). The sendDone channel is signaled with the result of the send.
//
// On a socket error only recvDone is signaled; in every other case recvDone
// receives nil and exactly one value is later sent on sendDone.
func (cs *connState) handleRequest() {
	messageSize := atomic.LoadUint32(&cs.messageSize)
	if messageSize == 0 {
		// Default or not yet negotiated.
		messageSize = maximumLength
	}

	// Receive a message.
	tag, m, err := recv(cs.conn, messageSize, msgRegistry.get)
	if errSocket, ok := err.(ErrSocket); ok {
		// Connection problem; stop serving.
		cs.recvDone <- errSocket.error
		return
	}

	// Signal receive is done.
	cs.recvDone <- nil

	// Deal with other errors.
	if err != nil && err != io.EOF {
		// If it's not a connection error, but some other protocol error,
		// we can send a response immediately.
		cs.sendMu.Lock()
		err := send(cs.conn, tag, newErr(err))
		cs.sendMu.Unlock()
		cs.sendDone <- err
		return
	}

	// Try to start the tag.
	if !cs.StartTag(tag) {
		// Nothing we can do at this point; client is bogus.
		log.Debugf("no valid tag [%05d]", tag)
		cs.sendDone <- ErrNoValidMessage
		return
	}

	// Handle the message.
	var r message // r is the response.
	defer func() {
		// r is still nil when no response was assigned below, which is
		// the case when the handler panicked.
		if r == nil {
			// Don't allow a panic to propagate.
			recover()

			// Include a useful log message.
			log.Warningf("panic in handler: %s", debug.Stack())

			// Wrap in an EFAULT error; we don't really have a
			// better way to describe this kind of error. It will
			// usually manifest as a result of the test framework.
			r = newErr(syscall.EFAULT)
		}

		// Clear the tag before sending. That's because as soon as this
		// hits the wire, the client can legally send another message
		// with the same tag.
		cs.ClearTag(tag)

		// Send back the result.
		cs.sendMu.Lock()
		err = send(cs.conn, tag, r)
		cs.sendMu.Unlock()
		cs.sendDone <- err
	}()
	if handler, ok := m.(handler); ok {
		// Call the message handler.
		r = handler.handle(cs)
	} else {
		// Produce an ENOSYS error.
		r = newErr(syscall.ENOSYS)
	}
	// Hand the request message back to the registry for reuse.
	msgRegistry.put(m)
	m = nil // 'm' should not be touched after this point.
}

// handleRequests handles one request per value received on recvOkay and
// returns once recvOkay is closed.
func (cs *connState) handleRequests() {
	for range cs.recvOkay {
		cs.handleRequest()
	}
}

// stop tears down the connection state: it closes the coordination
// channels, drops the FID table's reference on every registered fidRef, and
// closes the underlying socket.
func (cs *connState) stop() {
	// Close all channels.
	close(cs.recvOkay)
	close(cs.recvDone)
	close(cs.sendDone)

	for _, fidRef := range cs.fids {
		// Drop final reference in the FID table. Note this should
		// always close the file, since we've ensured that there are no
		// handlers running via the wait for pending => 0 in service.
		fidRef.DecRef()
	}

	// Ensure the connection is closed.
	cs.conn.Close()
}

// service services requests concurrently.
//
// It returns the first receive error reported on recvDone, after waiting
// for all pending handlers to report on sendDone.
func (cs *connState) service() error {
	// Pending is the number of handlers that have finished receiving but
	// not finished processing requests. These must be waited on properly
	// below. See the next comment for an explanation of the loop.
	pending := 0

	// Start the first request handler.
	go cs.handleRequests() // S/R-SAFE: Irrelevant.
	cs.recvOkay <- true

	// We loop and make sure there's always one goroutine waiting for a new
	// request. We process all the data for a single request in one
	// goroutine however, to ensure the best turnaround time possible.
	for {
		select {
		case err := <-cs.recvDone:
			if err != nil {
				// Wait for pending handlers.
				for i := 0; i < pending; i++ {
					<-cs.sendDone
				}
				return err
			}

			// This handler is now pending.
			pending++

			// Kick the next receiver, or start a new handler
			// if no receiver is currently waiting.
			select {
			case cs.recvOkay <- true:
			default:
				go cs.handleRequests() // S/R-SAFE: Irrelevant.
				cs.recvOkay <- true
			}

		case <-cs.sendDone:
			// This handler is finished.
			pending--

			// Error sending a response? Nothing can be done.
			//
			// We don't terminate on a send error though, since
			// we still have a pending receive. The error is
			// simply ignored here.
		}
	}
}

// Handle handles a single connection.
//
// It blocks until servicing the connection fails, then releases the
// connection state (including closing conn) and returns that error.
func (s *Server) Handle(conn *unet.Socket) error {
	cs := &connState{
		server:   s,
		conn:     conn,
		fids:     make(map[FID]*fidRef),
		tags:     make(map[Tag]chan struct{}),
		recvOkay: make(chan bool),
		recvDone: make(chan error, 10),
		sendDone: make(chan error, 10),
	}
	defer cs.stop()
	return cs.service()
}

// Serve handles requests from the bound socket.
//
// The passed serverSocket _must_ be created in packet mode.
//
// Each accepted connection is handled in its own goroutine. Serve returns
// the Accept error once accepting fails, after waiting for all connection
// goroutines to finish. Errors returned by Handle are discarded.
func (s *Server) Serve(serverSocket *unet.ServerSocket) error {
	var wg sync.WaitGroup
	defer wg.Wait()

	for {
		conn, err := serverSocket.Accept()
		if err != nil {
			// Something went wrong.
			//
			// Socket closed?
			return err
		}

		wg.Add(1)
		go func(conn *unet.Socket) { // S/R-SAFE: Irrelevant.
			s.Handle(conn)
			wg.Done()
		}(conn)
	}
}

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ErrSocket wraps an error that originated from the socket itself.
//
// Callers may treat it differently from other errors.
type ErrSocket struct {
	// error is the underlying socket error.
	error
}

// ErrMessageTooLarge reports a message whose size exceeds the allowed limit.
type ErrMessageTooLarge struct {
	// size is the offending message size.
	size uint32
	// msize is the limit it was checked against.
	msize uint32
}

// Error implements the error interface, reporting both size and limit.
func (e *ErrMessageTooLarge) Error() string {
	const format = "message too large for fixed buffer: size is %d, limit is %d"
	return fmt.Sprintf(format, e.size, e.msize)
}

// ErrNoValidMessage indicates no valid message could be decoded.
var ErrNoValidMessage = errors.New("buffer contained no valid message")

const (
	// headerLength is the number of bytes required for a header.
	headerLength uint32 = 7

	// maximumLength is the largest possible message (4 MiB).
	maximumLength uint32 = 4 << 20

	// initialBufferLength is the size of a freshly allocated pooled buffer.
	initialBufferLength uint32 = 64
)

// dataPool recycles scratch byte slices.
var dataPool = sync.Pool{
	New: func() interface{} {
		// These buffers are used for decoding without a payload.
		scratch := make([]byte, initialBufferLength)
		return scratch
	},
}

// send sends the given message over the socket.
func send(s *unet.Socket, tag Tag, m message) error {
	// Borrow a scratch buffer for the encoded message body; it is handed
	// back to the pool only on the success path at the end.
	data := dataPool.Get().([]byte)
	dataBuf := buffer{data: data[:0]}

	if log.IsLogging(log.Debug) {
		log.Debugf("send [FD %d] [Tag %06d] %s", s.FD(), tag, m.String())
	}

	// Encode the message. The buffer will grow automatically.
	m.Encode(&dataBuf)

	// Get our vectors to send: header, then encoded body (if any), then
	// payload (if any). hdr is filled in further below; the vector
	// references its backing array, so the later writes are visible.
	var hdr [headerLength]byte
	vecs := make([][]byte, 0, 3)
	vecs = append(vecs, hdr[:])
	if len(dataBuf.data) > 0 {
		vecs = append(vecs, dataBuf.data)
	}
	totalLength := headerLength + uint32(len(dataBuf.data))

	// Is there a payload?
	if payloader, ok := m.(payloader); ok {
		p := payloader.Payload()
		if len(p) > 0 {
			vecs = append(vecs, p)
			totalLength += uint32(len(p))
		}
	}

	// Construct the header: total length, message type, tag.
	// headerBuf starts empty but shares hdr's backing array.
	headerBuf := buffer{data: hdr[:0]}
	headerBuf.Write32(totalLength)
	headerBuf.WriteMsgType(m.Type())
	headerBuf.WriteTag(tag)

	// Pack any files if necessary.
	w := s.Writer(true)
	if filer, ok := m.(filer); ok {
		if f := filer.FilePayload(); f != nil {
			// The file payload is closed when send returns.
			defer f.Close()
			// Pack the file into the message.
			w.PackFDs(f.FD())
		}
	}

	// Write until the whole message is out, tolerating partial writes.
	for n := 0; n < int(totalLength); {
		cur, err := w.WriteVec(vecs)
		if err != nil {
			return ErrSocket{err}
		}
		n += cur

		// Consume iovecs: drop fully written vectors and trim the
		// first partially written one.
		for consumed := 0; consumed < cur; {
			if len(vecs[0]) <= cur-consumed {
				consumed += len(vecs[0])
				vecs = vecs[1:]
			} else {
				vecs[0] = vecs[0][cur-consumed:]
				break
			}
		}

		if n > 0 && n < int(totalLength) {
			// Don't resend any control message.
			w.UnpackFDs()
		}
	}

	// All set.
	dataPool.Put(dataBuf.data)
	return nil
}

// lookupTagAndType looks up an existing message or creates a new one.
//
// This is called by recv after decoding the header. Any error returned will be
// propagated back to the caller. You may use messageByType directly as a
// lookupTagAndType function (by design).
type lookupTagAndType func(tag Tag, t MsgType) (message, error)

// recv decodes a message from the socket.
//
// This is done in two parts, and is thus not safe for multiple callers.
//
// On a socket error, the special error type ErrSocket is returned.
//
// The tag value NoTag is returned with every error except a failed lookup:
// in that case the tag decoded from the header is returned alongside the
// lookup error.
func recv(s *unet.Socket, msize uint32, lookup lookupTagAndType) (Tag, message, error) {
	// Read a header.
	//
	// Since the send above is atomic, we must always receive control
	// messages along with the header. This means we need to be careful
	// about closing FDs during errors to prevent leaks.
	var hdr [headerLength]byte
	r := s.Reader(true)
	r.EnableFDs(1)

	// An io.EOF that still delivered some bytes is not treated as fatal.
	n, err := r.ReadVec([][]byte{hdr[:]})
	if err != nil && (n == 0 || err != io.EOF) {
		r.CloseFDs()
		return NoTag, nil, ErrSocket{err}
	}

	fds, err := r.ExtractFDs()
	if err != nil {
		return NoTag, nil, ErrSocket{err}
	}
	defer func() {
		// Close anything left open. The case where
		// fds are caught and used is handled below,
		// and the fds variable will be set to nil.
		for _, fd := range fds {
			syscall.Close(fd)
		}
	}()
	r.EnableFDs(0)

	// Continue reading for a short header.
	for n < int(headerLength) {
		cur, err := r.ReadVec([][]byte{hdr[n:]})
		if err != nil && (cur == 0 || err != io.EOF) {
			return NoTag, nil, ErrSocket{err}
		}
		n += cur
	}

	// Decode the header.
	headerBuf := buffer{data: hdr[:]}
	size := headerBuf.Read32()
	t := headerBuf.ReadMsgType()
	tag := headerBuf.ReadTag()
	if size < headerLength {
		// The message is too small.
		//
		// See above: it's probably screwed.
		return NoTag, nil, ErrSocket{ErrNoValidMessage}
	}
	if size > maximumLength || size > msize {
		// The message is too big.
		return NoTag, nil, ErrSocket{&ErrMessageTooLarge{size, msize}}
	}
	remaining := size - headerLength

	// Find our message to decode.
	m, err := lookup(tag, t)
	if err != nil {
		// Throw away the contents of this message.
		if remaining > 0 {
			io.Copy(ioutil.Discard, &io.LimitedReader{R: s, N: int64(remaining)})
		}
		// The decoded tag is returned here so the caller can respond.
		return tag, nil, err
	}

	// Not yet initialized.
	var dataBuf buffer

	// Read the rest of the payload.
	//
	// This requires some special care to ensure that the vectors all line
	// up the way they should. We do this to minimize copying data around.
	var vecs [][]byte
	if payloader, ok := m.(payloader); ok {
		fixedSize := payloader.FixedSize()

		// Do we need more than there is?
		if fixedSize > remaining {
			// This is not a valid message.
			if remaining > 0 {
				io.Copy(ioutil.Discard, &io.LimitedReader{R: s, N: int64(remaining)})
			}
			return NoTag, nil, ErrNoValidMessage
		}

		if fixedSize != 0 {
			// Pull a data buffer from the pool.
			data := dataPool.Get().([]byte)
			if int(fixedSize) > len(data) {
				// Create a larger data buffer, ensuring
				// sufficient capacity for the message. The
				// larger buffer is what goes back to the pool.
				data = make([]byte, fixedSize)
				defer dataPool.Put(data)
				dataBuf = buffer{data: data}
				vecs = append(vecs, data)
			} else {
				// Limit the data buffer, and make sure it
				// gets filled before the payload buffer.
				defer dataPool.Put(data)
				dataBuf = buffer{data: data[:fixedSize]}
				vecs = append(vecs, data[:fixedSize])
			}
		}

		// Include the payload, reusing the message's existing payload
		// slice only when it has exactly the required length.
		p := payloader.Payload()
		if p == nil || len(p) != int(remaining-fixedSize) {
			p = make([]byte, remaining-fixedSize)
			payloader.SetPayload(p)
		}
		if len(p) > 0 {
			vecs = append(vecs, p)
		}
	} else if remaining != 0 {
		// Pull a data buffer from the pool.
		data := dataPool.Get().([]byte)
		if int(remaining) > len(data) {
			// Create a larger data buffer.
			data = make([]byte, remaining)
			defer dataPool.Put(data)
			dataBuf = buffer{data: data}
			vecs = append(vecs, data)
		} else {
			// Limit the data buffer.
			defer dataPool.Put(data)
			dataBuf = buffer{data: data[:remaining]}
			vecs = append(vecs, data[:remaining])
		}
	}

	if len(vecs) > 0 {
		// Read the rest of the message.
		//
		// No need to handle a control message.
		r := s.Reader(true)
		for n := 0; n < int(remaining); {
			cur, err := r.ReadVec(vecs)
			if err != nil && (cur == 0 || err != io.EOF) {
				return NoTag, nil, ErrSocket{err}
			}
			n += cur

			// Consume iovecs: drop fully filled vectors and trim
			// the first partially filled one.
			for consumed := 0; consumed < cur; {
				if len(vecs[0]) <= cur-consumed {
					consumed += len(vecs[0])
					vecs = vecs[1:]
				} else {
					vecs[0] = vecs[0][cur-consumed:]
					break
				}
			}
		}
	}

	// Decode the message data.
	m.Decode(&dataBuf)
	if dataBuf.isOverrun() {
		// No need to drain the socket.
		return NoTag, nil, ErrNoValidMessage
	}

	// Save the file, if any came out.
	if filer, ok := m.(filer); ok && len(fds) > 0 {
		// Set the file object.
		filer.SetFilePayload(fd.New(fds[0]))

		// Close the rest. We support only one.
		for i := 1; i < len(fds); i++ {
			syscall.Close(fds[i])
		}

		// Don't close in the defer.
		fds = nil
	}

	if log.IsLogging(log.Debug) {
		log.Debugf("recv [FD %d] [Tag %06d] %s", s.FD(), tag, m.String())
	}

	// All set.
	return tag, m, nil
}

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +package p9 + +import ( + "fmt" + "strconv" + "strings" +) + +const ( + // highestSupportedVersion is the highest supported version X in a + // version string of the format 9P2000.L.Google.X. + // + // Clients are expected to start requesting this version number and + // to continuously decrement it until a Tversion request succeeds. + highestSupportedVersion uint32 = 7 + + // lowestSupportedVersion is the lowest supported version X in a + // version string of the format 9P2000.L.Google.X. + // + // Clients are free to send a Tversion request at a version below this + // value but are expected to encounter an Rlerror in response. + lowestSupportedVersion uint32 = 0 + + // baseVersion is the base version of 9P that this package must always + // support. It is equivalent to 9P2000.L.Google.0. + baseVersion = "9P2000.L" +) + +// HighestVersionString returns the highest possible version string that a client +// may request or a server may support. +func HighestVersionString() string { + return versionString(highestSupportedVersion) +} + +// parseVersion parses a Tversion version string into a numeric version number +// if the version string is supported by p9. Otherwise returns (0, false). +// +// From Tversion(9P): "Version strings are defined such that, if the client string +// contains one or more period characters, the initial substring up to but not +// including any single period in the version string defines a version of the protocol." +// +// p9 intentionally diverges from this and always requires that the version string +// start with 9P2000.L to express that it is always compatible with 9P2000.L. The +// only supported versions extensions are of the format 9p2000.L.Google.X where X +// is an ever increasing version counter. +// +// Version 9P2000.L.Google.0 implies 9P2000.L. +// +// New versions must always be a strict superset of 9P2000.L. A version increase must +// define a predicate representing the feature extension introduced by that version. 
The +// predicate must be commented and should take the format: +// +// // VersionSupportsX returns true if version v supports X and must be checked when ... +// func VersionSupportsX(v int32) bool { +// ... +// ) +func parseVersion(str string) (uint32, bool) { + // Special case the base version which lacks the ".Google.X" suffix. This + // version always means version 0. + if str == baseVersion { + return 0, true + } + substr := strings.Split(str, ".") + if len(substr) != 4 { + return 0, false + } + if substr[0] != "9P2000" || substr[1] != "L" || substr[2] != "Google" || len(substr[3]) == 0 { + return 0, false + } + version, err := strconv.ParseUint(substr[3], 10, 32) + if err != nil { + return 0, false + } + return uint32(version), true +} + +// versionString formats a p9 version number into a Tversion version string. +func versionString(version uint32) string { + // Special case the base version so that clients expecting this string + // instead of the 9P2000.L.Google.0 equivalent get it. This is important + // for backwards compatibility with legacy servers that check for exactly + // the baseVersion and allow nothing else. + if version == 0 { + return baseVersion + } + return fmt.Sprintf("9P2000.L.Google.%d", version) +} + +// VersionSupportsTflushf returns true if version v supports the Tflushf message. +// This predicate must be checked by clients before attempting to make a Tflushf +// request. If this predicate returns false, then clients may safely no-op. +func VersionSupportsTflushf(v uint32) bool { + return v >= 1 +} + +// versionSupportsTwalkgetattr returns true if version v supports the +// Twalkgetattr message. This predicate must be checked by clients before +// attempting to make a Twalkgetattr request. +func versionSupportsTwalkgetattr(v uint32) bool { + return v >= 2 +} + +// versionSupportsTucreation returns true if version v supports the Tucreation +// messages (Tucreate, Tusymlink, Tumkdir, Tumknod). 
// This predicate must be checked by clients before attempting to make a
// Tucreation request. If Tucreation messages are not supported, their
// non-UID supporting counterparts (Tlcreate, Tsymlink, Tmkdir, Tmknod)
// should be used.
func versionSupportsTucreation(v uint32) bool {
	const minVersion = 3
	return v >= minVersion
}

// VersionSupportsConnect returns true if version v supports the Tlconnect
// message. Clients must check this predicate before attempting to make a
// Tlconnect request. If Tlconnect messages are not supported, Tlopen should
// be used.
func VersionSupportsConnect(v uint32) bool {
	const minVersion = 4
	return v >= minVersion
}

// VersionSupportsAnonymous returns true if version v supports Tlconnect
// with the AnonymousSocket mode. Clients must check this predicate before
// attempting to use the AnonymousSocket Tlconnect mode.
func VersionSupportsAnonymous(v uint32) bool {
	const minVersion = 5
	return v >= minVersion
}

// VersionSupportsMultiUser returns true if version v supports multi-user fake
// directory permissions and ID values.
func VersionSupportsMultiUser(v uint32) bool {
	const minVersion = 6
	return v >= minVersion
}

// versionSupportsTallocate returns true if version v supports Allocate().
func versionSupportsTallocate(v uint32) bool {
	const minVersion = 7
	return v >= minVersion
}