diff options
103 files changed, 2212 insertions, 628 deletions
diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py index ca785a148..fc59cf505 100644 --- a/benchmarks/runner/__init__.py +++ b/benchmarks/runner/__init__.py @@ -19,6 +19,7 @@ import logging import pkgutil import pydoc import re +import subprocess import sys import types from typing import List @@ -125,9 +126,8 @@ def run_gcp(ctx, image_file: str, zone_file: str, internal: bool, """Runs all benchmarks on GCP instances.""" # Resolve all files. - image = open(image_file).read().rstrip() - zone = open(zone_file).read().rstrip() - + image = subprocess.check_output([image_file]).rstrip() + zone = subprocess.check_output([zone_file]).rstrip() key_file = harness.make_key() producer = gcloud_producer.GCloudProducer( diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py index 194804527..e8289f6c5 100644 --- a/benchmarks/runner/commands.py +++ b/benchmarks/runner/commands.py @@ -101,15 +101,15 @@ class GCPCommand(RunCommand): image_file = click.core.Option( ("--image_file",), - help="The file containing the image for VMs.", + help="The binary that emits the GCP image.", default=os.path.join( - os.path.dirname(__file__), "../../tools/images/ubuntu1604.txt"), + os.path.dirname(__file__), "../../tools/images/ubuntu1604"), ) zone_file = click.core.Option( ("--zone_file",), - help="The file containing the GCP zone.", + help="The binary that emits the GCP zone.", default=os.path.join( - os.path.dirname(__file__), "../../tools/images/zone.txt"), + os.path.dirname(__file__), "../../tools/images/zone"), ) internal = click.core.Option( ("--internal/--no-internal",), diff --git a/pkg/context/context.go b/pkg/context/context.go index 23e009ef3..5319b6d8d 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -127,10 +127,6 @@ func (logContext) Value(key interface{}) interface{} { var bgContext = &logContext{Logger: log.Log()} // Background returns an empty context using the default logger. -// -// Users should be wary of using a Background context. Please tag any use with -// FIXME(b/38173783) and a note to remove this use. -// // Generally, one should use the Task as their context when available, or avoid // having to use a context in places where a Task is unavailable. // diff --git a/pkg/eventchannel/event_test.go b/pkg/eventchannel/event_test.go index 7f41b4a27..43750360b 100644 --- a/pkg/eventchannel/event_test.go +++ b/pkg/eventchannel/event_test.go @@ -78,7 +78,7 @@ func TestMultiEmitter(t *testing.T) { for _, name := range names { m := testMessage{name: name} if _, err := me.Emit(m); err != nil { - t.Fatal("me.Emit(%v) failed: %v", m, err) + t.Fatalf("me.Emit(%v) failed: %v", m, err) } } @@ -96,7 +96,7 @@ func TestMultiEmitter(t *testing.T) { // Close multiEmitter. if err := me.Close(); err != nil { - t.Fatal("me.Close() failed: %v", err) + t.Fatalf("me.Close() failed: %v", err) } // All testEmitters should be closed. diff --git a/pkg/log/glog.go b/pkg/log/glog.go index b4f7bb5a4..f57c4427b 100644 --- a/pkg/log/glog.go +++ b/pkg/log/glog.go @@ -25,7 +25,7 @@ import ( // GoogleEmitter is a wrapper that emits logs in a format compatible with // package github.com/golang/glog. type GoogleEmitter struct { - Writer + *Writer } // pid is used for the threadid component of the header. @@ -46,7 +46,7 @@ var pid = os.Getpid() // line The line number // msg The user-supplied message // -func (g *GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format string, args ...interface{}) { +func (g GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format string, args ...interface{}) { // Log level. prefix := byte('?') switch level { @@ -81,5 +81,5 @@ func (g *GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format message := fmt.Sprintf(format, args...) // Emit the formatted result. - fmt.Fprintf(&g.Writer, "%c%02d%02d %02d:%02d:%02d.%06d % 7d %s:%d] %s\n", prefix, int(month), day, hour, minute, second, microsecond, pid, file, line, message) + fmt.Fprintf(g.Writer, "%c%02d%02d %02d:%02d:%02d.%06d % 7d %s:%d] %s\n", prefix, int(month), day, hour, minute, second, microsecond, pid, file, line, message) } diff --git a/pkg/log/json.go b/pkg/log/json.go index 0943db1cc..bdf9d691e 100644 --- a/pkg/log/json.go +++ b/pkg/log/json.go @@ -58,7 +58,7 @@ func (lv *Level) UnmarshalJSON(b []byte) error { // JSONEmitter logs messages in json format. type JSONEmitter struct { - Writer + *Writer } // Emit implements Emitter.Emit. diff --git a/pkg/log/json_k8s.go b/pkg/log/json_k8s.go index 6c6fc8b6f..5883e95e1 100644 --- a/pkg/log/json_k8s.go +++ b/pkg/log/json_k8s.go @@ -29,11 +29,11 @@ type k8sJSONLog struct { // K8sJSONEmitter logs messages in json format that is compatible with // Kubernetes fluent configuration. type K8sJSONEmitter struct { - Writer + *Writer } // Emit implements Emitter.Emit. -func (e *K8sJSONEmitter) Emit(_ int, level Level, timestamp time.Time, format string, v ...interface{}) { +func (e K8sJSONEmitter) Emit(_ int, level Level, timestamp time.Time, format string, v ...interface{}) { j := k8sJSONLog{ Log: fmt.Sprintf(format, v...), Level: level, diff --git a/pkg/log/log.go b/pkg/log/log.go index a794da1aa..37e0605ad 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -374,5 +374,5 @@ func CopyStandardLogTo(l Level) error { func init() { // Store the initial value for the log. - log.Store(&BasicLogger{Level: Info, Emitter: &GoogleEmitter{Writer{Next: os.Stderr}}}) + log.Store(&BasicLogger{Level: Info, Emitter: GoogleEmitter{&Writer{Next: os.Stderr}}}) } diff --git a/pkg/log/log_test.go b/pkg/log/log_test.go index 402cc29ae..9ff18559b 100644 --- a/pkg/log/log_test.go +++ b/pkg/log/log_test.go @@ -52,7 +52,7 @@ func TestDropMessages(t *testing.T) { t.Fatalf("Write should have failed") } - fmt.Printf("writer: %+v\n", w) + fmt.Printf("writer: %#v\n", &w) tw.fail = false if _, err := w.Write([]byte("line 2\n")); err != nil { @@ -76,7 +76,7 @@ func TestDropMessages(t *testing.T) { func TestCaller(t *testing.T) { tw := &testWriter{} - e := &GoogleEmitter{Writer: Writer{Next: tw}} + e := GoogleEmitter{Writer: &Writer{Next: tw}} bl := &BasicLogger{ Emitter: e, Level: Debug, @@ -94,7 +94,7 @@ func BenchmarkGoogleLogging(b *testing.B) { tw := &testWriter{ limit: 1, // Only record one message. } - e := &GoogleEmitter{Writer: Writer{Next: tw}} + e := GoogleEmitter{Writer: &Writer{Next: tw}} bl := &BasicLogger{ Emitter: e, Level: Debug, diff --git a/pkg/p9/client.go b/pkg/p9/client.go index a6f493b82..71e944c30 100644 --- a/pkg/p9/client.go +++ b/pkg/p9/client.go @@ -174,7 +174,7 @@ func NewClient(socket *unet.Socket, messageSize uint32, version string) (*Client // our sendRecv function to use that functionality. Otherwise, // we stick to sendRecvLegacy. rversion := Rversion{} - err := c.sendRecvLegacy(&Tversion{ + _, err := c.sendRecvLegacy(&Tversion{ Version: versionString(requested), MSize: messageSize, }, &rversion) @@ -219,11 +219,11 @@ func NewClient(socket *unet.Socket, messageSize uint32, version string) (*Client c.sendRecv = c.sendRecvChannel } else { // Channel setup failed; fallback. - c.sendRecv = c.sendRecvLegacy + c.sendRecv = c.sendRecvLegacySyscallErr } } else { // No channels available: use the legacy mechanism. - c.sendRecv = c.sendRecvLegacy + c.sendRecv = c.sendRecvLegacySyscallErr } // Ensure that the socket and channels are closed when the socket is shut @@ -305,7 +305,7 @@ func (c *Client) openChannel(id int) error { ) // Open the data channel. - if err := c.sendRecvLegacy(&Tchannel{ + if _, err := c.sendRecvLegacy(&Tchannel{ ID: uint32(id), Control: 0, }, &rchannel0); err != nil { @@ -319,7 +319,7 @@ func (c *Client) openChannel(id int) error { defer rchannel0.FilePayload().Close() // Open the channel for file descriptors. - if err := c.sendRecvLegacy(&Tchannel{ + if _, err := c.sendRecvLegacy(&Tchannel{ ID: uint32(id), Control: 1, }, &rchannel1); err != nil { @@ -431,13 +431,28 @@ func (c *Client) waitAndRecv(done chan error) error { } } +// sendRecvLegacySyscallErr is a wrapper for sendRecvLegacy that converts all +// non-syscall errors to EIO. +func (c *Client) sendRecvLegacySyscallErr(t message, r message) error { + received, err := c.sendRecvLegacy(t, r) + if !received { + log.Warningf("p9.Client.sendRecvChannel: %v", err) + return syscall.EIO + } + return err +} + // sendRecvLegacy performs a roundtrip message exchange. // +// sendRecvLegacy returns true if a message was received. This allows us to +// differentiate between failed receives and successful receives where the +// response was an error message. +// // This is called by internal functions. -func (c *Client) sendRecvLegacy(t message, r message) error { +func (c *Client) sendRecvLegacy(t message, r message) (bool, error) { tag, ok := c.tagPool.Get() if !ok { - return ErrOutOfTags + return false, ErrOutOfTags } defer c.tagPool.Put(tag) @@ -457,12 +472,12 @@ func (c *Client) sendRecvLegacy(t message, r message) error { err := send(c.socket, Tag(tag), t) c.sendMu.Unlock() if err != nil { - return err + return false, err } // Co-ordinate with other receivers. if err := c.waitAndRecv(resp.done); err != nil { - return err + return false, err } // Is it an error message? @@ -470,14 +485,14 @@ func (c *Client) sendRecvLegacy(t message, r message) error { // For convenience, we transform these directly // into errors. Handlers need not handle this case. if rlerr, ok := resp.r.(*Rlerror); ok { - return syscall.Errno(rlerr.Error) + return true, syscall.Errno(rlerr.Error) } // At this point, we know it matches. // // Per recv call above, we will only allow a type // match (and give our r) or an instance of Rlerror. - return nil + return true, nil } // sendRecvChannel uses channels to send a message. @@ -486,7 +501,7 @@ func (c *Client) sendRecvChannel(t message, r message) error { c.channelsMu.Lock() if len(c.availableChannels) == 0 { c.channelsMu.Unlock() - return c.sendRecvLegacy(t, r) + return c.sendRecvLegacySyscallErr(t, r) } idx := len(c.availableChannels) - 1 ch := c.availableChannels[idx] @@ -526,7 +541,11 @@ func (c *Client) sendRecvChannel(t message, r message) error { } // Parse the server's response. - _, retErr := ch.recv(r, rsz) + resp, retErr := ch.recv(r, rsz) + if resp == nil { + log.Warningf("p9.Client.sendRecvChannel: p9.channel.recv: %v", retErr) + retErr = syscall.EIO + } // Release the channel. c.channelsMu.Lock() diff --git a/pkg/p9/client_test.go b/pkg/p9/client_test.go index 29a0afadf..c757583e0 100644 --- a/pkg/p9/client_test.go +++ b/pkg/p9/client_test.go @@ -96,7 +96,12 @@ func benchmarkSendRecv(b *testing.B, fn func(c *Client) func(message, message) e } func BenchmarkSendRecvLegacy(b *testing.B) { - benchmarkSendRecv(b, func(c *Client) func(message, message) error { return c.sendRecvLegacy }) + benchmarkSendRecv(b, func(c *Client) func(message, message) error { + return func(t message, r message) error { + _, err := c.sendRecvLegacy(t, r) + return err + } + }) } func BenchmarkSendRecvChannel(b *testing.B) { diff --git a/pkg/p9/transport_flipcall.go b/pkg/p9/transport_flipcall.go index a0d274f3b..38038abdf 100644 --- a/pkg/p9/transport_flipcall.go +++ b/pkg/p9/transport_flipcall.go @@ -236,7 +236,7 @@ func (ch *channel) recv(r message, rsz uint32) (message, error) { // Convert errors appropriately; see above. if rlerr, ok := r.(*Rlerror); ok { - return nil, syscall.Errno(rlerr.Error) + return r, syscall.Errno(rlerr.Error) } return r, nil diff --git a/pkg/segment/test/segment_test.go b/pkg/segment/test/segment_test.go index f19a005f3..97b16c158 100644 --- a/pkg/segment/test/segment_test.go +++ b/pkg/segment/test/segment_test.go @@ -63,7 +63,7 @@ func checkSet(s *Set, expectedSegments int) error { return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) } if got, want := seg.Value(), seg.Start()+valueOffset; got != want { - return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start, got, want) + return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start(), got, want) } prev = next havePrev = true diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go index 09bceabc9..1108fa0bd 100644 --- a/pkg/sentry/arch/stack.go +++ b/pkg/sentry/arch/stack.go @@ -97,7 +97,6 @@ func (s *Stack) Push(vals ...interface{}) (usermem.Addr, error) { if c < 0 { return 0, fmt.Errorf("bad binary.Size for %T", v) } - // TODO(b/38173783): Use a real context.Context. n, err := usermem.CopyObjectOut(context.Background(), s.IO, s.Bottom-usermem.Addr(c), norm, usermem.IOOpts{}) if err != nil || c != n { return 0, err @@ -121,11 +120,9 @@ func (s *Stack) Pop(vals ...interface{}) (usermem.Addr, error) { var err error if isVaddr { value := s.Arch.Native(uintptr(0)) - // TODO(b/38173783): Use a real context.Context. n, err = usermem.CopyObjectIn(context.Background(), s.IO, s.Bottom, value, usermem.IOOpts{}) *vaddr = usermem.Addr(s.Arch.Value(value)) } else { - // TODO(b/38173783): Use a real context.Context. n, err = usermem.CopyObjectIn(context.Background(), s.IO, s.Bottom, v, usermem.IOOpts{}) } if err != nil { diff --git a/pkg/sentry/contexttest/contexttest.go b/pkg/sentry/contexttest/contexttest.go index 031fc64ec..8e5658c7a 100644 --- a/pkg/sentry/contexttest/contexttest.go +++ b/pkg/sentry/contexttest/contexttest.go @@ -97,7 +97,7 @@ type hostClock struct { } // Now implements ktime.Clock.Now. -func (hostClock) Now() ktime.Time { +func (*hostClock) Now() ktime.Time { return ktime.FromNanoseconds(time.Now().UnixNano()) } @@ -127,7 +127,7 @@ func (t *TestContext) Value(key interface{}) interface{} { case uniqueid.CtxInotifyCookie: return atomic.AddUint32(&lastInotifyCookie, 1) case ktime.CtxRealtimeClock: - return hostClock{} + return &hostClock{} default: if val, ok := t.otherValues[key]; ok { return val diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index 5aff0cc95..a0082ecca 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -119,7 +119,7 @@ func TestNewPipe(t *testing.T) { continue } if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %s, want %s", test.desc, flags, test.flags) + t.Errorf("%s: got file flags %v, want %v", test.desc, flags, test.flags) continue } if len(test.readAheadBuffer) != len(p.readAheadBuffer) { @@ -136,7 +136,7 @@ func TestNewPipe(t *testing.T) { continue } if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD) + t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD()) } } } diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index ff96b28ba..edd6576aa 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -34,7 +34,6 @@ func (f *fileOperations) afterLoad() { flags := f.flags flags.Truncate = false - // TODO(b/38173783): Context is not plumbed to save/restore. f.handles, err = f.inodeOperations.fileState.getHandles(context.Background(), flags, f.inodeOperations.cachingInodeOps) if err != nil { return fmt.Errorf("failed to re-open handle: %v", err) diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index 9f7c3e89f..fc14249be 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -57,7 +57,6 @@ func (h *handles) DecRef() { } } } - // FIXME(b/38173783): Context is not plumbed here. if err := h.File.close(context.Background()); err != nil { log.Warningf("error closing p9 file: %v", err) } diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 811e8ea30..a016c896e 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -273,7 +273,7 @@ func (i *inodeFileState) recreateReadHandles(ctx context.Context, writer *handle // operations on the old will see the new data. Then, make the new handle take // ownereship of the old FD and mark the old readHandle to not close the FD // when done. - if err := syscall.Dup3(h.Host.FD(), i.readHandles.Host.FD(), 0); err != nil { + if err := syscall.Dup3(h.Host.FD(), i.readHandles.Host.FD(), syscall.O_CLOEXEC); err != nil { return err } diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index 238f7804c..a3402e343 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -123,7 +123,6 @@ func (i *inodeFileState) afterLoad() { // beforeSave. return fmt.Errorf("failed to find path for inode number %d. Device %s contains %s", i.sattr.InodeID, i.s.connID, fs.InodeMappings(i.s.inodeMappings)) } - // TODO(b/38173783): Context is not plumbed to save/restore. ctx := &dummyClockContext{context.Background()} _, i.file, err = i.s.attach.walk(ctx, splitAbsolutePath(name)) diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 111da59f9..2d398b753 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -104,7 +104,6 @@ func (s *session) afterLoad() { // If private unix sockets are enabled, create and fill the session's endpoint // maps. if opts.privateunixsocket { - // TODO(b/38173783): Context is not plumbed to save/restore. ctx := &dummyClockContext{context.Background()} if err = s.restoreEndpointMaps(ctx); err != nil { diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index eb4afe520..affdbcacb 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -199,14 +199,14 @@ func TestListen(t *testing.T) { } func TestPasscred(t *testing.T) { - e := ConnectedEndpoint{} + e := &ConnectedEndpoint{} if got, want := e.Passcred(), false; got != want { t.Errorf("Got %#v.Passcred() = %t, want = %t", e, got, want) } } func TestGetLocalAddress(t *testing.T) { - e := ConnectedEndpoint{path: "foo"} + e := &ConnectedEndpoint{path: "foo"} want := tcpip.FullAddress{Addr: tcpip.Address("foo")} if got, err := e.GetLocalAddress(); err != nil || got != want { t.Errorf("Got %#v.GetLocalAddress() = %#v, %v, want = %#v, %v", e, got, err, want, nil) @@ -214,7 +214,7 @@ func TestGetLocalAddress(t *testing.T) { } func TestQueuedSize(t *testing.T) { - e := ConnectedEndpoint{} + e := &ConnectedEndpoint{} tests := []struct { name string f func() int64 diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 73f89abcc..a34fbc946 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -102,7 +102,6 @@ func (i *Inode) DecRef() { // destroy releases the Inode and releases the msrc reference taken. func (i *Inode) destroy() { - // FIXME(b/38173783): Context is not plumbed here. ctx := context.Background() if err := i.WriteOut(ctx); err != nil { // FIXME(b/65209558): Mark as warning again once noatime is diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index d4c4b533d..702fdd392 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -80,7 +80,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir } // Truncate implements fs.InodeOperations.Truncate. -func (tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { +func (*tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { return nil } @@ -196,7 +196,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f } // Truncate implements fs.InodeOperations.Truncate. -func (tcpSack) Truncate(context.Context, *fs.Inode, int64) error { +func (*tcpSack) Truncate(context.Context, *fs.Inode, int64) error { return nil } diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 48eaccdbc..afea58f65 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -476,7 +476,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { _, _, err := fs.walk(rp, false) if err != nil { return nil, err @@ -485,7 +485,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { _, _, err := fs.walk(rp, false) if err != nil { return "", err diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 137260898..cd744bf5e 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1080,7 +1080,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { var ds *[]*dentry fs.renameMu.RLock() defer fs.renameMuRUnlockAndCheckCaching(&ds) @@ -1088,11 +1088,11 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ if err != nil { return nil, err } - return d.listxattr(ctx) + return d.listxattr(ctx, rp.Credentials(), size) } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { var ds *[]*dentry fs.renameMu.RLock() defer fs.renameMuRUnlockAndCheckCaching(&ds) @@ -1100,7 +1100,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, nam if err != nil { return "", err } - return d.getxattr(ctx, name) + return d.getxattr(ctx, rp.Credentials(), &opts) } // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. @@ -1112,7 +1112,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return err } - return d.setxattr(ctx, &opts) + return d.setxattr(ctx, rp.Credentials(), &opts) } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. @@ -1124,7 +1124,7 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, if err != nil { return err } - return d.removexattr(ctx, name) + return d.removexattr(ctx, rp.Credentials(), name) } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 20edaf643..2485cdb53 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -34,6 +34,7 @@ package gofer import ( "fmt" "strconv" + "strings" "sync" "sync/atomic" "syscall" @@ -1024,21 +1025,50 @@ func (d *dentry) setDeleted() { atomic.StoreUint32(&d.deleted, 1) } -func (d *dentry) listxattr(ctx context.Context) ([]string, error) { - return nil, syserror.ENOTSUP +// We only support xattrs prefixed with "user." (see b/148380782). Currently, +// there is no need to expose any other xattrs through a gofer. +func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) { + xattrMap, err := d.file.listXattr(ctx, size) + if err != nil { + return nil, err + } + xattrs := make([]string, 0, len(xattrMap)) + for x := range xattrMap { + if strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { + xattrs = append(xattrs, x) + } + } + return xattrs, nil } -func (d *dentry) getxattr(ctx context.Context, name string) (string, error) { - // TODO(jamieliu): add vfs.GetxattrOptions.Size - return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX) +func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { + if err := d.checkPermissions(creds, vfs.MayRead); err != nil { + return "", err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + return d.file.getXattr(ctx, opts.Name, opts.Size) } -func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error { +func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetxattrOptions) error { + if err := d.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags) } -func (d *dentry) removexattr(ctx context.Context, name string) error { - return syserror.ENOTSUP +func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name string) error { + if err := d.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + return d.file.removeXattr(ctx, name) } // Preconditions: d.isRegularFile() || d.isDirectory(). @@ -1089,7 +1119,7 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool // description, but this doesn't matter since they refer to the // same file (unless d.fs.opts.overlayfsStaleRead is true, // which we handle separately). - if err := syscall.Dup3(int(h.fd), int(d.handle.fd), 0); err != nil { + if err := syscall.Dup3(int(h.fd), int(d.handle.fd), syscall.O_CLOEXEC); err != nil { d.handleMu.Unlock() ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err) h.close(ctx) @@ -1189,21 +1219,21 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) } // Listxattr implements vfs.FileDescriptionImpl.Listxattr. -func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) { - return fd.dentry().listxattr(ctx) +func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { + return fd.dentry().listxattr(ctx, auth.CredentialsFromContext(ctx), size) } // Getxattr implements vfs.FileDescriptionImpl.Getxattr. -func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) { - return fd.dentry().getxattr(ctx, name) +func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) { + return fd.dentry().getxattr(ctx, auth.CredentialsFromContext(ctx), &opts) } // Setxattr implements vfs.FileDescriptionImpl.Setxattr. func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { - return fd.dentry().setxattr(ctx, &opts) + return fd.dentry().setxattr(ctx, auth.CredentialsFromContext(ctx), &opts) } // Removexattr implements vfs.FileDescriptionImpl.Removexattr. func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { - return fd.dentry().removexattr(ctx, name) + return fd.dentry().removexattr(ctx, auth.CredentialsFromContext(ctx), name) } diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go index 755ac2985..87f0b877f 100644 --- a/pkg/sentry/fsimpl/gofer/p9file.go +++ b/pkg/sentry/fsimpl/gofer/p9file.go @@ -85,6 +85,13 @@ func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAt return err } +func (f p9file) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { + ctx.UninterruptibleSleepStart(false) + xattrs, err := f.file.ListXattr(size) + ctx.UninterruptibleSleepFinish(false) + return xattrs, err +} + func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) { ctx.UninterruptibleSleepStart(false) val, err := f.file.GetXattr(name, size) @@ -99,6 +106,13 @@ func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) return err } +func (f p9file) removeXattr(ctx context.Context, name string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.RemoveXattr(name) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := f.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 4433071aa..baf81b4db 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -763,7 +763,7 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() @@ -776,7 +776,7 @@ func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index 6b2a77328..6595fcee6 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -688,9 +688,9 @@ func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { if line.prefix == "Tcp" { tcp := stat.(*inet.StatSNMPTCP) // "Tcp" needs special processing because MaxConn is signed. RFC 2012. - fmt.Sprintf("%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) + fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) } else { - fmt.Sprintf("%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) + fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) } } return nil diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index f2ac23c88..4e6cd3491 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -51,6 +51,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sentry/vfs/lock", + "//pkg/sentry/vfs/memxattr", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 5339d7072..f4d50d64f 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -696,51 +696,47 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } - // TODO(b/127675828): support extended attributes - return nil, syserror.ENOTSUP + return d.inode.listxattr(size) } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return "", err } - // TODO(b/127675828): support extended attributes - return "", syserror.ENOTSUP + return d.inode.getxattr(rp.Credentials(), &opts) } // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return err } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP + return d.inode.setxattr(rp.Credentials(), &opts) } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return err } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP + return d.inode.removexattr(rp.Credentials(), name) } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index 3e02e7190..d4f59ee5b 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -140,7 +140,7 @@ func TestSetStatAtime(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -155,7 +155,7 @@ func TestSetStatAtime(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) @@ -205,7 +205,7 @@ func TestSetStat(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -220,7 +220,7 @@ func TestSetStat(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 654e788e3..9fa8637d5 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -27,6 +27,7 @@ package tmpfs import ( "fmt" "math" + "strings" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -37,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/vfs/lock" + "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -186,6 +188,11 @@ type inode struct { // filesystem.RmdirAt() drops the reference. refs int64 + // xattrs implements extended attributes. + // + // TODO(b/148380782): Support xattrs other than user.* + xattrs memxattr.SimpleExtendedAttributes + // Inode metadata. Writing multiple fields atomically requires holding // mu, othewise atomic operations can be used. mu sync.Mutex @@ -535,6 +542,56 @@ func (i *inode) touchCMtimeLocked() { atomic.StoreInt64(&i.ctime, now) } +func (i *inode) listxattr(size uint64) ([]string, error) { + return i.xattrs.Listxattr(size) +} + +func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { + if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + return "", err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return "", syserror.ENODATA + } + return i.xattrs.Getxattr(opts) +} + +func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error { + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return syserror.EPERM + } + return i.xattrs.Setxattr(opts) +} + +func (i *inode) removexattr(creds *auth.Credentials, name string) error { + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return syserror.EPERM + } + return i.xattrs.Removexattr(name) +} + +// Extended attributes in the user.* namespace are only supported for regular +// files and directories. +func (i *inode) userXattrSupported() bool { + filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) + return filetype == linux.S_IFREG || filetype == linux.S_IFDIR +} + // fileDescription is embedded by tmpfs implementations of // vfs.FileDescriptionImpl. type fileDescription struct { @@ -562,3 +619,23 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) creds := auth.CredentialsFromContext(ctx) return fd.inode().setStat(ctx, creds, &opts.Stat) } + +// Listxattr implements vfs.FileDescriptionImpl.Listxattr. +func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { + return fd.inode().listxattr(size) +} + +// Getxattr implements vfs.FileDescriptionImpl.Getxattr. +func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) { + return fd.inode().getxattr(auth.CredentialsFromContext(ctx), &opts) +} + +// Setxattr implements vfs.FileDescriptionImpl.Setxattr. +func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { + return fd.inode().setxattr(auth.CredentialsFromContext(ctx), &opts) +} + +// Removexattr implements vfs.FileDescriptionImpl.Removexattr. +func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { + return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name) +} diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index d09d97825..ed40b5303 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -307,6 +307,61 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags return fds, nil } +// NewFDsVFS2 allocates new FDs guaranteed to be the lowest number available +// greater than or equal to the fd parameter. All files will share the set +// flags. Success is guaranteed to be all or none. +func (f *FDTable) NewFDsVFS2(ctx context.Context, fd int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { + if fd < 0 { + // Don't accept negative FDs. + return nil, syscall.EINVAL + } + + // Default limit. + end := int32(math.MaxInt32) + + // Ensure we don't get past the provided limit. + if limitSet := limits.FromContext(ctx); limitSet != nil { + lim := limitSet.Get(limits.NumberOfFiles) + if lim.Cur != limits.Infinity { + end = int32(lim.Cur) + } + if fd >= end { + return nil, syscall.EMFILE + } + } + + f.mu.Lock() + defer f.mu.Unlock() + + // From f.next to find available fd. + if fd < f.next { + fd = f.next + } + + // Install all entries. + for i := fd; i < end && len(fds) < len(files); i++ { + if d, _, _ := f.getVFS2(i); d == nil { + f.setVFS2(i, files[len(fds)], flags) // Set the descriptor. + fds = append(fds, i) // Record the file descriptor. + } + } + + // Failure? Unwind existing FDs. + if len(fds) < len(files) { + for _, i := range fds { + f.setVFS2(i, nil, FDFlags{}) // Zap entry. + } + return nil, syscall.EMFILE + } + + if fd == f.next { + // Update next search start position. + f.next = fds[len(fds)-1] + 1 + } + + return fds, nil +} + // NewFDVFS2 allocates a file descriptor greater than or equal to minfd for // the given file description. If it succeeds, it takes a reference on file. func (f *FDTable) NewFDVFS2(ctx context.Context, minfd int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 208569057..f66cfcc7f 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -461,7 +461,7 @@ func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.A func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) { s.mu.Lock() defer s.mu.Unlock() - // TODO(b/38173783): RemoveMapping may be called during task exit, when ctx + // RemoveMapping may be called during task exit, when ctx // is context.Background. Gracefully handle missing clocks. Failing to // update the detach time in these cases is ok, since no one can observe the // omission. diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 93c4fe969..2e3565747 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -209,65 +209,61 @@ type Stracer interface { // SyscallEnter is called on syscall entry. // // The returned private data is passed to SyscallExit. - // - // TODO(gvisor.dev/issue/155): remove kernel imports from the strace - // package so that the type can be used directly. SyscallEnter(t *Task, sysno uintptr, args arch.SyscallArguments, flags uint32) interface{} // SyscallExit is called on syscall exit. SyscallExit(context interface{}, t *Task, sysno, rval uintptr, err error) } -// SyscallTable is a lookup table of system calls. Critically, a SyscallTable -// is *immutable*. In order to make supporting suspend and resume sane, they -// must be uniquely registered and may not change during operation. +// SyscallTable is a lookup table of system calls. // -// +stateify savable +// Note that a SyscallTable is not savable directly. Instead, they are saved as +// an OS/Arch pair and lookup happens again on restore. type SyscallTable struct { // OS is the operating system that this syscall table implements. - OS abi.OS `state:"wait"` + OS abi.OS // Arch is the architecture that this syscall table targets. - Arch arch.Arch `state:"wait"` + Arch arch.Arch // The OS version that this syscall table implements. - Version Version `state:"manual"` + Version Version // AuditNumber is a numeric constant that represents the syscall table. If // non-zero, auditNumber must be one of the AUDIT_ARCH_* values defined by // linux/audit.h. - AuditNumber uint32 `state:"manual"` + AuditNumber uint32 // Table is the collection of functions. - Table map[uintptr]Syscall `state:"manual"` + Table map[uintptr]Syscall // lookup is a fixed-size array that holds the syscalls (indexed by // their numbers). It is used for fast look ups. - lookup []SyscallFn `state:"manual"` + lookup []SyscallFn // Emulate is a collection of instruction addresses to emulate. The // keys are addresses, and the values are system call numbers. - Emulate map[usermem.Addr]uintptr `state:"manual"` + Emulate map[usermem.Addr]uintptr // The function to call in case of a missing system call. - Missing MissingFn `state:"manual"` + Missing MissingFn // Stracer traces this syscall table. - Stracer Stracer `state:"manual"` + Stracer Stracer // External is used to handle an external callback. - External func(*Kernel) `state:"manual"` + External func(*Kernel) // ExternalFilterBefore is called before External is called before the syscall is executed. // External is not called if it returns false. - ExternalFilterBefore func(*Task, uintptr, arch.SyscallArguments) bool `state:"manual"` + ExternalFilterBefore func(*Task, uintptr, arch.SyscallArguments) bool // ExternalFilterAfter is called before External is called after the syscall is executed. // External is not called if it returns false. - ExternalFilterAfter func(*Task, uintptr, arch.SyscallArguments) bool `state:"manual"` + ExternalFilterAfter func(*Task, uintptr, arch.SyscallArguments) bool // FeatureEnable stores the strace and one-shot enable bits. - FeatureEnable SyscallFlagsTable `state:"manual"` + FeatureEnable SyscallFlagsTable } // allSyscallTables contains all known tables. diff --git a/pkg/sentry/kernel/syscalls_state.go b/pkg/sentry/kernel/syscalls_state.go index 00358326b..90f890495 100644 --- a/pkg/sentry/kernel/syscalls_state.go +++ b/pkg/sentry/kernel/syscalls_state.go @@ -14,16 +14,34 @@ package kernel -import "fmt" +import ( + "fmt" -// afterLoad is invoked by stateify. -func (s *SyscallTable) afterLoad() { - otherTable, ok := LookupSyscallTable(s.OS, s.Arch) - if !ok { - // Couldn't find a reference? - panic(fmt.Sprintf("syscall table not found for OS %v Arch %v", s.OS, s.Arch)) + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/sentry/arch" +) + +// syscallTableInfo is used to reload the SyscallTable. +// +// +stateify savable +type syscallTableInfo struct { + OS abi.OS + Arch arch.Arch +} + +// saveSt saves the SyscallTable. +func (tc *TaskContext) saveSt() syscallTableInfo { + return syscallTableInfo{ + OS: tc.st.OS, + Arch: tc.st.Arch, } +} - // Copy the table. - *s = *otherTable +// loadSt loads the SyscallTable. +func (tc *TaskContext) loadSt(sti syscallTableInfo) { + st, ok := LookupSyscallTable(sti.OS, sti.Arch) + if !ok { + panic(fmt.Sprintf("syscall table not found for OS %v, Arch %v", sti.OS, sti.Arch)) + } + tc.st = st // Save the table reference. } diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index d6546735e..e5d133d6c 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -777,6 +777,15 @@ func (t *Task) NewFDs(fd int32, files []*fs.File, flags FDFlags) ([]int32, error return t.fdTable.NewFDs(t, fd, files, flags) } +// NewFDsVFS2 is a convenience wrapper for t.FDTable().NewFDsVFS2. +// +// This automatically passes the task as the context. +// +// Precondition: same as FDTable. +func (t *Task) NewFDsVFS2(fd int32, files []*vfs.FileDescription, flags FDFlags) ([]int32, error) { + return t.fdTable.NewFDsVFS2(t, fd, files, flags) +} + // NewFDFrom is a convenience wrapper for t.FDTable().NewFDs with a single file. // // This automatically passes the task as the context. diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index 0158b1788..9fa528384 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -49,7 +49,7 @@ type TaskContext struct { fu *futex.Manager // st is the task's syscall table. - st *SyscallTable + st *SyscallTable `state:".(syscallTableInfo)"` } // release releases all resources held by the TaskContext. release is called by @@ -58,7 +58,6 @@ func (tc *TaskContext) release() { // Nil out pointers so that if the task is saved after release, it doesn't // follow the pointers to possibly now-invalid objects. if tc.MemoryManager != nil { - // TODO(b/38173783) tc.MemoryManager.DecUsers(context.Background()) tc.MemoryManager = nil } diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index ce3e6ef28..0325967e4 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -455,7 +455,7 @@ func (t *Task) SetKeepCaps(k bool) { t.creds.Store(creds) } -// updateCredsForExec updates t.creds to reflect an execve(). +// updateCredsForExecLocked updates t.creds to reflect an execve(). // // NOTE(b/30815691): We currently do not implement privileged executables // (set-user/group-ID bits and file capabilities). This allows us to make a lot diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 8802db142..6aa798346 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -513,8 +513,6 @@ func (t *Task) canReceiveSignalLocked(sig linux.Signal) bool { if t.stop != nil { return false } - // - TODO(b/38173783): No special case for when t is also the sending task, - // because the identity of the sender is unknown. // - Do not choose tasks that have already been interrupted, as they may be // busy handling another signal. if len(t.interruptChan) != 0 { diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 706de83ef..e959700f2 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -245,7 +245,7 @@ type Clock interface { type WallRateClock struct{} // WallTimeUntil implements Clock.WallTimeUntil. -func (WallRateClock) WallTimeUntil(t, now Time) time.Duration { +func (*WallRateClock) WallTimeUntil(t, now Time) time.Duration { return t.Sub(now) } @@ -254,16 +254,16 @@ func (WallRateClock) WallTimeUntil(t, now Time) time.Duration { type NoClockEvents struct{} // Readiness implements waiter.Waitable.Readiness. -func (NoClockEvents) Readiness(mask waiter.EventMask) waiter.EventMask { +func (*NoClockEvents) Readiness(mask waiter.EventMask) waiter.EventMask { return 0 } // EventRegister implements waiter.Waitable.EventRegister. -func (NoClockEvents) EventRegister(e *waiter.Entry, mask waiter.EventMask) { +func (*NoClockEvents) EventRegister(e *waiter.Entry, mask waiter.EventMask) { } // EventUnregister implements waiter.Waitable.EventUnregister. -func (NoClockEvents) EventUnregister(e *waiter.Entry) { +func (*NoClockEvents) EventUnregister(e *waiter.Entry) { } // ClockEventsQueue implements waiter.Waitable by wrapping waiter.Queue and @@ -273,7 +273,7 @@ type ClockEventsQueue struct { } // Readiness implements waiter.Waitable.Readiness. -func (ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask { +func (*ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask { return 0 } diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index 79045651e..716198712 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -18,6 +18,8 @@ package kvm import ( "syscall" + + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) type kvmOneReg struct { @@ -46,6 +48,6 @@ type userRegs struct { func updateGlobalOnce(fd int) error { physicalInit() err := updateSystemValues(int(fd)) - updateVectorTable() + ring0.Init() return err } diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index b531f2f85..3b35858ae 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -48,69 +48,6 @@ func (m *machine) initArchState() error { return nil } -func getPageWithReflect(p uintptr) []byte { - return (*(*[0xFFFFFF]byte)(unsafe.Pointer(p & ^uintptr(syscall.Getpagesize()-1))))[:syscall.Getpagesize()] -} - -// Work around: move ring0.Vectors() into a specific address with 11-bits alignment. -// -// According to the design documentation of Arm64, -// the start address of exception vector table should be 11-bits aligned. -// Please see the code in linux kernel as reference: arch/arm64/kernel/entry.S -// But, we can't align a function's start address to a specific address by using golang. -// We have raised this question in golang community: -// https://groups.google.com/forum/m/#!topic/golang-dev/RPj90l5x86I -// This function will be removed when golang supports this feature. -// -// There are 2 jobs were implemented in this function: -// 1, move the start address of exception vector table into the specific address. -// 2, modify the offset of each instruction. -func updateVectorTable() { - fromLocation := reflect.ValueOf(ring0.Vectors).Pointer() - offset := fromLocation & (1<<11 - 1) - if offset != 0 { - offset = 1<<11 - offset - } - - toLocation := fromLocation + offset - page := getPageWithReflect(toLocation) - if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC); err != nil { - panic(err) - } - - page = getPageWithReflect(toLocation + 4096) - if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC); err != nil { - panic(err) - } - - // Move exception-vector-table into the specific address. - var entry *uint32 - var entryFrom *uint32 - for i := 1; i <= 0x800; i++ { - entry = (*uint32)(unsafe.Pointer(toLocation + 0x800 - uintptr(i))) - entryFrom = (*uint32)(unsafe.Pointer(fromLocation + 0x800 - uintptr(i))) - *entry = *entryFrom - } - - // The offset from the address of each unconditionally branch is changed. - // We should modify the offset of each instruction. - nums := []uint32{0x0, 0x80, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, 0x480, 0x500, 0x580, 0x600, 0x680, 0x700, 0x780} - for _, num := range nums { - entry = (*uint32)(unsafe.Pointer(toLocation + uintptr(num))) - *entry = *entry - (uint32)(offset/4) - } - - page = getPageWithReflect(toLocation) - if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_EXEC); err != nil { - panic(err) - } - - page = getPageWithReflect(toLocation + 4096) - if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_EXEC); err != nil { - panic(err) - } -} - // initArchState initializes architecture-specific state. func (c *vCPU) initArchState() error { var ( diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index 934b6fbcd..b69520030 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -72,11 +72,13 @@ go_library( "lib_amd64.s", "lib_arm64.go", "lib_arm64.s", + "lib_arm64_unsafe.go", "ring0.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", + "//pkg/safecopy", "//pkg/sentry/platform/ring0/pagetables", "//pkg/usermem", ], diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go index af075aae4..242b9305c 100644 --- a/pkg/sentry/platform/ring0/lib_arm64.go +++ b/pkg/sentry/platform/ring0/lib_arm64.go @@ -37,3 +37,10 @@ func SaveVRegs(*byte) // LoadVRegs loads V0-V31 registers. func LoadVRegs(*byte) + +// Init sets function pointers based on architectural features. +// +// This must be called prior to using ring0. +func Init() { + rewriteVectors() +} diff --git a/pkg/sentry/platform/ring0/lib_arm64_unsafe.go b/pkg/sentry/platform/ring0/lib_arm64_unsafe.go new file mode 100644 index 000000000..c05166fea --- /dev/null +++ b/pkg/sentry/platform/ring0/lib_arm64_unsafe.go @@ -0,0 +1,108 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package ring0 + +import ( + "reflect" + "syscall" + "unsafe" + + "gvisor.dev/gvisor/pkg/safecopy" + "gvisor.dev/gvisor/pkg/usermem" +) + +const ( + nopInstruction = 0xd503201f + instSize = unsafe.Sizeof(uint32(0)) + vectorsRawLen = 0x800 +) + +func unsafeSlice(addr uintptr, length int) (slice []uint32) { + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + hdr.Data = addr + hdr.Len = length / int(instSize) + hdr.Cap = length / int(instSize) + return slice +} + +// Work around: move ring0.Vectors() into a specific address with 11-bits alignment. +// +// According to the design documentation of Arm64, +// the start address of exception vector table should be 11-bits aligned. +// Please see the code in linux kernel as reference: arch/arm64/kernel/entry.S +// But, we can't align a function's start address to a specific address by using golang. +// We have raised this question in golang community: +// https://groups.google.com/forum/m/#!topic/golang-dev/RPj90l5x86I +// This function will be removed when golang supports this feature. +// +// There are 2 jobs were implemented in this function: +// 1, move the start address of exception vector table into the specific address. +// 2, modify the offset of each instruction. +func rewriteVectors() { + vectorsBegin := reflect.ValueOf(Vectors).Pointer() + + // The exception-vector-table is required to be 11-bits aligned. + // And the size is 0x800. + // Please see the documentation as reference: + // https://developer.arm.com/docs/100933/0100/aarch64-exception-vector-table + // + // But, golang does not allow to set a function's address to a specific value. + // So, for gvisor, I defined the size of exception-vector-table as 4K, + // filled the 2nd 2K part with NOP-s. + // So that, I can safely move the 1st 2K part into the address with 11-bits alignment. + // + // So, the prerequisite for this function to work correctly is: + // vectorsSafeLen >= 0x1000 + // vectorsRawLen = 0x800 + vectorsSafeLen := int(safecopy.FindEndAddress(vectorsBegin) - vectorsBegin) + if vectorsSafeLen < 2*vectorsRawLen { + panic("Can't update vectors") + } + + vectorsSafeTable := unsafeSlice(vectorsBegin, vectorsSafeLen) // Now a []uint32 + vectorsRawLen32 := vectorsRawLen / int(instSize) + + offset := vectorsBegin & (1<<11 - 1) + if offset != 0 { + offset = 1<<11 - offset + } + + pageBegin := (vectorsBegin + offset) & ^uintptr(usermem.PageSize-1) + + _, _, errno := syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC)) + if errno != 0 { + panic(errno.Error()) + } + + offset = offset / instSize // By index, not bytes. + // Move exception-vector-table into the specific address, should uses memmove here. + for i := 1; i <= vectorsRawLen32; i++ { + vectorsSafeTable[int(offset)+vectorsRawLen32-i] = vectorsSafeTable[vectorsRawLen32-i] + } + + // Adjust branch since instruction was moved forward. + for i := 0; i < vectorsRawLen32; i++ { + if vectorsSafeTable[int(offset)+i] != nopInstruction { + vectorsSafeTable[int(offset)+i] -= uint32(offset) + } + } + + _, _, errno = syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_EXEC)) + if errno != 0 { + panic(errno.Error()) + } +} diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 77655558e..b94c4fbf5 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -778,9 +778,6 @@ func (s SyscallMap) Name(sysno uintptr) string { // // N.B. This is not in an init function because we can't be sure all syscall // tables are registered with the kernel when init runs. -// -// TODO(gvisor.dev/issue/155): remove kernel package dependencies from this -// package and have the kernel package self-initialize all syscall tables. func Initialize() { for _, table := range kernel.SyscallTables() { // Is this known? diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 9c6728530..f92bf8096 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -161,8 +161,8 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if args[1].Int() != 1 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 { return 0, nil, syserror.EINVAL } - // no_new_privs is assumed to always be set. See - // kernel.Task.updateCredsForExec. + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. return 0, nil, nil case linux.PR_GET_NO_NEW_PRIVS: diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 2919228d0..61b2576ac 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -31,6 +31,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// LINT.IfChange + // minListenBacklog is the minimum reasonable backlog for listening sockets. const minListenBacklog = 8 @@ -244,7 +246,10 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Copy the file descriptors out. if _, err := t.CopyOut(socks, fds); err != nil { - // Note that we don't close files here; see pipe(2) also. + for _, fd := range fds { + _, file := t.FDTable().Remove(fd) + file.DecRef() + } return 0, nil, err } @@ -1128,3 +1133,5 @@ func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal n, err := sendTo(t, fd, bufPtr, bufLen, flags, namePtr, nameLen) return n, nil, err } + +// LINT.ThenChange(./vfs2/socket.go) diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index 0004e60d9..b32abfe59 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -21,6 +21,7 @@ go_library( "poll.go", "read_write.go", "setstat.go", + "socket.go", "stat.go", "stat_amd64.go", "stat_arm64.go", @@ -32,6 +33,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/binary", "//pkg/bits", "//pkg/fspath", "//pkg/gohacks", @@ -43,10 +45,14 @@ go_library( "//pkg/sentry/limits", "//pkg/sentry/loader", "//pkg/sentry/memmap", + "//pkg/sentry/socket", + "//pkg/sentry/socket/control", + "//pkg/sentry/socket/unix/transport", "//pkg/sentry/syscalls", "//pkg/sentry/syscalls/linux", "//pkg/sentry/vfs", "//pkg/sync", + "//pkg/syserr", "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index a61cc5059..62e98817d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -97,6 +97,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { // char d_name[]; /* Filename (null-terminated) */ // }; size := 8 + 8 + 2 + 1 + 1 + len(dirent.Name) + size = (size + 7) &^ 7 // round up to multiple of 8 if size > cb.remaining { return syserror.EINVAL } @@ -106,7 +107,12 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { usermem.ByteOrder.PutUint16(buf[16:18], uint16(size)) buf[18] = dirent.Type copy(buf[19:], dirent.Name) - buf[size-1] = 0 // NUL terminator + // Zero out all remaining bytes in buf, including the NUL terminator + // after dirent.Name. + bufTail := buf[19+len(dirent.Name):] + for i := range bufTail { + bufTail[i] = 0 + } } else { // struct linux_dirent { // unsigned long d_ino; /* Inode number */ @@ -125,6 +131,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { panic(fmt.Sprintf("unsupported sizeof(unsigned long): %d", cb.t.Arch().Width())) } size := 8 + 8 + 2 + 1 + 1 + 1 + len(dirent.Name) + size = (size + 7) &^ 7 // round up to multiple of sizeof(long) if size > cb.remaining { return syserror.EINVAL } @@ -133,9 +140,14 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { usermem.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff)) usermem.ByteOrder.PutUint16(buf[16:18], uint16(size)) copy(buf[18:], dirent.Name) - buf[size-3] = 0 // NUL terminator - buf[size-2] = 0 // zero padding byte - buf[size-1] = dirent.Type + // Zero out all remaining bytes in buf, including the NUL terminator + // after dirent.Name and the zero padding byte between the name and + // dirent type. + bufTail := buf[18+len(dirent.Name):] + for i := range bufTail { + bufTail[i] = 0 + } + bufTail[2] = dirent.Type } n, err := cb.t.CopyOutBytes(cb.addr, buf) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go index 63febc2f7..645e0bcb8 100644 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go @@ -44,21 +44,22 @@ func Override(table map[uintptr]kernel.Syscall) { table[32] = syscalls.Supported("dup", Dup) table[33] = syscalls.Supported("dup2", Dup2) delete(table, 40) // sendfile - delete(table, 41) // socket - delete(table, 42) // connect - delete(table, 43) // accept - delete(table, 44) // sendto - delete(table, 45) // recvfrom - delete(table, 46) // sendmsg - delete(table, 47) // recvmsg - delete(table, 48) // shutdown - delete(table, 49) // bind - delete(table, 50) // listen - delete(table, 51) // getsockname - delete(table, 52) // getpeername - delete(table, 53) // socketpair - delete(table, 54) // setsockopt - delete(table, 55) // getsockopt + // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. + table[41] = syscalls.PartiallySupported("socket", Socket, "In process of porting socket syscalls to VFS2.", nil) + table[42] = syscalls.PartiallySupported("connect", Connect, "In process of porting socket syscalls to VFS2.", nil) + table[43] = syscalls.PartiallySupported("accept", Accept, "In process of porting socket syscalls to VFS2.", nil) + table[44] = syscalls.PartiallySupported("sendto", SendTo, "In process of porting socket syscalls to VFS2.", nil) + table[45] = syscalls.PartiallySupported("recvfrom", RecvFrom, "In process of porting socket syscalls to VFS2.", nil) + table[46] = syscalls.PartiallySupported("sendmsg", SendMsg, "In process of porting socket syscalls to VFS2.", nil) + table[47] = syscalls.PartiallySupported("recvmsg", RecvMsg, "In process of porting socket syscalls to VFS2.", nil) + table[48] = syscalls.PartiallySupported("shutdown", Shutdown, "In process of porting socket syscalls to VFS2.", nil) + table[49] = syscalls.PartiallySupported("bind", Bind, "In process of porting socket syscalls to VFS2.", nil) + table[50] = syscalls.PartiallySupported("listen", Listen, "In process of porting socket syscalls to VFS2.", nil) + table[51] = syscalls.PartiallySupported("getsockname", GetSockName, "In process of porting socket syscalls to VFS2.", nil) + table[52] = syscalls.PartiallySupported("getpeername", GetPeerName, "In process of porting socket syscalls to VFS2.", nil) + table[53] = syscalls.PartiallySupported("socketpair", SocketPair, "In process of porting socket syscalls to VFS2.", nil) + table[54] = syscalls.PartiallySupported("getsockopt", GetSockOpt, "In process of porting socket syscalls to VFS2.", nil) + table[55] = syscalls.PartiallySupported("setsockopt", SetSockOpt, "In process of porting socket syscalls to VFS2.", nil) table[59] = syscalls.Supported("execve", Execve) table[72] = syscalls.Supported("fcntl", Fcntl) delete(table, 73) // flock @@ -144,7 +145,8 @@ func Override(table map[uintptr]kernel.Syscall) { delete(table, 285) // fallocate table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) - delete(table, 288) // accept4 + // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. + table[288] = syscalls.PartiallySupported("accept4", Accept4, "In process of porting socket syscalls to VFS2.", nil) delete(table, 289) // signalfd4 delete(table, 290) // eventfd2 table[291] = syscalls.Supported("epoll_create1", EpollCreate1) @@ -153,9 +155,11 @@ func Override(table map[uintptr]kernel.Syscall) { delete(table, 294) // inotify_init1 table[295] = syscalls.Supported("preadv", Preadv) table[296] = syscalls.Supported("pwritev", Pwritev) - delete(table, 299) // recvmmsg + // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. + table[299] = syscalls.PartiallySupported("recvmmsg", RecvMMsg, "In process of porting socket syscalls to VFS2.", nil) table[306] = syscalls.Supported("syncfs", Syncfs) - delete(table, 307) // sendmmsg + // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. + table[307] = syscalls.PartiallySupported("sendmmsg", SendMMsg, "In process of porting socket syscalls to VFS2.", nil) table[316] = syscalls.Supported("renameat2", Renameat2) delete(table, 319) // memfd_create table[322] = syscalls.Supported("execveat", Execveat) diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go new file mode 100644 index 000000000..79a4a7ada --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -0,0 +1,1138 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "time" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// minListenBacklog is the minimum reasonable backlog for listening sockets. +const minListenBacklog = 8 + +// maxListenBacklog is the maximum allowed backlog for listening sockets. +const maxListenBacklog = 1024 + +// maxAddrLen is the maximum socket address length we're willing to accept. +const maxAddrLen = 200 + +// maxOptLen is the maximum sockopt parameter length we're willing to accept. +const maxOptLen = 1024 * 8 + +// maxControlLen is the maximum length of the msghdr.msg_control buffer we're +// willing to accept. Note that this limit is smaller than Linux, which allows +// buffers upto INT_MAX. +const maxControlLen = 10 * 1024 * 1024 + +// nameLenOffset is the offset from the start of the MessageHeader64 struct to +// the NameLen field. +const nameLenOffset = 8 + +// controlLenOffset is the offset form the start of the MessageHeader64 struct +// to the ControlLen field. +const controlLenOffset = 40 + +// flagsOffset is the offset form the start of the MessageHeader64 struct +// to the Flags field. +const flagsOffset = 48 + +const sizeOfInt32 = 4 + +// messageHeader64Len is the length of a MessageHeader64 struct. +var messageHeader64Len = uint64(binary.Size(MessageHeader64{})) + +// multipleMessageHeader64Len is the length of a multipeMessageHeader64 struct. +var multipleMessageHeader64Len = uint64(binary.Size(multipleMessageHeader64{})) + +// baseRecvFlags are the flags that are accepted across recvmsg(2), +// recvmmsg(2), and recvfrom(2). +const baseRecvFlags = linux.MSG_OOB | linux.MSG_DONTROUTE | linux.MSG_DONTWAIT | linux.MSG_NOSIGNAL | linux.MSG_WAITALL | linux.MSG_TRUNC | linux.MSG_CTRUNC + +// MessageHeader64 is the 64-bit representation of the msghdr struct used in +// the recvmsg and sendmsg syscalls. +type MessageHeader64 struct { + // Name is the optional pointer to a network address buffer. + Name uint64 + + // NameLen is the length of the buffer pointed to by Name. + NameLen uint32 + _ uint32 + + // Iov is a pointer to an array of io vectors that describe the memory + // locations involved in the io operation. + Iov uint64 + + // IovLen is the length of the array pointed to by Iov. + IovLen uint64 + + // Control is the optional pointer to ancillary control data. + Control uint64 + + // ControlLen is the length of the data pointed to by Control. + ControlLen uint64 + + // Flags on the sent/received message. + Flags int32 + _ int32 +} + +// multipleMessageHeader64 is the 64-bit representation of the mmsghdr struct used in +// the recvmmsg and sendmmsg syscalls. +type multipleMessageHeader64 struct { + msgHdr MessageHeader64 + msgLen uint32 + _ int32 +} + +// CopyInMessageHeader64 copies a message header from user to kernel memory. +func CopyInMessageHeader64(t *kernel.Task, addr usermem.Addr, msg *MessageHeader64) error { + b := t.CopyScratchBuffer(52) + if _, err := t.CopyInBytes(addr, b); err != nil { + return err + } + + msg.Name = usermem.ByteOrder.Uint64(b[0:]) + msg.NameLen = usermem.ByteOrder.Uint32(b[8:]) + msg.Iov = usermem.ByteOrder.Uint64(b[16:]) + msg.IovLen = usermem.ByteOrder.Uint64(b[24:]) + msg.Control = usermem.ByteOrder.Uint64(b[32:]) + msg.ControlLen = usermem.ByteOrder.Uint64(b[40:]) + msg.Flags = int32(usermem.ByteOrder.Uint32(b[48:])) + + return nil +} + +// CaptureAddress allocates memory for and copies a socket address structure +// from the untrusted address space range. +func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) { + if addrlen > maxAddrLen { + return nil, syserror.EINVAL + } + + addrBuf := make([]byte, addrlen) + if _, err := t.CopyInBytes(addr, addrBuf); err != nil { + return nil, err + } + + return addrBuf, nil +} + +// writeAddress writes a sockaddr structure and its length to an output buffer +// in the unstrusted address space range. If the address is bigger than the +// buffer, it is truncated. +func writeAddress(t *kernel.Task, addr interface{}, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error { + // Get the buffer length. + var bufLen uint32 + if _, err := t.CopyIn(addrLenPtr, &bufLen); err != nil { + return err + } + + if int32(bufLen) < 0 { + return syserror.EINVAL + } + + // Write the length unconditionally. + if _, err := t.CopyOut(addrLenPtr, addrLen); err != nil { + return err + } + + if addr == nil { + return nil + } + + if bufLen > addrLen { + bufLen = addrLen + } + + // Copy as much of the address as will fit in the buffer. + encodedAddr := binary.Marshal(nil, usermem.ByteOrder, addr) + if bufLen > uint32(len(encodedAddr)) { + bufLen = uint32(len(encodedAddr)) + } + _, err := t.CopyOutBytes(addrPtr, encodedAddr[:int(bufLen)]) + return err +} + +// Socket implements the linux syscall socket(2). +func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + domain := int(args[0].Int()) + stype := args[1].Int() + protocol := int(args[2].Int()) + + // Check and initialize the flags. + if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { + return 0, nil, syserror.EINVAL + } + + // Create the new socket. + s, e := socket.NewVFS2(t, domain, linux.SockType(stype&0xf), protocol) + if e != nil { + return 0, nil, e.ToError() + } + defer s.DecRef() + + if err := s.SetStatusFlags(t, t.Credentials(), uint32(stype&linux.SOCK_NONBLOCK)); err != nil { + return 0, nil, err + } + + fd, err := t.NewFDFromVFS2(0, s, kernel.FDFlags{ + CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, + }) + if err != nil { + return 0, nil, err + } + + return uintptr(fd), nil, nil +} + +// SocketPair implements the linux syscall socketpair(2). +func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + domain := int(args[0].Int()) + stype := args[1].Int() + protocol := int(args[2].Int()) + addr := args[3].Pointer() + + // Check and initialize the flags. + if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { + return 0, nil, syserror.EINVAL + } + + // Create the socket pair. + s1, s2, e := socket.PairVFS2(t, domain, linux.SockType(stype&0xf), protocol) + if e != nil { + return 0, nil, e.ToError() + } + // Adding to the FD table will cause an extra reference to be acquired. + defer s1.DecRef() + defer s2.DecRef() + + nonblocking := uint32(stype & linux.SOCK_NONBLOCK) + if err := s1.SetStatusFlags(t, t.Credentials(), nonblocking); err != nil { + return 0, nil, err + } + if err := s2.SetStatusFlags(t, t.Credentials(), nonblocking); err != nil { + return 0, nil, err + } + + // Create the FDs for the sockets. + flags := kernel.FDFlags{ + CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, + } + fds, err := t.NewFDsVFS2(0, []*vfs.FileDescription{s1, s2}, flags) + if err != nil { + return 0, nil, err + } + + if _, err := t.CopyOut(addr, fds); err != nil { + for _, fd := range fds { + _, file := t.FDTable().Remove(fd) + file.DecRef() + } + return 0, nil, err + } + + return 0, nil, nil +} + +// Connect implements the linux syscall connect(2). +func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Uint() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Capture address and call syscall implementation. + a, err := CaptureAddress(t, addr, addrlen) + if err != nil { + return 0, nil, err + } + + blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0 + return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), kernel.ERESTARTSYS) +} + +// accept is the implementation of the accept syscall. It is called by accept +// and accept4 syscall handlers. +func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { + // Check that no unsupported flags are passed in. + if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { + return 0, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, syserror.ENOTSOCK + } + + // Call the syscall implementation for this socket, then copy the + // output address if one is specified. + blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0 + + peerRequested := addrLen != 0 + nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking) + if e != nil { + return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) + } + if peerRequested { + // NOTE(magi): Linux does not give you an error if it can't + // write the data back out so neither do we. + if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syserror.EINVAL { + return 0, err + } + } + return uintptr(nfd), nil +} + +// Accept4 implements the linux syscall accept4(2). +func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Pointer() + flags := int(args[3].Int()) + + n, err := accept(t, fd, addr, addrlen, flags) + return n, nil, err +} + +// Accept implements the linux syscall accept(2). +func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Pointer() + + n, err := accept(t, fd, addr, addrlen, 0) + return n, nil, err +} + +// Bind implements the linux syscall bind(2). +func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Uint() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Capture address and call syscall implementation. + a, err := CaptureAddress(t, addr, addrlen) + if err != nil { + return 0, nil, err + } + + return 0, nil, s.Bind(t, a).ToError() +} + +// Listen implements the linux syscall listen(2). +func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + backlog := args[1].Int() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Per Linux, the backlog is silently capped to reasonable values. + if backlog <= 0 { + backlog = minListenBacklog + } + if backlog > maxListenBacklog { + backlog = maxListenBacklog + } + + return 0, nil, s.Listen(t, int(backlog)).ToError() +} + +// Shutdown implements the linux syscall shutdown(2). +func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + how := args[1].Int() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Validate how, then call syscall implementation. + switch how { + case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR: + default: + return 0, nil, syserror.EINVAL + } + + return 0, nil, s.Shutdown(t, int(how)).ToError() +} + +// GetSockOpt implements the linux syscall getsockopt(2). +func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + level := args[1].Int() + name := args[2].Int() + optValAddr := args[3].Pointer() + optLenAddr := args[4].Pointer() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Read the length. Reject negative values. + optLen := int32(0) + if _, err := t.CopyIn(optLenAddr, &optLen); err != nil { + return 0, nil, err + } + if optLen < 0 { + return 0, nil, syserror.EINVAL + } + + // Call syscall implementation then copy both value and value len out. + v, e := getSockOpt(t, s, int(level), int(name), optValAddr, int(optLen)) + if e != nil { + return 0, nil, e.ToError() + } + + vLen := int32(binary.Size(v)) + if _, err := t.CopyOut(optLenAddr, vLen); err != nil { + return 0, nil, err + } + + if v != nil { + if _, err := t.CopyOut(optValAddr, v); err != nil { + return 0, nil, err + } + } + + return 0, nil, nil +} + +// getSockOpt tries to handle common socket options, or dispatches to a specific +// socket implementation. +func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr usermem.Addr, len int) (interface{}, *syserr.Error) { + if level == linux.SOL_SOCKET { + switch name { + case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: + if len < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + } + + switch name { + case linux.SO_TYPE: + _, skType, _ := s.Type() + return int32(skType), nil + case linux.SO_DOMAIN: + family, _, _ := s.Type() + return int32(family), nil + case linux.SO_PROTOCOL: + _, _, protocol := s.Type() + return int32(protocol), nil + } + } + + return s.GetSockOpt(t, level, name, optValAddr, len) +} + +// SetSockOpt implements the linux syscall setsockopt(2). +// +// Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket. +func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + level := args[1].Int() + name := args[2].Int() + optValAddr := args[3].Pointer() + optLen := args[4].Int() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + if optLen < 0 { + return 0, nil, syserror.EINVAL + } + if optLen > maxOptLen { + return 0, nil, syserror.EINVAL + } + buf := t.CopyScratchBuffer(int(optLen)) + if _, err := t.CopyIn(optValAddr, &buf); err != nil { + return 0, nil, err + } + + // Call syscall implementation. + if err := s.SetSockOpt(t, int(level), int(name), buf); err != nil { + return 0, nil, err.ToError() + } + + return 0, nil, nil +} + +// GetSockName implements the linux syscall getsockname(2). +func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Pointer() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Get the socket name and copy it to the caller. + v, vl, err := s.GetSockName(t) + if err != nil { + return 0, nil, err.ToError() + } + + return 0, nil, writeAddress(t, v, vl, addr, addrlen) +} + +// GetPeerName implements the linux syscall getpeername(2). +func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + addrlen := args[2].Pointer() + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Get the socket peer name and copy it to the caller. + v, vl, err := s.GetPeerName(t) + if err != nil { + return 0, nil, err.ToError() + } + + return 0, nil, writeAddress(t, v, vl, addr, addrlen) +} + +// RecvMsg implements the linux syscall recvmsg(2). +func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + msgPtr := args[1].Pointer() + flags := args[2].Int() + + if t.Arch().Width() != 8 { + // We only handle 64-bit for now. + return 0, nil, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Reject flags that we don't handle yet. + if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { + return 0, nil, syserror.EINVAL + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + var haveDeadline bool + var deadline ktime.Time + if dl := s.RecvTimeout(); dl > 0 { + deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) + haveDeadline = true + } else if dl < 0 { + flags |= linux.MSG_DONTWAIT + } + + n, err := recvSingleMsg(t, s, msgPtr, flags, haveDeadline, deadline) + return n, nil, err +} + +// RecvMMsg implements the linux syscall recvmmsg(2). +func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + msgPtr := args[1].Pointer() + vlen := args[2].Uint() + flags := args[3].Int() + toPtr := args[4].Pointer() + + if t.Arch().Width() != 8 { + // We only handle 64-bit for now. + return 0, nil, syserror.EINVAL + } + + // Reject flags that we don't handle yet. + if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { + return 0, nil, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + var haveDeadline bool + var deadline ktime.Time + if toPtr != 0 { + var ts linux.Timespec + if _, err := ts.CopyIn(t, toPtr); err != nil { + return 0, nil, err + } + if !ts.Valid() { + return 0, nil, syserror.EINVAL + } + deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration()) + haveDeadline = true + } + + if !haveDeadline { + if dl := s.RecvTimeout(); dl > 0 { + deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) + haveDeadline = true + } else if dl < 0 { + flags |= linux.MSG_DONTWAIT + } + } + + var count uint32 + var err error + for i := uint64(0); i < uint64(vlen); i++ { + mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) + if !ok { + return 0, nil, syserror.EFAULT + } + var n uintptr + if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil { + break + } + + // Copy the received length to the caller. + lp, ok := mp.AddLength(messageHeader64Len) + if !ok { + return 0, nil, syserror.EFAULT + } + if _, err = t.CopyOut(lp, uint32(n)); err != nil { + break + } + count++ + } + + if count == 0 { + return 0, nil, err + } + return uintptr(count), nil, nil +} + +func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { + // Capture the message header and io vectors. + var msg MessageHeader64 + if err := CopyInMessageHeader64(t, msgPtr, &msg); err != nil { + return 0, err + } + + if msg.IovLen > linux.UIO_MAXIOV { + return 0, syserror.EMSGSIZE + } + dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + AddressSpaceActive: true, + }) + if err != nil { + return 0, err + } + + // FIXME(b/63594852): Pretend we have an empty error queue. + if flags&linux.MSG_ERRQUEUE != 0 { + return 0, syserror.EAGAIN + } + + // Fast path when no control message nor name buffers are provided. + if msg.ControlLen == 0 && msg.NameLen == 0 { + n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) + if err != nil { + return 0, syserror.ConvertIntr(err.ToError(), kernel.ERESTARTSYS) + } + if !cms.Unix.Empty() { + mflags |= linux.MSG_CTRUNC + cms.Release() + } + + if int(msg.Flags) != mflags { + // Copy out the flags to the caller. + if _, err := t.CopyOut(msgPtr+flagsOffset, int32(mflags)); err != nil { + return 0, err + } + } + + return uintptr(n), nil + } + + if msg.ControlLen > maxControlLen { + return 0, syserror.ENOBUFS + } + n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) + if e != nil { + return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) + } + defer cms.Release() + + controlData := make([]byte, 0, msg.ControlLen) + controlData = control.PackControlMessages(t, cms, controlData) + + if cr, ok := s.(transport.Credentialer); ok && cr.Passcred() { + creds, _ := cms.Unix.Credentials.(control.SCMCredentials) + controlData, mflags = control.PackCredentials(t, creds, controlData, mflags) + } + + if cms.Unix.Rights != nil { + controlData, mflags = control.PackRights(t, cms.Unix.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags) + } + + // Copy the address to the caller. + if msg.NameLen != 0 { + if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil { + return 0, err + } + } + + // Copy the control data to the caller. + if _, err := t.CopyOut(msgPtr+controlLenOffset, uint64(len(controlData))); err != nil { + return 0, err + } + if len(controlData) > 0 { + if _, err := t.CopyOut(usermem.Addr(msg.Control), controlData); err != nil { + return 0, err + } + } + + // Copy out the flags to the caller. + if _, err := t.CopyOut(msgPtr+flagsOffset, int32(mflags)); err != nil { + return 0, err + } + + return uintptr(n), nil +} + +// recvFrom is the implementation of the recvfrom syscall. It is called by +// recvfrom and recv syscall handlers. +func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { + if int(bufLen) < 0 { + return 0, syserror.EINVAL + } + + // Reject flags that we don't handle yet. + if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 { + return 0, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, syserror.ENOTSOCK + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + dst, err := t.SingleIOSequence(bufPtr, int(bufLen), usermem.IOOpts{ + AddressSpaceActive: true, + }) + if err != nil { + return 0, err + } + + var haveDeadline bool + var deadline ktime.Time + if dl := s.RecvTimeout(); dl > 0 { + deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) + haveDeadline = true + } else if dl < 0 { + flags |= linux.MSG_DONTWAIT + } + + n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) + cm.Release() + if e != nil { + return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) + } + + // Copy the address to the caller. + if nameLenPtr != 0 { + if err := writeAddress(t, sender, senderLen, namePtr, nameLenPtr); err != nil { + return 0, err + } + } + + return uintptr(n), nil +} + +// RecvFrom implements the linux syscall recvfrom(2). +func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + bufPtr := args[1].Pointer() + bufLen := args[2].Uint64() + flags := args[3].Int() + namePtr := args[4].Pointer() + nameLenPtr := args[5].Pointer() + + n, err := recvFrom(t, fd, bufPtr, bufLen, flags, namePtr, nameLenPtr) + return n, nil, err +} + +// SendMsg implements the linux syscall sendmsg(2). +func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + msgPtr := args[1].Pointer() + flags := args[2].Int() + + if t.Arch().Width() != 8 { + // We only handle 64-bit for now. + return 0, nil, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Reject flags that we don't handle yet. + if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { + return 0, nil, syserror.EINVAL + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + n, err := sendSingleMsg(t, s, file, msgPtr, flags) + return n, nil, err +} + +// SendMMsg implements the linux syscall sendmmsg(2). +func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + msgPtr := args[1].Pointer() + vlen := args[2].Uint() + flags := args[3].Int() + + if t.Arch().Width() != 8 { + // We only handle 64-bit for now. + return 0, nil, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, nil, syserror.ENOTSOCK + } + + // Reject flags that we don't handle yet. + if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { + return 0, nil, syserror.EINVAL + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + var count uint32 + var err error + for i := uint64(0); i < uint64(vlen); i++ { + mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) + if !ok { + return 0, nil, syserror.EFAULT + } + var n uintptr + if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil { + break + } + + // Copy the received length to the caller. + lp, ok := mp.AddLength(messageHeader64Len) + if !ok { + return 0, nil, syserror.EFAULT + } + if _, err = t.CopyOut(lp, uint32(n)); err != nil { + break + } + count++ + } + + if count == 0 { + return 0, nil, err + } + return uintptr(count), nil, nil +} + +func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr usermem.Addr, flags int32) (uintptr, error) { + // Capture the message header. + var msg MessageHeader64 + if err := CopyInMessageHeader64(t, msgPtr, &msg); err != nil { + return 0, err + } + + var controlData []byte + if msg.ControlLen > 0 { + // Put an upper bound to prevent large allocations. + if msg.ControlLen > maxControlLen { + return 0, syserror.ENOBUFS + } + controlData = make([]byte, msg.ControlLen) + if _, err := t.CopyIn(usermem.Addr(msg.Control), &controlData); err != nil { + return 0, err + } + } + + // Read the destination address if one is specified. + var to []byte + if msg.NameLen != 0 { + var err error + to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen) + if err != nil { + return 0, err + } + } + + // Read data then call the sendmsg implementation. + if msg.IovLen > linux.UIO_MAXIOV { + return 0, syserror.EMSGSIZE + } + src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ + AddressSpaceActive: true, + }) + if err != nil { + return 0, err + } + + controlMessages, err := control.Parse(t, s, controlData) + if err != nil { + return 0, err + } + + var haveDeadline bool + var deadline ktime.Time + if dl := s.SendTimeout(); dl > 0 { + deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) + haveDeadline = true + } else if dl < 0 { + flags |= linux.MSG_DONTWAIT + } + + // Call the syscall implementation. + n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) + err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file) + if err != nil { + controlMessages.Release() + } + return uintptr(n), err +} + +// sendTo is the implementation of the sendto syscall. It is called by sendto +// and send syscall handlers. +func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { + bl := int(bufLen) + if bl < 0 { + return 0, syserror.EINVAL + } + + // Get socket from the file descriptor. + file := t.GetFileVFS2(fd) + if file == nil { + return 0, syserror.EBADF + } + defer file.DecRef() + + // Extract the socket. + s, ok := file.Impl().(socket.SocketVFS2) + if !ok { + return 0, syserror.ENOTSOCK + } + + if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { + flags |= linux.MSG_DONTWAIT + } + + // Read the destination address if one is specified. + var to []byte + var err error + if namePtr != 0 { + to, err = CaptureAddress(t, namePtr, nameLen) + if err != nil { + return 0, err + } + } + + src, err := t.SingleIOSequence(bufPtr, bl, usermem.IOOpts{ + AddressSpaceActive: true, + }) + if err != nil { + return 0, err + } + + var haveDeadline bool + var deadline ktime.Time + if dl := s.SendTimeout(); dl > 0 { + deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) + haveDeadline = true + } else if dl < 0 { + flags |= linux.MSG_DONTWAIT + } + + // Call the syscall implementation. + n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)}) + return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendto", file) +} + +// SendTo implements the linux syscall sendto(2). +func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + bufPtr := args[1].Pointer() + bufLen := args[2].Uint64() + flags := args[3].Int() + namePtr := args[4].Pointer() + nameLen := args[5].Uint() + + n, err := sendTo(t, fd, bufPtr, bufLen, flags, namePtr, nameLen) + return n, nil, err +} diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go index 89e9ff4d7..af455d5c1 100644 --- a/pkg/sentry/syscalls/linux/vfs2/xattr.go +++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go @@ -51,7 +51,7 @@ func listxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSyml } defer tpop.Release() - names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop) + names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop, uint64(size)) if err != nil { return 0, nil, err } @@ -74,7 +74,7 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } defer file.DecRef() - names, err := file.Listxattr(t) + names, err := file.Listxattr(t, uint64(size)) if err != nil { return 0, nil, err } @@ -116,7 +116,10 @@ func getxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli return 0, nil, err } - value, err := t.Kernel().VFS().GetxattrAt(t, t.Credentials(), &tpop.pop, name) + value, err := t.Kernel().VFS().GetxattrAt(t, t.Credentials(), &tpop.pop, &vfs.GetxattrOptions{ + Name: name, + Size: uint64(size), + }) if err != nil { return 0, nil, err } @@ -145,7 +148,7 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } - value, err := file.Getxattr(t, name) + value, err := file.Getxattr(t, &vfs.GetxattrOptions{Name: name, Size: uint64(size)}) if err != nil { return 0, nil, err } @@ -230,7 +233,7 @@ func Fsetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } - return 0, nil, file.Setxattr(t, vfs.SetxattrOptions{ + return 0, nil, file.Setxattr(t, &vfs.SetxattrOptions{ Name: name, Value: value, Flags: uint32(flags), diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index d1f6dfb45..a64d86122 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -245,7 +245,7 @@ func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath } // ListxattrAt implements FilesystemImpl.ListxattrAt. -func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) { +func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) { if !rp.Done() { return nil, syserror.ENOTDIR } @@ -253,7 +253,7 @@ func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([ } // GetxattrAt implements FilesystemImpl.GetxattrAt. -func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) { +func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) { if !rp.Done() { return "", syserror.ENOTDIR } diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 20c545fca..4fb9aea87 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -401,11 +401,11 @@ type FileDescriptionImpl interface { Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) // Listxattr returns all extended attribute names for the file. - Listxattr(ctx context.Context) ([]string, error) + Listxattr(ctx context.Context, size uint64) ([]string, error) // Getxattr returns the value associated with the given extended attribute // for the file. - Getxattr(ctx context.Context, name string) (string, error) + Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) // Setxattr changes the value associated with the given extended attribute // for the file. @@ -605,18 +605,23 @@ func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch. // Listxattr returns all extended attribute names for the file represented by // fd. -func (fd *FileDescription) Listxattr(ctx context.Context) ([]string, error) { +// +// If the size of the list (including a NUL terminating byte after every entry) +// would exceed size, ERANGE may be returned. Note that implementations +// are free to ignore size entirely and return without error). In all cases, +// if size is 0, the list should be returned without error, regardless of size. +func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp) + names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size) vfsObj.putResolvingPath(rp) return names, err } - names, err := fd.impl.Listxattr(ctx) + names, err := fd.impl.Listxattr(ctx, size) if err == syserror.ENOTSUP { // Linux doesn't actually return ENOTSUP in this case; instead, // fs/xattr.c:vfs_listxattr() falls back to allowing the security @@ -629,34 +634,39 @@ func (fd *FileDescription) Listxattr(ctx context.Context) ([]string, error) { // Getxattr returns the value associated with the given extended attribute for // the file represented by fd. -func (fd *FileDescription) Getxattr(ctx context.Context, name string) (string, error) { +// +// If the size of the return value exceeds opts.Size, ERANGE may be returned +// (note that implementations are free to ignore opts.Size entirely and return +// without error). In all cases, if opts.Size is 0, the value should be +// returned without error, regardless of size. +func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) (string, error) { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, name) + val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts) vfsObj.putResolvingPath(rp) return val, err } - return fd.impl.Getxattr(ctx, name) + return fd.impl.Getxattr(ctx, *opts) } // Setxattr changes the value associated with the given extended attribute for // the file represented by fd. -func (fd *FileDescription) Setxattr(ctx context.Context, opts SetxattrOptions) error { +func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) error { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, opts) + err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts) vfsObj.putResolvingPath(rp) return err } - return fd.impl.Setxattr(ctx, opts) + return fd.impl.Setxattr(ctx, *opts) } // Removexattr removes the given extended attribute from the file represented diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index d45e602ce..f4c111926 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -130,14 +130,14 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg // Listxattr implements FileDescriptionImpl.Listxattr analogously to // inode_operations::listxattr == NULL in Linux. -func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context) ([]string, error) { +func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context, size uint64) ([]string, error) { // This isn't exactly accurate; see FileDescription.Listxattr. return nil, syserror.ENOTSUP } // Getxattr implements FileDescriptionImpl.Getxattr analogously to // inode::i_opflags & IOP_XATTR == 0 in Linux. -func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, name string) (string, error) { +func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) { return "", syserror.ENOTSUP } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index bef1bd312..a537a29d1 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -442,7 +442,13 @@ type FilesystemImpl interface { // - If extended attributes are not supported by the filesystem, // ListxattrAt returns nil. (See FileDescription.Listxattr for an // explanation.) - ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) + // + // - If the size of the list (including a NUL terminating byte after every + // entry) would exceed size, ERANGE may be returned. Note that + // implementations are free to ignore size entirely and return without + // error). In all cases, if size is 0, the list should be returned without + // error, regardless of size. + ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) // GetxattrAt returns the value associated with the given extended // attribute for the file at rp. @@ -451,7 +457,15 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, GetxattrAt // returns ENOTSUP. - GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) + // + // - If an extended attribute named opts.Name does not exist, ENODATA is + // returned. + // + // - If the size of the return value exceeds opts.Size, ERANGE may be + // returned (note that implementations are free to ignore opts.Size entirely + // and return without error). In all cases, if opts.Size is 0, the value + // should be returned without error, regardless of size. + GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) // SetxattrAt changes the value associated with the given extended // attribute for the file at rp. @@ -460,6 +474,10 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, SetxattrAt // returns ENOTSUP. + // + // - If XATTR_CREATE is set in opts.Flag and opts.Name already exists, + // EEXIST is returned. If XATTR_REPLACE is set and opts.Name does not exist, + // ENODATA is returned. SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error // RemovexattrAt removes the given extended attribute from the file at rp. @@ -468,6 +486,8 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, // RemovexattrAt returns ENOTSUP. + // + // - If name does not exist, ENODATA is returned. RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error // BoundEndpointAt returns the Unix socket endpoint bound at the path rp. diff --git a/pkg/sentry/vfs/memxattr/BUILD b/pkg/sentry/vfs/memxattr/BUILD new file mode 100644 index 000000000..d8c4d27b9 --- /dev/null +++ b/pkg/sentry/vfs/memxattr/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "memxattr", + srcs = ["xattr.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/vfs", + "//pkg/sync", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go new file mode 100644 index 000000000..cc1e7d764 --- /dev/null +++ b/pkg/sentry/vfs/memxattr/xattr.go @@ -0,0 +1,102 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package memxattr provides a default, in-memory extended attribute +// implementation. +package memxattr + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" +) + +// SimpleExtendedAttributes implements extended attributes using a map of +// names to values. +// +// +stateify savable +type SimpleExtendedAttributes struct { + // mu protects the below fields. + mu sync.RWMutex `state:"nosave"` + xattrs map[string]string +} + +// Getxattr returns the value at 'name'. +func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string, error) { + x.mu.RLock() + value, ok := x.xattrs[opts.Name] + x.mu.RUnlock() + if !ok { + return "", syserror.ENODATA + } + // Check that the size of the buffer provided in getxattr(2) is large enough + // to contain the value. + if opts.Size != 0 && uint64(len(value)) > opts.Size { + return "", syserror.ERANGE + } + return value, nil +} + +// Setxattr sets 'value' at 'name'. +func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error { + x.mu.Lock() + defer x.mu.Unlock() + if x.xattrs == nil { + if opts.Flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } + x.xattrs = make(map[string]string) + } + + _, ok := x.xattrs[opts.Name] + if ok && opts.Flags&linux.XATTR_CREATE != 0 { + return syserror.EEXIST + } + if !ok && opts.Flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } + + x.xattrs[opts.Name] = opts.Value + return nil +} + +// Listxattr returns all names in xattrs. +func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) { + // Keep track of the size of the buffer needed in listxattr(2) for the list. + listSize := 0 + x.mu.RLock() + names := make([]string, 0, len(x.xattrs)) + for n := range x.xattrs { + names = append(names, n) + // Add one byte per null terminator. + listSize += len(n) + 1 + } + x.mu.RUnlock() + if size != 0 && uint64(listSize) > size { + return nil, syserror.ERANGE + } + return names, nil +} + +// Removexattr removes the xattr at 'name'. +func (x *SimpleExtendedAttributes) Removexattr(name string) error { + x.mu.Lock() + defer x.mu.Unlock() + if _, ok := x.xattrs[name]; !ok { + return syserror.ENODATA + } + delete(x.xattrs, name) + return nil +} diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index 2f04bf882..534528ce6 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -132,6 +132,20 @@ type SetStatOptions struct { Stat linux.Statx } +// GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(), +// FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and +// FileDescriptionImpl.Getxattr(). +type GetxattrOptions struct { + // Name is the name of the extended attribute to retrieve. + Name string + + // Size is the maximum value size that the caller will tolerate. If the value + // is larger than size, getxattr methods may return ERANGE, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + Size uint64 +} + // SetxattrOptions contains options to VirtualFilesystem.SetxattrAt(), // FilesystemImpl.SetxattrAt(), FileDescription.Setxattr(), and // FileDescriptionImpl.Setxattr(). diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 720b90d8f..f592913d5 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -680,10 +680,10 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti // ListxattrAt returns all extended attribute names for the file at the given // path. -func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) ([]string, error) { +func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) { rp := vfs.getResolvingPath(creds, pop) for { - names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp) + names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp, size) if err == nil { vfs.putResolvingPath(rp) return names, nil @@ -705,10 +705,10 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede // GetxattrAt returns the value associated with the given extended attribute // for the file at the given path. -func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) (string, error) { +func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetxattrOptions) (string, error) { rp := vfs.getResolvingPath(creds, pop) for { - val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, name) + val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, *opts) if err == nil { vfs.putResolvingPath(rp) return val, nil diff --git a/pkg/state/state.go b/pkg/state/state.go index dbe507ab4..03ae2dbb0 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -241,10 +241,7 @@ func Register(name string, instance interface{}, fns Fns) { // // This function is used by the stateify tool. func IsZeroValue(val interface{}) bool { - if val == nil { - return true - } - return reflect.DeepEqual(val, reflect.Zero(reflect.TypeOf(val)).Interface()) + return val == nil || reflect.ValueOf(val).Elem().IsZero() } // step captures one encoding / decoding step. On each step, there is up to one diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go index 7a0014ad9..14413f2ce 100644 --- a/pkg/tcpip/header/eth_test.go +++ b/pkg/tcpip/header/eth_test.go @@ -88,7 +88,7 @@ func TestEthernetAddressFromMulticastIPv4Address(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { if got := EthernetAddressFromMulticastIPv4Address(test.addr); got != test.expectedLinkAddr { - t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", got, test.expectedLinkAddr) + t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", test.addr, got, test.expectedLinkAddr) } }) } diff --git a/pkg/tcpip/header/ndp_test.go b/pkg/tcpip/header/ndp_test.go index 1cb9f5dc8..969f8f1e8 100644 --- a/pkg/tcpip/header/ndp_test.go +++ b/pkg/tcpip/header/ndp_test.go @@ -115,7 +115,7 @@ func TestNDPNeighborAdvert(t *testing.T) { // Make sure flags got updated in the backing buffer. if got := b[ndpNAFlagsOffset]; got != 64 { - t.Errorf("got flags byte = %d, want = 64") + t.Errorf("got flags byte = %d, want = 64", got) } } diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 7198742b7..b857ce9d0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -91,7 +91,7 @@ func (p PacketDispatchMode) String() string { case PacketMMap: return "PacketMMap" default: - return fmt.Sprintf("unknown packet dispatch mode %v", p) + return fmt.Sprintf("unknown packet dispatch mode '%d'", p) } } diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index b3e239ac7..1646d9cde 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -138,7 +138,8 @@ func TestDirectRequest(t *testing.T) { // Sleep tests are gross, but this will only potentially flake // if there's a bug. If there is no bug this will reliably // succeed. - ctx, _ := context.WithTimeout(context.Background(), 100*time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() if pkt, ok := c.linkEP.ReadContext(ctx); ok { t.Errorf("stackAddrBad: unexpected packet sent, Proto=%v", pkt.Proto) } diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 27dc8baf9..acb2d4731 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -1959,7 +1959,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { // addr2 is deprecated but if explicitly requested, it should be used. fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Another PI w/ 0 preferred lifetime should not result in a deprecation @@ -1972,7 +1972,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { } expectPrimaryAddr(addr1) if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Refresh lifetimes of addr generated from prefix2. @@ -2084,7 +2084,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr1 is deprecated but if explicitly requested, it should be used. fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make @@ -2097,7 +2097,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { } expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh lifetimes for addr of prefix1. @@ -2121,7 +2121,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr2 should be the primary endpoint now since it is not deprecated. expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Wait for addr of prefix1 to be invalidated. @@ -2564,7 +2564,7 @@ func TestAutoGenAddrAfterRemoval(t *testing.T) { AddressWithPrefix: addr2, } if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil { - t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d, %s) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) + t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) } // addr2 should be more preferred now since it is at the front of the primary // list. @@ -3483,7 +3483,8 @@ func TestRouterSolicitation(t *testing.T) { e.Endpoint.LinkEPCapabilities |= stack.CapabilityResolutionRequired waitForPkt := func(timeout time.Duration) { t.Helper() - ctx, _ := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() p, ok := e.ReadContext(ctx) if !ok { t.Fatal("timed out waiting for packet") @@ -3513,7 +3514,8 @@ func TestRouterSolicitation(t *testing.T) { } waitForNothing := func(timeout time.Duration) { t.Helper() - ctx, _ := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() if _, ok := e.ReadContext(ctx); ok { t.Fatal("unexpectedly got a packet") } diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 3f8a2a095..c7634ceb1 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -1445,19 +1445,19 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) { protoAddr := tcpip.ProtocolAddress{Protocol: fakeNetNumber, AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Any, 0}} if err := s.AddProtocolAddress(1, protoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", protoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", protoAddr, err) } r, err := s.FindRoute(1, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: header.IPv4Any, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } // If the NIC doesn't exist, it won't work. if _, err := s.FindRoute(2, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */); err != tcpip.ErrNetworkUnreachable { - t.Fatalf("got FindRoute(2, %s, %s, %d) = %s want = %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) + t.Fatalf("got FindRoute(2, %v, %v, %d) = %v want = %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) } } @@ -1483,12 +1483,12 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { } nic1ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic1Addr} if err := s.AddProtocolAddress(1, nic1ProtoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", nic1ProtoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", nic1ProtoAddr, err) } nic2ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic2Addr} if err := s.AddProtocolAddress(2, nic2ProtoAddr); err != nil { - t.Fatalf("AddAddress(2, %s) failed: %s", nic2ProtoAddr, err) + t.Fatalf("AddAddress(2, %v) failed: %v", nic2ProtoAddr, err) } // Set the initial route table. @@ -1503,10 +1503,10 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { // When an interface is given, the route for a broadcast goes through it. r, err := s.FindRoute(1, nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } // When an interface is not given, it consults the route table. @@ -2399,7 +2399,7 @@ func TestNICContextPreservation(t *testing.T) { t.Fatalf("got nicinfos[%d] = _, %t, want _, true; nicinfos = %+v", id, ok, nicinfos) } if got, want := nicinfo.Context == test.want, true; got != want { - t.Fatal("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) + t.Fatalf("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) } }) } @@ -2768,7 +2768,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { { subnet, err := tcpip.NewSubnet("\x00", "\x00") if err != nil { - t.Fatalf("NewSubnet failed:", err) + t.Fatalf("NewSubnet failed: %v", err) } s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}}) } @@ -2782,11 +2782,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // permanentExpired kind. r, err := s.FindRoute(1, "\x01", "\x02", fakeNetNumber, false) if err != nil { - t.Fatal("FindRoute failed:", err) + t.Fatalf("FindRoute failed: %v", err) } defer r.Release() if err := s.RemoveAddress(1, "\x01"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } // @@ -2798,7 +2798,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // Add some other address with peb set to // FirstPrimaryEndpoint. if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x03", stack.FirstPrimaryEndpoint); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } @@ -2806,7 +2806,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // make sure the new peb was respected. // (The address should just be promoted now). if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x01", ps); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } var primaryAddrs []tcpip.Address for _, pa := range s.NICInfo()[1].ProtocolAddresses { @@ -2839,11 +2839,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // GetMainNICAddress; else, our original address // should be returned. if err := s.RemoveAddress(1, "\x03"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } addr, err = s.GetMainNICAddress(1, fakeNetNumber) if err != nil { - t.Fatal("s.GetMainNICAddress failed:", err) + t.Fatalf("s.GetMainNICAddress failed: %v", err) } if ps == stack.NeverPrimaryEndpoint { if want := (tcpip.AddressWithPrefix{}); addr != want { diff --git a/pkg/tcpip/tcpip_test.go b/pkg/tcpip/tcpip_test.go index 8c0aacffa..1c8e2bc34 100644 --- a/pkg/tcpip/tcpip_test.go +++ b/pkg/tcpip/tcpip_test.go @@ -218,7 +218,7 @@ func TestAddressWithPrefixSubnet(t *testing.T) { gotSubnet := ap.Subnet() wantSubnet, err := NewSubnet(tt.subnetAddr, tt.subnetMask) if err != nil { - t.Error("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) + t.Errorf("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) continue } if gotSubnet != wantSubnet { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 32d0af6c4..29301a45c 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -284,7 +284,7 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { // are released instantly on Close. tcpTW := tcpip.TCPTimeWaitTimeoutOption(1 * time.Millisecond) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpTW); err != nil { - t.Fatalf("e.stack.SetTransportProtocolOption(%d, %s) = %s", tcp.ProtocolNumber, tcpTW, err) + t.Fatalf("e.stack.SetTransportProtocolOption(%d, %v) = %v", tcp.ProtocolNumber, tcpTW, err) } c.EP.Close() @@ -5607,7 +5607,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { return } if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) { - t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd) + t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w) } }, )) diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index d4f6bc635..431ab4e6b 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -217,7 +217,8 @@ func (c *Context) Stack() *stack.Stack { func (c *Context) CheckNoPacketTimeout(errMsg string, wait time.Duration) { c.t.Helper() - ctx, _ := context.WithTimeout(context.Background(), wait) + ctx, cancel := context.WithTimeout(context.Background(), wait) + defer cancel() if _, ok := c.linkEP.ReadContext(ctx); ok { c.t.Fatal(errMsg) } @@ -235,7 +236,8 @@ func (c *Context) CheckNoPacket(errMsg string) { func (c *Context) GetPacket() []byte { c.t.Helper() - ctx, _ := context.WithTimeout(context.Background(), 2*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() p, ok := c.linkEP.ReadContext(ctx) if !ok { c.t.Fatalf("Packet wasn't written out") @@ -486,7 +488,8 @@ func (c *Context) CreateV6Endpoint(v6only bool) { func (c *Context) GetV6Packet() []byte { c.t.Helper() - ctx, _ := context.WithTimeout(context.Background(), 2*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() p, ok := c.linkEP.ReadContext(ctx) if !ok { c.t.Fatalf("Packet wasn't written out") diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index b3ee688b7..8acaa607a 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -358,7 +358,8 @@ func (c *testContext) createEndpointForFlow(flow testFlow) { func (c *testContext) getPacketAndVerify(flow testFlow, checkers ...checker.NetworkChecker) []byte { c.t.Helper() - ctx, _ := context.WithTimeout(context.Background(), 2*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() p, ok := c.linkEP.ReadContext(ctx) if !ok { c.t.Fatalf("Packet wasn't written out") @@ -607,7 +608,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe // Check the peer address. h := flow.header4Tuple(incoming) if addr.Addr != h.srcAddr.Addr { - c.t.Fatalf("unexpected remote address: got %s, want %s", addr.Addr, h.srcAddr) + c.t.Fatalf("unexpected remote address: got %s, want %v", addr.Addr, h.srcAddr) } // Check the payload. @@ -1565,7 +1566,8 @@ func TestV4UnknownDestination(t *testing.T) { } c.injectPacket(tc.flow, payload) if !tc.icmpRequired { - ctx, _ := context.WithTimeout(context.Background(), time.Second) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() if p, ok := c.linkEP.ReadContext(ctx); ok { t.Fatalf("unexpected packet received: %+v", p) } @@ -1573,7 +1575,8 @@ func TestV4UnknownDestination(t *testing.T) { } // ICMP required. - ctx, _ := context.WithTimeout(context.Background(), time.Second) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() p, ok := c.linkEP.ReadContext(ctx) if !ok { t.Fatalf("packet wasn't written out") @@ -1641,7 +1644,8 @@ func TestV6UnknownDestination(t *testing.T) { } c.injectPacket(tc.flow, payload) if !tc.icmpRequired { - ctx, _ := context.WithTimeout(context.Background(), time.Second) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() if p, ok := c.linkEP.ReadContext(ctx); ok { t.Fatalf("unexpected packet received: %+v", p) } @@ -1649,7 +1653,8 @@ func TestV6UnknownDestination(t *testing.T) { } // ICMP required. - ctx, _ := context.WithTimeout(context.Background(), time.Second) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() p, ok := c.linkEP.ReadContext(ctx) if !ok { t.Fatalf("packet wasn't written out") diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index d2f4403b0..cd6a0ea6b 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -29,9 +29,6 @@ import ( ) // IO provides access to the contents of a virtual memory space. -// -// FIXME(b/38173783): Implementations of IO cannot expect ctx to contain any -// meaningful data. type IO interface { // CopyOut copies len(src) bytes from src to the memory mapped at addr. It // returns the number of bytes copied. If the number of bytes copied is < diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 8995d678e..b7cfb35bf 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) { if logFD > 0 { f := os.NewFile(uintptr(logFD), "user log file") - target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}} + target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}} c.sink = &log.BasicLogger{Level: log.Info, Emitter: target} } return c, nil diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 06b9f888a..1828d116a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{ { seccomp.AllowAny{}, seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.AllowValue(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 01c47c79f..5f1c4b7d6 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -96,7 +96,7 @@ func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) listener, err := net.Listen("unix", c.socketPath) if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) + log.Fatalf("error listening on socket %q: %v", c.socketPath, err) } go server(listener, outputFile) diff --git a/runsc/main.go b/runsc/main.go index 62e184ec9..59f624842 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -291,7 +291,7 @@ func main() { // want with them. Since Docker and Containerd both eat boot's stderr, we // dup our stderr to the provided log FD so that panics will appear in the // logs, rather than just disappear. - if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + if err := syscall.Dup3(fd, int(os.Stderr.Fd()), syscall.O_CLOEXEC); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) } } @@ -342,11 +342,11 @@ func main() { func newEmitter(format string, logFile io.Writer) log.Emitter { switch format { case "text": - return &log.GoogleEmitter{log.Writer{Next: logFile}} + return log.GoogleEmitter{&log.Writer{Next: logFile}} case "json": - return &log.JSONEmitter{log.Writer{Next: logFile}} + return log.JSONEmitter{&log.Writer{Next: logFile}} case "json-k8s": - return &log.K8sJSONEmitter{log.Writer{Next: logFile}} + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} } cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) panic("unreachable") diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0f4a9cf6d..837d5e238 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } + // PR_SET_NO_NEW_PRIVS is assumed to always be set. + // See kernel.Task.updateCredsForExecLocked. + if !spec.Process.NoNewPrivileges { + log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") + } + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index ed8689fd3..79c0ccf5c 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -211,11 +211,7 @@ func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { if b == nil { break } - layers, err := ParseEther(b) - if err != nil { - conn.t.Logf("can't parse frame: %s", err) - continue // Ignore packets that can't be parsed. - } + layers := Parse(ParseEther, b) if !conn.incoming.match(layers) { continue // Ignore packets that don't match the expected incoming. } @@ -418,11 +414,7 @@ func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { if b == nil { break } - layers, err := ParseEther(b) - if err != nil { - conn.t.Logf("can't parse frame: %s", err) - continue // Ignore packets that can't be parsed. - } + layers := Parse(ParseEther, b) if !conn.incoming.match(layers) { continue // Ignore packets that don't match the expected incoming. } diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 093a46e23..b467c15cc 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -153,7 +153,7 @@ func (l *Ether) toBytes() ([]byte, error) { fields.Type = header.IPv4ProtocolNumber default: // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", n) + return nil, fmt.Errorf("ethernet header's next layer is unrecognized: %#v", n) } } h.Encode(fields) @@ -172,27 +172,46 @@ func NetworkProtocolNumber(v tcpip.NetworkProtocolNumber) *tcpip.NetworkProtocol return &v } +// LayerParser parses the input bytes and returns a Layer along with the next +// LayerParser to run. If there is no more parsing to do, the returned +// LayerParser is nil. +type LayerParser func([]byte) (Layer, LayerParser) + +// Parse parses bytes starting with the first LayerParser and using successive +// LayerParsers until all the bytes are parsed. +func Parse(parser LayerParser, b []byte) Layers { + var layers Layers + for { + var layer Layer + layer, parser = parser(b) + layers = append(layers, layer) + if parser == nil { + break + } + b = b[layer.length():] + } + layers.linkLayers() + return layers +} + // ParseEther parses the bytes assuming that they start with an ethernet header // and continues parsing further encapsulations. -func ParseEther(b []byte) (Layers, error) { +func ParseEther(b []byte) (Layer, LayerParser) { h := header.Ethernet(b) ether := Ether{ SrcAddr: LinkAddress(h.SourceAddress()), DstAddr: LinkAddress(h.DestinationAddress()), Type: NetworkProtocolNumber(h.Type()), } - layers := Layers{ðer} + var nextParser LayerParser switch h.Type() { case header.IPv4ProtocolNumber: - moreLayers, err := ParseIPv4(b[ether.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseIPv4 default: - // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %#v", b) + // Assume that the rest is a payload. + nextParser = ParsePayload } + return ðer, nextParser } func (l *Ether) match(other Layer) bool { @@ -274,7 +293,7 @@ func (l *IPv4) toBytes() ([]byte, error) { fields.Protocol = uint8(header.UDPProtocolNumber) default: // TODO(b/150301488): Support more protocols as needed. - return nil, fmt.Errorf("can't deduce the ip header's next protocol: %#v", n) + return nil, fmt.Errorf("ipv4 header's next layer is unrecognized: %#v", n) } } if l.SrcAddr != nil { @@ -313,7 +332,7 @@ func Address(v tcpip.Address) *tcpip.Address { // ParseIPv4 parses the bytes assuming that they start with an ipv4 header and // continues parsing further encapsulations. -func ParseIPv4(b []byte) (Layers, error) { +func ParseIPv4(b []byte) (Layer, LayerParser) { h := header.IPv4(b) tos, _ := h.TOS() ipv4 := IPv4{ @@ -329,22 +348,17 @@ func ParseIPv4(b []byte) (Layers, error) { SrcAddr: Address(h.SourceAddress()), DstAddr: Address(h.DestinationAddress()), } - layers := Layers{&ipv4} + var nextParser LayerParser switch h.TransportProtocol() { case header.TCPProtocolNumber: - moreLayers, err := ParseTCP(b[ipv4.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseTCP case header.UDPProtocolNumber: - moreLayers, err := ParseUDP(b[ipv4.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseUDP + default: + // Assume that the rest is a payload. + nextParser = ParsePayload } - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", h.Protocol()) + return &ipv4, nextParser } func (l *IPv4) match(other Layer) bool { @@ -470,7 +484,7 @@ func Uint32(v uint32) *uint32 { // ParseTCP parses the bytes assuming that they start with a tcp header and // continues parsing further encapsulations. -func ParseTCP(b []byte) (Layers, error) { +func ParseTCP(b []byte) (Layer, LayerParser) { h := header.TCP(b) tcp := TCP{ SrcPort: Uint16(h.SourcePort()), @@ -483,12 +497,7 @@ func ParseTCP(b []byte) (Layers, error) { Checksum: Uint16(h.Checksum()), UrgentPointer: Uint16(h.UrgentPointer()), } - layers := Layers{&tcp} - moreLayers, err := ParsePayload(b[tcp.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + return &tcp, ParsePayload } func (l *TCP) match(other Layer) bool { @@ -557,8 +566,8 @@ func setUDPChecksum(h *header.UDP, udp *UDP) error { } // ParseUDP parses the bytes assuming that they start with a udp header and -// continues parsing further encapsulations. -func ParseUDP(b []byte) (Layers, error) { +// returns the parsed layer and the next parser to use. +func ParseUDP(b []byte) (Layer, LayerParser) { h := header.UDP(b) udp := UDP{ SrcPort: Uint16(h.SourcePort()), @@ -566,12 +575,7 @@ func ParseUDP(b []byte) (Layers, error) { Length: Uint16(h.Length()), Checksum: Uint16(h.Checksum()), } - layers := Layers{&udp} - moreLayers, err := ParsePayload(b[udp.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + return &udp, ParsePayload } func (l *UDP) match(other Layer) bool { @@ -603,11 +607,11 @@ func (l *Payload) String() string { // ParsePayload parses the bytes assuming that they start with a payload and // continue to the end. There can be no further encapsulations. -func ParsePayload(b []byte) (Layers, error) { +func ParsePayload(b []byte) (Layer, LayerParser) { payload := Payload{ Bytes: b, } - return Layers{&payload}, nil + return &payload, nil } func (l *Payload) toBytes() ([]byte, error) { @@ -625,15 +629,24 @@ func (l *Payload) length() int { // Layers is an array of Layer and supports similar functions to Layer. type Layers []Layer -func (ls *Layers) toBytes() ([]byte, error) { +// linkLayers sets the linked-list ponters in ls. +func (ls *Layers) linkLayers() { for i, l := range *ls { if i > 0 { l.setPrev((*ls)[i-1]) + } else { + l.setPrev(nil) } if i+1 < len(*ls) { l.setNext((*ls)[i+1]) + } else { + l.setNext(nil) } } +} + +func (ls *Layers) toBytes() ([]byte, error) { + ls.linkLayers() outBytes := []byte{} for _, l := range *ls { layerBytes, err := l.toBytes() diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index a9b2de9b9..956b1addf 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -40,6 +40,18 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_noaccept_close_rst", + srcs = ["tcp_noaccept_close_rst_test.go"], + # TODO(b/153380909): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go new file mode 100644 index 000000000..7ebdd1950 --- /dev/null +++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_noaccept_close_rst_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestTcpNoAcceptCloseReset(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn.Handshake() + defer conn.Close() + dut.Close(listenFd) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { + t.Fatalf("expected a RST-ACK packet but got none: %s", err) + } +} diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 4038661cb..679342def 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -53,7 +53,7 @@ func verifyPid(pid int, path string) error { if scanner.Err() != nil { return scanner.Err() } - return fmt.Errorf("got: %s, want: %d", gots, pid) + return fmt.Errorf("got: %v, want: %d", gots, pid) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. @@ -106,7 +106,7 @@ func TestMemCGroup(t *testing.T) { time.Sleep(100 * time.Millisecond) } - t.Fatalf("%vMB is less than %vMB: %v", memUsage>>20, allocMemSize>>20) + t.Fatalf("%vMB is less than %vMB", memUsage>>20, allocMemSize>>20) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. diff --git a/test/runtimes/blacklist_test.go b/test/runtimes/blacklist_test.go index 52f49b984..0ff69ab18 100644 --- a/test/runtimes/blacklist_test.go +++ b/test/runtimes/blacklist_test.go @@ -32,6 +32,6 @@ func TestBlacklists(t *testing.T) { t.Fatalf("error parsing blacklist: %v", err) } if *blacklistFile != "" && len(bl) == 0 { - t.Errorf("got empty blacklist for file %q", blacklistFile) + t.Errorf("got empty blacklist for file %q", *blacklistFile) } } diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go index ddb890dbc..3c98f4570 100644 --- a/test/runtimes/runner.go +++ b/test/runtimes/runner.go @@ -114,7 +114,7 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int F: func(t *testing.T) { // Is the test blacklisted? if _, ok := blacklist[tc]; ok { - t.Skip("SKIP: blacklisted test %q", tc) + t.Skipf("SKIP: blacklisted test %q", tc) } var ( diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index ff8bdfeb0..853f6231a 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -431,7 +431,6 @@ TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) { << "status = " << status; } -#ifdef __x86_64__ // Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. TEST(CloneTest, NonCanonicalTLS) { constexpr uintptr_t kNonCanonical = 1ull << 48; @@ -440,11 +439,25 @@ TEST(CloneTest, NonCanonicalTLS) { // on this. char stack; + // The raw system call interface on x86-64 is: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, int *child_tid, + // unsigned long tls); + // + // While on arm64, the order of the last two arguments is reversed: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, unsigned long tls, + // int *child_tid); +#if defined(__x86_64__) EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, nullptr, kNonCanonical), SyscallFailsWithErrno(EPERM)); -} +#elif defined(__aarch64__) + EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, + kNonCanonical, nullptr), + SyscallFailsWithErrno(EPERM)); #endif +} } // namespace } // namespace testing diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc index f97f60029..f87cdd7a1 100644 --- a/test/syscalls/linux/getrandom.cc +++ b/test/syscalls/linux/getrandom.cc @@ -29,6 +29,8 @@ namespace { #define SYS_getrandom 318 #elif defined(__i386__) #define SYS_getrandom 355 +#elif defined(__aarch64__) +#define SYS_getrandom 278 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc index a8af8e545..6ce1e6cc3 100644 --- a/test/syscalls/linux/lseek.cc +++ b/test/syscalls/linux/lseek.cc @@ -53,7 +53,7 @@ TEST(LseekTest, NegativeOffset) { // A 32-bit off_t is not large enough to represent an offset larger than // maximum file size on standard file systems, so it isn't possible to cause // overflow. -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) TEST(LseekTest, Overflow) { // HA! Classic Linux. We really should have an EOVERFLOW // here, since we're seeking to something that cannot be diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc index 367a90fe1..78ac96bed 100644 --- a/test/syscalls/linux/mlock.cc +++ b/test/syscalls/linux/mlock.cc @@ -199,8 +199,10 @@ TEST(MunlockallTest, Basic) { } #ifndef SYS_mlock2 -#ifdef __x86_64__ +#if defined(__x86_64__) #define SYS_mlock2 325 +#elif defined(__aarch64__) +#define SYS_mlock2 284 #endif #endif diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 11fb1b457..6d3227ab6 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -361,7 +361,7 @@ TEST_F(MMapTest, MapFixed) { } // 64-bit addresses work too -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) TEST_F(MMapTest, MapFixed64) { EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), @@ -571,6 +571,12 @@ const uint8_t machine_code[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax 0xc3, // retq }; +#elif defined(__aarch64__) +const uint8_t machine_code[] = { + 0x40, 0x05, 0x80, 0x52, // mov w0, #42 + 0xc0, 0x03, 0x5f, 0xd6, // ret +}; +#endif // PROT_EXEC allows code execution TEST_F(MMapTest, ProtExec) { @@ -605,7 +611,6 @@ TEST_F(MMapTest, NoProtExecDeath) { EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), ""); } -#endif TEST_F(MMapTest, NoExceedLimitData) { void* prevbrk; @@ -1644,6 +1649,7 @@ TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { } // Conditional on MAP_32BIT. +// This flag is supported only on x86-64, for 64-bit programs. #ifdef __x86_64__ TEST(MMapNoFixtureTest, Map32Bit) { diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index 5d3a39868..53b678e94 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -364,11 +364,6 @@ CreateTCPConnectAcceptSocketPair(int bound, int connected, int type, } MaybeSave(); // Successful accept. - // FIXME(b/110484944) - if (connect_result == -1) { - absl::SleepFor(absl::Seconds(1)); - } - T extra_addr = {}; LocalhostAddr(&extra_addr, dual_stack); return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 8b00ef44c..3231732ec 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -41,12 +41,12 @@ class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNonexistentFile) { const char* path = "/does/not/exist"; - EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), - SyscallFailsWithErrno(ENOENT)); - EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + const char* name = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENOENT)); EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT)); - EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENOENT)); } TEST_F(XattrTest, XattrNullName) { diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go index 3437aa476..309ee9c21 100644 --- a/tools/go_stateify/main.go +++ b/tools/go_stateify/main.go @@ -206,7 +206,7 @@ func main() { initCalls = append(initCalls, fmt.Sprintf("%sRegister(\"%s.%s\", (*%s)(nil), state.Fns{Save: (*%s).save, Load: (*%s).load})", statePrefix, *fullPkg, name, name, name, name)) } emitZeroCheck := func(name string) { - fmt.Fprintf(outputFile, " if !%sIsZeroValue(x.%s) { m.Failf(\"%s is %%v, expected zero\", x.%s) }\n", statePrefix, name, name, name) + fmt.Fprintf(outputFile, " if !%sIsZeroValue(&x.%s) { m.Failf(\"%s is %%#v, expected zero\", &x.%s) }\n", statePrefix, name, name, name) } emitLoadValue := func(name, typName string) { fmt.Fprintf(outputFile, " m.LoadValue(\"%s\", new(%s), func(y interface{}) { x.load%s(y.(%s)) })\n", name, typName, camelCased(name), typName) diff --git a/tools/image_build.sh b/tools/image_build.sh deleted file mode 100755 index 9b20a740d..000000000 --- a/tools/image_build.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script is responsible for building a new GCP image that: 1) has nested -# virtualization enabled, and 2) has been completely set up with the -# image_setup.sh script. This script should be idempotent, as we memoize the -# setup script with a hash and check for that name. -# -# The GCP project name should be defined via a gcloud config. - -set -xeo pipefail - -# Parameters. -declare -r ZONE=${ZONE:-us-central1-f} -declare -r USERNAME=${USERNAME:-test} -declare -r IMAGE_PROJECT=${IMAGE_PROJECT:-ubuntu-os-cloud} -declare -r IMAGE_FAMILY=${IMAGE_FAMILY:-ubuntu-1604-lts} - -# Random names. -declare -r DISK_NAME=$(mktemp -u disk-XXXXXX | tr A-Z a-z) -declare -r SNAPSHOT_NAME=$(mktemp -u snapshot-XXXXXX | tr A-Z a-z) -declare -r INSTANCE_NAME=$(mktemp -u build-XXXXXX | tr A-Z a-z) - -# Hashes inputs. -declare -r SETUP_BLOB=$(echo ${ZONE} ${USERNAME} ${IMAGE_PROJECT} ${IMAGE_FAMILY} && sha256sum "$@") -declare -r SETUP_HASH=$(echo ${SETUP_BLOB} | sha256sum - | cut -d' ' -f1 | cut -c 1-16) -declare -r IMAGE_NAME=${IMAGE_NAME:-image-}${SETUP_HASH} - -# Does the image already exist? Skip the build. -declare -r existing=$(gcloud compute images list --filter="name=(${IMAGE_NAME})" --format="value(name)") -if ! [[ -z "${existing}" ]]; then - echo "${existing}" - exit 0 -fi - -# Set the zone for all actions. -gcloud config set compute/zone "${ZONE}" - -# Start a unique instance. Note that this instance will have a unique persistent -# disk as it's boot disk with the same name as the instance. -gcloud compute instances create \ - --quiet \ - --image-project "${IMAGE_PROJECT}" \ - --image-family "${IMAGE_FAMILY}" \ - --boot-disk-size "200GB" \ - "${INSTANCE_NAME}" -function cleanup { - gcloud compute instances delete --quiet "${INSTANCE_NAME}" -} -trap cleanup EXIT - -# Wait for the instance to become available. -declare attempts=0 -while [[ "${attempts}" -lt 30 ]]; do - attempts=$((${attempts}+1)) - if gcloud compute ssh "${USERNAME}"@"${INSTANCE_NAME}" -- true; then - break - fi -done -if [[ "${attempts}" -ge 30 ]]; then - echo "too many attempts: failed" - exit 1 -fi - -# Run the install scripts provided. -for arg; do - gcloud compute ssh "${USERNAME}"@"${INSTANCE_NAME}" -- sudo bash - <"${arg}" -done - -# Stop the instance; required before creating an image. -gcloud compute instances stop --quiet "${INSTANCE_NAME}" - -# Create a snapshot of the instance disk. -gcloud compute disks snapshot \ - --quiet \ - --zone="${ZONE}" \ - --snapshot-names="${SNAPSHOT_NAME}" \ - "${INSTANCE_NAME}" - -# Create the disk image. -gcloud compute images create \ - --quiet \ - --source-snapshot="${SNAPSHOT_NAME}" \ - --licenses="https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" \ - "${IMAGE_NAME}" diff --git a/tools/images/BUILD b/tools/images/BUILD index 66ffd02aa..8d319e3e4 100644 --- a/tools/images/BUILD +++ b/tools/images/BUILD @@ -6,14 +6,9 @@ package( licenses = ["notice"], ) -genrule( +sh_binary( name = "zone", - outs = ["zone.txt"], - cmd = "gcloud config get-value compute/zone > \"$@\"", - tags = [ - "local", - "manual", - ], + srcs = ["zone.sh"], ) sh_binary( diff --git a/tools/images/README.md b/tools/images/README.md new file mode 100644 index 000000000..26c0f84f2 --- /dev/null +++ b/tools/images/README.md @@ -0,0 +1,42 @@ +# Images + +All commands in this directory require the `gcloud` project to be set. + +For example: `gcloud config set project gvisor-kokoro-testing`. + +Images can be generated by using the `vm_image` rule. This rule will generate a +binary target that builds an image in an idempotent way, and can be referenced +from other rules. + +For example: + +``` +vm_image( + name = "ubuntu", + project = "ubuntu-1604-lts", + family = "ubuntu-os-cloud", + scripts = [ + "script.sh", + "other.sh", + ], +) +``` + +These images can be built manually by executing the target. The output on +`stdout` will be the image id (in the current project). + +Images are always named per the hash of all the hermetic input scripts. This +allows images to be memoized quickly and easily. + +The `vm_test` rule can be used to execute a command remotely. This is still +under development however, and will likely change over time. + +For example: + +``` +vm_test( + name = "mycommand", + image = ":ubuntu", + targets = [":test"], +) +``` diff --git a/tools/images/build.sh b/tools/images/build.sh index f89f39cbd..f39f723b8 100755 --- a/tools/images/build.sh +++ b/tools/images/build.sh @@ -19,7 +19,7 @@ # image_setup.sh script. This script should be idempotent, as we memoize the # setup script with a hash and check for that name. -set -xeou pipefail +set -eou pipefail # Parameters. declare -r USERNAME=${USERNAME:-test} @@ -34,10 +34,10 @@ declare -r INSTANCE_NAME=$(mktemp -u build-XXXXXX | tr A-Z a-z) # Hash inputs in order to memoize the produced image. declare -r SETUP_HASH=$( (echo ${USERNAME} ${IMAGE_PROJECT} ${IMAGE_FAMILY} && cat "$@") | sha256sum - | cut -d' ' -f1 | cut -c 1-16) -declare -r IMAGE_NAME=${IMAGE_FAMILY:-image-}${SETUP_HASH} +declare -r IMAGE_NAME=${IMAGE_FAMILY:-image}-${SETUP_HASH} # Does the image already exist? Skip the build. -declare -r existing=$(gcloud compute images list --filter="name=(${IMAGE_NAME})" --format="value(name)") +declare -r existing=$(set -x; gcloud compute images list --filter="name=(${IMAGE_NAME})" --format="value(name)") if ! [[ -z "${existing}" ]]; then echo "${existing}" exit 0 @@ -48,28 +48,30 @@ export PATH=${PATH:-/bin:/usr/bin:/usr/local/bin} # Start a unique instance. Note that this instance will have a unique persistent # disk as it's boot disk with the same name as the instance. -gcloud compute instances create \ +(set -x; gcloud compute instances create \ --quiet \ --image-project "${IMAGE_PROJECT}" \ --image-family "${IMAGE_FAMILY}" \ --boot-disk-size "200GB" \ --zone "${ZONE}" \ - "${INSTANCE_NAME}" >/dev/null + "${INSTANCE_NAME}" >/dev/null) function cleanup { - gcloud compute instances delete --quiet --zone "${ZONE}" "${INSTANCE_NAME}" + (set -x; gcloud compute instances delete --quiet --zone "${ZONE}" "${INSTANCE_NAME}") } trap cleanup EXIT # Wait for the instance to become available (up to 5 minutes). +echo -n "Waiting for ${INSTANCE_NAME}" declare timeout=300 declare success=0 declare internal="" declare -r start=$(date +%s) declare -r end=$((${start}+${timeout})) while [[ "$(date +%s)" -lt "${end}" ]] && [[ "${success}" -lt 3 ]]; do - if gcloud compute ssh --zone "${internal}" "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then + echo -n "." + if gcloud compute ssh --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then success=$((${success}+1)) - elif gcloud compute ssh --zone --internal-ip "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then + elif gcloud compute ssh --internal-ip --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then success=$((${success}+1)) internal="--internal-ip" fi @@ -78,29 +80,34 @@ done if [[ "${success}" -eq "0" ]]; then echo "connect timed out after ${timeout} seconds." exit 1 +else + echo "done." fi # Run the install scripts provided. for arg; do - gcloud compute ssh --zone "${internal}" "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- sudo bash - <"${arg}" >/dev/null + (set -x; gcloud compute ssh ${internal} \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + sudo bash - <"${arg}" >/dev/null) done # Stop the instance; required before creating an image. -gcloud compute instances stop --quiet --zone "${ZONE}" "${INSTANCE_NAME}" >/dev/null +(set -x; gcloud compute instances stop --quiet --zone "${ZONE}" "${INSTANCE_NAME}" >/dev/null) # Create a snapshot of the instance disk. -gcloud compute disks snapshot \ +(set -x; gcloud compute disks snapshot \ --quiet \ --zone "${ZONE}" \ --snapshot-names="${SNAPSHOT_NAME}" \ - "${INSTANCE_NAME}" >/dev/null + "${INSTANCE_NAME}" >/dev/null) # Create the disk image. -gcloud compute images create \ +(set -x; gcloud compute images create \ --quiet \ --source-snapshot="${SNAPSHOT_NAME}" \ --licenses="https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" \ - "${IMAGE_NAME}" >/dev/null + "${IMAGE_NAME}" >/dev/null) # Finish up. echo "${IMAGE_NAME}" diff --git a/tools/images/defs.bzl b/tools/images/defs.bzl index de365d153..2847e1847 100644 --- a/tools/images/defs.bzl +++ b/tools/images/defs.bzl @@ -1,76 +1,49 @@ -"""Image configuration. - -Images can be generated by using the vm_image rule. For example, - - vm_image( - name = "ubuntu", - project = "...", - family = "...", - scripts = [ - "script.sh", - "other.sh", - ], - ) - -This will always create an vm_image in the current default gcloud project. The -rule has a text file as its output containing the image name. This will enforce -serialization for all dependent rules. - -Images are always named per the hash of all the hermetic input scripts. This -allows images to be memoized quickly and easily. - -The vm_test rule can be used to execute a command remotely. For example, - - vm_test( - name = "mycommand", - image = ":myimage", - targets = [":test"], - ) -""" +"""Image configuration. See README.md.""" load("//tools:defs.bzl", "default_installer") -def _vm_image_impl(ctx): +# vm_image_builder is a rule that will construct a shell script that actually +# generates a given VM image. Note that this does not _run_ the shell script +# (although it can be run manually). It will be run manually during generation +# of the vm_image target itself. This level of indirection is used so that the +# build system itself only runs the builder once when multiple targets depend +# on it, avoiding a set of races and conflicts. +def _vm_image_builder_impl(ctx): + # Generate a binary that actually builds the image. + builder = ctx.actions.declare_file(ctx.label.name) script_paths = [] for script in ctx.files.scripts: script_paths.append(script.short_path) + builder_content = "\n".join([ + "#!/bin/bash", + "export ZONE=$(%s)" % ctx.files.zone[0].short_path, + "export USERNAME=%s" % ctx.attr.username, + "export IMAGE_PROJECT=%s" % ctx.attr.project, + "export IMAGE_FAMILY=%s" % ctx.attr.family, + "%s %s" % (ctx.files._builder[0].short_path, " ".join(script_paths)), + "", + ]) + ctx.actions.write(builder, builder_content, is_executable = True) - resolved_inputs, argv, runfiles_manifests = ctx.resolve_command( - command = "USERNAME=%s ZONE=$(cat %s) IMAGE_PROJECT=%s IMAGE_FAMILY=%s %s %s > %s" % - ( - ctx.attr.username, - ctx.files.zone[0].path, - ctx.attr.project, - ctx.attr.family, - ctx.executable.builder.path, - " ".join(script_paths), - ctx.outputs.out.path, - ), - tools = [ctx.attr.builder] + ctx.attr.scripts, - ) - - ctx.actions.run_shell( - tools = resolved_inputs, - outputs = [ctx.outputs.out], - progress_message = "Building image...", - execution_requirements = {"local": "true"}, - command = argv, - input_manifests = runfiles_manifests, - ) + # Note that the scripts should only be files, and should not include any + # indirect transitive dependencies. The build script wouldn't work. return [DefaultInfo( - files = depset([ctx.outputs.out]), - runfiles = ctx.runfiles(files = [ctx.outputs.out]), + executable = builder, + runfiles = ctx.runfiles( + files = ctx.files.scripts + ctx.files._builder + ctx.files.zone, + ), )] -_vm_image = rule( +vm_image_builder = rule( attrs = { - "builder": attr.label( + "_builder": attr.label( executable = True, default = "//tools/images:builder", cfg = "host", ), "username": attr.string(default = "$(whoami)"), "zone": attr.label( + executable = True, default = "//tools/images:zone", cfg = "host", ), @@ -78,20 +51,55 @@ _vm_image = rule( "project": attr.string(mandatory = True), "scripts": attr.label_list(allow_files = True), }, - outputs = { - "out": "%{name}.txt", + executable = True, + implementation = _vm_image_builder_impl, +) + +# See vm_image_builder above. +def _vm_image_impl(ctx): + # Run the builder to generate our output. + echo = ctx.actions.declare_file(ctx.label.name) + resolved_inputs, argv, runfiles_manifests = ctx.resolve_command( + command = "echo -ne \"#!/bin/bash\\necho $(%s)\\n\" > %s && chmod 0755 %s" % ( + ctx.files.builder[0].path, + echo.path, + echo.path, + ), + tools = [ctx.attr.builder], + ) + ctx.actions.run_shell( + tools = resolved_inputs, + outputs = [echo], + progress_message = "Building image...", + execution_requirements = {"local": "true"}, + command = argv, + input_manifests = runfiles_manifests, + ) + + # Return just the echo command. All of the builder runfiles have been + # resolved and consumed in the generation of the trivial echo script. + return [DefaultInfo(executable = echo)] + +_vm_image = rule( + attrs = { + "builder": attr.label( + executable = True, + cfg = "host", + ), }, + executable = True, implementation = _vm_image_impl, ) -def vm_image(**kwargs): - _vm_image( - tags = [ - "local", - "manual", - ], +def vm_image(name, **kwargs): + vm_image_builder( + name = name + "_builder", **kwargs ) + _vm_image( + name = name, + builder = ":" + name + "_builder", + ) def _vm_test_impl(ctx): runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) diff --git a/tools/images/zone.sh b/tools/images/zone.sh new file mode 100755 index 000000000..79569fb19 --- /dev/null +++ b/tools/images/zone.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +exec gcloud config get-value compute/zone diff --git a/tools/nogo.json b/tools/nogo.json index 83cb76b93..ae969409e 100644 --- a/tools/nogo.json +++ b/tools/nogo.json @@ -9,27 +9,6 @@ "/external/": "allowed: not subject to unsafe naming rules" } }, - "copylocks": { - "exclude_files": { - ".*_state_autogen.go": "fix: m.Failf copies by value", - "/pkg/log/json.go": "fix: Emit passes lock by value: gvisor.dev/gvisor/pkg/log.JSONEmitter contains gvisor.dev/gvisor/pkg/log.Writer contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/log/log_test.go": "fix: call of fmt.Printf copies lock value: gvisor.dev/gvisor/pkg/log.Writer contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/sentry/fs/host/socket_test.go": "fix: call of t.Errorf copies lock value: gvisor.dev/gvisor/pkg/sentry/fs/host.ConnectedEndpoint contains gvisor.dev/gvisor/pkg/refs.AtomicRefCount contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/sentry/fs/proc/sys_net.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/proc.tcpMemInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/fs/proc/sys_net.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/proc.tcpSack contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/fs/tty/slave.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/tty.slaveInodeOperations contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/kernel/time/time.go": "fix: Readiness passes lock by value: gvisor.dev/gvisor/pkg/sentry/kernel/time.ClockEventsQueue contains gvisor.dev/gvisor/pkg/waiter.Queue contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/kernel/syscalls_state.go": "fix: assignment copies lock value to *s: gvisor.dev/gvisor/pkg/sentry/kernel.SyscallTable contains gvisor.dev/gvisor/pkg/sentry/kernel.SyscallFlagsTable contains gvisor.dev/gvisor/pkg/sync.Mutex" - } - }, - "lostcancel": { - "exclude_files": { - "/pkg/tcpip/network/arp/arp_test.go": "fix: the cancel function returned by context.WithTimeout should be called, not discarded, to avoid a context leak", - "/pkg/tcpip/stack/ndp_test.go": "fix: the cancel function returned by context.WithTimeout should be called, not discarded, to avoid a context leak", - "/pkg/tcpip/transport/udp/udp_test.go": "fix: the cancel function returned by context.WithTimeout should be called, not discarded, to avoid a context leak", - "/pkg/tcpip/transport/tcp/testing/context/context.go": "fix: the cancel function returned by context.WithTimeout should be called, not discarded, to avoid a context leak" - } - }, "nilness": { "exclude_files": { "/com_github_vishvananda_netlink/route_linux.go": "allowed: false positive", @@ -40,37 +19,6 @@ "/external/io_opencensus_go/tag/map_codec.go": "allowed: false positive" } }, - "printf": { - "exclude_files": { - ".*_abi_autogen_test.go": "fix: Sprintf format has insufficient args", - "/pkg/segment/test/segment_test.go": "fix: Errorf format %d arg seg.Start is a func value, not called", - "/pkg/tcpip/tcpip_test.go": "fix: Error call has possible formatting directive %q", - "/pkg/tcpip/header/eth_test.go": "fix: Fatalf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/header/ndp_test.go": "fix: Errorf format %d reads arg #1, but call has 0 args", - "/pkg/eventchannel/event_test.go": "fix: Fatal call has possible formatting directive %v", - "/pkg/tcpip/stack/ndp.go": "fix: Fatalf format %s has arg protocolAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %s has arg flags of wrong type gvisor.dev/gvisor/pkg/sentry/fs.FileFlags", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %d arg f.FD is a func value, not called", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/tcpip/transport/udp/udp_test.go": "fix: Fatalf format %s has arg h.srcAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.FullAddress", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Fatalf format %s has arg tcpTW of wrong type gvisor.dev/gvisor/pkg/tcpip.TCPTimeWaitTimeoutOption", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Errorf call needs 1 arg but has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Errorf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Fatalf format %s reads arg #5, but call has 4 args", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg protoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic1ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic2ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatal call has possible formatting directive %t", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf call has arguments but no formatting directives", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/sentry/fsimpl/tmpfs/stat_test.go": "fix: Errorf format %v reads arg #1, but call has 0 args", - "/runsc/container/test_app/test_app.go": "fix: Fatal call has possible formatting directive %q", - "/test/root/cgroup_test.go": "fix: Errorf format %s has arg gots of wrong type []int", - "/test/root/cgroup_test.go": "fix: Fatalf format %v reads arg #3, but call has 2 args", - "/test/runtimes/runner.go": "fix: Skip call has possible formatting directive %q", - "/test/runtimes/blacklist_test.go": "fix: Errorf format %q has arg blacklistFile of wrong type *string" - } - }, "structtag": { "exclude_files": { "/external/": "allowed: may use arbitrary tags" @@ -83,16 +31,9 @@ "/pkg/gohacks/gohacks_unsafe.go": "allowed: special case", "/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go": "allowed: special case", "/pkg/sentry/platform/kvm/(bluepill|machine)_unsafe.go": "allowed: special case", - "/pkg/sentry/platform/kvm/machine_arm64_unsafe.go": "fix: gvisor.dev/issue/22464", "/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go": "allowed: special case", "/pkg/sentry/platform/safecopy/safecopy_unsafe.go": "allowed: special case", "/pkg/sentry/vfs/mount_unsafe.go": "allowed: special case" } - }, - "unusedresult": { - "exclude_files": { - "/pkg/sentry/fsimpl/proc/task_net.go": "fix: result of fmt.Sprintf call not used", - "/pkg/sentry/fsimpl/proc/tasks_net.go": "fix: result of fmt.Sprintf call not used" - } } } |