From 24cfbf4b981a76e46cab47650ef514835990b72e Mon Sep 17 00:00:00 2001 From: Brad Burlage Date: Fri, 24 Jan 2020 11:44:31 -0800 Subject: Fix corpus_name to match our ingestion config[1]. PiperOrigin-RevId: 291412676 --- kokoro/kythe/generate_xrefs.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kokoro/kythe/generate_xrefs.sh b/kokoro/kythe/generate_xrefs.sh index 4c104afdb..7a0fbb3cd 100644 --- a/kokoro/kythe/generate_xrefs.sh +++ b/kokoro/kythe/generate_xrefs.sh @@ -16,8 +16,6 @@ set -ex -# Install the latest version of Bazel. The default on Kokoro images is out of -# date. if command -v use_bazel.sh >/dev/null; then use_bazel.sh latest fi @@ -45,7 +43,7 @@ bazel \ --bazelrc="${KYTHE_DIR}/extractors.bazelrc" \ build \ --override_repository kythe_release="${KYTHE_DIR}" \ - --define=kythe_corpus=gvisor.dev \ + --define=kythe_corpus=github.com/google/gvisor \ --cxxopt=-std=c++17 \ --config=remote \ --auth_credentials="${KOKORO_BAZEL_AUTH_CREDENTIAL}" \ -- cgit v1.2.3 From 390bb9c241c2b05c311579562d95cc39d899157b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 24 Jan 2020 11:58:13 -0800 Subject: Ignore external SIGURG Go 1.14+ sends SIGURG to Ms to attempt asynchronous preemption of a G. Since it can't guarantee that a SIGURG is only related to preemption, it continues to forward them to signal.Notify (see runtime.sighandler). We should ignore these signals, as applications shouldn't receive them. Note that this means that truly external SIGURG can no longer be sent to the application (as with SIGCHLD). PiperOrigin-RevId: 291415357 --- pkg/sentry/kernel/signal.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go index 02eede93d..e8cce37d0 100644 --- a/pkg/sentry/kernel/signal.go +++ b/pkg/sentry/kernel/signal.go @@ -38,6 +38,9 @@ const SignalPanic = linux.SIGUSR2 // Preconditions: Kernel must have an init process. 
func (k *Kernel) sendExternalSignal(info *arch.SignalInfo, context string) { switch linux.Signal(info.Signo) { + case linux.SIGURG: + // Sent by the Go 1.14+ runtime for asynchronous goroutine preemption. + case platform.SignalInterrupt: // Assume that a call to platform.Context.Interrupt() misfired. -- cgit v1.2.3 From fb80979e3fe2614414d2d23c27e41bdb9e7c8541 Mon Sep 17 00:00:00 2001 From: Ghanan Gowripalan Date: Fri, 24 Jan 2020 12:29:13 -0800 Subject: Increase timeouts for NDP tests' async events Increase the timeout to 1s when waiting for async NDP events to help reduce flakiness. This will not significantly increase test times as the async events continue to receive an event on a channel. The increased timeout allows more time for an event to be sent on the channel as the previous timeout of 100ms caused some flakes. Test: Existing tests pass PiperOrigin-RevId: 291420936 --- pkg/tcpip/stack/ndp_test.go | 47 +++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 376681b30..f9460bd51 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -35,13 +35,14 @@ import ( ) const ( - addr1 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01") - addr2 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02") - addr3 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03") - linkAddr1 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06") - linkAddr2 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x07") - linkAddr3 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x08") - defaultTimeout = 100 * time.Millisecond + addr1 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01") + addr2 = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02") + addr3 = 
tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03") + linkAddr1 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06") + linkAddr2 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x07") + linkAddr3 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x08") + defaultTimeout = 100 * time.Millisecond + defaultAsyncEventTimeout = time.Second ) var ( @@ -1086,7 +1087,7 @@ func TestRouterDiscovery(t *testing.T) { // Wait for the normal lifetime plus an extra bit for the // router to get invalidated. If we don't get an invalidation // event after this time, then something is wrong. - expectAsyncRouterInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second+defaultTimeout) + expectAsyncRouterInvalidationEvent(llAddr2, l2LifetimeSeconds*time.Second+defaultAsyncEventTimeout) // Rx an RA from lladdr2 with huge lifetime. e.InjectInbound(header.IPv6ProtocolNumber, raBuf(llAddr2, 1000)) @@ -1103,7 +1104,7 @@ func TestRouterDiscovery(t *testing.T) { // Wait for the normal lifetime plus an extra bit for the // router to get invalidated. If we don't get an invalidation // event after this time, then something is wrong. 
- expectAsyncRouterInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second+defaultTimeout) + expectAsyncRouterInvalidationEvent(llAddr3, l3LifetimeSeconds*time.Second+defaultAsyncEventTimeout) } // TestRouterDiscoveryMaxRouters tests that only @@ -1342,7 +1343,7 @@ func TestPrefixDiscovery(t *testing.T) { if diff := checkPrefixEvent(e, subnet2, false); diff != "" { t.Errorf("prefix event mismatch (-want +got):\n%s", diff) } - case <-time.After(time.Duration(lifetime)*time.Second + defaultTimeout): + case <-time.After(time.Duration(lifetime)*time.Second + defaultAsyncEventTimeout): t.Fatal("timed out waiting for prefix discovery event") } @@ -1681,7 +1682,7 @@ func TestAutoGenAddr(t *testing.T) { if diff := checkAutoGenAddrEvent(e, addr1, invalidatedAddr); diff != "" { t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) } - case <-time.After(newMinVLDuration + defaultTimeout): + case <-time.After(newMinVLDuration + defaultAsyncEventTimeout): t.Fatal("timed out waiting for addr auto gen event") } if contains(s.NICInfo()[1].ProtocolAddresses, addr1) { @@ -1987,7 +1988,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { expectPrimaryAddr(addr1) // Wait for addr of prefix1 to be deprecated. - expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultTimeout) + expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncEventTimeout) if !contains(s.NICInfo()[nicID].ProtocolAddresses, addr1) { t.Fatalf("should not have %s in the list of addresses", addr1) } @@ -2027,7 +2028,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { expectPrimaryAddr(addr1) // Wait for addr of prefix1 to be deprecated. 
- expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultTimeout) + expectAutoGenAddrEventAfter(addr1, deprecatedAddr, newMinVLDuration-time.Second+defaultAsyncEventTimeout) if !contains(s.NICInfo()[nicID].ProtocolAddresses, addr1) { t.Fatalf("should not have %s in the list of addresses", addr1) } @@ -2041,7 +2042,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { } // Wait for addr of prefix1 to be invalidated. - expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultTimeout) + expectAutoGenAddrEventAfter(addr1, invalidatedAddr, time.Second+defaultAsyncEventTimeout) if contains(s.NICInfo()[nicID].ProtocolAddresses, addr1) { t.Fatalf("should not have %s in the list of addresses", addr1) } @@ -2073,7 +2074,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff != "" { t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) } - case <-time.After(defaultTimeout): + case <-time.After(defaultAsyncEventTimeout): t.Fatal("timed out waiting for addr auto gen event") } } else if diff := checkAutoGenAddrEvent(e, addr2, invalidatedAddr); diff == "" { @@ -2088,7 +2089,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { t.Fatalf("got unexpected auto-generated event") } - case <-time.After(newMinVLDuration + defaultTimeout): + case <-time.After(newMinVLDuration + defaultAsyncEventTimeout): t.Fatal("timed out waiting for addr auto gen event") } if contains(s.NICInfo()[nicID].ProtocolAddresses, addr1) { @@ -2213,7 +2214,7 @@ func TestAutoGenAddrFiniteToInfiniteToFiniteVL(t *testing.T) { t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) } - case <-time.After(minVLSeconds*time.Second + defaultTimeout): + case <-time.After(minVLSeconds*time.Second + defaultAsyncEventTimeout): t.Fatal("timeout waiting for addr auto gen event") } }) @@ -2701,7 +2702,7 @@ func TestAutoGenAddrWithOpaqueIID(t *testing.T) { if diff := 
checkAutoGenAddrEvent(e, addr1, invalidatedAddr); diff != "" { t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff) } - case <-time.After(validLifetimeSecondPrefix1*time.Second + defaultTimeout): + case <-time.After(validLifetimeSecondPrefix1*time.Second + defaultAsyncEventTimeout): t.Fatal("timed out waiting for addr auto gen event") } if contains(s.NICInfo()[nicID].ProtocolAddresses, addr1) { @@ -3325,12 +3326,12 @@ func TestRouterSolicitation(t *testing.T) { // times. remaining := test.maxRtrSolicit if remaining > 0 { - waitForPkt(test.effectiveMaxRtrSolicitDelay + defaultTimeout) + waitForPkt(test.effectiveMaxRtrSolicitDelay + defaultAsyncEventTimeout) remaining-- } for ; remaining > 0; remaining-- { waitForNothing(test.effectiveRtrSolicitInt - defaultTimeout) - waitForPkt(2 * defaultTimeout) + waitForPkt(defaultAsyncEventTimeout) } // Make sure no more RS. @@ -3411,9 +3412,9 @@ func TestStopStartSolicitingRouters(t *testing.T) { // Disable forwarding which should start router solicitations. s.SetForwarding(false) - waitForPkt(delay + defaultTimeout) - waitForPkt(interval + defaultTimeout) - waitForPkt(interval + defaultTimeout) + waitForPkt(delay + defaultAsyncEventTimeout) + waitForPkt(interval + defaultAsyncEventTimeout) + waitForPkt(interval + defaultAsyncEventTimeout) select { case <-e.C: t.Fatal("unexpectedly got an extra packet after sending out the expected RSs") -- cgit v1.2.3 From d135b5abf6eafa92d2745dc98d48ef39d2f90e75 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 24 Jan 2020 12:53:29 -0800 Subject: Add anonymous device number allocation to VFS2. Note that in VFS2, filesystem device numbers are per-vfs.FilesystemImpl rather than global, avoiding the need for a "registry" type to handle save/restore. (This is more consistent with Linux anyway: compare e.g. mm/shmem.c:shmem_mount() => fs/super.c:mount_nodev() => (indirectly) set_anon_super().) 
PiperOrigin-RevId: 291425193 --- pkg/sentry/vfs/device.go | 29 +++++++++++++++++++++++++++++ pkg/sentry/vfs/vfs.go | 18 ++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go index cb672e36f..9f9d6e783 100644 --- a/pkg/sentry/vfs/device.go +++ b/pkg/sentry/vfs/device.go @@ -98,3 +98,32 @@ func (vfs *VirtualFilesystem) OpenDeviceSpecialFile(ctx context.Context, mnt *Mo } return rd.dev.Open(ctx, mnt, d, *opts) } + +// GetAnonBlockDevMinor allocates and returns an unused minor device number for +// an "anonymous" block device with major number 0. +func (vfs *VirtualFilesystem) GetAnonBlockDevMinor() (uint32, error) { + vfs.anonBlockDevMinorMu.Lock() + defer vfs.anonBlockDevMinorMu.Unlock() + minor := vfs.anonBlockDevMinorNext + const maxDevMinor = (1 << 20) - 1 + for minor < maxDevMinor { + if _, ok := vfs.anonBlockDevMinor[minor]; !ok { + vfs.anonBlockDevMinor[minor] = struct{}{} + vfs.anonBlockDevMinorNext = minor + 1 + return minor, nil + } + minor++ + } + return 0, syserror.EMFILE +} + +// PutAnonBlockDevMinor deallocates a minor device number returned by a +// previous call to GetAnonBlockDevMinor. +func (vfs *VirtualFilesystem) PutAnonBlockDevMinor(minor uint32) { + vfs.anonBlockDevMinorMu.Lock() + defer vfs.anonBlockDevMinorMu.Unlock() + delete(vfs.anonBlockDevMinor, minor) + if minor < vfs.anonBlockDevMinorNext { + vfs.anonBlockDevMinorNext = minor + } +} diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 1f21b0b31..1f6f56293 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -80,6 +80,14 @@ type VirtualFilesystem struct { devicesMu sync.RWMutex devices map[devTuple]*registeredDevice + // anonBlockDevMinor contains all allocated anonymous block device minor + // numbers. anonBlockDevMinorNext is a lower bound for the smallest + // unallocated anonymous block device number. 
anonBlockDevMinorNext and + // anonBlockDevMinor are protected by anonBlockDevMinorMu. + anonBlockDevMinorMu sync.Mutex + anonBlockDevMinorNext uint32 + anonBlockDevMinor map[uint32]struct{} + // fsTypes contains all registered FilesystemTypes. fsTypes is protected by // fsTypesMu. fsTypesMu sync.RWMutex @@ -94,10 +102,12 @@ type VirtualFilesystem struct { // New returns a new VirtualFilesystem with no mounts or FilesystemTypes. func New() *VirtualFilesystem { vfs := &VirtualFilesystem{ - mountpoints: make(map[*Dentry]map[*Mount]struct{}), - devices: make(map[devTuple]*registeredDevice), - fsTypes: make(map[string]*registeredFilesystemType), - filesystems: make(map[*Filesystem]struct{}), + mountpoints: make(map[*Dentry]map[*Mount]struct{}), + devices: make(map[devTuple]*registeredDevice), + anonBlockDevMinorNext: 1, + anonBlockDevMinor: make(map[uint32]struct{}), + fsTypes: make(map[string]*registeredFilesystemType), + filesystems: make(map[*Filesystem]struct{}), } vfs.mounts.Init() return vfs -- cgit v1.2.3 From 878bda6e19a0d55525ea6b1600f3413e0c5d6a84 Mon Sep 17 00:00:00 2001 From: Ghanan Gowripalan Date: Fri, 24 Jan 2020 13:02:01 -0800 Subject: Lock the NIC when checking if an address is tentative PiperOrigin-RevId: 291426657 --- pkg/tcpip/stack/nic.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 79556a36f..7dad9a8cb 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1208,6 +1208,9 @@ func (n *NIC) Stack() *Stack { // false. It will only return true if the address is associated with the NIC // AND it is tentative. func (n *NIC) isAddrTentative(addr tcpip.Address) bool { + n.mu.RLock() + defer n.mu.RUnlock() + ref, ok := n.mu.endpoints[NetworkEndpointID{addr}] if !ok { return false -- cgit v1.2.3 From 18a7e1309decb9bc09879e337adbc00f81d420c5 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 24 Jan 2020 17:06:30 -0800 Subject: Add support for device special files to VFS2 tmpfs. 
PiperOrigin-RevId: 291471892 --- pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/device_file.go | 39 ++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 43 +++++++++++++++++++--------------- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 30 +++++++++++++++++++----- 4 files changed, 88 insertions(+), 25 deletions(-) create mode 100644 pkg/sentry/fsimpl/tmpfs/device_file.go diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 7601c7c04..691476b4f 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -20,6 +20,7 @@ go_library( name = "tmpfs", srcs = [ "dentry_list.go", + "device_file.go", "directory.go", "filesystem.go", "named_pipe.go", diff --git a/pkg/sentry/fsimpl/tmpfs/device_file.go b/pkg/sentry/fsimpl/tmpfs/device_file.go new file mode 100644 index 000000000..84b181b90 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/device_file.go @@ -0,0 +1,39 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +type deviceFile struct { + inode inode + kind vfs.DeviceKind + major uint32 + minor uint32 +} + +func (fs *filesystem) newDeviceFile(creds *auth.Credentials, mode linux.FileMode, kind vfs.DeviceKind, major, minor uint32) *inode { + file := &deviceFile{ + kind: kind, + major: major, + minor: minor, + } + file.inode.init(file, fs, creds, mode) + file.inode.nlink = 1 // from parent directory + return &file.inode +} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index a9f66a42a..d726f03c5 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -228,23 +228,26 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + var childInode *inode switch opts.Mode.FileType() { case 0, linux.S_IFREG: - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil + childInode = fs.newRegularFile(rp.Credentials(), opts.Mode) case linux.S_IFIFO: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: + childInode = fs.newNamedPipe(rp.Credentials(), opts.Mode) + case linux.S_IFBLK: + childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, vfs.BlockDevice, opts.DevMajor, opts.DevMinor) + case linux.S_IFCHR: + childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, 
vfs.CharDevice, opts.DevMajor, opts.DevMinor) + case linux.S_IFSOCK: // Not yet supported. return syserror.EPERM default: return syserror.EINVAL } + child := fs.newDentry(childInode) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil }) } @@ -264,7 +267,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if err != nil { return nil, err } - return d.open(ctx, rp, opts.Flags, false /* afterCreate */) + return d.open(ctx, rp, &opts, false /* afterCreate */) } mustCreate := opts.Flags&linux.O_EXCL != 0 @@ -279,7 +282,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if mustCreate { return nil, syserror.EEXIST } - return start.open(ctx, rp, opts.Flags, false /* afterCreate */) + return start.open(ctx, rp, &opts, false /* afterCreate */) } afterTrailingSymlink: parent, err := walkParentDirLocked(rp, start) @@ -313,7 +316,7 @@ afterTrailingSymlink: child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) parent.vfsd.InsertChild(&child.vfsd, name) parent.inode.impl.(*directory).childList.PushBack(child) - return child.open(ctx, rp, opts.Flags, true) + return child.open(ctx, rp, &opts, true) } if err != nil { return nil, err @@ -327,11 +330,11 @@ afterTrailingSymlink: if mustCreate { return nil, syserror.EEXIST } - return child.open(ctx, rp, opts.Flags, false) + return child.open(ctx, rp, &opts, false) } -func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) +func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, afterCreate bool) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(opts.Flags) if !afterCreate { if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { return nil, err @@ -340,10 +343,10 @@ func (d *dentry) 
open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD - if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } - if flags&linux.O_TRUNC != 0 { + if opts.Flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data.Truncate(0, impl.memFile) atomic.StoreUint64(&impl.size, 0) @@ -356,7 +359,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, return nil, syserror.EISDIR } var fd directoryFD - if err := fd.vfsfd.Init(&fd, flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return &fd.vfsfd, nil @@ -364,7 +367,9 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) + return newNamedPipeFD(ctx, impl, rp, &d.vfsd, opts.Flags) + case *deviceFile: + return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) default: panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 1d4889c89..515f033f2 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -149,6 +149,10 @@ type inode struct { ctime int64 // nanoseconds mtime int64 // nanoseconds + // Only meaningful for device special files. 
+ rdevMajor uint32 + rdevMinor uint32 + impl interface{} // immutable } @@ -269,6 +273,15 @@ func (i *inode) statTo(stat *linux.Statx) { stat.Blocks = allocatedBlocksForSize(stat.Size) case *namedPipe: stat.Mode |= linux.S_IFIFO + case *deviceFile: + switch impl.kind { + case vfs.BlockDevice: + stat.Mode |= linux.S_IFBLK + case vfs.CharDevice: + stat.Mode |= linux.S_IFCHR + } + stat.RdevMajor = impl.major + stat.RdevMinor = impl.minor default: panic(fmt.Sprintf("unknown inode type: %T", i.impl)) } @@ -309,12 +322,8 @@ func (i *inode) setStat(stat linux.Statx) error { } case *directory: return syserror.EISDIR - case *symlink: - return syserror.EINVAL - case *namedPipe: - // Nothing. default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + return syserror.EINVAL } } if mask&linux.STATX_ATIME != 0 { @@ -353,13 +362,22 @@ func allocatedBlocksForSize(size uint64) uint64 { } func (i *inode) direntType() uint8 { - switch i.impl.(type) { + switch impl := i.impl.(type) { case *regularFile: return linux.DT_REG case *directory: return linux.DT_DIR case *symlink: return linux.DT_LNK + case *deviceFile: + switch impl.kind { + case vfs.BlockDevice: + return linux.DT_BLK + case vfs.CharDevice: + return linux.DT_CHR + default: + panic(fmt.Sprintf("unknown vfs.DeviceKind: %v", impl.kind)) + } default: panic(fmt.Sprintf("unknown inode type: %T", i.impl)) } -- cgit v1.2.3 From 68514d4ba3f7c06a89a8d0cd79327ede62dae65b Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Sun, 26 Jan 2020 18:32:52 -0800 Subject: Unroll checksum computation loop. Checksum computation is one of the most expensive bits of packet processing. Manual unrolling of the loop provides significant improvement in checksum speed. 
Updates #1656 BenchmarkChecksum/checksum_64-12 49834124 23.6 ns/op BenchmarkChecksum/checksum_128-12 27111997 44.1 ns/op BenchmarkChecksum/checksum_256-12 11416683 91.5 ns/op BenchmarkChecksum/checksum_512-12 6375298 174 ns/op BenchmarkChecksum/checksum_1024-12 3403852 338 ns/op BenchmarkChecksum/checksum_1500-12 2343576 493 ns/op BenchmarkChecksum/checksum_2048-12 1730521 656 ns/op BenchmarkChecksum/checksum_4096-12 920469 1327 ns/op BenchmarkChecksum/checksum_8192-12 445885 2637 ns/op BenchmarkChecksum/checksum_16384-12 226342 5268 ns/op BenchmarkChecksum/checksum_32767-12 114210 10503 ns/op BenchmarkChecksum/checksum_32768-12 99138 10610 ns/op BenchmarkChecksum/checksum_65535-12 53438 21158 ns/op BenchmarkChecksum/checksum_65536-12 52993 21067 ns/op BenchmarkUnrolledChecksum/checksum_64-12 61035639 19.1 ns/op BenchmarkUnrolledChecksum/checksum_128-12 36067015 33.6 ns/op BenchmarkUnrolledChecksum/checksum_256-12 19731220 60.4 ns/op BenchmarkUnrolledChecksum/checksum_512-12 9091291 116 ns/op BenchmarkUnrolledChecksum/checksum_1024-12 4976406 226 ns/op BenchmarkUnrolledChecksum/checksum_1500-12 3685224 328 ns/op BenchmarkUnrolledChecksum/checksum_2048-12 2579108 447 ns/op BenchmarkUnrolledChecksum/checksum_4096-12 1350475 887 ns/op BenchmarkUnrolledChecksum/checksum_8192-12 658248 1780 ns/op BenchmarkUnrolledChecksum/checksum_16384-12 335869 3534 ns/op BenchmarkUnrolledChecksum/checksum_32767-12 168650 7095 ns/op BenchmarkUnrolledChecksum/checksum_32768-12 168075 7098 ns/op BenchmarkUnrolledChecksum/checksum_65535-12 75085 14277 ns/op BenchmarkUnrolledChecksum/checksum_65536-12 75921 14127 ns/op PiperOrigin-RevId: 291643290 --- pkg/tcpip/header/checksum.go | 124 ++++++++++++++++++++++++++++++++++++++ pkg/tcpip/header/checksum_test.go | 62 +++++++++++++++++++ 2 files changed, 186 insertions(+) diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index 9749c7f4d..ce57b581a 100644 --- a/pkg/tcpip/header/checksum.go +++ 
b/pkg/tcpip/header/checksum.go @@ -45,6 +45,121 @@ func calculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bool) { return ChecksumCombine(uint16(v), uint16(v>>16)), odd } +func unrolledCalculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bool) { + v := initial + + if odd { + v += uint32(buf[0]) + buf = buf[1:] + } + + l := len(buf) + odd = l&1 != 0 + if odd { + l-- + v += uint32(buf[l]) << 8 + } + for (l - 64) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + 
v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[64:] + l = l - 64 + } + if (l - 32) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[32:] + l = l - 32 + } + if (l - 16) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[16:] + l = l - 16 + } + if (l - 8) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + buf = buf[8:] + l = l - 8 + } + if (l - 4) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + buf = buf[4:] + l = l - 4 + } + + // At this point since l was even before we started unrolling + // there can be only two 
bytes left to add. + if l != 0 { + v += (uint32(buf[0]) << 8) + uint32(buf[1]) + } + + return ChecksumCombine(uint16(v), uint16(v>>16)), odd +} + // Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the // given byte array. // @@ -54,6 +169,15 @@ func Checksum(buf []byte, initial uint16) uint16 { return s } +// UnrolledChecksum calculates the checksum (as defined in RFC 1071) of the +// bytes in the given byte array. +// +// The initial checksum must have been computed on an even number of bytes. +func UnrolledChecksum(buf []byte, initial uint16) uint16 { + s, _ := unrolledCalculateChecksum(buf, false, uint32(initial)) + return s +} + // ChecksumVV calculates the checksum (as defined in RFC 1071) of the bytes in // the given VectorizedView. // diff --git a/pkg/tcpip/header/checksum_test.go b/pkg/tcpip/header/checksum_test.go index 86b466c1c..2fbd16a65 100644 --- a/pkg/tcpip/header/checksum_test.go +++ b/pkg/tcpip/header/checksum_test.go @@ -17,6 +17,8 @@ package header_test import ( + "fmt" + "math/rand" "testing" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -107,3 +109,63 @@ func TestChecksumVVWithOffset(t *testing.T) { }) } } + +func TestChecksum(t *testing.T) { + var bufSizes = []int{0, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 255, 256, 257, 1023, 1024} + type testCase struct { + buf []byte + initial uint16 + csumOrig uint16 + csumNew uint16 + } + testCases := make([]testCase, 100000) + // Ensure same buffer generation for test consistency. 
+ rnd := rand.New(rand.NewSource(42)) + for i := range testCases { + testCases[i].buf = make([]byte, bufSizes[i%len(bufSizes)]) + testCases[i].initial = uint16(rnd.Intn(65536)) + rnd.Read(testCases[i].buf) + } + + for i := range testCases { + testCases[i].csumOrig = header.Checksum(testCases[i].buf, testCases[i].initial) + testCases[i].csumNew = header.UnrolledChecksum(testCases[i].buf, testCases[i].initial) + if got, want := testCases[i].csumNew, testCases[i].csumOrig; got != want { + t.Fatalf("new checksum for (buf = %x, initial = %d) does not match old got: %d, want: %d", testCases[i].buf, testCases[i].initial, got, want) + } + } +} + +func BenchmarkChecksum(b *testing.B) { + var bufSizes = []int{64, 128, 256, 512, 1024, 1500, 2048, 4096, 8192, 16384, 32767, 32768, 65535, 65536} + + checkSumImpls := []struct { + fn func([]byte, uint16) uint16 + name string + }{ + {header.Checksum, fmt.Sprintf("checksum")}, + {header.UnrolledChecksum, fmt.Sprintf("unrolled_checksum")}, + } + + for _, csumImpl := range checkSumImpls { + // Ensure same buffer generation for test consistency. + rnd := rand.New(rand.NewSource(42)) + for _, bufSz := range bufSizes { + b.Run(fmt.Sprintf("%s_%d", csumImpl.name, bufSz), func(b *testing.B) { + tc := struct { + buf []byte + initial uint16 + csum uint16 + }{ + buf: make([]byte, bufSz), + initial: uint16(rnd.Intn(65536)), + } + rnd.Read(tc.buf) + b.ResetTimer() + for i := 0; i < b.N; i++ { + tc.csum = csumImpl.fn(tc.buf, tc.initial) + } + }) + } + } +} -- cgit v1.2.3 From 6b43cf791a74a746443f70f98d859c1246f87e2a Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 27 Jan 2020 05:33:03 -0800 Subject: Replace calculateChecksum w/ the unrolled version. 
Fixes #1656 PiperOrigin-RevId: 291703760 --- pkg/tcpip/header/checksum.go | 15 +++++++++------ pkg/tcpip/header/checksum_test.go | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index ce57b581a..204285576 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -160,20 +160,23 @@ func unrolledCalculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bo return ChecksumCombine(uint16(v), uint16(v>>16)), odd } -// Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the -// given byte array. +// ChecksumOld calculates the checksum (as defined in RFC 1071) of the bytes in +// the given byte array. This function uses a non-optimized implementation. It's +// only retained for reference and to use as a benchmark/test. Most code should +// use the header.Checksum function. // // The initial checksum must have been computed on an even number of bytes. -func Checksum(buf []byte, initial uint16) uint16 { +func ChecksumOld(buf []byte, initial uint16) uint16 { s, _ := calculateChecksum(buf, false, uint32(initial)) return s } -// UnrolledChecksum calculates the checksum (as defined in RFC 1071) of the -// bytes in the given byte array. +// Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the +// given byte array. This function uses an optimized unrolled version of the +// checksum algorithm. // // The initial checksum must have been computed on an even number of bytes. 
-func UnrolledChecksum(buf []byte, initial uint16) uint16 { +func Checksum(buf []byte, initial uint16) uint16 { s, _ := unrolledCalculateChecksum(buf, false, uint32(initial)) return s } diff --git a/pkg/tcpip/header/checksum_test.go b/pkg/tcpip/header/checksum_test.go index 2fbd16a65..309403482 100644 --- a/pkg/tcpip/header/checksum_test.go +++ b/pkg/tcpip/header/checksum_test.go @@ -128,8 +128,8 @@ func TestChecksum(t *testing.T) { } for i := range testCases { - testCases[i].csumOrig = header.Checksum(testCases[i].buf, testCases[i].initial) - testCases[i].csumNew = header.UnrolledChecksum(testCases[i].buf, testCases[i].initial) + testCases[i].csumOrig = header.ChecksumOld(testCases[i].buf, testCases[i].initial) + testCases[i].csumNew = header.Checksum(testCases[i].buf, testCases[i].initial) if got, want := testCases[i].csumNew, testCases[i].csumOrig; got != want { t.Fatalf("new checksum for (buf = %x, initial = %d) does not match old got: %d, want: %d", testCases[i].buf, testCases[i].initial, got, want) } @@ -143,8 +143,8 @@ func BenchmarkChecksum(b *testing.B) { fn func([]byte, uint16) uint16 name string }{ + {header.ChecksumOld, fmt.Sprintf("checksum_old")}, {header.Checksum, fmt.Sprintf("checksum")}, - {header.UnrolledChecksum, fmt.Sprintf("unrolled_checksum")}, } for _, csumImpl := range checkSumImpls { -- cgit v1.2.3