author    | Kevin Krakauer <krakauer@google.com> | 2020-02-05 14:43:11 -0800
committer | Kevin Krakauer <krakauer@google.com> | 2020-02-05 14:43:11 -0800
commit    | bf0ea204e9415a181c63ee10078cca753df14f7e (patch)
tree      | f4d149ac2bf2ea900f8b2a0cdea0a7a2ea7e6d0e
parent    | 29ad5762e4549d961f48c65292cfdeb7256524f6 (diff)
parent    | f2d3efca1deded31a2929ea77c0eecf476764660 (diff)
Merge branch 'master' into tcp-matchers-submit
137 files changed, 7622 insertions, 1118 deletions
diff --git a/kokoro/kythe/generate_xrefs.sh b/kokoro/kythe/generate_xrefs.sh
index 7a0fbb3cd..323b0f77b 100644
--- a/kokoro/kythe/generate_xrefs.sh
+++ b/kokoro/kythe/generate_xrefs.sh
@@ -23,7 +23,7 @@
 bazel version
 python3 -V
 
-readonly KYTHE_VERSION='v0.0.39'
+readonly KYTHE_VERSION='v0.0.41'
 readonly WORKDIR="$(mktemp -d)"
 readonly KYTHE_DIR="${WORKDIR}/kythe-${KYTHE_VERSION}"
 if [[ -n "$KOKORO_GIT_COMMIT" ]]; then
diff --git a/kokoro/packetdrill_tests.cfg b/kokoro/packetdrill_tests.cfg
new file mode 100644
index 000000000..258d7deb4
--- /dev/null
+++ b/kokoro/packetdrill_tests.cfg
@@ -0,0 +1,9 @@
+build_file: "repo/scripts/packetdrill_tests.sh"
+
+action {
+  define_artifacts {
+    regex: "**/sponge_log.xml"
+    regex: "**/sponge_log.log"
+    regex: "**/outputs.zip"
+  }
+}
diff --git a/kokoro/runtime_tests/go1.12.cfg b/kokoro/runtime_tests/go1.12.cfg
new file mode 100644
index 000000000..024740ab2
--- /dev/null
+++ b/kokoro/runtime_tests/go1.12.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+  key: "RUNTIME_TEST_NAME"
+  value: "go1.12"
+}
\ No newline at end of file
diff --git a/kokoro/runtime_tests/java11.cfg b/kokoro/runtime_tests/java11.cfg
new file mode 100644
index 000000000..f01d26153
--- /dev/null
+++ b/kokoro/runtime_tests/java11.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+  key: "RUNTIME_TEST_NAME"
+  value: "java11"
+}
\ No newline at end of file
diff --git a/kokoro/runtime_tests/nodejs12.4.0.cfg b/kokoro/runtime_tests/nodejs12.4.0.cfg
new file mode 100644
index 000000000..d4861fb07
--- /dev/null
+++ b/kokoro/runtime_tests/nodejs12.4.0.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+  key: "RUNTIME_TEST_NAME"
+  value: "nodejs12.4.0"
+}
\ No newline at end of file
diff --git a/kokoro/runtime_tests/php7.3.6.cfg b/kokoro/runtime_tests/php7.3.6.cfg
new file mode 100644
index 000000000..b737ed9cb
--- /dev/null
+++ b/kokoro/runtime_tests/php7.3.6.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+  key: "RUNTIME_TEST_NAME"
+  value: "php7.3.6"
+}
\ No newline at end of file
diff --git a/kokoro/runtime_tests/python3.7.3.cfg b/kokoro/runtime_tests/python3.7.3.cfg
new file mode 100644
index 000000000..971fcba05
--- /dev/null
+++ b/kokoro/runtime_tests/python3.7.3.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+  key: "RUNTIME_TEST_NAME"
+  value: "python3.7.3"
+}
\ No newline at end of file diff --git a/scripts/runtime_tests.sh b/kokoro/runtime_tests/runtime_tests.sh index 9ee991e42..9ee991e42 100755 --- a/scripts/runtime_tests.sh +++ b/kokoro/runtime_tests/runtime_tests.sh diff --git a/pkg/safecopy/safecopy_test.go b/pkg/safecopy/safecopy_test.go index 5818f7f9b..7f7f69d61 100644 --- a/pkg/safecopy/safecopy_test.go +++ b/pkg/safecopy/safecopy_test.go @@ -138,10 +138,14 @@ func TestSwapUint32Success(t *testing.T) { func TestSwapUint32AlignmentError(t *testing.T) { // Test that SwapUint32 returns an AlignmentError when passed an unaligned // address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := SwapUint32(unsafe.Pointer(addr), 1); err != want { + data := make([]byte, 8) // 2 * sizeof(uint32). + alignedIndex := uintptr(0) + if offset := uintptr(unsafe.Pointer(&data[0])) % 4; offset != 0 { + alignedIndex = 4 - offset + } + ptr := unsafe.Pointer(&data[alignedIndex+1]) + want := AlignmentError{Addr: uintptr(ptr), Alignment: 4} + if _, err := SwapUint32(ptr, 1); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) } } @@ -171,10 +175,14 @@ func TestSwapUint64Success(t *testing.T) { func TestSwapUint64AlignmentError(t *testing.T) { // Test that SwapUint64 returns an AlignmentError when passed an unaligned // address. - data := new(struct{ val1, val2 uint64 }) - addr := uintptr(unsafe.Pointer(&data.val1)) + 1 - want := AlignmentError{Addr: addr, Alignment: 8} - if _, err := SwapUint64(unsafe.Pointer(addr), 1); err != want { + data := make([]byte, 16) // 2 * sizeof(uint64). + alignedIndex := uintptr(0) + if offset := uintptr(unsafe.Pointer(&data[0])) % 8; offset != 0 { + alignedIndex = 8 - offset + } + ptr := unsafe.Pointer(&data[alignedIndex+1]) + want := AlignmentError{Addr: uintptr(ptr), Alignment: 8} + if _, err := SwapUint64(ptr, 1); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) } } @@ -201,10 +209,14 @@ func TestCompareAndSwapUint32Success(t *testing.T) { func TestCompareAndSwapUint32AlignmentError(t *testing.T) { // Test that CompareAndSwapUint32 returns an AlignmentError when passed an // unaligned address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := CompareAndSwapUint32(unsafe.Pointer(addr), 0, 1); err != want { + data := make([]byte, 8) // 2 * sizeof(uint32). 
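// A minimal, self-contained sketch of the alignment trick these updated tests
// share: allocate a byte slice large enough to contain an aligned value, round
// the base address up to the required boundary, then step one byte past it so
// the resulting pointer is guaranteed to be misaligned for that type.
// misalignedPtr is an illustrative helper, not part of the safecopy package.
package main

import (
	"fmt"
	"unsafe"
)

// misalignedPtr returns a pointer into buf that is exactly one byte past an
// align-byte boundary, so it is never align-aligned. align must be a power of
// two and len(buf) must be at least 2*align.
func misalignedPtr(buf []byte, align uintptr) unsafe.Pointer {
	alignedIndex := uintptr(0)
	if offset := uintptr(unsafe.Pointer(&buf[0])) % align; offset != 0 {
		alignedIndex = align - offset
	}
	return unsafe.Pointer(&buf[alignedIndex+1])
}

func main() {
	buf := make([]byte, 8) // 2 * sizeof(uint32)
	p := misalignedPtr(buf, 4)
	fmt.Println(uintptr(p)%4 != 0) // always true
}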
+ alignedIndex := uintptr(0) + if offset := uintptr(unsafe.Pointer(&data[0])) % 4; offset != 0 { + alignedIndex = 4 - offset + } + ptr := unsafe.Pointer(&data[alignedIndex+1]) + want := AlignmentError{Addr: uintptr(ptr), Alignment: 4} + if _, err := CompareAndSwapUint32(ptr, 0, 1); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) } } @@ -252,8 +264,8 @@ func TestCopyInSegvError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) dst := randBuf(pageSize) n, err := CopyIn(dst, src) if n != bytesBeforeFault { @@ -276,8 +288,8 @@ func TestCopyInBusError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) dst := randBuf(pageSize) n, err := CopyIn(dst, src) if n != bytesBeforeFault { @@ -300,8 +312,8 @@ func TestCopyOutSegvError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) src := randBuf(pageSize) n, err := CopyOut(dst, src) if n != bytesBeforeFault { @@ -324,8 +336,8 @@ func TestCopyOutBusError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) src := randBuf(pageSize) n, err := CopyOut(dst, src) if n != bytesBeforeFault { @@ -348,8 +360,8 @@ func TestCopySourceSegvError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) dst := randBuf(pageSize) n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) if n != uintptr(bytesBeforeFault) { @@ -372,8 +384,8 @@ func 
TestCopySourceBusError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) dst := randBuf(pageSize) n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) if n != uintptr(bytesBeforeFault) { @@ -396,8 +408,8 @@ func TestCopyDestinationSegvError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) src := randBuf(pageSize) n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) if n != uintptr(bytesBeforeFault) { @@ -420,8 +432,8 @@ func TestCopyDestinationBusError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) src := randBuf(pageSize) n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) if n != uintptr(bytesBeforeFault) { @@ -444,8 +456,8 @@ func TestZeroOutSegvError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting write %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) n, err := ZeroOut(dst, pageSize) if n != uintptr(bytesBeforeFault) { t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) @@ -467,8 +479,8 @@ func TestZeroOutBusError(t *testing.T) { for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { t.Run(fmt.Sprintf("starting write %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) + dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault]) n, err := ZeroOut(dst, pageSize) if n != uintptr(bytesBeforeFault) { t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) @@ -488,7 +500,7 @@ func TestSwapUint32SegvError(t *testing.T) { // Test that SwapUint32 returns a SegvError when reaching a page that // signals SIGSEGV. 
withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := SwapUint32(unsafe.Pointer(secondPage), 1) if want := (SegvError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) @@ -500,7 +512,7 @@ func TestSwapUint32BusError(t *testing.T) { // Test that SwapUint32 returns a BusError when reaching a page that // signals SIGBUS. withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := SwapUint32(unsafe.Pointer(secondPage), 1) if want := (BusError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) @@ -512,7 +524,7 @@ func TestSwapUint64SegvError(t *testing.T) { // Test that SwapUint64 returns a SegvError when reaching a page that // signals SIGSEGV. withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := SwapUint64(unsafe.Pointer(secondPage), 1) if want := (SegvError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) @@ -524,7 +536,7 @@ func TestSwapUint64BusError(t *testing.T) { // Test that SwapUint64 returns a BusError when reaching a page that // signals SIGBUS. withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := SwapUint64(unsafe.Pointer(secondPage), 1) if want := (BusError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) @@ -536,7 +548,7 @@ func TestCompareAndSwapUint32SegvError(t *testing.T) { // Test that CompareAndSwapUint32 returns a SegvError when reaching a page // that signals SIGSEGV. withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) if want := (SegvError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) @@ -548,7 +560,7 @@ func TestCompareAndSwapUint32BusError(t *testing.T) { // Test that CompareAndSwapUint32 returns a BusError when reaching a page // that signals SIGBUS. withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + secondPage := uintptr(unsafe.Pointer(&mapping[pageSize])) _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) if want := (BusError{secondPage}); err != want { t.Errorf("Unexpected error: got %v, want %v", err, want) diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go index eef028e68..41dd567f3 100644 --- a/pkg/safecopy/safecopy_unsafe.go +++ b/pkg/safecopy/safecopy_unsafe.go @@ -16,6 +16,7 @@ package safecopy import ( "fmt" + "runtime" "syscall" "unsafe" ) @@ -35,7 +36,7 @@ const maxRegisterSize = 16 // successfully copied. // //go:noescape -func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +func memcpy(dst, src uintptr, n uintptr) (fault uintptr, sig int32) // memclr sets the n bytes following ptr to zeroes. 
If a SIGSEGV or SIGBUS // signal is received during the write, it returns the address that caused the @@ -47,7 +48,7 @@ func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32 // successfully written. // //go:noescape -func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +func memclr(ptr uintptr, n uintptr) (fault uintptr, sig int32) // swapUint32 atomically stores new into *ptr and returns (the previous *ptr // value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the @@ -90,29 +91,35 @@ func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) // CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes // copied and an error if SIGSEGV or SIGBUS is received while reading from src. func CopyIn(dst []byte, src unsafe.Pointer) (int, error) { + n, err := copyIn(dst, uintptr(src)) + runtime.KeepAlive(src) + return n, err +} + +// copyIn is the underlying definition for CopyIn. +func copyIn(dst []byte, src uintptr) (int, error) { toCopy := uintptr(len(dst)) if len(dst) == 0 { return 0, nil } - fault, sig := memcpy(unsafe.Pointer(&dst[0]), src, toCopy) + fault, sig := memcpy(uintptr(unsafe.Pointer(&dst[0])), src, toCopy) if sig == 0 { return len(dst), nil } - faultN, srcN := uintptr(fault), uintptr(src) - if faultN < srcN || faultN >= srcN+toCopy { - panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, faultN, srcN, srcN+toCopy)) + if fault < src || fault >= src+toCopy { + panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, fault, src, src+toCopy)) } // memcpy might have ended the copy up to maxRegisterSize bytes before // fault, if an instruction caused a memory access that straddled two // pages, and the second one faulted. Try to copy up to the fault. var done int - if faultN-srcN > maxRegisterSize { - done = int(faultN - srcN - maxRegisterSize) + if fault-src > maxRegisterSize { + done = int(fault - src - maxRegisterSize) } - n, err := CopyIn(dst[done:int(faultN-srcN)], unsafe.Pointer(srcN+uintptr(done))) + n, err := copyIn(dst[done:int(fault-src)], src+uintptr(done)) done += n if err != nil { return done, err @@ -124,29 +131,35 @@ func CopyIn(dst []byte, src unsafe.Pointer) (int, error) { // bytes done and an error if SIGSEGV or SIGBUS is received while writing to // dst. func CopyOut(dst unsafe.Pointer, src []byte) (int, error) { + n, err := copyOut(uintptr(dst), src) + runtime.KeepAlive(dst) + return n, err +} + +// copyOut is the underlying definition for CopyOut. +func copyOut(dst uintptr, src []byte) (int, error) { toCopy := uintptr(len(src)) if toCopy == 0 { return 0, nil } - fault, sig := memcpy(dst, unsafe.Pointer(&src[0]), toCopy) + fault, sig := memcpy(dst, uintptr(unsafe.Pointer(&src[0])), toCopy) if sig == 0 { return len(src), nil } - faultN, dstN := uintptr(fault), uintptr(dst) - if faultN < dstN || faultN >= dstN+toCopy { - panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toCopy)) + if fault < dst || fault >= dst+toCopy { + panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, fault, dst, dst+toCopy)) } // memcpy might have ended the copy up to maxRegisterSize bytes before // fault, if an instruction caused a memory access that straddled two // pages, and the second one faulted. Try to copy up to the fault. 
var done int - if faultN-dstN > maxRegisterSize { - done = int(faultN - dstN - maxRegisterSize) + if fault-dst > maxRegisterSize { + done = int(fault - dst - maxRegisterSize) } - n, err := CopyOut(unsafe.Pointer(dstN+uintptr(done)), src[done:int(faultN-dstN)]) + n, err := copyOut(dst+uintptr(done), src[done:int(fault-dst)]) done += n if err != nil { return done, err @@ -161,6 +174,14 @@ func CopyOut(dst unsafe.Pointer, src []byte) (int, error) { // Data is copied in order; if [src, src+toCopy) and [dst, dst+toCopy) overlap, // the resulting contents of dst are unspecified. func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { + n, err := copyN(uintptr(dst), uintptr(src), toCopy) + runtime.KeepAlive(dst) + runtime.KeepAlive(src) + return n, err +} + +// copyN is the underlying definition for Copy. +func copyN(dst, src uintptr, toCopy uintptr) (uintptr, error) { if toCopy == 0 { return 0, nil } @@ -171,17 +192,16 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { } // Did the fault occur while reading from src or writing to dst? - faultN, srcN, dstN := uintptr(fault), uintptr(src), uintptr(dst) faultAfterSrc := ^uintptr(0) - if faultN >= srcN { - faultAfterSrc = faultN - srcN + if fault >= src { + faultAfterSrc = fault - src } faultAfterDst := ^uintptr(0) - if faultN >= dstN { - faultAfterDst = faultN - dstN + if fault >= dst { + faultAfterDst = fault - dst } if faultAfterSrc >= toCopy && faultAfterDst >= toCopy { - panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, faultN, srcN, srcN+toCopy, dstN, dstN+toCopy)) + panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, fault, src, src+toCopy, dst, dst+toCopy)) } faultedAfter := faultAfterSrc if faultedAfter > faultAfterDst { @@ -195,7 +215,7 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { if faultedAfter > maxRegisterSize { done = faultedAfter - maxRegisterSize } - n, err := Copy(unsafe.Pointer(dstN+done), unsafe.Pointer(srcN+done), faultedAfter-done) + n, err := copyN(dst+done, src+done, faultedAfter-done) done += n if err != nil { return done, err @@ -206,6 +226,13 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { // ZeroOut writes toZero zero bytes to dst. It returns the number of bytes // written and an error if SIGSEGV or SIGBUS is received while writing to dst. func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) { + n, err := zeroOut(uintptr(dst), toZero) + runtime.KeepAlive(dst) + return n, err +} + +// zeroOut is the underlying definition for ZeroOut. +func zeroOut(dst uintptr, toZero uintptr) (uintptr, error) { if toZero == 0 { return 0, nil } @@ -215,19 +242,18 @@ func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) { return toZero, nil } - faultN, dstN := uintptr(fault), uintptr(dst) - if faultN < dstN || faultN >= dstN+toZero { - panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toZero)) + if fault < dst || fault >= dst+toZero { + panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, fault, dst, dst+toZero)) } // memclr might have ended the write up to maxRegisterSize bytes before // fault, if an instruction caused a memory access that straddled two // pages, and the second one faulted. Try to write up to the fault. 
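// A minimal sketch of the wrapper pattern this refactor applies throughout
// safecopy: the exported function keeps its unsafe.Pointer signature, the
// internal worker takes plain uintptrs (so the fault-handling helpers never
// hold Go pointers), and runtime.KeepAlive pins the referent until the worker
// returns. The names doWork and Work are illustrative, not package APIs.
package main

import (
	"fmt"
	"runtime"
	"unsafe"
)

// doWork stands in for an internal routine such as copyIn or zeroOut that
// only ever sees integer addresses.
func doWork(addr uintptr, n uintptr) int {
	_ = addr
	return int(n)
}

// Work is the exported entry point: convert the pointer, call the worker,
// then KeepAlive the original pointer so the object it references cannot be
// collected or moved while the operation is in flight.
func Work(ptr unsafe.Pointer, n uintptr) int {
	ret := doWork(uintptr(ptr), n)
	runtime.KeepAlive(ptr)
	return ret
}

func main() {
	buf := make([]byte, 16)
	fmt.Println(Work(unsafe.Pointer(&buf[0]), uintptr(len(buf))))
}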
var done uintptr - if faultN-dstN > maxRegisterSize { - done = faultN - dstN - maxRegisterSize + if fault-dst > maxRegisterSize { + done = fault - dst - maxRegisterSize } - n, err := ZeroOut(unsafe.Pointer(dstN+done), faultN-dstN-done) + n, err := zeroOut(dst+done, fault-dst-done) done += n if err != nil { return done, err @@ -243,7 +269,7 @@ func SwapUint32(ptr unsafe.Pointer, new uint32) (uint32, error) { return 0, AlignmentError{addr, 4} } old, sig := swapUint32(ptr, new) - return old, errorFromFaultSignal(ptr, sig) + return old, errorFromFaultSignal(uintptr(ptr), sig) } // SwapUint64 is equivalent to sync/atomic.SwapUint64, except that it returns @@ -254,7 +280,7 @@ func SwapUint64(ptr unsafe.Pointer, new uint64) (uint64, error) { return 0, AlignmentError{addr, 8} } old, sig := swapUint64(ptr, new) - return old, errorFromFaultSignal(ptr, sig) + return old, errorFromFaultSignal(uintptr(ptr), sig) } // CompareAndSwapUint32 is equivalent to atomicbitops.CompareAndSwapUint32, @@ -265,7 +291,7 @@ func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) { return 0, AlignmentError{addr, 4} } prev, sig := compareAndSwapUint32(ptr, old, new) - return prev, errorFromFaultSignal(ptr, sig) + return prev, errorFromFaultSignal(uintptr(ptr), sig) } // LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It @@ -277,17 +303,17 @@ func LoadUint32(ptr unsafe.Pointer) (uint32, error) { return 0, AlignmentError{addr, 4} } val, sig := loadUint32(ptr) - return val, errorFromFaultSignal(ptr, sig) + return val, errorFromFaultSignal(uintptr(ptr), sig) } -func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error { +func errorFromFaultSignal(addr uintptr, sig int32) error { switch sig { case 0: return nil case int32(syscall.SIGSEGV): - return SegvError{uintptr(addr)} + return SegvError{addr} case int32(syscall.SIGBUS): - return BusError{uintptr(addr)} + return BusError{addr} default: panic(fmt.Sprintf("safecopy got unexpected signal %d at address %#x", sig, addr)) } diff --git a/pkg/safemem/seq_unsafe.go b/pkg/safemem/seq_unsafe.go index 354a95dde..dcdfc9600 100644 --- a/pkg/safemem/seq_unsafe.go +++ b/pkg/safemem/seq_unsafe.go @@ -18,6 +18,7 @@ import ( "bytes" "fmt" "reflect" + "syscall" "unsafe" ) @@ -297,3 +298,19 @@ func ZeroSeq(dsts BlockSeq) (uint64, error) { } return done, nil } + +// IovecsFromBlockSeq returns a []syscall.Iovec representing seq. +func IovecsFromBlockSeq(bs BlockSeq) []syscall.Iovec { + iovs := make([]syscall.Iovec, 0, bs.NumBlocks()) + for ; !bs.IsEmpty(); bs = bs.Tail() { + b := bs.Head() + iovs = append(iovs, syscall.Iovec{ + Base: &b.ToSlice()[0], + Len: uint64(b.Len()), + }) + // We don't need to care about b.NeedSafecopy(), because the host + // kernel will handle such address ranges just fine (by returning + // EFAULT). 
+ } + return iovs +} diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index fc36efa23..55fd6967e 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -219,24 +219,36 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc switch a := arg.(type) { case AllowAny: case AllowValue: + dataOffsetLow := seccompDataOffsetArgLow(i) + dataOffsetHigh := seccompDataOffsetArgHigh(i) + if i == RuleIP { + dataOffsetLow = seccompDataOffsetIPLow + dataOffsetHigh = seccompDataOffsetIPHigh + } high, low := uint32(a>>32), uint32(a) // assert arg_low == low - p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i)) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) // assert arg_high == high - p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i)) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) labelled = true case GreaterThan: + dataOffsetLow := seccompDataOffsetArgLow(i) + dataOffsetHigh := seccompDataOffsetArgHigh(i) + if i == RuleIP { + dataOffsetLow = seccompDataOffsetIPLow + dataOffsetHigh = seccompDataOffsetIPHigh + } labelGood := fmt.Sprintf("gt%v", i) high, low := uint32(a>>32), uint32(a) // assert arg_high < high - p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i)) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) // arg_high > high p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) // arg_low < low - p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i)) + p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow) p.AddJumpFalseLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx)) p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood)) labelled = true diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go index 84c841d7f..06308cd29 100644 --- a/pkg/seccomp/seccomp_rules.go +++ b/pkg/seccomp/seccomp_rules.go @@ -62,7 +62,11 @@ func (a AllowValue) String() (s string) { // rule := Rule { // AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0 // } -type Rule [6]interface{} +type Rule [7]interface{} // 6 arguments + RIP + +// RuleIP indicates what rules in the Rule array have to be applied to +// instruction pointer. 
+const RuleIP = 6 func (r Rule) String() (s string) { if len(r) == 0 { diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index abbee7051..da5a5e4b2 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -388,6 +388,33 @@ func TestBasic(t *testing.T) { }, }, }, + { + ruleSets: []RuleSet{ + { + Rules: SyscallRules{ + 1: []Rule{ + { + RuleIP: AllowValue(0x7aabbccdd), + }, + }, + }, + Action: linux.SECCOMP_RET_ALLOW, + }, + }, + defaultAction: linux.SECCOMP_RET_TRAP, + specs: []spec{ + { + desc: "IP: Syscall instruction pointer allowed", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{}, instructionPointer: 0x7aabbccdd}, + want: linux.SECCOMP_RET_ALLOW, + }, + { + desc: "IP: Syscall instruction pointer disallowed", + data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{}, instructionPointer: 0x711223344}, + want: linux.SECCOMP_RET_TRAP, + }, + }, + }, } { instrs, err := BuildProgram(test.ruleSets, test.defaultAction) if err != nil { diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 4ab2a384f..789369220 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -28,13 +28,13 @@ go_template_instance( "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", - prefix = "frameRef", + prefix = "FrameRef", template = "//pkg/segment:generic_set", types = { "Key": "uint64", "Range": "platform.FileRange", "Value": "uint64", - "Functions": "frameRefSetFunctions", + "Functions": "FrameRefSetFunctions", }, ) diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go index dd63db32b..6564fd0c6 100644 --- a/pkg/sentry/fs/fsutil/frame_ref_set.go +++ b/pkg/sentry/fs/fsutil/frame_ref_set.go @@ -20,24 +20,25 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" ) -type frameRefSetFunctions struct{} +// FrameRefSetFunctions implements segment.Functions for FrameRefSet. +type FrameRefSetFunctions struct{} // MinKey implements segment.Functions.MinKey. -func (frameRefSetFunctions) MinKey() uint64 { +func (FrameRefSetFunctions) MinKey() uint64 { return 0 } // MaxKey implements segment.Functions.MaxKey. -func (frameRefSetFunctions) MaxKey() uint64 { +func (FrameRefSetFunctions) MaxKey() uint64 { return math.MaxUint64 } // ClearValue implements segment.Functions.ClearValue. -func (frameRefSetFunctions) ClearValue(val *uint64) { +func (FrameRefSetFunctions) ClearValue(val *uint64) { } // Merge implements segment.Functions.Merge. -func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) { +func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) { if val1 != val2 { return 0, false } @@ -45,6 +46,6 @@ func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform. } // Split implements segment.Functions.Split. -func (frameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { +func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { return val, val } diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 573b8586e..800c8b4e1 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -111,7 +111,7 @@ type CachingInodeOperations struct { // refs tracks active references to data in the cache. // // refs is protected by dataMu. 
- refs frameRefSet + refs FrameRefSet } // CachingInodeOperationsOptions configures a CachingInodeOperations. diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md index 71a577d9d..85063d4e6 100644 --- a/pkg/sentry/fs/g3doc/inotify.md +++ b/pkg/sentry/fs/g3doc/inotify.md @@ -112,11 +112,11 @@ attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used `Inotify.mu` to also protect the event queue, this would violate the above lock ordering. -[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go -[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go -[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go -[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go -[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/ -[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go +[dirent]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/dirent.go +[event]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_event.go +[fd_table]: https://github.com/google/gvisor/blob/master/pkg/sentry/kernel/fd_table.go +[inode]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode.go +[inode_watches]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode_inotify.go +[inotify]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify.go +[syscall_dir]: https://github.com/google/gvisor/blob/master/pkg/sentry/syscalls/linux/ +[watch]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index 971d3718e..fea135eea 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -9,6 +9,7 @@ go_library( "cache_policy.go", "context_file.go", "device.go", + "fifo.go", "file.go", "file_state.go", "fs.go", @@ -38,6 +39,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/host", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index ebea03c42..07a564e92 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -127,6 +127,9 @@ func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child childIops, ok := child.InodeOperations.(*inodeOperations) if !ok { + if _, ok := child.InodeOperations.(*fifo); ok { + return false + } panic(fmt.Sprintf("revalidating inode operations of unknown type %T", child.InodeOperations)) } parentIops, ok := parent.InodeOperations.(*inodeOperations) diff --git a/pkg/sentry/fs/gofer/fifo.go b/pkg/sentry/fs/gofer/fifo.go new file mode 100644 index 000000000..456557058 --- /dev/null +++ b/pkg/sentry/fs/gofer/fifo.go @@ -0,0 +1,40 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +// +stateify savable +type fifo struct { + fs.InodeOperations + fileIops *inodeOperations +} + +var _ fs.InodeOperations = (*fifo)(nil) + +// Rename implements fs.InodeOperations. It forwards the call to the underlying +// file inode to handle the file rename. Note that file key remains the same +// after the rename to keep the endpoint mapping. +func (i *fifo) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { + return i.fileIops.Rename(ctx, inode, oldParent, oldName, newParent, newName, replacement) +} + +// StatFS implements fs.InodeOperations. +func (i *fifo) StatFS(ctx context.Context) (fs.Info, error) { + return i.fileIops.StatFS(ctx) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 0c2f89ae8..2df2fe889 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -61,7 +61,7 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context ctx := contexttest.Context(t) sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */) + }, root.QID, p9.AttrMaskAll(), root.Attr) m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 0c1be05ef..a35c3a23d 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -23,14 +23,24 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's // encoding of strings, which uses 2 bytes for the length prefix. const maxFilenameLen = (1 << 16) - 1 +func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode { + if newType&^p9.FileModeMask != 0 { + panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType)) + } + clear := mode &^ p9.FileModeMask + return clear | newType +} + // Lookup loads an Inode at name into a Dirent based on the session's cache // policy. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { @@ -69,8 +79,25 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, err } + if i.session().overrides != nil { + // Check if file belongs to a internal named pipe. Note that it doesn't need + // to check for sockets because it's done in newInodeOperations below. 
+ deviceKey := device.MultiDeviceKey{ + Device: p9attr.RDev, + SecondaryDevice: i.session().connID, + Inode: qids[0].Path, + } + unlock := i.session().overrides.lock() + if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil { + unlock() + pipeInode.IncRef() + return fs.NewDirent(ctx, pipeInode, name), nil + } + unlock() + } + // Construct the Inode operations. - sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) + sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) // Construct a positive Dirent. return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil @@ -138,7 +165,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string qid := qids[0] // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) // Construct the positive Dirent. d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) @@ -223,82 +250,115 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, return nil, syserror.ENAMETOOLONG } - if i.session().endpoints == nil { + if i.session().overrides == nil { return nil, syscall.EOPNOTSUPP } - // Create replaces the directory fid with the newly created/opened - // file, so clone this directory so it doesn't change out from under - // this node. - _, newFile, err := i.fileState.file.walk(ctx, nil) + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() + + sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) if err != nil { return nil, err } - // We're not going to use newFile after return. - defer newFile.close(ctx) - // Stabilize the endpoint map while creation is in progress. - unlock := i.session().endpoints.lock() - defer unlock() + // Construct the positive Dirent. + childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) + i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) + return childDir, nil +} - // Create a regular file in the gofer and then mark it as a socket by - // adding this inode key in the 'endpoints' map. - owner := fs.FileOwnerFromContext(ctx) - hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) - if err != nil { - return nil, err +// CreateFifo implements fs.InodeOperations.CreateFifo. +func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { + if len(name) > maxFilenameLen { + return syserror.ENAMETOOLONG } - // We're not going to use this file. - hostFile.Close() - i.touchModificationAndStatusChangeTime(ctx, dir) + owner := fs.FileOwnerFromContext(ctx) + mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - // Get the attributes of the file to create inode key. - qid, mask, attr, err := getattr(ctx, newFile) - if err != nil { - return nil, err + // N.B. FIFOs use major/minor numbers 0. + if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { + if i.session().overrides == nil || err != syscall.EPERM { + return err + } + // If gofer doesn't support mknod, check if we can create an internal fifo. 
+ return i.createInternalFifo(ctx, dir, name, owner, perm) } - key := device.MultiDeviceKey{ - Device: attr.RDev, - SecondaryDevice: i.session().connID, - Inode: qid.Path, + i.touchModificationAndStatusChangeTime(ctx, dir) + return nil +} + +func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { + if i.session().overrides == nil { + return syserror.EPERM } - // Create child dirent. + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() - // Get an unopened p9.File for the file we created so that it can be - // cloned and re-opened multiple times after creation. - _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) if err != nil { - return nil, err + return err } - // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) + // First create a pipe. + p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize) + + // Wrap the fileOps with our Fifo. + iops := &fifo{ + InodeOperations: pipe.NewInodeOperations(ctx, perm, p), + fileIops: fileOps, + } + inode := fs.NewInode(ctx, iops, dir.MountSource, sattr) // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().endpoints.add(key, childDir, ep) - return childDir, nil + i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode) + return nil } -// CreateFifo implements fs.InodeOperations.CreateFifo. -func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG +// Caller must hold Session.endpoint lock. +func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) { + _, dirClone, err := i.fileState.file.walk(ctx, nil) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use dirClone after return. + defer dirClone.close(ctx) + // Create a regular file in the gofer and then mark it as a socket by + // adding this inode key in the 'overrides' map. owner := fs.FileOwnerFromContext(ctx) - mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - - // N.B. FIFOs use major/minor numbers 0. - if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - return err + hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use this file. + hostFile.Close() i.touchModificationAndStatusChangeTime(ctx, dir) - return nil + + // Get the attributes of the file to create inode key. + qid, mask, attr, err := getattr(ctx, dirClone) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Get an unopened p9.File for the file we created so that it can be + // cloned and re-opened multiple times after creation. + _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Construct new inode with file type overridden. 
+ attr.Mode = changeType(attr.Mode, fileType) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr) + return sattr, iops, nil } // Remove implements InodeOperations.Remove. @@ -307,20 +367,23 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return syserror.ENAMETOOLONG } - var key device.MultiDeviceKey - removeSocket := false - if i.session().endpoints != nil { - // Find out if file being deleted is a socket that needs to be + var key *device.MultiDeviceKey + if i.session().overrides != nil { + // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { defer d.DecRef() - if fs.IsSocket(d.Inode.StableAttr) { - child := d.Inode.InodeOperations.(*inodeOperations) - key = child.fileState.key - removeSocket = true - // Stabilize the endpoint map while deletion is in progress. - unlock := i.session().endpoints.lock() + if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) { + switch iops := d.Inode.InodeOperations.(type) { + case *inodeOperations: + key = &iops.fileState.key + case *fifo: + key = &iops.fileIops.fileState.key + } + + // Stabilize the override map while deletion is in progress. + unlock := i.session().overrides.lock() defer unlock() } } @@ -329,8 +392,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil { return err } - if removeSocket { - i.session().endpoints.remove(key) + if key != nil { + i.session().overrides.remove(*key) } i.touchModificationAndStatusChangeTime(ctx, dir) diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 498c4645a..f6b3ef178 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -33,60 +33,107 @@ import ( var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize // +stateify savable -type endpointMaps struct { - // mu protexts the direntMap, the keyMap, and the pathMap below. - mu sync.RWMutex `state:"nosave"` +type overrideInfo struct { + dirent *fs.Dirent + + // endpoint is set when dirent points to a socket. inode must not be set. + endpoint transport.BoundEndpoint + + // inode is set when dirent points to a pipe. endpoint must not be set. + inode *fs.Inode +} - // direntMap links sockets to their dirents. - // It is filled concurrently with the keyMap and is stored upon save. - // Before saving, this map is used to populate the pathMap. - direntMap map[transport.BoundEndpoint]*fs.Dirent +func (l *overrideInfo) inodeType() fs.InodeType { + switch { + case l.endpoint != nil: + return fs.Socket + case l.inode != nil: + return fs.Pipe + } + panic("endpoint or node must be set") +} - // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets. +// +stateify savable +type overrideMaps struct { + // mu protexts the keyMap, and the pathMap below. + mu sync.RWMutex `state:"nosave"` + + // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes. // It is not stored during save because the inode ID may change upon restore. - keyMap map[device.MultiDeviceKey]transport.BoundEndpoint `state:"nosave"` + keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"` - // pathMap links the sockets to their paths. + // pathMap links the sockets/pipes to their paths. // It is filled before saving from the direntMap and is stored upon save. // Upon restore, this map is used to re-populate the keyMap. 
- pathMap map[transport.BoundEndpoint]string + pathMap map[*overrideInfo]string +} + +// addBoundEndpoint adds the bound endpoint to the map. +// A reference is taken on the dirent argument. +// +// Precondition: maps must have been locked with 'lock'. +func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { + d.IncRef() + e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep} } -// add adds the endpoint to the maps. +// addPipe adds the pipe inode to the map. // A reference is taken on the dirent argument. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { - e.keyMap[key] = ep +func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) { d.IncRef() - e.direntMap[ep] = d + e.keyMap[key] = &overrideInfo{dirent: d, inode: inode} } // remove deletes the key from the maps. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) remove(key device.MultiDeviceKey) { - endpoint := e.get(key) +func (e *overrideMaps) remove(key device.MultiDeviceKey) { + endpoint := e.keyMap[key] delete(e.keyMap, key) - - d := e.direntMap[endpoint] - d.DecRef() - delete(e.direntMap, endpoint) + endpoint.dirent.DecRef() } // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *endpointMaps) lock() func() { +func (e *overrideMaps) lock() func() { e.mu.Lock() return func() { e.mu.Unlock() } } -// get returns the endpoint mapped to the given key. +// getBoundEndpoint returns the bound endpoint mapped to the given key. // -// Precondition: maps must have been locked for reading. -func (e *endpointMaps) get(key device.MultiDeviceKey) transport.BoundEndpoint { - return e.keyMap[key] +// Precondition: maps must have been locked. +func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint { + if v := e.keyMap[key]; v != nil { + return v.endpoint + } + return nil +} + +// getPipe returns the pipe inode mapped to the given key. +// +// Precondition: maps must have been locked. +func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode { + if v := e.keyMap[key]; v != nil { + return v.inode + } + return nil +} + +// getType returns the inode type if there is a corresponding endpoint for the +// given key. Returns false otherwise. +func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) { + e.mu.Lock() + v := e.keyMap[key] + e.mu.Unlock() + + if v != nil { + return v.inodeType(), true + } + return 0, false } // session holds state for each 9p session established during sys_mount. @@ -137,16 +184,16 @@ type session struct { // mounter is the EUID/EGID that mounted this file system. mounter fs.FileOwner `state:"wait"` - // endpoints is used to map inodes that represent socket files to their - // corresponding endpoint. Socket files are created as regular files in the - // gofer and their presence in this map indicate that they should indeed be - // socket files. This allows unix domain sockets to be used with paths that - // belong to a gofer. + // overrides is used to map inodes that represent socket/pipes files to their + // corresponding endpoint/iops. These files are created as regular files in + // the gofer and their presence in this map indicate that they should indeed + // be socket/pipe files. 
This allows unix domain sockets and named pipes to + // be used with paths that belong to a gofer. // // TODO(gvisor.dev/issue/1200): there are few possible races with someone // stat'ing the file and another deleting it concurrently, where the file // will not be reported as socket file. - endpoints *endpointMaps `state:"wait"` + overrides *overrideMaps `state:"wait"` } // Destroy tears down the session. @@ -179,15 +226,21 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, // because overlay copyUp may have changed them out from under us. // So much for "immutable". - sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr - s.inodeMappings[sattr.InodeID] = path + switch iops := inode.InodeOperations.(type) { + case *inodeOperations: + s.inodeMappings[iops.fileState.sattr.InodeID] = path + case *fifo: + s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path + default: + panic(fmt.Sprintf("Invalid type: %T", iops)) + } } -// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes -// (p9.QID, p9.AttrMask, p9.Attr). +// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File +// and attributes (p9.QID, p9.AttrMask, p9.Attr). // // Endpoints lock must not be held if socket == false. -func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) { +func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { deviceKey := device.MultiDeviceKey{ Device: attr.RDev, SecondaryDevice: s.connID, @@ -201,17 +254,11 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p BlockSize: bsize(attr), } - if s.endpoints != nil { - if socket { - sattr.Type = fs.Socket - } else { - // If unix sockets are allowed on this filesystem, check if this file is - // supposed to be a socket file. - unlock := s.endpoints.lock() - if s.endpoints.get(deviceKey) != nil { - sattr.Type = fs.Socket - } - unlock() + if s.overrides != nil && sattr.Type == fs.RegularFile { + // If overrides are allowed on this filesystem, check if this file is + // supposed to be of a different type, e.g. socket. + if t, ok := s.overrides.getType(deviceKey); ok { + sattr.Type = t } } @@ -267,7 +314,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF s.EnableLeakCheck("gofer.session") if o.privateunixsocket { - s.endpoints = newEndpointMaps() + s.overrides = newOverrideMaps() } // Construct the MountSource with the session and superBlockFlags. @@ -305,26 +352,24 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false) + sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr) return fs.NewInode(ctx, iops, m, sattr), nil } -// newEndpointMaps creates a new endpointMaps. -func newEndpointMaps() *endpointMaps { - return &endpointMaps{ - direntMap: make(map[transport.BoundEndpoint]*fs.Dirent), - keyMap: make(map[device.MultiDeviceKey]transport.BoundEndpoint), - pathMap: make(map[transport.BoundEndpoint]string), +// newOverrideMaps creates a new overrideMaps. 
+func newOverrideMaps() *overrideMaps { + return &overrideMaps{ + keyMap: make(map[device.MultiDeviceKey]*overrideInfo), + pathMap: make(map[*overrideInfo]string), } } -// fillKeyMap populates key and dirent maps upon restore from saved -// pathmap. +// fillKeyMap populates key and dirent maps upon restore from saved pathmap. func (s *session) fillKeyMap(ctx context.Context) error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dirPath := range s.endpoints.pathMap { + for ep, dirPath := range s.overrides.pathMap { _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) if err != nil { return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) @@ -341,25 +386,25 @@ func (s *session) fillKeyMap(ctx context.Context) error { Inode: qid.Path, } - s.endpoints.keyMap[key] = ep + s.overrides.keyMap[key] = ep } return nil } -// fillPathMap populates paths for endpoints from dirents in direntMap +// fillPathMap populates paths for overrides from dirents in direntMap // before save. func (s *session) fillPathMap() error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dir := range s.endpoints.direntMap { - mountRoot := dir.MountRoot() + for _, endpoint := range s.overrides.keyMap { + mountRoot := endpoint.dirent.MountRoot() defer mountRoot.DecRef() - dirPath, _ := dir.FullName(mountRoot) + dirPath, _ := endpoint.dirent.FullName(mountRoot) if dirPath == "" { return fmt.Errorf("error getting path from dirent") } - s.endpoints.pathMap[ep] = dirPath + s.overrides.pathMap[endpoint] = dirPath } return nil } @@ -368,7 +413,7 @@ func (s *session) fillPathMap() error { func (s *session) restoreEndpointMaps(ctx context.Context) error { // When restoring, only need to create the keyMap because the dirent and path // maps got stored through the save. - s.endpoints.keyMap = make(map[device.MultiDeviceKey]transport.BoundEndpoint) + s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo) if err := s.fillKeyMap(ctx); err != nil { return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) } @@ -376,6 +421,6 @@ func (s *session) restoreEndpointMaps(ctx context.Context) error { // Re-create pathMap because it can no longer be trusted as socket paths can // change while process continues to run. Empty pathMap will be re-filled upon // next save. - s.endpoints.pathMap = make(map[transport.BoundEndpoint]string) + s.overrides.pathMap = make(map[*overrideInfo]string) return nil } diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 0285c5361..111da59f9 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -25,9 +25,9 @@ import ( // beforeSave is invoked by stateify. func (s *session) beforeSave() { - if s.endpoints != nil { + if s.overrides != nil { if err := s.fillPathMap(); err != nil { - panic("failed to save paths to endpoint map before saving" + err.Error()) + panic("failed to save paths to override map before saving" + err.Error()) } } } @@ -74,10 +74,10 @@ func (s *session) afterLoad() { panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname)) } - // Check if endpointMaps exist when uds sockets are enabled - // (only pathmap will actualy have been saved). 
- if opts.privateunixsocket != (s.endpoints != nil) { - panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil)) + // Check if overrideMaps exist when uds sockets are enabled (only pathmaps + // will actually have been saved). + if opts.privateunixsocket != (s.overrides != nil) { + panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.overrides != nil)) } if args.Flags != s.superBlockFlags { panic(fmt.Sprintf("new mount flags %v, want %v", args.Flags, s.superBlockFlags)) diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 376cfce2c..10ba2f5f0 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -32,15 +32,15 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return nil } - if i.session().endpoints != nil { - unlock := i.session().endpoints.lock() + if i.session().overrides != nil { + unlock := i.session().overrides.lock() defer unlock() - ep := i.session().endpoints.get(i.fileState.key) + ep := i.session().overrides.getBoundEndpoint(i.fileState.key) if ep != nil { return ep } - // Not found in endpoints map, it may be a gofer backed unix socket... + // Not found in overrides map, it may be a gofer backed unix socket... } inode.IncRef() diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go index d36fa74fb..e03a0c665 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go @@ -86,7 +86,7 @@ func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth // Release must be called when a is no longer in use. func (a *Accessor) Release() { a.root.DecRef() - a.mntns.DecRef(a.vfsObj) + a.mntns.DecRef() } // accessorContext implements context.Context by extending an existing diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go index 82c58c900..73308a2b5 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go @@ -45,7 +45,7 @@ func TestDevtmpfs(t *testing.T) { if err != nil { t.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef(vfsObj) + defer mntns.DecRef() root := mntns.Root() defer root.DecRef() devpop := vfs.PathOperation{ diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD new file mode 100644 index 000000000..4ba76a1e8 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -0,0 +1,55 @@ +load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +licenses(["notice"]) + +go_template_instance( + name = "dentry_list", + out = "dentry_list.go", + package = "gofer", + prefix = "dentry", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*dentry", + "Linker": "*dentry", + }, +) + +go_library( + name = "gofer", + srcs = [ + "dentry_list.go", + "directory.go", + "filesystem.go", + "gofer.go", + "handle.go", + "handle_unsafe.go", + "p9file.go", + "pagemath.go", + "regular_file.go", + "special_file.go", + "symlink.go", + "time.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/fd", + "//pkg/fspath", + "//pkg/log", + "//pkg/p9", + "//pkg/safemem", + "//pkg/sentry/fs/fsutil", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", + "//pkg/sentry/memmap", + "//pkg/sentry/pgalloc", + "//pkg/sentry/platform", + "//pkg/sentry/usage", + "//pkg/sentry/vfs", + "//pkg/syserror", 
+ "//pkg/unet", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go new file mode 100644 index 000000000..baa2cdd8e --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -0,0 +1,190 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +func (d *dentry) isDir() bool { + return d.fileType() == linux.S_IFDIR +} + +// Preconditions: d.dirMu must be locked. d.isDir(). fs.opts.interop != +// InteropModeShared. +func (d *dentry) cacheNegativeChildLocked(name string) { + if d.negativeChildren == nil { + d.negativeChildren = make(map[string]struct{}) + } + d.negativeChildren[name] = struct{}{} +} + +type directoryFD struct { + fileDescription + vfs.DirectoryFileDescriptionDefaultImpl + + mu sync.Mutex + off int64 + dirents []vfs.Dirent +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *directoryFD) Release() { +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + fd.mu.Lock() + defer fd.mu.Unlock() + + if fd.dirents == nil { + ds, err := fd.dentry().getDirents(ctx) + if err != nil { + return err + } + fd.dirents = ds + } + + for fd.off < int64(len(fd.dirents)) { + if !cb.Handle(fd.dirents[fd.off]) { + return nil + } + fd.off++ + } + return nil +} + +// Preconditions: d.isDir(). There exists at least one directoryFD representing d. +func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { + // 9P2000.L's readdir does not specify behavior in the presence of + // concurrent mutation of an iterated directory, so implementations may + // duplicate or omit entries in this case, which violates POSIX semantics. + // Thus we read all directory entries while holding d.dirMu to exclude + // directory mutations. (Note that it is impossible for the client to + // exclude concurrent mutation from other remote filesystem users. Since + // there is no way to detect if the server has incorrectly omitted + // directory entries, we simply assume that the server is well-behaved + // under InteropModeShared.) This is inconsistent with Linux (which appears + // to assume that directory fids have the correct semantics, and translates + // struct file_operations::readdir calls directly to readdir RPCs), but is + // consistent with VFS1. + + d.fs.renameMu.RLock() + defer d.fs.renameMu.RUnlock() + d.dirMu.Lock() + defer d.dirMu.Unlock() + if d.dirents != nil { + return d.dirents, nil + } + + // It's not clear if 9P2000.L's readdir is expected to return "." and "..", + // so we generate them here. 
+ parent := d.vfsd.ParentOrSelf().Impl().(*dentry) + dirents := []vfs.Dirent{ + { + Name: ".", + Type: linux.DT_DIR, + Ino: d.ino, + NextOff: 1, + }, + { + Name: "..", + Type: uint8(atomic.LoadUint32(&parent.mode) >> 12), + Ino: parent.ino, + NextOff: 2, + }, + } + off := uint64(0) + const count = 64 * 1024 // for consistency with the vfs1 client + d.handleMu.RLock() + defer d.handleMu.RUnlock() + if !d.handleReadable { + // This should not be possible because a readable handle should have + // been opened when the calling directoryFD was opened. + panic("gofer.dentry.getDirents called without a readable handle") + } + for { + p9ds, err := d.handle.file.readdir(ctx, off, count) + if err != nil { + return nil, err + } + if len(p9ds) == 0 { + // Cache dirents for future directoryFDs if permitted. + if d.fs.opts.interop != InteropModeShared { + d.dirents = dirents + } + return dirents, nil + } + for _, p9d := range p9ds { + if p9d.Name == "." || p9d.Name == ".." { + continue + } + dirent := vfs.Dirent{ + Name: p9d.Name, + Ino: p9d.QID.Path, + NextOff: int64(len(dirents) + 1), + } + // p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or + // DMSOCKET. + switch p9d.Type { + case p9.TypeSymlink: + dirent.Type = linux.DT_LNK + case p9.TypeDir: + dirent.Type = linux.DT_DIR + default: + dirent.Type = linux.DT_REG + } + dirents = append(dirents, dirent) + } + off = p9ds[len(p9ds)-1].Offset + } +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + + switch whence { + case linux.SEEK_SET: + if offset < 0 { + return 0, syserror.EINVAL + } + if offset == 0 { + // Ensure that the next call to fd.IterDirents() calls + // fd.dentry().getDirents(). + fd.dirents = nil + } + fd.off = offset + return fd.off, nil + case linux.SEEK_CUR: + offset += fd.off + if offset < 0 { + return 0, syserror.EINVAL + } + // Don't clear fd.dirents in this case, even if offset == 0. + fd.off = offset + return fd.off, nil + default: + return 0, syserror.EINVAL + } +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go new file mode 100644 index 000000000..8eb61debf --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -0,0 +1,1087 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Sync implements vfs.FilesystemImpl.Sync. +func (fs *filesystem) Sync(ctx context.Context) error { + // Snapshot current dentries and special files. 
+ fs.syncMu.Lock() + ds := make([]*dentry, 0, len(fs.dentries)) + for d := range fs.dentries { + ds = append(ds, d) + } + sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs)) + for sffd := range fs.specialFileFDs { + sffds = append(sffds, sffd) + } + fs.syncMu.Unlock() + + // Return the first error we encounter, but sync everything we can + // regardless. + var retErr error + + // Sync regular files. + for _, d := range ds { + if !d.TryIncRef() { + continue + } + err := d.syncSharedHandle(ctx) + d.DecRef() + if err != nil && retErr == nil { + retErr = err + } + } + + // Sync special files, which may be writable but do not use dentry shared + // handles (so they won't be synced by the above). + for _, sffd := range sffds { + if !sffd.vfsfd.TryIncRef() { + continue + } + err := sffd.Sync(ctx) + sffd.vfsfd.DecRef() + if err != nil && retErr == nil { + retErr = err + } + } + + return retErr +} + +// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's +// encoding of strings, which uses 2 bytes for the length prefix. +const maxFilenameLen = (1 << 16) - 1 + +// dentrySlicePool is a pool of *[]*dentry used to store dentries for which +// dentry.checkCachingLocked() must be called. The pool holds pointers to +// slices because Go lacks generics, so sync.Pool operates on interface{}, so +// every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy +// of the slice header on the heap. +var dentrySlicePool = sync.Pool{ + New: func() interface{} { + ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity + return &ds + }, +} + +func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { + if ds == nil { + ds = dentrySlicePool.Get().(*[]*dentry) + } + *ds = append(*ds, d) + return ds +} + +// Preconditions: ds != nil. +func putDentrySlice(ds *[]*dentry) { + // Allow dentries to be GC'd. + for i := range *ds { + (*ds)[i] = nil + } + *ds = (*ds)[:0] + dentrySlicePool.Put(ds) +} + +// stepLocked resolves rp.Component() to an existing file, starting from the +// given directory. +// +// Dentries which may become cached as a result of the traversal are appended +// to *ds. +// +// Preconditions: fs.renameMu must be locked. d.dirMu must be locked. +// !rp.Done(). If fs.opts.interop == InteropModeShared, then d's cached +// metadata must be up to date. +func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { + if !d.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } +afterSymlink: + name := rp.Component() + if name == "." { + rp.Advance() + return d, nil + } + if name == ".." { + parentVFSD, err := rp.ResolveParent(&d.vfsd) + if err != nil { + return nil, err + } + parent := parentVFSD.Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // We must assume that parentVFSD is correct, because if d has been + // moved elsewhere in the remote filesystem so that its parent has + // changed, we have no way of determining its new parent's location + // in the filesystem. Get updated metadata for parentVFSD. 
+ _, attrMask, attr, err := parent.file.getAttr(ctx, dentryAttrMask()) + if err != nil { + return nil, err + } + parent.updateFromP9Attrs(attrMask, &attr) + } + rp.Advance() + return parent, nil + } + childVFSD, err := rp.ResolveChild(&d.vfsd, name) + if err != nil { + return nil, err + } + // FIXME(jamieliu): Linux performs revalidation before mount lookup + // (fs/namei.c:lookup_fast() => __d_lookup_rcu(), d_revalidate(), + // __follow_mount_rcu()). + child, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, childVFSD, ds) + if err != nil { + return nil, err + } + if child == nil { + return nil, syserror.ENOENT + } + if child.isSymlink() && rp.ShouldFollowSymlink() { + target, err := child.readlink(ctx, rp.Mount()) + if err != nil { + return nil, err + } + if err := rp.HandleSymlink(target); err != nil { + return nil, err + } + goto afterSymlink // don't check the current directory again + } + rp.Advance() + return child, nil +} + +// revalidateChildLocked must be called after a call to parent.vfsd.Child(name) +// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be +// nil) to verify that the returned child (or lack thereof) is correct. If no file +// exists at name, revalidateChildLocked returns (nil, nil). +// +// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked. +// parent.isDir(). name is not "." or "..". +// +// Postconditions: If revalidateChildLocked returns a non-nil dentry, its +// cached metadata is up to date. +func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, childVFSD *vfs.Dentry, ds **[]*dentry) (*dentry, error) { + if childVFSD != nil && fs.opts.interop != InteropModeShared { + // We have a cached dentry that is assumed to be correct. + return childVFSD.Impl().(*dentry), nil + } + // We either don't have a cached dentry or need to verify that it's still + // correct, either of which requires a remote lookup. Check if this name is + // valid before performing the lookup. + if len(name) > maxFilenameLen { + return nil, syserror.ENAMETOOLONG + } + // Check if we've already cached this lookup with a negative result. + if _, ok := parent.negativeChildren[name]; ok { + return nil, nil + } + // Perform the remote lookup. + qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name) + if err != nil && err != syserror.ENOENT { + return nil, err + } + if childVFSD != nil { + child := childVFSD.Impl().(*dentry) + if !file.isNil() && qid.Path == child.ino { + // The file at this path hasn't changed. Just update cached + // metadata. + file.close(ctx) + child.updateFromP9Attrs(attrMask, &attr) + return child, nil + } + // The file at this path has changed or no longer exists. Remove + // the stale dentry from the tree, and re-evaluate its caching + // status (i.e. if it has 0 references, drop it). + vfsObj.ForceDeleteDentry(childVFSD) + *ds = appendDentry(*ds, child) + childVFSD = nil + } + if file.isNil() { + // No file exists at this path now. Cache the negative lookup if + // allowed. + if fs.opts.interop != InteropModeShared { + parent.cacheNegativeChildLocked(name) + } + return nil, nil + } + // Create a new dentry representing the file. 
+ child, err := fs.newDentry(ctx, file, qid, attrMask, &attr) + if err != nil { + file.close(ctx) + return nil, err + } + parent.IncRef() // reference held by child on its parent + parent.vfsd.InsertChild(&child.vfsd, name) + // For now, child has 0 references, so our caller should call + // child.checkCachingLocked(). + *ds = appendDentry(*ds, child) + return child, nil +} + +// walkParentDirLocked resolves all but the last path component of rp to an +// existing directory, starting from the given directory (which is usually +// rp.Start().Impl().(*dentry)). It does not check that the returned directory +// is searchable by the provider of rp. +// +// Preconditions: fs.renameMu must be locked. !rp.Done(). If fs.opts.interop == +// InteropModeShared, then d's cached metadata must be up to date. +func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { + for !rp.Final() { + d.dirMu.Lock() + next, err := fs.stepLocked(ctx, rp, d, ds) + d.dirMu.Unlock() + if err != nil { + return nil, err + } + d = next + } + if !d.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// resolveLocked resolves rp to an existing file. +// +// Preconditions: fs.renameMu must be locked. +func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { + d := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for rp.Start() as required by fs.stepLocked(). + if err := d.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + for !rp.Done() { + d.dirMu.Lock() + next, err := fs.stepLocked(ctx, rp, d, ds) + d.dirMu.Unlock() + if err != nil { + return nil, err + } + d = next + } + if rp.MustBeDir() && !d.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// doCreateAt checks that creating a file at rp is permitted, then invokes +// create to do so. +// +// Preconditions: !rp.Done(). For the final path component in rp, +// !rp.ShouldFollowSymlink(). +func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return err + } + } + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return err + } + if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + if parent.isDeleted() { + return syserror.ENOENT + } + name := rp.Component() + if name == "." || name == ".." { + return syserror.EEXIST + } + if len(name) > maxFilenameLen { + return syserror.ENAMETOOLONG + } + if !dir && rp.MustBeDir() { + return syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + parent.dirMu.Lock() + defer parent.dirMu.Unlock() + if fs.opts.interop == InteropModeShared { + // The existence of a dentry at name would be inconclusive because the + // file it represents may have been deleted from the remote filesystem, + // so we would need to make an RPC to revalidate the dentry. Just + // attempt the file creation RPC instead. 
If a file does exist, the RPC + // will fail with EEXIST like we would have. If the RPC succeeds, and a + // stale dentry exists, the dentry will fail revalidation next time + // it's used. + return create(parent, name) + } + if parent.vfsd.Child(name) != nil { + return syserror.EEXIST + } + // No cached dentry exists; however, there might still be an existing file + // at name. As above, we attempt the file creation RPC anyway. + if err := create(parent, name); err != nil { + return err + } + parent.touchCMtime(ctx) + delete(parent.negativeChildren, name) + parent.dirents = nil + return nil +} + +// Preconditions: !rp.Done(). +func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return err + } + } + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return err + } + if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + if err := rp.Mount().CheckBeginWrite(); err != nil { + return err + } + defer rp.Mount().EndWrite() + + name := rp.Component() + if dir { + if name == "." { + return syserror.EINVAL + } + if name == ".." { + return syserror.ENOTEMPTY + } + } else { + if name == "." || name == ".." { + return syserror.EISDIR + } + } + vfsObj := rp.VirtualFilesystem() + mntns := vfs.MountNamespaceFromContext(ctx) + parent.dirMu.Lock() + defer parent.dirMu.Unlock() + childVFSD := parent.vfsd.Child(name) + var child *dentry + // We only need a dentry representing the file at name if it can be a mount + // point. If childVFSD is nil, then it can't be a mount point. If childVFSD + // is non-nil but stale, the actual file can't be a mount point either; we + // detect this case by just speculatively calling PrepareDeleteDentry and + // only revalidating the dentry if that fails (indicating that the existing + // dentry is a mount point). 
+ if childVFSD != nil { + child = childVFSD.Impl().(*dentry) + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { + child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, childVFSD, &ds) + if err != nil { + return err + } + if child != nil { + childVFSD = &child.vfsd + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { + return err + } + } else { + childVFSD = nil + } + } + } else if _, ok := parent.negativeChildren[name]; ok { + return syserror.ENOENT + } + flags := uint32(0) + if dir { + if child != nil && !child.isDir() { + return syserror.ENOTDIR + } + flags = linux.AT_REMOVEDIR + } else { + if child != nil && child.isDir() { + return syserror.EISDIR + } + if rp.MustBeDir() { + return syserror.ENOTDIR + } + } + err = parent.file.unlinkAt(ctx, name, flags) + if err != nil { + if childVFSD != nil { + vfsObj.AbortDeleteDentry(childVFSD) + } + return err + } + if fs.opts.interop != InteropModeShared { + parent.touchCMtime(ctx) + parent.cacheNegativeChildLocked(name) + parent.dirents = nil + } + if child != nil { + child.setDeleted() + vfsObj.CommitDeleteDentry(childVFSD) + ds = appendDentry(ds, child) + } + return nil +} + +// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls +// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for +// writing. +// +// ds is a pointer-to-pointer since defer evaluates its arguments immediately, +// but dentry slices are allocated lazily, and it's much easier to say "defer +// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { +// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. +func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) { + fs.renameMu.RUnlock() + if *ds == nil { + return + } + if len(**ds) != 0 { + fs.renameMu.Lock() + for _, d := range **ds { + d.checkCachingLocked() + } + fs.renameMu.Unlock() + } + putDentrySlice(*ds) +} + +func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) { + if *ds == nil { + fs.renameMu.Unlock() + return + } + for _, d := range **ds { + d.checkCachingLocked() + } + fs.renameMu.Unlock() + putDentrySlice(*ds) +} + +// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. +func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return nil, err + } + if opts.CheckSearchable { + if !d.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } + } + d.IncRef() + return &d.vfsd, nil +} + +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return nil, err + } + d.IncRef() + return &d.vfsd, nil +} + +// LinkAt implements vfs.FilesystemImpl.LinkAt. 
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string) error { + if rp.Mount() != vd.Mount() { + return syserror.EXDEV + } + // 9P2000.L supports hard links, but we don't. + return syserror.EPERM + }) +} + +// MkdirAt implements vfs.FilesystemImpl.MkdirAt. +func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { + return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// MknodAt implements vfs.FilesystemImpl.MknodAt. +func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// OpenAt implements vfs.FilesystemImpl.OpenAt. +func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + // Reject O_TMPFILE, which is not supported; supporting it correctly in the + // presence of other remote filesystem users requires remote filesystem + // support, and it isn't clear that there's any way to implement this in + // 9P. + if opts.Flags&linux.O_TMPFILE != 0 { + return nil, syserror.EOPNOTSUPP + } + mayCreate := opts.Flags&linux.O_CREAT != 0 + mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) + + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by fs.stepLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + if rp.Done() { + return start.openLocked(ctx, rp, opts.Flags) + } + +afterTrailingSymlink: + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return nil, err + } + // Check for search permission in the parent directory. + if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } + // Determine whether or not we need to create a file. + parent.dirMu.Lock() + child, err := fs.stepLocked(ctx, rp, parent, &ds) + if err == syserror.ENOENT && mayCreate { + fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts) + parent.dirMu.Unlock() + return fd, err + } + if err != nil { + parent.dirMu.Unlock() + return nil, err + } + // Open existing child or follow symlink. + parent.dirMu.Unlock() + if mustCreate { + return nil, syserror.EEXIST + } + if child.isSymlink() && rp.ShouldFollowSymlink() { + target, err := child.readlink(ctx, rp.Mount()) + if err != nil { + return nil, err + } + if err := rp.HandleSymlink(target); err != nil { + return nil, err + } + start = parent + goto afterTrailingSymlink + } + return child.openLocked(ctx, rp, opts.Flags) +} + +// Preconditions: fs.renameMu must be locked. 
+func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, flags uint32) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(flags) + if err := d.checkPermissions(rp.Credentials(), ats, d.isDir()); err != nil { + return nil, err + } + mnt := rp.Mount() + filetype := d.fileType() + switch { + case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD: + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0); err != nil { + return nil, err + } + fd := ®ularFileFD{} + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ + AllowDirectIO: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil + case filetype == linux.S_IFDIR: + // Can't open directories with O_CREAT. + if flags&linux.O_CREAT != 0 { + return nil, syserror.EISDIR + } + // Can't open directories writably. + if ats&vfs.MayWrite != 0 { + return nil, syserror.EISDIR + } + if flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil { + return nil, err + } + fd := &directoryFD{} + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return &fd.vfsfd, nil + case filetype == linux.S_IFLNK: + // Can't open symlinks without O_PATH (which is unimplemented). + return nil, syserror.ELOOP + default: + if flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0) + if err != nil { + return nil, err + } + fd := &specialFileFD{ + handle: h, + } + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + h.close(ctx) + return nil, err + } + return &fd.vfsfd, nil + } +} + +// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked. +func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { + if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + return nil, err + } + if d.isDeleted() { + return nil, syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return nil, err + } + defer mnt.EndWrite() + + // 9P2000.L's lcreate takes a fid representing the parent directory, and + // converts it into an open fid representing the created file, so we need + // to duplicate the directory fid first. + _, dirfile, err := d.file.walk(ctx, nil) + if err != nil { + return nil, err + } + creds := rp.Credentials() + name := rp.Component() + fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + if err != nil { + dirfile.close(ctx) + return nil, err + } + // Then we need to walk to the file we just created to get a non-open fid + // representing it, and to get its metadata. This must use d.file since, as + // explained above, dirfile was invalidated by dirfile.Create(). + walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name) + if err != nil { + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, err + } + // Sanity-check that we walked to the file we created. + if createQID.Path != walkQID.Path { + // Probably due to concurrent remote filesystem mutation? 
+ ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop) + nonOpenFile.close(ctx) + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, syserror.EAGAIN + } + + // Construct the new dentry. + child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr) + if err != nil { + nonOpenFile.close(ctx) + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, err + } + // Incorporate the fid that was opened by lcreate. + useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD + if useRegularFileFD { + child.handleMu.Lock() + child.handle.file = openFile + if fdobj != nil { + child.handle.fd = int32(fdobj.Release()) + } + child.handleReadable = vfs.MayReadFileWithOpenFlags(opts.Flags) + child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags) + child.handleMu.Unlock() + } + // Take a reference on the new dentry to be held by the new file + // description. (This reference also means that the new dentry is not + // eligible for caching yet, so we don't need to append to a dentry slice.) + child.refs = 1 + // Insert the dentry into the tree. + d.IncRef() // reference held by child on its parent d + d.vfsd.InsertChild(&child.vfsd, name) + if d.fs.opts.interop != InteropModeShared { + d.touchCMtime(ctx) + delete(d.negativeChildren, name) + d.dirents = nil + } + + // Finally, construct a file description representing the created file. + var childVFSFD *vfs.FileDescription + mnt.IncRef() + if useRegularFileFD { + fd := ®ularFileFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ + AllowDirectIO: true, + }); err != nil { + return nil, err + } + childVFSFD = &fd.vfsfd + } else { + fd := &specialFileFD{ + handle: handle{ + file: openFile, + fd: -1, + }, + } + if fdobj != nil { + fd.handle.fd = int32(fdobj.Release()) + } + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + fd.handle.close(ctx) + return nil, err + } + childVFSFD = &fd.vfsfd + } + return childVFSFD, nil +} + +// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. +func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return "", err + } + if !d.isSymlink() { + return "", syserror.EINVAL + } + return d.readlink(ctx, rp.Mount()) +} + +// RenameAt implements vfs.FilesystemImpl.RenameAt. +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + if opts.Flags != 0 { + // Requires 9P support. + return syserror.EINVAL + } + + var ds *[]*dentry + fs.renameMu.Lock() + defer fs.renameMuUnlockAndCheckCaching(&ds) + newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) + if err != nil { + return err + } + newName := rp.Component() + if newName == "." || newName == ".." 
{ + return syserror.EBUSY + } + mnt := rp.Mount() + if mnt != oldParentVD.Mount() { + return syserror.EXDEV + } + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + + oldParent := oldParentVD.Dentry().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + if err := oldParent.updateFromGetattr(ctx); err != nil { + return err + } + } + if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + vfsObj := rp.VirtualFilesystem() + // We need a dentry representing the renamed file since, if it's a + // directory, we need to check for write permission on it. + oldParent.dirMu.Lock() + defer oldParent.dirMu.Unlock() + renamed, err := fs.revalidateChildLocked(ctx, vfsObj, oldParent, oldName, oldParent.vfsd.Child(oldName), &ds) + if err != nil { + return err + } + if renamed == nil { + return syserror.ENOENT + } + if renamed.isDir() { + if renamed == newParent || renamed.vfsd.IsAncestorOf(&newParent.vfsd) { + return syserror.EINVAL + } + if oldParent != newParent { + if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + return err + } + } + } else { + if opts.MustBeDir || rp.MustBeDir() { + return syserror.ENOTDIR + } + } + + if oldParent != newParent { + if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + newParent.dirMu.Lock() + defer newParent.dirMu.Unlock() + } + if newParent.isDeleted() { + return syserror.ENOENT + } + replacedVFSD := newParent.vfsd.Child(newName) + var replaced *dentry + // This is similar to unlinkAt, except: + // + // - We revalidate the replaced dentry unconditionally for simplicity. + // + // - If rp.MustBeDir(), then we need a dentry representing the replaced + // file regardless to confirm that it's a directory. + if replacedVFSD != nil || rp.MustBeDir() { + replaced, err = fs.revalidateChildLocked(ctx, vfsObj, newParent, newName, replacedVFSD, &ds) + if err != nil { + return err + } + if replaced != nil { + if replaced.isDir() { + if !renamed.isDir() { + return syserror.EISDIR + } + } else { + if rp.MustBeDir() || renamed.isDir() { + return syserror.ENOTDIR + } + } + replacedVFSD = &replaced.vfsd + } else { + replacedVFSD = nil + } + } + + if oldParent == newParent && oldName == newName { + return nil + } + if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), &renamed.vfsd, replacedVFSD); err != nil { + return err + } + if err := renamed.file.rename(ctx, newParent.file, newName); err != nil { + vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) + return err + } + if fs.opts.interop != InteropModeShared { + oldParent.cacheNegativeChildLocked(oldName) + oldParent.dirents = nil + delete(newParent.negativeChildren, newName) + newParent.dirents = nil + } + vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, &newParent.vfsd, newName, replacedVFSD) + return nil +} + +// RmdirAt implements vfs.FilesystemImpl.RmdirAt. +func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { + return fs.unlinkAt(ctx, rp, true /* dir */) +} + +// SetStatAt implements vfs.FilesystemImpl.SetStatAt. 
+func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()) +} + +// StatAt implements vfs.FilesystemImpl.StatAt. +func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return linux.Statx{}, err + } + // Since walking updates metadata for all traversed dentries under + // InteropModeShared, including the returned one, we can return cached + // metadata here regardless of fs.opts.interop. + var stat linux.Statx + d.statTo(&stat) + return stat, nil +} + +// StatFSAt implements vfs.FilesystemImpl.StatFSAt. +func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return linux.Statfs{}, err + } + fsstat, err := d.file.statFS(ctx) + if err != nil { + return linux.Statfs{}, err + } + nameLen := uint64(fsstat.NameLength) + if nameLen > maxFilenameLen { + nameLen = maxFilenameLen + } + return linux.Statfs{ + // This is primarily for distinguishing a gofer file system in + // tests. Testing is important, so instead of defining + // something completely random, use a standard value. + Type: linux.V9FS_MAGIC, + BlockSize: int64(fsstat.BlockSize), + Blocks: fsstat.Blocks, + BlocksFree: fsstat.BlocksFree, + BlocksAvailable: fsstat.BlocksAvailable, + Files: fsstat.Files, + FilesFree: fsstat.FilesFree, + NameLength: nameLen, + }, nil +} + +// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. +func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. +func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { + return fs.unlinkAt(ctx, rp, false /* dir */) +} + +// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return nil, err + } + return d.listxattr(ctx) +} + +// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return "", err + } + return d.getxattr(ctx, name) +} + +// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. 
+func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.setxattr(ctx, &opts) +} + +// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. +func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.removexattr(ctx, name) +} + +// PrependPath implements vfs.FilesystemImpl.PrependPath. +func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { + fs.renameMu.RLock() + defer fs.renameMu.RUnlock() + return vfs.GenericPrependPath(vfsroot, vd, b) +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go new file mode 100644 index 000000000..d0552bd99 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -0,0 +1,1147 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gofer provides a filesystem implementation that is backed by a 9p +// server, interchangably referred to as "gofers" throughout this package. +// +// Lock order: +// regularFileFD/directoryFD.mu +// filesystem.renameMu +// dentry.dirMu +// filesystem.syncMu +// dentry.metadataMu +// *** "memmap.Mappable locks" below this point +// dentry.mapsMu +// *** "memmap.Mappable locks taken by Translate" below this point +// dentry.handleMu +// dentry.dataMu +// +// Locking dentry.dirMu in multiple dentries requires holding +// filesystem.renameMu for writing. +package gofer + +import ( + "fmt" + "strconv" + "sync" + "sync/atomic" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/usermem" +) + +// FilesystemType implements vfs.FilesystemType. +type FilesystemType struct{} + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + vfsfs vfs.Filesystem + + // mfp is used to allocate memory that caches regular file contents. mfp is + // immutable. + mfp pgalloc.MemoryFileProvider + + // Immutable options. + opts filesystemOptions + + // client is the client used by this filesystem. client is immutable. + client *p9.Client + + // uid and gid are the effective KUID and KGID of the filesystem's creator, + // and are used as the owner and group for files that don't specify one. + // uid and gid are immutable. 
+ uid auth.KUID + gid auth.KGID + + // renameMu serves two purposes: + // + // - It synchronizes path resolution with renaming initiated by this + // client. + // + // - It is held by path resolution to ensure that reachable dentries remain + // valid. A dentry is reachable by path resolution if it has a non-zero + // reference count (such that it is usable as vfs.ResolvingPath.Start() or + // is reachable from its children), or if it is a child dentry (such that + // it is reachable from its parent). + renameMu sync.RWMutex + + // cachedDentries contains all dentries with 0 references. (Due to race + // conditions, it may also contain dentries with non-zero references.) + // cachedDentriesLen is the number of dentries in cachedDentries. These + // fields are protected by renameMu. + cachedDentries dentryList + cachedDentriesLen uint64 + + // dentries contains all dentries in this filesystem. specialFileFDs + // contains all open specialFileFDs. These fields are protected by syncMu. + syncMu sync.Mutex + dentries map[*dentry]struct{} + specialFileFDs map[*specialFileFD]struct{} +} + +type filesystemOptions struct { + // "Standard" 9P options. + fd int + aname string + interop InteropMode // derived from the "cache" mount option + msize uint32 + version string + + // maxCachedDentries is the maximum number of dentries with 0 references + // retained by the client. + maxCachedDentries uint64 + + // If forcePageCache is true, host FDs may not be used for application + // memory mappings even if available; instead, the client must perform its + // own caching of regular file pages. This is primarily useful for testing. + forcePageCache bool + + // If limitHostFDTranslation is true, apply maxFillRange() constraints to + // host FD mappings returned by dentry.(memmap.Mappable).Translate(). This + // makes memory accounting behavior more consistent between cases where + // host FDs are / are not available, but may increase the frequency of + // sentry-handled page faults on files for which a host FD is available. + limitHostFDTranslation bool + + // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote + // filesystem may not be coherent with writable host FDs opened later, so + // mappings of the former must be replaced by mappings of the latter. This + // is usually only the case when the remote filesystem is an overlayfs + // mount on Linux < 4.19. + overlayfsStaleRead bool + + // If regularFilesUseSpecialFileFD is true, application FDs representing + // regular files will use distinct file handles for each FD, in the same + // way that application FDs representing "special files" such as sockets + // do. Note that this disables client caching and mmap for regular files. + regularFilesUseSpecialFileFD bool +} + +// InteropMode controls the client's interaction with other remote filesystem +// users. +type InteropMode uint32 + +const ( + // InteropModeExclusive is appropriate when the filesystem client is the + // only user of the remote filesystem. + // + // - The client may cache arbitrary filesystem state (file data, metadata, + // filesystem structure, etc.). + // + // - Client changes to filesystem state may be sent to the remote + // filesystem asynchronously, except when server permission checks are + // necessary. + // + // - File timestamps are based on client clocks. This ensures that users of + // the client observe timestamps that are coherent with their own clocks + // and consistent with Linux's semantics. 
However, since it is not always + // possible for clients to set arbitrary atimes and mtimes, and never + // possible for clients to set arbitrary ctimes, file timestamp changes are + // stored in the client only and never sent to the remote filesystem. + InteropModeExclusive InteropMode = iota + + // InteropModeWritethrough is appropriate when there are read-only users of + // the remote filesystem that expect to observe changes made by the + // filesystem client. + // + // - The client may cache arbitrary filesystem state. + // + // - Client changes to filesystem state must be sent to the remote + // filesystem synchronously. + // + // - File timestamps are based on client clocks. As a corollary, access + // timestamp changes from other remote filesystem users will not be visible + // to the client. + InteropModeWritethrough + + // InteropModeShared is appropriate when there are users of the remote + // filesystem that may mutate its state other than the client. + // + // - The client must verify cached filesystem state before using it. + // + // - Client changes to filesystem state must be sent to the remote + // filesystem synchronously. + // + // - File timestamps are based on server clocks. This is necessary to + // ensure that timestamp changes are synchronized between remote filesystem + // users. + // + // Note that the correctness of InteropModeShared depends on the server + // correctly implementing 9P fids (i.e. each fid immutably represents a + // single filesystem object), even in the presence of remote filesystem + // mutations from other users. If this is violated, the behavior of the + // client is undefined. + InteropModeShared +) + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + mfp := pgalloc.MemoryFileProviderFromContext(ctx) + if mfp == nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider") + return nil, nil, syserror.EINVAL + } + + mopts := vfs.GenericParseMountOptions(opts.Data) + var fsopts filesystemOptions + + // Check that the transport is "fd". + trans, ok := mopts["trans"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: transport must be specified as 'trans=fd'") + return nil, nil, syserror.EINVAL + } + delete(mopts, "trans") + if trans != "fd" { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: unsupported transport: trans=%s", trans) + return nil, nil, syserror.EINVAL + } + + // Check that read and write FDs are provided and identical. 
+ rfdstr, ok := mopts["rfdno"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD must be specified as 'rfdno=<file descriptor>") + return nil, nil, syserror.EINVAL + } + delete(mopts, "rfdno") + rfd, err := strconv.Atoi(rfdstr) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid read FD: rfdno=%s", rfdstr) + return nil, nil, syserror.EINVAL + } + wfdstr, ok := mopts["wfdno"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: write FD must be specified as 'wfdno=<file descriptor>") + return nil, nil, syserror.EINVAL + } + delete(mopts, "wfdno") + wfd, err := strconv.Atoi(wfdstr) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid write FD: wfdno=%s", wfdstr) + return nil, nil, syserror.EINVAL + } + if rfd != wfd { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD (%d) and write FD (%d) must be equal", rfd, wfd) + return nil, nil, syserror.EINVAL + } + fsopts.fd = rfd + + // Get the attach name. + fsopts.aname = "/" + if aname, ok := mopts["aname"]; ok { + delete(mopts, "aname") + fsopts.aname = aname + } + + // Parse the cache policy. For historical reasons, this defaults to the + // least generally-applicable option, InteropModeExclusive. + fsopts.interop = InteropModeExclusive + if cache, ok := mopts["cache"]; ok { + delete(mopts, "cache") + switch cache { + case "fscache": + fsopts.interop = InteropModeExclusive + case "fscache_writethrough": + fsopts.interop = InteropModeWritethrough + case "none": + fsopts.regularFilesUseSpecialFileFD = true + fallthrough + case "remote_revalidating": + fsopts.interop = InteropModeShared + default: + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache) + return nil, nil, syserror.EINVAL + } + } + + // Parse the 9P message size. + fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M + if msizestr, ok := mopts["msize"]; ok { + delete(mopts, "msize") + msize, err := strconv.ParseUint(msizestr, 10, 32) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr) + return nil, nil, syserror.EINVAL + } + fsopts.msize = uint32(msize) + } + + // Parse the 9P protocol version. + fsopts.version = p9.HighestVersionString() + if version, ok := mopts["version"]; ok { + delete(mopts, "version") + fsopts.version = version + } + + // Parse the dentry cache limit. + fsopts.maxCachedDentries = 1000 + if str, ok := mopts["dentry_cache_limit"]; ok { + delete(mopts, "dentry_cache_limit") + maxCachedDentries, err := strconv.ParseUint(str, 10, 64) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) + return nil, nil, syserror.EINVAL + } + fsopts.maxCachedDentries = maxCachedDentries + } + + // Handle simple flags. + if _, ok := mopts["force_page_cache"]; ok { + delete(mopts, "force_page_cache") + fsopts.forcePageCache = true + } + if _, ok := mopts["limit_host_fd_translation"]; ok { + delete(mopts, "limit_host_fd_translation") + fsopts.limitHostFDTranslation = true + } + if _, ok := mopts["overlayfs_stale_read"]; ok { + delete(mopts, "overlayfs_stale_read") + fsopts.overlayfsStaleRead = true + } + // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying + // "cache=none". + + // Check for unparsed options. 
+ if len(mopts) != 0 { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts) + return nil, nil, syserror.EINVAL + } + + // Establish a connection with the server. + conn, err := unet.NewSocket(fsopts.fd) + if err != nil { + return nil, nil, err + } + + // Perform version negotiation with the server. + ctx.UninterruptibleSleepStart(false) + client, err := p9.NewClient(conn, fsopts.msize, fsopts.version) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + conn.Close() + return nil, nil, err + } + // Ownership of conn has been transferred to client. + + // Perform attach to obtain the filesystem root. + ctx.UninterruptibleSleepStart(false) + attached, err := client.Attach(fsopts.aname) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + client.Close() + return nil, nil, err + } + attachFile := p9file{attached} + qid, attrMask, attr, err := attachFile.getAttr(ctx, dentryAttrMask()) + if err != nil { + attachFile.close(ctx) + client.Close() + return nil, nil, err + } + + // Construct the filesystem object. + fs := &filesystem{ + mfp: mfp, + opts: fsopts, + uid: creds.EffectiveKUID, + gid: creds.EffectiveKGID, + client: client, + dentries: make(map[*dentry]struct{}), + specialFileFDs: make(map[*specialFileFD]struct{}), + } + fs.vfsfs.Init(vfsObj, fs) + + // Construct the root dentry. + root, err := fs.newDentry(ctx, attachFile, qid, attrMask, &attr) + if err != nil { + attachFile.close(ctx) + fs.vfsfs.DecRef() + return nil, nil, err + } + // Set the root's reference count to 2. One reference is returned to the + // caller, and the other is deliberately leaked to prevent the root from + // being "cached" and subsequently evicted. Its resources will still be + // cleaned up by fs.Release(). + root.refs = 2 + + return &fs.vfsfs, &root.vfsd, nil +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release() { + ctx := context.Background() + mf := fs.mfp.MemoryFile() + + fs.syncMu.Lock() + for d := range fs.dentries { + d.handleMu.Lock() + d.dataMu.Lock() + if d.handleWritable { + // Write dirty cached data to the remote file. + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt); err != nil { + log.Warningf("gofer.filesystem.Release: failed to flush dentry: %v", err) + } + // TODO(jamieliu): Do we need to flushf/fsync d? + } + // Discard cached pages. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + d.dataMu.Unlock() + // Close the host fd if one exists. + if d.handle.fd >= 0 { + syscall.Close(int(d.handle.fd)) + d.handle.fd = -1 + } + d.handleMu.Unlock() + } + // There can't be any specialFileFDs still using fs, since each such + // FileDescription would hold a reference on a Mount holding a reference on + // fs. + fs.syncMu.Unlock() + + // Close the connection to the server. This implicitly clunks all fids. + fs.client.Close() +} + +// dentry implements vfs.DentryImpl. +type dentry struct { + vfsd vfs.Dentry + + // refs is the reference count. Each dentry holds a reference on its + // parent, even if disowned. refs is accessed using atomic memory + // operations. + refs int64 + + // fs is the owning filesystem. fs is immutable. + fs *filesystem + + // We don't support hard links, so each dentry maps 1:1 to an inode. + + // file is the unopened p9.File that backs this dentry. file is immutable. + file p9file + + // If deleted is non-zero, the file represented by this dentry has been + // deleted. deleted is accessed using atomic memory operations. 
+ deleted uint32 + + // If cached is true, dentryEntry links dentry into + // filesystem.cachedDentries. cached and dentryEntry are protected by + // filesystem.renameMu. + cached bool + dentryEntry + + dirMu sync.Mutex + + // If this dentry represents a directory, and InteropModeShared is not in + // effect, negativeChildren is a set of child names in this directory that + // are known not to exist. negativeChildren is protected by dirMu. + negativeChildren map[string]struct{} + + // If this dentry represents a directory, InteropModeShared is not in + // effect, and dirents is not nil, it is a cache of all entries in the + // directory, in the order they were returned by the server. dirents is + // protected by dirMu. + dirents []vfs.Dirent + + // Cached metadata; protected by metadataMu and accessed using atomic + // memory operations unless otherwise specified. + metadataMu sync.Mutex + ino uint64 // immutable + mode uint32 // type is immutable, perms are mutable + uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic + gid uint32 // auth.KGID, but ... + blockSize uint32 // 0 if unknown + // Timestamps, all nsecs from the Unix epoch. + atime int64 + mtime int64 + ctime int64 + btime int64 + // File size, protected by both metadataMu and dataMu (i.e. both must be + // locked to mutate it). + size uint64 + + mapsMu sync.Mutex + + // If this dentry represents a regular file, mappings tracks mappings of + // the file into memmap.MappingSpaces. mappings is protected by mapsMu. + mappings memmap.MappingSet + + // If this dentry represents a regular file or directory: + // + // - handle is the I/O handle used by all regularFileFDs/directoryFDs + // representing this dentry. + // + // - handleReadable is true if handle is readable. + // + // - handleWritable is true if handle is writable. + // + // Invariants: + // + // - If handleReadable == handleWritable == false, then handle.file == nil + // (i.e. there is no open handle). Conversely, if handleReadable || + // handleWritable == true, then handle.file != nil (i.e. there is an open + // handle). + // + // - handleReadable and handleWritable cannot transition from true to false + // (i.e. handles may not be downgraded). + // + // These fields are protected by handleMu. + handleMu sync.RWMutex + handle handle + handleReadable bool + handleWritable bool + + dataMu sync.RWMutex + + // If this dentry represents a regular file that is client-cached, cache + // maps offsets into the cached file to offsets into + // filesystem.mfp.MemoryFile() that store the file's data. cache is + // protected by dataMu. + cache fsutil.FileRangeSet + + // If this dentry represents a regular file that is client-cached, dirty + // tracks dirty segments in cache. dirty is protected by dataMu. + dirty fsutil.DirtySet + + // pf implements platform.File for mappings of handle.fd. + pf dentryPlatformFile + + // If this dentry represents a symbolic link, InteropModeShared is not in + // effect, and haveTarget is true, target is the symlink target. haveTarget + // and target are protected by dataMu. + haveTarget bool + target string +} + +// dentryAttrMask returns a p9.AttrMask enabling all attributes used by the +// gofer client. +func dentryAttrMask() p9.AttrMask { + return p9.AttrMask{ + Mode: true, + UID: true, + GID: true, + ATime: true, + MTime: true, + CTime: true, + Size: true, + BTime: true, + } +} + +// newDentry creates a new dentry representing the given file. 
The dentry +// initially has no references, but is not cached; it is the caller's +// responsibility to set the dentry's reference count and/or call +// dentry.checkCachingLocked() as appropriate. +func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) { + if !mask.Mode { + ctx.Warningf("can't create gofer.dentry without file type") + return nil, syserror.EIO + } + if attr.Mode.FileType() == p9.ModeRegular && !mask.Size { + ctx.Warningf("can't create regular file gofer.dentry without file size") + return nil, syserror.EIO + } + + d := &dentry{ + fs: fs, + file: file, + ino: qid.Path, + mode: uint32(attr.Mode), + uid: uint32(fs.uid), + gid: uint32(fs.gid), + blockSize: usermem.PageSize, + handle: handle{ + fd: -1, + }, + } + d.pf.dentry = d + if mask.UID { + d.uid = uint32(attr.UID) + } + if mask.GID { + d.gid = uint32(attr.GID) + } + if mask.Size { + d.size = attr.Size + } + if attr.BlockSize != 0 { + d.blockSize = uint32(attr.BlockSize) + } + if mask.ATime { + d.atime = dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds) + } + if mask.MTime { + d.mtime = dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds) + } + if mask.CTime { + d.ctime = dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds) + } + if mask.BTime { + d.btime = dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds) + } + d.vfsd.Init(d) + + fs.syncMu.Lock() + fs.dentries[d] = struct{}{} + fs.syncMu.Unlock() + return d, nil +} + +// updateFromP9Attrs is called to update d's metadata after an update from the +// remote filesystem. +func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) { + d.metadataMu.Lock() + if mask.Mode { + if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want { + d.metadataMu.Unlock() + panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got)) + } + atomic.StoreUint32(&d.mode, uint32(attr.Mode)) + } + if mask.UID { + atomic.StoreUint32(&d.uid, uint32(attr.UID)) + } + if mask.GID { + atomic.StoreUint32(&d.gid, uint32(attr.GID)) + } + // There is no P9_GETATTR_* bit for I/O block size. + if attr.BlockSize != 0 { + atomic.StoreUint32(&d.blockSize, uint32(attr.BlockSize)) + } + if mask.ATime { + atomic.StoreInt64(&d.atime, dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds)) + } + if mask.MTime { + atomic.StoreInt64(&d.mtime, dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds)) + } + if mask.CTime { + atomic.StoreInt64(&d.ctime, dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds)) + } + if mask.BTime { + atomic.StoreInt64(&d.btime, dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds)) + } + if mask.Size { + d.dataMu.Lock() + atomic.StoreUint64(&d.size, attr.Size) + d.dataMu.Unlock() + } + d.metadataMu.Unlock() +} + +func (d *dentry) updateFromGetattr(ctx context.Context) error { + // Use d.handle.file, which represents a 9P fid that has been opened, in + // preference to d.file, which represents a 9P fid that has not. This may + // be significantly more efficient in some implementations. 
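The comment above describes preferring the already-opened fid, and the code that follows holds handleMu for reading only while that opened handle is actually in use. A reduced sketch of that locking shape with plain types (nothing here is gofer API; the strings just stand in for the two fids):

package main

import (
	"fmt"
	"sync"
)

type source struct {
	mu     sync.RWMutex
	opened *string // nil if nothing has been opened; guarded by mu
	plain  string  // always available, needs no lock
}

// pick prefers the opened value and keeps the read lock held for exactly
// as long as that value is in use; the fallback releases the lock early.
func (s *source) pick(use func(string)) {
	s.mu.RLock()
	if s.opened != nil {
		use(*s.opened) // used under the read lock
		s.mu.RUnlock()
		return
	}
	s.mu.RUnlock() // fallback path does not need the lock
	use(s.plain)
}

func main() {
	o := "opened-fid"
	s := &source{opened: &o, plain: "unopened-fid"}
	s.pick(func(v string) { fmt.Println("using", v) })
	s.opened = nil // single goroutine here, so no lock needed
	s.pick(func(v string) { fmt.Println("using", v) })
}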
+ var ( + file p9file + handleMuRLocked bool + ) + d.handleMu.RLock() + if !d.handle.file.isNil() { + file = d.handle.file + handleMuRLocked = true + } else { + file = d.file + d.handleMu.RUnlock() + } + _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask()) + if handleMuRLocked { + d.handleMu.RUnlock() + } + if err != nil { + return err + } + d.updateFromP9Attrs(attrMask, &attr) + return nil +} + +func (d *dentry) fileType() uint32 { + return atomic.LoadUint32(&d.mode) & linux.S_IFMT +} + +func (d *dentry) statTo(stat *linux.Statx) { + stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME + stat.Blksize = atomic.LoadUint32(&d.blockSize) + stat.Nlink = 1 + if d.isDir() { + stat.Nlink = 2 + } + stat.UID = atomic.LoadUint32(&d.uid) + stat.GID = atomic.LoadUint32(&d.gid) + stat.Mode = uint16(atomic.LoadUint32(&d.mode)) + stat.Ino = d.ino + stat.Size = atomic.LoadUint64(&d.size) + // This is consistent with regularFileFD.Seek(), which treats regular files + // as having no holes. + stat.Blocks = (stat.Size + 511) / 512 + stat.Atime = statxTimestampFromDentry(atomic.LoadInt64(&d.atime)) + stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime)) + stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime)) + stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime)) + // TODO(jamieliu): device number +} + +func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error { + if stat.Mask == 0 { + return nil + } + if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 { + return syserror.EPERM + } + if err := vfs.CheckSetStat(creds, stat, uint16(atomic.LoadUint32(&d.mode))&^linux.S_IFMT, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil { + return err + } + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + setLocalAtime := false + setLocalMtime := false + if d.fs.opts.interop != InteropModeShared { + // Timestamp updates will be handled locally. + setLocalAtime = stat.Mask&linux.STATX_ATIME != 0 + setLocalMtime = stat.Mask&linux.STATX_MTIME != 0 + stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME + if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) { + // Truncate updates mtime. 
+ setLocalMtime = true + stat.Mtime.Nsec = linux.UTIME_NOW + } + } + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + if stat.Mask != 0 { + if err := d.file.setAttr(ctx, p9.SetAttrMask{ + Permissions: stat.Mask&linux.STATX_MODE != 0, + UID: stat.Mask&linux.STATX_UID != 0, + GID: stat.Mask&linux.STATX_GID != 0, + Size: stat.Mask&linux.STATX_SIZE != 0, + ATime: stat.Mask&linux.STATX_ATIME != 0, + MTime: stat.Mask&linux.STATX_MTIME != 0, + ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW, + MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW, + }, p9.SetAttr{ + Permissions: p9.FileMode(stat.Mode), + UID: p9.UID(stat.UID), + GID: p9.GID(stat.GID), + Size: stat.Size, + ATimeSeconds: uint64(stat.Atime.Sec), + ATimeNanoSeconds: uint64(stat.Atime.Nsec), + MTimeSeconds: uint64(stat.Mtime.Sec), + MTimeNanoSeconds: uint64(stat.Mtime.Nsec), + }); err != nil { + return err + } + } + if d.fs.opts.interop == InteropModeShared { + // There's no point to updating d's metadata in this case since it'll + // be overwritten by revalidation before the next time it's used + // anyway. (InteropModeShared inhibits client caching of regular file + // data, so there's no cache to truncate either.) + return nil + } + now, haveNow := nowFromContext(ctx) + if !haveNow { + ctx.Warningf("gofer.dentry.setStat: current time not available") + } + if stat.Mask&linux.STATX_MODE != 0 { + atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode)) + } + if stat.Mask&linux.STATX_UID != 0 { + atomic.StoreUint32(&d.uid, stat.UID) + } + if stat.Mask&linux.STATX_GID != 0 { + atomic.StoreUint32(&d.gid, stat.GID) + } + if setLocalAtime { + if stat.Atime.Nsec == linux.UTIME_NOW { + if haveNow { + atomic.StoreInt64(&d.atime, now) + } + } else { + atomic.StoreInt64(&d.atime, dentryTimestampFromStatx(stat.Atime)) + } + } + if setLocalMtime { + if stat.Mtime.Nsec == linux.UTIME_NOW { + if haveNow { + atomic.StoreInt64(&d.mtime, now) + } + } else { + atomic.StoreInt64(&d.mtime, dentryTimestampFromStatx(stat.Mtime)) + } + } + if haveNow { + atomic.StoreInt64(&d.ctime, now) + } + if stat.Mask&linux.STATX_SIZE != 0 { + d.dataMu.Lock() + oldSize := d.size + d.size = stat.Size + // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings + // below. This allows concurrent calls to Read/Translate/etc. These + // functions synchronize with truncation by refusing to use cache + // contents beyond the new d.size. (We are still holding d.metadataMu, + // so we can't race with Write or another truncate.) + d.dataMu.Unlock() + if d.size < oldSize { + oldpgend := pageRoundUp(oldSize) + newpgend := pageRoundUp(d.size) + if oldpgend != newpgend { + d.mapsMu.Lock() + d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ + // Compare Linux's mm/truncate.c:truncate_setsize() => + // truncate_pagecache() => + // mm/memory.c:unmap_mapping_range(evencows=1). + InvalidatePrivate: true, + }) + d.mapsMu.Unlock() + } + // We are now guaranteed that there are no translations of + // truncated pages, and can remove them from the cache. Since + // truncated pages have been removed from the remote file, they + // should be dropped without being written back. 
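The shrink path above rounds both the old and the new size up to page boundaries and skips invalidation entirely when they land in the same page. A self-contained illustration of that arithmetic, assuming 4 KiB pages (the real code uses usermem.PageSize):

package main

import "fmt"

const pageSize = 4096

func pageRoundUp(x uint64) uint64 { return (x + pageSize - 1) &^ (pageSize - 1) }

// invalidateRange returns the page-aligned span that a shrinking truncate
// must drop from mappings and the cache, and whether any invalidation is
// needed at all.
func invalidateRange(oldSize, newSize uint64) (start, end uint64, needed bool) {
	oldpgend := pageRoundUp(oldSize)
	newpgend := pageRoundUp(newSize)
	return newpgend, oldpgend, newpgend != oldpgend
}

func main() {
	fmt.Println(invalidateRange(10000, 100)) // 4096 12288 true
	fmt.Println(invalidateRange(3000, 1000)) // 4096 4096 false: same page, nothing to invalidate
}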
+ d.dataMu.Lock() + d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) + d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) + d.dataMu.Unlock() + } + } + return nil +} + +func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { + return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&d.mode))&0777, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) +} + +// IncRef implements vfs.DentryImpl.IncRef. +func (d *dentry) IncRef() { + // d.refs may be 0 if d.fs.renameMu is locked, which serializes against + // d.checkCachingLocked(). + atomic.AddInt64(&d.refs, 1) +} + +// TryIncRef implements vfs.DentryImpl.TryIncRef. +func (d *dentry) TryIncRef() bool { + for { + refs := atomic.LoadInt64(&d.refs) + if refs == 0 { + return false + } + if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) { + return true + } + } +} + +// DecRef implements vfs.DentryImpl.DecRef. +func (d *dentry) DecRef() { + if refs := atomic.AddInt64(&d.refs, -1); refs == 0 { + d.fs.renameMu.Lock() + d.checkCachingLocked() + d.fs.renameMu.Unlock() + } else if refs < 0 { + panic("gofer.dentry.DecRef() called without holding a reference") + } +} + +// checkCachingLocked should be called after d's reference count becomes 0 or it +// becomes disowned. +// +// Preconditions: d.fs.renameMu must be locked for writing. +func (d *dentry) checkCachingLocked() { + // Dentries with a non-zero reference count must be retained. (The only way + // to obtain a reference on a dentry with zero references is via path + // resolution, which requires renameMu, so if d.refs is zero then it will + // remain zero while we hold renameMu for writing.) + if atomic.LoadInt64(&d.refs) != 0 { + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentriesLen-- + d.cached = false + } + return + } + // Non-child dentries with zero references are no longer reachable by path + // resolution and should be dropped immediately. + if d.vfsd.Parent() == nil || d.vfsd.IsDisowned() { + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentriesLen-- + d.cached = false + } + d.destroyLocked() + return + } + // If d is already cached, just move it to the front of the LRU. + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentries.PushFront(d) + return + } + // Cache the dentry, then evict the least recently used cached dentry if + // the cache becomes over-full. + d.fs.cachedDentries.PushFront(d) + d.fs.cachedDentriesLen++ + d.cached = true + if d.fs.cachedDentriesLen > d.fs.opts.maxCachedDentries { + victim := d.fs.cachedDentries.Back() + d.fs.cachedDentries.Remove(victim) + d.fs.cachedDentriesLen-- + victim.cached = false + // victim.refs may have become non-zero from an earlier path + // resolution since it was inserted into fs.cachedDentries; see + // dentry.incRefLocked(). Either way, we brought + // fs.cachedDentriesLen back down to fs.opts.maxCachedDentries, so + // we don't loop. + if atomic.LoadInt64(&victim.refs) == 0 { + if victimParentVFSD := victim.vfsd.Parent(); victimParentVFSD != nil { + victimParent := victimParentVFSD.Impl().(*dentry) + victimParent.dirMu.Lock() + if !victim.vfsd.IsDisowned() { + // victim can't be a mount point (in any mount + // namespace), since VFS holds references on mount + // points. + d.fs.vfsfs.VirtualFilesystem().ForceDeleteDentry(&victim.vfsd) + // We're only deleting the dentry, not the file it + // represents, so we don't need to update + // victimParent.dirents etc. 
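checkCachingLocked above implements a bounded LRU with an intrusive list plus a length counter, evicting from the back once the limit is exceeded. A non-intrusive sketch of the same idea using container/list (hypothetical string keys, no reference counts or VFS interaction):

package main

import (
	"container/list"
	"fmt"
)

type lruCache struct {
	max  int
	ll   *list.List               // front = most recently used
	elem map[string]*list.Element // key -> element, for O(1) touch
}

func newLRU(max int) *lruCache {
	return &lruCache{max: max, ll: list.New(), elem: make(map[string]*list.Element)}
}

// touch moves key to the front, inserting it if needed, and returns the
// evicted victim (if any) so the caller can clean it up.
func (c *lruCache) touch(key string) (victim string, evicted bool) {
	if e, ok := c.elem[key]; ok {
		c.ll.MoveToFront(e)
		return "", false
	}
	c.elem[key] = c.ll.PushFront(key)
	if c.ll.Len() <= c.max {
		return "", false
	}
	back := c.ll.Back()
	c.ll.Remove(back)
	victim = back.Value.(string)
	delete(c.elem, victim)
	return victim, true
}

func main() {
	c := newLRU(2)
	c.touch("a")
	c.touch("b")
	fmt.Println(c.touch("c")) // a true: "a" was least recently used
}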
+ } + victimParent.dirMu.Unlock() + } + victim.destroyLocked() + } + } +} + +// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is +// not a child dentry. +func (d *dentry) destroyLocked() { + ctx := context.Background() + d.handleMu.Lock() + if !d.handle.file.isNil() { + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + // Write dirty pages back to the remote filesystem. + if d.handleWritable { + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + log.Warningf("gofer.dentry.DecRef: failed to write dirty data back: %v", err) + } + } + // Discard cached data. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + d.dataMu.Unlock() + // Clunk open fids and close open host FDs. + d.handle.close(ctx) + } + d.handleMu.Unlock() + d.file.close(ctx) + // Remove d from the set of all dentries. + d.fs.syncMu.Lock() + delete(d.fs.dentries, d) + d.fs.syncMu.Unlock() + // Drop the reference held by d on its parent. + if parentVFSD := d.vfsd.Parent(); parentVFSD != nil { + parent := parentVFSD.Impl().(*dentry) + // This is parent.DecRef() without recursive locking of d.fs.renameMu. + if refs := atomic.AddInt64(&parent.refs, -1); refs == 0 { + parent.checkCachingLocked() + } else if refs < 0 { + panic("gofer.dentry.DecRef() called without holding a reference") + } + } +} + +func (d *dentry) isDeleted() bool { + return atomic.LoadUint32(&d.deleted) != 0 +} + +func (d *dentry) setDeleted() { + atomic.StoreUint32(&d.deleted, 1) +} + +func (d *dentry) listxattr(ctx context.Context) ([]string, error) { + return nil, syserror.ENOTSUP +} + +func (d *dentry) getxattr(ctx context.Context, name string) (string, error) { + // TODO(jamieliu): add vfs.GetxattrOptions.Size + return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX) +} + +func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error { + return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags) +} + +func (d *dentry) removexattr(ctx context.Context, name string) error { + return syserror.ENOTSUP +} + +// Preconditions: d.isRegularFile() || d.isDirectory(). +func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error { + // O_TRUNC unconditionally requires us to obtain a new handle (opened with + // O_TRUNC). + if !trunc { + d.handleMu.RLock() + if (!read || d.handleReadable) && (!write || d.handleWritable) { + // The current handle is sufficient. + d.handleMu.RUnlock() + return nil + } + d.handleMu.RUnlock() + } + + haveOldFD := false + d.handleMu.Lock() + if (read && !d.handleReadable) || (write && !d.handleWritable) || trunc { + // Get a new handle. + wantReadable := d.handleReadable || read + wantWritable := d.handleWritable || write + h, err := openHandle(ctx, d.file, wantReadable, wantWritable, trunc) + if err != nil { + d.handleMu.Unlock() + return err + } + if !d.handle.file.isNil() { + // Check that old and new handles are compatible: If the old handle + // includes a host file descriptor but the new one does not, or + // vice versa, old and new memory mappings may be incoherent. 
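When both the old and the new handle carry host FDs, the code that follows dup2()s the new file description over the old FD number, so racing users of the old number transparently switch to the new description. A tiny Linux-only demonstration of that dup2 property with two temporary files (unrelated to gofer handles; error handling elided for brevity):

//go:build linux && amd64

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	dir, _ := os.MkdirTemp("", "dup2demo")
	defer os.RemoveAll(dir)
	os.WriteFile(dir+"/old", []byte("old contents"), 0644)
	os.WriteFile(dir+"/new", []byte("new contents"), 0644)

	oldFD, _ := syscall.Open(dir+"/old", syscall.O_RDONLY, 0)
	newFD, _ := syscall.Open(dir+"/new", syscall.O_RDONLY, 0)

	// Make the *number* oldFD refer to the file description behind newFD.
	// Anyone still holding the number oldFD now reads the new file.
	if err := syscall.Dup2(newFD, oldFD); err != nil {
		panic(err)
	}
	syscall.Close(newFD) // the number newFD is no longer needed

	buf := make([]byte, 32)
	n, _ := syscall.Read(oldFD, buf)
	fmt.Println(string(buf[:n])) // "new contents"
}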
+ haveOldFD = d.handle.fd >= 0 + haveNewFD := h.fd >= 0 + if haveOldFD != haveNewFD { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: can't change host FD availability from %v to %v across dentry handle upgrade", haveOldFD, haveNewFD) + h.close(ctx) + return syserror.EIO + } + if haveOldFD { + // We may have raced with callers of d.pf.FD() that are now + // using the old file descriptor, preventing us from safely + // closing it. We could handle this by invalidating existing + // memmap.Translations, but this is expensive. Instead, use + // dup2() to make the old file descriptor refer to the new file + // description, then close the new file descriptor (which is no + // longer needed). Racing callers may use the old or new file + // description, but this doesn't matter since they refer to the + // same file (unless d.fs.opts.overlayfsStaleRead is true, + // which we handle separately). + if err := syscall.Dup2(int(h.fd), int(d.handle.fd)); err != nil { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err) + h.close(ctx) + return err + } + syscall.Close(int(h.fd)) + h.fd = d.handle.fd + if d.fs.opts.overlayfsStaleRead { + // Replace sentry mappings of the old FD with mappings of + // the new FD, since the two are not necessarily coherent. + if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err) + h.close(ctx) + return err + } + } + // Clunk the old fid before making the new handle visible (by + // unlocking d.handleMu). + d.handle.file.close(ctx) + } + } + // Switch to the new handle. + d.handle = h + d.handleReadable = wantReadable + d.handleWritable = wantWritable + } + d.handleMu.Unlock() + + if d.fs.opts.overlayfsStaleRead && haveOldFD { + // Invalidate application mappings that may be using the old FD; they + // will be replaced with mappings using the new FD after future calls + // to d.Translate(). This requires holding d.mapsMu, which precedes + // d.handleMu in the lock order. + d.mapsMu.Lock() + d.mappings.InvalidateAll(memmap.InvalidateOpts{}) + d.mapsMu.Unlock() + } + + return nil +} + +// fileDescription is embedded by gofer implementations of +// vfs.FileDescriptionImpl. +type fileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl +} + +func (fd *fileDescription) filesystem() *filesystem { + return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) +} + +func (fd *fileDescription) dentry() *dentry { + return fd.vfsfd.Dentry().Impl().(*dentry) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + d := fd.dentry() + if d.fs.opts.interop == InteropModeShared && opts.Mask&(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE|linux.STATX_BLOCKS|linux.STATX_BTIME) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC { + // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if + // available? + if err := d.updateFromGetattr(ctx); err != nil { + return linux.Statx{}, err + } + } + var stat linux.Statx + d.statTo(&stat) + return stat, nil +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. 
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + return fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()) +} + +// Listxattr implements vfs.FileDescriptionImpl.Listxattr. +func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) { + return fd.dentry().listxattr(ctx) +} + +// Getxattr implements vfs.FileDescriptionImpl.Getxattr. +func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) { + return fd.dentry().getxattr(ctx, name) +} + +// Setxattr implements vfs.FileDescriptionImpl.Setxattr. +func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { + return fd.dentry().setxattr(ctx, &opts) +} + +// Removexattr implements vfs.FileDescriptionImpl.Removexattr. +func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { + return fd.dentry().removexattr(ctx, name) +} diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go new file mode 100644 index 000000000..cfe66f797 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/handle.go @@ -0,0 +1,135 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/safemem" +) + +// handle represents a remote "open file descriptor", consisting of an opened +// fid (p9.File) and optionally a host file descriptor. +type handle struct { + file p9file + fd int32 // -1 if unavailable +} + +// Preconditions: read || write. +func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (handle, error) { + _, newfile, err := file.walk(ctx, nil) + if err != nil { + return handle{fd: -1}, err + } + var flags p9.OpenFlags + switch { + case read && !write: + flags = p9.ReadOnly + case !read && write: + flags = p9.WriteOnly + case read && write: + flags = p9.ReadWrite + } + if trunc { + flags |= p9.OpenTruncate + } + fdobj, _, _, err := newfile.open(ctx, flags) + if err != nil { + newfile.close(ctx) + return handle{fd: -1}, err + } + fd := int32(-1) + if fdobj != nil { + fd = int32(fdobj.Release()) + } + return handle{ + file: newfile, + fd: fd, + }, nil +} + +func (h *handle) close(ctx context.Context) { + h.file.close(ctx) + h.file = p9file{} + if h.fd >= 0 { + syscall.Close(int(h.fd)) + h.fd = -1 + } +} + +func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPreadv(h.fd, dsts, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if dsts.NumBlocks() == 1 && !dsts.Head().NeedSafecopy() { + n, err := h.file.readAt(ctx, dsts.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the read since p9.File.ReadAt() takes []byte. 
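As the comment above says, reads have to bounce through a temporary buffer whenever the destination is scattered but the source API only accepts a contiguous []byte. The same shape reduced to stdlib types, with io.ReaderAt as the source and [][]byte as the destinations:

package main

import (
	"fmt"
	"io"
	"strings"
)

// readAtScattered reads up to the total capacity of dsts from r at off,
// buffering once because ReadAt wants a single contiguous slice.
func readAtScattered(r io.ReaderAt, dsts [][]byte, off int64) (int, error) {
	total := 0
	for _, d := range dsts {
		total += len(d)
	}
	buf := make([]byte, total)
	n, err := r.ReadAt(buf, off)
	// Scatter whatever was read into the destination slices.
	copied := 0
	for _, d := range dsts {
		if copied >= n {
			break
		}
		copied += copy(d, buf[copied:n])
	}
	if err == io.EOF && n > 0 {
		err = nil // treat a short read at EOF as a successful partial read
	}
	return n, err
}

func main() {
	src := strings.NewReader("hello, gofer")
	a, b := make([]byte, 5), make([]byte, 7)
	n, err := readAtScattered(src, [][]byte{a, b}, 0)
	fmt.Println(n, err, string(a), string(b)) // 12 <nil> hello , gofer
}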
+ buf := make([]byte, dsts.NumBytes()) + n, err := h.file.readAt(ctx, buf, offset) + if n == 0 { + return 0, err + } + if cp, cperr := safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:n]))); cperr != nil { + return cp, cperr + } + return uint64(n), err +} + +func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPwritev(h.fd, srcs, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if srcs.NumBlocks() == 1 && !srcs.Head().NeedSafecopy() { + n, err := h.file.writeAt(ctx, srcs.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the write since p9.File.WriteAt() takes []byte. + buf := make([]byte, srcs.NumBytes()) + cp, cperr := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), srcs) + if cp == 0 { + return 0, cperr + } + n, err := h.file.writeAt(ctx, buf[:cp], offset) + if err != nil { + return uint64(n), err + } + return cp, cperr +} + +func (h *handle) sync(ctx context.Context) error { + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + err := syscall.Fsync(int(h.fd)) + ctx.UninterruptibleSleepFinish(false) + return err + } + return h.file.fsync(ctx) +} diff --git a/pkg/sentry/fsimpl/gofer/handle_unsafe.go b/pkg/sentry/fsimpl/gofer/handle_unsafe.go new file mode 100644 index 000000000..19560ab26 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/handle_unsafe.go @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + "unsafe" + + "gvisor.dev/gvisor/pkg/safemem" +) + +// Preconditions: !dsts.IsEmpty(). +func hostPreadv(fd int32, dsts safemem.BlockSeq, off int64) (uint64, error) { + // No buffering is necessary regardless of safecopy; host syscalls will + // return EFAULT if appropriate, instead of raising SIGBUS. + if dsts.NumBlocks() == 1 { + // Use pread() instead of preadv() to avoid iovec allocation and + // copying. + dst := dsts.Head() + n, _, e := syscall.Syscall6(syscall.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil + } + iovs := safemem.IovecsFromBlockSeq(dsts) + n, _, e := syscall.Syscall6(syscall.SYS_PREADV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil +} + +// Preconditions: !srcs.IsEmpty(). +func hostPwritev(fd int32, srcs safemem.BlockSeq, off int64) (uint64, error) { + // No buffering is necessary regardless of safecopy; host syscalls will + // return EFAULT if appropriate, instead of raising SIGBUS. + if srcs.NumBlocks() == 1 { + // Use pwrite() instead of pwritev() to avoid iovec allocation and + // copying. 
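These helpers go through raw Syscall6 so that the single-block case avoids building an iovec. For comparison, the single-buffer case can also be written with the stdlib wrappers syscall.Pread and syscall.Pwrite; a small sketch against a temporary file (illustration only, since the code here also needs the vectorized variants):

//go:build linux

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	f, err := os.CreateTemp("", "pread-demo")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()
	fd := int(f.Fd())

	// Positional write: does not move the file offset.
	if _, err := syscall.Pwrite(fd, []byte("positional"), 4); err != nil {
		panic(err)
	}

	// Positional read back from the same offset.
	buf := make([]byte, 10)
	n, err := syscall.Pread(fd, buf, 4)
	fmt.Println(n, err, string(buf[:n])) // 10 <nil> positional
}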
+ src := srcs.Head() + n, _, e := syscall.Syscall6(syscall.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil + } + iovs := safemem.IovecsFromBlockSeq(srcs) + n, _, e := syscall.Syscall6(syscall.SYS_PWRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil +} diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go new file mode 100644 index 000000000..755ac2985 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/p9file.go @@ -0,0 +1,219 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/syserror" +) + +// p9file is a wrapper around p9.File that provides methods that are +// Context-aware. +type p9file struct { + file p9.File +} + +func (f p9file) isNil() bool { + return f.file == nil +} + +func (f p9file) walk(ctx context.Context, names []string) ([]p9.QID, p9file, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, err := f.file.Walk(names) + ctx.UninterruptibleSleepFinish(false) + return qids, p9file{newfile}, err +} + +func (f p9file) walkGetAttr(ctx context.Context, names []string) ([]p9.QID, p9file, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, attrMask, attr, err := f.file.WalkGetAttr(names) + ctx.UninterruptibleSleepFinish(false) + return qids, p9file{newfile}, attrMask, attr, err +} + +// walkGetAttrOne is a wrapper around p9.File.WalkGetAttr that takes a single +// path component and returns a single qid. 
+func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, attrMask, attr, err := f.file.WalkGetAttr([]string{name}) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, err + } + if len(qids) != 1 { + ctx.Warningf("p9.File.WalkGetAttr returned %d qids (%v), wanted 1", len(qids), qids) + if newfile != nil { + p9file{newfile}.close(ctx) + } + return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO + } + return qids[0], p9file{newfile}, attrMask, attr, nil +} + +func (f p9file) statFS(ctx context.Context) (p9.FSStat, error) { + ctx.UninterruptibleSleepStart(false) + fsstat, err := f.file.StatFS() + ctx.UninterruptibleSleepFinish(false) + return fsstat, err +} + +func (f p9file) getAttr(ctx context.Context, req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qid, attrMask, attr, err := f.file.GetAttr(req) + ctx.UninterruptibleSleepFinish(false) + return qid, attrMask, attr, err +} + +func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAttr) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.SetAttr(valid, attr) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) { + ctx.UninterruptibleSleepStart(false) + val, err := f.file.GetXattr(name, size) + ctx.UninterruptibleSleepFinish(false) + return val, err +} + +func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.SetXattr(name, value, flags) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Allocate(mode, offset, length) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) close(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Close() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) open(ctx context.Context, flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, qid, iounit, err := f.file.Open(flags) + ctx.UninterruptibleSleepFinish(false) + return fdobj, qid, iounit, err +} + +func (f p9file) readAt(ctx context.Context, p []byte, offset uint64) (int, error) { + ctx.UninterruptibleSleepStart(false) + n, err := f.file.ReadAt(p, offset) + ctx.UninterruptibleSleepFinish(false) + return n, err +} + +func (f p9file) writeAt(ctx context.Context, p []byte, offset uint64) (int, error) { + ctx.UninterruptibleSleepStart(false) + n, err := f.file.WriteAt(p, offset) + ctx.UninterruptibleSleepFinish(false) + return n, err +} + +func (f p9file) fsync(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.FSync() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) create(ctx context.Context, name string, flags p9.OpenFlags, permissions p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9file, p9.QID, uint32, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, newfile, qid, iounit, err := f.file.Create(name, flags, permissions, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return fdobj, p9file{newfile}, qid, iounit, err +} + +func (f p9file) mkdir(ctx context.Context, name 
string, permissions p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Mkdir(name, permissions, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) symlink(ctx context.Context, oldName string, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Symlink(oldName, newName, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) link(ctx context.Context, target p9file, newName string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Link(target.file, newName) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) mknod(ctx context.Context, name string, mode p9.FileMode, major uint32, minor uint32, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Mknod(name, mode, major, minor, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) rename(ctx context.Context, newDir p9file, newName string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Rename(newDir.file, newName) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) unlinkAt(ctx context.Context, name string, flags uint32) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.UnlinkAt(name, flags) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) readdir(ctx context.Context, offset uint64, count uint32) ([]p9.Dirent, error) { + ctx.UninterruptibleSleepStart(false) + dirents, err := f.file.Readdir(offset, count) + ctx.UninterruptibleSleepFinish(false) + return dirents, err +} + +func (f p9file) readlink(ctx context.Context) (string, error) { + ctx.UninterruptibleSleepStart(false) + target, err := f.file.Readlink() + ctx.UninterruptibleSleepFinish(false) + return target, err +} + +func (f p9file) flush(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Flush() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) connect(ctx context.Context, flags p9.ConnectFlags) (*fd.FD, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, err := f.file.Connect(flags) + ctx.UninterruptibleSleepFinish(false) + return fdobj, err +} diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go new file mode 100644 index 000000000..847cb0784 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/pagemath.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/usermem" +) + +// This are equivalent to usermem.Addr.RoundDown/Up, but without the +// potentially truncating conversion to usermem.Addr. This is necessary because +// there is no way to define generic "PageRoundDown/Up" functions in Go. 
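The comment above is accurate for Go as it existed when this was written (no type parameters). Since Go 1.18, a single generic rounding helper is expressible; a purely illustrative sketch, not something this package uses:

package main

import "fmt"

// pageInt lists the unsigned integer types the helpers should cover.
type pageInt interface {
	~uint32 | ~uint64 | ~uintptr
}

func roundDown[T pageInt](x, pageSize T) T { return x &^ (pageSize - 1) }
func roundUp[T pageInt](x, pageSize T) T   { return roundDown(x+pageSize-1, pageSize) }

func main() {
	fmt.Println(roundDown(uint64(8193), 4096), roundUp(uint64(8193), 4096)) // 8192 12288
}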
+ +func pageRoundDown(x uint64) uint64 { + return x &^ (usermem.PageSize - 1) +} + +func pageRoundUp(x uint64) uint64 { + return pageRoundDown(x + usermem.PageSize - 1) +} diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go new file mode 100644 index 000000000..8e11e06b3 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -0,0 +1,860 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "fmt" + "io" + "math" + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (d *dentry) isRegularFile() bool { + return d.fileType() == linux.S_IFREG +} + +type regularFileFD struct { + fileDescription + + // off is the file offset. off is protected by mu. + mu sync.Mutex + off int64 +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *regularFileFD) Release() { +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *regularFileFD) OnClose(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + // Skip flushing if writes may be buffered by the client, since (as with + // the VFS1 client) we don't flush buffered writes on close anyway. + d := fd.dentry() + if d.fs.opts.interop == InteropModeExclusive { + return nil + } + d.handleMu.RLock() + defer d.handleMu.RUnlock() + return d.handle.file.flush(ctx) +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Check for reading at EOF before calling into MM (but not under + // InteropModeShared, which makes d.size unreliable). + d := fd.dentry() + if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) { + return 0, io.EOF + } + + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Lock d.metadataMu for the rest of the read to prevent d.size from + // changing. + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + // Write dirty cached pages that will be touched by the read back to + // the remote file. + if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil { + return 0, err + } + } + + rw := getDentryReadWriter(ctx, d, offset) + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Require the read to go to the remote file. 
+ rw.direct = true + } + n, err := dst.CopyOutFrom(ctx, rw) + putDentryReadWriter(rw) + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). + d.touchAtime(ctx, fd.vfsfd.Mount()) + } + return n, err +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + d := fd.dentry() + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:__generic_file_write_iter() => + // file_update_time(). This is d.touchCMtime(), but without locking + // d.metadataMu (recursively). + if now, ok := nowFromContext(ctx); ok { + atomic.StoreInt64(&d.mtime, now) + atomic.StoreInt64(&d.ctime, now) + } + } + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Write dirty cached pages that will be touched by the write back to + // the remote file. + if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { + return 0, err + } + // Remove touched pages from the cache. + pgstart := pageRoundDown(uint64(offset)) + pgend := pageRoundUp(uint64(offset + src.NumBytes())) + if pgend < pgstart { + return 0, syserror.EINVAL + } + mr := memmap.MappableRange{pgstart, pgend} + var freed []platform.FileRange + d.dataMu.Lock() + cseg := d.cache.LowerBoundSegment(mr.Start) + for cseg.Ok() && cseg.Start() < mr.End { + cseg = d.cache.Isolate(cseg, mr) + freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()}) + cseg = d.cache.Remove(cseg).NextSegment() + } + d.dataMu.Unlock() + // Invalidate mappings of removed pages. + d.mapsMu.Lock() + d.mappings.Invalidate(mr, memmap.InvalidateOpts{}) + d.mapsMu.Unlock() + // Finally free pages removed from the cache. + mf := d.fs.mfp.MemoryFile() + for _, freedFR := range freed { + mf.DecRef(freedFR) + } + } + rw := getDentryReadWriter(ctx, d, offset) + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Require the write to go to the remote file. + rw.direct = true + } + n, err := src.CopyInTo(ctx, rw) + putDentryReadWriter(rw) + if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 { + // Write dirty cached pages touched by the write back to the remote + // file. + if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { + return 0, err + } + // Request the remote filesystem to sync the remote file. + if err := d.handle.file.fsync(ctx); err != nil { + return 0, err + } + } + return n, err +} + +// Write implements vfs.FileDescriptionImpl.Write. 
+func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +type dentryReadWriter struct { + ctx context.Context + d *dentry + off uint64 + direct bool +} + +var dentryReadWriterPool = sync.Pool{ + New: func() interface{} { + return &dentryReadWriter{} + }, +} + +func getDentryReadWriter(ctx context.Context, d *dentry, offset int64) *dentryReadWriter { + rw := dentryReadWriterPool.Get().(*dentryReadWriter) + rw.ctx = ctx + rw.d = d + rw.off = uint64(offset) + rw.direct = false + return rw +} + +func putDentryReadWriter(rw *dentryReadWriter) { + rw.ctx = nil + rw.d = nil + dentryReadWriterPool.Put(rw) +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. +func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + + // If we have a mmappable host FD (which must be used here to ensure + // coherence with memory-mapped I/O), or if InteropModeShared is in effect + // (which prevents us from caching file contents and makes dentry.size + // unreliable), or if the file was opened O_DIRECT, read directly from + // dentry.handle without locking dentry.dataMu. + rw.d.handleMu.RLock() + if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off) + rw.d.handleMu.RUnlock() + rw.off += n + return n, err + } + + // Otherwise read from/through the cache. + mf := rw.d.fs.mfp.MemoryFile() + fillCache := mf.ShouldCacheEvictable() + var dataMuUnlock func() + if fillCache { + rw.d.dataMu.Lock() + dataMuUnlock = rw.d.dataMu.Unlock + } else { + rw.d.dataMu.RLock() + dataMuUnlock = rw.d.dataMu.RUnlock + } + + // Compute the range to read (limited by file size and overflow-checked). + if rw.off >= rw.d.size { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return 0, io.EOF + } + end := rw.d.size + if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { + end = rend + } + + var done uint64 + seg, gap := rw.d.cache.Find(rw.off) + for rw.off < end { + mr := memmap.MappableRange{rw.off, end} + switch { + case seg.Ok(): + // Get internal mappings from the cache. + ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + if err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Copy from internal mappings. + n, err := safemem.CopySeq(dsts, ims) + done += n + rw.off += n + dsts = dsts.DropFirst64(n) + if err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + gapMR := gap.Range().Intersect(mr) + if fillCache { + // Read into the cache, then re-enter the loop to read from the + // cache. + reqMR := memmap.MappableRange{ + Start: pageRoundDown(gapMR.Start), + End: pageRoundUp(gapMR.End), + } + optMR := gap.Range() + err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt) + mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End}) + seg, gap = rw.d.cache.Find(rw.off) + if !seg.Ok() { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + // err might have occurred in part of gap.Range() outside + // gapMR. 
Forget about it for now; if the error matters and + // persists, we'll run into it again in a later iteration of + // this loop. + } else { + // Read directly from the file. + gapDsts := dsts.TakeFirst64(gapMR.Length()) + n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start) + done += n + rw.off += n + dsts = dsts.DropFirst64(n) + // Partial reads are fine. But we must stop reading. + if n != gapDsts.NumBytes() || err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Continue. + seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} + } + } + } + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, nil +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. +// +// Preconditions: rw.d.metadataMu must be locked. +func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + + // If we have a mmappable host FD (which must be used here to ensure + // coherence with memory-mapped I/O), or if InteropModeShared is in effect + // (which prevents us from caching file contents), or if the file was + // opened with O_DIRECT, write directly to dentry.handle without locking + // dentry.dataMu. + rw.d.handleMu.RLock() + if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off) + rw.d.handleMu.RUnlock() + rw.off += n + return n, err + } + + // Otherwise write to/through the cache. + mf := rw.d.fs.mfp.MemoryFile() + rw.d.dataMu.Lock() + + // Compute the range to write (overflow-checked). + start := rw.off + end := rw.off + srcs.NumBytes() + if end <= rw.off { + end = math.MaxInt64 + } + + var ( + done uint64 + retErr error + ) + seg, gap := rw.d.cache.Find(rw.off) + for rw.off < end { + mr := memmap.MappableRange{rw.off, end} + switch { + case seg.Ok(): + // Get internal mappings from the cache. + segMR := seg.Range().Intersect(mr) + ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write) + if err != nil { + retErr = err + goto exitLoop + } + + // Copy to internal mappings. + n, err := safemem.CopySeq(ims, srcs) + done += n + rw.off += n + srcs = srcs.DropFirst64(n) + rw.d.dirty.MarkDirty(segMR) + if err != nil { + retErr = err + goto exitLoop + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + // Write directly to the file. At present, we never fill the cache + // when writing, since doing so can convert small writes into + // inefficient read-modify-write cycles, and we have no mechanism + // for detecting or avoiding this. + gapMR := gap.Range().Intersect(mr) + gapSrcs := srcs.TakeFirst64(gapMR.Length()) + n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start) + done += n + rw.off += n + srcs = srcs.DropFirst64(n) + // Partial writes are fine. But we must stop writing. + if n != gapSrcs.NumBytes() || err != nil { + retErr = err + goto exitLoop + } + + // Continue. + seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} + } + } +exitLoop: + if rw.off > rw.d.size { + atomic.StoreUint64(&rw.d.size, rw.off) + // The remote file's size will implicitly be extended to the correct + // value when we write back to it. + } + // If InteropModeWritethrough is in effect, flush written data back to the + // remote filesystem. 
+ if rw.d.fs.opts.interop == InteropModeWritethrough && done != 0 { + if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{ + Start: start, + End: rw.off, + }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil { + // We have no idea how many bytes were actually flushed. + rw.off = start + done = 0 + retErr = err + } + } + rw.d.dataMu.Unlock() + rw.d.handleMu.RUnlock() + return done, retErr +} + +func (d *dentry) writeback(ctx context.Context, offset, size int64) error { + if size == 0 { + return nil + } + d.handleMu.RLock() + defer d.handleMu.RUnlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + // Compute the range of valid bytes (overflow-checked). + if uint64(offset) >= d.size { + return nil + } + end := int64(d.size) + if rend := offset + size; rend > offset && rend < end { + end = rend + } + return fsutil.SyncDirty(ctx, memmap.MappableRange{ + Start: uint64(offset), + End: uint64(end), + }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + switch whence { + case linux.SEEK_SET: + // Use offset as specified. + case linux.SEEK_CUR: + offset += fd.off + case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE: + // Ensure file size is up to date. + d := fd.dentry() + if fd.filesystem().opts.interop == InteropModeShared { + if err := d.updateFromGetattr(ctx); err != nil { + return 0, err + } + } + size := int64(atomic.LoadUint64(&d.size)) + // For SEEK_DATA and SEEK_HOLE, treat the file as a single contiguous + // block of data. + switch whence { + case linux.SEEK_END: + offset += size + case linux.SEEK_DATA: + if offset > size { + return 0, syserror.ENXIO + } + // Use offset as specified. + case linux.SEEK_HOLE: + if offset > size { + return 0, syserror.ENXIO + } + offset = size + } + default: + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *regularFileFD) Sync(ctx context.Context) error { + return fd.dentry().syncSharedHandle(ctx) +} + +func (d *dentry) syncSharedHandle(ctx context.Context) error { + d.handleMu.RLock() + if !d.handleWritable { + d.handleMu.RUnlock() + return nil + } + d.dataMu.Lock() + // Write dirty cached data to the remote file. + err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) + d.dataMu.Unlock() + if err == nil { + // Sync the remote file. + err = d.handle.sync(ctx) + } + d.handleMu.RUnlock() + return err +} + +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + d := fd.dentry() + switch d.fs.opts.interop { + case InteropModeExclusive: + // Any mapping is fine. + case InteropModeWritethrough: + // Shared writable mappings require a host FD, since otherwise we can't + // synchronously flush memory-mapped writes to the remote file. + if opts.Private || !opts.MaxPerms.Write { + break + } + fallthrough + case InteropModeShared: + // All mappings require a host FD to be coherent with other filesystem + // users. + if d.fs.opts.forcePageCache { + // Whether or not we have a host FD, we're not allowed to use it. 
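The mmap restriction here and the caching policy in mayCachePages just below both reduce to a small decision over the interop mode, forcePageCache, and host FD availability. Restating the caching side as a pure function, with plain stand-in types and purely for readability:

package main

import "fmt"

type interopMode int

const (
	exclusive interopMode = iota
	writethrough
	shared
)

// mayCache restates the client-caching decision: never cache under shared
// interop; always cache if the page cache is forced; otherwise cache only
// when a host FD backs the file.
func mayCache(mode interopMode, forcePageCache, haveHostFD bool) bool {
	if mode == shared {
		return false
	}
	if forcePageCache {
		return true
	}
	return haveHostFD
}

func main() {
	fmt.Println(mayCache(exclusive, false, true))     // true
	fmt.Println(mayCache(shared, true, true))         // false
	fmt.Println(mayCache(writethrough, false, false)) // false
}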
+ return syserror.ENODEV + } + d.handleMu.RLock() + haveFD := d.handle.fd >= 0 + d.handleMu.RUnlock() + if !haveFD { + return syserror.ENODEV + } + default: + panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) + } + return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) +} + +func (d *dentry) mayCachePages() bool { + if d.fs.opts.interop == InteropModeShared { + return false + } + if d.fs.opts.forcePageCache { + return true + } + d.handleMu.RLock() + haveFD := d.handle.fd >= 0 + d.handleMu.RUnlock() + return haveFD +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + mapped := d.mappings.AddMapping(ms, ar, offset, writable) + // Do this unconditionally since whether we have a host FD can change + // across save/restore. + for _, r := range mapped { + d.pf.hostFileMapper.IncRefOn(r) + } + if d.mayCachePages() { + // d.Evict() will refuse to evict memory-mapped pages, so tell the + // MemoryFile to not bother trying. + mf := d.fs.mfp.MemoryFile() + for _, r := range mapped { + mf.MarkUnevictable(d, pgalloc.EvictableRange{r.Start, r.End}) + } + } + d.mapsMu.Unlock() + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + d.mapsMu.Lock() + unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable) + for _, r := range unmapped { + d.pf.hostFileMapper.DecRefOn(r) + } + if d.mayCachePages() { + // Pages that are no longer referenced by any application memory + // mappings are now considered unused; allow MemoryFile to evict them + // when necessary. + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + for _, r := range unmapped { + // Since these pages are no longer mapped, they are no longer + // concurrently dirtyable by a writable memory mapping. + d.dirty.AllowClean(r) + mf.MarkEvictable(d, pgalloc.EvictableRange{r.Start, r.End}) + } + d.dataMu.Unlock() + } + d.mapsMu.Unlock() +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + return d.AddMapping(ctx, ms, dstAR, offset, writable) +} + +// Translate implements memmap.Mappable.Translate. +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + d.handleMu.RLock() + if d.handle.fd >= 0 && !d.fs.opts.forcePageCache { + d.handleMu.RUnlock() + mr := optional + if d.fs.opts.limitHostFDTranslation { + mr = maxFillRange(required, optional) + } + return []memmap.Translation{ + { + Source: mr, + File: &d.pf, + Offset: mr.Start, + Perms: usermem.AnyAccess, + }, + }, nil + } + + d.dataMu.Lock() + + // Constrain translations to d.size (rounded up) to prevent translation to + // pages that may be concurrently truncated. 
+ pgend := pageRoundUp(d.size) + var beyondEOF bool + if required.End > pgend { + if required.Start >= pgend { + d.dataMu.Unlock() + d.handleMu.RUnlock() + return nil, &memmap.BusError{io.EOF} + } + beyondEOF = true + required.End = pgend + } + if optional.End > pgend { + optional.End = pgend + } + + mf := d.fs.mfp.MemoryFile() + cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt) + + var ts []memmap.Translation + var translatedEnd uint64 + for seg := d.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() { + segMR := seg.Range().Intersect(optional) + // TODO(jamieliu): Make Translations writable even if writability is + // not required if already kept-dirty by another writable translation. + perms := usermem.AccessType{ + Read: true, + Execute: true, + } + if at.Write { + // From this point forward, this memory can be dirtied through the + // mapping at any time. + d.dirty.KeepDirty(segMR) + perms.Write = true + } + ts = append(ts, memmap.Translation{ + Source: segMR, + File: mf, + Offset: seg.FileRangeOf(segMR).Start, + Perms: perms, + }) + translatedEnd = segMR.End + } + + d.dataMu.Unlock() + d.handleMu.RUnlock() + + // Don't return the error returned by c.cache.Fill if it occurred outside + // of required. + if translatedEnd < required.End && cerr != nil { + return ts, &memmap.BusError{cerr} + } + if beyondEOF { + return ts, &memmap.BusError{io.EOF} + } + return ts, nil +} + +func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange { + const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily + if required.Length() >= maxReadahead { + return required + } + if optional.Length() <= maxReadahead { + return optional + } + optional.Start = required.Start + if optional.Length() <= maxReadahead { + return optional + } + optional.End = optional.Start + maxReadahead + return optional +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (d *dentry) InvalidateUnsavable(ctx context.Context) error { + // Whether we have a host fd (and consequently what platform.File is + // mapped) can change across save/restore, so invalidate all translations + // unconditionally. + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.mappings.InvalidateAll(memmap.InvalidateOpts{}) + + // Write the cache's contents back to the remote file so that if we have a + // host fd after restore, the remote file's contents are coherent. + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + defer d.dataMu.Unlock() + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + return err + } + + // Discard the cache so that it's not stored in saved state. This is safe + // because per InvalidateUnsavable invariants, no new translations can have + // been returned after we invalidated all existing translations above. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + + return nil +} + +// Evict implements pgalloc.EvictableMemoryUser.Evict. +func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + + mr := memmap.MappableRange{er.Start, er.End} + mf := d.fs.mfp.MemoryFile() + // Only allow pages that are no longer memory-mapped to be evicted. 
+ for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() { + mgapMR := mgap.Range().Intersect(mr) + if mgapMR.Length() == 0 { + continue + } + if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err) + } + d.cache.Drop(mgapMR, mf) + d.dirty.KeepClean(mgapMR) + } +} + +// dentryPlatformFile implements platform.File. It exists solely because dentry +// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef. +// +// dentryPlatformFile is only used when a host FD representing the remote file +// is available (i.e. dentry.handle.fd >= 0), and that FD is used for +// application memory mappings (i.e. !filesystem.opts.forcePageCache). +type dentryPlatformFile struct { + *dentry + + // fdRefs counts references on platform.File offsets. fdRefs is protected + // by dentry.dataMu. + fdRefs fsutil.FrameRefSet + + // If this dentry represents a regular file, and handle.fd >= 0, + // hostFileMapper caches mappings of handle.fd. + hostFileMapper fsutil.HostFileMapper +} + +// IncRef implements platform.File.IncRef. +func (d *dentryPlatformFile) IncRef(fr platform.FileRange) { + d.dataMu.Lock() + seg, gap := d.fdRefs.Find(fr.Start) + for { + switch { + case seg.Ok() && seg.Start() < fr.End: + seg = d.fdRefs.Isolate(seg, fr) + seg.SetValue(seg.Value() + 1) + seg, gap = seg.NextNonEmpty() + case gap.Ok() && gap.Start() < fr.End: + newRange := gap.Range().Intersect(fr) + usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped) + seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty() + default: + d.fdRefs.MergeAdjacent(fr) + d.dataMu.Unlock() + return + } + } +} + +// DecRef implements platform.File.DecRef. +func (d *dentryPlatformFile) DecRef(fr platform.FileRange) { + d.dataMu.Lock() + seg := d.fdRefs.FindSegment(fr.Start) + + for seg.Ok() && seg.Start() < fr.End { + seg = d.fdRefs.Isolate(seg, fr) + if old := seg.Value(); old == 1 { + usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped) + seg = d.fdRefs.Remove(seg).NextSegment() + } else { + seg.SetValue(old - 1) + seg = seg.NextSegment() + } + } + d.fdRefs.MergeAdjacent(fr) + d.dataMu.Unlock() + +} + +// MapInternal implements platform.File.MapInternal. +func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { + d.handleMu.RLock() + bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write) + d.handleMu.RUnlock() + return bs, err +} + +// FD implements platform.File.FD. +func (d *dentryPlatformFile) FD() int { + d.handleMu.RLock() + fd := d.handle.fd + d.handleMu.RUnlock() + return int(fd) +} diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go new file mode 100644 index 000000000..08c691c47 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -0,0 +1,159 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// specialFileFD implements vfs.FileDescriptionImpl for files other than +// regular files, directories, and symlinks: pipes, sockets, etc. It is also +// used for regular files when filesystemOptions.specialRegularFiles is in +// effect. specialFileFD differs from regularFileFD by using per-FD handles +// instead of shared per-dentry handles, and never buffering I/O. +type specialFileFD struct { + fileDescription + + // handle is immutable. + handle handle + + // off is the file offset. off is protected by mu. (POSIX 2.9.7 only + // requires operations using the file offset to be atomic for regular files + // and symlinks; however, since specialFileFD may be used for regular + // files, we apply this atomicity unconditionally.) + mu sync.Mutex + off int64 +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *specialFileFD) Release() { + fd.handle.close(context.Background()) + fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) + fs.syncMu.Lock() + delete(fs.specialFileFDs, fd) + fs.syncMu.Unlock() +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *specialFileFD) OnClose(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + return fd.handle.file.flush(ctx) +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Going through dst.CopyOutFrom() holds MM locks around file operations of + // unknown duration. For regularFileFD, doing so is necessary to support + // mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't + // hold here since specialFileFD doesn't client-cache data. Just buffer the + // read instead. + if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + d.touchAtime(ctx, fd.vfsfd.Mount()) + } + buf := make([]byte, dst.NumBytes()) + n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + if n == 0 { + return 0, err + } + if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil { + return int64(cp), cperr + } + return int64(n), err +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Do a buffered write. See rationale in PRead. + if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + d.touchCMtime(ctx) + } + buf := make([]byte, src.NumBytes()) + // Don't do partial writes if we get a partial read from src. 
+ if _, err := src.CopyIn(ctx, buf); err != nil { + return 0, err + } + n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + return int64(n), err +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + switch whence { + case linux.SEEK_SET: + // Use offset as given. + case linux.SEEK_CUR: + offset += fd.off + default: + // SEEK_END, SEEK_DATA, and SEEK_HOLE aren't supported since it's not + // clear that file size is even meaningful for these files. + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *specialFileFD) Sync(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + return fd.handle.sync(ctx) +} diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go new file mode 100644 index 000000000..adf43be60 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/symlink.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +func (d *dentry) isSymlink() bool { + return d.fileType() == linux.S_IFLNK +} + +// Precondition: d.isSymlink(). +func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) { + if d.fs.opts.interop != InteropModeShared { + d.touchAtime(ctx, mnt) + d.dataMu.Lock() + if d.haveTarget { + target := d.target + d.dataMu.Unlock() + return target, nil + } + } + target, err := d.file.readlink(ctx) + if d.fs.opts.interop != InteropModeShared { + if err == nil { + d.haveTarget = true + d.target = target + } + d.dataMu.Unlock() + } + return target, err +} diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go new file mode 100644 index 000000000..7598ec6a8 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/time.go @@ -0,0 +1,75 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +func dentryTimestampFromP9(s, ns uint64) int64 { + return int64(s*1e9 + ns) +} + +func dentryTimestampFromStatx(ts linux.StatxTimestamp) int64 { + return ts.Sec*1e9 + int64(ts.Nsec) +} + +func statxTimestampFromDentry(ns int64) linux.StatxTimestamp { + return linux.StatxTimestamp{ + Sec: ns / 1e9, + Nsec: uint32(ns % 1e9), + } +} + +func nowFromContext(ctx context.Context) (int64, bool) { + if clock := ktime.RealtimeClockFromContext(ctx); clock != nil { + return clock.Now().Nanoseconds(), true + } + return 0, false +} + +// Preconditions: fs.interop != InteropModeShared. +func (d *dentry) touchAtime(ctx context.Context, mnt *vfs.Mount) { + if err := mnt.CheckBeginWrite(); err != nil { + return + } + now, ok := nowFromContext(ctx) + if !ok { + mnt.EndWrite() + return + } + d.metadataMu.Lock() + atomic.StoreInt64(&d.atime, now) + d.metadataMu.Unlock() + mnt.EndWrite() +} + +// Preconditions: fs.interop != InteropModeShared. The caller has successfully +// called vfs.Mount.CheckBeginWrite(). +func (d *dentry) touchCMtime(ctx context.Context) { + now, ok := nowFromContext(ctx) + if !ok { + return + } + d.metadataMu.Lock() + atomic.StoreInt64(&d.mtime, now) + atomic.StoreInt64(&d.ctime, now) + d.metadataMu.Unlock() +} diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go index 1c98335c1..69fd84ddd 100644 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ b/pkg/sentry/fsimpl/testutil/testutil.go @@ -98,7 +98,7 @@ func (s *System) WithTemporaryContext(ctx context.Context) *System { // Destroy release resources associated with a test system. func (s *System) Destroy() { s.Root.DecRef() - s.mns.DecRef(s.VFS) // Reference on mns passed to NewSystem. + s.mns.DecRef() // Reference on mns passed to NewSystem. } // ReadToEnd reads the contents of fd until EOF to a string. 
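Editor's note: an illustrative sketch, not part of this patch, of the timestamp representation used by the gofer time helpers above. The dentry keeps a single int64 nanosecond count, converted from a p9 (seconds, nanoseconds) pair and split back apart for statx; the arithmetic below mirrors dentryTimestampFromP9 and statxTimestampFromDentry, but all names are local to the sketch.

package main

import "fmt"

func main() {
	// A p9 timestamp arrives as separate second and nanosecond fields.
	var sec, nsec uint64 = 1580000000, 123456789

	// Collapse to one nanosecond count, as dentryTimestampFromP9 does.
	ns := int64(sec*1e9 + nsec)

	// Split back out for linux.StatxTimestamp, as statxTimestampFromDentry does.
	statxSec := ns / 1e9
	statxNsec := uint32(ns % 1e9)

	fmt.Println(ns, statxSec, statxNsec)
	// Prints: 1580000000123456789 1580000000 123456789
}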
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index c61366224..57abd5583 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/sentry/arch", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/pipe", @@ -47,6 +48,7 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", @@ -86,6 +88,7 @@ go_test( "//pkg/context", "//pkg/fspath", "//pkg/sentry/contexttest", + "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/contexttest", "//pkg/sentry/vfs", diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go index 54241c8e8..9fce5e4b4 100644 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -183,7 +183,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef(vfsObj) + defer mntns.DecRef() var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') @@ -374,7 +374,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef(vfsObj) + defer mntns.DecRef() var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 5ee9cf1e9..72bc15264 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -622,7 +622,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if child.inode.isDir() { return syserror.EISDIR } - if !rp.MustBeDir() { + if rp.MustBeDir() { return syserror.ENOTDIR } mnt := rp.Mount() diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index e9e6faf67..dab346a41 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -192,6 +193,28 @@ func (fd *regularFileFD) Sync(ctx context.Context) error { return nil } +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { + return fd.inode().lockBSD(uid, t, block) +} + +// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. +func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { + fd.inode().unlockBSD(uid) + return nil +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { + return fd.inode().lockPOSIX(uid, t, rng, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { + fd.inode().unlockPOSIX(uid, rng) + return nil +} + // regularFileReadWriter implements safemem.Reader and Safemem.Writer. 
type regularFileReadWriter struct { file *regularFile diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 32552e261..e9f71e334 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -24,9 +24,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -49,7 +51,7 @@ func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentr root := mntns.Root() return vfsObj, root, func() { root.DecRef() - mntns.DecRef(vfsObj) + mntns.DecRef() }, nil } @@ -260,6 +262,60 @@ func TestPWrite(t *testing.T) { } } +func TestLocks(t *testing.T) { + ctx := contexttest.Context(t) + fd, cleanup, err := newFileFD(ctx, 0644) + if err != nil { + t.Fatal(err) + } + defer cleanup() + + var ( + uid1 lock.UniqueID + uid2 lock.UniqueID + // Non-blocking. + block lock.Blocker + ) + + uid1 = 123 + uid2 = 456 + + if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, block); err != nil { + t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) + } + if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, block); err != nil { + t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) + } + if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block), syserror.ErrWouldBlock; got != want { + t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want) + } + if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil { + t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err) + } + if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block); err != nil { + t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) + } + + rng1 := lock.LockRange{0, 1} + rng2 := lock.LockRange{1, 2} + + if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, rng1, block); err != nil { + t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) + } + if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng2, block); err != nil { + t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) + } + if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, rng1, block); err != nil { + t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) + } + if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng1, block), syserror.ErrWouldBlock; got != want { + t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want) + } + if err := fd.Impl().UnlockPOSIX(ctx, uid1, rng1); err != nil { + t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err) + } +} + func TestPRead(t *testing.T) { ctx := contexttest.Context(t) fd, cleanup, err := newFileFD(ctx, 0644) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 88dbd6e35..2108d0f4d 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -30,10 +30,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -153,6 +155,9 @@ type inode struct { rdevMajor uint32 rdevMinor uint32 + // Advisory file locks, 
which lock at the inode level. + locks lock.FileLocks + impl interface{} // immutable } @@ -352,6 +357,44 @@ func (i *inode) setStat(stat linux.Statx) error { return nil } +// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. +func (i *inode) lockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + switch i.impl.(type) { + case *regularFile: + return i.locks.LockBSD(uid, t, block) + } + return syserror.EBADF +} + +// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. +func (i *inode) unlockBSD(uid fslock.UniqueID) error { + switch i.impl.(type) { + case *regularFile: + i.locks.UnlockBSD(uid) + return nil + } + return syserror.EBADF +} + +// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. +func (i *inode) lockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + switch i.impl.(type) { + case *regularFile: + return i.locks.LockPOSIX(uid, t, rng, block) + } + return syserror.EBADF +} + +// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. +func (i *inode) unlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) error { + switch i.impl.(type) { + case *regularFile: + i.locks.UnlockPOSIX(uid, rng) + return nil + } + return syserror.EBADF +} + // allocatedBlocksForSize returns the number of 512B blocks needed to // accommodate the given size in bytes, as appropriate for struct // stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index a7dfb78a7..2916a0644 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -28,6 +28,10 @@ type Stack interface { // interface indexes to a slice of associated interface address properties. InterfaceAddrs() map[int32][]InterfaceAddr + // AddInterfaceAddr adds an address to the network interface identified by + // index. + AddInterfaceAddr(idx int32, addr InterfaceAddr) error + // SupportsIPv6 returns true if the stack supports IPv6 connectivity. SupportsIPv6() bool diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index dcfcbd97e..d8961fc94 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -47,6 +47,12 @@ func (s *TestStack) InterfaceAddrs() map[int32][]InterfaceAddr { return s.InterfaceAddrsMap } +// AddInterfaceAddr implements Stack.AddInterfaceAddr. +func (s *TestStack) AddInterfaceAddr(idx int32, addr InterfaceAddr) error { + s.InterfaceAddrsMap[idx] = append(s.InterfaceAddrsMap[idx], addr) + return nil +} + // SupportsIPv6 implements Stack.SupportsIPv6. func (s *TestStack) SupportsIPv6() bool { return s.SupportsIPv6Flag diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md index e1322e373..f4d43d927 100644 --- a/pkg/sentry/mm/README.md +++ b/pkg/sentry/mm/README.md @@ -274,7 +274,7 @@ In the sentry: methods [`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform]. 
-[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go -[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go -[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go -[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go +[memmap]: https://github.com/google/gvisor/blob/master/pkg/sentry/memmap/memmap.go +[mm]: https://github.com/google/gvisor/blob/master/pkg/sentry/mm/mm.go +[pgalloc]: https://github.com/google/gvisor/blob/master/pkg/sentry/pgalloc/pgalloc.go +[platform]: https://github.com/google/gvisor/blob/master/pkg/sentry/platform/platform.go diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index bde4c7a1e..34f63986f 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -126,7 +126,7 @@ func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS } return uint64(n), nil } - return readv(s.fd, iovecsFromBlockSeq(dsts)) + return readv(s.fd, safemem.IovecsFromBlockSeq(dsts)) })) return int64(n), err } @@ -149,7 +149,7 @@ func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO } return uint64(n), nil } - return writev(s.fd, iovecsFromBlockSeq(srcs)) + return writev(s.fd, safemem.IovecsFromBlockSeq(srcs)) })) return int64(n), err } @@ -402,7 +402,7 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags // We always do a non-blocking recv*(). sysflags := flags | syscall.MSG_DONTWAIT - iovs := iovecsFromBlockSeq(dsts) + iovs := safemem.IovecsFromBlockSeq(dsts) msg := syscall.Msghdr{ Iov: &iovs[0], Iovlen: uint64(len(iovs)), @@ -522,7 +522,7 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] return uint64(n), nil } - iovs := iovecsFromBlockSeq(srcs) + iovs := safemem.IovecsFromBlockSeq(srcs) msg := syscall.Msghdr{ Iov: &iovs[0], Iovlen: uint64(len(iovs)), @@ -567,21 +567,6 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] return int(n), syserr.FromError(err) } -func iovecsFromBlockSeq(bs safemem.BlockSeq) []syscall.Iovec { - iovs := make([]syscall.Iovec, 0, bs.NumBlocks()) - for ; !bs.IsEmpty(); bs = bs.Tail() { - b := bs.Head() - iovs = append(iovs, syscall.Iovec{ - Base: &b.ToSlice()[0], - Len: uint64(b.Len()), - }) - // We don't need to care about b.NeedSafecopy(), because the host - // kernel will handle such address ranges just fine (by returning - // EFAULT). - } - return iovs -} - func translateIOSyscallError(err error) error { if err == syscall.EAGAIN || err == syscall.EWOULDBLOCK { return syserror.ErrWouldBlock diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 034eca676..a48082631 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -310,6 +310,11 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { return addrs } +// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. +func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + return syserror.EACCES +} + // SupportsIPv6 implements inet.Stack.SupportsIPv6. 
func (s *Stack) SupportsIPv6() bool { return s.supportsIPv6 diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 8f14643b0..ea43a0ce3 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -48,8 +48,7 @@ type metadata struct { Size uint32 } -// nflog logs messages related to the writing and reading of iptables, but only -// when enableDebug is true. +// nflog logs messages related to the writing and reading of iptables. func nflog(format string, args ...interface{}) { log.Infof("netfilter: "+format, args...) } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index f8b8e467d..1911cd9b8 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -33,3 +33,15 @@ go_library( "//pkg/waiter", ], ) + +go_test( + name = "netlink_test", + size = "small", + srcs = [ + "message_test.go", + ], + deps = [ + ":netlink", + "//pkg/abi/linux", + ], +) diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go index b21e0ca4b..4ea252ccb 100644 --- a/pkg/sentry/socket/netlink/message.go +++ b/pkg/sentry/socket/netlink/message.go @@ -30,8 +30,16 @@ func alignUp(length int, align uint) int { return (length + int(align) - 1) &^ (int(align) - 1) } +// alignPad returns the length of padding required for alignment. +// +// Preconditions: align is a power of two. +func alignPad(length int, align uint) int { + return alignUp(length, align) - length +} + // Message contains a complete serialized netlink message. type Message struct { + hdr linux.NetlinkMessageHeader buf []byte } @@ -40,10 +48,86 @@ type Message struct { // The header length will be updated by Finalize. func NewMessage(hdr linux.NetlinkMessageHeader) *Message { return &Message{ + hdr: hdr, buf: binary.Marshal(nil, usermem.ByteOrder, hdr), } } +// ParseMessage parses the first message seen at buf, returning the rest of the +// buffer. If message is malformed, ok of false is returned. For last message, +// padding check is loose, if there isn't enought padding, whole buf is consumed +// and ok is set to true. +func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) { + b := BytesView(buf) + + hdrBytes, ok := b.Extract(linux.NetlinkMessageHeaderSize) + if !ok { + return + } + var hdr linux.NetlinkMessageHeader + binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + + // Msg portion. + totalMsgLen := int(hdr.Length) + _, ok = b.Extract(totalMsgLen - linux.NetlinkMessageHeaderSize) + if !ok { + return + } + + // Padding. + numPad := alignPad(totalMsgLen, linux.NLMSG_ALIGNTO) + // Linux permits the last message not being aligned, just consume all of it. + // Ref: net/netlink/af_netlink.c:netlink_rcv_skb + if numPad > len(b) { + numPad = len(b) + } + _, ok = b.Extract(numPad) + if !ok { + return + } + + return &Message{ + hdr: hdr, + buf: buf[:totalMsgLen], + }, []byte(b), true +} + +// Header returns the header of this message. +func (m *Message) Header() linux.NetlinkMessageHeader { + return m.hdr +} + +// GetData unmarshals the payload message header from this netlink message, and +// returns the attributes portion. 
+func (m *Message) GetData(msg interface{}) (AttrsView, bool) { + b := BytesView(m.buf) + + _, ok := b.Extract(linux.NetlinkMessageHeaderSize) + if !ok { + return nil, false + } + + size := int(binary.Size(msg)) + msgBytes, ok := b.Extract(size) + if !ok { + return nil, false + } + binary.Unmarshal(msgBytes, usermem.ByteOrder, msg) + + numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO) + // Linux permits the last message not being aligned, just consume all of it. + // Ref: net/netlink/af_netlink.c:netlink_rcv_skb + if numPad > len(b) { + numPad = len(b) + } + _, ok = b.Extract(numPad) + if !ok { + return nil, false + } + + return AttrsView(b), true +} + // Finalize returns the []byte containing the entire message, with the total // length set in the message header. The Message must not be modified after // calling Finalize. @@ -157,3 +241,48 @@ func (ms *MessageSet) AddMessage(hdr linux.NetlinkMessageHeader) *Message { ms.Messages = append(ms.Messages, m) return m } + +// AttrsView is a view into the attributes portion of a netlink message. +type AttrsView []byte + +// Empty returns whether there is no attribute left in v. +func (v AttrsView) Empty() bool { + return len(v) == 0 +} + +// ParseFirst parses first netlink attribute at the beginning of v. +func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest AttrsView, ok bool) { + b := BytesView(v) + + hdrBytes, ok := b.Extract(linux.NetlinkAttrHeaderSize) + if !ok { + return + } + binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + + value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize) + if !ok { + return + } + + _, ok = b.Extract(alignPad(int(hdr.Length), linux.NLA_ALIGNTO)) + if !ok { + return + } + + return hdr, value, AttrsView(b), ok +} + +// BytesView supports extracting data from a byte slice with bounds checking. +type BytesView []byte + +// Extract removes the first n bytes from v and returns it. If n is out of +// bounds, it returns false. +func (v *BytesView) Extract(n int) ([]byte, bool) { + if n < 0 || n > len(*v) { + return nil, false + } + extracted := (*v)[:n] + *v = (*v)[n:] + return extracted, true +} diff --git a/pkg/sentry/socket/netlink/message_test.go b/pkg/sentry/socket/netlink/message_test.go new file mode 100644 index 000000000..ef13d9386 --- /dev/null +++ b/pkg/sentry/socket/netlink/message_test.go @@ -0,0 +1,312 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
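Editor's note: a usage sketch for the new Message parsing API above, not part of this patch. ParseMessage splits a raw request buffer into messages, GetData unmarshals the payload header, and AttrsView/ParseFirst walk the attributes with bounds checking. The package and helper names below are hypothetical; the RTM_NEWADDR handler added to route/protocol.go later in this diff follows the same pattern.

package netlinkutil // hypothetical package, for illustration only

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
)

// localAddrs collects the IFA_LOCAL payload of every well-formed message in
// buf, ignoring a malformed remainder the same way Linux does.
func localAddrs(buf []byte) [][]byte {
	var out [][]byte
	for len(buf) > 0 {
		msg, rest, ok := netlink.ParseMessage(buf)
		if !ok {
			break // malformed or truncated trailer; stop parsing
		}
		buf = rest

		// Unmarshal the payload header; attrs is the attribute region.
		var ifa linux.InterfaceAddrMessage
		attrs, ok := msg.GetData(&ifa)
		if !ok {
			continue
		}
		for !attrs.Empty() {
			ahdr, value, next, ok := attrs.ParseFirst()
			if !ok {
				break
			}
			attrs = next
			if ahdr.Type == linux.IFA_LOCAL {
				out = append(out, value)
			}
		}
	}
	return out
}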
+ +package message_test + +import ( + "bytes" + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink" +) + +type dummyNetlinkMsg struct { + Foo uint16 +} + +func TestParseMessage(t *testing.T) { + tests := []struct { + desc string + input []byte + + header linux.NetlinkMessageHeader + dataMsg *dummyNetlinkMsg + restLen int + ok bool + }{ + { + desc: "valid", + input: []byte{ + 0x14, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + header: linux.NetlinkMessageHeader{ + Length: 20, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "valid with next message", + input: []byte{ + 0x14, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + 0xFF, // Next message (rest) + }, + header: linux.NetlinkMessageHeader{ + Length: 20, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 1, + ok: true, + }, + { + desc: "valid for last message without padding", + input: []byte{ + 0x12, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, // Data message + }, + header: linux.NetlinkMessageHeader{ + Length: 18, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "valid for last message not to be aligned", + input: []byte{ + 0x13, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, // Data message + 0x00, // Excessive 1 byte permitted at end + }, + header: linux.NetlinkMessageHeader{ + Length: 19, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "header.Length too short", + input: []byte{ + 0x04, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + ok: false, + }, + { + desc: "header.Length too long", + input: []byte{ + 0xFF, 0xFF, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + ok: false, + }, + { + desc: "header incomplete", + input: []byte{ + 0x04, 0x00, 0x00, 0x00, // Length + }, + ok: false, + }, + { + desc: "empty message", + input: []byte{}, + ok: false, + }, + } + for _, test := range tests { + msg, rest, ok := netlink.ParseMessage(test.input) + if ok != test.ok { + t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) + continue + } + if !test.ok { + continue + } + if !reflect.DeepEqual(msg.Header(), test.header) { + t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, msg.Header(), test.header) + } + + dataMsg := &dummyNetlinkMsg{} + _, dataOk := msg.GetData(dataMsg) + if !dataOk { + t.Errorf("%v: GetData.ok = %v, want = true", test.desc, dataOk) + } else if !reflect.DeepEqual(dataMsg, 
test.dataMsg) { + t.Errorf("%v: GetData.msg = %+v, want = %+v", test.desc, dataMsg, test.dataMsg) + } + + if got, want := rest, test.input[len(test.input)-test.restLen:]; !bytes.Equal(got, want) { + t.Errorf("%v: got rest = %v, want = %v", test.desc, got, want) + } + } +} + +func TestAttrView(t *testing.T) { + tests := []struct { + desc string + input []byte + + // Outputs for ParseFirst. + hdr linux.NetlinkAttrHeader + value []byte + restLen int + ok bool + + // Outputs for Empty. + isEmpty bool + }{ + { + desc: "valid", + input: []byte{ + 0x06, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x00, 0x00, // Data with 2 bytes padding + }, + hdr: linux.NetlinkAttrHeader{ + Length: 6, + Type: 1, + }, + value: []byte{0x30, 0x31}, + restLen: 0, + ok: true, + isEmpty: false, + }, + { + desc: "at alignment", + input: []byte{ + 0x08, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + hdr: linux.NetlinkAttrHeader{ + Length: 8, + Type: 1, + }, + value: []byte{0x30, 0x31, 0x32, 0x33}, + restLen: 0, + ok: true, + isEmpty: false, + }, + { + desc: "at alignment with rest data", + input: []byte{ + 0x08, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + 0xFF, 0xFE, // Rest data + }, + hdr: linux.NetlinkAttrHeader{ + Length: 8, + Type: 1, + }, + value: []byte{0x30, 0x31, 0x32, 0x33}, + restLen: 2, + ok: true, + isEmpty: false, + }, + { + desc: "hdr.Length too long", + input: []byte{ + 0xFF, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + ok: false, + isEmpty: false, + }, + { + desc: "hdr.Length too short", + input: []byte{ + 0x01, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + ok: false, + isEmpty: false, + }, + { + desc: "empty", + input: []byte{}, + ok: false, + isEmpty: true, + }, + } + for _, test := range tests { + attrs := netlink.AttrsView(test.input) + + // Test ParseFirst(). + hdr, value, rest, ok := attrs.ParseFirst() + if ok != test.ok { + t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) + } else if test.ok { + if !reflect.DeepEqual(hdr, test.hdr) { + t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, hdr, test.hdr) + } + if !bytes.Equal(value, test.value) { + t.Errorf("%v: got value = %v, want = %v", test.desc, value, test.value) + } + if wantRest := test.input[len(test.input)-test.restLen:]; !bytes.Equal(rest, wantRest) { + t.Errorf("%v: got rest = %v, want = %v", test.desc, rest, wantRest) + } + } + + // Test Empty(). + if got, want := attrs.Empty(), test.isEmpty; got != want { + t.Errorf("%v: got empty = %v, want = %v", test.desc, got, want) + } + } +} diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index 07f860a49..b0dc70e5c 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -42,7 +42,7 @@ type Protocol interface { // If err == nil, any messages added to ms will be sent back to the // other end of the socket. Setting ms.Multi will cause an NLMSG_DONE // message to be sent even if ms contains no messages. 
- ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *MessageSet) *syserr.Error + ProcessMessage(ctx context.Context, msg *Message, ms *MessageSet) *syserr.Error } // Provider is a function that creates a new Protocol for a specific netlink diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 622a1eafc..93127398d 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -10,13 +10,11 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/binary", "//pkg/context", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/socket/netlink", "//pkg/syserr", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 2b3c7f5b3..c84d8bd7c 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -17,16 +17,15 @@ package route import ( "bytes" + "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/netlink" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/usermem" ) // commandKind describes the operational class of a message type. @@ -69,13 +68,7 @@ func (p *Protocol) CanSend() bool { } // dumpLinks handles RTM_GETLINK dump requests. -func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { - // TODO(b/68878065): Only the dump variant of the types below are - // supported. - if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { - return syserr.ErrNotSupported - } - +func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some // userspace applications (including glibc) still include rtgenmsg. @@ -99,44 +92,105 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader return nil } - for id, i := range stack.Interfaces() { - m := ms.AddMessage(linux.NetlinkMessageHeader{ - Type: linux.RTM_NEWLINK, - }) + for idx, i := range stack.Interfaces() { + addNewLinkMessage(ms, idx, i) + } - m.Put(linux.InterfaceInfoMessage{ - Family: linux.AF_UNSPEC, - Type: i.DeviceType, - Index: id, - Flags: i.Flags, - }) + return nil +} - m.PutAttrString(linux.IFLA_IFNAME, i.Name) - m.PutAttr(linux.IFLA_MTU, i.MTU) +// getLinks handles RTM_GETLINK requests. +func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network devices. + return nil + } - mac := make([]byte, 6) - brd := mac - if len(i.Addr) > 0 { - mac = i.Addr - brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) + // Parse message. + var ifi linux.InterfaceInfoMessage + attrs, ok := msg.GetData(&ifi) + if !ok { + return syserr.ErrInvalidArgument + } + + // Parse attributes. 
+ var byName []byte + for !attrs.Empty() { + ahdr, value, rest, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument } - m.PutAttr(linux.IFLA_ADDRESS, mac) - m.PutAttr(linux.IFLA_BROADCAST, brd) + attrs = rest - // TODO(gvisor.dev/issue/578): There are many more attributes. + switch ahdr.Type { + case linux.IFLA_IFNAME: + if len(value) < 1 { + return syserr.ErrInvalidArgument + } + byName = value[:len(value)-1] + + // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK. + } } + found := false + for idx, i := range stack.Interfaces() { + switch { + case ifi.Index > 0: + if idx != ifi.Index { + continue + } + case byName != nil: + if string(byName) != i.Name { + continue + } + default: + // Criteria not specified. + return syserr.ErrInvalidArgument + } + + addNewLinkMessage(ms, idx, i) + found = true + break + } + if !found { + return syserr.ErrNoDevice + } return nil } -// dumpAddrs handles RTM_GETADDR dump requests. -func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { - // TODO(b/68878065): Only the dump variant of the types below are - // supported. - if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { - return syserr.ErrNotSupported +// addNewLinkMessage appends RTM_NEWLINK message for the given interface into +// the message set. +func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) { + m := ms.AddMessage(linux.NetlinkMessageHeader{ + Type: linux.RTM_NEWLINK, + }) + + m.Put(linux.InterfaceInfoMessage{ + Family: linux.AF_UNSPEC, + Type: i.DeviceType, + Index: idx, + Flags: i.Flags, + }) + + m.PutAttrString(linux.IFLA_IFNAME, i.Name) + m.PutAttr(linux.IFLA_MTU, i.MTU) + + mac := make([]byte, 6) + brd := mac + if len(i.Addr) > 0 { + mac = i.Addr + brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) } + m.PutAttr(linux.IFLA_ADDRESS, mac) + m.PutAttr(linux.IFLA_BROADCAST, brd) + + // TODO(gvisor.dev/issue/578): There are many more attributes. +} +// dumpAddrs handles RTM_GETADDR dump requests. +func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // RTM_GETADDR dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. @@ -168,6 +222,7 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader Index: uint32(id), }) + m.PutAttr(linux.IFA_LOCAL, []byte(a.Addr)) m.PutAttr(linux.IFA_ADDRESS, []byte(a.Addr)) // TODO(gvisor.dev/issue/578): There are many more attributes. @@ -252,12 +307,12 @@ func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { } // parseForDestination parses a message as format of RouteMessage-RtAttr-dst. -func parseForDestination(data []byte) ([]byte, *syserr.Error) { +func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) { var rtMsg linux.RouteMessage - if len(data) < linux.SizeOfRouteMessage { + attrs, ok := msg.GetData(&rtMsg) + if !ok { return nil, syserr.ErrInvalidArgument } - binary.Unmarshal(data[:linux.SizeOfRouteMessage], usermem.ByteOrder, &rtMsg) // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See // commit bc234301af12. Note we don't check this flag for backward // compatibility. @@ -265,26 +320,15 @@ func parseForDestination(data []byte) ([]byte, *syserr.Error) { return nil, syserr.ErrNotSupported } - data = data[linux.SizeOfRouteMessage:] - - // TODO(gvisor.dev/issue/1611): Add generic attribute parsing. 
- var rtAttr linux.RtAttr - if len(data) < linux.SizeOfRtAttr { - return nil, syserr.ErrInvalidArgument + // Expect first attribute is RTA_DST. + if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST { + return value, nil } - binary.Unmarshal(data[:linux.SizeOfRtAttr], usermem.ByteOrder, &rtAttr) - if rtAttr.Type != linux.RTA_DST { - return nil, syserr.ErrInvalidArgument - } - - if len(data) < int(rtAttr.Len) { - return nil, syserr.ErrInvalidArgument - } - return data[linux.SizeOfRtAttr:rtAttr.Len], nil + return nil, syserr.ErrInvalidArgument } // dumpRoutes handles RTM_GETROUTE requests. -func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // RTM_GETROUTE dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. @@ -295,10 +339,11 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade return nil } + hdr := msg.Header() routeTables := stack.RouteTable() if hdr.Flags == linux.NLM_F_REQUEST { - dst, err := parseForDestination(data) + dst, err := parseForDestination(msg) if err != nil { return err } @@ -357,10 +402,55 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade return nil } +// newAddr handles RTM_NEWADDR requests. +func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network stack. + return syserr.ErrProtocolNotSupported + } + + var ifa linux.InterfaceAddrMessage + attrs, ok := msg.GetData(&ifa) + if !ok { + return syserr.ErrInvalidArgument + } + + for !attrs.Empty() { + ahdr, value, rest, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument + } + attrs = rest + + switch ahdr.Type { + case linux.IFA_LOCAL: + err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ + Family: ifa.Family, + PrefixLen: ifa.PrefixLen, + Flags: ifa.Flags, + Addr: value, + }) + if err == syscall.EEXIST { + flags := msg.Header().Flags + if flags&linux.NLM_F_EXCL != 0 { + return syserr.ErrExists + } + } else if err != nil { + return syserr.ErrInvalidArgument + } + } + } + return nil +} + // ProcessMessage implements netlink.Protocol.ProcessMessage. -func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + hdr := msg.Header() + // All messages start with a 1 byte protocol family. - if len(data) < 1 { + var family uint8 + if _, ok := msg.GetData(&family); !ok { // Linux ignores messages missing the protocol family. See // net/core/rtnetlink.c:rtnetlink_rcv_msg. return nil @@ -374,16 +464,32 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH } } - switch hdr.Type { - case linux.RTM_GETLINK: - return p.dumpLinks(ctx, hdr, data, ms) - case linux.RTM_GETADDR: - return p.dumpAddrs(ctx, hdr, data, ms) - case linux.RTM_GETROUTE: - return p.dumpRoutes(ctx, hdr, data, ms) - default: - return syserr.ErrNotSupported + if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { + // TODO(b/68878065): Only the dump variant of the types below are + // supported. 
+ switch hdr.Type { + case linux.RTM_GETLINK: + return p.dumpLinks(ctx, msg, ms) + case linux.RTM_GETADDR: + return p.dumpAddrs(ctx, msg, ms) + case linux.RTM_GETROUTE: + return p.dumpRoutes(ctx, msg, ms) + default: + return syserr.ErrNotSupported + } + } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST { + switch hdr.Type { + case linux.RTM_GETLINK: + return p.getLink(ctx, msg, ms) + case linux.RTM_GETROUTE: + return p.dumpRoutes(ctx, msg, ms) + case linux.RTM_NEWADDR: + return p.newAddr(ctx, msg, ms) + default: + return syserr.ErrNotSupported + } } + return syserr.ErrNotSupported } // init registers the NETLINK_ROUTE provider. diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index c4b95debb..2ca02567d 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -644,47 +644,38 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error return nil } -func (s *Socket) dumpErrorMesage(ctx context.Context, hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) *syserr.Error { +func dumpErrorMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) { m := ms.AddMessage(linux.NetlinkMessageHeader{ Type: linux.NLMSG_ERROR, }) - m.Put(linux.NetlinkErrorMessage{ Error: int32(-err.ToLinux().Number()), Header: hdr, }) - return nil +} +func dumpAckMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet) { + m := ms.AddMessage(linux.NetlinkMessageHeader{ + Type: linux.NLMSG_ERROR, + }) + m.Put(linux.NetlinkErrorMessage{ + Error: 0, + Header: hdr, + }) } // processMessages handles each message in buf, passing it to the protocol // handler for final handling. func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error { for len(buf) > 0 { - if len(buf) < linux.NetlinkMessageHeaderSize { + msg, rest, ok := ParseMessage(buf) + if !ok { // Linux ignores messages that are too short. See // net/netlink/af_netlink.c:netlink_rcv_skb. break } - - var hdr linux.NetlinkMessageHeader - binary.Unmarshal(buf[:linux.NetlinkMessageHeaderSize], usermem.ByteOrder, &hdr) - - if hdr.Length < linux.NetlinkMessageHeaderSize || uint64(hdr.Length) > uint64(len(buf)) { - // Linux ignores malformed messages. See - // net/netlink/af_netlink.c:netlink_rcv_skb. - break - } - - // Data from this message. - data := buf[linux.NetlinkMessageHeaderSize:hdr.Length] - - // Advance to the next message. - next := alignUp(int(hdr.Length), linux.NLMSG_ALIGNTO) - if next >= len(buf)-1 { - next = len(buf) - 1 - } - buf = buf[next:] + buf = rest + hdr := msg.Header() // Ignore control messages. if hdr.Type < linux.NLMSG_MIN_TYPE { @@ -692,19 +683,10 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error } ms := NewMessageSet(s.portID, hdr.Seq) - var err *syserr.Error - // TODO(b/68877377): ACKs not supported yet. 
- if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK { - err = syserr.ErrNotSupported - } else { - - err = s.protocol.ProcessMessage(ctx, hdr, data, ms) - } - if err != nil { - ms = NewMessageSet(s.portID, hdr.Seq) - if err := s.dumpErrorMesage(ctx, hdr, ms, err); err != nil { - return err - } + if err := s.protocol.ProcessMessage(ctx, msg, ms); err != nil { + dumpErrorMesage(hdr, ms, err) + } else if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK { + dumpAckMesage(hdr, ms) } if err := s.sendResponse(ctx, ms); err != nil { diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go index 1ee4296bc..029ba21b5 100644 --- a/pkg/sentry/socket/netlink/uevent/protocol.go +++ b/pkg/sentry/socket/netlink/uevent/protocol.go @@ -49,7 +49,7 @@ func (p *Protocol) CanSend() bool { } // ProcessMessage implements netlink.Protocol.ProcessMessage. -func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // Silently ignore all messages. return nil } diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 31ea66eca..0692482e9 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -20,6 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -88,6 +90,59 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { return nicAddrs } +// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. +func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + var ( + protocol tcpip.NetworkProtocolNumber + address tcpip.Address + ) + switch addr.Family { + case linux.AF_INET: + if len(addr.Addr) < header.IPv4AddressSize { + return syserror.EINVAL + } + if addr.PrefixLen > header.IPv4AddressSize*8 { + return syserror.EINVAL + } + protocol = ipv4.ProtocolNumber + address = tcpip.Address(addr.Addr[:header.IPv4AddressSize]) + + case linux.AF_INET6: + if len(addr.Addr) < header.IPv6AddressSize { + return syserror.EINVAL + } + if addr.PrefixLen > header.IPv6AddressSize*8 { + return syserror.EINVAL + } + protocol = ipv6.ProtocolNumber + address = tcpip.Address(addr.Addr[:header.IPv6AddressSize]) + + default: + return syserror.ENOTSUP + } + + protocolAddress := tcpip.ProtocolAddress{ + Protocol: protocol, + AddressWithPrefix: tcpip.AddressWithPrefix{ + Address: address, + PrefixLen: int(addr.PrefixLen), + }, + } + + // Attach address to interface. + if err := s.Stack.AddProtocolAddressWithOptions(tcpip.NICID(idx), protocolAddress, stack.CanBePrimaryEndpoint); err != nil { + return syserr.TranslateNetstackError(err).ToError() + } + + // Add route for local network. + s.Stack.AddRoute(tcpip.Route{ + Destination: protocolAddress.AddressWithPrefix.Subnet(), + Gateway: "", // No gateway for local network. + NIC: tcpip.NICID(idx), + }) + return nil +} + // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. 
func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { var rs tcp.ReceiveBufferSizeOption diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 7435b50bf..588f8b087 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,18 +228,21 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + // TODO(b/148303075): Enable set/getxattr (in their various + // forms) once we also have list and removexattr. The JVM + // assumes that if get/set exist, then list and remove do too. 
+ 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), 200: syscalls.Supported("tkill", Tkill), 201: syscalls.Supported("time", Time), 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 03a39fe65..06e5ee401 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -36,23 +36,26 @@ var ARM64 = &kernel.SyscallTable{ }, AuditNumber: linux.AUDIT_ARCH_AARCH64, Table: map[uintptr]kernel.Syscall{ - 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + // TODO(b/148303075): Enable set/getxattr (in their various + // forms) once we also have list and removexattr. The JVM + // assumes that if get/set exist, then list and remove do too. 
+ 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
17: syscalls.Supported("getcwd", Getcwd),
18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
19: syscalls.Supported("eventfd2", Eventfd2),
diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go
index 432351917..a4c400f87 100644
--- a/pkg/sentry/syscalls/linux/sys_timer.go
+++ b/pkg/sentry/syscalls/linux/sys_timer.go
@@ -146,7 +146,7 @@ func TimerCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
return 0, nil, err
}
- return uintptr(id), nil, nil
+ return 0, nil, nil
}
// TimerSettime implements linux syscall timer_settime(2).
diff --git a/pkg/sentry/vfs/lock/BUILD b/pkg/sentry/vfs/lock/BUILD
new file mode 100644
index 000000000..d9ab063b7
--- /dev/null
+++ b/pkg/sentry/vfs/lock/BUILD
@@ -0,0 +1,13 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "lock",
+ srcs = ["lock.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/sentry/fs/lock",
+ "//pkg/syserror",
+ ],
+)
diff --git a/pkg/sentry/vfs/lock/lock.go b/pkg/sentry/vfs/lock/lock.go
new file mode 100644
index 000000000..724dfe743
--- /dev/null
+++ b/pkg/sentry/vfs/lock/lock.go
@@ -0,0 +1,72 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lock provides POSIX and BSD style file locking for VFS2 file
+// implementations.
+//
+// The actual implementations can be found in the lock package under
+// sentry/fs/lock.
+package lock + +import ( + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/syserror" +) + +// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2) +// and flock(2) respectively in Linux. It can be embedded into various file +// implementations for VFS2 that support locking. +// +// Note that in Linux these two types of locks are _not_ cooperative, because +// race and deadlock conditions make merging them prohibitive. We do the same +// and keep them oblivious to each other. +type FileLocks struct { + // bsd is a set of BSD-style advisory file wide locks, see flock(2). + bsd fslock.Locks + + // posix is a set of POSIX-style regional advisory locks, see fcntl(2). + posix fslock.Locks +} + +// LockBSD tries to acquire a BSD-style lock on the entire file. +func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockBSD releases a BSD-style lock on the entire file. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. +func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) { + fl.bsd.UnlockRegion(uid, fslock.LockRange{0, fslock.LockEOF}) +} + +// LockPOSIX tries to acquire a POSIX-style lock on a file region. +func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + if fl.posix.LockRegion(uid, t, rng, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockPOSIX releases a POSIX-style lock on a file region. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. +func (fl *FileLocks) UnlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) { + fl.posix.UnlockRegion(uid, rng) +} diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index d39528051..1fbb420f9 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -423,7 +423,8 @@ func (mntns *MountNamespace) IncRef() { } // DecRef decrements mntns' reference count. -func (mntns *MountNamespace) DecRef(vfs *VirtualFilesystem) { +func (mntns *MountNamespace) DecRef() { + vfs := mntns.root.fs.VirtualFilesystem() if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 { vfs.mountMu.Lock() vfs.mounts.seq.BeginWrite() diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go index f5d2c127f..b1e92d2d7 100644 --- a/pkg/tcpip/header/eth.go +++ b/pkg/tcpip/header/eth.go @@ -134,3 +134,44 @@ func IsValidUnicastEthernetAddress(addr tcpip.LinkAddress) bool { // addr is a valid unicast ethernet address. return true } + +// EthernetAddressFromMulticastIPv4Address returns a multicast Ethernet address +// for a multicast IPv4 address. +// +// addr MUST be a multicast IPv4 address. +func EthernetAddressFromMulticastIPv4Address(addr tcpip.Address) tcpip.LinkAddress { + var linkAddrBytes [EthernetAddressSize]byte + // RFC 1112 Host Extensions for IP Multicasting + // + // 6.4. Extensions to an Ethernet Local Network Module: + // + // An IP host group address is mapped to an Ethernet multicast + // address by placing the low-order 23-bits of the IP address + // into the low-order 23 bits of the Ethernet multicast address + // 01-00-5E-00-00-00 (hex). 
+ linkAddrBytes[0] = 0x1 + linkAddrBytes[2] = 0x5e + linkAddrBytes[3] = addr[1] & 0x7F + copy(linkAddrBytes[4:], addr[IPv4AddressSize-2:]) + return tcpip.LinkAddress(linkAddrBytes[:]) +} + +// EthernetAddressFromMulticastIPv6Address returns a multicast Ethernet address +// for a multicast IPv6 address. +// +// addr MUST be a multicast IPv6 address. +func EthernetAddressFromMulticastIPv6Address(addr tcpip.Address) tcpip.LinkAddress { + // RFC 2464 Transmission of IPv6 Packets over Ethernet Networks + // + // 7. Address Mapping -- Multicast + // + // An IPv6 packet with a multicast destination address DST, + // consisting of the sixteen octets DST[1] through DST[16], is + // transmitted to the Ethernet multicast address whose first + // two octets are the value 3333 hexadecimal and whose last + // four octets are the last four octets of DST. + linkAddrBytes := []byte(addr[IPv6AddressSize-EthernetAddressSize:]) + linkAddrBytes[0] = 0x33 + linkAddrBytes[1] = 0x33 + return tcpip.LinkAddress(linkAddrBytes[:]) +} diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go index 6634c90f5..7a0014ad9 100644 --- a/pkg/tcpip/header/eth_test.go +++ b/pkg/tcpip/header/eth_test.go @@ -66,3 +66,37 @@ func TestIsValidUnicastEthernetAddress(t *testing.T) { }) } } + +func TestEthernetAddressFromMulticastIPv4Address(t *testing.T) { + tests := []struct { + name string + addr tcpip.Address + expectedLinkAddr tcpip.LinkAddress + }{ + { + name: "IPv4 Multicast without 24th bit set", + addr: "\xe0\x7e\xdc\xba", + expectedLinkAddr: "\x01\x00\x5e\x7e\xdc\xba", + }, + { + name: "IPv4 Multicast with 24th bit set", + addr: "\xe0\xfe\xdc\xba", + expectedLinkAddr: "\x01\x00\x5e\x7e\xdc\xba", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := EthernetAddressFromMulticastIPv4Address(test.addr); got != test.expectedLinkAddr { + t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", got, test.expectedLinkAddr) + } + }) + } +} + +func TestEthernetAddressFromMulticastIPv6Address(t *testing.T) { + addr := tcpip.Address("\xff\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x1a") + if got, want := EthernetAddressFromMulticastIPv6Address(addr), tcpip.LinkAddress("\x33\x33\x0d\x0e\x0f\x1a"); got != want { + t.Fatalf("got EthernetAddressFromMulticastIPv6Address(%s) = %s, want = %s", addr, got, want) + } +} diff --git a/pkg/tcpip/header/ipv6_test.go b/pkg/tcpip/header/ipv6_test.go index 29f54bc57..c3ad503aa 100644 --- a/pkg/tcpip/header/ipv6_test.go +++ b/pkg/tcpip/header/ipv6_test.go @@ -17,6 +17,7 @@ package header_test import ( "bytes" "crypto/sha256" + "fmt" "testing" "github.com/google/go-cmp/cmp" @@ -300,3 +301,31 @@ func TestScopeForIPv6Address(t *testing.T) { }) } } + +func TestSolicitedNodeAddr(t *testing.T) { + tests := []struct { + addr tcpip.Address + want tcpip.Address + }{ + { + addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\xa0", + want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x0e\x0f\xa0", + }, + { + addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\xdd\x0e\x0f\xa0", + want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x0e\x0f\xa0", + }, + { + addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\xdd\x01\x02\x03", + want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x01\x02\x03", + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s", test.addr), func(t *testing.T) { + if got := header.SolicitedNodeAddr(test.addr); got != test.want { + t.Fatalf("got 
header.SolicitedNodeAddr(%s) = %s, want = %s", test.addr, got, test.want) + } + }) + } +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index d660aab04..7d593c35c 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -132,7 +132,7 @@ type Table struct { // ValidHooks returns a bitmap of the builtin hooks for the given table. func (table *Table) ValidHooks() uint32 { hooks := uint32(0) - for hook, _ := range table.BuiltinChains { + for hook := range table.BuiltinChains { hooks |= 1 << hook } return hooks diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index 71b9da797..78d447acd 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -30,15 +30,16 @@ type PacketInfo struct { Pkt tcpip.PacketBuffer Proto tcpip.NetworkProtocolNumber GSO *stack.GSO + Route stack.Route } // Endpoint is link layer endpoint that stores outbound packets in a channel // and allows injection of inbound packets. type Endpoint struct { - dispatcher stack.NetworkDispatcher - mtu uint32 - linkAddr tcpip.LinkAddress - GSO bool + dispatcher stack.NetworkDispatcher + mtu uint32 + linkAddr tcpip.LinkAddress + LinkEPCapabilities stack.LinkEndpointCapabilities // c is where outbound packets are queued. c chan PacketInfo @@ -122,11 +123,7 @@ func (e *Endpoint) MTU() uint32 { // Capabilities implements stack.LinkEndpoint.Capabilities. func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities { - caps := stack.LinkEndpointCapabilities(0) - if e.GSO { - caps |= stack.CapabilityHardwareGSO - } - return caps + return e.LinkEPCapabilities } // GSOMaxSize returns the maximum GSO packet size. @@ -146,11 +143,16 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress { } // WritePacket stores outbound packets into the channel. -func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error { + // Clone r then release its resource so we only get the relevant fields from + // stack.Route without holding a reference to a NIC's endpoint. + route := r.Clone() + route.Release() p := PacketInfo{ Pkt: pkt, Proto: protocol, GSO: gso, + Route: route, } select { @@ -162,7 +164,11 @@ func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, protocol tcpip.Ne } // WritePackets stores outbound packets into the channel. -func (e *Endpoint) WritePackets(_ *stack.Route, gso *stack.GSO, pkts []tcpip.PacketBuffer, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts []tcpip.PacketBuffer, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { + // Clone r then release its resource so we only get the relevant fields from + // stack.Route without holding a reference to a NIC's endpoint. 
+ route := r.Clone() + route.Release() payloadView := pkts[0].Data.ToView() n := 0 packetLoop: @@ -176,6 +182,7 @@ packetLoop: }, Proto: protocol, GSO: gso, + Route: route, } select { diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index 1ceaebfbd..4da13c5df 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -178,24 +178,9 @@ func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo return broadcastMAC, true } if header.IsV4MulticastAddress(addr) { - // RFC 1112 Host Extensions for IP Multicasting - // - // 6.4. Extensions to an Ethernet Local Network Module: - // - // An IP host group address is mapped to an Ethernet multicast - // address by placing the low-order 23-bits of the IP address - // into the low-order 23 bits of the Ethernet multicast address - // 01-00-5E-00-00-00 (hex). - return tcpip.LinkAddress([]byte{ - 0x01, - 0x00, - 0x5e, - addr[header.IPv4AddressSize-3] & 0x7f, - addr[header.IPv4AddressSize-2], - addr[header.IPv4AddressSize-1], - }), true + return header.EthernetAddressFromMulticastIPv4Address(addr), true } - return "", false + return tcpip.LinkAddress([]byte(nil)), false } // SetOption implements NetworkProtocol. diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index dc20c0fd7..60817d36d 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -408,10 +408,14 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber { // LinkAddressRequest implements stack.LinkAddressResolver. func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.LinkEndpoint) *tcpip.Error { snaddr := header.SolicitedNodeAddr(addr) + + // TODO(b/148672031): Use stack.FindRoute instead of manually creating the + // route here. Note, we would need the nicID to do this properly so the right + // NIC (associated to linkEP) is used to send the NDP NS message. r := &stack.Route{ LocalAddress: localAddr, RemoteAddress: snaddr, - RemoteLinkAddress: broadcastMAC, + RemoteLinkAddress: header.EthernetAddressFromMulticastIPv6Address(snaddr), } hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + header.ICMPv6NeighborAdvertSize) pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize)) @@ -441,23 +445,7 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack. // ResolveStaticAddress implements stack.LinkAddressResolver. func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { if header.IsV6MulticastAddress(addr) { - // RFC 2464 Transmission of IPv6 Packets over Ethernet Networks - // - // 7. Address Mapping -- Multicast - // - // An IPv6 packet with a multicast destination address DST, - // consisting of the sixteen octets DST[1] through DST[16], is - // transmitted to the Ethernet multicast address whose first - // two octets are the value 3333 hexadecimal and whose last - // four octets are the last four octets of DST. 
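The new header.EthernetAddressFromMulticastIPv4Address and header.EthernetAddressFromMulticastIPv6Address helpers centralize the RFC 1112 and RFC 2464 mappings that the ARP and ICMPv6 resolvers previously open-coded, and the NDP changes further down rely on a multicast destination resolving to a link address by pure arithmetic. A standalone sketch of that arithmetic follows; the sample addresses are illustrative and not taken from this change:

// multicast_mac.go: reproduce the RFC 1112 / RFC 2464 multicast mappings
// outside of gVisor. Sample addresses are illustrative only.
package main

import (
	"fmt"
	"net"
)

// macForIPv4Multicast places the low-order 23 bits of the group address
// into 01:00:5e:00:00:00 (RFC 1112, section 6.4).
func macForIPv4Multicast(ip net.IP) net.HardwareAddr {
	v4 := ip.To4()
	return net.HardwareAddr{0x01, 0x00, 0x5e, v4[1] & 0x7f, v4[2], v4[3]}
}

// macForIPv6Multicast prefixes 33:33 to the last four octets of the group
// address (RFC 2464, section 7).
func macForIPv6Multicast(ip net.IP) net.HardwareAddr {
	v6 := ip.To16()
	return net.HardwareAddr{0x33, 0x33, v6[12], v6[13], v6[14], v6[15]}
}

// solicitedNodeAddr derives ff02::1:ffXX:XXXX from the last three octets of
// a unicast IPv6 address (RFC 4291, section 2.7.1), which is why the NDP
// code can compute a remote link address with no resolution step.
func solicitedNodeAddr(ip net.IP) net.IP {
	v6 := ip.To16()
	snm := net.ParseIP("ff02::1:ff00:0")
	snm[13], snm[14], snm[15] = v6[13], v6[14], v6[15]
	return snm
}

func main() {
	fmt.Println(macForIPv4Multicast(net.ParseIP("224.0.0.251"))) // 01:00:5e:00:00:fb
	fmt.Println(macForIPv6Multicast(net.ParseIP("ff02::fb")))    // 33:33:00:00:00:fb
	sn := solicitedNodeAddr(net.ParseIP("fe80::1c2a:3bff:fe4d:5e6f"))
	fmt.Println(sn, macForIPv6Multicast(sn)) // ff02::1:ff4d:5e6f 33:33:ff:4d:5e:6f
}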
- return tcpip.LinkAddress([]byte{ - 0x33, - 0x33, - addr[header.IPv6AddressSize-4], - addr[header.IPv6AddressSize-3], - addr[header.IPv6AddressSize-2], - addr[header.IPv6AddressSize-1], - }), true + return header.EthernetAddressFromMulticastIPv6Address(addr), true } - return "", false + return tcpip.LinkAddress([]byte(nil)), false } diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index 7a6820643..d0e930e20 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -270,8 +270,9 @@ func (c *testContext) cleanup() { } type routeArgs struct { - src, dst *channel.Endpoint - typ header.ICMPv6Type + src, dst *channel.Endpoint + typ header.ICMPv6Type + remoteLinkAddr tcpip.LinkAddress } func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header.ICMPv6)) { @@ -292,6 +293,11 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header. t.Errorf("unexpected protocol number %d", pi.Proto) return } + + if len(args.remoteLinkAddr) != 0 && args.remoteLinkAddr != pi.Route.RemoteLinkAddress { + t.Errorf("got remote link address = %s, want = %s", pi.Route.RemoteLinkAddress, args.remoteLinkAddr) + } + ipv6 := header.IPv6(pi.Pkt.Header.View()) transProto := tcpip.TransportProtocolNumber(ipv6.NextHeader()) if transProto != header.ICMPv6ProtocolNumber { @@ -339,7 +345,7 @@ func TestLinkResolution(t *testing.T) { t.Fatalf("ep.Write(_) = _, <non-nil>, %s, want = _, <non-nil>, tcpip.ErrNoLinkAddress", err) } for _, args := range []routeArgs{ - {src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit}, + {src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit, remoteLinkAddr: header.EthernetAddressFromMulticastIPv6Address(header.SolicitedNodeAddr(lladdr1))}, {src: c.linkEP1, dst: c.linkEP0, typ: header.ICMPv6NeighborAdvert}, } { routeICMPv6Packet(t, args, func(t *testing.T, icmpv6 header.ICMPv6) { diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go index 31294345d..6123fda33 100644 --- a/pkg/tcpip/stack/ndp.go +++ b/pkg/tcpip/stack/ndp.go @@ -538,6 +538,14 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address) *tcpip.Error { r := makeRoute(header.IPv6ProtocolNumber, header.IPv6Any, snmc, ndp.nic.linkEP.LinkAddress(), ref, false, false) defer r.Release() + // Route should resolve immediately since snmc is a multicast address so a + // remote link address can be calculated without a resolution process. + if c, err := r.Resolve(nil); err != nil { + log.Fatalf("ndp: error when resolving route to send NDP NS for DAD (%s -> %s on NIC(%d)): %s", header.IPv6Any, snmc, ndp.nic.ID(), err) + } else if c != nil { + log.Fatalf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.nic.ID()) + } + hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6NeighborSolicitMinimumSize) pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize)) pkt.SetType(header.ICMPv6NeighborSolicit) @@ -1197,6 +1205,15 @@ func (ndp *ndpState) startSolicitingRouters() { r := makeRoute(header.IPv6ProtocolNumber, header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.linkEP.LinkAddress(), ref, false, false) defer r.Release() + // Route should resolve immediately since + // header.IPv6AllRoutersMulticastAddress is a multicast address so a + // remote link address can be calculated without a resolution process. 
+ if c, err := r.Resolve(nil); err != nil { + log.Fatalf("ndp: error when resolving route to send NDP RS (%s -> %s on NIC(%d)): %s", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID(), err) + } else if c != nil { + log.Fatalf("ndp: route resolution not immediate for route to send NDP RS (%s -> %s on NIC(%d))", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID()) + } + payloadSize := header.ICMPv6HeaderSize + header.NDPRSMinimumSize hdr := buffer.NewPrependable(header.IPv6MinimumSize + payloadSize) pkt := header.ICMPv6(hdr.Prepend(payloadSize)) diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index bc7cfbcb4..8af8565f7 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -336,6 +336,7 @@ func TestDADResolve(t *testing.T) { opts.NDPConfigs.DupAddrDetectTransmits = test.dupAddrDetectTransmits e := channel.New(int(test.dupAddrDetectTransmits), 1280, linkAddr1) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired s := stack.New(opts) if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) @@ -413,6 +414,12 @@ func TestDADResolve(t *testing.T) { t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber) } + // Make sure the right remote link address is used. + snmc := header.SolicitedNodeAddr(addr1) + if want := header.EthernetAddressFromMulticastIPv6Address(snmc); p.Route.RemoteLinkAddress != want { + t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want) + } + // Check NDP NS packet. // // As per RFC 4861 section 4.3, a possible option is the Source Link @@ -420,7 +427,7 @@ func TestDADResolve(t *testing.T) { // address of the packet is the unspecified address. checker.IPv6(t, p.Pkt.Header.View().ToVectorisedView().First(), checker.SrcAddr(header.IPv6Any), - checker.DstAddr(header.SolicitedNodeAddr(addr1)), + checker.DstAddr(snmc), checker.TTL(header.NDPHopLimit), checker.NDPNS( checker.NDPNSTargetAddress(addr1), @@ -3292,6 +3299,7 @@ func TestRouterSolicitation(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() e := channel.New(int(test.maxRtrSolicit), 1280, linkAddr1) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired waitForPkt := func(timeout time.Duration) { t.Helper() ctx, _ := context.WithTimeout(context.Background(), timeout) @@ -3304,6 +3312,12 @@ func TestRouterSolicitation(t *testing.T) { if p.Proto != header.IPv6ProtocolNumber { t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber) } + + // Make sure the right remote link address is used. + if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); p.Route.RemoteLinkAddress != want { + t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want) + } + checker.IPv6(t, p.Pkt.Header.View(), checker.SrcAddr(header.IPv6Any), diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index 517f4b941..f565aafb2 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -225,7 +225,9 @@ func (r *Route) Release() { // Clone Clone a route such that the original one can be released and the new // one will remain valid. 
func (r *Route) Clone() Route { - r.ref.incRef() + if r.ref != nil { + r.ref.incRef() + } return *r } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 7057b110e..b793f1d74 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -795,6 +795,8 @@ func (s *Stack) Forwarding() bool { // SetRouteTable assigns the route table to be used by this stack. It // specifies which NIC to use for given destination address ranges. +// +// This method takes ownership of the table. func (s *Stack) SetRouteTable(table []tcpip.Route) { s.mu.Lock() defer s.mu.Unlock() @@ -809,6 +811,13 @@ func (s *Stack) GetRouteTable() []tcpip.Route { return append([]tcpip.Route(nil), s.routeTable...) } +// AddRoute appends a route to the route table. +func (s *Stack) AddRoute(route tcpip.Route) { + s.mu.Lock() + defer s.mu.Unlock() + s.routeTable = append(s.routeTable, route) +} + // NewEndpoint creates a new transport layer endpoint of the given protocol. func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { t, ok := s.transportProtocols[transport] diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 6101f2945..08afb7c17 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -271,8 +271,8 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i return n, nil } -// createEndpoint creates a new endpoint in connected state and then performs -// the TCP 3-way handshake. +// createEndpointAndPerformHandshake creates a new endpoint in connected state +// and then performs the TCP 3-way handshake. func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { // Create new endpoint. irs := s.sequenceNumber diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 9ff7ac261..5c5397823 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -989,6 +989,10 @@ func (e *endpoint) transitionToStateCloseLocked() { // to any other listening endpoint. We reply with RST if we cannot find one. func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { ep := e.stack.FindTransportEndpoint(e.NetProto, e.TransProto, e.ID, &s.route) + if ep == nil && e.NetProto == header.IPv6ProtocolNumber && e.EndpointInfo.TransportEndpointInfo.ID.LocalAddress.To4() != "" { + // Dual-stack socket, try IPv4. + ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, &s.route) + } if ep == nil { replyWithReset(s) s.decRef() diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 730ac4292..1e9a0dea3 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -1082,7 +1082,11 @@ func (c *Context) SACKEnabled() bool { // SetGSOEnabled enables or disables generic segmentation offload. 
func (c *Context) SetGSOEnabled(enable bool) { - c.linkEP.GSO = enable + if enable { + c.linkEP.LinkEPCapabilities |= stack.CapabilityHardwareGSO + } else { + c.linkEP.LinkEPCapabilities &^= stack.CapabilityHardwareGSO + } } // MSSWithoutOptions returns the value for the MSS used by the stack when no diff --git a/runsc/BUILD b/runsc/BUILD index b35b41d81..375241921 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -117,4 +117,5 @@ sh_test( srcs = ["version_test.sh"], args = ["$(location :runsc)"], data = [":runsc"], + tags = ["noguitar"], ) diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 4fb9adca6..f8d351c7b 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -174,6 +174,18 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_LSEEK: {}, syscall.SYS_MADVISE: {}, syscall.SYS_MINCORE: {}, + // Used by the Go runtime as a temporarily workaround for a Linux + // 5.2-5.4 bug. + // + // See src/runtime/os_linux_x86.go. + // + // TODO(b/148688965): Remove once this is gone from Go. + syscall.SYS_MLOCK: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(4096), + }, + }, syscall.SYS_MMAP: []seccomp.Rule{ { seccomp.AllowAny{}, diff --git a/runsc/container/BUILD b/runsc/container/BUILD index e21431e4c..0aaeea3a8 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -30,7 +30,7 @@ go_library( go_test( name = "container_test", - size = "medium", + size = "large", srcs = [ "console_test.go", "container_test.go", diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index a1792330f..1dce36965 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -128,6 +128,18 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_MADVISE: {}, unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init(). syscall.SYS_MKDIRAT: {}, + // Used by the Go runtime as a temporarily workaround for a Linux + // 5.2-5.4 bug. + // + // See src/runtime/os_linux_x86.go. + // + // TODO(b/148688965): Remove once this is gone from Go. + syscall.SYS_MLOCK: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(4096), + }, + }, syscall.SYS_MMAP: []seccomp.Rule{ { seccomp.AllowAny{}, diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index fb22eae39..edf2e809a 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -136,8 +136,13 @@ func FindFile(path string) (string, error) { // TestConfig returns the default configuration to use in tests. Note that // 'RootDir' must be set by caller if required. func TestConfig() *boot.Config { + logDir := "" + if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { + logDir = dir + "/" + } return &boot.Config{ Debug: true, + DebugLog: logDir, LogFormat: "text", DebugLogFormat: "text", AlsoLogToStderr: true, @@ -434,43 +439,40 @@ func IsStatic(filename string) (bool, error) { return true, nil } -// TestBoundsForShard calculates the beginning and end indices for the test -// based on the TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. The -// returned ints are the beginning (inclusive) and end (exclusive) of the -// subslice corresponding to the shard. If either of the env vars are not -// present, then the function will return bounds that include all tests. If -// there are more shards than there are tests, then the returned list may be -// empty. 
-func TestBoundsForShard(numTests int) (int, int, error) { +// TestIndicesForShard returns indices for this test shard based on the +// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. +// +// If either of the env vars are not present, then the function will return all +// tests. If there are more shards than there are tests, then the returned list +// may be empty. +func TestIndicesForShard(numTests int) ([]int, error) { var ( - begin = 0 - end = numTests + shardIndex = 0 + shardTotal = 1 ) - indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") - if indexStr == "" || totalStr == "" { - return begin, end, nil - } - // Parse index and total to ints. - shardIndex, err := strconv.Atoi(indexStr) - if err != nil { - return 0, 0, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) - } - shardTotal, err := strconv.Atoi(totalStr) - if err != nil { - return 0, 0, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) + indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") + if indexStr != "" && totalStr != "" { + // Parse index and total to ints. + var err error + shardIndex, err = strconv.Atoi(indexStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) + } + shardTotal, err = strconv.Atoi(totalStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) + } } // Calculate! - shardSize := int(math.Ceil(float64(numTests) / float64(shardTotal))) - begin = shardIndex * shardSize - end = ((shardIndex + 1) * shardSize) - if begin > numTests { - // Nothing to run. - return 0, 0, nil - } - if end > numTests { - end = numTests + var indices []int + numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) + for i := 0; i < numBlocks; i++ { + pick := i*shardTotal + shardIndex + if pick < numTests { + indices = append(indices, pick) + } } - return begin, end, nil + return indices, nil } diff --git a/scripts/common_build.sh b/scripts/common_build.sh index a473a88a4..2c2a826c7 100755 --- a/scripts/common_build.sh +++ b/scripts/common_build.sh @@ -32,21 +32,21 @@ declare -r BAZEL_FLAGS=( "--keep_going" "--verbose_failures=true" ) +BAZEL_RBE_AUTH_FLAGS="" +BAZEL_RBE_FLAGS="" if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then - declare -r BAZEL_RBE_AUTH_FLAGS=( - "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" - ) - declare -r BAZEL_RBE_FLAGS=("--config=remote") + declare -r BAZEL_RBE_AUTH_FLAGS="--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" + declare -r BAZEL_RBE_FLAGS="--config=remote" fi # Wrap bazel. function build() { - bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | + bazel build "${BAZEL_RBE_FLAGS}" "${BAZEL_RBE_AUTH_FLAGS}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }' } function test() { - bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" + bazel test "${BAZEL_RBE_FLAGS}" "${BAZEL_RBE_AUTH_FLAGS}" "${BAZEL_FLAGS[@]}" "$@" } function run() { diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh new file mode 100755 index 000000000..fc6bef79c --- /dev/null +++ b/scripts/packetdrill_tests.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +install_runsc_for_test runsc-d +test_runsc $(bazel query "attr(tags, manual, tests(//test/packetdrill/...))") diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index e9f0978eb..bd6059921 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -256,10 +256,12 @@ func (FilterInputDropAll) LocalAction(ip net.IP) error { // misunderstand and save the wrong tables. type FilterInputMultiUDPRules struct{} +// Name implements TestCase.Name. func (FilterInputMultiUDPRules) Name() string { return "FilterInputMultiUDPRules" } +// ContainerAction implements TestCase.ContainerAction. func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err @@ -270,6 +272,7 @@ func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { return filterTable("-L") } +// LocalAction implements TestCase.LocalAction. func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error { // No-op. return nil @@ -279,10 +282,12 @@ func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error { // specified. type FilterInputRequireProtocolUDP struct{} +// Name implements TestCase.Name. func (FilterInputRequireProtocolUDP) Name() string { return "FilterInputRequireProtocolUDP" } +// ContainerAction implements TestCase.ContainerAction. func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error { if err := filterTable("-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil { return errors.New("expected iptables to fail with out \"-p udp\", but succeeded") @@ -290,6 +295,7 @@ func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error { return nil } +// LocalAction implements TestCase.LocalAction. func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP) error { // No-op. 
return nil diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD new file mode 100644 index 000000000..d113555b1 --- /dev/null +++ b/test/packetdrill/BUILD @@ -0,0 +1,8 @@ +load("defs.bzl", "packetdrill_test") + +package(licenses = ["notice"]) + +packetdrill_test( + name = "fin_wait2_timeout", + scripts = ["fin_wait2_timeout.pkt"], +) diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile new file mode 100644 index 000000000..bd4451355 --- /dev/null +++ b/test/packetdrill/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:bionic + +RUN apt-get update +RUN apt-get install -y net-tools git iptables iputils-ping netcat tcpdump jq tar +RUN hash -r +RUN git clone --branch packetdrill-v2.0 \ + https://github.com/google/packetdrill.git +RUN cd packetdrill/gtests/net/packetdrill && ./configure && \ + apt-get install -y bison flex make && make diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl new file mode 100644 index 000000000..8623ce7b1 --- /dev/null +++ b/test/packetdrill/defs.bzl @@ -0,0 +1,87 @@ +"""Defines a rule for packetdrill test targets.""" + +def _packetdrill_test_impl(ctx): + test_runner = ctx.executable._test_runner + runner = ctx.actions.declare_file("%s-runner" % ctx.label.name) + + script_paths = [] + for script in ctx.files.scripts: + script_paths.append(script.short_path) + runner_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . -type f -exec chmod a+rx {} \\;", + "find . -type d -exec chmod a+rx {} \\;", + "%s %s --init_script %s $@ -- %s\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files._init_script[0].short_path, + " ".join(script_paths), + ), + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files._init_script + ctx.files.scripts, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = runner, runfiles = runfiles)] + +_packetdrill_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "host", + allow_files = True, + default = "packetdrill_test.sh", + ), + "_init_script": attr.label( + allow_single_file = True, + default = "packetdrill_setup.sh", + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + "scripts": attr.label_list( + mandatory = True, + allow_files = True, + ), + }, + test = True, + implementation = _packetdrill_test_impl, +) + +_PACKETDRILL_TAGS = ["local", "manual"] + +def packetdrill_linux_test(name, **kwargs): + if "tags" not in kwargs: + kwargs["tags"] = _PACKETDRILL_TAGS + _packetdrill_test( + name = name + "_linux_test", + flags = ["--dut_platform", "linux"], + **kwargs + ) + +def packetdrill_netstack_test(name, **kwargs): + if "tags" not in kwargs: + kwargs["tags"] = _PACKETDRILL_TAGS + _packetdrill_test( + name = name + "_netstack_test", + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. 
+ flags = ["--dut_platform", "netstack", "--runtime", "runsc-d"], + **kwargs + ) + +def packetdrill_test(**kwargs): + packetdrill_linux_test(**kwargs) + packetdrill_netstack_test(**kwargs) diff --git a/test/packetdrill/fin_wait2_timeout.pkt b/test/packetdrill/fin_wait2_timeout.pkt new file mode 100644 index 000000000..613f0bec9 --- /dev/null +++ b/test/packetdrill/fin_wait2_timeout.pkt @@ -0,0 +1,23 @@ +// Test that a socket in FIN_WAIT_2 eventually times out and a subsequent +// packet generates a RST. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> ++0 > S. 0:0(0) ack 1 <...> ++0 < P. 1:1(0) ack 1 win 257 + ++0.100 accept(3, ..., ...) = 4 +// set FIN_WAIT2 timeout to 1 seconds. ++0.100 setsockopt(4, SOL_TCP, TCP_LINGER2, [1], 4) = 0 ++0 close(4) = 0 + ++0 > F. 1:1(0) ack 1 <...> ++0 < . 1:1(0) ack 2 win 257 + ++1.1 < . 1:1(0) ack 2 win 257 ++0 > R 2:2(0) win 0 diff --git a/test/packetdrill/packetdrill_setup.sh b/test/packetdrill/packetdrill_setup.sh new file mode 100755 index 000000000..b858072f0 --- /dev/null +++ b/test/packetdrill/packetdrill_setup.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script runs both within the sentry context and natively. It should tweak +# TCP parameters to match expectations found in the script files. +sysctl -q net.ipv4.tcp_sack=1 +sysctl -q net.ipv4.tcp_rmem="4096 2097152 $((8*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 2097152 $((8*1024*1024))" + +# There may be errors from the above, but they will show up in the test logs and +# we always want to proceed from this point. It's possible that values were +# already set correctly and the nodes were not available in the namespace. +exit 0 diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh new file mode 100755 index 000000000..0b22dfd5c --- /dev/null +++ b/test/packetdrill/packetdrill_test.sh @@ -0,0 +1,225 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run a packetdrill test. Two docker containers are made, one for the +# Device-Under-Test (DUT) and one for the test runner. Each is attached with +# two networks, one for control packets that aid the test and one for test +# packets which are sent as part of the test and observed for correctness. 
+ +set -euxo pipefail + +function failure() { + local lineno=$1 + local msg=$2 + local filename="$0" + echo "FAIL: $filename:$lineno: $msg" +} +trap 'failure ${LINENO} "$BASH_COMMAND"' ERR + +declare -r LONGOPTS="dut_platform:,init_script:,runtime:" + +# Don't use declare below so that the error from getopt will end the script. +PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") + +eval set -- "$PARSED" + +while true; do + case "$1" in + --dut_platform) + # Either "linux" or "netstack". + declare -r DUT_PLATFORM="$2" + shift 2 + ;; + --init_script) + declare -r INIT_SCRIPT="$2" + shift 2 + ;; + --runtime) + # Not readonly because there might be multiple --runtime arguments and we + # want to use just the last one. Only used if --dut_platform is + # "netstack". + declare RUNTIME="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Programming error" + exit 3 + esac +done + +# All the other arguments are scripts. +declare -r scripts="$@" + +# Check that the required flags are defined in a way that is safe for "set -u". +if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then + if [[ -z "${RUNTIME-}" ]]; then + echo "FAIL: Missing --runtime argument: ${RUNTIME-}" + exit 2 + fi + declare -r RUNTIME_ARG="--runtime ${RUNTIME}" +elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then + declare -r RUNTIME_ARG="" +else + echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" + exit 2 +fi +if [[ ! -x "${INIT_SCRIPT-}" ]]; then + echo "FAIL: Bad or missing --init_script: ${INIT_SCRIPT-}" + exit 2 +fi + +# Variables specific to the control network and interface start with CTRL_. +# Variables specific to the test network and interface start with TEST_. +# Variables specific to the DUT start with DUT_. +# Variables specific to the test runner start with TEST_RUNNER_. +declare -r PACKETDRILL="/packetdrill/gtests/net/packetdrill/packetdrill" +# Use random numbers so that test networks don't collide. +declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +declare -r TEST_NET="test_net-${RANDOM}${RANDOM}" +declare -r tolerance_usecs=100000 +# On both DUT and test runner, testing packets are on the eth2 interface. +declare -r TEST_DEVICE="eth2" +# Number of bits in the *_NET_PREFIX variables. +declare -r NET_MASK="24" +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} +# Last bits of the DUT's IP address. +declare -r DUT_NET_SUFFIX=".10" +# Control port. +declare -r CTRL_PORT="40000" +# Last bits of the test runner's IP address. +declare -r TEST_RUNNER_NET_SUFFIX=".20" +declare -r TIMEOUT="60" +declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetdrill" + +# Make sure that docker is installed. +docker --version + +function finish { + local cleanup_success=1 + for net in "${CTRL_NET}" "${TEST_NET}"; do + # Kill all processes attached to ${net}. + for docker_command in "kill" "rm"; do + (docker network inspect "${net}" \ + --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ + | xargs -r docker "${docker_command}") || \ + cleanup_success=0 + done + # Remove the network. + docker network rm "${net}" || \ + cleanup_success=0 + done + + if ((!$cleanup_success)); then + echo "FAIL: Cleanup command failed" + exit 4 + fi +} +trap finish EXIT + +# Subnet for control packets between test runner and DUT. +declare CTRL_NET_PREFIX=$(new_net_prefix) +while ! 
docker network create \ + "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do + sleep 0.1 + declare CTRL_NET_PREFIX=$(new_net_prefix) +done + +# Subnet for the packets that are part of the test. +declare TEST_NET_PREFIX=$(new_net_prefix) +while ! docker network create \ + "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do + sleep 0.1 + declare TEST_NET_PREFIX=$(new_net_prefix) +done + +docker pull "${IMAGE_TAG}" + +# Create the DUT container and connect to network. +DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker start "${DUT}" + +# Create the test runner container and connect to network. +TEST_RUNNER=$(docker create --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \ + || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \ + || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false) +docker start "${TEST_RUNNER}" + +# Run tcpdump in the test runner unbuffered, without dns resolution, just on the +# interface with the test packets. +docker exec -t ${TEST_RUNNER} tcpdump -U -n -i "${TEST_DEVICE}" & + +# Start a packetdrill server on the test_runner. The packetdrill server sends +# packets and asserts that they are received. +docker exec -d "${TEST_RUNNER}" \ + ${PACKETDRILL} --wire_server --wire_server_dev="${TEST_DEVICE}" \ + --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --wire_server_port="${CTRL_PORT}" \ + --local_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --remote_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" + +# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will +# issue a RST. To prevent this IPtables can be used to filter those out. +docker exec "${TEST_RUNNER}" \ + iptables -A OUTPUT -p tcp --tcp-flags RST RST -j DROP + +# Wait for the packetdrill server on the test runner to come. Attempt to +# connect to it from the DUT every 100 milliseconds until success. +while ! docker exec "${DUT}" \ + nc -zv "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${CTRL_PORT}"; do + sleep 0.1 +done + +# Copy the packetdrill setup script to the DUT. +docker cp -L "${INIT_SCRIPT}" "${DUT}:packetdrill_setup.sh" + +# Copy the packetdrill scripts to the DUT. +declare -a dut_scripts +for script in $scripts; do + docker cp -L "${script}" "${DUT}:$(basename ${script})" + dut_scripts+=("/$(basename ${script})") +done + +# Start a packetdrill client on the DUT. The packetdrill client runs POSIX +# socket commands and also sends instructions to the server. +docker exec -t "${DUT}" \ + ${PACKETDRILL} --wire_client --wire_client_dev="${TEST_DEVICE}" \ + --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --wire_server_port="${CTRL_PORT}" \ + --local_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" \ + --remote_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --init_scripts=/packetdrill_setup.sh \ + --tolerance_usecs="${tolerance_usecs}" "${dut_scripts[@]}" + +echo PASS: No errors. 
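The runner.go change below switches from the old contiguous [begin, end) bounds to the new TestIndicesForShard helper, which assigns tests to shards in a strided fashion: shard k out of N runs tests k, k+N, k+2N, and so on. A standalone sketch of the striding, with the shard index and total hard-coded here instead of read from TEST_SHARD_INDEX and TEST_TOTAL_SHARDS:

// shard_indices.go: illustrate the strided shard assignment implemented by
// TestIndicesForShard. The shard values are hard-coded for the example.
package main

import (
	"fmt"
	"math"
)

func indicesForShard(numTests, shardIndex, shardTotal int) []int {
	var indices []int
	numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal)))
	for i := 0; i < numBlocks; i++ {
		pick := i*shardTotal + shardIndex
		if pick < numTests {
			indices = append(indices, pick)
		}
	}
	return indices
}

func main() {
	// With 10 tests and 3 shards, shard 1 runs tests 1, 4 and 7; the old
	// contiguous scheme would have given it tests 4 through 7 instead.
	fmt.Println(indicesForShard(10, 1, 3)) // [1 4 7]
}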
diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go index bec37c69d..ddb890dbc 100644 --- a/test/runtimes/runner.go +++ b/test/runtimes/runner.go @@ -20,7 +20,6 @@ import ( "flag" "fmt" "io" - "log" "os" "sort" "strings" @@ -101,17 +100,15 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int // shard. tests := strings.Fields(list) sort.Strings(tests) - begin, end, err := testutil.TestBoundsForShard(len(tests)) + indices, err := testutil.TestIndicesForShard(len(tests)) if err != nil { return nil, fmt.Errorf("TestsForShard() failed: %v", err) } - log.Printf("Got bounds [%d:%d) for shard out of %d total tests", begin, end, len(tests)) - tests = tests[begin:end] var itests []testing.InternalTest - for _, tc := range tests { + for _, tci := range indices { // Capture tc in this scope. - tc := tc + tc := tests[tci] itests = append(itests, testing.InternalTest{ Name: tc, F: func(t *testing.T) { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 40e974314..31d239c0e 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -46,6 +46,15 @@ syscall_test(test = "//test/syscalls/linux:brk_test") syscall_test(test = "//test/syscalls/linux:socket_test") syscall_test( + size = "large", + shard_count = 50, + # Takes too long for TSAN. Since this is kind of a stress test that doesn't + # involve much concurrency, TSAN's usefulness here is limited anyway. + tags = ["nogotsan"], + test = "//test/syscalls/linux:socket_stress_test", +) + +syscall_test( add_overlay = True, test = "//test/syscalls/linux:chdir_test", ) @@ -225,7 +234,6 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:mknod_test", - use_tmpfs = True, # mknod is not supported over gofer. ) syscall_test( diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index 2751fb4e7..9883aef61 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -102,7 +102,8 @@ TEST(Syscall32Bit, Int80) { } TEST(Syscall32Bit, Sysenter) { - if (PlatformSupport32Bit() == PlatformSupport::Allowed && + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && GetCPUVendor() == CPUVendor::kAMD) { // SYSENTER is an illegal instruction in compatibility mode on AMD. EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), @@ -133,7 +134,8 @@ TEST(Syscall32Bit, Sysenter) { } TEST(Syscall32Bit, Syscall) { - if (PlatformSupport32Bit() == PlatformSupport::Allowed && + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && GetCPUVendor() == CPUVendor::kIntel) { // SYSCALL is an illegal instruction in compatibility mode on Intel. 
EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 6f57c9755..f2e3c7072 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "select_system") +load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gtest", "select_arch", "select_system") package( default_visibility = ["//:sandbox"], @@ -11,6 +11,7 @@ exports_files( "socket_inet_loopback.cc", "socket_ip_loopback_blocking.cc", "socket_ip_tcp_loopback.cc", + "socket_ip_udp_loopback.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", "udp_socket.cc", @@ -82,14 +83,14 @@ cc_library( srcs = ["base_poll_test.cc"], hdrs = ["base_poll_test.h"], deps = [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -99,11 +100,11 @@ cc_library( hdrs = ["file_base.h"], deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -130,11 +131,12 @@ cc_library( hdrs = ["socket_test_util.h"], defines = select_system(), deps = default_net_util() + [ - "@com_google_googletest//:gtest", + gtest, "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", "//test/util:file_descriptor", "//test/util:posix_error", "//test/util:temp_path", @@ -155,9 +157,9 @@ cc_library( hdrs = ["unix_domain_socket_test_util.h"], deps = [ ":socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], ) @@ -179,30 +181,33 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) cc_binary( name = "32bit_test", testonly = 1, - srcs = ["32bit.cc"], + srcs = select_arch( + amd64 = ["32bit.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:memory_util", "//test/util:platform_util", "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -215,9 +220,9 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -230,9 +235,9 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -244,10 +249,10 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", 
- "@com_google_googletest//:gtest", ], ) @@ -259,12 +264,12 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -277,12 +282,11 @@ cc_binary( ], linkstatic = 1, deps = [ - # The heapchecker doesn't recognize that io_destroy munmaps. - "@com_google_googletest//:gtest", - "@com_google_absl//absl/strings", "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:proc_util", @@ -299,12 +303,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -317,9 +321,9 @@ cc_binary( "//:sandbox", ], deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -331,9 +335,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -345,9 +349,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -369,10 +373,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -385,10 +389,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -401,14 +405,14 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -421,12 +425,12 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_googletest//:gtest", ], ) @@ -440,12 +444,12 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:mount_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -455,9 +459,9 @@ cc_binary( srcs = ["clock_getres.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -467,11 +471,11 @@ cc_binary( srcs = ["clock_gettime.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", 
"//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -481,13 +485,13 @@ cc_binary( srcs = ["concurrency.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:platform_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -500,9 +504,9 @@ cc_binary( ":socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -513,10 +517,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -527,9 +531,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -541,11 +545,11 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -558,10 +562,10 @@ cc_binary( "//test/util:epoll_util", "//test/util:eventfd_util", "//test/util:file_descriptor", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -573,10 +577,10 @@ cc_binary( deps = [ "//test/util:epoll_util", "//test/util:eventfd_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -586,12 +590,12 @@ cc_binary( srcs = ["exceptions.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:platform_util", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -601,10 +605,10 @@ cc_binary( srcs = ["getcpu.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -614,10 +618,10 @@ cc_binary( srcs = ["getcpu.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -627,33 +631,36 @@ cc_binary( srcs = ["getrusage.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) cc_binary( name = "exec_binary_test", testonly = 1, - srcs = ["exec_binary.cc"], + srcs = select_arch( + amd64 = ["exec_binary.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:proc_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -676,15 +683,15 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + 
"@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest", ], ) @@ -695,11 +702,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -712,10 +719,10 @@ cc_binary( ":file_base", "//test/util:cleanup", "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -725,9 +732,9 @@ cc_binary( srcs = ["fault.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -738,10 +745,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -755,18 +762,18 @@ cc_binary( "//test/util:cleanup", "//test/util:eventfd_util", "//test/util:fs_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:save_util", "//test/util:temp_path", "//test/util:test_util", "//test/util:timer_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -780,15 +787,15 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -799,40 +806,46 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) cc_binary( name = "fpsig_fork_test", testonly = 1, - srcs = ["fpsig_fork.cc"], + srcs = select_arch( + amd64 = ["fpsig_fork.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) cc_binary( name = "fpsig_nested_test", testonly = 1, - srcs = ["fpsig_nested.cc"], + srcs = select_arch( + amd64 = ["fpsig_nested.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -843,10 +856,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", 
"//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -857,10 +870,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -872,6 +885,9 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + gtest, "//test/util:memory_util", "//test/util:save_util", "//test/util:temp_path", @@ -880,9 +896,6 @@ cc_binary( "//test/util:thread_util", "//test/util:time_util", "//test/util:timer_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -895,12 +908,12 @@ cc_binary( "//test/util:eventfd_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -910,9 +923,9 @@ cc_binary( srcs = ["getrandom.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -945,10 +958,10 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -972,9 +985,9 @@ cc_binary( ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -985,6 +998,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -992,9 +1008,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1006,15 +1019,15 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1027,14 +1040,14 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1045,10 +1058,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1059,6 +1072,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:logging", "//test/util:memory_util", 
"//test/util:multiprocess_util", @@ -1066,7 +1080,6 @@ cc_binary( "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1077,12 +1090,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/memory", + gtest, "//test/util:memory_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest", ], ) @@ -1092,11 +1105,11 @@ cc_binary( srcs = ["mincore.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1109,10 +1122,10 @@ cc_binary( ":temp_umask", "//test/util:capability_util", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1123,11 +1136,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -1139,12 +1152,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:cleanup", + gtest, "//test/util:memory_util", "//test/util:multiprocess_util", "//test/util:rlimit_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1157,13 +1170,13 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:memory_util", "//test/util:multiprocess_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1176,6 +1189,9 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:mount_util", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -1183,9 +1199,6 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1195,10 +1208,9 @@ cc_binary( srcs = ["mremap.cc"], linkstatic = 1, deps = [ - # The heap check fails due to MremapDeathTest - "@com_google_googletest//:gtest", - "@com_google_absl//absl/strings", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:multiprocess_util", @@ -1230,9 +1242,9 @@ cc_binary( srcs = ["munmap.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1249,14 +1261,14 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1270,10 +1282,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - 
"@com_google_googletest//:gtest", ], ) @@ -1287,11 +1299,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1305,11 +1317,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1321,16 +1333,16 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:pty_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1342,12 +1354,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:posix_error", "//test/util:pty_util", "//test/util:test_main", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1360,12 +1372,12 @@ cc_binary( "//test/syscalls/linux:socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1375,13 +1387,13 @@ cc_binary( srcs = ["pause.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1393,15 +1405,15 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1414,13 +1426,13 @@ cc_binary( ":base_poll_test", "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1431,11 +1443,11 @@ cc_binary( linkstatic = 1, deps = [ ":base_poll_test", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", 
"//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1446,9 +1458,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1460,12 +1472,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_googletest//:gtest", ], ) @@ -1476,13 +1488,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_googletest//:gtest", ], ) @@ -1493,10 +1505,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1507,6 +1519,8 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:temp_path", @@ -1514,8 +1528,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1529,13 +1541,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1547,11 +1559,11 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1565,6 +1577,10 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:temp_path", @@ -1572,10 +1588,6 @@ cc_binary( "//test/util:thread_util", "//test/util:time_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1589,11 +1601,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1605,17 +1617,17 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", + gtest, "//test/util:memory_util", "//test/util:posix_error", 
"//test/util:proc_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest", ], ) @@ -1629,6 +1641,8 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -1636,8 +1650,6 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1648,11 +1660,11 @@ cc_binary( linkstatic = 1, deps = [ ":base_poll_test", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1662,6 +1674,9 @@ cc_binary( srcs = ["ptrace.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:platform_util", @@ -1669,9 +1684,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:time_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1681,10 +1693,10 @@ cc_binary( srcs = ["pwrite64.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1698,12 +1710,12 @@ cc_binary( deps = [ ":file_base", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1717,11 +1729,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1735,10 +1747,10 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1752,10 +1764,10 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1766,10 +1778,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1780,10 +1792,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1799,13 +1811,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + 
"@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1820,12 +1832,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1839,11 +1851,11 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1867,11 +1879,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/syscalls/linux/rseq:lib", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1882,11 +1894,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + gtest, "//test/util:logging", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1896,9 +1908,9 @@ cc_binary( srcs = ["sched.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1908,9 +1920,9 @@ cc_binary( srcs = ["sched_yield.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1920,6 +1932,8 @@ cc_binary( srcs = ["seccomp.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:multiprocess_util", @@ -1927,8 +1941,6 @@ cc_binary( "//test/util:proc_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1940,14 +1952,14 @@ cc_binary( deps = [ ":base_poll_test", "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:rlimit_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1959,13 +1971,13 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1977,12 +1989,12 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1993,13 +2005,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - 
"@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2009,9 +2021,9 @@ cc_binary( srcs = ["sigaction.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2026,28 +2038,34 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:fs_util", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) cc_binary( name = "sigiret_test", testonly = 1, - srcs = ["sigiret.cc"], + srcs = select_arch( + amd64 = ["sigiret.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:timer_util", - "@com_google_googletest//:gtest", - ], + ] + select_arch( + amd64 = [], + arm64 = ["//test/util:test_main"], + ), ) cc_binary( @@ -2057,14 +2075,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + gtest, "//test/util:logging", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -2074,10 +2092,10 @@ cc_binary( srcs = ["sigprocmask.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2087,13 +2105,13 @@ cc_binary( srcs = ["sigstop.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2104,13 +2122,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2126,14 +2144,30 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) +cc_binary( + name = "socket_stress_test", + testonly = 1, + srcs = [ + "socket_generic_stress.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "socket_unix_dgram_test_cases", testonly = 1, @@ -2142,8 +2176,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2156,8 +2190,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2173,11 +2207,11 @@ cc_library( ], deps = [ ":socket_test_util", - "//test/util:test_util", - "//test/util:thread_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", + 
"//test/util:thread_util", ], alwayslink = 1, ) @@ -2194,8 +2228,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2212,9 +2246,9 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:memory_util", "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2232,8 +2266,8 @@ cc_library( ":ip_socket_test_util", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2250,8 +2284,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2268,9 +2302,9 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) @@ -2287,8 +2321,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2305,8 +2339,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2369,9 +2403,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2401,9 +2435,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2433,9 +2467,9 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2533,10 +2567,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2552,10 +2586,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2571,10 +2605,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2620,9 +2654,9 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2701,15 +2735,15 @@ cc_binary( ":ip_socket_test_util", ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:save_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2721,9 +2755,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, 
"//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2735,12 +2769,14 @@ cc_binary( deps = [ ":socket_netlink_util", ":socket_test_util", + "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", ], ) @@ -2753,9 +2789,9 @@ cc_binary( ":socket_netlink_util", ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2773,9 +2809,9 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) @@ -2792,11 +2828,11 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2813,10 +2849,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2833,10 +2869,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2853,11 +2889,11 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2874,8 +2910,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2892,10 +2928,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2989,9 +3025,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3003,9 +3039,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3017,9 +3053,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3034,9 +3070,9 @@ cc_binary( ":socket_blocking_test_cases", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3051,9 +3087,9 @@ cc_binary( ":ip_socket_test_util", ":socket_blocking_test_cases", ":socket_test_util", + gtest, 
"//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3068,9 +3104,9 @@ cc_binary( ":socket_non_stream_blocking_test_cases", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3085,9 +3121,9 @@ cc_binary( ":ip_socket_test_util", ":socket_non_stream_blocking_test_cases", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3103,9 +3139,9 @@ cc_binary( ":socket_unix_cmsg_test_cases", ":socket_unix_test_cases", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3117,9 +3153,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3131,9 +3167,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3146,10 +3182,10 @@ cc_binary( ":socket_netlink_util", ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:endian", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", ], ) @@ -3165,12 +3201,12 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3181,11 +3217,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3199,12 +3235,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3217,10 +3253,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3230,10 +3266,10 @@ cc_binary( srcs = ["sync.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3243,10 +3279,10 @@ cc_binary( srcs = ["sysinfo.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3256,9 +3292,9 @@ cc_binary( srcs = ["syslog.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3268,10 +3304,10 @@ cc_binary( srcs = ["sysret.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3283,12 +3319,12 @@ cc_binary( deps = [ ":socket_test_util", 
"//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3298,11 +3334,11 @@ cc_binary( srcs = ["tgkill.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -3312,10 +3348,10 @@ cc_binary( srcs = ["time.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3340,15 +3376,15 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3358,11 +3394,11 @@ cc_binary( srcs = ["tkill.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -3376,11 +3412,11 @@ cc_binary( "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3396,12 +3432,12 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -3424,9 +3460,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3437,14 +3473,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:uid_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3455,11 +3491,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3472,11 +3508,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3486,11 +3522,11 @@ cc_binary( srcs = ["unshare.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/synchronization", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - 
"@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -3516,11 +3552,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + gtest, "//test/util:posix_error", "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3530,13 +3566,13 @@ cc_binary( srcs = ["vfork.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3548,6 +3584,10 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -3556,10 +3596,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:time_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3570,10 +3606,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3584,12 +3620,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", ], ) @@ -3600,14 +3636,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", - "//test/util:test_main", - "//test/util:test_util", - "//test/util:thread_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", ], ) @@ -3633,10 +3669,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3646,11 +3682,11 @@ cc_binary( srcs = ["vdso_clock_gettime.cc"], linkstatic = 1, deps = [ - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -3660,10 +3696,10 @@ cc_binary( srcs = ["vsyscall.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3676,11 +3712,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -3692,12 +3728,12 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:memory_util", 
"//test/util:multiprocess_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3709,10 +3745,10 @@ cc_binary( deps = [ ":ip_socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3724,10 +3760,10 @@ cc_binary( deps = [ ":ip_socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3743,11 +3779,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc index f246a799e..adfb149df 100644 --- a/test/syscalls/linux/bad.cc +++ b/test/syscalls/linux/bad.cc @@ -22,11 +22,17 @@ namespace gvisor { namespace testing { namespace { +#ifdef __x86_64__ +// get_kernel_syms is not supported in Linux > 2.6, and not implemented in +// gVisor. +constexpr uint32_t kNotImplementedSyscall = SYS_get_kernel_syms; +#elif __aarch64__ +// Use the last of arch_specific_syscalls which are not implemented on arm64. +constexpr uint32_t kNotImplementedSyscall = SYS_arch_specific_syscall + 15; +#endif TEST(BadSyscallTest, NotImplemented) { - // get_kernel_syms is not supported in Linux > 2.6, and not implemented in - // gVisor. - EXPECT_THAT(syscall(SYS_get_kernel_syms), SyscallFailsWithErrno(ENOSYS)); + EXPECT_THAT(syscall(kNotImplementedSyscall), SyscallFailsWithErrno(ENOSYS)); } TEST(BadSyscallTest, NegativeOne) { diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index 04bc2d7b9..0a2d44a2c 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -162,7 +162,7 @@ TEST(ChrootTest, DotDotFromOpenFD) { // getdents on fd should not error. char buf[1024]; - ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)), + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), SyscallSucceeds()); } diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 371890110..906f3358d 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -215,6 +215,8 @@ TEST_F(ForkTest, PrivateMapping) { EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); } +// CPUID is x86 specific. +#ifdef __x86_64__ // Test that cpuid works after a fork. TEST_F(ForkTest, Cpuid) { pid_t child = Fork(); @@ -227,6 +229,7 @@ TEST_F(ForkTest, Cpuid) { } EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); } +#endif TEST_F(ForkTest, Mmap) { pid_t child = Fork(); diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index ad2dbacb8..b147d6181 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -228,19 +228,28 @@ class GetdentsTest : public ::testing::Test { // Multiple template parameters are not allowed, so we must use explicit // template specialization to set the syscall number. + +// SYS_getdents isn't defined on arm64. 
+#ifdef __x86_64__ template <> int GetdentsTest<struct linux_dirent>::SyscallNum() { return SYS_getdents; } +#endif template <> int GetdentsTest<struct linux_dirent64>::SyscallNum() { return SYS_getdents64; } -// Test both legacy getdents and getdents64. +#ifdef __x86_64__ +// Test both legacy getdents and getdents64 on x86_64. typedef ::testing::Types<struct linux_dirent, struct linux_dirent64> GetdentsTypes; +#elif __aarch64__ +// Test only getdents64 on arm64. +typedef ::testing::Types<struct linux_dirent64> GetdentsTypes; +#endif TYPED_TEST_SUITE(GetdentsTest, GetdentsTypes); // N.B. TYPED_TESTs require explicitly using this-> to access members of diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 6b472eb2f..bba022a41 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -79,6 +79,33 @@ SocketPairKind DualStackTCPAcceptBindSocketPair(int type) { /* dual_stack = */ true)}; } +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket"); + return SocketPairKind{description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ true)}; +} + SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket"); diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 0f58e0f77..083ebbcf0 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -50,6 +50,21 @@ SocketPairKind IPv4TCPAcceptBindSocketPair(int type); // given type bound to the IPv4 loopback. SocketPairKind DualStackTCPAcceptBindSocketPair(int type); +// IPv6TCPAcceptBindPersistentListenerSocketPair is like +// IPv6TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type); + +// IPv4TCPAcceptBindPersistentListenerSocketPair is like +// IPv4TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type); + +// DualStackTCPAcceptBindPersistentListenerSocketPair is like +// DualStackTCPAcceptBindSocketPair except it uses a persistent listening socket +// to create all socket pairs. 
+SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type); + // IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents // SocketPairs created with bind() and connect() syscalls with AF_INET6 and the // given type bound to the IPv6 loopback. diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc index cd936ea90..4a9acd7ae 100644 --- a/test/syscalls/linux/preadv2.cc +++ b/test/syscalls/linux/preadv2.cc @@ -35,6 +35,8 @@ namespace { #ifndef SYS_preadv2 #if defined(__x86_64__) #define SYS_preadv2 327 +#elif defined(__aarch64__) +#define SYS_preadv2 286 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index a03c1e43d..169b723eb 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1994,7 +1994,7 @@ TEST(Proc, GetdentsEnoent) { }, nullptr, nullptr)); char buf[1024]; - ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)), + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(ENOENT)); } diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc index 1dbc0d6df..3fe5a600f 100644 --- a/test/syscalls/linux/pwritev2.cc +++ b/test/syscalls/linux/pwritev2.cc @@ -34,6 +34,8 @@ namespace { #ifndef SYS_pwritev2 #if defined(__x86_64__) #define SYS_pwritev2 328 +#elif defined(__aarch64__) +#define SYS_pwritev2 287 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 294ee6808..2c947feb7 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -49,7 +49,12 @@ namespace testing { namespace { // A syscall not implemented by Linux that we don't expect to be called. +#ifdef __x86_64__ constexpr uint32_t kFilteredSyscall = SYS_vserver; +#elif __aarch64__ +// Use the last of arch_specific_syscalls which are not implemented on arm64. +constexpr uint32_t kFilteredSyscall = SYS_arch_specific_syscall + 15; +#endif // Applies a seccomp-bpf filter that returns `filtered_result` for // `sysno` and allows all other syscalls. Async-signal-safe. diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc new file mode 100644 index 000000000..6a232238d --- /dev/null +++ b/test/syscalls/linux/socket_generic_stress.cc @@ -0,0 +1,83 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected sockets. 
+using ConnectStressTest = SocketPairTest; + +TEST_P(ConnectStressTest, Reset65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Send some data to ensure that the connection gets reset and the port gets + // released immediately. This avoids either end entering TIME-WAIT. + char sent_data[100] = {}; + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, ConnectStressTest, + ::testing::Values(IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv6TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv4TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindSocketPair(0)))); + +// Test fixture for tests that apply to pairs of connected sockets created with +// a persistent listener (if applicable). +using PersistentListenerConnectStressTest = SocketPairTest; + +TEST_P(PersistentListenerConnectStressTest, 65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, PersistentListenerConnectStressTest, + ::testing::Values( + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv6TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv4TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindPersistentListenerSocketPair(0)))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 3bf7081b9..b24618a88 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -325,6 +325,12 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; + constexpr int kAcceptCount = 32; + constexpr int kBacklog = kAcceptCount * 2; + constexpr int kFDs = 128; + constexpr int kThreadCount = 4; + constexpr int kFDsPerThread = kFDs / kThreadCount; + // Create the listening socket. FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); @@ -332,7 +338,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len), SyscallSucceeds()); - ASSERT_THAT(listen(listen_fd.get(), 1001), SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); // Get the port bound by the listening socket. socklen_t addrlen = listener.addr_len; @@ -345,9 +351,6 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { DisableSave ds; // Too many system calls. 
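// (Note on the "gets reset" comment in the stress tests above: closing a TCP
//  socket that still has unread received data makes the kernel send a RST
//  rather than completing a normal FIN handshake, and a reset connection skips
//  TIME-WAIT on both ends, so the port is immediately reusable.)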
sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - constexpr int kFDs = 2048; - constexpr int kThreadCount = 4; - constexpr int kFDsPerThread = kFDs / kThreadCount; FileDescriptor clients[kFDs]; std::unique_ptr<ScopedThread> threads[kThreadCount]; for (int i = 0; i < kFDs; i++) { @@ -371,7 +374,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { for (int i = 0; i < kThreadCount; i++) { threads[i]->Join(); } - for (int i = 0; i < 32; i++) { + for (int i = 0; i < kAcceptCount; i++) { auto accepted = ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); } diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 1e28e658d..e5aed1eec 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -14,6 +14,7 @@ #include <arpa/inet.h> #include <ifaddrs.h> +#include <linux/if.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <sys/socket.h> @@ -25,8 +26,10 @@ #include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "absl/types/optional.h" #include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/test_util.h" @@ -38,6 +41,8 @@ namespace testing { namespace { +constexpr uint32_t kSeq = 12345; + using ::testing::AnyOf; using ::testing::Eq; @@ -113,58 +118,224 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { // TODO(mpratt): Check ifinfomsg contents and following attrs. } +PosixError DumpLinks( + const FileDescriptor& fd, uint32_t seq, + const std::function<void(const struct nlmsghdr* hdr)>& fn) { + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + + return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); +} + TEST(NetlinkRouteTest, GetLinkDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + // Loopback is common among all tests, check that it's found. 
+ bool loopbackFound = false; + ASSERT_NO_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + CheckGetLinkResponse(hdr, kSeq, port); + if (hdr->nlmsg_type != RTM_NEWLINK) { + return; + } + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + std::cout << "Found interface idx=" << msg->ifi_index + << ", type=" << std::hex << msg->ifi_type; + if (msg->ifi_type == ARPHRD_LOOPBACK) { + loopbackFound = true; + EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); + } + })); + EXPECT_TRUE(loopbackFound); +} + +struct Link { + int index; + std::string name; +}; + +PosixErrorOr<absl::optional<Link>> FindLoopbackLink() { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + absl::optional<Link> link; + RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + if (hdr->nlmsg_type != RTM_NEWLINK || + hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { + return; + } + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + if (msg->ifi_type == ARPHRD_LOOPBACK) { + const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + if (rta == nullptr) { + // Ignore links that do not have a name. + return; + } + + link = Link(); + link->index = msg->ifi_index; + link->name = std::string(reinterpret_cast<const char*>(RTA_DATA(rta))); + } + })); + return link; +} + +// CheckLinkMsg checks a netlink message against an expected link. +void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) { + ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK)); + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + EXPECT_EQ(msg->ifi_index, link.index); + + const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + EXPECT_NE(nullptr, rta) << "IFLA_IFNAME not found in message."; + if (rta != nullptr) { + std::string name(reinterpret_cast<const char*>(RTA_DATA(rta))); + EXPECT_EQ(name, link.name); + } +} + +TEST(NetlinkRouteTest, GetLinkByIndex) { + absl::optional<Link> loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + struct request { struct nlmsghdr hdr; struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; - req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_flags = NLM_F_REQUEST; req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = loopback_link->index; - // Loopback is common among all tests, check that it's found. 
- bool loopbackFound = false; + bool found = false; ASSERT_NO_ERRNO(NetlinkRequestResponse( fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) { - CheckGetLinkResponse(hdr, kSeq, port); - if (hdr->nlmsg_type != RTM_NEWLINK) { - return; - } - ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); - const struct ifinfomsg* msg = - reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); - std::cout << "Found interface idx=" << msg->ifi_index - << ", type=" << std::hex << msg->ifi_type; - if (msg->ifi_type == ARPHRD_LOOPBACK) { - loopbackFound = true; - EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); - } + CheckLinkMsg(hdr, *loopback_link); + found = true; }, false)); - EXPECT_TRUE(loopbackFound); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; } -TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { +TEST(NetlinkRouteTest, GetLinkByName) { + absl::optional<Link> loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; }; - constexpr uint32_t kSeq = 12345; + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(loopback_link->name.size() + 1); + strncpy(req.ifname, loopback_link->name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + bool found = false; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + CheckLinkMsg(hdr, *loopback_link); + found = true; + }, + false)); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; +} + +TEST(NetlinkRouteTest, GetLinkByIndexNotFound) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = 1234590; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, GetLinkByNameNotFound) { + const std::string name = "nodevice?!"; + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(name.size() + 1); + strncpy(req.ifname, name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + 
struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -175,18 +346,8 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; - ASSERT_NO_ERRNO(NetlinkRequestResponse( - fd, &req, sizeof(req), - [&](const struct nlmsghdr* hdr) { - EXPECT_THAT(hdr->nlmsg_type, Eq(NLMSG_ERROR)); - EXPECT_EQ(hdr->nlmsg_seq, kSeq); - EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr)); - - const struct nlmsgerr* msg = - reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr)); - EXPECT_EQ(msg->error, -EOPNOTSUPP); - }, - true)); + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(EOPNOTSUPP, ::testing::_)); } TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { @@ -198,8 +359,6 @@ TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; @@ -238,8 +397,6 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; @@ -282,8 +439,6 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; // This control message is ignored. We still receive a response for the @@ -317,8 +472,6 @@ TEST(NetlinkRouteTest, GetAddrDump) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -367,6 +520,57 @@ TEST(NetlinkRouteTest, LookupAll) { ASSERT_GT(count, 0); } +TEST(NetlinkRouteTest, AddAddr) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + absl::optional<Link> loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifa; + struct rtattr rtattr; + struct in_addr addr; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_seq = kSeq; + req.ifa.ifa_family = AF_INET; + req.ifa.ifa_prefixlen = 24; + req.ifa.ifa_flags = 0; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = loopback_link->index; + req.rtattr.rta_type = IFA_LOCAL; + req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr)); + inet_pton(AF_INET, "10.0.0.1", &req.addr); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifa)) + NLMSG_ALIGN(req.rtattr.rta_len); + + // Create should succeed, as no such address in kernel. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Replace an existing address should succeed. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Create exclusive should fail, as we created the address above. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_THAT( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len), + PosixErrorIs(EEXIST, ::testing::_)); +} + // GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request. 
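// (Length arithmetic for the RTM_NEWADDR request above, assuming the usual
//  Linux layouts of a 16-byte nlmsghdr, 8-byte ifaddrmsg, 4-byte rtattr, and
//  4-byte in_addr: NLMSG_LENGTH(sizeof(req.ifa)) = 16 + 8 = 24, and
//  NLMSG_ALIGN(RTA_LENGTH(sizeof(req.addr))) = NLMSG_ALIGN(4 + 4) = 8, so
//  nlmsg_len comes out to 32 bytes; sizeof(request) is larger only because of
//  the trailing pad.)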
TEST(NetlinkRouteTest, GetRouteDump) { FileDescriptor fd = @@ -378,8 +582,6 @@ TEST(NetlinkRouteTest, GetRouteDump) { struct rtmsg rtm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETROUTE; @@ -538,8 +740,6 @@ TEST(NetlinkRouteTest, RecvmsgTrunc) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -615,8 +815,6 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -695,8 +893,6 @@ TEST(NetlinkRouteTest, NoPasscredNoCreds) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -743,8 +939,6 @@ TEST(NetlinkRouteTest, PasscredCreds) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index cd2212a1a..952eecfe8 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -16,6 +16,7 @@ #include <linux/if_arp.h> #include <linux/netlink.h> +#include <linux/rtnetlink.h> #include <sys/socket.h> #include <vector> @@ -71,9 +72,10 @@ PosixError NetlinkRequestResponse( iov.iov_base = buf.data(); iov.iov_len = buf.size(); - // Response is a series of NLM_F_MULTI messages, ending with a NLMSG_DONE - // message. + // If NLM_F_MULTI is set, response is a series of messages that ends with a + // NLMSG_DONE message. int type = -1; + int flags = 0; do { int len; RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0)); @@ -89,6 +91,7 @@ PosixError NetlinkRequestResponse( for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data()); NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { fn(hdr); + flags = hdr->nlmsg_flags; type = hdr->nlmsg_type; // Done should include an integer payload for dump_done_errno. // See net/netlink/af_netlink.c:netlink_dump @@ -98,11 +101,11 @@ PosixError NetlinkRequestResponse( EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int))); } } - } while (type != NLMSG_DONE && type != NLMSG_ERROR); + } while ((flags & NLM_F_MULTI) && type != NLMSG_DONE && type != NLMSG_ERROR); if (expect_nlmsgerr) { EXPECT_EQ(type, NLMSG_ERROR); - } else { + } else if (flags & NLM_F_MULTI) { EXPECT_EQ(type, NLMSG_DONE); } return NoError(); @@ -146,5 +149,39 @@ PosixError NetlinkRequestResponseSingle( return NoError(); } +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len) { + // Dummy negative number for no error message received. + // We won't get a negative error number so there will be no confusion. 
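// (A netlink ACK arrives as an NLMSG_ERROR message whose nlmsgerr error field
//  is zero, so on success the callback below records 0 and the returned
//  PosixError reads as success to EXPECT_NO_ERRNO; on failure it records the
//  positive errno value, which the tests above match with PosixErrorIs(...).)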
+ int err = -42; + RETURN_IF_ERRNO(NetlinkRequestResponse( + fd, request, len, + [&](const struct nlmsghdr* hdr) { + EXPECT_EQ(NLMSG_ERROR, hdr->nlmsg_type); + EXPECT_EQ(hdr->nlmsg_seq, seq); + EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr)); + + const struct nlmsgerr* msg = + reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr)); + err = -msg->error; + }, + true)); + return PosixError(err); +} + +const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr) { + const int ifi_space = NLMSG_SPACE(sizeof(*msg)); + int attrlen = hdr->nlmsg_len - ifi_space; + const struct rtattr* rta = reinterpret_cast<const struct rtattr*>( + reinterpret_cast<const uint8_t*>(hdr) + NLMSG_ALIGN(ifi_space)); + for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) { + if (rta->rta_type == attr) { + return rta; + } + } + return nullptr; +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index 3678c0599..e13ead406 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -19,6 +19,7 @@ // socket.h has to be included before if_arp.h. #include <linux/if_arp.h> #include <linux/netlink.h> +#include <linux/rtnetlink.h> #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" @@ -47,6 +48,14 @@ PosixError NetlinkRequestResponseSingle( const FileDescriptor& fd, void* request, size_t len, const std::function<void(const struct nlmsghdr* hdr)>& fn); +// Send the passed request then expect and return an ack or error. +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len); + +// Find rtnetlink attribute in message. +const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr); + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index eff7d577e..c0c5ab3fe 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -18,10 +18,13 @@ #include <poll.h> #include <sys/socket.h> +#include <memory> + #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/time/clock.h" +#include "absl/types/optional.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" @@ -109,7 +112,10 @@ Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain, MaybeSave(); // Unlinked path. } - return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, + // accepted is before connected to destruct connected before accepted. + // Destructors for nonstatic member objects are called in the reverse order + // in which they appear in the class declaration. 
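// (This relies on the standard C++ guarantee that non-static data members are
//  destroyed in reverse declaration order; for a hypothetical
//      struct PairLike { FileDescriptor a; FileDescriptor b; };
//  ~PairLike() runs ~b before ~a, so placing `accepted` first means the
//  connected descriptor is torn down before the accepted one.)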
+ return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr, extra_addr); }; } @@ -311,11 +317,16 @@ PosixErrorOr<T> BindIP(int fd, bool dual_stack) { } template <typename T> -PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair( - int bound, int connected, int type, bool dual_stack) { - ASSIGN_OR_RETURN_ERRNO(T bind_addr, BindIP<T>(bound, dual_stack)); - RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5)); +PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack)); + RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd, /* backlog = */ 5)); + return addr; +} +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> +CreateTCPConnectAcceptSocketPair(int bound, int connected, int type, + bool dual_stack, T bind_addr) { int connect_result = 0; RETURN_ERROR_IF_SYSCALL_FAIL( (connect_result = RetryEINTR(connect)( @@ -358,16 +369,27 @@ PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair( absl::SleepFor(absl::Seconds(1)); } - // Cleanup no longer needed resources. - RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); - MaybeSave(); // Successful close. - T extra_addr = {}; LocalhostAddr(&extra_addr, dual_stack); return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, extra_addr); } +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair( + int bound, int connected, int type, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack)); + + auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type, + dual_stack, bind_addr); + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Successful close. + + return result; +} + Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, int protocol, bool dual_stack) { @@ -389,6 +411,63 @@ Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, }; } +Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack) { + // These are lazily initialized below, on the first call to the returned + // lambda. These values are private to each returned lambda, but shared across + // invocations of a specific lambda. + // + // The sharing allows pairs created with the same parameters to share a + // listener. This prevents future connects from failing if the connecting + // socket selects a port which had previously been used by a listening socket + // that still has some connections in TIME-WAIT. + // + // The lazy initialization is to avoid creating sockets during parameter + // enumeration. This is important because parameters are enumerated during the + // build process where networking may not be available. + auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>()); + auto addr4 = std::make_shared<absl::optional<sockaddr_in>>( + absl::optional<sockaddr_in>()); + auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>( + absl::optional<sockaddr_in6>()); + + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + // Share the listener across invocations. 
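// (The sharing works because the lambda captures std::shared_ptr copies of
//  these optionals; a minimal sketch of the pattern, with ExpensiveInit() as a
//  hypothetical stand-in:
//
//    auto cache = std::make_shared<absl::optional<int>>();
//    auto get = [cache]() {
//      if (!cache->has_value()) cache->emplace(ExpensiveInit());
//      return cache->value();
//    };
//
//  Invocations of `get`, and any copies of it, all see the value cached by the
//  first call.)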
+ if (!listener->has_value()) { + int fd = socket(domain, type, protocol); + if (fd < 0) { + return PosixError(errno, absl::StrCat("socket(", domain, ", ", type, + ", ", protocol, ")")); + } + listener->emplace(fd); + MaybeSave(); // Successful socket creation. + } + + // Bind the listener once, but create a new connect/accept pair each + // time. + if (domain == AF_INET) { + if (!addr4->has_value()) { + addr4->emplace( + TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, + type, dual_stack, addr4->value()); + } + if (!addr6->has_value()) { + addr6->emplace( + TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type, + dual_stack, addr6->value()); + }; +} + template <typename T> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair( int sock1, int sock2, int type, bool dual_stack) { @@ -518,8 +597,8 @@ size_t CalculateUnixSockAddrLen(const char* sun_path) { if (sun_path[0] == 0) { return sizeof(sockaddr_un); } - // Filesystem addresses use the address length plus the 2 byte sun_family and - // null terminator. + // Filesystem addresses use the address length plus the 2 byte sun_family + // and null terminator. return strlen(sun_path) + 3; } diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h index 2dbb8bed3..bfaa6e397 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/syscalls/linux/socket_test_util.h @@ -273,6 +273,12 @@ Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, int protocol, bool dual_stack); +// TCPAcceptBindPersistentListenerSocketPairCreator is like +// TCPAcceptBindSocketPairCreator, except it uses the same listening socket to +// create all SocketPairs. +Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack); + // UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that // obtains file descriptors by invoking the bind() and connect() syscalls on UDP // sockets. diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 388d75835..c951ac3b3 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -557,6 +557,8 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { #ifndef SYS_statx #if defined(__x86_64__) #define SYS_statx 332 +#elif defined(__aarch64__) +#define SYS_statx 291 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc index 3db18d7ac..2f92c27da 100644 --- a/test/syscalls/linux/timers.cc +++ b/test/syscalls/linux/timers.cc @@ -297,9 +297,13 @@ class IntervalTimer { PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid, const struct sigevent& sev) { int timerid; - if (syscall(SYS_timer_create, clockid, &sev, &timerid) < 0) { + int ret = syscall(SYS_timer_create, clockid, &sev, &timerid); + if (ret < 0) { return PosixError(errno, "timer_create"); } + if (ret > 0) { + return PosixError(EINVAL, "timer_create should never return positive"); + } MaybeSave(); return IntervalTimer(timerid); } @@ -317,6 +321,18 @@ TEST(IntervalTimerTest, IsInitiallyStopped) { EXPECT_EQ(0, its.it_value.tv_nsec); } +// Kernel can create multiple timers without issue. +// +// Regression test for gvisor.dev/issue/1738. 
+TEST(IntervalTimerTest, MultipleTimers) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer1 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + const auto timer2 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); +} + TEST(IntervalTimerTest, SingleShotSilent) { struct sigevent sev = {}; sev.sigev_notify = SIGEV_NONE; diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index ab21d68c6..85eb31847 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -39,6 +39,10 @@ namespace { class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNullName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), @@ -48,6 +52,10 @@ TEST_F(XattrTest, XattrNullName) { } TEST_F(XattrTest, XattrEmptyName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), @@ -56,6 +64,10 @@ TEST_F(XattrTest, XattrEmptyName) { } TEST_F(XattrTest, XattrLargeName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); @@ -77,6 +89,10 @@ TEST_F(XattrTest, XattrLargeName) { } TEST_F(XattrTest, XattrInvalidPrefix) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -88,6 +104,10 @@ TEST_F(XattrTest, XattrInvalidPrefix) { // Do not allow save/restore cycles after making the test file read-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -113,6 +133,10 @@ TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { // Do not allow save/restore cycles after making the test file write-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + // Drop capabilities that allow us to override file and directory permissions. 
ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -143,6 +167,10 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) { } TEST_F(XattrTest, XattrOnDirectory) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const char name[] = "user.test"; EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), @@ -152,6 +180,10 @@ TEST_F(XattrTest, XattrOnDirectory) { } TEST_F(XattrTest, XattrOnSymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); @@ -163,6 +195,10 @@ TEST_F(XattrTest, XattrOnSymlink) { } TEST_F(XattrTest, XattrOnInvalidFileTypes) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char name[] = "user.test"; char char_device[] = "/dev/zero"; @@ -181,6 +217,10 @@ TEST_F(XattrTest, XattrOnInvalidFileTypes) { } TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector<char> val = {'a', 'a'}; @@ -196,6 +236,10 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, SetxattrZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -208,6 +252,10 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; @@ -223,6 +271,10 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), @@ -232,6 +284,10 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -240,6 +296,10 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector<char> val(XATTR_SIZE_MAX + 1); @@ -256,6 +316,10 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { } TEST_F(XattrTest, SetxattrReplaceWithSmaller) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector<char> val = {'a', 'a'}; @@ -271,6 +335,10 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) { } TEST_F(XattrTest, SetxattrReplaceWithLarger) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector<char> val = {'a', 'a'}; @@ -285,6 +353,10 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) { } TEST_F(XattrTest, SetxattrCreateFlag) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), @@ -296,6 +368,10 @@ TEST_F(XattrTest, SetxattrCreateFlag) { } TEST_F(XattrTest, SetxattrReplaceFlag) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), @@ -308,6 +384,10 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), @@ -315,6 +395,10 @@ TEST_F(XattrTest, SetxattrInvalidFlags) { } TEST_F(XattrTest, Getxattr) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -327,6 +411,10 @@ TEST_F(XattrTest, Getxattr) { } TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector<char> val = {'a', 'a'}; @@ -339,6 +427,10 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, GetxattrSizeLargerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -354,6 +446,10 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) { } TEST_F(XattrTest, GetxattrZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -367,6 +463,10 @@ TEST_F(XattrTest, GetxattrZeroSize) { } TEST_F(XattrTest, GetxattrSizeTooLarge) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -383,6 +483,10 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) { } TEST_F(XattrTest, GetxattrNullValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -394,6 +498,10 @@ TEST_F(XattrTest, GetxattrNullValue) { } TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -410,12 +518,20 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, GetxattrNonexistentName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, LGetSetxattrOnSymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); @@ -427,6 +543,10 @@ TEST_F(XattrTest, LGetSetxattrOnSymlink) { } TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -441,6 +561,10 @@ TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { } TEST_F(XattrTest, FGetSetxattr) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); const char name[] = "user.test"; diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index b9fd885ff..ae342b68c 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -450,17 +450,16 @@ func main() { } // Get subset of tests corresponding to shard. - begin, end, err := testutil.TestBoundsForShard(len(testCases)) + indices, err := testutil.TestIndicesForShard(len(testCases)) if err != nil { fatalf("TestsForShard() failed: %v", err) } - testCases = testCases[begin:end] // Run the tests. var tests []testing.InternalTest - for _, tc := range testCases { + for _, tci := range indices { // Capture tc. - tc := tc + tc := testCases[tci] testName := fmt.Sprintf("%s_%s", tc.Suite, tc.Name) tests = append(tests, testing.InternalTest{ Name: testName, diff --git a/test/util/BUILD b/test/util/BUILD index 1ac8b3fd6..1f22ebe29 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_library", "cc_test", "select_system") +load("//tools:defs.bzl", "cc_library", "cc_test", "gtest", "select_system") package( default_visibility = ["//:sandbox"], @@ -41,7 +41,7 @@ cc_library( ":save_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -55,7 +55,7 @@ cc_library( ":posix_error", ":test_util", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -67,7 +67,7 @@ cc_test( ":proc_util", ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -87,7 +87,7 @@ cc_library( ":file_descriptor", ":posix_error", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -101,7 +101,7 @@ cc_test( ":temp_path", ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -134,7 +134,7 @@ cc_library( ":cleanup", ":posix_error", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -183,7 +183,7 @@ cc_library( "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:variant", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -194,7 +194,7 @@ cc_test( deps = [ ":posix_error", ":test_main", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -218,7 +218,7 @@ cc_library( ":cleanup", ":posix_error", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -233,7 +233,7 @@ cc_library( ":test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -259,7 +259,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -291,7 +291,7 @@ cc_library( ":posix_error", ":test_util", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -302,7 +302,7 @@ cc_test( deps = [ ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -322,7 +322,7 @@ cc_library( ":file_descriptor", ":posix_error", ":save_util", - "@com_google_googletest//:gtest", + gtest, ], ) diff --git a/test/util/platform_util.cc b/test/util/platform_util.cc index 2724e63f3..c9200d381 100644 --- a/test/util/platform_util.cc +++ b/test/util/platform_util.cc @@ -20,10 +20,9 @@ namespace gvisor { namespace testing { PlatformSupport 
PlatformSupport32Bit() { - if (GvisorPlatform() == Platform::kPtrace) { + if (GvisorPlatform() == Platform::kPtrace || + GvisorPlatform() == Platform::kKVM) { return PlatformSupport::NotSupported; - } else if (GvisorPlatform() == Platform::kKVM) { - return PlatformSupport::Segfault; } else { return PlatformSupport::Allowed; } diff --git a/test/util/signal_util.h b/test/util/signal_util.h index bcf85c337..e7b66aa51 100644 --- a/test/util/signal_util.h +++ b/test/util/signal_util.h @@ -85,6 +85,20 @@ inline void FixupFault(ucontext_t* ctx) { // The encoding is 0x48 0xab 0x00. ctx->uc_mcontext.gregs[REG_RIP] += 3; } +#elif __aarch64__ +inline void Fault() { + // Zero and dereference x0. + asm("mov xzr, x0\r\n" + "str xzr, [x0]\r\n" + : + : + : "x0"); +} + +inline void FixupFault(ucontext_t* ctx) { + // Skip the bad instruction above. + ctx->uc_mcontext.pc += 4; +} #endif } // namespace testing diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 15cbc6da6..95e1e0c96 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -69,7 +69,6 @@ bool IsRunningWithHostinet() { "xchg %%rdi, %%rbx\n" \ : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ : "a"(a_inp), "2"(c_inp)) -#endif // defined(__x86_64__) CPUVendor GetCPUVendor() { uint32_t eax, ebx, ecx, edx; @@ -86,6 +85,7 @@ CPUVendor GetCPUVendor() { } return CPUVendor::kUnknownVendor; } +#endif // defined(__x86_64__) bool operator==(const KernelVersion& first, const KernelVersion& second) { return first.major == second.major && first.minor == second.minor && diff --git a/tools/build/defs.bzl b/tools/build/defs.bzl index d0556abd1..1a1a0d825 100644 --- a/tools/build/defs.bzl +++ b/tools/build/defs.bzl @@ -8,6 +8,7 @@ load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image") load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image") load("@pydeps//:requirements.bzl", _py_requirement = "requirement") +load("//tools/build:tags.bzl", _go_suffixes = "go_suffixes") container_image = _container_image cc_binary = _cc_binary @@ -18,6 +19,8 @@ cc_test = _cc_test cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" go_image = _go_image go_embed_data = _go_embed_data +go_suffixes = _go_suffixes +gtest = "@com_google_googletest//:gtest" loopback = "//tools/build:loopback" proto_library = native.proto_library pkg_deb = _pkg_deb diff --git a/tools/build/tags.bzl b/tools/build/tags.bzl new file mode 100644 index 000000000..e99c87f81 --- /dev/null +++ b/tools/build/tags.bzl @@ -0,0 +1,36 @@ +"""List of special Go suffixes.""" + +go_suffixes = [ + "_386", + "_386_unsafe", + "_amd64", + "_amd64_unsafe", + "_aarch64", + "_aarch64_unsafe", + "_arm", + "_arm_unsafe", + "_arm64", + "_arm64_unsafe", + "_mips", + "_mips_unsafe", + "_mipsle", + "_mipsle_unsafe", + "_mips64", + "_mips64_unsafe", + "_mips64le", + "_mips64le_unsafe", + "_ppc64", + "_ppc64_unsafe", + "_ppc64le", + "_ppc64le_unsafe", + "_riscv64", + "_riscv64_unsafe", + "_s390x", + "_s390x_unsafe", + "_sparc64", + "_sparc64_unsafe", + "_wasm", + "_wasm_unsafe", + "_linux", + "_linux_unsafe", +] diff --git a/tools/defs.bzl b/tools/defs.bzl index 819f12b0d..5d5fa134a 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/build:defs.bzl", "go_suffixes", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") # Delegate directly. cc_binary = _cc_binary @@ -20,6 +20,7 @@ go_embed_data = _go_embed_data go_image = _go_image go_test = _go_test go_tool_library = _go_tool_library +gtest = _gtest pkg_deb = _pkg_deb pkg_tar = _pkg_tar py_library = _py_library @@ -44,6 +45,34 @@ def go_binary(name, **kwargs): **kwargs ) +def calculate_sets(srcs): + """Calculates special Go sets for templates. + + Args: + srcs: the full set of Go sources. + + Returns: + A dictionary of the form: + + "": [src1.go, src2.go] + "suffix": [src3suffix.go, src4suffix.go] + + Note that suffix will typically start with '_'. + """ + result = dict() + for file in srcs: + if not file.endswith(".go"): + continue + target = "" + for suffix in go_suffixes: + if file.endswith(suffix + ".go"): + target = suffix + if not target in result: + result[target] = [file] + else: + result[target].append(file) + return result + def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs): """Wraps the standard go_library and does stateification and marshalling. @@ -69,39 +98,49 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F marshal: whether marshal is enabled (default: false). **kwargs: standard go_library arguments. """ + all_srcs = srcs + all_deps = deps if stateify: # Only do stateification for non-state packages without manual autogen. 
-        go_stateify(
-            name = name + "_state_autogen",
-            srcs = [src for src in srcs if src.endswith(".go")],
-            imports = imports,
-            package = name,
-            arch = select_arch(),
-            out = name + "_state_autogen.go",
-        )
-        all_srcs = srcs + [name + "_state_autogen.go"]
-        if "//pkg/state" not in deps:
-            all_deps = deps + ["//pkg/state"]
-        else:
-            all_deps = deps
-    else:
-        all_deps = deps
-        all_srcs = srcs
+        # First, we need to segregate the input files via the special suffixes,
+        # and calculate the final output set.
+        state_sets = calculate_sets(srcs)
+        for (suffix, srcs) in state_sets.items():
+            go_stateify(
+                name = name + suffix + "_state_autogen",
+                srcs = srcs,
+                imports = imports,
+                package = name,
+                out = name + suffix + "_state_autogen.go",
+            )
+        all_srcs = all_srcs + [
+            name + suffix + "_state_autogen.go"
+            for suffix in state_sets.keys()
+        ]
+        if "//pkg/state" not in all_deps:
+            all_deps = all_deps + ["//pkg/state"]
+
     if marshal:
-        go_marshal(
-            name = name + "_abi_autogen",
-            srcs = [src for src in srcs if src.endswith(".go")],
-            debug = False,
-            imports = imports,
-            package = name,
-        )
+        # See above.
+        marshal_sets = calculate_sets(srcs)
+        for (suffix, srcs) in marshal_sets.items():
+            go_marshal(
+                name = name + suffix + "_abi_autogen",
+                srcs = srcs,
+                debug = False,
+                imports = imports,
+                package = name,
+            )
         extra_deps = [
             dep
             for dep in marshal_deps
             if not dep in all_deps
         ]
         all_deps = all_deps + extra_deps
-        all_srcs = srcs + [name + "_abi_autogen_unsafe.go"]
+        all_srcs = all_srcs + [
+            name + suffix + "_abi_autogen_unsafe.go"
+            for suffix in marshal_sets.keys()
+        ]

     _go_library(
         name = name,
@@ -114,13 +153,16 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F
         # Ignore importpath for go_test.
         kwargs.pop("importpath", None)

-        _go_test(
-            name = name + "_abi_autogen_test",
-            srcs = [name + "_abi_autogen_test.go"],
-            library = ":" + name,
-            deps = marshal_test_deps,
-            **kwargs
-        )
+        # See above.
+        marshal_sets = calculate_sets(srcs)
+        for (suffix, srcs) in marshal_sets.items():
+            _go_test(
+                name = name + suffix + "_abi_autogen_test",
+                srcs = [name + suffix + "_abi_autogen_test.go"],
+                library = ":" + name + suffix,
+                deps = marshal_test_deps,
+                **kwargs
+            )

 def proto_library(name, srcs, **kwargs):
     """Wraps the standard proto_library.
diff --git a/tools/go_marshal/gomarshal/BUILD b/tools/go_marshal/gomarshal/BUILD
index c92b59dd6..b5d5a4487 100644
--- a/tools/go_marshal/gomarshal/BUILD
+++ b/tools/go_marshal/gomarshal/BUILD
@@ -14,4 +14,5 @@ go_library(
     visibility = [
         "//:sandbox",
     ],
+    deps = ["//tools/tags"],
 )
diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go
index af90bdecb..0b3f600fe 100644
--- a/tools/go_marshal/gomarshal/generator.go
+++ b/tools/go_marshal/gomarshal/generator.go
@@ -23,6 +23,9 @@ import (
     "go/token"
     "os"
     "sort"
+    "strings"
+
+    "gvisor.dev/gvisor/tools/tags"
 )

 const (
@@ -104,6 +107,14 @@ func NewGenerator(srcs []string, out, outTest, pkg string, imports []string) (*G
 func (g *Generator) writeHeader() error {
     var b sourceBuffer
     b.emit("// Automatically generated marshal implementation. See tools/go_marshal.\n\n")
+
+    // Emit build tags.
+    if t := tags.Aggregate(g.inputs); len(t) > 0 {
+        b.emit(strings.Join(t.Lines(), "\n"))
+        b.emit("\n")
+    }
+
+    // Package header.
b.emit("package %s\n\n", g.pkg) if err := b.write(g.output); err != nil { return err diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index a133d6f8b..6036faf7b 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -6,4 +6,5 @@ go_binary( name = "stateify", srcs = ["main.go"], visibility = ["//visibility:public"], + deps = ["//tools/tags"], ) diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl index 0f261d89f..bdb966362 100644 --- a/tools/go_stateify/defs.bzl +++ b/tools/go_stateify/defs.bzl @@ -7,7 +7,6 @@ def _go_stateify_impl(ctx): # Run the stateify command. args = ["-output=%s" % output.path] args.append("-pkg=%s" % ctx.attr.package) - args.append("-arch=%s" % ctx.attr.arch) if ctx.attr._statepkg: args.append("-statepkg=%s" % ctx.attr._statepkg) if ctx.attr.imports: @@ -47,15 +46,8 @@ for statified types. doc = "The package name for the input sources.", mandatory = True, ), - "arch": attr.string( - doc = "Target platform.", - mandatory = True, - ), "out": attr.output( - doc = """ -The name of the generated file output. This must not conflict with any other -files and must be added to the srcs of the relevant go_library. -""", + doc = "Name of the generator output file.", mandatory = True, ), "_tool": attr.label( diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go index 7d5d291e6..aa9d4543e 100644 --- a/tools/go_stateify/main.go +++ b/tools/go_stateify/main.go @@ -22,12 +22,12 @@ import ( "go/ast" "go/parser" "go/token" - "io/ioutil" "os" - "path/filepath" "reflect" "strings" "sync" + + "gvisor.dev/gvisor/tools/tags" ) var ( @@ -35,113 +35,8 @@ var ( imports = flag.String("imports", "", "extra imports for the output file") output = flag.String("output", "", "output file") statePkg = flag.String("statepkg", "", "state import package; defaults to empty") - arch = flag.String("arch", "", "specify the target platform") ) -// The known architectures. -var okgoarch = []string{ - "386", - "amd64", - "arm", - "arm64", - "mips", - "mipsle", - "mips64", - "mips64le", - "ppc64", - "ppc64le", - "riscv64", - "s390x", - "sparc64", - "wasm", -} - -// readfile returns the content of the named file. -func readfile(file string) string { - data, err := ioutil.ReadFile(file) - if err != nil { - panic(fmt.Sprintf("readfile err: %v", err)) - } - return string(data) -} - -// matchfield reports whether the field (x,y,z) matches this build. -// all the elements in the field must be satisfied. -func matchfield(f string, goarch string) bool { - for _, tag := range strings.Split(f, ",") { - if !matchtag(tag, goarch) { - return false - } - } - return true -} - -// matchtag reports whether the tag (x or !x) matches this build. -func matchtag(tag string, goarch string) bool { - if tag == "" { - return false - } - if tag[0] == '!' { - if len(tag) == 1 || tag[1] == '!' { - return false - } - return !matchtag(tag[1:], goarch) - } - return tag == goarch -} - -// canBuild reports whether we can build this file for target platform by -// checking file name and build tags. The code is derived from the Go source -// cmd.dist.build.shouldbuild. -func canBuild(file, goTargetArch string) bool { - name := filepath.Base(file) - excluded := func(list []string, ok string) bool { - for _, x := range list { - if x == ok || (ok == "android" && x == "linux") || (ok == "illumos" && x == "solaris") { - continue - } - i := strings.Index(name, x) - if i <= 0 || name[i-1] != '_' { - continue - } - i += len(x) - if i == len(name) || name[i] == '.' 
|| name[i] == '_' { - return true - } - } - return false - } - if excluded(okgoarch, goTargetArch) { - return false - } - - // Check file contents for // +build lines. - for _, p := range strings.Split(readfile(file), "\n") { - p = strings.TrimSpace(p) - if p == "" { - continue - } - if !strings.HasPrefix(p, "//") { - break - } - if !strings.Contains(p, "+build") { - continue - } - fields := strings.Fields(p[2:]) - if len(fields) < 1 || fields[0] != "+build" { - continue - } - for _, p := range fields[1:] { - if matchfield(p, goTargetArch) { - goto fieldmatch - } - } - return false - fieldmatch: - } - return true -} - // resolveTypeName returns a qualified type name. func resolveTypeName(name string, typ ast.Expr) (field string, qualified string) { for done := false; !done; { @@ -329,8 +224,15 @@ func main() { fmt.Fprintf(outputFile, " m.Save(\"%s\", &x.%s)\n", name, name) } - // Emit the package name. + // Automated warning. fmt.Fprint(outputFile, "// automatically generated by stateify.\n\n") + + // Emit build tags. + if t := tags.Aggregate(flag.Args()); len(t) > 0 { + fmt.Fprintf(outputFile, "%s\n\n", strings.Join(t.Lines(), "\n")) + } + + // Emit the package name. fmt.Fprintf(outputFile, "package %s\n\n", *pkg) // Emit the imports lazily. @@ -364,10 +266,6 @@ func main() { os.Exit(1) } - if !canBuild(filename, *arch) { - continue - } - files = append(files, f) } diff --git a/tools/images/BUILD b/tools/images/BUILD index f1699b184..fe11f08a3 100644 --- a/tools/images/BUILD +++ b/tools/images/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary", "gtest") load("//tools/images:defs.bzl", "vm_image", "vm_test") package( @@ -32,8 +32,8 @@ cc_binary( srcs = ["test.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", - "@com_google_googletest//:gtest", ], ) diff --git a/tools/tags/BUILD b/tools/tags/BUILD new file mode 100644 index 000000000..1c02e2c89 --- /dev/null +++ b/tools/tags/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "tags", + srcs = ["tags.go"], + marshal = False, + stateify = False, + visibility = ["//tools:__subpackages__"], +) diff --git a/tools/tags/tags.go b/tools/tags/tags.go new file mode 100644 index 000000000..f35904e0a --- /dev/null +++ b/tools/tags/tags.go @@ -0,0 +1,89 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tags is a utility for parsing build tags. +package tags + +import ( + "fmt" + "io/ioutil" + "strings" +) + +// OrSet is a set of tags on a single line. +// +// Note that tags may include ",", and we don't distinguish this case in the +// logic below. Ideally, this constraints can be split into separate top-level +// build tags in order to resolve any issues. +type OrSet []string + +// Line returns the line for this or. +func (or OrSet) Line() string { + return fmt.Sprintf("// +build %s", strings.Join([]string(or), " ")) +} + +// AndSet is the set of all OrSets. 
+type AndSet []OrSet
+
+// Lines returns the lines to be printed.
+func (and AndSet) Lines() (ls []string) {
+    for _, or := range and {
+        ls = append(ls, or.Line())
+    }
+    return
+}
+
+// Join joins this AndSet with another.
+func (and AndSet) Join(other AndSet) AndSet {
+    return append(and, other...)
+}
+
+// Tags returns the unique set of +build tags.
+//
+// Derived from the runtime's canBuild.
+func Tags(file string) (tags AndSet) {
+    data, err := ioutil.ReadFile(file)
+    if err != nil {
+        return nil
+    }
+    // Check file contents for // +build lines.
+    for _, p := range strings.Split(string(data), "\n") {
+        p = strings.TrimSpace(p)
+        if p == "" {
+            continue
+        }
+        if !strings.HasPrefix(p, "//") {
+            break
+        }
+        if !strings.Contains(p, "+build") {
+            continue
+        }
+        fields := strings.Fields(p[2:])
+        if len(fields) < 1 || fields[0] != "+build" {
+            continue
+        }
+        tags = append(tags, OrSet(fields[1:]))
+    }
+    return tags
+}
+
+// Aggregate aggregates all tags from a set of files.
+//
+// Note that these may be in conflict, in which case the build will fail.
+func Aggregate(files []string) (tags AndSet) {
+    for _, file := range files {
+        tags = tags.Join(Tags(file))
+    }
+    return tags
+}
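
The new //tools/tags package models Go build constraints the same way the toolchain does: each "// +build" line is an OrSet (any one tag on the line may satisfy it), and a file's collection of such lines is an AndSet (every line must be satisfied). The standalone sketch below shows one way the package could be exercised; the temporary directory, the example_amd64.go file name, and the constraint values are illustrative assumptions rather than anything taken from this change.

package main

import (
    "fmt"
    "io/ioutil"
    "os"
    "path/filepath"
    "strings"

    "gvisor.dev/gvisor/tools/tags"
)

func main() {
    // Write a throwaway source file carrying two constraint lines,
    // i.e. (amd64 OR arm64) AND linux.
    dir, err := ioutil.TempDir("", "tags-example")
    if err != nil {
        panic(err)
    }
    defer os.RemoveAll(dir)

    src := "// +build amd64 arm64\n// +build linux\n\npackage example\n"
    file := filepath.Join(dir, "example_amd64.go") // hypothetical file name
    if err := ioutil.WriteFile(file, []byte(src), 0644); err != nil {
        panic(err)
    }

    // Aggregate the constraints and print the header lines, one
    // "// +build" line per OrSet.
    if t := tags.Aggregate([]string{file}); len(t) > 0 {
        fmt.Println(strings.Join(t.Lines(), "\n"))
        // Prints:
        //   // +build amd64 arm64
        //   // +build linux
    }
}

Note that Tags stops scanning at the first non-comment line, so only constraints in the file header are collected, which matches the behavior of the canBuild logic this commit removes from go_stateify.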
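
With the -arch flag and canBuild gone, go_stateify no longer filters inputs per architecture at generation time; instead both generators copy the aggregated "// +build" lines of their inputs into the generated file, and the Go build then filters that file exactly as it filters the hand-written sources. The sketch below mirrors the header-writing sequence from the updated main.go (automated warning, then build tags, then package clause); wrapping it in a standalone program and the "example" package name are assumptions made purely for illustration.

package main

import (
    "fmt"
    "os"
    "strings"

    "gvisor.dev/gvisor/tools/tags"
)

// emitHeader mirrors the header emission in go_stateify's main.go after this
// change: automated warning, aggregated build tags (if any), package clause.
func emitHeader(out *os.File, pkg string, inputs []string) {
    fmt.Fprint(out, "// automatically generated by stateify.\n\n")
    if t := tags.Aggregate(inputs); len(t) > 0 {
        fmt.Fprintf(out, "%s\n\n", strings.Join(t.Lines(), "\n"))
    }
    fmt.Fprintf(out, "package %s\n\n", pkg)
}

func main() {
    // Pass input .go files on the command line, much as the real tool
    // receives them via flag.Args().
    emitHeader(os.Stdout, "example", os.Args[1:])
}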
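
On the Bazel side, calculate_sets buckets a library's .go sources by their recognized file-name suffix so that one generator invocation runs per bucket. The grouping rule is restated in Go below for readers following the generator half of the change; this is an illustrative re-expression, not code from the change, the goSuffixes slice is a hand-picked subset of tools/build/tags.bzl, and the source names are invented.

package main

import (
    "fmt"
    "strings"
)

// A small subset of the go_suffixes list, for illustration only.
var goSuffixes = []string{"_amd64", "_amd64_unsafe", "_arm64", "_arm64_unsafe", "_linux"}

// calculateSets mirrors the Starlark helper: bucket Go sources by suffix,
// keying files without a recognized suffix under "". As in the Starlark
// loop, the last matching suffix in list order wins.
func calculateSets(srcs []string) map[string][]string {
    result := make(map[string][]string)
    for _, file := range srcs {
        if !strings.HasSuffix(file, ".go") {
            continue
        }
        target := ""
        for _, suffix := range goSuffixes {
            if strings.HasSuffix(file, suffix+".go") {
                target = suffix
            }
        }
        result[target] = append(result[target], file)
    }
    return result
}

func main() {
    srcs := []string{"epoll.go", "epoll_amd64.go", "epoll_arm64.go", "BUILD"}
    for suffix, files := range calculateSets(srcs) {
        fmt.Printf("%q -> %v\n", suffix, files)
    }
    // Expected buckets: "" -> [epoll.go], "_amd64" -> [epoll_amd64.go],
    // "_arm64" -> [epoll_arm64.go]; BUILD is skipped.
}

go_library then declares one go_stateify/go_marshal rule per bucket, so a generated file such as name_amd64_state_autogen.go is produced only from the _amd64 sources and ends up carrying their build constraints.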