From 267560d159b299a17f626029d7091ab923afeef6 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Mon, 16 Nov 2020 18:52:20 -0800 Subject: Reset watchdog timer between sendfile() iterations. As part of this, change Task.interrupted() to not drain Task.interruptChan, and do so explicitly using new function Task.unsetInterrupted() instead. PiperOrigin-RevId: 342768365 --- pkg/sentry/kernel/task_block.go | 28 +++++++++++++--------- pkg/sentry/kernel/task_sched.go | 12 ++++++++++ pkg/sentry/kernel/task_signals.go | 12 ++++------ pkg/sentry/syscalls/linux/vfs2/splice.go | 40 ++++++++++++++++++++++---------- test/syscalls/linux/sendfile.cc | 18 ++++++++++++++ 5 files changed, 80 insertions(+), 30 deletions(-) diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go index 147a3d286..9419f2e95 100644 --- a/pkg/sentry/kernel/task_block.go +++ b/pkg/sentry/kernel/task_block.go @@ -177,19 +177,23 @@ func (t *Task) SleepStart() <-chan struct{} { // SleepFinish implements context.ChannelSleeper.SleepFinish. func (t *Task) SleepFinish(success bool) { if !success { - // The interrupted notification is consumed only at the top-level - // (Run). Therefore we attempt to reset the pending notification. - // This will also elide our next entry back into the task, so we - // will process signals, state changes, etc. + // Our caller received from t.interruptChan; we need to re-send to it + // to ensure that t.interrupted() is still true. t.interruptSelf() } t.accountTaskGoroutineLeave(TaskGoroutineBlockedInterruptible) t.Activate() } -// Interrupted implements amutex.Sleeper.Interrupted +// Interrupted implements context.ChannelSleeper.Interrupted. func (t *Task) Interrupted() bool { - return len(t.interruptChan) != 0 + if t.interrupted() { + return true + } + // Indicate that t's task goroutine is still responsive (i.e. reset the + // watchdog timer). + t.accountTaskGoroutineRunning() + return false } // UninterruptibleSleepStart implements context.Context.UninterruptibleSleepStart. @@ -210,13 +214,17 @@ func (t *Task) UninterruptibleSleepFinish(activate bool) { } // interrupted returns true if interrupt or interruptSelf has been called at -// least once since the last call to interrupted. +// least once since the last call to unsetInterrupted. func (t *Task) interrupted() bool { + return len(t.interruptChan) != 0 +} + +// unsetInterrupted causes interrupted to return false until the next call to +// interrupt or interruptSelf. +func (t *Task) unsetInterrupted() { select { case <-t.interruptChan: - return true default: - return false } } @@ -232,9 +240,7 @@ func (t *Task) interrupt() { func (t *Task) interruptSelf() { select { case t.interruptChan <- struct{}{}: - t.Debugf("Interrupt queued") default: - t.Debugf("Dropping duplicate interrupt") } // platform.Context.Interrupt() is unnecessary since a task goroutine // calling interruptSelf() cannot also be blocked in diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go index 52c55d13d..9ba5f8d78 100644 --- a/pkg/sentry/kernel/task_sched.go +++ b/pkg/sentry/kernel/task_sched.go @@ -157,6 +157,18 @@ func (t *Task) accountTaskGoroutineLeave(state TaskGoroutineState) { t.goschedSeq.EndWrite() } +// Preconditions: The caller must be running on the task goroutine. +func (t *Task) accountTaskGoroutineRunning() { + now := t.k.CPUClockNow() + if t.gosched.State != TaskGoroutineRunningSys { + panic(fmt.Sprintf("Task goroutine in state %v (expected %v)", t.gosched.State, TaskGoroutineRunningSys)) + } + t.goschedSeq.BeginWrite() + t.gosched.SysTicks += now - t.gosched.Timestamp + t.gosched.Timestamp = now + t.goschedSeq.EndWrite() +} + // TaskGoroutineSchedInfo returns a copy of t's task goroutine scheduling info. // Most clients should use t.CPUStats() instead. func (t *Task) TaskGoroutineSchedInfo() TaskGoroutineSchedInfo { diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index ebdb83061..42dd3e278 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -619,9 +619,6 @@ func (t *Task) setSignalMaskLocked(mask linux.SignalSet) { return } }) - // We have to re-issue the interrupt consumed by t.interrupted() since - // it might have been for a different reason. - t.interruptSelf() } // Conversely, if the new mask unblocks any signals that were blocked by @@ -931,10 +928,10 @@ func (t *Task) signalStop(target *Task, code int32, status int32) { type runInterrupt struct{} func (*runInterrupt) execute(t *Task) taskRunState { - // Interrupts are de-duplicated (if t is interrupted twice before - // t.interrupted() is called, t.interrupted() will only return true once), - // so early exits from this function must re-enter the runInterrupt state - // to check for more interrupt-signaled conditions. + // Interrupts are de-duplicated (t.unsetInterrupted() will undo the effect + // of all previous calls to t.interrupted() regardless of how many such + // calls there have been), so early exits from this function must re-enter + // the runInterrupt state to check for more interrupt-signaled conditions. t.tg.signalHandlers.mu.Lock() @@ -1080,6 +1077,7 @@ func (*runInterrupt) execute(t *Task) taskRunState { return t.deliverSignal(info, act) } + t.unsetInterrupted() t.tg.signalHandlers.mu.Unlock() return (*runApp)(nil) } diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 9ce4f280a..8bb763a47 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -343,8 +343,8 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Copy data. var ( - n int64 - err error + total int64 + err error ) dw := dualWaiter{ inFile: inFile, @@ -357,13 +357,20 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // can block. nonBlock := outFile.StatusFlags()&linux.O_NONBLOCK != 0 if outIsPipe { - for n < count { - var spliceN int64 - spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count) + for { + var n int64 + n, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count-total) if offset != -1 { - offset += spliceN + offset += n + } + total += n + if total == count { + break + } + if err == nil && t.Interrupted() { + err = syserror.ErrInterrupted + break } - n += spliceN if err == syserror.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) } @@ -374,7 +381,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } else { // Read inFile to buffer, then write the contents to outFile. buf := make([]byte, count) - for n < count { + for { var readN int64 if offset != -1 { readN, err = inFile.PRead(t, usermem.BytesIOSequence(buf), offset, vfs.ReadOptions{}) @@ -382,7 +389,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } else { readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) } - n += readN // Write all of the bytes that we read. This may need // multiple write calls to complete. @@ -398,7 +404,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // We didn't complete the write. Only report the bytes that were actually // written, and rewind offsets as needed. notWritten := int64(len(wbuf)) - n -= notWritten + readN -= notWritten if offset == -1 { // We modified the offset of the input file itself during the read // operation. Rewind it. @@ -415,6 +421,16 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc break } } + + total += readN + buf = buf[readN:] + if total == count { + break + } + if err == nil && t.Interrupted() { + err = syserror.ErrInterrupted + break + } if err == syserror.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) } @@ -432,7 +448,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } } - if n != 0 { + if total != 0 { inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) @@ -445,7 +461,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // We can only pass a single file to handleIOError, so pick inFile arbitrarily. // This is used only for debugging purposes. - return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "sendfile", inFile) + return uintptr(total), nil, slinux.HandleIOErrorVFS2(t, total != 0, err, syserror.ERESTARTSYS, "sendfile", inFile) } // dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index cf0977118..3924e0001 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -631,6 +631,24 @@ TEST(SendFileTest, SendFileToPipe) { SyscallSucceedsWithValue(kDataSize)); } +TEST(SendFileTest, SendFileToSelf_NoRandomSave) { + int rawfd; + ASSERT_THAT(rawfd = memfd_create("memfd", 0), SyscallSucceeds()); + const FileDescriptor fd(rawfd); + + char c = 0x01; + ASSERT_THAT(WriteFd(fd.get(), &c, 1), SyscallSucceedsWithValue(1)); + + // Arbitrarily chosen to make sendfile() take long enough that the sentry + // watchdog usually fires unless it's reset by sendfile() between iterations + // of the buffered copy. See b/172076632. + constexpr size_t kSendfileSize = 0xa00000; + + off_t offset = 0; + ASSERT_THAT(sendfile(fd.get(), fd.get(), &offset, kSendfileSize), + SyscallSucceedsWithValue(kSendfileSize)); +} + static volatile int signaled = 0; void SigUsr1Handler(int sig, siginfo_t* info, void* context) { signaled = 1; } -- cgit v1.2.3