diff options
Diffstat (limited to 'pkg/sentry/fsimpl/tmpfs/regular_file.go')
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/regular_file.go | 570 |
1 files changed, 0 insertions, 570 deletions
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go deleted file mode 100644 index 711442424..000000000 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ /dev/null @@ -1,570 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "fmt" - "io" - "math" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// regularFile is a regular (=S_IFREG) tmpfs file. -type regularFile struct { - inode inode - - // memFile is a platform.File used to allocate pages to this regularFile. - memFile *pgalloc.MemoryFile - - // mapsMu protects mappings. - mapsMu sync.Mutex `state:"nosave"` - - // mappings tracks mappings of the file into memmap.MappingSpaces. - // - // Protected by mapsMu. - mappings memmap.MappingSet - - // writableMappingPages tracks how many pages of virtual memory are mapped - // as potentially writable from this file. If a page has multiple mappings, - // each mapping is counted separately. - // - // This counter is susceptible to overflow as we can potentially count - // mappings from many VMAs. We count pages rather than bytes to slightly - // mitigate this. - // - // Protected by mapsMu. - writableMappingPages uint64 - - // dataMu protects the fields below. - dataMu sync.RWMutex - - // data maps offsets into the file to offsets into memFile that store - // the file's data. - // - // Protected by dataMu. - data fsutil.FileRangeSet - - // seals represents file seals on this inode. - // - // Protected by dataMu. - seals uint32 - - // size is the size of data. - // - // Protected by both dataMu and inode.mu; reading it requires holding - // either mutex, while writing requires holding both AND using atomics. - // Readers that do not require consistency (like Stat) may read the - // value atomically without holding either lock. - size uint64 -} - -func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode { - file := ®ularFile{ - memFile: fs.memFile, - } - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // from parent directory - return &file.inode -} - -// truncate grows or shrinks the file to the given size. It returns true if the -// file size was updated. -func (rf *regularFile) truncate(newSize uint64) (bool, error) { - rf.inode.mu.Lock() - defer rf.inode.mu.Unlock() - return rf.truncateLocked(newSize) -} - -// Preconditions: rf.inode.mu must be held. -func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) { - oldSize := rf.size - if newSize == oldSize { - // Nothing to do. - return false, nil - } - - // Need to hold inode.mu and dataMu while modifying size. - rf.dataMu.Lock() - if newSize > oldSize { - // Can we grow the file? - if rf.seals&linux.F_SEAL_GROW != 0 { - rf.dataMu.Unlock() - return false, syserror.EPERM - } - // We only need to update the file size. - atomic.StoreUint64(&rf.size, newSize) - rf.dataMu.Unlock() - return true, nil - } - - // We are shrinking the file. First check if this is allowed. - if rf.seals&linux.F_SEAL_SHRINK != 0 { - rf.dataMu.Unlock() - return false, syserror.EPERM - } - - // Update the file size. - atomic.StoreUint64(&rf.size, newSize) - rf.dataMu.Unlock() - - // Invalidate past translations of truncated pages. - oldpgend := fs.OffsetPageEnd(int64(oldSize)) - newpgend := fs.OffsetPageEnd(int64(newSize)) - if newpgend < oldpgend { - rf.mapsMu.Lock() - rf.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ - // Compare Linux's mm/shmem.c:shmem_setattr() => - // mm/memory.c:unmap_mapping_range(evencows=1). - InvalidatePrivate: true, - }) - rf.mapsMu.Unlock() - } - - // We are now guaranteed that there are no translations of truncated pages, - // and can remove them. - rf.dataMu.Lock() - rf.data.Truncate(newSize, rf.memFile) - rf.dataMu.Unlock() - return true, nil -} - -// AddMapping implements memmap.Mappable.AddMapping. -func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { - rf.mapsMu.Lock() - defer rf.mapsMu.Unlock() - rf.dataMu.RLock() - defer rf.dataMu.RUnlock() - - // Reject writable mapping if F_SEAL_WRITE is set. - if rf.seals&linux.F_SEAL_WRITE != 0 && writable { - return syserror.EPERM - } - - rf.mappings.AddMapping(ms, ar, offset, writable) - if writable { - pagesBefore := rf.writableMappingPages - - // ar is guaranteed to be page aligned per memmap.Mappable. - rf.writableMappingPages += uint64(ar.Length() / usermem.PageSize) - - if rf.writableMappingPages < pagesBefore { - panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages)) - } - } - - return nil -} - -// RemoveMapping implements memmap.Mappable.RemoveMapping. -func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { - rf.mapsMu.Lock() - defer rf.mapsMu.Unlock() - - rf.mappings.RemoveMapping(ms, ar, offset, writable) - - if writable { - pagesBefore := rf.writableMappingPages - - // ar is guaranteed to be page aligned per memmap.Mappable. - rf.writableMappingPages -= uint64(ar.Length() / usermem.PageSize) - - if rf.writableMappingPages > pagesBefore { - panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages)) - } - } -} - -// CopyMapping implements memmap.Mappable.CopyMapping. -func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { - return rf.AddMapping(ctx, ms, dstAR, offset, writable) -} - -// Translate implements memmap.Mappable.Translate. -func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { - rf.dataMu.Lock() - defer rf.dataMu.Unlock() - - // Constrain translations to f.attr.Size (rounded up) to prevent - // translation to pages that may be concurrently truncated. - pgend := fs.OffsetPageEnd(int64(rf.size)) - var beyondEOF bool - if required.End > pgend { - if required.Start >= pgend { - return nil, &memmap.BusError{io.EOF} - } - beyondEOF = true - required.End = pgend - } - if optional.End > pgend { - optional.End = pgend - } - - cerr := rf.data.Fill(ctx, required, optional, rf.memFile, usage.Tmpfs, func(_ context.Context, dsts safemem.BlockSeq, _ uint64) (uint64, error) { - // Newly-allocated pages are zeroed, so we don't need to do anything. - return dsts.NumBytes(), nil - }) - - var ts []memmap.Translation - var translatedEnd uint64 - for seg := rf.data.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() { - segMR := seg.Range().Intersect(optional) - ts = append(ts, memmap.Translation{ - Source: segMR, - File: rf.memFile, - Offset: seg.FileRangeOf(segMR).Start, - Perms: usermem.AnyAccess, - }) - translatedEnd = segMR.End - } - - // Don't return the error returned by f.data.Fill if it occurred outside of - // required. - if translatedEnd < required.End && cerr != nil { - return ts, &memmap.BusError{cerr} - } - if beyondEOF { - return ts, &memmap.BusError{io.EOF} - } - return ts, nil -} - -// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. -func (*regularFile) InvalidateUnsavable(context.Context) error { - return nil -} - -type regularFileFD struct { - fileDescription - - // off is the file offset. off is accessed using atomic memory operations. - // offMu serializes operations that may mutate off. - off int64 - offMu sync.Mutex -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { - // noop -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if offset < 0 { - return 0, syserror.EINVAL - } - if dst.NumBytes() == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - rw := getRegularFileReadWriter(f, offset) - n, err := dst.CopyOutFrom(ctx, rw) - putRegularFileReadWriter(rw) - return int64(n), err -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if offset < 0 { - return 0, syserror.EINVAL - } - srclen := src.NumBytes() - if srclen == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - end := offset + srclen - if end < offset { - // Overflow. - return 0, syserror.EFBIG - } - f.inode.mu.Lock() - rw := getRegularFileReadWriter(f, offset) - n, err := src.CopyInTo(ctx, rw) - f.inode.mu.Unlock() - putRegularFileReadWriter(rw) - return n, err -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // use offset as specified - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size)) - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *regularFileFD) Sync(ctx context.Context) error { - return nil -} - -// LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { - return fd.inode().lockBSD(uid, t, block) -} - -// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. -func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { - fd.inode().unlockBSD(uid) - return nil -} - -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { - return fd.inode().lockPOSIX(uid, t, rng, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. -func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { - fd.inode().unlockPOSIX(uid, rng) - return nil -} - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. -func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - file := fd.inode().impl.(*regularFile) - return vfs.GenericConfigureMMap(&fd.vfsfd, file, opts) -} - -// regularFileReadWriter implements safemem.Reader and Safemem.Writer. -type regularFileReadWriter struct { - file *regularFile - - // Offset into the file to read/write at. Note that this may be - // different from the FD offset if PRead/PWrite is used. - off uint64 -} - -var regularFileReadWriterPool = sync.Pool{ - New: func() interface{} { - return ®ularFileReadWriter{} - }, -} - -func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter { - rw := regularFileReadWriterPool.Get().(*regularFileReadWriter) - rw.file = file - rw.off = uint64(offset) - return rw -} - -func putRegularFileReadWriter(rw *regularFileReadWriter) { - rw.file = nil - regularFileReadWriterPool.Put(rw) -} - -// ReadToBlocks implements safemem.Reader.ReadToBlocks. -func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { - rw.file.dataMu.RLock() - defer rw.file.dataMu.RUnlock() - size := rw.file.size - - // Compute the range to read (limited by file size and overflow-checked). - if rw.off >= size { - return 0, io.EOF - } - end := size - if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { - end = rend - } - - var done uint64 - seg, gap := rw.file.data.Find(uint64(rw.off)) - for rw.off < end { - mr := memmap.MappableRange{uint64(rw.off), uint64(end)} - switch { - case seg.Ok(): - // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) - if err != nil { - return done, err - } - - // Copy from internal mappings. - n, err := safemem.CopySeq(dsts, ims) - done += n - rw.off += uint64(n) - dsts = dsts.DropFirst64(n) - if err != nil { - return done, err - } - - // Continue. - seg, gap = seg.NextNonEmpty() - - case gap.Ok(): - // Tmpfs holes are zero-filled. - gapmr := gap.Range().Intersect(mr) - dst := dsts.TakeFirst64(gapmr.Length()) - n, err := safemem.ZeroSeq(dst) - done += n - rw.off += uint64(n) - dsts = dsts.DropFirst64(n) - if err != nil { - return done, err - } - - // Continue. - seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} - } - } - return done, nil -} - -// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. -// -// Preconditions: inode.mu must be held. -func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { - // Hold dataMu so we can modify size. - rw.file.dataMu.Lock() - defer rw.file.dataMu.Unlock() - - // Compute the range to write (overflow-checked). - end := rw.off + srcs.NumBytes() - if end <= rw.off { - end = math.MaxInt64 - } - - // Check if seals prevent either file growth or all writes. - switch { - case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed - return 0, syserror.EPERM - case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed - // When growth is sealed, Linux effectively allows writes which would - // normally grow the file to partially succeed up to the current EOF, - // rounded down to the page boundary before the EOF. - // - // This happens because writes (and thus the growth check) for tmpfs - // files proceed page-by-page on Linux, and the final write to the page - // containing EOF fails, resulting in a partial write up to the start of - // that page. - // - // To emulate this behaviour, artifically truncate the write to the - // start of the page containing the current EOF. - // - // See Linux, mm/filemap.c:generic_perform_write() and - // mm/shmem.c:shmem_write_begin(). - if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart { - end = pgstart - } - if end <= rw.off { - // Truncation would result in no data being written. - return 0, syserror.EPERM - } - } - - // Page-aligned mr for when we need to allocate memory. RoundUp can't - // overflow since end is an int64. - pgstartaddr := usermem.Addr(rw.off).RoundDown() - pgendaddr, _ := usermem.Addr(end).RoundUp() - pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)} - - var ( - done uint64 - retErr error - ) - seg, gap := rw.file.data.Find(uint64(rw.off)) - for rw.off < end { - mr := memmap.MappableRange{uint64(rw.off), uint64(end)} - switch { - case seg.Ok(): - // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write) - if err != nil { - retErr = err - goto exitLoop - } - - // Copy to internal mappings. - n, err := safemem.CopySeq(ims, srcs) - done += n - rw.off += uint64(n) - srcs = srcs.DropFirst64(n) - if err != nil { - retErr = err - goto exitLoop - } - - // Continue. - seg, gap = seg.NextNonEmpty() - - case gap.Ok(): - // Allocate memory for the write. - gapMR := gap.Range().Intersect(pgMR) - fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs) - if err != nil { - retErr = err - goto exitLoop - } - - // Write to that memory as usual. - seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{} - } - } -exitLoop: - // If the write ends beyond the file's previous size, it causes the - // file to grow. - if rw.off > rw.file.size { - rw.file.size = rw.off - } - - return done, retErr -} |