// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package gofer import ( "fmt" "io" "math" "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) func (d *dentry) isRegularFile() bool { return d.fileType() == linux.S_IFREG } type regularFileFD struct { fileDescription // off is the file offset. off is protected by mu. mu sync.Mutex off int64 } // Release implements vfs.FileDescriptionImpl.Release. func (fd *regularFileFD) Release() { } // OnClose implements vfs.FileDescriptionImpl.OnClose. func (fd *regularFileFD) OnClose(ctx context.Context) error { if !fd.vfsfd.IsWritable() { return nil } // Skip flushing if writes may be buffered by the client, since (as with // the VFS1 client) we don't flush buffered writes on close anyway. d := fd.dentry() if d.fs.opts.interop == InteropModeExclusive { return nil } d.handleMu.RLock() defer d.handleMu.RUnlock() return d.handle.file.flush(ctx) } // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { return 0, syserror.EINVAL } if opts.Flags != 0 { return 0, syserror.EOPNOTSUPP } // Check for reading at EOF before calling into MM (but not under // InteropModeShared, which makes d.size unreliable). d := fd.dentry() if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) { return 0, io.EOF } if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { // Lock d.metadataMu for the rest of the read to prevent d.size from // changing. d.metadataMu.Lock() defer d.metadataMu.Unlock() // Write dirty cached pages that will be touched by the read back to // the remote file. if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil { return 0, err } } rw := getDentryReadWriter(ctx, d, offset) if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { // Require the read to go to the remote file. rw.direct = true } n, err := dst.CopyOutFrom(ctx, rw) putDentryReadWriter(rw) if d.fs.opts.interop != InteropModeShared { // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). d.touchAtime(ctx, fd.vfsfd.Mount()) } return n, err } // Read implements vfs.FileDescriptionImpl.Read. func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { fd.mu.Lock() n, err := fd.PRead(ctx, dst, fd.off, opts) fd.off += n fd.mu.Unlock() return n, err } // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { if offset < 0 { return 0, syserror.EINVAL } if opts.Flags != 0 { return 0, syserror.EOPNOTSUPP } d := fd.dentry() d.metadataMu.Lock() defer d.metadataMu.Unlock() if d.fs.opts.interop != InteropModeShared { // Compare Linux's mm/filemap.c:__generic_file_write_iter() => // file_update_time(). This is d.touchCMtime(), but without locking // d.metadataMu (recursively). if now, ok := nowFromContext(ctx); ok { atomic.StoreInt64(&d.mtime, now) atomic.StoreInt64(&d.ctime, now) } } if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { // Write dirty cached pages that will be touched by the write back to // the remote file. if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { return 0, err } // Remove touched pages from the cache. pgstart := pageRoundDown(uint64(offset)) pgend := pageRoundUp(uint64(offset + src.NumBytes())) if pgend < pgstart { return 0, syserror.EINVAL } mr := memmap.MappableRange{pgstart, pgend} var freed []platform.FileRange d.dataMu.Lock() cseg := d.cache.LowerBoundSegment(mr.Start) for cseg.Ok() && cseg.Start() < mr.End { cseg = d.cache.Isolate(cseg, mr) freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()}) cseg = d.cache.Remove(cseg).NextSegment() } d.dataMu.Unlock() // Invalidate mappings of removed pages. d.mapsMu.Lock() d.mappings.Invalidate(mr, memmap.InvalidateOpts{}) d.mapsMu.Unlock() // Finally free pages removed from the cache. mf := d.fs.mfp.MemoryFile() for _, freedFR := range freed { mf.DecRef(freedFR) } } rw := getDentryReadWriter(ctx, d, offset) if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { // Require the write to go to the remote file. rw.direct = true } n, err := src.CopyInTo(ctx, rw) putDentryReadWriter(rw) if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 { // Write dirty cached pages touched by the write back to the remote // file. if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { return 0, err } // Request the remote filesystem to sync the remote file. if err := d.handle.file.fsync(ctx); err != nil { return 0, err } } return n, err } // Write implements vfs.FileDescriptionImpl.Write. func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { fd.mu.Lock() n, err := fd.PWrite(ctx, src, fd.off, opts) fd.off += n fd.mu.Unlock() return n, err } type dentryReadWriter struct { ctx context.Context d *dentry off uint64 direct bool } var dentryReadWriterPool = sync.Pool{ New: func() interface{} { return &dentryReadWriter{} }, } func getDentryReadWriter(ctx context.Context, d *dentry, offset int64) *dentryReadWriter { rw := dentryReadWriterPool.Get().(*dentryReadWriter) rw.ctx = ctx rw.d = d rw.off = uint64(offset) rw.direct = false return rw } func putDentryReadWriter(rw *dentryReadWriter) { rw.ctx = nil rw.d = nil dentryReadWriterPool.Put(rw) } // ReadToBlocks implements safemem.Reader.ReadToBlocks. func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { if dsts.IsEmpty() { return 0, nil } // If we have a mmappable host FD (which must be used here to ensure // coherence with memory-mapped I/O), or if InteropModeShared is in effect // (which prevents us from caching file contents and makes dentry.size // unreliable), or if the file was opened O_DIRECT, read directly from // dentry.handle without locking dentry.dataMu. rw.d.handleMu.RLock() if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off) rw.d.handleMu.RUnlock() rw.off += n return n, err } // Otherwise read from/through the cache. mf := rw.d.fs.mfp.MemoryFile() fillCache := mf.ShouldCacheEvictable() var dataMuUnlock func() if fillCache { rw.d.dataMu.Lock() dataMuUnlock = rw.d.dataMu.Unlock } else { rw.d.dataMu.RLock() dataMuUnlock = rw.d.dataMu.RUnlock } // Compute the range to read (limited by file size and overflow-checked). if rw.off >= rw.d.size { dataMuUnlock() rw.d.handleMu.RUnlock() return 0, io.EOF } end := rw.d.size if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { end = rend } var done uint64 seg, gap := rw.d.cache.Find(rw.off) for rw.off < end { mr := memmap.MappableRange{rw.off, end} switch { case seg.Ok(): // Get internal mappings from the cache. ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) if err != nil { dataMuUnlock() rw.d.handleMu.RUnlock() return done, err } // Copy from internal mappings. n, err := safemem.CopySeq(dsts, ims) done += n rw.off += n dsts = dsts.DropFirst64(n) if err != nil { dataMuUnlock() rw.d.handleMu.RUnlock() return done, err } // Continue. seg, gap = seg.NextNonEmpty() case gap.Ok(): gapMR := gap.Range().Intersect(mr) if fillCache { // Read into the cache, then re-enter the loop to read from the // cache. reqMR := memmap.MappableRange{ Start: pageRoundDown(gapMR.Start), End: pageRoundUp(gapMR.End), } optMR := gap.Range() err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt) mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End}) seg, gap = rw.d.cache.Find(rw.off) if !seg.Ok() { dataMuUnlock() rw.d.handleMu.RUnlock() return done, err } // err might have occurred in part of gap.Range() outside // gapMR. Forget about it for now; if the error matters and // persists, we'll run into it again in a later iteration of // this loop. } else { // Read directly from the file. gapDsts := dsts.TakeFirst64(gapMR.Length()) n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start) done += n rw.off += n dsts = dsts.DropFirst64(n) // Partial reads are fine. But we must stop reading. if n != gapDsts.NumBytes() || err != nil { dataMuUnlock() rw.d.handleMu.RUnlock() return done, err } // Continue. seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} } } } dataMuUnlock() rw.d.handleMu.RUnlock() return done, nil } // WriteFromBlocks implements safemem.Writer.WriteFromBlocks. // // Preconditions: rw.d.metadataMu must be locked. func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { if srcs.IsEmpty() { return 0, nil } // If we have a mmappable host FD (which must be used here to ensure // coherence with memory-mapped I/O), or if InteropModeShared is in effect // (which prevents us from caching file contents), or if the file was // opened with O_DIRECT, write directly to dentry.handle without locking // dentry.dataMu. rw.d.handleMu.RLock() if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off) rw.d.handleMu.RUnlock() rw.off += n return n, err } // Otherwise write to/through the cache. mf := rw.d.fs.mfp.MemoryFile() rw.d.dataMu.Lock() // Compute the range to write (overflow-checked). start := rw.off end := rw.off + srcs.NumBytes() if end <= rw.off { end = math.MaxInt64 } var ( done uint64 retErr error ) seg, gap := rw.d.cache.Find(rw.off) for rw.off < end { mr := memmap.MappableRange{rw.off, end} switch { case seg.Ok(): // Get internal mappings from the cache. segMR := seg.Range().Intersect(mr) ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write) if err != nil { retErr = err goto exitLoop } // Copy to internal mappings. n, err := safemem.CopySeq(ims, srcs) done += n rw.off += n srcs = srcs.DropFirst64(n) rw.d.dirty.MarkDirty(segMR) if err != nil { retErr = err goto exitLoop } // Continue. seg, gap = seg.NextNonEmpty() case gap.Ok(): // Write directly to the file. At present, we never fill the cache // when writing, since doing so can convert small writes into // inefficient read-modify-write cycles, and we have no mechanism // for detecting or avoiding this. gapMR := gap.Range().Intersect(mr) gapSrcs := srcs.TakeFirst64(gapMR.Length()) n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start) done += n rw.off += n srcs = srcs.DropFirst64(n) // Partial writes are fine. But we must stop writing. if n != gapSrcs.NumBytes() || err != nil { retErr = err goto exitLoop } // Continue. seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} } } exitLoop: if rw.off > rw.d.size { atomic.StoreUint64(&rw.d.size, rw.off) // The remote file's size will implicitly be extended to the correct // value when we write back to it. } // If InteropModeWritethrough is in effect, flush written data back to the // remote filesystem. if rw.d.fs.opts.interop == InteropModeWritethrough && done != 0 { if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{ Start: start, End: rw.off, }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil { // We have no idea how many bytes were actually flushed. rw.off = start done = 0 retErr = err } } rw.d.dataMu.Unlock() rw.d.handleMu.RUnlock() return done, retErr } func (d *dentry) writeback(ctx context.Context, offset, size int64) error { if size == 0 { return nil } d.handleMu.RLock() defer d.handleMu.RUnlock() d.dataMu.Lock() defer d.dataMu.Unlock() // Compute the range of valid bytes (overflow-checked). if uint64(offset) >= d.size { return nil } end := int64(d.size) if rend := offset + size; rend > offset && rend < end { end = rend } return fsutil.SyncDirty(ctx, memmap.MappableRange{ Start: uint64(offset), End: uint64(end), }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) } // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { fd.mu.Lock() defer fd.mu.Unlock() switch whence { case linux.SEEK_SET: // Use offset as specified. case linux.SEEK_CUR: offset += fd.off case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE: // Ensure file size is up to date. d := fd.dentry() if fd.filesystem().opts.interop == InteropModeShared { if err := d.updateFromGetattr(ctx); err != nil { return 0, err } } size := int64(atomic.LoadUint64(&d.size)) // For SEEK_DATA and SEEK_HOLE, treat the file as a single contiguous // block of data. switch whence { case linux.SEEK_END: offset += size case linux.SEEK_DATA: if offset > size { return 0, syserror.ENXIO } // Use offset as specified. case linux.SEEK_HOLE: if offset > size { return 0, syserror.ENXIO } offset = size } default: return 0, syserror.EINVAL } if offset < 0 { return 0, syserror.EINVAL } fd.off = offset return offset, nil } // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *regularFileFD) Sync(ctx context.Context) error { return fd.dentry().syncSharedHandle(ctx) } func (d *dentry) syncSharedHandle(ctx context.Context) error { d.handleMu.RLock() if !d.handleWritable { d.handleMu.RUnlock() return nil } d.dataMu.Lock() // Write dirty cached data to the remote file. err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) d.dataMu.Unlock() if err == nil { // Sync the remote file. err = d.handle.sync(ctx) } d.handleMu.RUnlock() return err } // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { d := fd.dentry() switch d.fs.opts.interop { case InteropModeExclusive: // Any mapping is fine. case InteropModeWritethrough: // Shared writable mappings require a host FD, since otherwise we can't // synchronously flush memory-mapped writes to the remote file. if opts.Private || !opts.MaxPerms.Write { break } fallthrough case InteropModeShared: // All mappings require a host FD to be coherent with other filesystem // users. if d.fs.opts.forcePageCache { // Whether or not we have a host FD, we're not allowed to use it. return syserror.ENODEV } d.handleMu.RLock() haveFD := d.handle.fd >= 0 d.handleMu.RUnlock() if !haveFD { return syserror.ENODEV } default: panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) } return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) } func (d *dentry) mayCachePages() bool { if d.fs.opts.interop == InteropModeShared { return false } if d.fs.opts.forcePageCache { return true } d.handleMu.RLock() haveFD := d.handle.fd >= 0 d.handleMu.RUnlock() return haveFD } // AddMapping implements memmap.Mappable.AddMapping. func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { d.mapsMu.Lock() mapped := d.mappings.AddMapping(ms, ar, offset, writable) // Do this unconditionally since whether we have a host FD can change // across save/restore. for _, r := range mapped { d.pf.hostFileMapper.IncRefOn(r) } if d.mayCachePages() { // d.Evict() will refuse to evict memory-mapped pages, so tell the // MemoryFile to not bother trying. mf := d.fs.mfp.MemoryFile() for _, r := range mapped { mf.MarkUnevictable(d, pgalloc.EvictableRange{r.Start, r.End}) } } d.mapsMu.Unlock() return nil } // RemoveMapping implements memmap.Mappable.RemoveMapping. func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { d.mapsMu.Lock() unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable) for _, r := range unmapped { d.pf.hostFileMapper.DecRefOn(r) } if d.mayCachePages() { // Pages that are no longer referenced by any application memory // mappings are now considered unused; allow MemoryFile to evict them // when necessary. mf := d.fs.mfp.MemoryFile() d.dataMu.Lock() for _, r := range unmapped { // Since these pages are no longer mapped, they are no longer // concurrently dirtyable by a writable memory mapping. d.dirty.AllowClean(r) mf.MarkEvictable(d, pgalloc.EvictableRange{r.Start, r.End}) } d.dataMu.Unlock() } d.mapsMu.Unlock() } // CopyMapping implements memmap.Mappable.CopyMapping. func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { return d.AddMapping(ctx, ms, dstAR, offset, writable) } // Translate implements memmap.Mappable.Translate. func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { d.handleMu.RLock() if d.handle.fd >= 0 && !d.fs.opts.forcePageCache { d.handleMu.RUnlock() mr := optional if d.fs.opts.limitHostFDTranslation { mr = maxFillRange(required, optional) } return []memmap.Translation{ { Source: mr, File: &d.pf, Offset: mr.Start, Perms: usermem.AnyAccess, }, }, nil } d.dataMu.Lock() // Constrain translations to d.size (rounded up) to prevent translation to // pages that may be concurrently truncated. pgend := pageRoundUp(d.size) var beyondEOF bool if required.End > pgend { if required.Start >= pgend { d.dataMu.Unlock() d.handleMu.RUnlock() return nil, &memmap.BusError{io.EOF} } beyondEOF = true required.End = pgend } if optional.End > pgend { optional.End = pgend } mf := d.fs.mfp.MemoryFile() cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt) var ts []memmap.Translation var translatedEnd uint64 for seg := d.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() { segMR := seg.Range().Intersect(optional) // TODO(jamieliu): Make Translations writable even if writability is // not required if already kept-dirty by another writable translation. perms := usermem.AccessType{ Read: true, Execute: true, } if at.Write { // From this point forward, this memory can be dirtied through the // mapping at any time. d.dirty.KeepDirty(segMR) perms.Write = true } ts = append(ts, memmap.Translation{ Source: segMR, File: mf, Offset: seg.FileRangeOf(segMR).Start, Perms: perms, }) translatedEnd = segMR.End } d.dataMu.Unlock() d.handleMu.RUnlock() // Don't return the error returned by c.cache.Fill if it occurred outside // of required. if translatedEnd < required.End && cerr != nil { return ts, &memmap.BusError{cerr} } if beyondEOF { return ts, &memmap.BusError{io.EOF} } return ts, nil } func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange { const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily if required.Length() >= maxReadahead { return required } if optional.Length() <= maxReadahead { return optional } optional.Start = required.Start if optional.Length() <= maxReadahead { return optional } optional.End = optional.Start + maxReadahead return optional } // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. func (d *dentry) InvalidateUnsavable(ctx context.Context) error { // Whether we have a host fd (and consequently what platform.File is // mapped) can change across save/restore, so invalidate all translations // unconditionally. d.mapsMu.Lock() defer d.mapsMu.Unlock() d.mappings.InvalidateAll(memmap.InvalidateOpts{}) // Write the cache's contents back to the remote file so that if we have a // host fd after restore, the remote file's contents are coherent. mf := d.fs.mfp.MemoryFile() d.dataMu.Lock() defer d.dataMu.Unlock() if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { return err } // Discard the cache so that it's not stored in saved state. This is safe // because per InvalidateUnsavable invariants, no new translations can have // been returned after we invalidated all existing translations above. d.cache.DropAll(mf) d.dirty.RemoveAll() return nil } // Evict implements pgalloc.EvictableMemoryUser.Evict. func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { d.mapsMu.Lock() defer d.mapsMu.Unlock() d.dataMu.Lock() defer d.dataMu.Unlock() mr := memmap.MappableRange{er.Start, er.End} mf := d.fs.mfp.MemoryFile() // Only allow pages that are no longer memory-mapped to be evicted. for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() { mgapMR := mgap.Range().Intersect(mr) if mgapMR.Length() == 0 { continue } if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err) } d.cache.Drop(mgapMR, mf) d.dirty.KeepClean(mgapMR) } } // dentryPlatformFile implements platform.File. It exists solely because dentry // cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef. // // dentryPlatformFile is only used when a host FD representing the remote file // is available (i.e. dentry.handle.fd >= 0), and that FD is used for // application memory mappings (i.e. !filesystem.opts.forcePageCache). type dentryPlatformFile struct { *dentry // fdRefs counts references on platform.File offsets. fdRefs is protected // by dentry.dataMu. fdRefs fsutil.FrameRefSet // If this dentry represents a regular file, and handle.fd >= 0, // hostFileMapper caches mappings of handle.fd. hostFileMapper fsutil.HostFileMapper } // IncRef implements platform.File.IncRef. func (d *dentryPlatformFile) IncRef(fr platform.FileRange) { d.dataMu.Lock() seg, gap := d.fdRefs.Find(fr.Start) for { switch { case seg.Ok() && seg.Start() < fr.End: seg = d.fdRefs.Isolate(seg, fr) seg.SetValue(seg.Value() + 1) seg, gap = seg.NextNonEmpty() case gap.Ok() && gap.Start() < fr.End: newRange := gap.Range().Intersect(fr) usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped) seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty() default: d.fdRefs.MergeAdjacent(fr) d.dataMu.Unlock() return } } } // DecRef implements platform.File.DecRef. func (d *dentryPlatformFile) DecRef(fr platform.FileRange) { d.dataMu.Lock() seg := d.fdRefs.FindSegment(fr.Start) for seg.Ok() && seg.Start() < fr.End { seg = d.fdRefs.Isolate(seg, fr) if old := seg.Value(); old == 1 { usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped) seg = d.fdRefs.Remove(seg).NextSegment() } else { seg.SetValue(old - 1) seg = seg.NextSegment() } } d.fdRefs.MergeAdjacent(fr) d.dataMu.Unlock() } // MapInternal implements platform.File.MapInternal. func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { d.handleMu.RLock() bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write) d.handleMu.RUnlock() return bs, err } // FD implements platform.File.FD. func (d *dentryPlatformFile) FD() int { d.handleMu.RLock() fd := d.handle.fd d.handleMu.RUnlock() return int(fd) }