tinymist_vfs/
lib.rs

1//! upstream of following files <https://github.com/rust-lang/rust-analyzer/tree/master/crates/vfs>
2//!   ::path_interner.rs -> path_interner.rs
3
4/// Provides ProxyAccessModel that makes access to JavaScript objects for
5/// browser compilation.
6#[cfg(feature = "browser")]
7pub mod browser;
8
9/// Provides SystemAccessModel that makes access to the local file system for
10/// system compilation.
11#[cfg(feature = "system")]
12pub mod system;
13
14/// Provides dummy access model.
15///
16/// Note: we can still perform compilation with dummy access model, since
17/// [`Vfs`] will make a overlay access model over the provided dummy access
18/// model.
19pub mod dummy;
20
21/// Provides mock access models and in-memory workspaces for tests.
22#[cfg(any(test, feature = "mock"))]
23pub mod mock;
24
25/// Provides snapshot models
26pub mod snapshot;
27pub use snapshot::*;
28use tinymist_std::hash::{FxDashMap, FxHashMap};
29
30/// Provides notify access model which retrieves file system events and changes
31/// from some notify backend.
32pub mod notify;
33pub use notify::{FilesystemEvent, MemoryEvent};
34/// Provides overlay access model which allows to shadow the underlying access
35/// model with memory contents.
36pub mod overlay;
37/// Provides resolve access model.
38pub mod resolve;
39/// Provides trace access model which traces the underlying access model.
40pub mod trace;
41mod utils;
42
43mod path_mapper;
44pub use path_mapper::{PathResolution, RootResolver, WorkspaceResolution, WorkspaceResolver};
45
46use core::fmt;
47use std::num::NonZeroUsize;
48use std::sync::OnceLock;
49use std::{path::Path, sync::Arc};
50
51use ecow::EcoVec;
52use parking_lot::Mutex;
53use rpds::RedBlackTreeMapSync;
54use typst::diag::{FileError, FileResult};
55use typst::foundations::Dict;
56use typst::syntax::Source;
57use typst::utils::LazyHash;
58
59use crate::notify::NotifyAccessModel;
60use crate::overlay::OverlayAccessModel;
61use crate::resolve::ResolveAccessModel;
62
63pub use tinymist_std::ImmutPath;
64pub use tinymist_std::time::Time;
65pub use typst::foundations::Bytes;
66pub use typst::syntax::FileId;
67
68/// Immutable prehashed reference to dictionary.
69pub type ImmutDict = Arc<LazyHash<Dict>>;
70
71/// A trait for accessing underlying file system.
72///
73/// This trait is simplified by [`Vfs`] and requires a minimal method set for
74/// typst compilation.
75pub trait PathAccessModel {
76    /// Clears the cache of the access model.
77    ///
78    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
79    /// more information.
80    fn reset(&mut self) {}
81
82    /// Returns the content of a file entry.
83    fn content(&self, src: &Path) -> FileResult<Bytes>;
84}
85
86/// A trait for accessing underlying file system.
87///
88/// This trait is simplified by [`Vfs`] and requires a minimal method set for
89/// typst compilation.
90pub trait AccessModel {
91    /// Clears the cache of the access model.
92    ///
93    /// This is called when the vfs is reset. See [`Vfs`]'s reset method for
94    /// more information.
95    fn reset(&mut self) {}
96
97    /// Returns the content of a file entry.
98    fn content(&self, src: FileId) -> (Option<ImmutPath>, FileResult<Bytes>);
99}
100
101type VfsPathAccessModel<M> = OverlayAccessModel<ImmutPath, NotifyAccessModel<M>>;
102/// we add notify access model here since notify access model doesn't introduce
103/// overheads by our observation
104type VfsAccessModel<M> = OverlayAccessModel<FileId, ResolveAccessModel<VfsPathAccessModel<M>>>;
105
106/// A trait to perform file system query.
107pub trait FsProvider {
108    /// Gets the file path corresponding to the given `id`.
109    fn file_path(&self, id: FileId) -> FileResult<PathResolution>;
110    /// Gets the file content corresponding to the given `id`.
111    fn read(&self, id: FileId) -> FileResult<Bytes>;
112    /// Gets the source code corresponding to the given `id`. It is preferred to
113    /// be used for source files so that parsing is reused across editions.
114    fn read_source(&self, id: FileId) -> FileResult<Source>;
115}
116
117struct SourceEntry {
118    last_accessed_rev: NonZeroUsize,
119    source: FileResult<Source>,
120}
121
122#[derive(Default)]
123struct SourceIdShard {
124    last_accessed_rev: usize,
125    recent_source: Option<Source>,
126    sources: FxHashMap<Bytes, SourceEntry>,
127}
128
129/// A source cache shared across VFS.
130#[derive(Default, Clone)]
131pub struct SourceCache {
132    /// The cache entries for each paths
133    cache_entries: Arc<FxDashMap<FileId, SourceIdShard>>,
134}
135
136impl SourceCache {
137    /// Evicts cache, given a current revision `curr`, and a threshold. The too
138    /// old cache entries will be evicted from the cache.
139    pub fn evict(&self, curr: NonZeroUsize, threshold: usize) {
140        self.cache_entries.retain(|_, shard| {
141            let diff = curr.get().saturating_sub(shard.last_accessed_rev);
142            if diff > threshold {
143                return false;
144            }
145
146            shard.sources.retain(|_, entry| {
147                let diff = curr.get().saturating_sub(entry.last_accessed_rev.get());
148                diff <= threshold
149            });
150
151            true
152        });
153    }
154}
155
156/// Creates a new `Vfs` harnessing over the given `access_model` specific for
157/// `reflexo_world::CompilerWorld`. With vfs, we can minimize the
158/// implementation overhead for [`AccessModel`] trait.
159pub struct Vfs<M: PathAccessModel + Sized> {
160    source_cache: SourceCache,
161    managed: Arc<Mutex<EntryMap>>,
162    paths: Arc<Mutex<PathMap>>,
163    revision: NonZeroUsize,
164    // access_model: TraceAccessModel<VfsAccessModel<M>>,
165    /// The wrapped access model.
166    access_model: VfsAccessModel<M>,
167}
168
169impl<M: PathAccessModel + Sized> fmt::Debug for Vfs<M> {
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        f.debug_struct("Vfs")
172            .field("revision", &self.revision)
173            .field("managed", &self.managed.lock().entries.size())
174            .field("paths", &self.paths.lock().paths.len())
175            .finish()
176    }
177}
178
179impl<M: PathAccessModel + Clone + Sized> Vfs<M> {
180    /// Gets current revision of the vfs.
181    pub fn revision(&self) -> NonZeroUsize {
182        self.revision
183    }
184
185    /// Performs snapshot with sharing cache and managed resource.
186    pub fn snapshot(&self) -> Self {
187        Self {
188            revision: self.revision,
189            source_cache: self.source_cache.clone(),
190            managed: self.managed.clone(),
191            paths: self.paths.clone(),
192            access_model: self.access_model.clone(),
193        }
194    }
195
196    /// Performs snapshot with sharing cache, but not the resources.
197    pub fn fork(&self) -> Self {
198        Self {
199            // todo: it is not correct to merely share source cache.
200            source_cache: self.source_cache.clone(),
201            managed: Arc::new(Mutex::new(EntryMap::default())),
202            paths: Arc::new(Mutex::new(PathMap::default())),
203            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
204            access_model: self.access_model.clone(),
205        }
206    }
207
208    /// Detects whether the vfs is clean respecting a given revision and
209    /// `file_ids`.
210    pub fn is_clean_compile(&self, rev: usize, file_ids: &[FileId]) -> bool {
211        let mut m = self.managed.lock();
212        for id in file_ids {
213            let Some(entry) = m.entries.get_mut(id) else {
214                log::debug!("Vfs(dirty, {id:?}): file id not found");
215                return false;
216            };
217            if entry.changed_at > rev {
218                log::debug!("Vfs(dirty, {id:?}): rev {rev:?} => {:?}", entry.changed_at);
219                return false;
220            }
221            log::debug!(
222                "Vfs(clean, {id:?}, rev={rev}, changed_at={})",
223                entry.changed_at
224            );
225        }
226        true
227    }
228}
229
230impl<M: PathAccessModel + Sized> Vfs<M> {
231    /// Creates a new `Vfs` with a given `access_model`.
232    ///
233    /// Retrieving an [`AccessModel`], it will further wrap the access model
234    /// with [`OverlayAccessModel`] and [`NotifyAccessModel`]. This means that
235    /// you don't need to implement:
236    /// + overlay: allowing to shadow the underlying access model with memory
237    ///   contents, which is useful for a limited execution environment and
238    ///   instrumenting or overriding source files or packages.
239    /// + notify: regards problems of synchronizing with the file system when
240    ///   the vfs is watching the file system.
241    ///
242    /// See [`AccessModel`] for more information.
243    pub fn new(resolver: Arc<dyn RootResolver + Send + Sync>, access_model: M) -> Self {
244        let access_model = NotifyAccessModel::new(access_model);
245        let access_model = OverlayAccessModel::new(access_model);
246        let access_model = ResolveAccessModel {
247            resolver,
248            inner: access_model,
249        };
250        let access_model = OverlayAccessModel::new(access_model);
251
252        // If you want to trace the access model, uncomment the following line
253        // let access_model = TraceAccessModel::new(access_model);
254
255        Self {
256            source_cache: SourceCache::default(),
257            managed: Arc::default(),
258            paths: Arc::default(),
259            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
260            access_model,
261        }
262    }
263
264    /// Resets all state.
265    pub fn reset_all(&mut self) {
266        self.reset_access_model();
267        self.reset_read();
268        self.take_source_cache();
269    }
270
271    /// Resets access model.
272    pub fn reset_access_model(&mut self) {
273        self.access_model.reset();
274    }
275
276    /// Resets all read caches. This can happen when:
277    /// - package paths are reconfigured.
278    /// - The root of the workspace is switched.
279    pub fn reset_read(&mut self) {
280        self.managed = Arc::default();
281        self.paths = Arc::default();
282    }
283
284    /// Clears the cache that is not touched for a long time.
285    pub fn evict(&mut self, threshold: usize) {
286        let mut m = self.managed.lock();
287        let rev = self.revision.get();
288        for (id, entry) in m.entries.clone().iter() {
289            let entry_rev = entry.bytes.get().map(|e| e.1).unwrap_or_default();
290            if entry_rev + threshold < rev {
291                m.entries.remove_mut(id);
292            }
293        }
294    }
295
296    /// Takes source cache. It also cleans the cache in the current vfs.
297    pub fn take_source_cache(&mut self) -> SourceCache {
298        std::mem::take(&mut self.source_cache)
299    }
300
301    /// Takes source cache for sharing.
302    pub fn clone_source_cache(&self) -> SourceCache {
303        self.source_cache.clone()
304    }
305
306    /// Resolve the real path for a file id.
307    pub fn file_path(&self, id: FileId) -> Result<PathResolution, FileError> {
308        self.access_model.inner.resolver.path_for_id(id)
309    }
310
311    /// Get paths to all the shadowing paths in [`OverlayAccessModel`].
312    pub fn shadow_paths(&self) -> Vec<ImmutPath> {
313        self.access_model.inner.inner.file_paths()
314    }
315
316    /// Get paths to all the shadowing file ids in [`OverlayAccessModel`].
317    ///
318    /// The in memory untitled files can have no path so
319    /// they only have file ids.
320    pub fn shadow_ids(&self) -> Vec<FileId> {
321        self.access_model.file_paths()
322    }
323
324    /// Returns the overall memory usage for the stored files.
325    pub fn memory_usage(&self) -> usize {
326        0
327    }
328
329    /// Obtains an object to revise. The object will update the original vfs
330    /// when it is dropped.
331    pub fn revise(&mut self) -> RevisingVfs<'_, M> {
332        let managed = self.managed.lock().clone();
333        let paths = self.paths.lock().clone();
334        let goal_revision = self.revision.checked_add(1).expect("revision overflowed");
335
336        RevisingVfs {
337            managed,
338            paths,
339            inner: self,
340            goal_revision,
341            view_changed: false,
342        }
343    }
344
345    /// Obtains an object to display.
346    pub fn display(&self) -> DisplayVfs<'_, M> {
347        DisplayVfs { inner: self }
348    }
349
350    /// Reads a file by id.
351    pub fn read(&self, fid: FileId) -> FileResult<Bytes> {
352        let bytes = self.managed.lock().slot(fid, |entry| entry.bytes.clone());
353
354        self.read_content(&bytes, fid).clone()
355    }
356
357    /// Reads a source file by id. It is preferred to be used for source files
358    /// so that parsing is reused across editions.
359    pub fn source(&self, file_id: FileId) -> FileResult<Source> {
360        let (bytes, source) = self
361            .managed
362            .lock()
363            .slot(file_id, |entry| (entry.bytes.clone(), entry.source.clone()));
364
365        let source = source.get_or_init(|| {
366            let content = self
367                .read_content(&bytes, file_id)
368                .as_ref()
369                .map_err(Clone::clone)?;
370
371            let mut cache_entry = self.source_cache.cache_entries.entry(file_id).or_default();
372            if let Some(source) = cache_entry.sources.get(content) {
373                return source.source.clone();
374            }
375
376            let source = (|| {
377                let prev = cache_entry.recent_source.clone();
378                let content = from_utf8_or_bom(content).map_err(|_| FileError::InvalidUtf8)?;
379
380                let next = match prev {
381                    Some(mut prev) => {
382                        prev.replace(content);
383                        prev
384                    }
385                    None => Source::new(file_id, content.to_owned()),
386                };
387
388                let should_update = cache_entry.recent_source.is_none()
389                    || cache_entry.last_accessed_rev < self.revision.get();
390                if should_update {
391                    cache_entry.recent_source = Some(next.clone());
392                }
393
394                Ok(next)
395            })();
396
397            let entry = cache_entry
398                .sources
399                .entry(content.clone())
400                .or_insert_with(|| SourceEntry {
401                    last_accessed_rev: self.revision,
402                    source: source.clone(),
403                });
404
405            if entry.last_accessed_rev < self.revision {
406                entry.last_accessed_rev = self.revision;
407            }
408
409            source
410        });
411
412        source.clone()
413    }
414
415    /// Reads and caches content of a file.
416    fn read_content<'a>(&self, bytes: &'a BytesQuery, fid: FileId) -> &'a FileResult<Bytes> {
417        &bytes
418            .get_or_init(|| {
419                let (path, content) = self.access_model.content(fid);
420                if let Some(path) = path.as_ref() {
421                    self.paths.lock().insert(path, fid, self.revision);
422                }
423
424                (path, self.revision.get(), content)
425            })
426            .2
427    }
428}
429
430/// A display wrapper for [`Vfs`].
431pub struct DisplayVfs<'a, M: PathAccessModel + Sized> {
432    inner: &'a Vfs<M>,
433}
434
435impl<M: PathAccessModel + Sized> fmt::Debug for DisplayVfs<'_, M> {
436    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
437        f.debug_struct("Vfs")
438            .field("revision", &self.inner.revision)
439            .field("managed", &self.inner.managed.lock().display())
440            .field("paths", &self.inner.paths.lock().display())
441            .finish()
442    }
443}
444
445/// A revising wrapper for [`Vfs`].
446pub struct RevisingVfs<'a, M: PathAccessModel + Sized> {
447    inner: &'a mut Vfs<M>,
448    managed: EntryMap,
449    paths: PathMap,
450    goal_revision: NonZeroUsize,
451    view_changed: bool,
452}
453
454impl<M: PathAccessModel + Sized> Drop for RevisingVfs<'_, M> {
455    fn drop(&mut self) {
456        if self.view_changed {
457            self.inner.managed = Arc::new(Mutex::new(std::mem::take(&mut self.managed)));
458            self.inner.paths = Arc::new(Mutex::new(std::mem::take(&mut self.paths)));
459            let revision = &mut self.inner.revision;
460            *revision = self.goal_revision;
461        }
462    }
463}
464
465impl<M: PathAccessModel + Sized> RevisingVfs<'_, M> {
466    /// Returns the underlying vfs.
467    pub fn vfs(&mut self) -> &mut Vfs<M> {
468        self.inner
469    }
470
471    fn am(&mut self) -> &mut VfsAccessModel<M> {
472        &mut self.inner.access_model
473    }
474
475    fn invalidate_path(&mut self, path: &Path, snap: Option<&FileSnapshot>) {
476        if let Some(fids) = self.paths.get(path) {
477            if fids.is_empty() {
478                return;
479            }
480
481            // Always changes view if snap is none.
482            self.view_changed = snap.is_none();
483            for fid in fids.clone() {
484                self.invalidate_file_id(fid, snap);
485            }
486        }
487    }
488
489    fn invalidate_file_id(&mut self, file_id: FileId, snap: Option<&FileSnapshot>) {
490        let mut changed = false;
491        self.managed.slot(file_id, |e| {
492            if let Some(snap) = snap {
493                let may_read_bytes = e.bytes.get().map(|b| &b.2);
494                match (snap, may_read_bytes) {
495                    (FileSnapshot(Ok(snap)), Some(Ok(read))) if snap == read => {
496                        return;
497                    }
498                    (FileSnapshot(Err(snap)), Some(Err(read))) if snap.as_ref() == read => {
499                        return;
500                    }
501                    _ => {}
502                }
503            }
504
505            e.changed_at = self.goal_revision.get();
506            e.bytes = Arc::default();
507            e.source = Arc::default();
508            changed = true;
509        });
510        self.view_changed = changed;
511    }
512
513    /// Reset the shadowing files in [`OverlayAccessModel`].
514    pub fn reset_shadow(&mut self) {
515        for path in self.am().inner.inner.file_paths() {
516            self.invalidate_path(&path, None);
517        }
518        for fid in self.am().file_paths() {
519            self.invalidate_file_id(fid, None);
520        }
521
522        self.am().clear_shadow();
523        self.am().inner.inner.clear_shadow();
524    }
525
526    /// Unconditionally changes the view of the vfs.
527    pub fn change_view(&mut self) -> FileResult<()> {
528        self.view_changed = true;
529        Ok(())
530    }
531
532    /// Adds a shadowing file to the [`OverlayAccessModel`].
533    pub fn map_shadow(&mut self, path: &Path, snap: FileSnapshot) -> FileResult<()> {
534        self.invalidate_path(path, Some(&snap));
535        self.am().inner.inner.add_file(path, snap, |c| c.into());
536
537        Ok(())
538    }
539
540    /// Removes a shadowing file from the [`OverlayAccessModel`].
541    pub fn unmap_shadow(&mut self, path: &Path) -> FileResult<()> {
542        self.invalidate_path(path, None);
543        self.am().inner.inner.remove_file(path);
544
545        Ok(())
546    }
547
548    /// Adds a shadowing file to the [`OverlayAccessModel`] by file id.
549    pub fn map_shadow_by_id(&mut self, file_id: FileId, snap: FileSnapshot) -> FileResult<()> {
550        self.invalidate_file_id(file_id, Some(&snap));
551        self.am().add_file(&file_id, snap, |c| *c);
552
553        Ok(())
554    }
555
556    /// Removes a shadowing file from the [`OverlayAccessModel`] by file id.
557    pub fn remove_shadow_by_id(&mut self, file_id: FileId) {
558        self.invalidate_file_id(file_id, None);
559        self.am().remove_file(&file_id);
560    }
561
562    /// Notifies the access model with a filesystem event.
563    ///
564    /// See [`NotifyAccessModel`] for more information.
565    pub fn notify_fs_event(&mut self, event: FilesystemEvent) {
566        self.notify_fs_changes(event.split().0);
567    }
568    /// Notifies the access model with a filesystem changes.
569    ///
570    /// See [`NotifyAccessModel`] for more information.
571    pub fn notify_fs_changes(&mut self, event: FileChangeSet) {
572        for path in &event.removes {
573            self.invalidate_path(path, None);
574        }
575        for (path, snap) in &event.inserts {
576            self.invalidate_path(path, Some(snap));
577        }
578
579        self.am().inner.inner.inner.notify(event);
580    }
581}
582
583type BytesQuery = Arc<OnceLock<(Option<ImmutPath>, usize, FileResult<Bytes>)>>;
584
585#[derive(Debug, Clone, Default)]
586struct VfsEntry {
587    changed_at: usize,
588    bytes: BytesQuery,
589    source: Arc<OnceLock<FileResult<Source>>>,
590}
591
592#[derive(Debug, Clone, Default)]
593struct EntryMap {
594    entries: RedBlackTreeMapSync<FileId, VfsEntry>,
595}
596
597impl EntryMap {
598    /// Read a slot.
599    #[inline(always)]
600    fn slot<T>(&mut self, path: FileId, f: impl FnOnce(&mut VfsEntry) -> T) -> T {
601        if let Some(entry) = self.entries.get_mut(&path) {
602            f(entry)
603        } else {
604            let mut entry = VfsEntry::default();
605            let res = f(&mut entry);
606            self.entries.insert_mut(path, entry);
607            res
608        }
609    }
610
611    fn display(&self) -> DisplayEntryMap<'_> {
612        DisplayEntryMap { map: self }
613    }
614}
615
616/// A display wrapper for `EntryMap`.
617pub struct DisplayEntryMap<'a> {
618    map: &'a EntryMap,
619}
620
621impl fmt::Debug for DisplayEntryMap<'_> {
622    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
623        f.debug_map().entries(self.map.entries.iter()).finish()
624    }
625}
626
627#[derive(Debug, Clone, Default)]
628struct PathMap {
629    paths: FxHashMap<ImmutPath, EcoVec<FileId>>,
630    file_ids: FxHashMap<FileId, (ImmutPath, NonZeroUsize)>,
631}
632
633impl PathMap {
634    fn insert(&mut self, next: &ImmutPath, fid: FileId, rev: NonZeroUsize) {
635        use std::collections::hash_map::Entry;
636        let rev_entry = self.file_ids.entry(fid);
637
638        match rev_entry {
639            Entry::Occupied(mut entry) => {
640                let (prev, prev_rev) = entry.get_mut();
641                if prev != next {
642                    if *prev_rev == rev {
643                        log::warn!("Vfs: {fid:?} is changed in rev({rev:?}), {prev:?} -> {next:?}");
644                    }
645
646                    if let Some(fids) = self.paths.get_mut(prev) {
647                        fids.retain(|f| *f != fid);
648                    }
649
650                    *prev = next.clone();
651                    *prev_rev = rev;
652
653                    self.paths.entry(next.clone()).or_default().push(fid);
654                }
655            }
656            Entry::Vacant(entry) => {
657                entry.insert((next.clone(), rev));
658                self.paths.entry(next.clone()).or_default().push(fid);
659            }
660        }
661    }
662
663    fn get(&mut self, path: &Path) -> Option<&EcoVec<FileId>> {
664        self.paths.get(path)
665    }
666
667    fn display(&self) -> DisplayPathMap<'_> {
668        DisplayPathMap { map: self }
669    }
670}
671
672/// A display wrapper for `PathMap`.
673pub struct DisplayPathMap<'a> {
674    map: &'a PathMap,
675}
676
677impl fmt::Debug for DisplayPathMap<'_> {
678    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
679        f.debug_map().entries(self.map.paths.iter()).finish()
680    }
681}
682
683/// Convert a byte slice to a string, removing UTF-8 BOM if present.
684fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
685    Ok(std::str::from_utf8(if buf.starts_with(b"\xef\xbb\xbf") {
686        // remove UTF-8 BOM
687        &buf[3..]
688    } else {
689        // Assume UTF-8
690        buf
691    })?)
692}
693
694#[cfg(test)]
695mod tests {
696    fn is_send<T: Send>() {}
697    fn is_sync<T: Sync>() {}
698
699    #[test]
700    fn test_vfs_send_sync() {
701        is_send::<super::Vfs<super::dummy::DummyAccessModel>>();
702        is_sync::<super::Vfs<super::dummy::DummyAccessModel>>();
703    }
704}