tinymist_query/index/mod.rs

1//! Dumps typst knowledge from workspace.
2//!
3//! Reference Impls:
4//! - <https://github.com/sourcegraph/lsif-jsonnet/blob/e186f9fde623efa8735261e9cb059ad3a58b535f/dumper/dumper.go>
5//! - <https://github.com/rust-lang/rust-analyzer/blob/5c0b555a65cadc14a6a16865c3e065c9d30b0bef/crates/ide/src/static_index.rs>
6//! - <https://github.com/rust-lang/rust-analyzer/blob/5c0b555a65cadc14a6a16865c3e065c9d30b0bef/crates/rust-analyzer/src/cli/lsif.rs>
7
8use core::fmt;
9use std::sync::Arc;
10
11use crate::analysis::{SemanticTokens, SharedContext};
12use crate::index::protocol::ResultSet;
13use crate::prelude::Definition;
14use crate::{LocalContext, path_to_url};
15use ecow::EcoString;
16use lsp_types::Url;
17use tinymist_analysis::syntax::classify_syntax;
18use tinymist_std::error::WithContextUntyped;
19use tinymist_std::hash::FxHashMap;
20use tinymist_std::typst::TypstDocument;
21use tinymist_world::EntryReader;
22use typst::syntax::{FileId, LinkedNode, Source, Span};
23
24pub mod protocol;
25use protocol as p;
26
/// The dumped knowledge.
pub struct Knowledge {
    /// The meta data (LSIF version, project root, and tool info).
    pub meta: p::MetaData,
    /// The per-file indexes making up the dump.
    pub files: Vec<FileIndex>,
}
34
impl Knowledge {
    /// Binds an analysis context to this knowledge, producing a view whose
    /// [`fmt::Display`] impl serializes the dump as LSIF JSON lines.
    pub fn bind<'a>(&'a self, ctx: &'a Arc<SharedContext>) -> KnowledgeWithContext<'a> {
        KnowledgeWithContext {
            knowledge: self,
            ctx,
        }
    }
}
44
/// A view of knowledge with context for dumping.
///
/// Created by [`Knowledge::bind`].
pub struct KnowledgeWithContext<'a> {
    /// The knowledge being dumped.
    knowledge: &'a Knowledge,
    /// The shared analysis context used to resolve sources, URIs, and ranges.
    ctx: &'a Arc<SharedContext>,
}
50
51impl fmt::Display for KnowledgeWithContext<'_> {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        let mut files = FxHashMap::default();
54        let mut encoder = LsifEncoder {
55            ctx: self.ctx,
56            writer: f,
57            id: IdCounter::new(),
58            files: &mut files,
59            results: FxHashMap::default(),
60        };
61
62        encoder.emit_meta(&self.knowledge.meta).map_err(|err| {
63            log::error!("cannot write meta data: {err}");
64            fmt::Error
65        })?;
66        encoder.emit_files(&self.knowledge.files).map_err(|err| {
67            log::error!("cannot write files: {err}");
68            fmt::Error
69        })
70    }
71}
72
/// Allocates monotonically increasing element ids for the LSIF stream.
struct IdCounter {
    /// The id that will be handed out by the next call to [`IdCounter::next`].
    next: usize,
}

impl IdCounter {
    /// Creates a counter whose first allocated id is `0`.
    fn new() -> Self {
        Self { next: 0 }
    }

    /// Hands out the current id and advances the counter by one.
    fn next(&mut self) -> usize {
        let allocated = self.next;
        self.next = allocated + 1;
        allocated
    }
}
88
89trait LsifWrite {
90    fn write_element(&mut self, id: i32, element: p::Element) -> fmt::Result;
91}
92
93impl<T: fmt::Write> LsifWrite for T {
94    fn write_element(&mut self, id: i32, element: p::Element) -> fmt::Result {
95        let entry = p::Entry { id, data: element };
96        self.write_str(&serde_json::to_string(&entry).unwrap())?;
97        self.write_char('\n')
98    }
99}
100
/// Streams LSIF vertices and edges to a writer, allocating ids on the fly.
struct LsifEncoder<'a, W: fmt::Write> {
    /// The shared analysis context used to resolve sources, URIs, and ranges.
    ctx: &'a Arc<SharedContext>,
    /// The sink receiving one JSON line per emitted element.
    writer: &'a mut W,
    /// Allocator for unique element ids.
    id: IdCounter,
    /// Document vertex ids keyed by file id; each document is emitted once.
    files: &'a mut FxHashMap<FileId, i32>,
    /// Result-set vertex ids keyed by the span they describe.
    results: FxHashMap<Span, i32>,
}
108
impl<'a, W: fmt::Write> LsifEncoder<'a, W> {
    /// Returns the `document` vertex id for `fid`, emitting the vertex on
    /// first use and reusing the cached id afterwards.
    fn alloc_file_id(&mut self, fid: FileId) -> i32 {
        *self.files.entry(fid).or_insert_with(|| {
            let id = self.id.next() as i32;
            self.writer
                .write_element(
                    id,
                    p::Element::Vertex(p::Vertex::Document(&p::Document {
                        // Fall back to a placeholder URI so one unresolvable
                        // file does not abort the whole dump.
                        uri: self.ctx.uri_for_id(fid).unwrap_or_else(|err| {
                            log::error!("cannot get uri for {fid:?}: {err}");
                            Url::parse("file:///unknown").unwrap()
                        }),
                        language_id: EcoString::inline("typst"),
                    })),
                )
                .unwrap();

            id
        })
    }

    /// Returns the `resultSet` vertex id for `span`, emitting a fresh result
    /// set on first use and caching the id for later lookups.
    fn alloc_result_id(&mut self, span: Span) -> tinymist_std::Result<i32> {
        if let Some(id) = self.results.get(&span) {
            return Ok(*id);
        }

        let id = self.emit_element(p::Element::Vertex(p::Vertex::ResultSet(ResultSet {
            key: None,
        })))?;

        self.results.insert(span, id);
        Ok(id)
    }

    /// Writes `element` as one JSON line under a freshly allocated id, and
    /// returns that id.
    fn emit_element(&mut self, element: p::Element) -> tinymist_std::Result<i32> {
        let id = self.id.next() as i32;
        self.writer
            .write_element(id, element)
            .context_ut("cannot write element")?;
        Ok(id)
    }

    /// Emits the LSIF `metaData` vertex, discarding its id.
    fn emit_meta(&mut self, meta: &p::MetaData) -> tinymist_std::Result<()> {
        let obj = p::Element::Vertex(p::Vertex::MetaData(meta));
        self.emit_element(obj).map(|_| ())
    }

    /// Emits everything for each file: the document vertex, its semantic
    /// tokens, the contained ranges, and definition links per reference.
    fn emit_files(&mut self, files: &[FileIndex]) -> tinymist_std::Result<()> {
        for (idx, file) in files.iter().enumerate() {
            // Progress report for CLI usage; goes to stderr, not the dump.
            eprintln!("emit file: {:?}, {idx} of {}", file.fid, files.len());
            let source = self
                .ctx
                .source_by_id(file.fid)
                .context_ut("cannot get source")?;
            let fid = self.alloc_file_id(file.fid);
            let semantic_tokens_id =
                self.emit_element(p::Element::Vertex(p::Vertex::SemanticTokensResult {
                    result: lsp_types::SemanticTokens {
                        result_id: None,
                        data: file.semantic_tokens.as_ref().clone(),
                    },
                }))?;
            self.emit_element(p::Element::Edge(p::Edge::SemanticTokens(p::EdgeData {
                out_v: fid,
                in_v: semantic_tokens_id,
            })))?;

            // One range vertex per reference span, plus one per in-file
            // definition span, all attached to the document via `contains`.
            let tokens_id = file
                .references
                .iter()
                .flat_map(|(k, v)| {
                    let rng = self.emit_span(*k, &source);
                    let def_rng = self.emit_def_span(v, &source, false);
                    rng.into_iter().chain(def_rng.into_iter())
                })
                .collect();
            self.emit_element(p::Element::Edge(p::Edge::Contains(p::EdgeDataMultiIn {
                out_v: fid,
                in_vs: tokens_id,
            })))?;

            for (s, def) in &file.references {
                let res_id = self.alloc_result_id(*s)?;
                self.emit_element(p::Element::Edge(p::Edge::Next(p::EdgeData {
                    out_v: res_id,
                    in_v: fid,
                })))?;
                // NOTE(review): this vertex is emitted before the two guards
                // below; when either guard `continue`s, the `definitionResult`
                // is left dangling with no `item`/`definition` edges. Consider
                // computing `def_range`/`file_id` first — confirm consumers
                // tolerate dangling vertices before changing emission order.
                let def_id = self.emit_element(p::Element::Vertex(p::Vertex::DefinitionResult))?;
                let Some(def_range) = self.emit_def_span(def, &source, true) else {
                    continue;
                };
                let Some(file_id) = def.file_id() else {
                    continue;
                };
                let file_vertex_id = self.alloc_file_id(file_id);

                self.emit_element(p::Element::Edge(p::Edge::Item(p::Item {
                    document: file_vertex_id,
                    property: None,
                    edge_data: p::EdgeDataMultiIn {
                        in_vs: vec![def_range],
                        out_v: def_id,
                    },
                })))?;
                self.emit_element(p::Element::Edge(p::Edge::Definition(p::EdgeData {
                    in_v: def_id,
                    out_v: res_id,
                })))?;
            }
        }
        Ok(())
    }

    /// Emits a `range` vertex for `span` within `source`, returning its id.
    ///
    /// Returns `None` when the span has no range in `source` or the write
    /// fails. NOTE(review): ranges are not deduplicated — repeated calls for
    /// the same span emit duplicate vertices (e.g. once from the `contains`
    /// pass and again from the definition pass in `emit_files`).
    fn emit_span(&mut self, span: Span, source: &Source) -> Option<i32> {
        let range = source.range(span)?;
        self.emit_element(p::Element::Vertex(p::Vertex::Range {
            range: self.ctx.to_lsp_range(range, source),
            tag: None,
        }))
        .ok()
    }

    /// Emits a `range` vertex for the declaration site of `def`.
    ///
    /// Emits in-file declarations directly; declarations in other files are
    /// only emitted when `external` is set (loading that file's source).
    /// Returns `None` otherwise, including for module definitions that point
    /// at `source` itself.
    fn emit_def_span(&mut self, def: &Definition, source: &Source, external: bool) -> Option<i32> {
        let s = def.decl.span();
        if !s.is_detached() && s.id() == Some(source.id()) {
            self.emit_span(s, source)
        } else if let Some(fid) = def.file_id()
            && fid == source.id()
        {
            // todo: module it self
            None
        } else if external && !s.is_detached() {
            let external_src = self.ctx.source_by_id(def.file_id()?).ok()?;
            self.emit_span(s, &external_src)
        } else {
            None
        }
    }
}
248
/// The index of a file.
pub struct FileIndex {
    /// The file id.
    pub fid: FileId,
    /// The semantic tokens of the file.
    pub semantic_tokens: SemanticTokens,
    /// The documentation of the file (currently a placeholder string; see the
    /// `todo` where it is filled in).
    pub documentation: Option<EcoString>,
    /// The references in the file, keyed by the span of the referencing
    /// syntax node.
    pub references: FxHashMap<Span, Definition>,
}
260
261/// Dumps typst knowledge in [LSIF] format from workspace.
262///
263/// [LSIF]: https://microsoft.github.io/language-server-protocol/specifications/lsif/0.6.0/specification/
264pub fn knowledge(ctx: &mut LocalContext) -> tinymist_std::Result<Knowledge> {
265    let root = ctx
266        .world()
267        .entry_state()
268        .workspace_root()
269        .ok_or_else(|| tinymist_std::error_once!("workspace root is not set"))?;
270
271    let files = ctx.source_files().clone();
272
273    let mut worker = DumpWorker {
274        ctx,
275        strings: FxHashMap::default(),
276        references: FxHashMap::default(),
277        doc: None,
278    };
279    let files = files
280        .iter()
281        .map(move |fid| worker.file(fid))
282        .collect::<tinymist_std::Result<Vec<FileIndex>>>()?;
283
284    Ok(Knowledge {
285        meta: p::MetaData {
286            version: "0.6.0".to_string(),
287            project_root: path_to_url(&root)?,
288            position_encoding: p::Encoding::Utf16,
289            tool_info: Some(p::ToolInfo {
290                name: "tinymist".to_string(),
291                args: vec![],
292                version: Some(env!("CARGO_PKG_VERSION").to_string()),
293            }),
294        },
295        files,
296    })
297}
298
/// Per-file indexing worker that feeds [`knowledge`].
struct DumpWorker<'a> {
    /// The context.
    ctx: &'a mut LocalContext,
    /// The compiled document used when resolving definitions. Never assigned
    /// in this module, so definition lookup currently runs without one.
    doc: Option<TypstDocument>,
    /// A string interner, deduplicating repeated documentation strings.
    strings: FxHashMap<EcoString, EcoString>,
    /// The references collected so far, keyed by the span of the referencing
    /// syntax node; drained after each file.
    references: FxHashMap<Span, Definition>,
}
309
310impl DumpWorker<'_> {
311    fn file(&mut self, fid: &FileId) -> tinymist_std::Result<FileIndex> {
312        let source = self.ctx.source_by_id(*fid).context_ut("cannot parse")?;
313        let semantic_tokens = crate::SemanticTokensFullRequest::compute(self.ctx, &source);
314
315        let root = LinkedNode::new(source.root());
316        self.walk(&source, &root);
317        let references = std::mem::take(&mut self.references);
318
319        Ok(FileIndex {
320            fid: *fid,
321            semantic_tokens,
322            documentation: Some(self.intern("File documentation.")), // todo
323            references,
324        })
325    }
326
327    fn intern(&mut self, s: &str) -> EcoString {
328        if let Some(v) = self.strings.get(s) {
329            return v.clone();
330        }
331        let v = EcoString::from(s);
332        self.strings.insert(v.clone(), v.clone());
333        v
334    }
335
336    fn walk(&mut self, source: &Source, node: &LinkedNode) {
337        if node.get().children().len() == 0 {
338            let Some(syntax) = classify_syntax(node.clone(), node.offset()) else {
339                return;
340            };
341            let span = syntax.node().span();
342            if self.references.contains_key(&span) {
343                return;
344            }
345
346            let Some(def) = self.ctx.def_of_syntax(source, self.doc.as_ref(), syntax) else {
347                return;
348            };
349            self.references.insert(span, def);
350
351            return;
352        }
353
354        for child in node.children() {
355            self.walk(source, &child);
356        }
357    }
358}