tinymist_query/index/
mod.rs

1//! Dumps typst knowledge from workspace.
2//!
3//! Reference Impls:
4//! - <https://github.com/sourcegraph/lsif-jsonnet/blob/e186f9fde623efa8735261e9cb059ad3a58b535f/dumper/dumper.go>
5//! - <https://github.com/rust-lang/rust-analyzer/blob/5c0b555a65cadc14a6a16865c3e065c9d30b0bef/crates/ide/src/static_index.rs>
6//! - <https://github.com/rust-lang/rust-analyzer/blob/5c0b555a65cadc14a6a16865c3e065c9d30b0bef/crates/rust-analyzer/src/cli/lsif.rs>
7
8use core::fmt;
9use std::sync::Arc;
10
11use crate::analysis::{SemanticTokens, SharedContext};
12use crate::index::protocol::ResultSet;
13use crate::prelude::Definition;
14use crate::{LocalContext, path_to_url};
15use ecow::EcoString;
16use lsp_types::Url;
17use tinymist_analysis::syntax::classify_syntax;
18use tinymist_std::error::WithContextUntyped;
19use tinymist_std::hash::FxHashMap;
20use tinymist_world::EntryReader;
21use typst::syntax::{FileId, LinkedNode, Source, Span};
22
23pub mod protocol;
24use protocol as p;
25
/// The dumped knowledge.
pub struct Knowledge {
    /// The LSIF meta data (format version, project root, tool info).
    pub meta: p::MetaData,
    /// The per-file indices, one entry per indexed source file.
    pub files: Vec<FileIndex>,
}
33
impl Knowledge {
    /// Binds a shared analysis context to this knowledge, producing a view
    /// that serializes the knowledge as LSIF via its [`fmt::Display`] impl.
    pub fn bind<'a>(&'a self, ctx: &'a Arc<SharedContext>) -> KnowledgeWithContext<'a> {
        KnowledgeWithContext {
            knowledge: self,
            ctx,
        }
    }
}
43
/// A view of knowledge with context for dumping.
///
/// Created by [`Knowledge::bind`]; its [`fmt::Display`] impl writes the
/// knowledge as newline-delimited LSIF JSON entries.
pub struct KnowledgeWithContext<'a> {
    /// The knowledge to serialize.
    knowledge: &'a Knowledge,
    /// The shared analysis context used to resolve sources, URIs and ranges.
    ctx: &'a Arc<SharedContext>,
}
49
50impl fmt::Display for KnowledgeWithContext<'_> {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        let mut files = FxHashMap::default();
53        let mut encoder = LsifEncoder {
54            ctx: self.ctx,
55            writer: f,
56            id: IdCounter::new(),
57            files: &mut files,
58            results: FxHashMap::default(),
59        };
60
61        encoder.emit_meta(&self.knowledge.meta).map_err(|err| {
62            log::error!("cannot write meta data: {err}");
63            fmt::Error
64        })?;
65        encoder.emit_files(&self.knowledge.files).map_err(|err| {
66            log::error!("cannot write files: {err}");
67            fmt::Error
68        })
69    }
70}
71
/// A monotonically increasing allocator for LSIF element ids.
struct IdCounter {
    // The id that will be handed out by the next call to `next`.
    next: usize,
}

impl IdCounter {
    /// Creates a counter that starts issuing ids from zero.
    fn new() -> Self {
        IdCounter { next: 0 }
    }

    /// Issues the current id and advances the counter by one.
    fn next(&mut self) -> usize {
        let issued = self.next;
        self.next = issued + 1;
        issued
    }
}
87
/// Writing of single LSIF entries to an underlying sink.
trait LsifWrite {
    /// Writes one LSIF entry (the element tagged with `id`) as a single
    /// JSON line terminated by `\n`.
    fn write_element(&mut self, id: i32, element: p::Element) -> fmt::Result;
}
91
92impl<T: fmt::Write> LsifWrite for T {
93    fn write_element(&mut self, id: i32, element: p::Element) -> fmt::Result {
94        let entry = p::Entry { id, data: element };
95        self.write_str(&serde_json::to_string(&entry).unwrap())?;
96        self.write_char('\n')
97    }
98}
99
/// Streaming encoder that writes LSIF entries to `writer`.
struct LsifEncoder<'a, W: fmt::Write> {
    /// The shared analysis context for resolving sources, URIs and ranges.
    ctx: &'a Arc<SharedContext>,
    /// The sink receiving one JSON entry per line.
    writer: &'a mut W,
    /// Allocator for unique LSIF element ids.
    id: IdCounter,
    /// Maps file ids to their emitted `Document` vertex ids (emitted lazily).
    files: &'a mut FxHashMap<FileId, i32>,
    /// Maps spans to their emitted `ResultSet` vertex ids (emitted lazily).
    results: FxHashMap<Span, i32>,
}
107
108impl<'a, W: fmt::Write> LsifEncoder<'a, W> {
109    fn alloc_file_id(&mut self, fid: FileId) -> i32 {
110        *self.files.entry(fid).or_insert_with(|| {
111            let id = self.id.next() as i32;
112            self.writer
113                .write_element(
114                    id,
115                    p::Element::Vertex(p::Vertex::Document(&p::Document {
116                        uri: self.ctx.uri_for_id(fid).unwrap_or_else(|err| {
117                            log::error!("cannot get uri for {fid:?}: {err}");
118                            Url::parse("file:///unknown").unwrap()
119                        }),
120                        language_id: EcoString::inline("typst"),
121                    })),
122                )
123                .unwrap();
124
125            id
126        })
127    }
128
129    fn alloc_result_id(&mut self, span: Span) -> tinymist_std::Result<i32> {
130        if let Some(id) = self.results.get(&span) {
131            return Ok(*id);
132        }
133
134        let id = self.emit_element(p::Element::Vertex(p::Vertex::ResultSet(ResultSet {
135            key: None,
136        })))?;
137
138        self.results.insert(span, id);
139        Ok(id)
140    }
141
142    fn emit_element(&mut self, element: p::Element) -> tinymist_std::Result<i32> {
143        let id = self.id.next() as i32;
144        self.writer
145            .write_element(id, element)
146            .context_ut("cannot write element")?;
147        Ok(id)
148    }
149
150    fn emit_meta(&mut self, meta: &p::MetaData) -> tinymist_std::Result<()> {
151        let obj = p::Element::Vertex(p::Vertex::MetaData(meta));
152        self.emit_element(obj).map(|_| ())
153    }
154
155    fn emit_files(&mut self, files: &[FileIndex]) -> tinymist_std::Result<()> {
156        for (idx, file) in files.iter().enumerate() {
157            eprintln!("emit file: {:?}, {idx} of {}", file.fid, files.len());
158            let source = self
159                .ctx
160                .source_by_id(file.fid)
161                .context_ut("cannot get source")?;
162            let fid = self.alloc_file_id(file.fid);
163            let semantic_tokens_id =
164                self.emit_element(p::Element::Vertex(p::Vertex::SemanticTokensResult {
165                    result: lsp_types::SemanticTokens {
166                        result_id: None,
167                        data: file.semantic_tokens.as_ref().clone(),
168                    },
169                }))?;
170            self.emit_element(p::Element::Edge(p::Edge::SemanticTokens(p::EdgeData {
171                out_v: fid,
172                in_v: semantic_tokens_id,
173            })))?;
174
175            let tokens_id = file
176                .references
177                .iter()
178                .flat_map(|(k, v)| {
179                    let rng = self.emit_span(*k, &source);
180                    let def_rng = self.emit_def_span(v, &source, false);
181                    rng.into_iter().chain(def_rng.into_iter())
182                })
183                .collect();
184            self.emit_element(p::Element::Edge(p::Edge::Contains(p::EdgeDataMultiIn {
185                out_v: fid,
186                in_vs: tokens_id,
187            })))?;
188
189            for (s, def) in &file.references {
190                let res_id = self.alloc_result_id(*s)?;
191                self.emit_element(p::Element::Edge(p::Edge::Next(p::EdgeData {
192                    out_v: res_id,
193                    in_v: fid,
194                })))?;
195                let def_id = self.emit_element(p::Element::Vertex(p::Vertex::DefinitionResult))?;
196                let Some(def_range) = self.emit_def_span(def, &source, true) else {
197                    continue;
198                };
199                let Some(file_id) = def.file_id() else {
200                    continue;
201                };
202                let file_vertex_id = self.alloc_file_id(file_id);
203
204                self.emit_element(p::Element::Edge(p::Edge::Item(p::Item {
205                    document: file_vertex_id,
206                    property: None,
207                    edge_data: p::EdgeDataMultiIn {
208                        in_vs: vec![def_range],
209                        out_v: def_id,
210                    },
211                })))?;
212                self.emit_element(p::Element::Edge(p::Edge::Definition(p::EdgeData {
213                    in_v: def_id,
214                    out_v: res_id,
215                })))?;
216            }
217        }
218        Ok(())
219    }
220
221    fn emit_span(&mut self, span: Span, source: &Source) -> Option<i32> {
222        let range = source.range(span)?;
223        self.emit_element(p::Element::Vertex(p::Vertex::Range {
224            range: self.ctx.to_lsp_range(range, source),
225            tag: None,
226        }))
227        .ok()
228    }
229
230    fn emit_def_span(&mut self, def: &Definition, source: &Source, external: bool) -> Option<i32> {
231        let s = def.decl.span();
232        if !s.is_detached() && s.id() == Some(source.id()) {
233            self.emit_span(s, source)
234        } else if let Some(fid) = def.file_id()
235            && fid == source.id()
236        {
237            // todo: module it self
238            None
239        } else if external && !s.is_detached() {
240            let external_src = self.ctx.source_by_id(def.file_id()?).ok()?;
241            self.emit_span(s, &external_src)
242        } else {
243            None
244        }
245    }
246}
247
/// The index of a file.
pub struct FileIndex {
    /// The file id.
    pub fid: FileId,
    /// The semantic tokens of the file.
    pub semantic_tokens: SemanticTokens,
    /// The documentation of the file.
    ///
    /// NOTE(review): currently always a placeholder string set by
    /// `DumpWorker::file` (marked `todo` there).
    pub documentation: Option<EcoString>,
    /// The references in the file, keyed by the span of each reference site
    /// and mapped to its resolved definition.
    pub references: FxHashMap<Span, Definition>,
}
259
260/// Dumps typst knowledge in [LSIF] format from workspace.
261///
262/// [LSIF]: https://microsoft.github.io/language-server-protocol/specifications/lsif/0.6.0/specification/
263pub fn knowledge(ctx: &mut LocalContext) -> tinymist_std::Result<Knowledge> {
264    let root = ctx
265        .world()
266        .entry_state()
267        .workspace_root()
268        .ok_or_else(|| tinymist_std::error_once!("workspace root is not set"))?;
269
270    let files = ctx.source_files().clone();
271
272    let mut worker = DumpWorker {
273        ctx,
274        strings: FxHashMap::default(),
275        references: FxHashMap::default(),
276    };
277    let files = files
278        .iter()
279        .map(move |fid| worker.file(fid))
280        .collect::<tinymist_std::Result<Vec<FileIndex>>>()?;
281
282    Ok(Knowledge {
283        meta: p::MetaData {
284            version: "0.6.0".to_string(),
285            project_root: path_to_url(&root)?,
286            position_encoding: p::Encoding::Utf16,
287            tool_info: Some(p::ToolInfo {
288                name: "tinymist".to_string(),
289                args: vec![],
290                version: Some(env!("CARGO_PKG_VERSION").to_string()),
291            }),
292        },
293        files,
294    })
295}
296
/// Worker that walks source files and collects their references.
struct DumpWorker<'a> {
    /// The context.
    ctx: &'a mut LocalContext,
    /// A string interner, deduplicating repeated strings.
    strings: FxHashMap<EcoString, EcoString>,
    /// The references collected for the file currently being walked;
    /// drained after each file by `DumpWorker::file`.
    references: FxHashMap<Span, Definition>,
}
305
306impl DumpWorker<'_> {
307    fn file(&mut self, fid: &FileId) -> tinymist_std::Result<FileIndex> {
308        let source = self.ctx.source_by_id(*fid).context_ut("cannot parse")?;
309        let semantic_tokens = crate::SemanticTokensFullRequest::compute(self.ctx, &source);
310
311        let root = LinkedNode::new(source.root());
312        self.walk(&source, &root);
313        let references = std::mem::take(&mut self.references);
314
315        Ok(FileIndex {
316            fid: *fid,
317            semantic_tokens,
318            documentation: Some(self.intern("File documentation.")), // todo
319            references,
320        })
321    }
322
323    fn intern(&mut self, s: &str) -> EcoString {
324        if let Some(v) = self.strings.get(s) {
325            return v.clone();
326        }
327        let v = EcoString::from(s);
328        self.strings.insert(v.clone(), v.clone());
329        v
330    }
331
332    fn walk(&mut self, source: &Source, node: &LinkedNode) {
333        if node.get().children().len() == 0 {
334            let Some(syntax) = classify_syntax(node.clone(), node.offset()) else {
335                return;
336            };
337            let span = syntax.node().span();
338            if self.references.contains_key(&span) {
339                return;
340            }
341
342            let Some(def) = self.ctx.def_of_syntax(source, syntax) else {
343                return;
344            };
345            self.references.insert(span, def);
346
347            return;
348        }
349
350        for child in node.children() {
351            self.walk(source, &child);
352        }
353    }
354}