tinymist_query/syntax/
lexical_hierarchy.rs

1use std::ops::{Deref, Range};
2
3use ecow::{EcoString, EcoVec, eco_vec};
4use lsp_types::SymbolKind;
5use serde::{Deserialize, Serialize};
6use typst::syntax::{
7    LinkedNode, Source, SyntaxKind,
8    ast::{self},
9};
10use typst_shim::utils::LazyHash;
11
12use super::{CommentGroupMatcher, is_mark};
13
14#[typst_macros::time(span = source.root().span())]
15pub(crate) fn get_lexical_hierarchy(
16    source: &Source,
17    scope_kind: LexicalScopeKind,
18) -> Option<EcoVec<LexicalHierarchy>> {
19    let start = tinymist_std::time::Instant::now();
20    let root = LinkedNode::new(source.root());
21
22    let mut worker = LexicalHierarchyWorker {
23        sk: scope_kind,
24        ..LexicalHierarchyWorker::default()
25    };
26    worker.stack.push((
27        LexicalInfo {
28            name: "deadbeef".into(),
29            kind: LexicalKind::Heading(-1),
30            range: 0..0,
31        },
32        eco_vec![],
33    ));
34    let res = match worker.check_node(root) {
35        Some(()) => Some(()),
36        None => {
37            log::error!("lexical hierarchy analysis failed");
38            None
39        }
40    };
41
42    while worker.stack.len() > 1 {
43        worker.finish_hierarchy();
44    }
45
46    crate::log_debug_ct!("lexical hierarchy analysis took {:?}", start.elapsed());
47    res.map(|_| worker.stack.pop().unwrap().1)
48}
49
50/// The kind of a variable.
51#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
52pub enum LexicalVarKind {
53    /// A value reference.
54    /// `#foo`
55    ///   ^^^
56    ValRef,
57    /// A label reference.
58    /// `@foo`
59    ///   ^^^
60    LabelRef,
61    /// A label.
62    /// `<foo>`
63    ///   ^^^
64    Label,
65    /// A bib key.
66    /// `x:`
67    ///  ^^
68    BibKey,
69    /// A variable.
70    /// `let foo`
71    ///      ^^^
72    Variable,
73    /// A function.
74    /// `let foo()`
75    ///      ^^^
76    Function,
77}
78
79/// The kind of a lexical hierarchy recogized by the analyzers.
80#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
81pub enum LexicalKind {
82    /// A heading.
83    Heading(i16),
84    /// A variable.
85    Var(LexicalVarKind),
86    /// A block.
87    Block,
88    /// A comment group.
89    CommentGroup,
90}
91
92impl LexicalKind {
93    /// Creates a label.
94    const fn label() -> LexicalKind {
95        LexicalKind::Var(LexicalVarKind::Label)
96    }
97
98    /// Creates a function.
99    const fn function() -> LexicalKind {
100        LexicalKind::Var(LexicalVarKind::Function)
101    }
102
103    /// Creates a variable.
104    const fn variable() -> LexicalKind {
105        LexicalKind::Var(LexicalVarKind::Variable)
106    }
107
108    /// Checks if the kind is a valid LSP symbol.
109    pub fn is_valid_lsp_symbol(&self) -> bool {
110        !matches!(self, LexicalKind::Block | LexicalKind::CommentGroup)
111    }
112}
113
114impl From<LexicalKind> for SymbolKind {
115    fn from(value: LexicalKind) -> Self {
116        use LexicalVarKind::*;
117        match value {
118            LexicalKind::Heading(..) => SymbolKind::NAMESPACE,
119            LexicalKind::Var(ValRef | Variable) => SymbolKind::VARIABLE,
120            LexicalKind::Var(Function) => SymbolKind::FUNCTION,
121            LexicalKind::Var(LabelRef | Label | BibKey) => SymbolKind::CONSTANT,
122            LexicalKind::Block | LexicalKind::CommentGroup => SymbolKind::CONSTANT,
123        }
124    }
125}
126
127#[derive(Debug, Clone, Copy, Hash, Default, PartialEq, Eq)]
128pub(crate) enum LexicalScopeKind {
129    #[default]
130    Symbol,
131    Braced,
132}
133
134impl LexicalScopeKind {
135    fn affect_symbol(&self) -> bool {
136        matches!(self, Self::Symbol)
137    }
138
139    fn affect_markup(&self) -> bool {
140        matches!(self, Self::Braced)
141    }
142
143    fn affect_block(&self) -> bool {
144        matches!(self, Self::Braced)
145    }
146
147    fn affect_expr(&self) -> bool {
148        matches!(self, Self::Braced)
149    }
150
151    fn affect_heading(&self) -> bool {
152        matches!(self, Self::Symbol | Self::Braced)
153    }
154}
155
156#[derive(Debug, Clone, Hash)]
157pub(crate) struct LexicalInfo {
158    pub name: EcoString,
159    pub kind: LexicalKind,
160    pub range: Range<usize>,
161}
162
163#[derive(Debug, Clone, Hash)]
164pub(crate) struct LexicalHierarchy {
165    pub info: LexicalInfo,
166    pub children: Option<LazyHash<EcoVec<LexicalHierarchy>>>,
167}
168
169impl Serialize for LexicalHierarchy {
170    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
171        use serde::ser::SerializeStruct;
172        let mut state = serializer.serialize_struct("LexicalHierarchy", 2)?;
173        state.serialize_field("name", &self.info.name)?;
174        state.serialize_field("kind", &self.info.kind)?;
175        state.serialize_field("range", &self.info.range)?;
176        if let Some(children) = &self.children {
177            state.serialize_field("children", children.deref())?;
178        }
179        state.end()
180    }
181}
182
183impl<'de> Deserialize<'de> for LexicalHierarchy {
184    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
185        use serde::de::MapAccess;
186        struct LexicalHierarchyVisitor;
187        impl<'de> serde::de::Visitor<'de> for LexicalHierarchyVisitor {
188            type Value = LexicalHierarchy;
189
190            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
191                formatter.write_str("a struct")
192            }
193
194            fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
195                let mut name = None;
196                let mut kind = None;
197                let mut range = None;
198                let mut children = None;
199                while let Some(key) = map.next_key()? {
200                    match key {
201                        "name" => name = Some(map.next_value()?),
202                        "kind" => kind = Some(map.next_value()?),
203                        "range" => range = Some(map.next_value()?),
204                        "children" => children = Some(map.next_value()?),
205                        _ => {}
206                    }
207                }
208                let name = name.ok_or_else(|| serde::de::Error::missing_field("name"))?;
209                let kind = kind.ok_or_else(|| serde::de::Error::missing_field("kind"))?;
210                let range = range.ok_or_else(|| serde::de::Error::missing_field("range"))?;
211                Ok(LexicalHierarchy {
212                    info: LexicalInfo { name, kind, range },
213                    children: children.map(LazyHash::new),
214                })
215            }
216        }
217
218        deserializer.deserialize_struct(
219            "LexicalHierarchy",
220            &["name", "kind", "range", "children"],
221            LexicalHierarchyVisitor,
222        )
223    }
224}
225
226#[derive(Debug, Clone, Copy, Hash, Default, PartialEq, Eq)]
227enum IdentContext {
228    #[default]
229    Ref,
230    Func,
231    Var,
232    Params,
233}
234
235#[derive(Default)]
236struct LexicalHierarchyWorker {
237    sk: LexicalScopeKind,
238    stack: Vec<(LexicalInfo, EcoVec<LexicalHierarchy>)>,
239    ident_context: IdentContext,
240}
241
242impl LexicalHierarchyWorker {
243    fn is_plain_token(kind: SyntaxKind) -> bool {
244        kind.is_trivia() || kind.is_keyword() || is_mark(kind) || kind.is_error()
245    }
246
247    /// Finish the current top of the stack.
248    fn finish_hierarchy(&mut self) {
249        let (symbol, children) = self.stack.pop().unwrap();
250        let current = &mut self.stack.last_mut().unwrap().1;
251
252        current.push(finish_hierarchy(symbol, children));
253    }
254
255    /// Enter a node and setup the context.
256    fn enter_node(&mut self, node: &LinkedNode) -> Option<IdentContext> {
257        let checkpoint = self.ident_context;
258        match node.kind() {
259            SyntaxKind::RefMarker => self.ident_context = IdentContext::Ref,
260            SyntaxKind::LetBinding => self.ident_context = IdentContext::Ref,
261            SyntaxKind::Closure => self.ident_context = IdentContext::Func,
262            SyntaxKind::Params => self.ident_context = IdentContext::Params,
263            _ => {}
264        }
265
266        Some(checkpoint)
267    }
268
269    /// Exit a node and restore the context.
270    fn exit_node(&mut self, checkpoint: IdentContext) -> Option<()> {
271        self.ident_context = checkpoint;
272        Some(())
273    }
274
275    /// Check nodes in a list recursively.
276    fn check_nodes(&mut self, node: LinkedNode) -> Option<()> {
277        let mut group_matcher = CommentGroupMatcher::default();
278        let mut comment_range: Option<Range<usize>> = None;
279        for child in node.children() {
280            match group_matcher.process(&child) {
281                super::CommentGroupSignal::Space => {}
282                super::CommentGroupSignal::LineComment
283                | super::CommentGroupSignal::BlockComment => {
284                    let child_range = child.range();
285                    match comment_range {
286                        Some(ref mut comment_range) => comment_range.end = child_range.end,
287                        None => comment_range = Some(child_range),
288                    }
289                }
290                super::CommentGroupSignal::Hash | super::CommentGroupSignal::BreakGroup => {
291                    if let Some(comment_range) = comment_range.take() {
292                        self.stack.push((
293                            LexicalInfo {
294                                name: "".into(),
295                                kind: LexicalKind::CommentGroup,
296                                range: comment_range,
297                            },
298                            eco_vec![],
299                        ));
300                    }
301
302                    if !Self::is_plain_token(child.kind()) {
303                        self.check_node(child)?;
304                    }
305                }
306            }
307        }
308
309        Some(())
310    }
311
312    /// Check lexical hierarchy a node recursively.
313    fn check_node(&mut self, node: LinkedNode) -> Option<()> {
314        let own_symbol = self.get_ident(&node)?;
315
316        let checkpoint = self.enter_node(&node)?;
317
318        if let Some(symbol) = own_symbol {
319            if let LexicalKind::Heading(level) = symbol.kind {
320                'heading_break: while let Some((w, _)) = self.stack.last() {
321                    match w.kind {
322                        LexicalKind::Heading(lvl) if lvl < level => break 'heading_break,
323                        LexicalKind::Block => break 'heading_break,
324                        _ if self.stack.len() <= 1 => break 'heading_break,
325                        _ => {}
326                    }
327
328                    self.finish_hierarchy();
329                }
330            }
331            let is_heading = matches!(symbol.kind, LexicalKind::Heading(..));
332
333            self.stack.push((symbol, eco_vec![]));
334            let stack_height = self.stack.len();
335
336            if node.kind() != SyntaxKind::ModuleImport {
337                self.check_nodes(node)?;
338            }
339
340            if is_heading {
341                while stack_height < self.stack.len() {
342                    self.finish_hierarchy();
343                }
344            } else {
345                while stack_height <= self.stack.len() {
346                    self.finish_hierarchy();
347                }
348            }
349        } else {
350            // todo: for loop variable
351            match node.kind() {
352                SyntaxKind::LetBinding => 'let_binding: {
353                    let pattern = node.children().find(|n| n.cast::<ast::Pattern>().is_some());
354
355                    if let Some(name) = &pattern {
356                        let pat = name.cast::<ast::Pattern>().unwrap();
357
358                        // special case: it will then match SyntaxKind::Closure in the inner looking
359                        // up.
360                        if matches!(pat, ast::Pattern::Normal(ast::Expr::Closure(..))) {
361                            let closure = name.clone();
362                            self.check_node_with(closure, IdentContext::Ref)?;
363                            break 'let_binding;
364                        }
365                    }
366
367                    // reverse order for correct symbol affection
368                    let name_offset = pattern.as_ref().map(|node| node.offset());
369                    self.check_opt_node_with(pattern, IdentContext::Var)?;
370                    self.check_first_sub_expr(node.children().rev(), name_offset)?;
371                }
372                SyntaxKind::ForLoop => {
373                    let pattern = node.children().find(|child| child.is::<ast::Pattern>());
374                    let iterable = node
375                        .children()
376                        .skip_while(|child| child.kind() != SyntaxKind::In)
377                        .find(|child| child.is::<ast::Expr>());
378
379                    let iterable_offset = iterable.as_ref().map(|node| node.offset());
380                    self.check_opt_node_with(iterable, IdentContext::Ref)?;
381                    self.check_opt_node_with(pattern, IdentContext::Var)?;
382                    self.check_first_sub_expr(node.children().rev(), iterable_offset)?;
383                }
384                SyntaxKind::Closure => {
385                    let first_child = node.children().next();
386                    let current = self.stack.last_mut().unwrap().1.len();
387                    if let Some(first_child) = first_child
388                        && first_child.kind() == SyntaxKind::Ident
389                    {
390                        self.check_node_with(first_child, IdentContext::Func)?;
391                    }
392                    let body = node
393                        .children()
394                        .rev()
395                        .find(|child| child.cast::<ast::Expr>().is_some());
396                    if let Some(body) = body {
397                        let symbol = if current == self.stack.last().unwrap().1.len() {
398                            // Closure has no updated symbol stack
399                            LexicalInfo {
400                                name: "<anonymous>".into(),
401                                kind: LexicalKind::function(),
402                                range: node.range(),
403                            }
404                        } else {
405                            // Closure has a name
406                            let mut info = self.stack.last_mut().unwrap().1.pop().unwrap().info;
407                            info.range = node.range();
408                            info
409                        };
410
411                        self.stack.push((symbol, eco_vec![]));
412                        let stack_height = self.stack.len();
413
414                        self.check_node_with(body, IdentContext::Ref)?;
415                        while stack_height <= self.stack.len() {
416                            self.finish_hierarchy();
417                        }
418                    }
419                }
420                SyntaxKind::FieldAccess => {
421                    self.check_first_sub_expr(node.children(), None)?;
422                }
423                SyntaxKind::Named => {
424                    self.check_first_sub_expr(node.children().rev(), None)?;
425
426                    if self.ident_context == IdentContext::Params {
427                        let ident = node.children().find(|n| n.kind() == SyntaxKind::Ident);
428                        self.check_opt_node_with(ident, IdentContext::Var)?;
429                    }
430                }
431                kind if Self::is_plain_token(kind) => {}
432                _ => {
433                    self.check_nodes(node)?;
434                }
435            }
436        }
437
438        self.exit_node(checkpoint)?;
439
440        Some(())
441    }
442
443    /// Check a possible node with a specific context.
444    #[inline(always)]
445    fn check_opt_node_with(
446        &mut self,
447        node: Option<LinkedNode>,
448        context: IdentContext,
449    ) -> Option<()> {
450        if let Some(node) = node {
451            self.check_node_with(node, context)?;
452        }
453
454        Some(())
455    }
456
457    /// Check the first sub-expression of a node. If an offset is provided, it
458    /// only checks the sub-expression if it starts after the offset.
459    fn check_first_sub_expr<'a>(
460        &mut self,
461        mut nodes: impl Iterator<Item = LinkedNode<'a>>,
462        after_offset: Option<usize>,
463    ) -> Option<()> {
464        let body = nodes.find(|n| n.is::<ast::Expr>());
465        if let Some(body) = body {
466            if after_offset.is_some_and(|offset| offset >= body.offset()) {
467                return Some(());
468            }
469            self.check_node_with(body, IdentContext::Ref)?;
470        }
471
472        Some(())
473    }
474
475    /// Check a node with a specific context.
476    fn check_node_with(&mut self, node: LinkedNode, context: IdentContext) -> Option<()> {
477        let parent_context = self.ident_context;
478        self.ident_context = context;
479
480        let res = self.check_node(node);
481
482        self.ident_context = parent_context;
483        res
484    }
485
486    /// Get symbol for a leaf node of a valid type, or `None` if the node is an
487    /// invalid type.
488    #[allow(deprecated)]
489    fn get_ident(&self, node: &LinkedNode) -> Option<Option<LexicalInfo>> {
490        let (name, kind) = match node.kind() {
491            SyntaxKind::Label if self.sk.affect_symbol() => {
492                // filter out label in code context.
493                let prev_kind = node.prev_sibling_kind();
494                if prev_kind.is_some_and(|prev_kind| {
495                    matches!(
496                        prev_kind,
497                        SyntaxKind::LeftBracket
498                            | SyntaxKind::LeftBrace
499                            | SyntaxKind::LeftParen
500                            | SyntaxKind::Comma
501                            | SyntaxKind::Colon
502                    ) || prev_kind.is_keyword()
503                }) {
504                    return Some(None);
505                }
506                let ast_node = node.cast::<ast::Label>()?;
507                let name = ast_node.get().into();
508
509                (name, LexicalKind::label())
510            }
511            SyntaxKind::Ident if self.sk.affect_symbol() => {
512                let ast_node = node.cast::<ast::Ident>()?;
513                let name = ast_node.get().clone();
514                let kind = match self.ident_context {
515                    IdentContext::Func => LexicalKind::function(),
516                    IdentContext::Var | IdentContext::Params => LexicalKind::variable(),
517                    _ => return Some(None),
518                };
519
520                (name, kind)
521            }
522            SyntaxKind::Equation | SyntaxKind::Raw | SyntaxKind::BlockComment
523                if self.sk.affect_markup() =>
524            {
525                (EcoString::new(), LexicalKind::Block)
526            }
527            SyntaxKind::CodeBlock | SyntaxKind::ContentBlock if self.sk.affect_block() => {
528                (EcoString::new(), LexicalKind::Block)
529            }
530            SyntaxKind::Parenthesized
531            | SyntaxKind::Destructuring
532            | SyntaxKind::Args
533            | SyntaxKind::Array
534            | SyntaxKind::Dict
535                if self.sk.affect_expr() =>
536            {
537                (EcoString::new(), LexicalKind::Block)
538            }
539            SyntaxKind::Markup => {
540                let name = node.get().to_owned().into_text();
541                if name.is_empty() {
542                    return Some(None);
543                }
544                let Some(parent) = node.parent() else {
545                    return Some(None);
546                };
547                let kind = match parent.kind() {
548                    SyntaxKind::Heading if self.sk.affect_heading() => LexicalKind::Heading(
549                        parent.cast::<ast::Heading>().unwrap().depth().get() as i16,
550                    ),
551                    _ => return Some(None),
552                };
553
554                (name, kind)
555            }
556            SyntaxKind::ListItem => (EcoString::new(), LexicalKind::Block),
557            SyntaxKind::EnumItem => (EcoString::new(), LexicalKind::Block),
558            _ => return Some(None),
559        };
560
561        Some(Some(LexicalInfo {
562            name,
563            kind,
564            range: node.range(),
565        }))
566    }
567}
568
569fn finish_hierarchy(sym: LexicalInfo, curr: EcoVec<LexicalHierarchy>) -> LexicalHierarchy {
570    LexicalHierarchy {
571        info: sym,
572        children: if curr.is_empty() {
573            None
574        } else {
575            Some(LazyHash::new(curr))
576        },
577    }
578}