tinymist_query/syntax/
lexical_hierarchy.rs

1use std::ops::{Deref, Range};
2
3use ecow::{EcoString, EcoVec, eco_vec};
4use lsp_types::SymbolKind;
5use serde::{Deserialize, Serialize};
6use typst::syntax::{
7    LinkedNode, Source, SyntaxKind,
8    ast::{self},
9};
10use typst_shim::utils::LazyHash;
11
12use super::{CommentGroupMatcher, is_mark};
13
14#[typst_macros::time(span = source.root().span())]
15pub(crate) fn get_lexical_hierarchy(
16    source: &Source,
17    scope_kind: LexicalScopeKind,
18) -> Option<EcoVec<LexicalHierarchy>> {
19    let start = tinymist_std::time::Instant::now();
20    let root = LinkedNode::new(source.root());
21
22    let mut worker = LexicalHierarchyWorker {
23        sk: scope_kind,
24        ..LexicalHierarchyWorker::default()
25    };
26    worker.stack.push((
27        LexicalInfo {
28            name: "deadbeef".into(),
29            kind: LexicalKind::Heading(-1),
30            range: 0..0,
31        },
32        eco_vec![],
33    ));
34    let res = match worker.check_node(root) {
35        Some(()) => Some(()),
36        None => {
37            log::error!("lexical hierarchy analysis failed");
38            None
39        }
40    };
41
42    while worker.stack.len() > 1 {
43        worker.finish_hierarchy();
44    }
45
46    crate::log_debug_ct!("lexical hierarchy analysis took {:?}", start.elapsed());
47    res.map(|_| worker.stack.pop().unwrap().1)
48}
49
50/// The kind of a variable.
51#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
52pub enum LexicalVarKind {
53    /// A value reference.
54    /// `#foo`
55    ///   ^^^
56    ValRef,
57    /// A label reference.
58    /// `@foo`
59    ///   ^^^
60    LabelRef,
61    /// A label.
62    /// `<foo>`
63    ///   ^^^
64    Label,
65    /// A bib key.
66    /// `x:`
67    ///  ^^
68    BibKey,
69    /// A variable.
70    /// `let foo`
71    ///      ^^^
72    Variable,
73    /// A function.
74    /// `let foo()`
75    ///      ^^^
76    Function,
77}
78
79/// The kind of a lexical hierarchy recogized by the analyzers.
80#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
81pub enum LexicalKind {
82    /// A heading.
83    Heading(i16),
84    /// A variable.
85    Var(LexicalVarKind),
86    /// A block.
87    Block,
88    /// A comment group.
89    CommentGroup,
90}
91
92impl LexicalKind {
93    /// Creates a label.
94    const fn label() -> LexicalKind {
95        LexicalKind::Var(LexicalVarKind::Label)
96    }
97
98    /// Creates a function.
99    const fn function() -> LexicalKind {
100        LexicalKind::Var(LexicalVarKind::Function)
101    }
102
103    /// Creates a variable.
104    const fn variable() -> LexicalKind {
105        LexicalKind::Var(LexicalVarKind::Variable)
106    }
107
108    /// Checks if the kind is a valid LSP symbol.
109    pub fn is_valid_lsp_symbol(&self) -> bool {
110        !matches!(self, LexicalKind::Block | LexicalKind::CommentGroup)
111    }
112}
113
114impl From<LexicalKind> for SymbolKind {
115    fn from(value: LexicalKind) -> Self {
116        use LexicalVarKind::*;
117        match value {
118            LexicalKind::Heading(..) => SymbolKind::NAMESPACE,
119            LexicalKind::Var(ValRef | Variable) => SymbolKind::VARIABLE,
120            LexicalKind::Var(Function) => SymbolKind::FUNCTION,
121            LexicalKind::Var(LabelRef | Label | BibKey) => SymbolKind::CONSTANT,
122            LexicalKind::Block | LexicalKind::CommentGroup => SymbolKind::CONSTANT,
123        }
124    }
125}
126
127#[derive(Debug, Clone, Copy, Hash, Default, PartialEq, Eq)]
128pub(crate) enum LexicalScopeKind {
129    #[default]
130    Symbol,
131    Braced,
132}
133
134impl LexicalScopeKind {
135    fn affect_symbol(&self) -> bool {
136        matches!(self, Self::Symbol)
137    }
138
139    fn affect_markup(&self) -> bool {
140        matches!(self, Self::Braced)
141    }
142
143    fn affect_block(&self) -> bool {
144        matches!(self, Self::Braced)
145    }
146
147    fn affect_expr(&self) -> bool {
148        matches!(self, Self::Braced)
149    }
150
151    fn affect_heading(&self) -> bool {
152        matches!(self, Self::Symbol | Self::Braced)
153    }
154}
155
156#[derive(Debug, Clone, Hash)]
157pub(crate) struct LexicalInfo {
158    pub name: EcoString,
159    pub kind: LexicalKind,
160    pub range: Range<usize>,
161}
162
163#[derive(Debug, Clone, Hash)]
164pub(crate) struct LexicalHierarchy {
165    pub info: LexicalInfo,
166    pub children: Option<LazyHash<EcoVec<LexicalHierarchy>>>,
167}
168
169impl Serialize for LexicalHierarchy {
170    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
171        use serde::ser::SerializeStruct;
172        let mut state = serializer.serialize_struct("LexicalHierarchy", 2)?;
173        state.serialize_field("name", &self.info.name)?;
174        state.serialize_field("kind", &self.info.kind)?;
175        state.serialize_field("range", &self.info.range)?;
176        if let Some(children) = &self.children {
177            state.serialize_field("children", children.deref())?;
178        }
179        state.end()
180    }
181}
182
183impl<'de> Deserialize<'de> for LexicalHierarchy {
184    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
185        use serde::de::MapAccess;
186        struct LexicalHierarchyVisitor;
187        impl<'de> serde::de::Visitor<'de> for LexicalHierarchyVisitor {
188            type Value = LexicalHierarchy;
189
190            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
191                formatter.write_str("a struct")
192            }
193
194            fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
195                let mut name = None;
196                let mut kind = None;
197                let mut range = None;
198                let mut children = None;
199                while let Some(key) = map.next_key()? {
200                    match key {
201                        "name" => name = Some(map.next_value()?),
202                        "kind" => kind = Some(map.next_value()?),
203                        "range" => range = Some(map.next_value()?),
204                        "children" => children = Some(map.next_value()?),
205                        _ => {}
206                    }
207                }
208                let name = name.ok_or_else(|| serde::de::Error::missing_field("name"))?;
209                let kind = kind.ok_or_else(|| serde::de::Error::missing_field("kind"))?;
210                let range = range.ok_or_else(|| serde::de::Error::missing_field("range"))?;
211                Ok(LexicalHierarchy {
212                    info: LexicalInfo { name, kind, range },
213                    children: children.map(LazyHash::new),
214                })
215            }
216        }
217
218        deserializer.deserialize_struct(
219            "LexicalHierarchy",
220            &["name", "kind", "range", "children"],
221            LexicalHierarchyVisitor,
222        )
223    }
224}
225
226#[derive(Debug, Clone, Copy, Hash, Default, PartialEq, Eq)]
227enum IdentContext {
228    #[default]
229    Ref,
230    Func,
231    Var,
232    Params,
233}
234
235#[derive(Default)]
236struct LexicalHierarchyWorker {
237    sk: LexicalScopeKind,
238    stack: Vec<(LexicalInfo, EcoVec<LexicalHierarchy>)>,
239    ident_context: IdentContext,
240}
241
242impl LexicalHierarchyWorker {
243    fn is_plain_token(kind: SyntaxKind) -> bool {
244        kind.is_trivia() || kind.is_keyword() || is_mark(kind) || kind.is_error()
245    }
246
247    /// Finish the current top of the stack.
248    fn finish_hierarchy(&mut self) {
249        let (symbol, children) = self.stack.pop().unwrap();
250        let current = &mut self.stack.last_mut().unwrap().1;
251
252        current.push(finish_hierarchy(symbol, children));
253    }
254
255    /// Enter a node and setup the context.
256    fn enter_node(&mut self, node: &LinkedNode) -> Option<IdentContext> {
257        let checkpoint = self.ident_context;
258        match node.kind() {
259            SyntaxKind::RefMarker => self.ident_context = IdentContext::Ref,
260            SyntaxKind::LetBinding => self.ident_context = IdentContext::Ref,
261            SyntaxKind::Closure => self.ident_context = IdentContext::Func,
262            SyntaxKind::Params => self.ident_context = IdentContext::Params,
263            _ => {}
264        }
265
266        Some(checkpoint)
267    }
268
269    /// Exit a node and restore the context.
270    fn exit_node(&mut self, checkpoint: IdentContext) -> Option<()> {
271        self.ident_context = checkpoint;
272        Some(())
273    }
274
275    /// Check nodes in a list recursively.
276    fn check_nodes(&mut self, node: LinkedNode) -> Option<()> {
277        let mut group_matcher = CommentGroupMatcher::default();
278        let mut comment_range: Option<Range<usize>> = None;
279        for child in node.children() {
280            match group_matcher.process(&child) {
281                super::CommentGroupSignal::Space => {}
282                super::CommentGroupSignal::LineComment
283                | super::CommentGroupSignal::BlockComment => {
284                    let child_range = child.range();
285                    match comment_range {
286                        Some(ref mut comment_range) => comment_range.end = child_range.end,
287                        None => comment_range = Some(child_range),
288                    }
289                }
290                super::CommentGroupSignal::Hash | super::CommentGroupSignal::BreakGroup => {
291                    if let Some(comment_range) = comment_range.take() {
292                        self.stack.push((
293                            LexicalInfo {
294                                name: "".into(),
295                                kind: LexicalKind::CommentGroup,
296                                range: comment_range,
297                            },
298                            eco_vec![],
299                        ));
300                        // Push the lexical node to the children of the current top of the stack.
301                        self.finish_hierarchy();
302                    }
303
304                    if !Self::is_plain_token(child.kind()) {
305                        self.check_node(child)?;
306                    }
307                }
308            }
309        }
310
311        Some(())
312    }
313
314    /// Check lexical hierarchy a node recursively.
315    fn check_node(&mut self, node: LinkedNode) -> Option<()> {
316        let own_symbol = self.get_ident(&node)?;
317
318        let checkpoint = self.enter_node(&node)?;
319
320        if let Some(symbol) = own_symbol {
321            if let LexicalKind::Heading(level) = symbol.kind {
322                'heading_break: while let Some((w, _)) = self.stack.last() {
323                    match w.kind {
324                        LexicalKind::Heading(lvl) if lvl < level => break 'heading_break,
325                        LexicalKind::Block => break 'heading_break,
326                        _ if self.stack.len() <= 1 => break 'heading_break,
327                        _ => {}
328                    }
329
330                    self.finish_hierarchy();
331                }
332            }
333            let is_heading = matches!(symbol.kind, LexicalKind::Heading(..));
334
335            self.stack.push((symbol, eco_vec![]));
336            let stack_height = self.stack.len();
337
338            if node.kind() != SyntaxKind::ModuleImport {
339                self.check_nodes(node)?;
340            }
341
342            if is_heading {
343                while stack_height < self.stack.len() {
344                    self.finish_hierarchy();
345                }
346            } else {
347                while stack_height <= self.stack.len() {
348                    self.finish_hierarchy();
349                }
350            }
351        } else {
352            // todo: for loop variable
353            match node.kind() {
354                SyntaxKind::LetBinding => 'let_binding: {
355                    let pattern = node.children().find(|n| n.cast::<ast::Pattern>().is_some());
356
357                    if let Some(name) = &pattern {
358                        let pat = name.cast::<ast::Pattern>().unwrap();
359
360                        // special case: it will then match SyntaxKind::Closure in the inner looking
361                        // up.
362                        if matches!(pat, ast::Pattern::Normal(ast::Expr::Closure(..))) {
363                            let closure = name.clone();
364                            self.check_node_with(closure, IdentContext::Ref)?;
365                            break 'let_binding;
366                        }
367                    }
368
369                    // reverse order for correct symbol affection
370                    let name_offset = pattern.as_ref().map(|node| node.offset());
371                    self.check_opt_node_with(pattern, IdentContext::Var)?;
372                    self.check_first_sub_expr(node.children().rev(), name_offset)?;
373                }
374                SyntaxKind::ForLoop => {
375                    let pattern = node.children().find(|child| child.is::<ast::Pattern>());
376                    let iterable = node
377                        .children()
378                        .skip_while(|child| child.kind() != SyntaxKind::In)
379                        .find(|child| child.is::<ast::Expr>());
380
381                    let iterable_offset = iterable.as_ref().map(|node| node.offset());
382                    self.check_opt_node_with(iterable, IdentContext::Ref)?;
383                    self.check_opt_node_with(pattern, IdentContext::Var)?;
384                    self.check_first_sub_expr(node.children().rev(), iterable_offset)?;
385                }
386                SyntaxKind::Closure => {
387                    let first_child = node.children().next();
388                    let current = self.stack.last_mut().unwrap().1.len();
389                    if let Some(first_child) = first_child
390                        && first_child.kind() == SyntaxKind::Ident
391                    {
392                        self.check_node_with(first_child, IdentContext::Func)?;
393                    }
394                    let body = node
395                        .children()
396                        .rev()
397                        .find(|child| child.cast::<ast::Expr>().is_some());
398                    if let Some(body) = body {
399                        let symbol = if current == self.stack.last().unwrap().1.len() {
400                            // Closure has no updated symbol stack
401                            LexicalInfo {
402                                name: "<anonymous>".into(),
403                                kind: LexicalKind::function(),
404                                range: node.range(),
405                            }
406                        } else {
407                            // Closure has a name
408                            let mut info = self.stack.last_mut().unwrap().1.pop().unwrap().info;
409                            info.range = node.range();
410                            info
411                        };
412
413                        self.stack.push((symbol, eco_vec![]));
414                        let stack_height = self.stack.len();
415
416                        self.check_node_with(body, IdentContext::Ref)?;
417                        while stack_height <= self.stack.len() {
418                            self.finish_hierarchy();
419                        }
420                    }
421                }
422                SyntaxKind::FieldAccess => {
423                    self.check_first_sub_expr(node.children(), None)?;
424                }
425                SyntaxKind::Named => {
426                    self.check_first_sub_expr(node.children().rev(), None)?;
427
428                    if self.ident_context == IdentContext::Params {
429                        let ident = node.children().find(|n| n.kind() == SyntaxKind::Ident);
430                        self.check_opt_node_with(ident, IdentContext::Var)?;
431                    }
432                }
433                kind if Self::is_plain_token(kind) => {}
434                _ => {
435                    self.check_nodes(node)?;
436                }
437            }
438        }
439
440        self.exit_node(checkpoint)?;
441
442        Some(())
443    }
444
445    /// Check a possible node with a specific context.
446    #[inline(always)]
447    fn check_opt_node_with(
448        &mut self,
449        node: Option<LinkedNode>,
450        context: IdentContext,
451    ) -> Option<()> {
452        if let Some(node) = node {
453            self.check_node_with(node, context)?;
454        }
455
456        Some(())
457    }
458
459    /// Check the first sub-expression of a node. If an offset is provided, it
460    /// only checks the sub-expression if it starts after the offset.
461    fn check_first_sub_expr<'a>(
462        &mut self,
463        mut nodes: impl Iterator<Item = LinkedNode<'a>>,
464        after_offset: Option<usize>,
465    ) -> Option<()> {
466        let body = nodes.find(|n| n.is::<ast::Expr>());
467        if let Some(body) = body {
468            if after_offset.is_some_and(|offset| offset >= body.offset()) {
469                return Some(());
470            }
471            self.check_node_with(body, IdentContext::Ref)?;
472        }
473
474        Some(())
475    }
476
477    /// Check a node with a specific context.
478    fn check_node_with(&mut self, node: LinkedNode, context: IdentContext) -> Option<()> {
479        let parent_context = self.ident_context;
480        self.ident_context = context;
481
482        let res = self.check_node(node);
483
484        self.ident_context = parent_context;
485        res
486    }
487
488    /// Get symbol for a leaf node of a valid type, or `None` if the node is an
489    /// invalid type.
490    #[allow(deprecated)]
491    fn get_ident(&self, node: &LinkedNode) -> Option<Option<LexicalInfo>> {
492        let (name, kind) = match node.kind() {
493            SyntaxKind::Label if self.sk.affect_symbol() => {
494                // filter out label in code context.
495                let prev_kind = node.prev_sibling_kind();
496                if prev_kind.is_some_and(|prev_kind| {
497                    matches!(
498                        prev_kind,
499                        SyntaxKind::LeftBracket
500                            | SyntaxKind::LeftBrace
501                            | SyntaxKind::LeftParen
502                            | SyntaxKind::Comma
503                            | SyntaxKind::Colon
504                    ) || prev_kind.is_keyword()
505                }) {
506                    return Some(None);
507                }
508                let ast_node = node.cast::<ast::Label>()?;
509                let name = ast_node.get().into();
510
511                (name, LexicalKind::label())
512            }
513            SyntaxKind::Ident if self.sk.affect_symbol() => {
514                let ast_node = node.cast::<ast::Ident>()?;
515                let name = ast_node.get().clone();
516                let kind = match self.ident_context {
517                    IdentContext::Func => LexicalKind::function(),
518                    IdentContext::Var | IdentContext::Params => LexicalKind::variable(),
519                    _ => return Some(None),
520                };
521
522                (name, kind)
523            }
524            SyntaxKind::Equation | SyntaxKind::Raw | SyntaxKind::BlockComment
525                if self.sk.affect_markup() =>
526            {
527                (EcoString::new(), LexicalKind::Block)
528            }
529            SyntaxKind::CodeBlock | SyntaxKind::ContentBlock if self.sk.affect_block() => {
530                (EcoString::new(), LexicalKind::Block)
531            }
532            SyntaxKind::Parenthesized
533            | SyntaxKind::Destructuring
534            | SyntaxKind::Args
535            | SyntaxKind::Array
536            | SyntaxKind::Dict
537                if self.sk.affect_expr() =>
538            {
539                (EcoString::new(), LexicalKind::Block)
540            }
541            SyntaxKind::Markup => {
542                let name = node.get().to_owned().into_text();
543                if name.is_empty() {
544                    return Some(None);
545                }
546                let Some(parent) = node.parent() else {
547                    return Some(None);
548                };
549                let kind = match parent.kind() {
550                    SyntaxKind::Heading if self.sk.affect_heading() => LexicalKind::Heading(
551                        parent.cast::<ast::Heading>().unwrap().depth().get() as i16,
552                    ),
553                    _ => return Some(None),
554                };
555
556                (name, kind)
557            }
558            SyntaxKind::ListItem => (EcoString::new(), LexicalKind::Block),
559            SyntaxKind::EnumItem => (EcoString::new(), LexicalKind::Block),
560            _ => return Some(None),
561        };
562
563        Some(Some(LexicalInfo {
564            name,
565            kind,
566            range: node.range(),
567        }))
568    }
569}
570
571fn finish_hierarchy(sym: LexicalInfo, curr: EcoVec<LexicalHierarchy>) -> LexicalHierarchy {
572    LexicalHierarchy {
573        info: sym,
574        children: if curr.is_empty() {
575            None
576        } else {
577            Some(LazyHash::new(curr))
578        },
579    }
580}