tinymist_world/parser/semantic_tokens.rs

use strum::IntoEnumIterator;

use typst::syntax::{LinkedNode, Source, SyntaxKind, ast};

use super::modifier_set::ModifierSet;
use super::typst_tokens::{Modifier, TokenType};

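/// The legend of semantic token types and modifiers reported to the client;
/// token type indices and modifier bits refer to positions in these lists.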
#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)]
pub struct SemanticTokensLegend {
    #[serde(rename = "tokenTypes")]
    pub token_types: Vec<String>,
    #[serde(rename = "tokenModifiers")]
    pub token_modifiers: Vec<String>,
}

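/// Builds the legend by listing every known `TokenType` and `Modifier` by its
/// string name.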
pub fn get_semantic_tokens_legend() -> SemanticTokensLegend {
    SemanticTokensLegend {
        token_types: TokenType::iter()
            .map(|e| {
                let e: &'static str = e.into();

                e.to_owned()
            })
            .collect(),
        token_modifiers: Modifier::iter()
            .map(|e| {
                let e: &'static str = e.into();

                e.to_owned()
            })
            .collect(),
    }
}

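/// The position encoding to use when converting byte offsets to (line, column)
/// positions.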
#[derive(Debug, Clone, Copy)]
pub enum OffsetEncoding {
    Utf8,
    Utf16,
}

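/// Tokenizes the entire source and returns the tokens in the LSP delta
/// encoding: each token's line and start character are relative to the
/// previous token.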
pub fn get_semantic_tokens_full(source: &Source, encoding: OffsetEncoding) -> Vec<SemanticToken> {
    let root = LinkedNode::new(source.root());
    let mut full = tokenize_tree(&root, ModifierSet::empty());

    let mut init = (0, 0);
    for token in full.iter_mut() {
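        // `SemanticToken::new` packed the token's absolute byte offset across
        // the two delta fields; unpack it and resolve it to an absolute
        // (line, column) position in the requested encoding.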
        let offset = ((token.delta_line as u64) << 32) | token.delta_start_character as u64;
        let position = (match encoding {
            OffsetEncoding::Utf8 => offset_to_position_utf8,
            OffsetEncoding::Utf16 => offset_to_position_utf16,
        })(offset as usize, source);
        token.delta_line = position.0;
        token.delta_start_character = position.1;

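        // Re-encode the absolute position as a delta against the previous
        // token, as the LSP semantic token format expects.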
        let next = (token.delta_line, token.delta_start_character);
        token.delta_line -= init.0;
        if token.delta_line == 0 {
            token.delta_start_character -= init.1;
        }
        init = next;
    }

    full
}

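/// Tokenizes a single node, if it yields a token at all; leaf nodes with no
/// more specific classification fall back to `TokenType::Text`.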
fn tokenize_single_node(node: &LinkedNode, modifiers: ModifierSet) -> Option<SemanticToken> {
    let is_leaf = node.children().next().is_none();

    token_from_node(node)
        .or_else(|| is_leaf.then_some(TokenType::Text))
        .map(|token_type| SemanticToken::new(token_type, modifiers, node))
}

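/// Tokenizes a node and all of its children, propagating the modifiers
/// collected from enclosing nodes.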
fn tokenize_tree(root: &LinkedNode<'_>, parent_modifiers: ModifierSet) -> Vec<SemanticToken> {
    let modifiers = parent_modifiers | modifiers_from_node(root);

    let token = tokenize_single_node(root, modifiers).into_iter();
    let children = root
        .children()
        .flat_map(move |child| tokenize_tree(&child, modifiers));
    token.chain(children).collect()
}

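/// A semantic token. `new` initially stores the node's absolute byte offset
/// across the two delta fields; `get_semantic_tokens_full` later rewrites
/// them into per-token deltas. `length` is counted in UTF-16 code units.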
#[derive(Debug, Clone, Copy)]
pub struct SemanticToken {
    pub delta_line: u32,
    pub delta_start_character: u32,
    pub length: u32,
    pub token_type: u32,
    pub token_modifiers: u32,
}

impl SemanticToken {
    fn new(token_type: TokenType, modifiers: ModifierSet, node: &LinkedNode) -> Self {
        let source = node.get().clone().into_text();

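        // Pack the absolute byte offset into the two position fields; it is
        // unpacked and converted to a real position in
        // `get_semantic_tokens_full`.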
        let raw_position = node.offset() as u64;
        let raw_position = ((raw_position >> 32) as u32, raw_position as u32);

        Self {
            token_type: token_type as u32,
            token_modifiers: modifiers.bitset(),
            delta_line: raw_position.0,
            delta_start_character: raw_position.1,
            length: source.chars().map(char::len_utf16).sum::<usize>() as u32,
        }
    }
}

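/// Determines the modifiers that a node introduces for itself and its
/// children. This looks only at the node's own kind, not at its ancestors.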
fn modifiers_from_node(node: &LinkedNode) -> ModifierSet {
    match node.kind() {
        SyntaxKind::Emph => ModifierSet::new(&[Modifier::Emph]),
        SyntaxKind::Strong => ModifierSet::new(&[Modifier::Strong]),
        SyntaxKind::Math | SyntaxKind::Equation => ModifierSet::new(&[Modifier::Math]),
        _ => ModifierSet::empty(),
    }
}

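/// Determines the token type for a node based on its kind, disambiguating a
/// few kinds (such as `Star` and `Underscore`) by their parent.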
fn token_from_node(node: &LinkedNode) -> Option<TokenType> {
    use SyntaxKind::*;

    match node.kind() {
        Star if node.parent_kind() == Some(Strong) => Some(TokenType::Punctuation),
        Star if node.parent_kind() == Some(ModuleImport) => Some(TokenType::Operator),

        Underscore if node.parent_kind() == Some(Emph) => Some(TokenType::Punctuation),
        Underscore if node.parent_kind() == Some(MathAttach) => Some(TokenType::Operator),

        MathIdent | Ident => Some(token_from_ident(node)),
        Hash => token_from_hashtag(node),

        LeftBrace | RightBrace | LeftBracket | RightBracket | LeftParen | RightParen | Comma
        | Semicolon | Colon => Some(TokenType::Punctuation),
        Linebreak | Escape | Shorthand => Some(TokenType::Escape),
        Link => Some(TokenType::Link),
        Raw => Some(TokenType::Raw),
        Label => Some(TokenType::Label),
        RefMarker => Some(TokenType::Ref),
        Heading | HeadingMarker => Some(TokenType::Heading),
        ListMarker | EnumMarker | TermMarker => Some(TokenType::ListMarker),
        MathAlignPoint | Plus | Minus | Slash | Hat | Dot | Eq | EqEq | ExclEq | Lt | LtEq | Gt
        | GtEq | PlusEq | HyphEq | StarEq | SlashEq | Dots | Arrow | Not | And | Or => {
            Some(TokenType::Operator)
        }
        Dollar => Some(TokenType::Delimiter),
        None | Auto | Let | Show | If | Else | For | In | While | Break | Continue | Return
        | Import | Include | As | Set => Some(TokenType::Keyword),
        Bool => Some(TokenType::Bool),
        Int | Float | Numeric => Some(TokenType::Number),
        Str => Some(TokenType::String),
        LineComment | BlockComment => Some(TokenType::Comment),
        Error => Some(TokenType::Error),

        _ => Option::None,
    }
}

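/// Returns whether an identifier is used as a function: followed by an
/// argument or parameter list, or by a content block argument.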
fn is_function_ident(ident: &LinkedNode) -> bool {
    let Some(next) = ident.next_leaf() else {
        return false;
    };
    let function_call = matches!(next.kind(), SyntaxKind::LeftParen)
        && matches!(
            next.parent_kind(),
            Some(SyntaxKind::Args | SyntaxKind::Params)
        );
    let function_content = matches!(next.kind(), SyntaxKind::LeftBracket)
        && matches!(next.parent_kind(), Some(SyntaxKind::ContentBlock));
    function_call || function_content
}

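/// Classifies an identifier as a function when it is used as one, and as an
/// interpolated value otherwise.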
fn token_from_ident(ident: &LinkedNode) -> TokenType {
    if is_function_ident(ident) {
        TokenType::Function
    } else {
        TokenType::Interpolated
    }
}

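/// Returns the leftmost leaf of the expression introduced by the hash, if the
/// next sibling is an expression that can be embedded with a hash.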
fn get_expr_following_hashtag<'a>(hashtag: &LinkedNode<'a>) -> Option<LinkedNode<'a>> {
    hashtag
        .next_sibling()
        .filter(|next| next.cast::<ast::Expr>().is_some_and(|expr| expr.hash()))
        .and_then(|node| node.leftmost_leaf())
}

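/// Highlights the hash itself like the first token of the expression it
/// introduces (so the `#` of `#let`, for example, gets keyword coloring).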
fn token_from_hashtag(hashtag: &LinkedNode) -> Option<TokenType> {
    get_expr_following_hashtag(hashtag)
        .as_ref()
        .and_then(token_from_node)
}

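/// Converts a byte offset into a zero-based (line, column) pair using Typst's
/// own line and column accounting.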
fn offset_to_position_utf8(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
    let line_index = typst_source.byte_to_line(typst_offset).unwrap();
    let column_index = typst_source.byte_to_column(typst_offset).unwrap();

    (line_index as u32, column_index as u32)
}

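/// Converts a byte offset into a zero-based (line, column) pair with the
/// column measured in UTF-16 code units, matching the default LSP position
/// encoding.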
fn offset_to_position_utf16(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
    let line_index = typst_source.byte_to_line(typst_offset).unwrap();

    let lsp_line = line_index as u32;

    let utf16_offset = typst_source.byte_to_utf16(typst_offset).unwrap();

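    // The column is the number of UTF-16 code units since the start of the
    // line, so take the difference between the two UTF-16 offsets measured
    // from the start of the file.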
    let byte_line_offset = typst_source.line_to_byte(line_index).unwrap();
    let utf16_line_offset = typst_source.byte_to_utf16(byte_line_offset).unwrap();

    let utf16_column_offset = utf16_offset - utf16_line_offset;
    let lsp_column = utf16_column_offset;

    (lsp_line, lsp_column as u32)
}