1use std::collections::HashMap;
4use std::{
5 num::NonZeroUsize,
6 ops::Range,
7 path::Path,
8 sync::{Arc, OnceLock},
9};
10
11use lsp_types::SemanticToken;
12use lsp_types::{SemanticTokenModifier, SemanticTokenType};
13use parking_lot::Mutex;
14use strum::EnumIter;
15use tinymist_std::ImmutPath;
16use typst::syntax::{LinkedNode, Source, SyntaxKind, ast};
17
18use crate::{
19 LocalContext, LspPosition, PositionEncoding,
20 adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot},
21 syntax::{Expr, ExprInfo},
22 ty::Ty,
23};
24
/// A shared, immutable list of LSP semantic tokens for one file revision.
pub type SemanticTokens = Arc<Vec<SemanticToken>>;
27
28#[typst_macros::time(span = source.root().span())]
30pub(crate) fn get_semantic_tokens(ctx: &mut LocalContext, source: &Source) -> SemanticTokens {
31 let mut tokenizer = Tokenizer::new(
32 source.clone(),
33 ctx.expr_stage(source),
34 ctx.analysis.allow_multiline_token,
35 ctx.analysis.position_encoding,
36 );
37 tokenizer.tokenize_tree(&LinkedNode::new(source.root()), ModifierSet::empty());
38 SemanticTokens::new(tokenizer.output)
39}
40
/// Caches semantic token results per file, keyed by revision, so that a later
/// request can refer back to a previous full result (e.g. for delta encoding).
#[derive(Default)]
pub struct SemanticTokenCache {
    // Monotonically increasing counter; `acquire` turns it into the next
    // revision number.
    next_id: usize,
    // One revision manager per file path; each revision slot lazily holds the
    // tokens computed for that revision.
    manager: HashMap<ImmutPath, RevisionManager<OnceLock<SemanticTokens>>>,
}
48
impl SemanticTokenCache {
    /// Drops all cached revisions and resets the id counter.
    pub(crate) fn clear(&mut self) {
        self.next_id = 0;
        self.manager.clear();
    }

    /// Acquires a [`SemanticTokenContext`] for one tokenization pass over the
    /// file at `path`.
    ///
    /// `prev` is the result id the client sent back; it is parsed as a
    /// revision number, and if that revision's tokens are still cached they
    /// are exposed as `prev` on the returned context.
    pub(crate) fn acquire(
        cache: Arc<Mutex<Self>>,
        path: &Path,
        prev: Option<&str>,
    ) -> SemanticTokenContext {
        let that = cache.clone();
        let mut that = that.lock();

        that.next_id += 1;
        // An unparsable previous id is treated the same as no previous result.
        let prev = prev.and_then(|id| {
            id.parse::<NonZeroUsize>()
                .inspect_err(|_| {
                    log::warn!("invalid previous id: {id}");
                })
                .ok()
        });
        let next = NonZeroUsize::new(that.next_id).expect("id overflow");

        let path = ImmutPath::from(path);
        let manager = that.manager.entry(path.clone()).or_default();
        // Lock the referenced revision (the previous one when the client
        // supplied it, otherwise the new one) so it is not garbage-collected
        // before this context is dropped (see `Drop for SemanticTokenContext`).
        let _rev_lock = manager.lock(prev.unwrap_or(next));
        // The previous tokens are only usable if that revision was actually
        // computed, i.e. its `OnceLock` has been set.
        let prev = prev.and_then(|prev| {
            manager
                .find_revision(prev, |_| OnceLock::new())
                .data
                .get()
                .cloned()
        });
        let next = manager.find_revision(next, |_| OnceLock::new());

        SemanticTokenContext {
            _rev_lock,
            cache,
            path,
            prev,
            next,
        }
    }
}
95
/// A per-request handle into the [`SemanticTokenCache`].
///
/// While alive it keeps the locked revision from being collected; dropping it
/// releases the lock and garbage-collects stale revisions.
pub(crate) struct SemanticTokenContext {
    // Held only for its unlock-on-drop effect.
    _rev_lock: RevisionLock,
    cache: Arc<Mutex<SemanticTokenCache>>,
    path: ImmutPath,
    /// The previous revision's tokens, if still cached (used for deltas).
    pub prev: Option<SemanticTokens>,
    /// The slot where this request's tokens are (lazily) stored.
    pub next: Arc<RevisionSlot<OnceLock<SemanticTokens>>>,
}
104
105impl Drop for SemanticTokenContext {
106 fn drop(&mut self) {
107 let mut cache = self.cache.lock();
108 let manager = cache.manager.get_mut(&self.path);
109 if let Some(manager) = manager {
110 let min_rev = manager.unlock(&mut self._rev_lock);
111 if let Some(min_rev) = min_rev {
112 manager.gc(min_rev);
113 }
114 }
115 }
116}
117
// Custom (non-standard) semantic token types emitted for Typst documents;
// `From<TokenType>` below maps enum variants onto these.
const BOOL: SemanticTokenType = SemanticTokenType::new("bool");
const PUNCTUATION: SemanticTokenType = SemanticTokenType::new("punct");
const ESCAPE: SemanticTokenType = SemanticTokenType::new("escape");
const LINK: SemanticTokenType = SemanticTokenType::new("link");
const RAW: SemanticTokenType = SemanticTokenType::new("raw");
const LABEL: SemanticTokenType = SemanticTokenType::new("label");
const REF: SemanticTokenType = SemanticTokenType::new("ref");
const HEADING: SemanticTokenType = SemanticTokenType::new("heading");
const LIST_MARKER: SemanticTokenType = SemanticTokenType::new("marker");
const LIST_TERM: SemanticTokenType = SemanticTokenType::new("term");
const DELIMITER: SemanticTokenType = SemanticTokenType::new("delim");
const INTERPOLATED: SemanticTokenType = SemanticTokenType::new("pol");
const ERROR: SemanticTokenType = SemanticTokenType::new("error");
const TEXT: SemanticTokenType = SemanticTokenType::new("text");
132
/// The semantic token types produced by the tokenizer.
///
/// `#[repr(u32)]`: the discriminant is written directly into
/// `SemanticToken::token_type` (see `Tokenizer::push`), so the variant order
/// is part of the encoding shared with the client.
#[derive(Clone, Copy, Eq, PartialEq, EnumIter, Default)]
#[repr(u32)]
pub enum TokenType {
    // Standard LSP token types (mapped in `From<TokenType>` below).
    Comment,
    String,
    Keyword,
    Operator,
    Number,
    Function,
    Decorator,
    Type,
    Namespace,
    // Custom token types (mapped to the `const`s above).
    Bool,
    Punctuation,
    Escape,
    Link,
    Raw,
    Label,
    Ref,
    Heading,
    ListMarker,
    ListTerm,
    Delimiter,
    Interpolated,
    Error,
    Text,
    /// Sentinel meaning "no token"; never converted to a
    /// `SemanticTokenType` (the conversion below treats it as unreachable).
    #[default]
    None,
}
195
196impl From<TokenType> for SemanticTokenType {
197 fn from(token_type: TokenType) -> Self {
198 use TokenType::*;
199
200 match token_type {
201 Comment => Self::COMMENT,
202 String => Self::STRING,
203 Keyword => Self::KEYWORD,
204 Operator => Self::OPERATOR,
205 Number => Self::NUMBER,
206 Function => Self::FUNCTION,
207 Decorator => Self::DECORATOR,
208 Type => Self::TYPE,
209 Namespace => Self::NAMESPACE,
210 Bool => BOOL,
211 Punctuation => PUNCTUATION,
212 Escape => ESCAPE,
213 Link => LINK,
214 Raw => RAW,
215 Label => LABEL,
216 Ref => REF,
217 Heading => HEADING,
218 ListMarker => LIST_MARKER,
219 ListTerm => LIST_TERM,
220 Delimiter => DELIMITER,
221 Interpolated => INTERPOLATED,
222 Error => ERROR,
223 Text => TEXT,
224 None => unreachable!(),
225 }
226 }
227}
228
// Custom (non-standard) semantic token modifiers for Typst markup contexts.
const STRONG: SemanticTokenModifier = SemanticTokenModifier::new("strong");
const EMPH: SemanticTokenModifier = SemanticTokenModifier::new("emph");
const MATH: SemanticTokenModifier = SemanticTokenModifier::new("math");
232
/// The semantic token modifiers produced by the tokenizer.
///
/// `#[repr(u8)]`: the discriminant is the bit position used by
/// `Modifier::bitmask`, so at most 32 variants fit into the LSP `u32`
/// modifier bitset (checked by the test at the bottom of this file).
#[derive(Clone, Copy, EnumIter)]
#[repr(u8)]
pub enum Modifier {
    Strong,
    Emph,
    Math,
    ReadOnly,
    Static,
    DefaultLibrary,
}
250
251impl Modifier {
252 pub const fn index(self) -> u8 {
254 self as u8
255 }
256
257 pub const fn bitmask(self) -> u32 {
259 0b1 << self.index()
260 }
261}
262
263impl From<Modifier> for SemanticTokenModifier {
264 fn from(modifier: Modifier) -> Self {
265 use Modifier::*;
266
267 match modifier {
268 Strong => STRONG,
269 Emph => EMPH,
270 Math => MATH,
271 ReadOnly => Self::READONLY,
272 Static => Self::STATIC,
273 DefaultLibrary => Self::DEFAULT_LIBRARY,
274 }
275 }
276}
277
/// A set of [`Modifier`]s, stored as the raw LSP `u32` modifier bitset.
#[derive(Default, Clone, Copy)]
pub(crate) struct ModifierSet(u32);
280
281impl ModifierSet {
282 pub fn empty() -> Self {
283 Self::default()
284 }
285
286 pub fn new(modifiers: &[Modifier]) -> Self {
287 let bits = modifiers
288 .iter()
289 .copied()
290 .map(Modifier::bitmask)
291 .fold(0, |bits, mask| bits | mask);
292 Self(bits)
293 }
294
295 pub fn bitset(self) -> u32 {
296 self.0
297 }
298}
299
300impl std::ops::BitOr for ModifierSet {
301 type Output = Self;
302
303 fn bitor(self, rhs: Self) -> Self::Output {
304 Self(self.0 | rhs.0)
305 }
306}
307
/// Walks a syntax tree and accumulates the LSP semantic tokens for it.
pub(crate) struct Tokenizer {
    // Position of the last emitted token; output tokens are delta-encoded
    // relative to it.
    curr_pos: LspPosition,
    // Byte offset reached so far; `push` drops tokens that would go backwards.
    pos_offset: usize,
    // The accumulated semantic tokens, in document order.
    output: Vec<SemanticToken>,
    source: Source,
    // Expression information used to classify identifiers.
    ei: ExprInfo,
    encoding: PositionEncoding,

    // Whether the client accepts tokens spanning multiple lines.
    allow_multiline_token: bool,

    // The currently active parent token; child tokens carve slices out of it
    // during the tree walk.
    token: Option<Token>,
}
320
321impl Tokenizer {
322 pub fn new(
323 source: Source,
324 ei: ExprInfo,
325 allow_multiline_token: bool,
326 encoding: PositionEncoding,
327 ) -> Self {
328 Self {
329 curr_pos: LspPosition::new(0, 0),
330 pos_offset: 0,
331 output: Vec::new(),
332 source,
333 ei,
334 allow_multiline_token,
335 encoding,
336
337 token: None,
338 }
339 }
340
341 fn tokenize_tree(&mut self, root: &LinkedNode, modifiers: ModifierSet) {
343 let is_leaf = root.get().children().len() == 0;
344 let mut modifiers = modifiers | modifiers_from_node(root);
345
346 let range = root.range();
347 let mut token = token_from_node(&self.ei, root, &mut modifiers)
348 .or_else(|| is_leaf.then_some(TokenType::Text))
349 .map(|token_type| Token::new(token_type, modifiers, range.clone()));
350
351 if let Some(prev_token) = self.token.as_mut()
353 && !prev_token.range.is_empty()
354 && prev_token.range.start < range.start
355 {
356 let end = prev_token.range.end.min(range.start);
357 let sliced = Token {
358 token_type: prev_token.token_type,
359 modifiers: prev_token.modifiers,
360 range: prev_token.range.start..end,
361 };
362 prev_token.range.start = end;
364 self.push(sliced);
365 }
366
367 if !is_leaf {
368 std::mem::swap(&mut self.token, &mut token);
369 for child in root.children() {
370 self.tokenize_tree(&child, modifiers);
371 }
372 std::mem::swap(&mut self.token, &mut token);
373 }
374
375 if let Some(token) = token.clone()
377 && !token.range.is_empty()
378 {
379 if let Some(prev_token) = self.token.as_mut() {
381 prev_token.range.start = token.range.end;
382 }
383 self.push(token);
384 }
385 }
386
387 fn push(&mut self, token: Token) {
388 let Token {
389 token_type,
390 modifiers,
391 range,
392 } = token;
393
394 use crate::lsp_typst_boundary;
395 use lsp_types::Position;
396 let utf8_start = range.start;
397 if self.pos_offset > utf8_start {
398 return;
399 }
400
401 let source_len = self.source.text().len();
403 let utf8_end = (range.end).min(source_len);
404 self.pos_offset = utf8_start;
405 if utf8_end <= utf8_start || utf8_start > source_len {
406 return;
407 }
408
409 let position = lsp_typst_boundary::to_lsp_position(utf8_start, self.encoding, &self.source);
410
411 let delta = self.curr_pos.delta(&position);
412
413 let encode_length = |s, t| {
414 match self.encoding {
415 PositionEncoding::Utf8 => t - s,
416 PositionEncoding::Utf16 => {
417 let utf16_start = self.source.byte_to_utf16(s).unwrap();
419 let utf16_end = self.source.byte_to_utf16(t).unwrap();
420 utf16_end - utf16_start
421 }
422 }
423 };
424
425 if self.allow_multiline_token {
426 self.output.push(SemanticToken {
427 delta_line: delta.delta_line,
428 delta_start: delta.delta_start,
429 length: encode_length(utf8_start, utf8_end) as u32,
430 token_type: token_type as u32,
431 token_modifiers_bitset: modifiers.bitset(),
432 });
433 self.curr_pos = position;
434 } else {
435 let final_line = self
436 .source
437 .byte_to_line(utf8_end)
438 .unwrap_or_else(|| self.source.len_lines()) as u32;
439 let next_offset = self
440 .source
441 .line_to_byte((self.curr_pos.line + 1) as usize)
442 .unwrap_or(source_len);
443 let inline_length = encode_length(utf8_start, utf8_end.min(next_offset)) as u32;
444 if inline_length != 0 {
445 self.output.push(SemanticToken {
446 delta_line: delta.delta_line,
447 delta_start: delta.delta_start,
448 length: inline_length,
449 token_type: token_type as u32,
450 token_modifiers_bitset: modifiers.bitset(),
451 });
452 self.curr_pos = position;
453 }
454 if self.curr_pos.line >= final_line {
455 return;
456 }
457
458 let mut utf8_cursor = next_offset;
459 let mut delta_line = 0;
460 for line in self.curr_pos.line + 1..=final_line {
461 let next_offset = if line == final_line {
462 utf8_end
463 } else {
464 self.source
465 .line_to_byte((line + 1) as usize)
466 .unwrap_or(source_len)
467 };
468
469 if utf8_cursor < next_offset {
470 let inline_length = encode_length(utf8_cursor, next_offset) as u32;
471 self.output.push(SemanticToken {
472 delta_line: delta_line + 1,
473 delta_start: 0,
474 length: inline_length,
475 token_type: token_type as u32,
476 token_modifiers_bitset: modifiers.bitset(),
477 });
478 delta_line = 0;
479 self.curr_pos.character = 0;
480 } else {
481 delta_line += 1;
482 }
483 self.pos_offset = utf8_cursor;
484 utf8_cursor = next_offset;
485 }
486 self.curr_pos.line = final_line - delta_line;
487 }
488
489 pub trait PositionExt {
490 fn delta(&self, to: &Self) -> PositionDelta;
491 }
492
493 impl PositionExt for Position {
494 fn delta(&self, to: &Self) -> PositionDelta {
500 let line_delta = to.line - self.line;
501 let char_delta = if line_delta == 0 {
502 to.character - self.character
503 } else {
504 to.character
505 };
506
507 PositionDelta {
508 delta_line: line_delta,
509 delta_start: char_delta,
510 }
511 }
512 }
513
514 #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Default)]
515 pub struct PositionDelta {
516 pub delta_line: u32,
517 pub delta_start: u32,
518 }
519 }
520}
521
/// A semantic token under construction, with its byte range in the source.
#[derive(Clone, Default)]
struct Token {
    pub token_type: TokenType,
    pub modifiers: ModifierSet,
    pub range: Range<usize>,
}
528
529impl Token {
530 pub fn new(token_type: TokenType, modifiers: ModifierSet, range: Range<usize>) -> Self {
531 Self {
532 token_type,
533 modifiers,
534 range,
535 }
536 }
537}
538
539fn modifiers_from_node(node: &LinkedNode) -> ModifierSet {
544 match node.kind() {
545 SyntaxKind::Emph => ModifierSet::new(&[Modifier::Emph]),
546 SyntaxKind::Strong => ModifierSet::new(&[Modifier::Strong]),
547 SyntaxKind::Math | SyntaxKind::Equation => ModifierSet::new(&[Modifier::Math]),
548 _ => ModifierSet::empty(),
549 }
550}
551
/// Determines the semantic token type for a single syntax node.
///
/// Returns `None` when the node itself carries no token (its children may
/// still produce tokens). `modifier` can be extended as a side effect, e.g.
/// when an identifier resolves to a namespace-like value.
fn token_from_node(
    ei: &ExprInfo,
    node: &LinkedNode,
    modifier: &mut ModifierSet,
) -> Option<TokenType> {
    use SyntaxKind::*;

    match node.kind() {
        // `*` and `_` are punctuation in strong/emph markup, operators in
        // imports and math attachments.
        Star if node.parent_kind() == Some(Strong) => Some(TokenType::Punctuation),
        Star if node.parent_kind() == Some(ModuleImport) => Some(TokenType::Operator),

        Underscore if node.parent_kind() == Some(Emph) => Some(TokenType::Punctuation),
        Underscore if node.parent_kind() == Some(MathAttach) => Some(TokenType::Operator),

        // Identifiers are classified via the resolved expression information.
        MathIdent | Ident => Some(token_from_ident(ei, node, modifier)),
        // A hash inherits the token type of the expression it introduces.
        Hash => token_from_hashtag(ei, node, modifier),

        LeftBrace | RightBrace | LeftBracket | RightBracket | LeftParen | RightParen | Comma
        | Semicolon | Colon => Some(TokenType::Punctuation),
        Linebreak | Escape | Shorthand => Some(TokenType::Escape),
        Link => Some(TokenType::Link),
        Raw => Some(TokenType::Raw),
        Label => Some(TokenType::Label),
        RefMarker => Some(TokenType::Ref),
        Heading | HeadingMarker => Some(TokenType::Heading),
        ListMarker | EnumMarker | TermMarker => Some(TokenType::ListMarker),
        Not | And | Or => Some(TokenType::Keyword),
        MathAlignPoint | Plus | Minus | Slash | Hat | Dot | Eq | EqEq | ExclEq | Lt | LtEq | Gt
        | GtEq | PlusEq | HyphEq | StarEq | SlashEq | Dots | Arrow => Some(TokenType::Operator),
        Dollar => Some(TokenType::Delimiter),
        None | Auto | Let | Show | If | Else | For | In | While | Break | Continue | Return
        | Import | Include | As | Set | Context => Some(TokenType::Keyword),
        Bool => Some(TokenType::Bool),
        Int | Float | Numeric => Some(TokenType::Number),
        Str => Some(TokenType::String),
        LineComment | BlockComment => Some(TokenType::Comment),
        Error => Some(TokenType::Error),

        // `Option::None` is spelled out because the glob import above makes
        // bare `None` refer to `SyntaxKind::None`.
        _ => Option::None,
    }
}
601
602fn token_from_ident(ei: &ExprInfo, ident: &LinkedNode, modifier: &mut ModifierSet) -> TokenType {
604 let resolved = ei.resolves.get(&ident.span());
605 let context = if let Some(resolved) = resolved {
606 match (&resolved.root, &resolved.term) {
607 (Some(root), term) => Some(token_from_decl_expr(root, term.as_ref(), modifier)),
608 (_, Some(ty)) => Some(token_from_term(ty, modifier)),
609 _ => None,
610 }
611 } else {
612 None
613 };
614
615 if !matches!(context, None | Some(TokenType::Interpolated)) {
616 return context.unwrap_or(TokenType::Interpolated);
617 }
618
619 let next = ident.next_leaf();
620 let next_is_adjacent = next
621 .as_ref()
622 .is_some_and(|n| n.range().start == ident.range().end);
623 let next_parent = next.as_ref().and_then(|n| n.parent_kind());
624 let next_kind = next.map(|n| n.kind());
625 let lexical_function_call = next_is_adjacent
626 && matches!(next_kind, Some(SyntaxKind::LeftParen))
627 && matches!(next_parent, Some(SyntaxKind::Args | SyntaxKind::Params));
628 if lexical_function_call {
629 return TokenType::Function;
630 }
631
632 let function_content = next_is_adjacent
633 && matches!(next_kind, Some(SyntaxKind::LeftBracket))
634 && matches!(next_parent, Some(SyntaxKind::ContentBlock));
635 if function_content {
636 return TokenType::Function;
637 }
638
639 TokenType::Interpolated
640}
641
642fn token_from_term(t: &Ty, modifier: &mut ModifierSet) -> TokenType {
643 use typst::foundations::Value::*;
644 match t {
645 Ty::Func(..) => TokenType::Function,
646 Ty::Value(v) => {
647 match &v.val {
648 Func(..) => TokenType::Function,
649 Type(..) => {
650 *modifier = *modifier | ModifierSet::new(&[Modifier::DefaultLibrary]);
651 TokenType::Function
652 }
653 Module(..) => ns(modifier),
654 _ => TokenType::Interpolated,
656 }
657 }
658 _ => TokenType::Interpolated,
659 }
660}
661
662fn token_from_decl_expr(expr: &Expr, term: Option<&Ty>, modifier: &mut ModifierSet) -> TokenType {
663 use crate::syntax::Decl::*;
664 match expr {
665 Expr::Type(term) => token_from_term(term, modifier),
666 Expr::Decl(decl) => match decl.as_ref() {
667 Func(..) => TokenType::Function,
668 Var(..) => TokenType::Interpolated,
669 Module(..) => ns(modifier),
670 ModuleAlias(..) => ns(modifier),
671 PathStem(..) => ns(modifier),
672 ImportAlias(..) => TokenType::Interpolated,
673 IdentRef(..) => TokenType::Interpolated,
674 ImportPath(..) => TokenType::Interpolated,
675 IncludePath(..) => TokenType::Interpolated,
676 Import(..) => TokenType::Interpolated,
677 ContentRef(..) => TokenType::Interpolated,
678 Label(..) => TokenType::Interpolated,
679 StrName(..) => TokenType::Interpolated,
680 ModuleImport(..) => TokenType::Interpolated,
681 Closure(..) => TokenType::Interpolated,
682 Pattern(..) => TokenType::Interpolated,
683 Spread(..) => TokenType::Interpolated,
684 Content(..) => TokenType::Interpolated,
685 Constant(..) => TokenType::Interpolated,
686 BibEntry(..) => TokenType::Interpolated,
687 Docs(..) => TokenType::Interpolated,
688 Generated(..) => TokenType::Interpolated,
689 },
690 _ => term
691 .map(|term| token_from_term(term, modifier))
692 .unwrap_or(TokenType::Interpolated),
693 }
694}
695
696fn ns(modifier: &mut ModifierSet) -> TokenType {
697 *modifier = *modifier | ModifierSet::new(&[Modifier::Static, Modifier::ReadOnly]);
698 TokenType::Namespace
699}
700
701fn get_expr_following_hashtag<'a>(hashtag: &LinkedNode<'a>) -> Option<LinkedNode<'a>> {
702 hashtag
703 .next_sibling()
704 .filter(|next| next.cast::<ast::Expr>().is_some_and(|expr| expr.hash()))
705 .and_then(|node| node.leftmost_leaf())
706}
707
708fn token_from_hashtag(
709 ei: &ExprInfo,
710 hashtag: &LinkedNode,
711 modifier: &mut ModifierSet,
712) -> Option<TokenType> {
713 get_expr_following_hashtag(hashtag)
714 .as_ref()
715 .and_then(|node| token_from_node(ei, node, modifier))
716}
717
#[cfg(test)]
mod tests {
    use strum::IntoEnumIterator;

    use super::*;

    /// `SemanticToken::token_modifiers_bitset` is a `u32`, so at most 32
    /// distinct modifiers can be encoded.
    #[test]
    fn ensure_not_too_many_modifiers() {
        let modifier_count = Modifier::iter().count();
        assert!(modifier_count <= u32::BITS as usize);
    }
}
730}