1use std::collections::HashMap;
4use std::{
5 num::NonZeroUsize,
6 ops::Range,
7 path::Path,
8 sync::{Arc, OnceLock},
9};
10
11use lsp_types::SemanticToken;
12use lsp_types::{SemanticTokenModifier, SemanticTokenType};
13use parking_lot::Mutex;
14use strum::EnumIter;
15use tinymist_std::ImmutPath;
16use typst::syntax::{LinkedNode, Source, SyntaxKind, ast};
17
18use crate::{
19 LocalContext, LspPosition, PositionEncoding,
20 adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot},
21 syntax::{Expr, ExprInfo},
22 ty::Ty,
23};
24
/// A shared, immutable list of LSP semantic tokens.
pub type SemanticTokens = Arc<Vec<SemanticToken>>;
27
28#[typst_macros::time(span = source.root().span())]
30pub(crate) fn get_semantic_tokens(ctx: &mut LocalContext, source: &Source) -> SemanticTokens {
31 let mut tokenizer = Tokenizer::new(
32 source.clone(),
33 ctx.expr_stage(source),
34 ctx.analysis.allow_multiline_token,
35 ctx.analysis.position_encoding,
36 );
37 tokenizer.tokenize_tree(&LinkedNode::new(source.root()), ModifierSet::empty());
38 SemanticTokens::new(tokenizer.output)
39}
40
/// Caches computed semantic tokens per file path, organized by revision.
#[derive(Default)]
pub struct SemanticTokenCache {
    // Monotonically increasing counter; `acquire` hands out its successor as
    // the id of the next revision, and clients may echo one back as `prev`.
    next_id: usize,
    // One revision manager per file path, each slot lazily holding a computed
    // token list.
    manager: HashMap<ImmutPath, RevisionManager<OnceLock<SemanticTokens>>>,
}
48
impl SemanticTokenCache {
    /// Resets the id counter and drops all per-path revision managers.
    pub(crate) fn clear(&mut self) {
        self.next_id = 0;
        self.manager.clear();
    }

    /// Acquires a revisioned slot for computing the semantic tokens of `path`.
    ///
    /// `prev` is the result id the client reported last (if any); when it
    /// parses and its revision still holds data, those tokens are exposed via
    /// the returned context's `prev` field. A fresh revision id is always
    /// allocated for the new result.
    pub(crate) fn acquire(
        cache: Arc<Mutex<Self>>,
        path: &Path,
        prev: Option<&str>,
    ) -> SemanticTokenContext {
        // Clone the handle before locking so `cache` itself can be moved into
        // the returned context while the guard borrows the clone.
        let that = cache.clone();
        let mut that = that.lock();

        that.next_id += 1;
        // Parse the client-provided previous result id; an unparsable id is
        // logged and treated as absent.
        let prev = prev.and_then(|id| {
            id.parse::<NonZeroUsize>()
                .inspect_err(|_| {
                    log::warn!("invalid previous id: {id}");
                })
                .ok()
        });
        let next = NonZeroUsize::new(that.next_id).expect("id overflow");

        let path = ImmutPath::from(path);
        let manager = that.manager.entry(path.clone()).or_default();
        // Lock the previous revision when the client supplied one, otherwise
        // the new one, before touching revision slots — presumably this keeps
        // the revision alive against GC while this context exists (see `Drop`).
        let _rev_lock = manager.lock(prev.unwrap_or(next));
        // Fetch the previously computed tokens, if that revision's slot was
        // ever filled.
        let prev = prev.and_then(|prev| {
            manager
                .find_revision(prev, |_| OnceLock::new())
                .data
                .get()
                .cloned()
        });
        let next = manager.find_revision(next, |_| OnceLock::new());

        SemanticTokenContext {
            _rev_lock,
            cache,
            path,
            prev,
            next,
        }
    }
}
95
/// A handle that pins one revision of semantic tokens for a file.
///
/// Dropping the context releases the revision lock and garbage-collects stale
/// revisions (see the `Drop` impl).
pub(crate) struct SemanticTokenContext {
    // Held for its effect on revision GC; released on drop.
    _rev_lock: RevisionLock,
    cache: Arc<Mutex<SemanticTokenCache>>,
    path: ImmutPath,
    /// Tokens cached at the client-reported previous revision, if any.
    /// NOTE(review): presumably consumed for LSP delta responses — confirm at
    /// the call site.
    pub prev: Option<SemanticTokens>,
    /// The slot where the newly computed tokens are to be stored.
    pub next: Arc<RevisionSlot<OnceLock<SemanticTokens>>>,
}
104
105impl Drop for SemanticTokenContext {
106 fn drop(&mut self) {
107 let mut cache = self.cache.lock();
108 let manager = cache.manager.get_mut(&self.path);
109 if let Some(manager) = manager {
110 let min_rev = manager.unlock(&mut self._rev_lock);
111 if let Some(min_rev) = min_rev {
112 manager.gc(min_rev);
113 }
114 }
115 }
116}
117
// Custom semantic token types beyond the LSP-predefined set; standard types
// are taken from `SemanticTokenType`'s associated constants in the `From`
// impl below.
const BOOL: SemanticTokenType = SemanticTokenType::new("bool");
const PUNCTUATION: SemanticTokenType = SemanticTokenType::new("punct");
const ESCAPE: SemanticTokenType = SemanticTokenType::new("escape");
const LINK: SemanticTokenType = SemanticTokenType::new("link");
const RAW: SemanticTokenType = SemanticTokenType::new("raw");
const LABEL: SemanticTokenType = SemanticTokenType::new("label");
const REF: SemanticTokenType = SemanticTokenType::new("ref");
const HEADING: SemanticTokenType = SemanticTokenType::new("heading");
const LIST_MARKER: SemanticTokenType = SemanticTokenType::new("marker");
const LIST_TERM: SemanticTokenType = SemanticTokenType::new("term");
const DELIMITER: SemanticTokenType = SemanticTokenType::new("delim");
const INTERPOLATED: SemanticTokenType = SemanticTokenType::new("pol");
const ERROR: SemanticTokenType = SemanticTokenType::new("error");
const TEXT: SemanticTokenType = SemanticTokenType::new("text");
132
/// The semantic token types produced by the tokenizer.
///
/// The `u32` discriminant is what gets sent as the LSP `token_type` index
/// (see the `token_type as u32` casts in `Tokenizer::push`), so variant order
/// is significant — presumably it must match the token legend registered with
/// the client; confirm where the legend is built.
#[derive(Clone, Copy, Eq, PartialEq, EnumIter, Default)]
#[repr(u32)]
pub enum TokenType {
    // Standard LSP token types.
    Comment,
    String,
    Keyword,
    Operator,
    Number,
    Function,
    Decorator,
    Type,
    Namespace,
    // Custom token types (see the `const`s above).
    Bool,
    Punctuation,
    Escape,
    Link,
    Raw,
    Label,
    Ref,
    Heading,
    ListMarker,
    ListTerm,
    Delimiter,
    Interpolated,
    Error,
    Text,
    /// Placeholder; never converted to an LSP type (the `From` impl below
    /// treats it as unreachable).
    #[default]
    None,
}
195
196impl From<TokenType> for SemanticTokenType {
197 fn from(token_type: TokenType) -> Self {
198 use TokenType::*;
199
200 match token_type {
201 Comment => Self::COMMENT,
202 String => Self::STRING,
203 Keyword => Self::KEYWORD,
204 Operator => Self::OPERATOR,
205 Number => Self::NUMBER,
206 Function => Self::FUNCTION,
207 Decorator => Self::DECORATOR,
208 Type => Self::TYPE,
209 Namespace => Self::NAMESPACE,
210 Bool => BOOL,
211 Punctuation => PUNCTUATION,
212 Escape => ESCAPE,
213 Link => LINK,
214 Raw => RAW,
215 Label => LABEL,
216 Ref => REF,
217 Heading => HEADING,
218 ListMarker => LIST_MARKER,
219 ListTerm => LIST_TERM,
220 Delimiter => DELIMITER,
221 Interpolated => INTERPOLATED,
222 Error => ERROR,
223 Text => TEXT,
224 None => unreachable!(),
225 }
226 }
227}
228
// Custom semantic token modifiers beyond the LSP-predefined set.
const STRONG: SemanticTokenModifier = SemanticTokenModifier::new("strong");
const EMPH: SemanticTokenModifier = SemanticTokenModifier::new("emph");
const MATH: SemanticTokenModifier = SemanticTokenModifier::new("math");
232
/// The semantic token modifiers produced by the tokenizer.
///
/// The `u8` discriminant is the bit index within the `u32` modifier bitset
/// (see `Modifier::index`/`bitmask`), so variant order is significant.
#[derive(Clone, Copy, EnumIter)]
#[repr(u8)]
pub enum Modifier {
    // Custom modifiers for Typst markup context.
    Strong,
    Emph,
    Math,
    // Standard LSP modifiers.
    ReadOnly,
    Static,
    DefaultLibrary,
}
250
251impl Modifier {
252 pub const fn index(self) -> u8 {
254 self as u8
255 }
256
257 pub const fn bitmask(self) -> u32 {
259 0b1 << self.index()
260 }
261}
262
263impl From<Modifier> for SemanticTokenModifier {
264 fn from(modifier: Modifier) -> Self {
265 use Modifier::*;
266
267 match modifier {
268 Strong => STRONG,
269 Emph => EMPH,
270 Math => MATH,
271 ReadOnly => Self::READONLY,
272 Static => Self::STATIC,
273 DefaultLibrary => Self::DEFAULT_LIBRARY,
274 }
275 }
276}
277
/// A set of [`Modifier`]s packed into a `u32` bitset, as used by the LSP
/// `token_modifiers_bitset` field.
#[derive(Default, Clone, Copy)]
pub(crate) struct ModifierSet(u32);
280
281impl ModifierSet {
282 pub fn empty() -> Self {
283 Self::default()
284 }
285
286 pub fn new(modifiers: &[Modifier]) -> Self {
287 let bits = modifiers
288 .iter()
289 .copied()
290 .map(Modifier::bitmask)
291 .fold(0, |bits, mask| bits | mask);
292 Self(bits)
293 }
294
295 pub fn bitset(self) -> u32 {
296 self.0
297 }
298}
299
300impl std::ops::BitOr for ModifierSet {
301 type Output = Self;
302
303 fn bitor(self, rhs: Self) -> Self::Output {
304 Self(self.0 | rhs.0)
305 }
306}
307
/// Walks a Typst syntax tree and accumulates delta-encoded LSP semantic
/// tokens in `output`.
pub(crate) struct Tokenizer {
    // Position (in the configured encoding) where the last token was emitted;
    // the next token's deltas are computed relative to it.
    curr_pos: LspPosition,
    // Byte offset up to which tokens have been emitted; tokens starting
    // before it are skipped in `push`.
    pos_offset: usize,
    // The accumulated tokens, in the wire (delta) representation.
    output: Vec<SemanticToken>,
    source: Source,
    // Expression information used to resolve identifiers to declarations.
    ei: ExprInfo,
    encoding: PositionEncoding,

    // Whether the client accepts tokens spanning multiple lines; if not,
    // `push` splits tokens at line boundaries.
    allow_multiline_token: bool,

    // The pending token of an ancestor node, emitted piecewise around the
    // tokens of its children (see `tokenize_tree`).
    token: Option<Token>,
}
320
impl Tokenizer {
    /// Creates a tokenizer over `source`, starting at position (0, 0) with an
    /// empty output.
    pub fn new(
        source: Source,
        ei: ExprInfo,
        allow_multiline_token: bool,
        encoding: PositionEncoding,
    ) -> Self {
        Self {
            curr_pos: LspPosition::new(0, 0),
            pos_offset: 0,
            output: Vec::new(),
            source,
            ei,
            allow_multiline_token,
            encoding,

            token: None,
        }
    }

    /// Tokenizes `root` and all of its descendants, in source order.
    ///
    /// A node's own token (if any) acts as a background: the part of it that
    /// precedes a child is emitted before recursing, and whatever remains
    /// after all children is emitted at the end.
    fn tokenize_tree(&mut self, root: &LinkedNode, modifiers: ModifierSet) {
        let is_leaf = root.get().children().len() == 0;
        // Accumulate context modifiers (strong/emph/math) down the tree.
        let mut modifiers = modifiers | modifiers_from_node(root);

        let range = root.range();
        // Leaves with no specific classification fall back to plain text.
        let mut token = token_from_node(&self.ei, root, &mut modifiers)
            .or_else(|| is_leaf.then_some(TokenType::Text))
            .map(|token_type| Token::new(token_type, modifiers, range.clone()));

        // Emit the slice of the pending ancestor token that lies strictly
        // before this node, and shrink the pending token accordingly.
        if let Some(prev_token) = self.token.as_mut()
            && !prev_token.range.is_empty()
            && prev_token.range.start < range.start
        {
            let end = prev_token.range.end.min(range.start);
            let sliced = Token {
                token_type: prev_token.token_type,
                modifiers: prev_token.modifiers,
                range: prev_token.range.start..end,
            };
            // Mark the part already emitted.
            prev_token.range.start = end;
            self.push(sliced);
        }

        if !is_leaf {
            // Make this node's token the pending one while visiting children,
            // then restore the ancestor's pending token afterwards.
            std::mem::swap(&mut self.token, &mut token);
            for child in root.children() {
                self.tokenize_tree(&child, modifiers);
            }
            std::mem::swap(&mut self.token, &mut token);
        }

        // Emit whatever remains of this node's token, and advance the
        // ancestor's pending token past it.
        if let Some(token) = token.clone()
            && !token.range.is_empty()
        {
            if let Some(prev_token) = self.token.as_mut() {
                prev_token.range.start = token.range.end;
            }
            self.push(token);
        }
    }

    /// Converts `token` to the LSP delta representation and appends it to
    /// `output`, splitting it per line when multiline tokens are not allowed.
    fn push(&mut self, token: Token) {
        let Token {
            token_type,
            modifiers,
            range,
        } = token;

        use crate::lsp_typst_boundary;
        use lsp_types::Position;
        let utf8_start = range.start;
        // Ignore tokens starting before the already-emitted offset.
        if self.pos_offset > utf8_start {
            return;
        }

        // Clamp the end to the source length and drop empty/out-of-bounds
        // tokens.
        let source_len = self.source.text().len();
        let utf8_end = (range.end).min(source_len);
        self.pos_offset = utf8_start;
        if utf8_end <= utf8_start || utf8_start > source_len {
            return;
        }

        let position = lsp_typst_boundary::to_lsp_position(utf8_start, self.encoding, &self.source);

        // Delta against the previously emitted token's position.
        let delta = self.curr_pos.delta(&position);

        // Length of the byte range [s, t) in the client's position encoding.
        let encode_length = |s, t| {
            match self.encoding {
                PositionEncoding::Utf8 => t - s,
                PositionEncoding::Utf16 => {
                    let utf16_start = self.source.lines().byte_to_utf16(s).unwrap();
                    let utf16_end = self.source.lines().byte_to_utf16(t).unwrap();
                    utf16_end - utf16_start
                }
            }
        };

        if self.allow_multiline_token {
            // Single token covering the whole range, possibly spanning lines.
            self.output.push(SemanticToken {
                delta_line: delta.delta_line,
                delta_start: delta.delta_start,
                length: encode_length(utf8_start, utf8_end) as u32,
                token_type: token_type as u32,
                token_modifiers_bitset: modifiers.bitset(),
            });
            self.curr_pos = position;
        } else {
            // The client cannot accept multiline tokens: emit one token per
            // line the range touches.
            let final_line =
                self.source
                    .lines()
                    .byte_to_line(utf8_end)
                    .unwrap_or_else(|| self.source.lines().len_lines()) as u32;
            // Byte offset where the line after the current position starts.
            let next_offset = self
                .source
                .lines()
                .line_to_byte((self.curr_pos.line + 1) as usize)
                .unwrap_or(source_len);
            // Portion of the token on the current line.
            let inline_length = encode_length(utf8_start, utf8_end.min(next_offset)) as u32;
            if inline_length != 0 {
                self.output.push(SemanticToken {
                    delta_line: delta.delta_line,
                    delta_start: delta.delta_start,
                    length: inline_length,
                    token_type: token_type as u32,
                    token_modifiers_bitset: modifiers.bitset(),
                });
                self.curr_pos = position;
            }
            if self.curr_pos.line >= final_line {
                return;
            }

            // Walk the remaining lines of the token, emitting a token for
            // each non-empty line segment; `delta_line` accumulates skipped
            // empty segments between emissions.
            let mut utf8_cursor = next_offset;
            let mut delta_line = 0;
            for line in self.curr_pos.line + 1..=final_line {
                let next_offset = if line == final_line {
                    utf8_end
                } else {
                    self.source
                        .lines()
                        .line_to_byte((line + 1) as usize)
                        .unwrap_or(source_len)
                };

                if utf8_cursor < next_offset {
                    let inline_length = encode_length(utf8_cursor, next_offset) as u32;
                    self.output.push(SemanticToken {
                        delta_line: delta_line + 1,
                        delta_start: 0,
                        length: inline_length,
                        token_type: token_type as u32,
                        token_modifiers_bitset: modifiers.bitset(),
                    });
                    delta_line = 0;
                    self.curr_pos.character = 0;
                } else {
                    delta_line += 1;
                }
                self.pos_offset = utf8_cursor;
                utf8_cursor = next_offset;
            }
            // Account for trailing empty segments that were never emitted.
            self.curr_pos.line = final_line - delta_line;
        }

        // NOTE(review): the items below are function-local (defined inside
        // `push`), which is legal Rust; `delta` above resolves through this
        // trait. Presumably they were hoisted here from module scope —
        // confirm against the project layout before moving them.

        /// Extension for computing LSP position deltas.
        pub trait PositionExt {
            fn delta(&self, to: &Self) -> PositionDelta;
        }

        impl PositionExt for Position {
            /// Delta from `self` to `to`: the character delta is relative to
            /// `self` only when both positions are on the same line, otherwise
            /// it is relative to the start of `to`'s line.
            fn delta(&self, to: &Self) -> PositionDelta {
                let line_delta = to.line - self.line;
                let char_delta = if line_delta == 0 {
                    to.character - self.character
                } else {
                    to.character
                };

                PositionDelta {
                    delta_line: line_delta,
                    delta_start: char_delta,
                }
            }
        }

        /// A line/character delta between two LSP positions.
        #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Default)]
        pub struct PositionDelta {
            pub delta_line: u32,
            pub delta_start: u32,
        }
    }
}
524
/// A semantic token in raw byte-range form, before LSP delta encoding.
#[derive(Clone, Default)]
struct Token {
    pub token_type: TokenType,
    pub modifiers: ModifierSet,
    // Byte range of the token within the source text.
    pub range: Range<usize>,
}
531
532impl Token {
533 pub fn new(token_type: TokenType, modifiers: ModifierSet, range: Range<usize>) -> Self {
534 Self {
535 token_type,
536 modifiers,
537 range,
538 }
539 }
540}
541
542fn modifiers_from_node(node: &LinkedNode) -> ModifierSet {
547 match node.kind() {
548 SyntaxKind::Emph => ModifierSet::new(&[Modifier::Emph]),
549 SyntaxKind::Strong => ModifierSet::new(&[Modifier::Strong]),
550 SyntaxKind::Math | SyntaxKind::Equation => ModifierSet::new(&[Modifier::Math]),
551 _ => ModifierSet::empty(),
552 }
553}
554
555fn token_from_node(
563 ei: &ExprInfo,
564 node: &LinkedNode,
565 modifier: &mut ModifierSet,
566) -> Option<TokenType> {
567 use SyntaxKind::*;
568
569 match node.kind() {
570 Star if node.parent_kind() == Some(Strong) => Some(TokenType::Punctuation),
571 Star if node.parent_kind() == Some(ModuleImport) => Some(TokenType::Operator),
572
573 Underscore if node.parent_kind() == Some(Emph) => Some(TokenType::Punctuation),
574 Underscore if node.parent_kind() == Some(MathAttach) => Some(TokenType::Operator),
575
576 MathIdent | Ident => Some(token_from_ident(ei, node, modifier)),
577 Hash => token_from_hashtag(ei, node, modifier),
578
579 LeftBrace | RightBrace | LeftBracket | RightBracket | LeftParen | RightParen | Comma
580 | Semicolon | Colon => Some(TokenType::Punctuation),
581 Linebreak | Escape | Shorthand => Some(TokenType::Escape),
582 Link => Some(TokenType::Link),
583 Raw => Some(TokenType::Raw),
584 Label => Some(TokenType::Label),
585 RefMarker => Some(TokenType::Ref),
586 Heading | HeadingMarker => Some(TokenType::Heading),
587 ListMarker | EnumMarker | TermMarker => Some(TokenType::ListMarker),
588 Not | And | Or => Some(TokenType::Keyword),
589 MathAlignPoint | Plus | Minus | Slash | Hat | Dot | Eq | EqEq | ExclEq | Lt | LtEq | Gt
590 | GtEq | PlusEq | HyphEq | StarEq | SlashEq | Dots | Arrow => Some(TokenType::Operator),
591 Dollar => Some(TokenType::Delimiter),
592 None | Auto | Let | Show | If | Else | For | In | While | Break | Continue | Return
593 | Import | Include | As | Set | Context => Some(TokenType::Keyword),
594 Bool => Some(TokenType::Bool),
595 Int | Float | Numeric => Some(TokenType::Number),
596 Str => Some(TokenType::String),
597 LineComment | BlockComment => Some(TokenType::Comment),
598 Error => Some(TokenType::Error),
599
600 _ => Option::None,
602 }
603}
604
605fn token_from_ident(ei: &ExprInfo, ident: &LinkedNode, modifier: &mut ModifierSet) -> TokenType {
607 let resolved = ei.resolves.get(&ident.span());
608 let context = if let Some(resolved) = resolved {
609 match (&resolved.root, &resolved.term) {
610 (Some(root), term) => Some(token_from_decl_expr(root, term.as_ref(), modifier)),
611 (_, Some(ty)) => Some(token_from_term(ty, modifier)),
612 _ => None,
613 }
614 } else {
615 None
616 };
617
618 if !matches!(context, None | Some(TokenType::Interpolated)) {
619 return context.unwrap_or(TokenType::Interpolated);
620 }
621
622 let next = ident.next_leaf();
623 let next_is_adjacent = next
624 .as_ref()
625 .is_some_and(|n| n.range().start == ident.range().end);
626 let next_parent = next.as_ref().and_then(|n| n.parent_kind());
627 let next_kind = next.map(|n| n.kind());
628 let lexical_function_call = next_is_adjacent
629 && matches!(next_kind, Some(SyntaxKind::LeftParen))
630 && matches!(next_parent, Some(SyntaxKind::Args | SyntaxKind::Params));
631 if lexical_function_call {
632 return TokenType::Function;
633 }
634
635 let function_content = next_is_adjacent
636 && matches!(next_kind, Some(SyntaxKind::LeftBracket))
637 && matches!(next_parent, Some(SyntaxKind::ContentBlock));
638 if function_content {
639 return TokenType::Function;
640 }
641
642 TokenType::Interpolated
643}
644
645fn token_from_term(t: &Ty, modifier: &mut ModifierSet) -> TokenType {
646 use typst::foundations::Value::*;
647 match t {
648 Ty::Func(..) => TokenType::Function,
649 Ty::Value(v) => {
650 match &v.val {
651 Func(..) => TokenType::Function,
652 Type(..) => {
653 *modifier = *modifier | ModifierSet::new(&[Modifier::DefaultLibrary]);
654 TokenType::Function
655 }
656 Module(..) => ns(modifier),
657 _ => TokenType::Interpolated,
659 }
660 }
661 _ => TokenType::Interpolated,
662 }
663}
664
665fn token_from_decl_expr(expr: &Expr, term: Option<&Ty>, modifier: &mut ModifierSet) -> TokenType {
666 use crate::syntax::Decl::*;
667 match expr {
668 Expr::Type(term) => token_from_term(term, modifier),
669 Expr::Decl(decl) => match decl.as_ref() {
670 Func(..) => TokenType::Function,
671 Var(..) => TokenType::Interpolated,
672 Module(..) => ns(modifier),
673 ModuleAlias(..) => ns(modifier),
674 PathStem(..) => ns(modifier),
675 ImportAlias(..) => TokenType::Interpolated,
676 IdentRef(..) => TokenType::Interpolated,
677 ImportPath(..) => TokenType::Interpolated,
678 IncludePath(..) => TokenType::Interpolated,
679 Import(..) => TokenType::Interpolated,
680 ContentRef(..) => TokenType::Interpolated,
681 Label(..) => TokenType::Interpolated,
682 StrName(..) => TokenType::Interpolated,
683 ModuleImport(..) => TokenType::Interpolated,
684 Closure(..) => TokenType::Interpolated,
685 Pattern(..) => TokenType::Interpolated,
686 Spread(..) => TokenType::Interpolated,
687 Content(..) => TokenType::Interpolated,
688 Constant(..) => TokenType::Interpolated,
689 BibEntry(..) => TokenType::Interpolated,
690 Docs(..) => TokenType::Interpolated,
691 Generated(..) => TokenType::Interpolated,
692 },
693 _ => term
694 .map(|term| token_from_term(term, modifier))
695 .unwrap_or(TokenType::Interpolated),
696 }
697}
698
699fn ns(modifier: &mut ModifierSet) -> TokenType {
700 *modifier = *modifier | ModifierSet::new(&[Modifier::Static, Modifier::ReadOnly]);
701 TokenType::Namespace
702}
703
704fn get_expr_following_hashtag<'a>(hashtag: &LinkedNode<'a>) -> Option<LinkedNode<'a>> {
705 hashtag
706 .next_sibling()
707 .filter(|next| next.cast::<ast::Expr>().is_some_and(|expr| expr.hash()))
708 .and_then(|node| node.leftmost_leaf())
709}
710
711fn token_from_hashtag(
712 ei: &ExprInfo,
713 hashtag: &LinkedNode,
714 modifier: &mut ModifierSet,
715) -> Option<TokenType> {
716 get_expr_following_hashtag(hashtag)
717 .as_ref()
718 .and_then(|node| token_from_node(ei, node, modifier))
719}
720
#[cfg(test)]
mod tests {
    use strum::IntoEnumIterator;

    use super::*;

    /// Modifiers are encoded as bits of a `u32` (`Modifier::bitmask`), so at
    /// most 32 variants fit.
    #[test]
    fn ensure_not_too_many_modifiers() {
        assert!(Modifier::iter().len() <= 32);
    }
}