tinymist_analysis/
location.rs

1//! Conversions between Typst and LSP locations
2
3use std::cmp::Ordering;
4use std::ops::Range;
5
6use typst::syntax::Source;
7
8/// An LSP Position encoded by [`PositionEncoding`].
9pub type LspPosition = tinymist_world::debug_loc::LspPosition;
10/// An LSP range encoded by [`PositionEncoding`].
11pub type LspRange = tinymist_world::debug_loc::LspRange;
12
/// The unit in which the `character` field of an LSP position is counted.
///
/// UTF-8 is the encoding we would rather use, but UTF-16 has to be supported
/// for as long as it remains the standard one. For the background on
/// encodings and LSP, see
/// ["The bottom emoji breaks rust-analyzer"](https://fasterthanli.me/articles/the-bottom-emoji-breaks-rust-analyzer),
/// a well-written article on the topic.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum PositionEncoding {
    /// One "character" is one UTF-16 code unit.
    ///
    /// This is the only encoding every LSP implementation is required to
    /// support, which is why it is the default. It is not a natural choice
    /// (unless you're working in JS); prefer UTF-8 where possible and see the
    /// article linked in the [`PositionEncoding`] docs for more background.
    #[default]
    Utf16,
    /// One "character" is one byte.
    Utf8,
}
30
31impl From<PositionEncoding> for tinymist_world::debug_loc::PositionEncodingKind {
32    fn from(position_encoding: PositionEncoding) -> Self {
33        match position_encoding {
34            PositionEncoding::Utf16 => Self::UTF16,
35            PositionEncoding::Utf8 => Self::UTF8,
36        }
37    }
38}
39
40/// Convert an LSP position to a Typst position.
41pub fn to_typst_position(
42    lsp_position: LspPosition,
43    lsp_position_encoding: PositionEncoding,
44    typst_source: &Source,
45) -> Option<usize> {
46    let lines = typst_source.len_lines() as u32;
47
48    'bound_checking: {
49        let should_warning = match lsp_position.line.cmp(&lines) {
50            Ordering::Greater => true,
51            Ordering::Equal => lsp_position.character > 0,
52            Ordering::Less if lsp_position.line + 1 == lines => {
53                let last_line_offset = typst_source.line_to_byte(lines as usize - 1)?;
54                let last_line_chars = &typst_source.text()[last_line_offset..];
55                let len = match lsp_position_encoding {
56                    PositionEncoding::Utf8 => last_line_chars.len(),
57                    PositionEncoding::Utf16 => {
58                        last_line_chars.chars().map(char::len_utf16).sum::<usize>()
59                    }
60                };
61
62                match lsp_position.character.cmp(&(len as u32)) {
63                    Ordering::Less => break 'bound_checking,
64                    Ordering::Greater => true,
65                    Ordering::Equal => false,
66                }
67            }
68            Ordering::Less => break 'bound_checking,
69        };
70
71        if should_warning {
72            log::warn!(
73                "LSP position is out of bounds: {:?}, while only {:?} lines and {:?} characters at the end.",
74                lsp_position,
75                typst_source.len_lines(),
76                typst_source.line_to_range(typst_source.len_lines() - 1),
77            );
78        }
79
80        return Some(typst_source.len_bytes());
81    }
82
83    match lsp_position_encoding {
84        PositionEncoding::Utf8 => {
85            let line_index = lsp_position.line as usize;
86            let column_index = lsp_position.character as usize;
87            typst_source.line_column_to_byte(line_index, column_index)
88        }
89        PositionEncoding::Utf16 => {
90            // We have a line number and a UTF-16 offset into that line. We want a byte
91            // offset into the file.
92            //
93            // Typst's `Source` provides several UTF-16 methods:
94            //  - `len_utf16` for the length of the file
95            //  - `byte_to_utf16` to convert a byte offset from the start of the file to a
96            //    UTF-16 offset from the start of the file
97            //  - `utf16_to_byte` to do the opposite of `byte_to_utf16`
98            //
99            // Unfortunately, none of these address our needs well, so we do some math
100            // instead. This is not the fastest possible implementation, but
101            // it's the most reasonable without access to the internal state
102            // of `Source`.
103
104            // TODO: Typst's `Source` could easily provide an implementation of the method
105            // we need   here. Submit a PR against `typst` to add it, then
106            // update this if/when merged.
107
108            let line_index = lsp_position.line as usize;
109            let utf16_offset_in_line = lsp_position.character as usize;
110
111            let byte_line_offset = typst_source.line_to_byte(line_index)?;
112            let utf16_line_offset = typst_source.byte_to_utf16(byte_line_offset)?;
113            let utf16_offset = utf16_line_offset + utf16_offset_in_line;
114
115            typst_source.utf16_to_byte(utf16_offset)
116        }
117    }
118}
119
120/// Convert a Typst position to an LSP position.
121pub fn to_lsp_position(
122    typst_offset: usize,
123    lsp_position_encoding: PositionEncoding,
124    typst_source: &Source,
125) -> LspPosition {
126    if typst_offset > typst_source.len_bytes() {
127        return LspPosition::new(typst_source.len_lines() as u32, 0);
128    }
129
130    let line_index = typst_source.byte_to_line(typst_offset).unwrap();
131    let column_index = typst_source.byte_to_column(typst_offset).unwrap();
132
133    let lsp_line = line_index as u32;
134    let lsp_column = match lsp_position_encoding {
135        PositionEncoding::Utf8 => column_index as u32,
136        PositionEncoding::Utf16 => {
137            // See the implementation of `position_to_offset` for discussion
138            // relevant to this function.
139
140            // TODO: Typst's `Source` could easily provide an implementation of the method
141            // we   need here. Submit a PR to `typst` to add it, then update
142            // this if/when merged.
143
144            let utf16_offset = typst_source.byte_to_utf16(typst_offset).unwrap();
145
146            let byte_line_offset = typst_source.line_to_byte(line_index).unwrap();
147            let utf16_line_offset = typst_source.byte_to_utf16(byte_line_offset).unwrap();
148
149            let utf16_column_offset = utf16_offset - utf16_line_offset;
150            utf16_column_offset as u32
151        }
152    };
153
154    LspPosition::new(lsp_line, lsp_column)
155}
156
157/// Convert an LSP range to a Typst range.
158pub fn to_typst_range(
159    lsp_range: LspRange,
160    lsp_position_encoding: PositionEncoding,
161    source: &Source,
162) -> Option<Range<usize>> {
163    let lsp_start = lsp_range.start;
164    let typst_start = to_typst_position(lsp_start, lsp_position_encoding, source)?;
165
166    let lsp_end = lsp_range.end;
167    let typst_end = to_typst_position(lsp_end, lsp_position_encoding, source)?;
168
169    Some(Range {
170        start: typst_start,
171        end: typst_end,
172    })
173}
174
175/// Convert a Typst range to an LSP range.
176pub fn to_lsp_range(
177    typst_range: Range<usize>,
178    typst_source: &Source,
179    lsp_position_encoding: PositionEncoding,
180) -> LspRange {
181    let typst_start = typst_range.start;
182    let lsp_start = to_lsp_position(typst_start, lsp_position_encoding, typst_source);
183
184    let typst_end = typst_range.end;
185    let lsp_end = to_lsp_position(typst_end, lsp_position_encoding, typst_source);
186
187    LspRange::new(lsp_start, lsp_end)
188}
189
#[cfg(test)]
mod test {
    use super::LspPosition as Position;

    use super::*;

    /// Contains an emoji that takes 4 bytes in UTF-8 and 2 code units in
    /// UTF-16, so byte offsets and UTF-16 columns diverge after it.
    const ENCODING_TEST_STRING: &str = "test 🥺 test";

    /// Converts the UTF-16 encoded range `start..end`, given as
    /// `(line, character)` pairs, and expects the conversion to succeed.
    fn utf16_range(source: &Source, start: (u32, u32), end: (u32, u32)) -> Range<usize> {
        let range = LspRange::new(
            Position::new(start.0, start.1),
            Position::new(end.0, end.1),
        );
        to_typst_range(range, PositionEncoding::Utf16, source).unwrap()
    }

    #[test]
    fn issue_14_invalid_range() {
        let source = Source::detached("#set page(height: 2cm)");

        // The end position (1, 0) is one line past the document, i.e. EOF.
        assert_eq!(utf16_range(&source, (0, 22), (1, 0)), 22..22);
    }

    #[test]
    fn issue_14_invalid_range_2() {
        let source = Source::detached("#let f(a) = {\n  a\n}\n");
        let eof = source.len_bytes();

        // Ends at EOF.
        assert_eq!(utf16_range(&source, (2, 1), (3, 0)), 19..eof);
        // Starts and ends past EOF.
        assert_eq!(utf16_range(&source, (3, 1), (4, 0)), eof..eof);

        // Every position around the end of the document must be convertible.
        let probes = (0..=5u32).flat_map(|line| (0..2u32).map(move |character| (line, character)));
        for (line, character) in probes {
            let off = to_typst_position(
                Position { line, character },
                PositionEncoding::Utf16,
                &source,
            );
            assert!(off.is_some(), "line: {line}, character: {character}");
        }
    }

    #[test]
    fn overflow_offset_to_position() {
        let source = Source::detached("test");
        let eof = source.len_bytes();

        let cases = [
            // Exactly at EOF: still on the first line.
            (eof, Position::new(0, 4)),
            // Past EOF: reported as the start of the line after the last one.
            (eof + 1, Position::new(1, 0)),
        ];

        for (offset, expected) in cases {
            let position = to_lsp_position(offset, PositionEncoding::Utf16, &source);
            assert_eq!(position, expected);
        }
    }

    #[test]
    fn utf16_position_to_utf8_offset() {
        let source = Source::detached(ENCODING_TEST_STRING);

        // (UTF-16 position, expected byte offset)
        let cases = [
            // start
            (Position::new(0, 0), 0),
            // emoji
            (Position::new(0, 5), 5),
            // post emoji
            (Position::new(0, 7), 9),
            // end
            (Position::new(0, 12), 14),
        ];

        for (position, expected) in cases {
            let offset = to_typst_position(position, PositionEncoding::Utf16, &source).unwrap();
            assert_eq!(offset, expected);
        }
    }

    #[test]
    fn utf8_offset_to_utf16_position() {
        let source = Source::detached(ENCODING_TEST_STRING);

        // (byte offset, expected UTF-16 position)
        let cases = [
            // start
            (0, Position::new(0, 0)),
            // emoji
            (5, Position::new(0, 5)),
            // post emoji
            (9, Position::new(0, 7)),
            // end
            (14, Position::new(0, 12)),
        ];

        for (offset, expected) in cases {
            let actual = to_lsp_position(offset, PositionEncoding::Utf16, &source);
            assert_eq!(expected, actual);
        }
    }
}
366}