tinymist_analysis/
location.rs

1//! Conversions between Typst and LSP locations
2
3use std::cmp::Ordering;
4use std::ops::Range;
5
6use typst::syntax::Source;
7
/// An LSP position (zero-based line and column). The unit of the column is
/// selected by a [`PositionEncoding`].
pub type LspPosition = tinymist_world::debug_loc::LspPosition;
/// An LSP range, i.e. a start and an end [`LspPosition`], both interpreted
/// according to a [`PositionEncoding`].
pub type LspRange = tinymist_world::debug_loc::LspRange;
12
/// The unit in which the `character` (column) field of an LSP position is
/// counted.
///
/// UTF-8 should always be preferred, but UTF-16 is supported as well because
/// the LSP standard requires it. For more background on encodings and LSP,
/// read ["The bottom emoji breaks rust-analyzer"](https://fasterthanli.me/articles/the-bottom-emoji-breaks-rust-analyzer),
/// a well-written article on the topic.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum PositionEncoding {
    /// One column is one UTF-16 code unit.
    ///
    /// This is the only encoding every LSP implementation is required to
    /// support, which is why it is the default. It is not a natural choice
    /// outside of JS-like environments, so prefer [`PositionEncoding::Utf8`]
    /// when the client offers it. See the article linked in the type-level
    /// docs for background.
    #[default]
    Utf16,
    /// One column is one byte of the UTF-8 encoded text.
    Utf8,
}
30
31impl From<PositionEncoding> for tinymist_world::debug_loc::PositionEncodingKind {
32    fn from(position_encoding: PositionEncoding) -> Self {
33        match position_encoding {
34            PositionEncoding::Utf16 => Self::UTF16,
35            PositionEncoding::Utf8 => Self::UTF8,
36        }
37    }
38}
39
40/// Convert an LSP position to a Typst position.
41pub fn to_typst_position(
42    lsp_position: LspPosition,
43    lsp_position_encoding: PositionEncoding,
44    typst_source: &Source,
45) -> Option<usize> {
46    let lines = typst_source.lines().len_lines() as u32;
47
48    'bound_checking: {
49        let should_warning = match lsp_position.line.cmp(&lines) {
50            Ordering::Greater => true,
51            Ordering::Equal => lsp_position.character > 0,
52            Ordering::Less if lsp_position.line + 1 == lines => {
53                let last_line_offset = typst_source.lines().line_to_byte(lines as usize - 1)?;
54                let last_line_chars = &typst_source.text()[last_line_offset..];
55                let len = match lsp_position_encoding {
56                    PositionEncoding::Utf8 => last_line_chars.len(),
57                    PositionEncoding::Utf16 => {
58                        last_line_chars.chars().map(char::len_utf16).sum::<usize>()
59                    }
60                };
61
62                match lsp_position.character.cmp(&(len as u32)) {
63                    Ordering::Less => break 'bound_checking,
64                    Ordering::Greater => true,
65                    Ordering::Equal => false,
66                }
67            }
68            Ordering::Less => break 'bound_checking,
69        };
70
71        if should_warning {
72            log::warn!(
73                "LSP position is out of bounds: {:?}, while only {:?} lines and {:?} characters at the end.",
74                lsp_position,
75                typst_source.lines().len_lines(),
76                typst_source
77                    .lines()
78                    .line_to_range(typst_source.lines().len_lines() - 1),
79            );
80        }
81
82        return Some(typst_source.lines().len_bytes());
83    }
84
85    match lsp_position_encoding {
86        PositionEncoding::Utf8 => {
87            let line_index = lsp_position.line as usize;
88            let column_index = lsp_position.character as usize;
89            typst_source
90                .lines()
91                .line_column_to_byte(line_index, column_index)
92        }
93        PositionEncoding::Utf16 => {
94            // We have a line number and a UTF-16 offset into that line. We want a byte
95            // offset into the file.
96            //
97            // Typst's `Source` provides several UTF-16 methods:
98            //  - `len_utf16` for the length of the file
99            //  - `byte_to_utf16` to convert a byte offset from the start of the file to a
100            //    UTF-16 offset from the start of the file
101            //  - `utf16_to_byte` to do the opposite of `byte_to_utf16`
102            //
103            // Unfortunately, none of these address our needs well, so we do some math
104            // instead. This is not the fastest possible implementation, but
105            // it's the most reasonable without access to the internal state
106            // of `Source`.
107
108            // TODO: Typst's `Source` could easily provide an implementation of the method
109            // we need   here. Submit a PR against `typst` to add it, then
110            // update this if/when merged.
111
112            let line_index = lsp_position.line as usize;
113            let utf16_offset_in_line = lsp_position.character as usize;
114
115            let byte_line_offset = typst_source.lines().line_to_byte(line_index)?;
116            let utf16_line_offset = typst_source.lines().byte_to_utf16(byte_line_offset)?;
117            let utf16_offset = utf16_line_offset + utf16_offset_in_line;
118
119            typst_source.lines().utf16_to_byte(utf16_offset)
120        }
121    }
122}
123
124/// Convert a Typst position to an LSP position.
125pub fn to_lsp_position(
126    typst_offset: usize,
127    lsp_position_encoding: PositionEncoding,
128    typst_source: &Source,
129) -> LspPosition {
130    if typst_offset > typst_source.lines().len_bytes() {
131        return LspPosition::new(typst_source.lines().len_lines() as u32, 0);
132    }
133
134    let line_index = typst_source.lines().byte_to_line(typst_offset).unwrap();
135    let column_index = typst_source.lines().byte_to_column(typst_offset).unwrap();
136
137    let lsp_line = line_index as u32;
138    let lsp_column = match lsp_position_encoding {
139        PositionEncoding::Utf8 => column_index as u32,
140        PositionEncoding::Utf16 => {
141            // See the implementation of `position_to_offset` for discussion
142            // relevant to this function.
143
144            // TODO: Typst's `Source` could easily provide an implementation of the method
145            // we   need here. Submit a PR to `typst` to add it, then update
146            // this if/when merged.
147
148            let utf16_offset = typst_source.lines().byte_to_utf16(typst_offset).unwrap();
149
150            let byte_line_offset = typst_source.lines().line_to_byte(line_index).unwrap();
151            let utf16_line_offset = typst_source
152                .lines()
153                .byte_to_utf16(byte_line_offset)
154                .unwrap();
155
156            let utf16_column_offset = utf16_offset - utf16_line_offset;
157            utf16_column_offset as u32
158        }
159    };
160
161    LspPosition::new(lsp_line, lsp_column)
162}
163
164/// Convert an LSP range to a Typst range.
165pub fn to_typst_range(
166    lsp_range: LspRange,
167    lsp_position_encoding: PositionEncoding,
168    source: &Source,
169) -> Option<Range<usize>> {
170    let lsp_start = lsp_range.start;
171    let typst_start = to_typst_position(lsp_start, lsp_position_encoding, source)?;
172
173    let lsp_end = lsp_range.end;
174    let typst_end = to_typst_position(lsp_end, lsp_position_encoding, source)?;
175
176    Some(Range {
177        start: typst_start,
178        end: typst_end,
179    })
180}
181
182/// Convert a Typst range to an LSP range.
183pub fn to_lsp_range(
184    typst_range: Range<usize>,
185    typst_source: &Source,
186    lsp_position_encoding: PositionEncoding,
187) -> LspRange {
188    let typst_start = typst_range.start;
189    let lsp_start = to_lsp_position(typst_start, lsp_position_encoding, typst_source);
190
191    let typst_end = typst_range.end;
192    let lsp_end = to_lsp_position(typst_end, lsp_position_encoding, typst_source);
193
194    LspRange::new(lsp_start, lsp_end)
195}
196
#[cfg(test)]
mod test {
    use super::LspPosition as Position;

    use super::*;

    /// Contains one character outside the BMP: the emoji takes four bytes in
    /// UTF-8 and two code units in UTF-16.
    const ENCODING_TEST_STRING: &str = "test 🥺 test";

    /// Shorthand for building an LSP position.
    fn pos(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Shorthand for building an LSP range from two positions.
    fn range(start: Position, end: Position) -> LspRange {
        LspRange { start, end }
    }

    #[test]
    fn issue_14_invalid_range() {
        let source = Source::detached("#set page(height: 2cm)");

        // The end position is one line past the end of the file (EOF).
        let rng = range(pos(0, 22), pos(1, 0));
        let res = to_typst_range(rng, PositionEncoding::Utf16, &source).unwrap();

        assert_eq!(res, 22..22);
    }

    #[test]
    fn issue_14_invalid_range_2() {
        let source = Source::detached("#let f(a) = {\n  a\n}\n");
        let eof = source.lines().len_bytes();

        // Ends at EOF: column zero of the (empty) last line.
        let res = to_typst_range(
            range(pos(2, 1), pos(3, 0)),
            PositionEncoding::Utf16,
            &source,
        )
        .unwrap();
        assert_eq!(res, 19..eof);

        // Starts and ends beyond EOF.
        let res = to_typst_range(
            range(pos(3, 1), pos(4, 0)),
            PositionEncoding::Utf16,
            &source,
        )
        .unwrap();
        assert_eq!(res, eof..eof);

        // Every position around the end of the file must be convertible.
        let candidates = (0..=5).flat_map(|line| (0..2).map(move |character| (line, character)));
        for (line, character) in candidates {
            let off = to_typst_position(pos(line, character), PositionEncoding::Utf16, &source);
            assert!(off.is_some(), "line: {line}, character: {character}");
        }
    }

    #[test]
    fn overflow_offset_to_position() {
        let source = Source::detached("test");
        let eof = source.lines().len_bytes();

        // Exactly at the end of the file.
        let position = to_lsp_position(eof, PositionEncoding::Utf16, &source);
        assert_eq!(position, pos(0, 4));

        // One byte past the end of the file.
        let position = to_lsp_position(eof + 1, PositionEncoding::Utf16, &source);
        assert_eq!(position, pos(1, 0));
    }

    #[test]
    fn utf16_position_to_utf8_offset() {
        let source = Source::detached(ENCODING_TEST_STRING);

        // (UTF-16 column, expected byte offset): start, emoji, post emoji, end.
        let cases: [(u32, usize); 4] = [(0, 0), (5, 5), (7, 9), (12, 14)];

        for (character, expected) in cases {
            let offset =
                to_typst_position(pos(0, character), PositionEncoding::Utf16, &source).unwrap();
            assert_eq!(offset, expected);
        }
    }

    #[test]
    fn utf8_offset_to_utf16_position() {
        let source = Source::detached(ENCODING_TEST_STRING);

        // (byte offset, expected UTF-16 column): start, emoji, post emoji, end.
        let cases: [(usize, u32); 4] = [(0, 0), (5, 5), (9, 7), (14, 12)];

        for (offset, character) in cases {
            let actual = to_lsp_position(offset, PositionEncoding::Utf16, &source);
            assert_eq!(pos(0, character), actual);
        }
    }
}