tinymist_analysis/syntax/
comment.rs

//! Convenient utilities to match comments in code.
2
3use itertools::Itertools;
4
5use crate::prelude::*;
6
7/// Extracts the module-level documentation from a source.
8pub fn find_module_level_docs(src: &Source) -> Option<String> {
9    crate::log_debug_ct!("finding docs at: {id:?}", id = src.id());
10
11    let root = LinkedNode::new(src.root());
12    for n in root.children() {
13        if n.kind().is_trivia() {
14            continue;
15        }
16
17        return extract_mod_docs_between(&root, 0..n.offset(), true);
18    }
19
20    extract_mod_docs_between(&root, 0..src.text().len(), true)
21}
22
23/// Extracts the module-level documentation from a source.
24fn extract_mod_docs_between(
25    node: &LinkedNode,
26    rng: Range<usize>,
27    first_group: bool,
28) -> Option<String> {
29    let mut matcher = DocCommentMatcher {
30        strict: true,
31        ..Default::default()
32    };
33    let nodes = node.children();
34    'scan_comments: for n in nodes {
35        let offset = n.offset();
36        if offset < rng.start {
37            continue 'scan_comments;
38        }
39        if offset >= rng.end {
40            break 'scan_comments;
41        }
42
43        crate::log_debug_ct!("found comment for docs: {:?}: {:?}", n.kind(), n.text());
44        if matcher.process(n.get()) {
45            if first_group {
46                break 'scan_comments;
47            }
48            matcher.comments.clear();
49        }
50    }
51
52    matcher.collect()
53}
54
/// A signal raised by the comment group matcher for each processed node.
///
/// The signal is a plain tag without payload, so it is cheap to copy and can
/// be compared and printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentGroupSignal {
    /// A hash marker is found.
    Hash,
    /// A space is found that does not end the current group.
    Space,
    /// A line comment is found.
    LineComment,
    /// A block comment is found.
    BlockComment,
    /// The comment group should be broken.
    BreakGroup,
}
68
69/// A matcher that groups comments.
70#[derive(Default)]
71pub struct CommentGroupMatcher {
72    newline_count: u32,
73}
74
75impl CommentGroupMatcher {
76    /// Resets the matcher. This usually happens after a group is collected or
77    /// when some other child item is breaking the comment group manually.
78    pub fn reset(&mut self) {
79        self.newline_count = 0;
80    }
81
82    /// Processes a child relative to some [`SyntaxNode`].
83    ///
84    /// ## Example
85    ///
86    /// See [`DocCommentMatcher`] for a real-world example.
87    pub fn process(&mut self, n: &SyntaxNode) -> CommentGroupSignal {
88        match n.kind() {
89            SyntaxKind::Hash => {
90                self.newline_count = 0;
91
92                CommentGroupSignal::Hash
93            }
94            SyntaxKind::Space => {
95                if n.text().contains('\n') {
96                    self.newline_count += 1;
97                }
98                if self.newline_count > 1 {
99                    return CommentGroupSignal::BreakGroup;
100                }
101
102                CommentGroupSignal::Space
103            }
104            SyntaxKind::Parbreak => {
105                self.newline_count = 2;
106                CommentGroupSignal::BreakGroup
107            }
108            SyntaxKind::LineComment => {
109                self.newline_count = 0;
110                CommentGroupSignal::LineComment
111            }
112            SyntaxKind::BlockComment => {
113                self.newline_count = 0;
114                CommentGroupSignal::BlockComment
115            }
116            _ => {
117                self.newline_count = 0;
118                CommentGroupSignal::BreakGroup
119            }
120        }
121    }
122}
123
124/// A raw comment.
125enum RawComment {
126    /// A line comment.
127    Line(EcoString),
128    /// A block comment.
129    Block(EcoString),
130}
131
132/// A matcher that collects documentation comments.
133#[derive(Default)]
134pub struct DocCommentMatcher {
135    /// The collected comments.
136    comments: Vec<RawComment>,
137    /// The matcher for grouping comments.
138    group_matcher: CommentGroupMatcher,
139    /// Whether to strictly match the comment format.
140    strict: bool,
141}
142
143impl DocCommentMatcher {
144    /// Resets the matcher. This usually happens after a group is collected or
145    /// when some other child item is breaking the comment group manually.
146    pub fn reset(&mut self) {
147        self.comments.clear();
148        self.group_matcher.reset();
149    }
150
151    /// Processes a child relative to some [`SyntaxNode`].
152    pub fn process(&mut self, n: &SyntaxNode) -> bool {
153        match self.group_matcher.process(n) {
154            CommentGroupSignal::LineComment => {
155                let text = n.text();
156                if !self.strict || text.starts_with("///") {
157                    self.comments.push(RawComment::Line(text.clone()));
158                }
159            }
160            CommentGroupSignal::BlockComment => {
161                let text = n.text();
162                if !self.strict {
163                    self.comments.push(RawComment::Block(text.clone()));
164                }
165            }
166            CommentGroupSignal::BreakGroup => {
167                return true;
168            }
169            CommentGroupSignal::Hash | CommentGroupSignal::Space => {}
170        }
171
172        false
173    }
174
175    /// Collects the comments and returns the result.
176    pub fn collect(&mut self) -> Option<String> {
177        let comments = &self.comments;
178        if comments.is_empty() {
179            return None;
180        }
181
182        let comments = comments.iter().map(|comment| match comment {
183            RawComment::Line(line) => {
184                // strip all slash prefix
185                line.trim_start_matches('/')
186            }
187            RawComment::Block(block) => {
188                fn remove_comment(text: &str) -> Option<&str> {
189                    let mut text = text.strip_prefix("/*")?.strip_suffix("*/")?.trim();
190                    // trip start star
191                    if text.starts_with('*') {
192                        text = text.strip_prefix('*')?.trim();
193                    }
194                    Some(text)
195                }
196
197                remove_comment(block).unwrap_or(block.as_str())
198            }
199        });
200        let comments = comments.collect::<Vec<_>>();
201
202        let dedent = comments
203            .iter()
204            .flat_map(|line| {
205                let mut chars = line.chars();
206                let cnt = chars
207                    .by_ref()
208                    .peeking_take_while(|c| c.is_whitespace())
209                    .count();
210                chars.next().map(|_| cnt)
211            })
212            .min()
213            .unwrap_or(0);
214
215        let size_hint = comments.iter().map(|comment| comment.len()).sum::<usize>();
216        let mut comments = comments
217            .iter()
218            .map(|comment| comment.chars().skip(dedent).collect::<String>());
219
220        let res = comments.try_fold(String::with_capacity(size_hint), |mut acc, comment| {
221            if !acc.is_empty() {
222                acc.push('\n');
223            }
224
225            acc.push_str(&comment);
226            Some(acc)
227        });
228
229        self.comments.clear();
230        res
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// The item that follows the documentation comments in every fixture.
    const ITEM: &str = r#"#let main() = printf("hello World")"#;

    /// Builds a source consisting of `comment_lines`, one per line, followed
    /// by [`ITEM`].
    ///
    /// Fixtures are spelled as explicit line lists rather than multi-line raw
    /// strings so that significant trailing whitespace stays visible and
    /// cannot be stripped by an editor.
    fn fixture(comment_lines: &[&str]) -> String {
        let mut source = String::new();
        for line in comment_lines {
            source.push_str(line);
            source.push('\n');
        }
        source.push_str(ITEM);
        source
    }

    /// Extracts the module-level docs of the fixture built from
    /// `comment_lines`, panicking if none are found.
    fn test(comment_lines: &[&str]) -> String {
        find_module_level_docs(&Source::detached(fixture(comment_lines)))
            .expect("the fixture should contain module-level docs")
    }

    #[test]
    fn simple() {
        assert_eq!(test(&["/// foo", "/// bar"]), "foo\nbar");
    }

    #[test]
    fn dedent() {
        // A shared single space of indentation is removed from every line.
        assert_eq!(test(&["/// a", "/// b", "/// c"]), "a\nb\nc");
        // The first line has no indentation, so nothing is removed.
        assert_eq!(test(&["///a", "/// b", "/// c"]), "a\n b\n c");
    }

    /// The empty doc line carries a trailing space after the slashes.
    #[test]
    fn issue_1687_positive() {
        assert_eq!(
            test(&["/// Description.", "/// ", "/// Note."]),
            "Description.\n\nNote."
        );
    }

    /// The empty doc line consists of the slashes only.
    #[test]
    fn issue_1687_negative() {
        assert_eq!(
            test(&["/// Description.", "///", "/// Note."]),
            "Description.\n\nNote."
        );
    }
}