typlite/writer/
latex.rs

1//! LaTeX writer implementation
2
3use std::path::Path;
4
5use cmark_writer::ast::Node;
6use ecow::EcoString;
7use tinymist_std::path::unix_slash;
8
9use crate::Result;
10use crate::common::{
11    BlockVerbatimNode, CenterNode, ExternalFrameNode, FigureNode, FormatWriter, HighlightNode,
12    InlineNode, ListState, VerbatimNode,
13};
14
/// LaTeX writer implementation
pub struct LaTeXWriter {
    /// Kind of list currently being written, or `None` when outside any list.
    ///
    /// Set on entry to an ordered/unordered list and put back to its previous
    /// value once that list has been written. It is not read anywhere in the
    /// visible part of this file.
    list_state: Option<ListState>,
}
19
20impl Default for LaTeXWriter {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl LaTeXWriter {
27    pub fn new() -> Self {
28        Self { list_state: None }
29    }
30
31    fn write_inline_nodes(&mut self, nodes: &[Node], output: &mut EcoString) -> Result<()> {
32        for node in nodes {
33            self.write_node(node, output)?;
34        }
35        Ok(())
36    }
37
38    /// Write the document to LaTeX format
39    fn write_node(&mut self, node: &Node, output: &mut EcoString) -> Result<()> {
40        match node {
41            Node::Document(blocks) => {
42                for block in blocks {
43                    self.write_node(block, output)?;
44                }
45            }
46            Node::Paragraph(inlines) => {
47                self.write_inline_nodes(inlines, output)?;
48                output.push_str("\n\n");
49            }
50            Node::Heading {
51                level,
52                content,
53                heading_type: _,
54            } => {
55                if *level > 4 {
56                    return Err(format!("heading level {level} is not supported in LaTeX").into());
57                }
58
59                output.push('\\');
60                match level {
61                    1 => output.push_str("chapter{"),
62                    2 => output.push_str("section{"),
63                    3 => output.push_str("subsection{"),
64                    4 => output.push_str("subsubsection{"),
65                    _ => return Err(format!("Heading level {level} is not supported").into()),
66                }
67
68                self.write_inline_nodes(content, output)?;
69                output.push_str("}\n\n");
70            }
71            Node::BlockQuote(content) => {
72                output.push_str("\\begin{quote}\n");
73                for block in content {
74                    self.write_node(block, output)?;
75                }
76                output.push_str("\\end{quote}\n");
77            }
78            Node::CodeBlock {
79                language,
80                content,
81                block_type: _,
82            } => {
83                if let Some(lang) = language {
84                    if !lang.is_empty() {
85                        output.push_str("\\begin{lstlisting}[language=");
86                        output.push_str(lang);
87                        output.push_str("]\n");
88                    } else {
89                        output.push_str("\\begin{verbatim}\n");
90                    }
91                } else {
92                    output.push_str("\\begin{verbatim}\n");
93                }
94
95                output.push_str(content);
96
97                if language.as_ref().is_none_or(|lang| lang.is_empty()) {
98                    output.push_str("\n\\end{verbatim}");
99                } else {
100                    output.push_str("\n\\end{lstlisting}");
101                }
102                output.push_str("\n\n");
103            }
104            Node::OrderedList { start: _, items } => {
105                let previous_state = self.list_state;
106                self.list_state = Some(ListState::Ordered);
107
108                output.push_str("\\begin{enumerate}\n");
109                for item in items {
110                    match item {
111                        cmark_writer::ast::ListItem::Ordered { content, .. }
112                        | cmark_writer::ast::ListItem::Unordered { content } => {
113                            output.push_str("\\item ");
114                            for block in content {
115                                match block {
116                                    // For paragraphs, we want inline content rather than creating a
117                                    // new paragraph
118                                    Node::Paragraph(inlines) => {
119                                        self.write_inline_nodes(inlines, output)?;
120                                    }
121                                    _ => self.write_node(block, output)?,
122                                }
123                            }
124                            output.push('\n');
125                        }
126                        _ => {}
127                    }
128                }
129                output.push_str("\\end{enumerate}\n\n");
130
131                self.list_state = previous_state;
132            }
133            Node::UnorderedList(items) => {
134                let previous_state = self.list_state;
135                self.list_state = Some(ListState::Unordered);
136
137                output.push_str("\\begin{itemize}\n");
138                for item in items {
139                    match item {
140                        cmark_writer::ast::ListItem::Ordered { content, .. }
141                        | cmark_writer::ast::ListItem::Unordered { content } => {
142                            output.push_str("\\item ");
143                            for block in content {
144                                match block {
145                                    // For paragraphs, we want inline content rather than creating a
146                                    // new paragraph
147                                    Node::Paragraph(inlines) => {
148                                        self.write_inline_nodes(inlines, output)?;
149                                    }
150                                    _ => self.write_node(block, output)?,
151                                }
152                            }
153                            output.push('\n');
154                        }
155                        _ => {}
156                    }
157                }
158                output.push_str("\\end{itemize}\n\n");
159
160                self.list_state = previous_state;
161            }
162            Node::Table {
163                headers,
164                rows,
165                alignments: _,
166            } => {
167                // Calculate column count
168                let col_count = headers
169                    .len()
170                    .max(rows.iter().map(|row| row.len()).max().unwrap_or(0));
171
172                output.push_str("\\begin{table}[htbp]\n");
173                output.push_str("\\centering\n");
174                output.push_str("\\begin{tabular}{");
175
176                // Add column format (centered alignment)
177                for _ in 0..col_count {
178                    output.push('c');
179                }
180                output.push_str("}\n\\hline\n");
181
182                // Process header
183                if !headers.is_empty() {
184                    for (i, cell) in headers.iter().enumerate() {
185                        if i > 0 {
186                            output.push_str(" & ");
187                        }
188                        self.write_node(cell, output)?;
189                    }
190                    output.push_str(" \\\\\n\\hline\n");
191                }
192
193                // Process all rows
194                for row in rows {
195                    for (i, cell) in row.iter().enumerate() {
196                        if i > 0 {
197                            output.push_str(" & ");
198                        }
199                        self.write_node(cell, output)?;
200                    }
201                    output.push_str(" \\\\\n");
202                }
203
204                // Close table environment
205                output.push_str("\\hline\n");
206                output.push_str("\\end{tabular}\n");
207                output.push_str("\\end{table}\n\n");
208            }
209            node if node.is_custom_type::<FigureNode>() => {
210                let figure_node = node.as_custom_type::<FigureNode>().unwrap();
211                // Start figure environment
212                output.push_str("\\begin{figure}[htbp]\n\\centering\n");
213
214                // Handle the body content (typically an image)
215                match &*figure_node.body {
216                    Node::Paragraph(content) => {
217                        for node in content {
218                            // Special handling for image nodes in figures
219                            if let Node::Image {
220                                url,
221                                title: _,
222                                alt: _,
223                            } = node
224                            {
225                                // Path to the image file
226                                let path = unix_slash(Path::new(url.as_str()));
227
228                                // Write includegraphics command
229                                output.push_str("\\includegraphics[width=0.8\\textwidth]{");
230                                output.push_str(&path);
231                                output.push_str("}\n");
232                            } else {
233                                // For non-image content, just render it normally
234                                self.write_node(node, output)?;
235                            }
236                        }
237                    }
238                    // Directly handle the node if it's not in a paragraph
239                    node => self.write_node(node, output)?,
240                }
241
242                // Add caption if present
243                if !figure_node.caption.is_empty() {
244                    output.push_str("\\caption{");
245                    output.push_str(&escape_latex(&figure_node.caption));
246                    output.push_str("}\n");
247                }
248
249                // Close figure environment
250                output.push_str("\\end{figure}\n\n");
251            }
252            node if node.is_custom_type::<ExternalFrameNode>() => {
253                let external_frame = node.as_custom_type::<ExternalFrameNode>().unwrap();
254                // Handle externally stored frames
255                let path = unix_slash(&external_frame.file_path);
256
257                output.push_str("\\begin{figure}[htbp]\n");
258                output.push_str("\\centering\n");
259                output.push_str("\\includegraphics[width=0.8\\textwidth]{");
260                output.push_str(&path);
261                output.push_str("}\n");
262
263                if !external_frame.alt_text.is_empty() {
264                    output.push_str("\\caption{");
265                    output.push_str(&escape_latex(&external_frame.alt_text));
266                    output.push_str("}\n");
267                }
268
269                output.push_str("\\end{figure}\n\n");
270            }
271            node if node.is_custom_type::<CenterNode>() => {
272                let center_node = node.as_custom_type::<CenterNode>().unwrap();
273                output.push_str("\\begin{center}\n");
274                self.write_node(&center_node.node, output)?;
275                output.push_str("\\end{center}\n\n");
276            }
277            node if node.is_custom_type::<HighlightNode>() => {
278                let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
279                output.push_str("\\colorbox{yellow}{");
280                for child in &highlight_node.content {
281                    self.write_node(child, output)?;
282                }
283                output.push_str("}");
284            }
285            node if node.is_custom_type::<InlineNode>() => {
286                let inline_node = node.as_custom_type::<InlineNode>().unwrap();
287                // Process all child nodes inline
288                for child in &inline_node.content {
289                    self.write_node(child, output)?;
290                }
291            }
292            node if node.is_custom_type::<BlockVerbatimNode>() => {
293                let block_node = node.as_custom_type::<BlockVerbatimNode>().unwrap();
294                output.push_str(&block_node.content);
295                output.push_str("\n\n");
296            }
297            node if node.is_custom_type::<VerbatimNode>() => {
298                let inline_node = node.as_custom_type::<VerbatimNode>().unwrap();
299                output.push_str(&inline_node.content);
300            }
301            Node::Text(text) => {
302                output.push_str(&escape_latex(text));
303            }
304            Node::Emphasis(content) => {
305                output.push_str("\\textit{");
306                self.write_inline_nodes(content, output)?;
307                output.push_str("}");
308            }
309            Node::Strong(content) => {
310                output.push_str("\\textbf{");
311                self.write_inline_nodes(content, output)?;
312                output.push_str("}");
313            }
314            Node::Strikethrough(content) => {
315                output.push_str("\\sout{");
316                self.write_inline_nodes(content, output)?;
317                output.push_str("}");
318            }
319            Node::Link {
320                url,
321                title: _,
322                content,
323            } => {
324                output.push_str("\\href{");
325                output.push_str(url);
326                output.push_str("}{");
327                self.write_inline_nodes(content, output)?;
328                output.push_str("}");
329            }
330            Node::Image { url, title: _, alt } => {
331                let alt_text = if !alt.is_empty() {
332                    let mut alt_str = EcoString::new();
333                    self.write_inline_nodes(alt, &mut alt_str)?;
334                    alt_str
335                } else {
336                    "".into()
337                };
338
339                let path = unix_slash(Path::new(&url.as_str()));
340
341                output.push_str("\\begin{figure}\n");
342                output.push_str("\\centering\n");
343                output.push_str("\\includegraphics[width=0.8\\textwidth]{");
344                output.push_str(&path);
345                output.push_str("}\n");
346
347                if !alt_text.is_empty() {
348                    output.push_str("\\caption{");
349                    output.push_str(&alt_text);
350                    output.push_str("}\n");
351                }
352
353                output.push_str("\\end{figure}\n\n");
354            }
355            Node::InlineCode(code) => {
356                output.push_str("\\texttt{");
357                output.push_str(&escape_latex(code));
358                output.push_str("}");
359            }
360            Node::HardBreak => {
361                output.push_str("\\\\\n");
362            }
363            Node::SoftBreak => {
364                output.push(' ');
365            }
366            Node::ThematicBreak => {
367                output.push_str("\\hrule\n\n");
368            }
369            Node::HtmlElement(element) => {
370                if element.tag == "table" {
371                    self.write_html_table(element, output)?;
372                } else {
373                    for child in &element.children {
374                        self.write_node(child, output)?;
375                    }
376                }
377            }
378            _ => {}
379        }
380
381        Ok(())
382    }
383
384    /// Write HTML table element to LaTeX format
385    fn write_html_table(
386        &mut self,
387        table_element: &cmark_writer::ast::HtmlElement,
388        output: &mut EcoString,
389    ) -> Result<()> {
390        // Collect rows and determine column count
391        let mut headers: Vec<Vec<Vec<Node>>> = Vec::new();
392        let mut rows: Vec<Vec<Vec<Node>>> = Vec::new();
393        let mut col_count = 0;
394
395        // Process table structure
396        for child in &table_element.children {
397            if let Node::HtmlElement(elem) = child {
398                match elem.tag.as_str() {
399                    "thead" => {
400                        for row_node in &elem.children {
401                            if let Node::HtmlElement(row) = row_node
402                                && row.tag == "tr"
403                            {
404                                let cells: Vec<Vec<Node>> = row
405                                    .children
406                                    .iter()
407                                    .filter_map(|cell_node| {
408                                        if let Node::HtmlElement(cell) = cell_node
409                                            && (cell.tag == "th" || cell.tag == "td")
410                                        {
411                                            return Some(cell.children.clone());
412                                        }
413                                        None
414                                    })
415                                    .collect();
416                                col_count = col_count.max(cells.len());
417                                headers.push(cells);
418                            }
419                        }
420                    }
421                    "tbody" => {
422                        for row_node in &elem.children {
423                            if let Node::HtmlElement(row) = row_node
424                                && row.tag == "tr"
425                            {
426                                let cells: Vec<Vec<Node>> = row
427                                    .children
428                                    .iter()
429                                    .filter_map(|cell_node| {
430                                        if let Node::HtmlElement(cell) = cell_node
431                                            && (cell.tag == "th" || cell.tag == "td")
432                                        {
433                                            return Some(cell.children.clone());
434                                        }
435                                        None
436                                    })
437                                    .collect();
438                                col_count = col_count.max(cells.len());
439                                rows.push(cells);
440                            }
441                        }
442                    }
443                    "tr" => {
444                        // Direct row without thead/tbody
445                        let cells: Vec<Vec<Node>> = elem
446                            .children
447                            .iter()
448                            .filter_map(|cell_node| {
449                                if let Node::HtmlElement(cell) = cell_node
450                                    && (cell.tag == "th" || cell.tag == "td")
451                                {
452                                    return Some(cell.children.clone());
453                                }
454                                None
455                            })
456                            .collect();
457                        col_count = col_count.max(cells.len());
458
459                        // First row with th elements is header
460                        if headers.is_empty()
461                            && elem.children.iter().any(|n| {
462                                if let Node::HtmlElement(e) = n {
463                                    e.tag == "th"
464                                } else {
465                                    false
466                                }
467                            })
468                        {
469                            headers.push(cells);
470                        } else {
471                            rows.push(cells);
472                        }
473                    }
474                    _ => {}
475                }
476            }
477        }
478
479        if col_count == 0 {
480            return Ok(());
481        }
482
483        // Write LaTeX table
484        output.push_str("\\begin{table}[htbp]\n");
485        output.push_str("\\centering\n");
486        output.push_str("\\begin{tabular}{");
487        for _ in 0..col_count {
488            output.push('c');
489        }
490        output.push_str("}\n\\hline\n");
491
492        // Write headers
493        for header_row in &headers {
494            for (i, cell_nodes) in header_row.iter().enumerate() {
495                if i > 0 {
496                    output.push_str(" & ");
497                }
498                self.write_inline_nodes(cell_nodes, output)?;
499            }
500            output.push_str(" \\\\\n\\hline\n");
501        }
502
503        // Write rows
504        for row in &rows {
505            for (i, cell_nodes) in row.iter().enumerate() {
506                if i > 0 {
507                    output.push_str(" & ");
508                }
509                self.write_inline_nodes(cell_nodes, output)?;
510            }
511            output.push_str(" \\\\\n");
512        }
513
514        output.push_str("\\hline\n");
515        output.push_str("\\end{tabular}\n");
516        output.push_str("\\end{table}\n\n");
517
518        Ok(())
519    }
520}
521
/// Escapes LaTeX special characters in `text`.
///
/// `&`, `%`, `$`, `#`, `_`, `{` and `}` are prefixed with a backslash; `~`,
/// `^` and `\` are replaced by `\textasciitilde{}`, `\textasciicircum{}` and
/// `\textbackslash{}`. All other characters are copied unchanged.
///
/// The input is processed in a single pass so that the backslashes and braces
/// introduced by one replacement are never escaped again by another.
fn escape_latex(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("\\&"),
            '%' => escaped.push_str("\\%"),
            '$' => escaped.push_str("\\$"),
            '#' => escaped.push_str("\\#"),
            '_' => escaped.push_str("\\_"),
            '{' => escaped.push_str("\\{"),
            '}' => escaped.push_str("\\}"),
            '~' => escaped.push_str("\\textasciitilde{}"),
            '^' => escaped.push_str("\\textasciicircum{}"),
            '\\' => escaped.push_str("\\textbackslash{}"),
            other => escaped.push(other),
        }
    }
    escaped
}
535
536impl FormatWriter for LaTeXWriter {
537    fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()> {
538        // Write the document content
539        self.write_node(document, output)?;
540        Ok(())
541    }
542
543    fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
544        let mut output = EcoString::new();
545        self.write_eco(document, &mut output)?;
546        Ok(output.as_str().as_bytes().to_vec())
547    }
548}