1use std::path::Path;
4
5use cmark_writer::ast::Node;
6use ecow::EcoString;
7use tinymist_std::path::unix_slash;
8
9use crate::Result;
10use crate::common::{
11 BlockVerbatimNode, CenterNode, ExternalFrameNode, FigureNode, FormatWriter, HighlightNode,
12 InlineNode, ListState, VerbatimNode,
13};
14
/// Writer that renders a `cmark_writer` [`Node`] tree as LaTeX source text.
pub struct LaTeXWriter {
    /// Kind of list currently being written: set to `Some(..)` while an
    /// ordered or unordered list is emitted and put back to its previous
    /// value once that list has been written; `None` on a fresh writer.
    list_state: Option<ListState>,
}
19
20impl Default for LaTeXWriter {
21 fn default() -> Self {
22 Self::new()
23 }
24}
25
26impl LaTeXWriter {
27 pub fn new() -> Self {
28 Self { list_state: None }
29 }
30
31 fn write_inline_nodes(&mut self, nodes: &[Node], output: &mut EcoString) -> Result<()> {
32 for node in nodes {
33 self.write_node(node, output)?;
34 }
35 Ok(())
36 }
37
38 fn write_node(&mut self, node: &Node, output: &mut EcoString) -> Result<()> {
40 match node {
41 Node::Document(blocks) => {
42 for block in blocks {
43 self.write_node(block, output)?;
44 }
45 }
46 Node::Paragraph(inlines) => {
47 self.write_inline_nodes(inlines, output)?;
48 output.push_str("\n\n");
49 }
50 Node::Heading {
51 level,
52 content,
53 heading_type: _,
54 } => {
55 if *level > 4 {
56 return Err(format!("heading level {level} is not supported in LaTeX").into());
57 }
58
59 output.push('\\');
60 match level {
61 1 => output.push_str("chapter{"),
62 2 => output.push_str("section{"),
63 3 => output.push_str("subsection{"),
64 4 => output.push_str("subsubsection{"),
65 _ => return Err(format!("Heading level {level} is not supported").into()),
66 }
67
68 self.write_inline_nodes(content, output)?;
69 output.push_str("}\n\n");
70 }
71 Node::BlockQuote(content) => {
72 output.push_str("\\begin{quote}\n");
73 for block in content {
74 self.write_node(block, output)?;
75 }
76 output.push_str("\\end{quote}\n");
77 }
78 Node::CodeBlock {
79 language,
80 content,
81 block_type: _,
82 } => {
83 if let Some(lang) = language {
84 if !lang.is_empty() {
85 output.push_str("\\begin{lstlisting}[language=");
86 output.push_str(lang);
87 output.push_str("]\n");
88 } else {
89 output.push_str("\\begin{verbatim}\n");
90 }
91 } else {
92 output.push_str("\\begin{verbatim}\n");
93 }
94
95 output.push_str(content);
96
97 if language.as_ref().is_none_or(|lang| lang.is_empty()) {
98 output.push_str("\n\\end{verbatim}");
99 } else {
100 output.push_str("\n\\end{lstlisting}");
101 }
102 output.push_str("\n\n");
103 }
104 Node::OrderedList { start: _, items } => {
105 let previous_state = self.list_state;
106 self.list_state = Some(ListState::Ordered);
107
108 output.push_str("\\begin{enumerate}\n");
109 for item in items {
110 match item {
111 cmark_writer::ast::ListItem::Ordered { content, .. }
112 | cmark_writer::ast::ListItem::Unordered { content } => {
113 output.push_str("\\item ");
114 for block in content {
115 match block {
116 Node::Paragraph(inlines) => {
119 self.write_inline_nodes(inlines, output)?;
120 }
121 _ => self.write_node(block, output)?,
122 }
123 }
124 output.push('\n');
125 }
126 _ => {}
127 }
128 }
129 output.push_str("\\end{enumerate}\n\n");
130
131 self.list_state = previous_state;
132 }
133 Node::UnorderedList(items) => {
134 let previous_state = self.list_state;
135 self.list_state = Some(ListState::Unordered);
136
137 output.push_str("\\begin{itemize}\n");
138 for item in items {
139 match item {
140 cmark_writer::ast::ListItem::Ordered { content, .. }
141 | cmark_writer::ast::ListItem::Unordered { content } => {
142 output.push_str("\\item ");
143 for block in content {
144 match block {
145 Node::Paragraph(inlines) => {
148 self.write_inline_nodes(inlines, output)?;
149 }
150 _ => self.write_node(block, output)?,
151 }
152 }
153 output.push('\n');
154 }
155 _ => {}
156 }
157 }
158 output.push_str("\\end{itemize}\n\n");
159
160 self.list_state = previous_state;
161 }
162 Node::Table {
163 headers,
164 rows,
165 alignments: _,
166 } => {
167 let col_count = headers
169 .len()
170 .max(rows.iter().map(|row| row.len()).max().unwrap_or(0));
171
172 output.push_str("\\begin{table}[htbp]\n");
173 output.push_str("\\centering\n");
174 output.push_str("\\begin{tabular}{");
175
176 for _ in 0..col_count {
178 output.push('c');
179 }
180 output.push_str("}\n\\hline\n");
181
182 if !headers.is_empty() {
184 for (i, cell) in headers.iter().enumerate() {
185 if i > 0 {
186 output.push_str(" & ");
187 }
188 self.write_node(cell, output)?;
189 }
190 output.push_str(" \\\\\n\\hline\n");
191 }
192
193 for row in rows {
195 for (i, cell) in row.iter().enumerate() {
196 if i > 0 {
197 output.push_str(" & ");
198 }
199 self.write_node(cell, output)?;
200 }
201 output.push_str(" \\\\\n");
202 }
203
204 output.push_str("\\hline\n");
206 output.push_str("\\end{tabular}\n");
207 output.push_str("\\end{table}\n\n");
208 }
209 node if node.is_custom_type::<FigureNode>() => {
210 let figure_node = node.as_custom_type::<FigureNode>().unwrap();
211 output.push_str("\\begin{figure}[htbp]\n\\centering\n");
213
214 match &*figure_node.body {
216 Node::Paragraph(content) => {
217 for node in content {
218 if let Node::Image {
220 url,
221 title: _,
222 alt: _,
223 } = node
224 {
225 let path = unix_slash(Path::new(url.as_str()));
227
228 output.push_str("\\includegraphics[width=0.8\\textwidth]{");
230 output.push_str(&path);
231 output.push_str("}\n");
232 } else {
233 self.write_node(node, output)?;
235 }
236 }
237 }
238 node => self.write_node(node, output)?,
240 }
241
242 if !figure_node.caption.is_empty() {
244 output.push_str("\\caption{");
245 output.push_str(&escape_latex(&figure_node.caption));
246 output.push_str("}\n");
247 }
248
249 output.push_str("\\end{figure}\n\n");
251 }
252 node if node.is_custom_type::<ExternalFrameNode>() => {
253 let external_frame = node.as_custom_type::<ExternalFrameNode>().unwrap();
254 let path = unix_slash(&external_frame.file_path);
256
257 output.push_str("\\begin{figure}[htbp]\n");
258 output.push_str("\\centering\n");
259 output.push_str("\\includegraphics[width=0.8\\textwidth]{");
260 output.push_str(&path);
261 output.push_str("}\n");
262
263 if !external_frame.alt_text.is_empty() {
264 output.push_str("\\caption{");
265 output.push_str(&escape_latex(&external_frame.alt_text));
266 output.push_str("}\n");
267 }
268
269 output.push_str("\\end{figure}\n\n");
270 }
271 node if node.is_custom_type::<CenterNode>() => {
272 let center_node = node.as_custom_type::<CenterNode>().unwrap();
273 output.push_str("\\begin{center}\n");
274 self.write_node(¢er_node.node, output)?;
275 output.push_str("\\end{center}\n\n");
276 }
277 node if node.is_custom_type::<HighlightNode>() => {
278 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
279 output.push_str("\\colorbox{yellow}{");
280 for child in &highlight_node.content {
281 self.write_node(child, output)?;
282 }
283 output.push_str("}");
284 }
285 node if node.is_custom_type::<InlineNode>() => {
286 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
287 for child in &inline_node.content {
289 self.write_node(child, output)?;
290 }
291 }
292 node if node.is_custom_type::<BlockVerbatimNode>() => {
293 let block_node = node.as_custom_type::<BlockVerbatimNode>().unwrap();
294 output.push_str(&block_node.content);
295 output.push_str("\n\n");
296 }
297 node if node.is_custom_type::<VerbatimNode>() => {
298 let inline_node = node.as_custom_type::<VerbatimNode>().unwrap();
299 output.push_str(&inline_node.content);
300 }
301 Node::Text(text) => {
302 output.push_str(&escape_latex(text));
303 }
304 Node::Emphasis(content) => {
305 output.push_str("\\textit{");
306 self.write_inline_nodes(content, output)?;
307 output.push_str("}");
308 }
309 Node::Strong(content) => {
310 output.push_str("\\textbf{");
311 self.write_inline_nodes(content, output)?;
312 output.push_str("}");
313 }
314 Node::Strikethrough(content) => {
315 output.push_str("\\sout{");
316 self.write_inline_nodes(content, output)?;
317 output.push_str("}");
318 }
319 Node::Link {
320 url,
321 title: _,
322 content,
323 } => {
324 output.push_str("\\href{");
325 output.push_str(url);
326 output.push_str("}{");
327 self.write_inline_nodes(content, output)?;
328 output.push_str("}");
329 }
330 Node::Image { url, title: _, alt } => {
331 let alt_text = if !alt.is_empty() {
332 let mut alt_str = EcoString::new();
333 self.write_inline_nodes(alt, &mut alt_str)?;
334 alt_str
335 } else {
336 "".into()
337 };
338
339 let path = unix_slash(Path::new(&url.as_str()));
340
341 output.push_str("\\begin{figure}\n");
342 output.push_str("\\centering\n");
343 output.push_str("\\includegraphics[width=0.8\\textwidth]{");
344 output.push_str(&path);
345 output.push_str("}\n");
346
347 if !alt_text.is_empty() {
348 output.push_str("\\caption{");
349 output.push_str(&alt_text);
350 output.push_str("}\n");
351 }
352
353 output.push_str("\\end{figure}\n\n");
354 }
355 Node::InlineCode(code) => {
356 output.push_str("\\texttt{");
357 output.push_str(&escape_latex(code));
358 output.push_str("}");
359 }
360 Node::HardBreak => {
361 output.push_str("\\\\\n");
362 }
363 Node::SoftBreak => {
364 output.push(' ');
365 }
366 Node::ThematicBreak => {
367 output.push_str("\\hrule\n\n");
368 }
369 Node::HtmlElement(element) => {
370 if element.tag == "table" {
371 self.write_html_table(element, output)?;
372 } else {
373 for child in &element.children {
374 self.write_node(child, output)?;
375 }
376 }
377 }
378 _ => {}
379 }
380
381 Ok(())
382 }
383
384 fn write_html_table(
386 &mut self,
387 table_element: &cmark_writer::ast::HtmlElement,
388 output: &mut EcoString,
389 ) -> Result<()> {
390 let mut headers: Vec<Vec<Vec<Node>>> = Vec::new();
392 let mut rows: Vec<Vec<Vec<Node>>> = Vec::new();
393 let mut col_count = 0;
394
395 for child in &table_element.children {
397 if let Node::HtmlElement(elem) = child {
398 match elem.tag.as_str() {
399 "thead" => {
400 for row_node in &elem.children {
401 if let Node::HtmlElement(row) = row_node
402 && row.tag == "tr"
403 {
404 let cells: Vec<Vec<Node>> = row
405 .children
406 .iter()
407 .filter_map(|cell_node| {
408 if let Node::HtmlElement(cell) = cell_node
409 && (cell.tag == "th" || cell.tag == "td")
410 {
411 return Some(cell.children.clone());
412 }
413 None
414 })
415 .collect();
416 col_count = col_count.max(cells.len());
417 headers.push(cells);
418 }
419 }
420 }
421 "tbody" => {
422 for row_node in &elem.children {
423 if let Node::HtmlElement(row) = row_node
424 && row.tag == "tr"
425 {
426 let cells: Vec<Vec<Node>> = row
427 .children
428 .iter()
429 .filter_map(|cell_node| {
430 if let Node::HtmlElement(cell) = cell_node
431 && (cell.tag == "th" || cell.tag == "td")
432 {
433 return Some(cell.children.clone());
434 }
435 None
436 })
437 .collect();
438 col_count = col_count.max(cells.len());
439 rows.push(cells);
440 }
441 }
442 }
443 "tr" => {
444 let cells: Vec<Vec<Node>> = elem
446 .children
447 .iter()
448 .filter_map(|cell_node| {
449 if let Node::HtmlElement(cell) = cell_node
450 && (cell.tag == "th" || cell.tag == "td")
451 {
452 return Some(cell.children.clone());
453 }
454 None
455 })
456 .collect();
457 col_count = col_count.max(cells.len());
458
459 if headers.is_empty()
461 && elem.children.iter().any(|n| {
462 if let Node::HtmlElement(e) = n {
463 e.tag == "th"
464 } else {
465 false
466 }
467 })
468 {
469 headers.push(cells);
470 } else {
471 rows.push(cells);
472 }
473 }
474 _ => {}
475 }
476 }
477 }
478
479 if col_count == 0 {
480 return Ok(());
481 }
482
483 output.push_str("\\begin{table}[htbp]\n");
485 output.push_str("\\centering\n");
486 output.push_str("\\begin{tabular}{");
487 for _ in 0..col_count {
488 output.push('c');
489 }
490 output.push_str("}\n\\hline\n");
491
492 for header_row in &headers {
494 for (i, cell_nodes) in header_row.iter().enumerate() {
495 if i > 0 {
496 output.push_str(" & ");
497 }
498 self.write_inline_nodes(cell_nodes, output)?;
499 }
500 output.push_str(" \\\\\n\\hline\n");
501 }
502
503 for row in &rows {
505 for (i, cell_nodes) in row.iter().enumerate() {
506 if i > 0 {
507 output.push_str(" & ");
508 }
509 self.write_inline_nodes(cell_nodes, output)?;
510 }
511 output.push_str(" \\\\\n");
512 }
513
514 output.push_str("\\hline\n");
515 output.push_str("\\end{tabular}\n");
516 output.push_str("\\end{table}\n\n");
517
518 Ok(())
519 }
520}
521
/// Escapes the characters that are special in LaTeX text mode.
///
/// `&`, `%`, `$`, `#`, `_`, `{` and `}` are prefixed with a backslash; `~`,
/// `^` and `\` are replaced by `\textasciitilde{}`, `\textasciicircum{}` and
/// `\textbackslash{}`. All other characters are copied unchanged.
///
/// The text is processed in a single pass so that the backslashes and braces
/// produced by one replacement are never escaped again by another.
fn escape_latex(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("\\&"),
            '%' => escaped.push_str("\\%"),
            '$' => escaped.push_str("\\$"),
            '#' => escaped.push_str("\\#"),
            '_' => escaped.push_str("\\_"),
            '{' => escaped.push_str("\\{"),
            '}' => escaped.push_str("\\}"),
            '~' => escaped.push_str("\\textasciitilde{}"),
            '^' => escaped.push_str("\\textasciicircum{}"),
            '\\' => escaped.push_str("\\textbackslash{}"),
            other => escaped.push(other),
        }
    }
    escaped
}
535
536impl FormatWriter for LaTeXWriter {
537 fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()> {
538 self.write_node(document, output)?;
540 Ok(())
541 }
542
543 fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
544 let mut output = EcoString::new();
545 self.write_eco(document, &mut output)?;
546 Ok(output.as_str().as_bytes().to_vec())
547 }
548}