1use base64::Engine;
4use cmark_writer::ast::{ListItem, Node};
5use docx_rs::*;
6use ecow::EcoString;
7use std::fs;
8use std::io::Cursor;
9
10use crate::Result;
11use crate::common::{
12 CenterNode, FigureNode, FormatWriter, HighlightNode, InlineNode, VerbatimNode,
13};
14
15use super::image_processor::DocxImageProcessor;
16use super::numbering::DocxNumbering;
17use super::styles::DocxStyles;
18
19pub struct DocxWriter {
22 styles: DocxStyles,
23 numbering: DocxNumbering,
24 list_level: usize,
25 list_numbering_count: usize,
26 image_processor: DocxImageProcessor,
27}
28
29impl Default for DocxWriter {
30 fn default() -> Self {
31 Self::new()
32 }
33}
34
35impl DocxWriter {
36 pub fn new() -> Self {
37 Self {
38 styles: DocxStyles::new(),
39 numbering: DocxNumbering::new(),
40 list_level: 0,
41 list_numbering_count: 0,
42 image_processor: DocxImageProcessor::new(),
43 }
44 }
45
46 fn process_image(&self, docx: Docx, url: &str, alt_nodes: &[Node]) -> Result<Docx> {
48 let alt_text = if !alt_nodes.is_empty() {
50 let mut text = String::new();
51 for node in alt_nodes {
52 if let Node::Text(content) = node {
53 text.push_str(content);
54 }
55 }
56 Some(text)
57 } else {
58 None
59 };
60
61 if let Ok(img_data) = fs::read(url) {
63 Ok(self
64 .image_processor
65 .process_image_data(docx, &img_data, alt_text.as_deref(), None))
66 } else {
67 let placeholder = format!("[Image not found: {url}]");
68 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
69 Ok(docx.add_paragraph(para))
70 }
71 }
72
73 fn process_figure(&mut self, mut docx: Docx, figure_node: &FigureNode) -> Result<Docx> {
75 match &*figure_node.body {
77 Node::Paragraph(content) => {
78 for node in content {
79 if let Node::Image {
80 url,
81 title: _,
82 alt: _,
83 } = node
84 {
85 if let Ok(img_data) = fs::read(url.as_str()) {
87 let alt_text = figure_node.caption.clone();
88 docx = self.image_processor.process_image_data(
90 docx,
91 &img_data,
92 Some(&alt_text),
93 None,
94 );
95
96 if !figure_node.caption.is_empty() {
98 let caption_text = format!("Figure: {}", figure_node.caption);
99 let caption_para = Paragraph::new()
100 .style("Caption")
101 .add_run(Run::new().add_text(caption_text));
102 docx = docx.add_paragraph(caption_para);
103 }
104 } else {
105 let placeholder = format!("[Image not found: {url}]");
107 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
108 docx = docx.add_paragraph(para);
109
110 if !figure_node.caption.is_empty() {
112 let caption_para = Paragraph::new()
113 .style("Caption")
114 .add_run(Run::new().add_text(&figure_node.caption));
115 docx = docx.add_paragraph(caption_para);
116 }
117 }
118 } else {
119 let mut para = Paragraph::new();
121 let run = Run::new();
122 let run = self.process_inline_to_run(run, node)?;
123 if !run.children.is_empty() {
124 para = para.add_run(run);
125 docx = docx.add_paragraph(para);
126 }
127
128 if !figure_node.caption.is_empty() {
130 let caption_para = Paragraph::new()
131 .style("Caption")
132 .add_run(Run::new().add_text(&figure_node.caption));
133 docx = docx.add_paragraph(caption_para);
134 }
135 }
136 }
137 }
138 _ => {
140 docx = self.process_node(docx, &figure_node.body)?;
142
143 if !figure_node.caption.is_empty() {
145 let caption_para = Paragraph::new()
146 .style("Caption")
147 .add_run(Run::new().add_text(&figure_node.caption));
148 docx = docx.add_paragraph(caption_para);
149 }
150 }
151 }
152
153 Ok(docx)
154 }
155
156 fn process_inline_to_run(&self, mut run: Run, node: &Node) -> Result<Run> {
158 match node {
159 Node::Text(text) => {
160 run = run.add_text(text);
161 }
162 Node::Strong(content) => {
163 run = run.style("Strong");
164 for child in content {
165 run = self.process_inline_to_run(run, child)?;
166 }
167 }
168 Node::Emphasis(content) => {
169 run = run.style("Emphasis");
170 for child in content {
171 run = self.process_inline_to_run(run, child)?;
172 }
173 }
174 Node::Strikethrough(content) => {
175 run = run.strike();
176 for child in content {
177 run = self.process_inline_to_run(run, child)?;
178 }
179 }
180 Node::Link {
181 url: _,
182 title: _,
183 content,
184 } => {
185 run = run.style("Hyperlink");
187 for child in content {
188 run = self.process_inline_to_run(run, child)?;
189 }
190 }
191 Node::Image {
192 url,
193 title: _,
194 alt: _,
195 } => {
196 if let Ok(img_data) = fs::read(url.as_str()) {
197 run = self.image_processor.process_inline_image(run, &img_data)?;
198 } else {
199 run = run.add_text(format!("[Image not found: {url}]"));
200 }
201 }
202 Node::HtmlElement(element) => {
203 if element.tag == "img" && element.self_closing {
205 let is_typst_block = element
206 .attributes
207 .iter()
208 .any(|a| a.name == "alt" && a.value == "typst-block");
209
210 let src = element
211 .attributes
212 .iter()
213 .find(|a| a.name == "src")
214 .map(|a| a.value.as_str())
215 .unwrap_or("");
216
217 if src.starts_with("data:image/") {
218 run = self.image_processor.process_data_url_image(
219 run,
220 src,
221 is_typst_block,
222 )?;
223 }
224 } else {
225 for child in &element.children {
227 run = self.process_inline_to_run(run, child)?;
228 }
229 }
230 }
231 Node::InlineCode(code) => {
232 run = run.style("CodeInline").add_text(code);
233 }
234 Node::HardBreak => {
235 run = run.add_break(BreakType::TextWrapping);
236 }
237 Node::SoftBreak => {
238 run = run.add_text(" ");
239 }
240 node if node.is_custom_type::<HighlightNode>() => {
241 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
242 run = run.highlight("yellow");
243 for child in &highlight_node.content {
244 run = self.process_inline_to_run(run, child)?;
245 }
246 }
247 node if node.is_custom_type::<InlineNode>() => {
248 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
249 for child in &inline_node.content {
250 run = self.process_inline_to_run(run, child)?;
251 }
252 }
253 node if node.is_custom_type::<VerbatimNode>() => {
254 let node = node.as_custom_type::<VerbatimNode>().unwrap();
255 eprintln!("Warning: `m1verbatim` is ignored {:?}.", node.content);
256 }
257 _ => {
259 eprintln!("other inline element: {node:?}");
260 }
261 }
262
263 Ok(run)
264 }
265
266 fn process_paragraph(
268 &self,
269 mut docx: Docx,
270 content: &[Node],
271 style: Option<&str>,
272 ) -> Result<Docx> {
273 let mut para = Paragraph::new();
274
275 if let Some(style_name) = style {
277 para = para.style(style_name);
278 }
279
280 let mut links = Vec::new();
282 for (i, node) in content.iter().enumerate() {
283 if let Node::Link {
284 url,
285 title: _,
286 content: _,
287 } = node
288 {
289 links.push((i, url.clone()));
290 }
291 }
292
293 if links.is_empty() {
295 for node in content {
297 let run = Run::new();
298 let run = self.process_inline_to_run(run, node)?;
299 if !run.children.is_empty() {
300 para = para.add_run(run);
301 }
302 }
303 } else {
304 let mut last_idx = 0;
306 for (idx, url) in links {
307 for item in content.iter().take(idx).skip(last_idx) {
309 let run = Run::new();
310 let run = self.process_inline_to_run(run, item)?;
311 if !run.children.is_empty() {
312 para = para.add_run(run);
313 }
314 }
315
316 if let Node::Link {
318 url: _,
319 title: _,
320 content: link_content,
321 } = &content[idx]
322 {
323 let mut hyperlink_run = Run::new().style("Hyperlink");
324 for child in link_content {
325 hyperlink_run = self.process_inline_to_run(hyperlink_run, child)?;
326 }
327
328 if !hyperlink_run.children.is_empty() {
330 let hyperlink =
331 Hyperlink::new(&url, HyperlinkType::External).add_run(hyperlink_run);
332 para = para.add_hyperlink(hyperlink);
333 }
334 }
335
336 last_idx = idx + 1;
337 }
338
339 for item in content.iter().skip(last_idx) {
341 let run = Run::new();
342 let run = self.process_inline_to_run(run, item)?;
343 if !run.children.is_empty() {
344 para = para.add_run(run);
345 }
346 }
347 }
348
349 if !para.children.is_empty() {
351 docx = docx.add_paragraph(para);
352 }
353
354 Ok(docx)
355 }
356
357 fn process_node(&mut self, mut docx: Docx, node: &Node) -> Result<Docx> {
359 match node {
360 Node::Document(blocks) => {
361 for block in blocks {
362 docx = self.process_node(docx, block)?;
363 }
364 }
365 Node::Paragraph(content) => {
366 docx = self.process_paragraph(docx, content, None)?;
367 }
368 Node::Heading {
369 level,
370 content,
371 heading_type: _,
372 } => {
373 let style_name = match level {
375 1 => "Heading1",
376 2 => "Heading2",
377 3 => "Heading3",
378 4 => "Heading4",
379 5 => "Heading5",
380 _ => "Heading6",
381 };
382
383 docx = self.process_paragraph(docx, content, Some(style_name))?;
384 }
385 Node::BlockQuote(content) => {
386 for block in content {
387 if let Node::Paragraph(inline) = block {
388 docx = self.process_paragraph(docx, inline, Some("Blockquote"))?;
389 } else {
390 docx = self.process_node(docx, block)?;
391 }
392 }
393 }
394 Node::CodeBlock {
395 language,
396 content,
397 block_type: _,
398 } => {
399 if let Some(lang) = language
401 && !lang.is_empty()
402 {
403 let lang_para = Paragraph::new()
404 .style("CodeBlock")
405 .add_run(Run::new().add_text(lang));
406 docx = docx.add_paragraph(lang_para);
407 }
408
409 let lines: Vec<&str> = content.split('\n').collect();
411 for line in lines {
412 let code_para = Paragraph::new()
413 .style("CodeBlock")
414 .add_run(Run::new().add_text(line));
415 docx = docx.add_paragraph(code_para);
416 }
417 }
418 Node::OrderedList { start: _, items } => {
419 docx = self.process_ordered_list(docx, items)?;
420 }
421 Node::UnorderedList(items) => {
422 docx = self.process_unordered_list(docx, items)?;
423 }
424 Node::Table {
425 headers,
426 rows,
427 alignments: _,
428 } => {
429 docx = self.process_table(docx, headers, rows)?;
430 }
431 Node::Image { url, title: _, alt } => {
432 docx = self.process_image(docx, url, alt)?;
433 }
434 node if node.is_custom_type::<FigureNode>() => {
435 let figure_node = node.as_custom_type::<FigureNode>().unwrap();
436 docx = self.process_figure(docx, figure_node)?;
437 }
438 node if node.is_custom_type::<CenterNode>() => {
439 let center_node = node.as_custom_type::<CenterNode>().unwrap();
440 match ¢er_node.node {
442 Node::Paragraph(content) => {
443 docx = self.process_paragraph(docx, content, None)?;
444 if let Some(DocumentChild::Paragraph(para)) =
446 docx.document.children.last_mut()
447 {
448 para.property = para.property.clone().align(AlignmentType::Center);
449 }
450 }
451 other => {
452 docx = self.process_node(docx, other)?;
453 if let Some(DocumentChild::Paragraph(para)) =
455 docx.document.children.last_mut()
456 {
457 para.property = para.property.clone().align(AlignmentType::Center);
458 }
459 }
460 }
461 }
462 node if node.is_custom_type::<crate::common::ExternalFrameNode>() => {
463 let external_frame = node
464 .as_custom_type::<crate::common::ExternalFrameNode>()
465 .unwrap();
466 let data = base64::engine::general_purpose::STANDARD
467 .decode(&external_frame.svg)
468 .map_err(|e| format!("Failed to decode SVG data: {e}"))?;
469
470 docx = self.image_processor.process_image_data(
471 docx,
472 &data,
473 Some(&external_frame.alt_text),
474 None,
475 );
476 }
477 node if node.is_custom_type::<HighlightNode>() => {
478 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
479 let mut para = Paragraph::new();
481 let mut run = Run::new().highlight("yellow");
482
483 for child in &highlight_node.content {
484 run = self.process_inline_to_run(run, child)?;
485 }
486
487 if !run.children.is_empty() {
488 para = para.add_run(run);
489 docx = docx.add_paragraph(para);
490 }
491 }
492 node if node.is_custom_type::<InlineNode>() => {
493 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
494 let mut para = Paragraph::new();
496 let mut run = Run::new();
497
498 for child in &inline_node.content {
499 run = self.process_inline_to_run(run, child)?;
500 }
501
502 if !run.children.is_empty() {
503 para = para.add_run(run);
504 docx = docx.add_paragraph(para);
505 }
506 }
507 Node::ThematicBreak => {
508 let hr_para = Paragraph::new()
510 .style("HorizontalLine")
511 .add_run(Run::new().add_text(""));
512 docx = docx.add_paragraph(hr_para);
513 }
514 _ => {}
516 }
517
518 Ok(docx)
519 }
520
521 fn process_ordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
523 self.list_level += 1;
525 let current_level = self.list_level - 1;
526
527 let (doc, num_id) = self.numbering.create_ordered_numbering(docx);
529 docx = doc;
530
531 for item in items {
533 if let ListItem::Ordered { content, .. } = item {
534 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
535 }
536 }
537
538 self.list_level -= 1;
540 Ok(docx)
541 }
542
543 fn process_unordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
545 self.list_level += 1;
547 let current_level = self.list_level - 1;
548
549 let (doc, num_id) = self.numbering.create_unordered_numbering(docx);
551 docx = doc;
552
553 for item in items {
555 if let ListItem::Unordered { content } = item {
556 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
557 }
558 }
559
560 self.list_level -= 1;
562 Ok(docx)
563 }
564
565 fn process_list_item_content(
567 &mut self,
568 mut docx: Docx,
569 content: &[Node],
570 num_id: usize,
571 level: usize,
572 ) -> Result<Docx> {
573 if content.is_empty() {
575 let empty_para = Paragraph::new()
576 .numbering(NumberingId::new(num_id), IndentLevel::new(level))
577 .add_run(Run::new().add_text(""));
578 return Ok(docx.add_paragraph(empty_para));
579 }
580
581 for block in content {
583 match block {
584 Node::Paragraph(inline) => {
585 let mut para = Paragraph::new()
586 .numbering(NumberingId::new(num_id), IndentLevel::new(level));
587
588 for node in inline {
590 let run = Run::new();
591 let run = self.process_inline_to_run(run, node)?;
592 if !run.children.is_empty() {
593 para = para.add_run(run);
594 }
595 }
596
597 docx = docx.add_paragraph(para);
598 }
599 Node::OrderedList { start: _, items: _ } | Node::UnorderedList(_) => {
601 docx = self.process_node(docx, block)?;
602 }
603 _ => {
604 docx = self.process_node(docx, block)?;
605 }
606 }
607 }
608
609 Ok(docx)
610 }
611
612 fn process_table(&self, mut docx: Docx, headers: &[Node], rows: &[Vec<Node>]) -> Result<Docx> {
614 let mut table = Table::new(vec![]).style("Table");
615
616 if !headers.is_empty() {
618 let mut cells = Vec::new();
619
620 for header_node in headers {
621 let mut table_cell = TableCell::new();
622 let mut para = Paragraph::new();
623
624 let run = Run::new();
625 let run = self.process_inline_to_run(run, header_node)?;
626 if !run.children.is_empty() {
627 para = para.add_run(run);
628 }
629
630 if !para.children.is_empty() {
631 table_cell = table_cell.add_paragraph(para);
632 }
633
634 cells.push(table_cell);
635 }
636
637 if !cells.is_empty() {
638 let header_row = TableRow::new(cells);
639 table = table.add_row(header_row);
640 }
641 }
642
643 for row in rows {
645 let mut cells = Vec::new();
646
647 for cell_node in row {
648 let mut table_cell = TableCell::new();
649 let mut para = Paragraph::new();
650
651 let run = Run::new();
652 let run = self.process_inline_to_run(run, cell_node)?;
653 if !run.children.is_empty() {
654 para = para.add_run(run);
655 }
656
657 if !para.children.is_empty() {
658 table_cell = table_cell.add_paragraph(para);
659 }
660
661 cells.push(table_cell);
662 }
663
664 if !cells.is_empty() {
665 let data_row = TableRow::new(cells);
666 table = table.add_row(data_row);
667 }
668 }
669
670 docx = docx.add_table(table);
672
673 Ok(docx)
674 }
675
676 pub fn generate_docx(&mut self, doc: &Node) -> Result<Vec<u8>> {
678 let mut docx = Docx::new();
680 docx = self.styles.initialize_styles(docx);
681
682 docx = self.process_node(docx, doc)?;
684
685 docx = self.numbering.initialize_numbering(docx);
687
688 let docx_built = docx.build();
690 let mut buffer = Vec::new();
691 docx_built
692 .pack(&mut Cursor::new(&mut buffer))
693 .map_err(|e| format!("Failed to pack DOCX: {e}"))?;
694
695 Ok(buffer)
696 }
697}
698
699impl FormatWriter for DocxWriter {
700 fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
701 self.list_level = 0;
702 self.list_numbering_count = 0;
703 self.generate_docx(document)
704 }
705
706 fn write_eco(&mut self, _document: &Node, _output: &mut EcoString) -> Result<()> {
707 Err("DOCX format does not support EcoString output".into())
708 }
709}