1use base64::Engine;
4use cmark_writer::ast::{ListItem, Node};
5use docx_rs::*;
6use ecow::EcoString;
7use log::{debug, warn};
8use std::fs;
9use std::io::Cursor;
10
11use crate::Result;
12use crate::common::{
13 CenterNode, FigureNode, FormatWriter, HighlightNode, InlineNode, VerbatimNode,
14};
15
16use super::image_processor::DocxImageProcessor;
17use super::numbering::DocxNumbering;
18use super::styles::DocxStyles;
19
/// Writer that renders a Markdown AST ([`Node`]) into a DOCX document.
pub struct DocxWriter {
    /// Style definitions registered on the document before any content is written.
    styles: DocxStyles,
    /// Hands out numbering ids for ordered and unordered lists.
    numbering: DocxNumbering,
    /// Current list nesting depth; 0 when no list is being processed.
    list_level: usize,
    /// Reset to 0 at the start of every `write_vec` call; not otherwise read in this file.
    list_numbering_count: usize,
    /// Embeds image bytes into the document body or into inline runs.
    image_processor: DocxImageProcessor,
}
29
30impl Default for DocxWriter {
31 fn default() -> Self {
32 Self::new()
33 }
34}
35
36impl DocxWriter {
37 pub fn new() -> Self {
38 Self {
39 styles: DocxStyles::new(),
40 numbering: DocxNumbering::new(),
41 list_level: 0,
42 list_numbering_count: 0,
43 image_processor: DocxImageProcessor::new(),
44 }
45 }
46
47 fn process_image(&self, docx: Docx, url: &str, alt_nodes: &[Node]) -> Result<Docx> {
49 let alt_text = if !alt_nodes.is_empty() {
51 let mut text = String::new();
52 for node in alt_nodes {
53 if let Node::Text(content) = node {
54 text.push_str(content);
55 }
56 }
57 Some(text)
58 } else {
59 None
60 };
61
62 if let Ok(img_data) = fs::read(url) {
64 Ok(self
65 .image_processor
66 .process_image_data(docx, &img_data, alt_text.as_deref(), None))
67 } else {
68 let placeholder = format!("[Image not found: {url}]");
69 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
70 Ok(docx.add_paragraph(para))
71 }
72 }
73
74 fn process_figure(&mut self, mut docx: Docx, figure_node: &FigureNode) -> Result<Docx> {
76 match &*figure_node.body {
78 Node::Paragraph(content) => {
79 for node in content {
80 if let Node::Image {
81 url,
82 title: _,
83 alt: _,
84 } = node
85 {
86 if let Ok(img_data) = fs::read(url.as_str()) {
88 let alt_text = figure_node.caption.clone();
89 docx = self.image_processor.process_image_data(
91 docx,
92 &img_data,
93 Some(&alt_text),
94 None,
95 );
96
97 if !figure_node.caption.is_empty() {
99 let caption_text = format!("Figure: {}", figure_node.caption);
100 let caption_para = Paragraph::new()
101 .style("Caption")
102 .add_run(Run::new().add_text(caption_text));
103 docx = docx.add_paragraph(caption_para);
104 }
105 } else {
106 let placeholder = format!("[Image not found: {url}]");
108 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
109 docx = docx.add_paragraph(para);
110
111 if !figure_node.caption.is_empty() {
113 let caption_para = Paragraph::new()
114 .style("Caption")
115 .add_run(Run::new().add_text(&figure_node.caption));
116 docx = docx.add_paragraph(caption_para);
117 }
118 }
119 } else {
120 let mut para = Paragraph::new();
122 let run = Run::new();
123 let run = self.process_inline_to_run(run, node)?;
124 if !run.children.is_empty() {
125 para = para.add_run(run);
126 docx = docx.add_paragraph(para);
127 }
128
129 if !figure_node.caption.is_empty() {
131 let caption_para = Paragraph::new()
132 .style("Caption")
133 .add_run(Run::new().add_text(&figure_node.caption));
134 docx = docx.add_paragraph(caption_para);
135 }
136 }
137 }
138 }
139 _ => {
141 docx = self.process_node(docx, &figure_node.body)?;
143
144 if !figure_node.caption.is_empty() {
146 let caption_para = Paragraph::new()
147 .style("Caption")
148 .add_run(Run::new().add_text(&figure_node.caption));
149 docx = docx.add_paragraph(caption_para);
150 }
151 }
152 }
153
154 Ok(docx)
155 }
156
157 fn process_inline_to_run(&self, mut run: Run, node: &Node) -> Result<Run> {
159 match node {
160 Node::Text(text) => {
161 run = run.add_text(text);
162 }
163 Node::Strong(content) => {
164 run = run.style("Strong");
165 for child in content {
166 run = self.process_inline_to_run(run, child)?;
167 }
168 }
169 Node::Emphasis(content) => {
170 run = run.style("Emphasis");
171 for child in content {
172 run = self.process_inline_to_run(run, child)?;
173 }
174 }
175 Node::Strikethrough(content) => {
176 run = run.strike();
177 for child in content {
178 run = self.process_inline_to_run(run, child)?;
179 }
180 }
181 Node::Link {
182 url: _,
183 title: _,
184 content,
185 } => {
186 run = run.style("Hyperlink");
188 for child in content {
189 run = self.process_inline_to_run(run, child)?;
190 }
191 }
192 Node::Image {
193 url,
194 title: _,
195 alt: _,
196 } => {
197 if let Ok(img_data) = fs::read(url.as_str()) {
198 run = self.image_processor.process_inline_image(run, &img_data)?;
199 } else {
200 run = run.add_text(format!("[Image not found: {url}]"));
201 }
202 }
203 Node::HtmlElement(element) => {
204 if element.tag == "img" && element.self_closing {
206 let is_typst_block = element
207 .attributes
208 .iter()
209 .any(|a| a.name == "alt" && a.value == "typst-block");
210
211 let src = element
212 .attributes
213 .iter()
214 .find(|a| a.name == "src")
215 .map(|a| a.value.as_str())
216 .unwrap_or("");
217
218 if src.starts_with("data:image/") {
219 run = self.image_processor.process_data_url_image(
220 run,
221 src,
222 is_typst_block,
223 )?;
224 }
225 } else {
226 for child in &element.children {
228 run = self.process_inline_to_run(run, child)?;
229 }
230 }
231 }
232 Node::InlineCode(code) => {
233 run = run.style("CodeInline").add_text(code);
234 }
235 Node::HardBreak => {
236 run = run.add_break(BreakType::TextWrapping);
237 }
238 Node::SoftBreak => {
239 run = run.add_text(" ");
240 }
241 node if node.is_custom_type::<HighlightNode>() => {
242 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
243 run = run.highlight("yellow");
244 for child in &highlight_node.content {
245 run = self.process_inline_to_run(run, child)?;
246 }
247 }
248 node if node.is_custom_type::<InlineNode>() => {
249 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
250 for child in &inline_node.content {
251 run = self.process_inline_to_run(run, child)?;
252 }
253 }
254 node if node.is_custom_type::<VerbatimNode>() => {
255 let node = node.as_custom_type::<VerbatimNode>().unwrap();
256 warn!(
257 "ignoring `m1verbatim` content in DOCX export: {:?}",
258 node.content
259 );
260 }
261 _ => {
263 debug!("unhandled inline node in DOCX export: {node:?}");
264 }
265 }
266
267 Ok(run)
268 }
269
270 fn process_paragraph(
272 &self,
273 mut docx: Docx,
274 content: &[Node],
275 style: Option<&str>,
276 ) -> Result<Docx> {
277 let mut para = Paragraph::new();
278
279 if let Some(style_name) = style {
281 para = para.style(style_name);
282 }
283
284 let mut links = Vec::new();
286 for (i, node) in content.iter().enumerate() {
287 if let Node::Link {
288 url,
289 title: _,
290 content: _,
291 } = node
292 {
293 links.push((i, url.clone()));
294 }
295 }
296
297 if links.is_empty() {
299 for node in content {
301 let run = Run::new();
302 let run = self.process_inline_to_run(run, node)?;
303 if !run.children.is_empty() {
304 para = para.add_run(run);
305 }
306 }
307 } else {
308 let mut last_idx = 0;
310 for (idx, url) in links {
311 for item in content.iter().take(idx).skip(last_idx) {
313 let run = Run::new();
314 let run = self.process_inline_to_run(run, item)?;
315 if !run.children.is_empty() {
316 para = para.add_run(run);
317 }
318 }
319
320 if let Node::Link {
322 url: _,
323 title: _,
324 content: link_content,
325 } = &content[idx]
326 {
327 let mut hyperlink_run = Run::new().style("Hyperlink");
328 for child in link_content {
329 hyperlink_run = self.process_inline_to_run(hyperlink_run, child)?;
330 }
331
332 if !hyperlink_run.children.is_empty() {
334 let hyperlink =
335 Hyperlink::new(&url, HyperlinkType::External).add_run(hyperlink_run);
336 para = para.add_hyperlink(hyperlink);
337 }
338 }
339
340 last_idx = idx + 1;
341 }
342
343 for item in content.iter().skip(last_idx) {
345 let run = Run::new();
346 let run = self.process_inline_to_run(run, item)?;
347 if !run.children.is_empty() {
348 para = para.add_run(run);
349 }
350 }
351 }
352
353 if !para.children.is_empty() {
355 docx = docx.add_paragraph(para);
356 }
357
358 Ok(docx)
359 }
360
361 fn process_node(&mut self, mut docx: Docx, node: &Node) -> Result<Docx> {
363 match node {
364 Node::Document(blocks) => {
365 for block in blocks {
366 docx = self.process_node(docx, block)?;
367 }
368 }
369 Node::Paragraph(content) => {
370 docx = self.process_paragraph(docx, content, None)?;
371 }
372 Node::Heading {
373 level,
374 content,
375 heading_type: _,
376 } => {
377 let style_name = match level {
379 1 => "Heading1",
380 2 => "Heading2",
381 3 => "Heading3",
382 4 => "Heading4",
383 5 => "Heading5",
384 _ => "Heading6",
385 };
386
387 docx = self.process_paragraph(docx, content, Some(style_name))?;
388 }
389 Node::BlockQuote(content) => {
390 for block in content {
391 if let Node::Paragraph(inline) = block {
392 docx = self.process_paragraph(docx, inline, Some("Blockquote"))?;
393 } else {
394 docx = self.process_node(docx, block)?;
395 }
396 }
397 }
398 Node::CodeBlock {
399 language,
400 content,
401 block_type: _,
402 } => {
403 if let Some(lang) = language
405 && !lang.is_empty()
406 {
407 let lang_para = Paragraph::new()
408 .style("CodeBlock")
409 .add_run(Run::new().add_text(lang));
410 docx = docx.add_paragraph(lang_para);
411 }
412
413 let lines: Vec<&str> = content.split('\n').collect();
415 for line in lines {
416 let code_para = Paragraph::new()
417 .style("CodeBlock")
418 .add_run(Run::new().add_text(line));
419 docx = docx.add_paragraph(code_para);
420 }
421 }
422 Node::OrderedList { start: _, items } => {
423 docx = self.process_ordered_list(docx, items)?;
424 }
425 Node::UnorderedList(items) => {
426 docx = self.process_unordered_list(docx, items)?;
427 }
428 Node::Table {
429 headers,
430 rows,
431 alignments: _,
432 } => {
433 docx = self.process_table(docx, headers, rows)?;
434 }
435 Node::Image { url, title: _, alt } => {
436 docx = self.process_image(docx, url, alt)?;
437 }
438 node if node.is_custom_type::<FigureNode>() => {
439 let figure_node = node.as_custom_type::<FigureNode>().unwrap();
440 docx = self.process_figure(docx, figure_node)?;
441 }
442 node if node.is_custom_type::<CenterNode>() => {
443 let center_node = node.as_custom_type::<CenterNode>().unwrap();
444 match ¢er_node.node {
446 Node::Paragraph(content) => {
447 docx = self.process_paragraph(docx, content, None)?;
448 if let Some(DocumentChild::Paragraph(para)) =
450 docx.document.children.last_mut()
451 {
452 para.property = para.property.clone().align(AlignmentType::Center);
453 }
454 }
455 Node::HtmlElement(element) => {
456 let start_idx = docx.document.children.len();
457 for child in &element.children {
458 docx = self.process_node(docx, child)?;
459 }
460 for child in docx.document.children.iter_mut().skip(start_idx) {
461 if let DocumentChild::Paragraph(para) = child {
462 para.property = para.property.clone().align(AlignmentType::Center);
463 }
464 }
465 }
466 other => {
467 docx = self.process_node(docx, other)?;
468 if let Some(DocumentChild::Paragraph(para)) =
470 docx.document.children.last_mut()
471 {
472 para.property = para.property.clone().align(AlignmentType::Center);
473 }
474 }
475 }
476 }
477 node if node.is_custom_type::<crate::common::ExternalFrameNode>() => {
478 let external_frame = node
479 .as_custom_type::<crate::common::ExternalFrameNode>()
480 .unwrap();
481 let data = base64::engine::general_purpose::STANDARD
482 .decode(&external_frame.svg)
483 .map_err(|e| format!("Failed to decode SVG data: {e}"))?;
484
485 docx = self.image_processor.process_image_data(
486 docx,
487 &data,
488 Some(&external_frame.alt_text),
489 None,
490 );
491 }
492 node if node.is_custom_type::<HighlightNode>() => {
493 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
494 let mut para = Paragraph::new();
496 let mut run = Run::new().highlight("yellow");
497
498 for child in &highlight_node.content {
499 run = self.process_inline_to_run(run, child)?;
500 }
501
502 if !run.children.is_empty() {
503 para = para.add_run(run);
504 docx = docx.add_paragraph(para);
505 }
506 }
507 node if node.is_custom_type::<InlineNode>() => {
508 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
509 let mut para = Paragraph::new();
511 let mut run = Run::new();
512
513 for child in &inline_node.content {
514 run = self.process_inline_to_run(run, child)?;
515 }
516
517 if !run.children.is_empty() {
518 para = para.add_run(run);
519 docx = docx.add_paragraph(para);
520 }
521 }
522 Node::ThematicBreak => {
523 let hr_para = Paragraph::new()
525 .style("HorizontalLine")
526 .add_run(Run::new().add_text(""));
527 docx = docx.add_paragraph(hr_para);
528 }
529 _ => {}
531 }
532
533 Ok(docx)
534 }
535
536 fn process_ordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
538 self.list_level += 1;
540 let current_level = self.list_level - 1;
541
542 let (doc, num_id) = self.numbering.create_ordered_numbering(docx);
544 docx = doc;
545
546 for item in items {
548 if let ListItem::Ordered { content, .. } = item {
549 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
550 }
551 }
552
553 self.list_level -= 1;
555 Ok(docx)
556 }
557
558 fn process_unordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
560 self.list_level += 1;
562 let current_level = self.list_level - 1;
563
564 let (doc, num_id) = self.numbering.create_unordered_numbering(docx);
566 docx = doc;
567
568 for item in items {
570 if let ListItem::Unordered { content } = item {
571 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
572 }
573 }
574
575 self.list_level -= 1;
577 Ok(docx)
578 }
579
580 fn process_list_item_content(
582 &mut self,
583 mut docx: Docx,
584 content: &[Node],
585 num_id: usize,
586 level: usize,
587 ) -> Result<Docx> {
588 if content.is_empty() {
590 let empty_para = Paragraph::new()
591 .numbering(NumberingId::new(num_id), IndentLevel::new(level))
592 .add_run(Run::new().add_text(""));
593 return Ok(docx.add_paragraph(empty_para));
594 }
595
596 for block in content {
598 match block {
599 Node::Paragraph(inline) => {
600 let mut para = Paragraph::new()
601 .numbering(NumberingId::new(num_id), IndentLevel::new(level));
602
603 for node in inline {
605 let run = Run::new();
606 let run = self.process_inline_to_run(run, node)?;
607 if !run.children.is_empty() {
608 para = para.add_run(run);
609 }
610 }
611
612 docx = docx.add_paragraph(para);
613 }
614 Node::OrderedList { start: _, items: _ } | Node::UnorderedList(_) => {
616 docx = self.process_node(docx, block)?;
617 }
618 _ => {
619 docx = self.process_node(docx, block)?;
620 }
621 }
622 }
623
624 Ok(docx)
625 }
626
627 fn process_table(&self, mut docx: Docx, headers: &[Node], rows: &[Vec<Node>]) -> Result<Docx> {
629 let mut table = Table::new(vec![]).style("Table");
630
631 if !headers.is_empty() {
633 let mut cells = Vec::new();
634
635 for header_node in headers {
636 let mut table_cell = TableCell::new();
637 let mut para = Paragraph::new();
638
639 let run = Run::new();
640 let run = self.process_inline_to_run(run, header_node)?;
641 if !run.children.is_empty() {
642 para = para.add_run(run);
643 }
644
645 if !para.children.is_empty() {
646 table_cell = table_cell.add_paragraph(para);
647 }
648
649 cells.push(table_cell);
650 }
651
652 if !cells.is_empty() {
653 let header_row = TableRow::new(cells);
654 table = table.add_row(header_row);
655 }
656 }
657
658 for row in rows {
660 let mut cells = Vec::new();
661
662 for cell_node in row {
663 let mut table_cell = TableCell::new();
664 let mut para = Paragraph::new();
665
666 let run = Run::new();
667 let run = self.process_inline_to_run(run, cell_node)?;
668 if !run.children.is_empty() {
669 para = para.add_run(run);
670 }
671
672 if !para.children.is_empty() {
673 table_cell = table_cell.add_paragraph(para);
674 }
675
676 cells.push(table_cell);
677 }
678
679 if !cells.is_empty() {
680 let data_row = TableRow::new(cells);
681 table = table.add_row(data_row);
682 }
683 }
684
685 docx = docx.add_table(table);
687
688 Ok(docx)
689 }
690
691 pub fn generate_docx(&mut self, doc: &Node) -> Result<Vec<u8>> {
693 let mut docx = Docx::new();
695 docx = self.styles.initialize_styles(docx);
696
697 docx = self.process_node(docx, doc)?;
699
700 docx = self.numbering.initialize_numbering(docx);
702
703 let docx_built = docx.build();
705 let mut buffer = Vec::new();
706 docx_built
707 .pack(&mut Cursor::new(&mut buffer))
708 .map_err(|e| format!("Failed to pack DOCX: {e}"))?;
709
710 Ok(buffer)
711 }
712}
713
714impl FormatWriter for DocxWriter {
715 fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
716 self.list_level = 0;
717 self.list_numbering_count = 0;
718 self.generate_docx(document)
719 }
720
721 fn write_eco(&mut self, _document: &Node, _output: &mut EcoString) -> Result<()> {
722 Err("DOCX format does not support EcoString output".into())
723 }
724}