1use base64::Engine;
4use cmark_writer::ast::{ListItem, Node};
5use docx_rs::*;
6use ecow::EcoString;
7use log::debug;
8use std::fs;
9use std::io::Cursor;
10
11use crate::Result;
12use crate::common::{
13 BlockVerbatimNode, CenterNode, FigureNode, FormatWriter, HighlightNode, InlineNode,
14 VerbatimNode,
15};
16
17use super::image_processor::DocxImageProcessor;
18use super::numbering::DocxNumbering;
19use super::styles::DocxStyles;
20
21pub struct DocxWriter {
24 styles: DocxStyles,
25 numbering: DocxNumbering,
26 list_level: usize,
27 list_numbering_count: usize,
28 image_processor: DocxImageProcessor,
29}
30
31impl Default for DocxWriter {
32 fn default() -> Self {
33 Self::new()
34 }
35}
36
37impl DocxWriter {
38 pub fn new() -> Self {
39 Self {
40 styles: DocxStyles::new(),
41 numbering: DocxNumbering::new(),
42 list_level: 0,
43 list_numbering_count: 0,
44 image_processor: DocxImageProcessor::new(),
45 }
46 }
47
48 fn process_image(&self, docx: Docx, url: &str, alt_nodes: &[Node]) -> Result<Docx> {
50 let alt_text = if !alt_nodes.is_empty() {
52 let mut text = String::new();
53 for node in alt_nodes {
54 if let Node::Text(content) = node {
55 text.push_str(content);
56 }
57 }
58 Some(text)
59 } else {
60 None
61 };
62
63 if let Ok(img_data) = fs::read(url) {
65 Ok(self
66 .image_processor
67 .process_image_data(docx, &img_data, alt_text.as_deref(), None))
68 } else {
69 let placeholder = format!("[Image not found: {url}]");
70 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
71 Ok(docx.add_paragraph(para))
72 }
73 }
74
75 fn process_figure(&mut self, mut docx: Docx, figure_node: &FigureNode) -> Result<Docx> {
77 match &*figure_node.body {
79 Node::Paragraph(content) => {
80 for node in content {
81 if let Node::Image {
82 url,
83 title: _,
84 alt: _,
85 } = node
86 {
87 if let Ok(img_data) = fs::read(url.as_str()) {
89 let alt_text = figure_node.caption.clone();
90 docx = self.image_processor.process_image_data(
92 docx,
93 &img_data,
94 Some(&alt_text),
95 None,
96 );
97
98 if !figure_node.caption.is_empty() {
100 let caption_text = format!("Figure: {}", figure_node.caption);
101 let caption_para = Paragraph::new()
102 .style("Caption")
103 .add_run(Run::new().add_text(caption_text));
104 docx = docx.add_paragraph(caption_para);
105 }
106 } else {
107 let placeholder = format!("[Image not found: {url}]");
109 let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
110 docx = docx.add_paragraph(para);
111
112 if !figure_node.caption.is_empty() {
114 let caption_para = Paragraph::new()
115 .style("Caption")
116 .add_run(Run::new().add_text(&figure_node.caption));
117 docx = docx.add_paragraph(caption_para);
118 }
119 }
120 } else {
121 let mut para = Paragraph::new();
123 let run = Run::new();
124 let run = self.process_inline_to_run(run, node)?;
125 if !run.children.is_empty() {
126 para = para.add_run(run);
127 docx = docx.add_paragraph(para);
128 }
129
130 if !figure_node.caption.is_empty() {
132 let caption_para = Paragraph::new()
133 .style("Caption")
134 .add_run(Run::new().add_text(&figure_node.caption));
135 docx = docx.add_paragraph(caption_para);
136 }
137 }
138 }
139 }
140 _ => {
142 docx = self.process_node(docx, &figure_node.body)?;
144
145 if !figure_node.caption.is_empty() {
147 let caption_para = Paragraph::new()
148 .style("Caption")
149 .add_run(Run::new().add_text(&figure_node.caption));
150 docx = docx.add_paragraph(caption_para);
151 }
152 }
153 }
154
155 Ok(docx)
156 }
157
158 fn process_inline_to_run(&self, mut run: Run, node: &Node) -> Result<Run> {
160 match node {
161 Node::Text(text) => {
162 run = run.add_text(text);
163 }
164 Node::Strong(content) => {
165 run = run.style("Strong");
166 for child in content {
167 run = self.process_inline_to_run(run, child)?;
168 }
169 }
170 Node::Emphasis(content) => {
171 run = run.style("Emphasis");
172 for child in content {
173 run = self.process_inline_to_run(run, child)?;
174 }
175 }
176 Node::Strikethrough(content) => {
177 run = run.strike();
178 for child in content {
179 run = self.process_inline_to_run(run, child)?;
180 }
181 }
182 Node::Link {
183 url: _,
184 title: _,
185 content,
186 } => {
187 run = run.style("Hyperlink");
189 for child in content {
190 run = self.process_inline_to_run(run, child)?;
191 }
192 }
193 Node::Image {
194 url,
195 title: _,
196 alt: _,
197 } => {
198 if let Ok(img_data) = fs::read(url.as_str()) {
199 run = self.image_processor.process_inline_image(run, &img_data)?;
200 } else {
201 run = run.add_text(format!("[Image not found: {url}]"));
202 }
203 }
204 Node::HtmlElement(element) => {
205 if element.tag == "img" && element.self_closing {
207 let is_typst_block = element
208 .attributes
209 .iter()
210 .any(|a| a.name == "alt" && a.value == "typst-block");
211
212 let src = element
213 .attributes
214 .iter()
215 .find(|a| a.name == "src")
216 .map(|a| a.value.as_str())
217 .unwrap_or("");
218
219 if src.starts_with("data:image/") {
220 run = self.image_processor.process_data_url_image(
221 run,
222 src,
223 is_typst_block,
224 )?;
225 }
226 } else {
227 for child in &element.children {
229 run = self.process_inline_to_run(run, child)?;
230 }
231 }
232 }
233 Node::InlineCode(code) => {
234 run = run.style("CodeInline").add_text(code);
235 }
236 Node::HardBreak => {
237 run = run.add_break(BreakType::TextWrapping);
238 }
239 Node::SoftBreak => {
240 run = run.add_text(" ");
241 }
242 node if node.is_custom_type::<HighlightNode>() => {
243 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
244 run = run.highlight("yellow");
245 for child in &highlight_node.content {
246 run = self.process_inline_to_run(run, child)?;
247 }
248 }
249 node if node.is_custom_type::<InlineNode>() => {
250 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
251 for child in &inline_node.content {
252 run = self.process_inline_to_run(run, child)?;
253 }
254 }
255 node if node.is_custom_type::<VerbatimNode>() => {
256 let node = node.as_custom_type::<VerbatimNode>().unwrap();
257 run = run.style("CodeInline").add_text(&node.content);
258 }
259 _ => {
261 debug!("unhandled inline node in DOCX export: {node:?}");
262 }
263 }
264
265 Ok(run)
266 }
267
268 fn process_paragraph(
270 &self,
271 mut docx: Docx,
272 content: &[Node],
273 style: Option<&str>,
274 ) -> Result<Docx> {
275 let mut para = Paragraph::new();
276
277 if let Some(style_name) = style {
279 para = para.style(style_name);
280 }
281
282 let mut links = Vec::new();
284 for (i, node) in content.iter().enumerate() {
285 if let Node::Link {
286 url,
287 title: _,
288 content: _,
289 } = node
290 {
291 links.push((i, url.clone()));
292 }
293 }
294
295 if links.is_empty() {
297 for node in content {
299 let run = Run::new();
300 let run = self.process_inline_to_run(run, node)?;
301 if !run.children.is_empty() {
302 para = para.add_run(run);
303 }
304 }
305 } else {
306 let mut last_idx = 0;
308 for (idx, url) in links {
309 for item in content.iter().take(idx).skip(last_idx) {
311 let run = Run::new();
312 let run = self.process_inline_to_run(run, item)?;
313 if !run.children.is_empty() {
314 para = para.add_run(run);
315 }
316 }
317
318 if let Node::Link {
320 url: _,
321 title: _,
322 content: link_content,
323 } = &content[idx]
324 {
325 let mut hyperlink_run = Run::new().style("Hyperlink");
326 for child in link_content {
327 hyperlink_run = self.process_inline_to_run(hyperlink_run, child)?;
328 }
329
330 if !hyperlink_run.children.is_empty() {
332 let hyperlink =
333 Hyperlink::new(&url, HyperlinkType::External).add_run(hyperlink_run);
334 para = para.add_hyperlink(hyperlink);
335 }
336 }
337
338 last_idx = idx + 1;
339 }
340
341 for item in content.iter().skip(last_idx) {
343 let run = Run::new();
344 let run = self.process_inline_to_run(run, item)?;
345 if !run.children.is_empty() {
346 para = para.add_run(run);
347 }
348 }
349 }
350
351 if !para.children.is_empty() {
353 docx = docx.add_paragraph(para);
354 }
355
356 Ok(docx)
357 }
358
359 fn process_node(&mut self, mut docx: Docx, node: &Node) -> Result<Docx> {
361 match node {
362 Node::Document(blocks) => {
363 for block in blocks {
364 docx = self.process_node(docx, block)?;
365 }
366 }
367 Node::Paragraph(content) => {
368 docx = self.process_paragraph(docx, content, None)?;
369 }
370 Node::Heading {
371 level,
372 content,
373 heading_type: _,
374 } => {
375 let style_name = match level {
377 1 => "Heading1",
378 2 => "Heading2",
379 3 => "Heading3",
380 4 => "Heading4",
381 5 => "Heading5",
382 _ => "Heading6",
383 };
384
385 docx = self.process_paragraph(docx, content, Some(style_name))?;
386 }
387 Node::BlockQuote(content) => {
388 for block in content {
389 if let Node::Paragraph(inline) = block {
390 docx = self.process_paragraph(docx, inline, Some("Blockquote"))?;
391 } else {
392 docx = self.process_node(docx, block)?;
393 }
394 }
395 }
396 Node::CodeBlock {
397 language,
398 content,
399 block_type: _,
400 } => {
401 if let Some(lang) = language
403 && !lang.is_empty()
404 {
405 let lang_para = Paragraph::new()
406 .style("CodeBlock")
407 .add_run(Run::new().add_text(lang));
408 docx = docx.add_paragraph(lang_para);
409 }
410
411 let lines: Vec<&str> = content.split('\n').collect();
413 for line in lines {
414 let code_para = Paragraph::new()
415 .style("CodeBlock")
416 .add_run(Run::new().add_text(line));
417 docx = docx.add_paragraph(code_para);
418 }
419 }
420 Node::OrderedList { start: _, items } => {
421 docx = self.process_ordered_list(docx, items)?;
422 }
423 Node::UnorderedList(items) => {
424 docx = self.process_unordered_list(docx, items)?;
425 }
426 Node::Table {
427 headers,
428 rows,
429 alignments: _,
430 } => {
431 docx = self.process_table(docx, headers, rows)?;
432 }
433 Node::Image { url, title: _, alt } => {
434 docx = self.process_image(docx, url, alt)?;
435 }
436 node if node.is_custom_type::<FigureNode>() => {
437 let figure_node = node.as_custom_type::<FigureNode>().unwrap();
438 docx = self.process_figure(docx, figure_node)?;
439 }
440 node if node.is_custom_type::<CenterNode>() => {
441 let center_node = node.as_custom_type::<CenterNode>().unwrap();
442 match ¢er_node.node {
444 Node::Paragraph(content) => {
445 docx = self.process_paragraph(docx, content, None)?;
446 if let Some(DocumentChild::Paragraph(para)) =
448 docx.document.children.last_mut()
449 {
450 para.property = para.property.clone().align(AlignmentType::Center);
451 }
452 }
453 Node::HtmlElement(element) => {
454 let start_idx = docx.document.children.len();
455 for child in &element.children {
456 docx = self.process_node(docx, child)?;
457 }
458 for child in docx.document.children.iter_mut().skip(start_idx) {
459 if let DocumentChild::Paragraph(para) = child {
460 para.property = para.property.clone().align(AlignmentType::Center);
461 }
462 }
463 }
464 other => {
465 docx = self.process_node(docx, other)?;
466 if let Some(DocumentChild::Paragraph(para)) =
468 docx.document.children.last_mut()
469 {
470 para.property = para.property.clone().align(AlignmentType::Center);
471 }
472 }
473 }
474 }
475 node if node.is_custom_type::<crate::common::ExternalFrameNode>() => {
476 let external_frame = node
477 .as_custom_type::<crate::common::ExternalFrameNode>()
478 .unwrap();
479 let data = base64::engine::general_purpose::STANDARD
480 .decode(&external_frame.svg)
481 .map_err(|e| format!("Failed to decode SVG data: {e}"))?;
482
483 docx = self.image_processor.process_image_data(
484 docx,
485 &data,
486 Some(&external_frame.alt_text),
487 None,
488 );
489 }
490 node if node.is_custom_type::<HighlightNode>() => {
491 let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
492 let mut para = Paragraph::new();
494 let mut run = Run::new().highlight("yellow");
495
496 for child in &highlight_node.content {
497 run = self.process_inline_to_run(run, child)?;
498 }
499
500 if !run.children.is_empty() {
501 para = para.add_run(run);
502 docx = docx.add_paragraph(para);
503 }
504 }
505 node if node.is_custom_type::<BlockVerbatimNode>() => {
506 let block_node = node.as_custom_type::<BlockVerbatimNode>().unwrap();
507 for line in block_node.content.split('\n') {
508 let para = Paragraph::new()
509 .style("CodeBlock")
510 .add_run(Run::new().add_text(line));
511 docx = docx.add_paragraph(para);
512 }
513 }
514 node if node.is_custom_type::<InlineNode>() => {
515 let inline_node = node.as_custom_type::<InlineNode>().unwrap();
516 let mut para = Paragraph::new();
518 let mut run = Run::new();
519
520 for child in &inline_node.content {
521 run = self.process_inline_to_run(run, child)?;
522 }
523
524 if !run.children.is_empty() {
525 para = para.add_run(run);
526 docx = docx.add_paragraph(para);
527 }
528 }
529 Node::ThematicBreak => {
530 let hr_para = Paragraph::new()
532 .style("HorizontalLine")
533 .add_run(Run::new().add_text(""));
534 docx = docx.add_paragraph(hr_para);
535 }
536 _ => {}
538 }
539
540 Ok(docx)
541 }
542
543 fn process_ordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
545 self.list_level += 1;
547 let current_level = self.list_level - 1;
548
549 let (doc, num_id) = self.numbering.create_ordered_numbering(docx);
551 docx = doc;
552
553 for item in items {
555 if let ListItem::Ordered { content, .. } = item {
556 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
557 }
558 }
559
560 self.list_level -= 1;
562 Ok(docx)
563 }
564
565 fn process_unordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
567 self.list_level += 1;
569 let current_level = self.list_level - 1;
570
571 let (doc, num_id) = self.numbering.create_unordered_numbering(docx);
573 docx = doc;
574
575 for item in items {
577 if let ListItem::Unordered { content } = item {
578 docx = self.process_list_item_content(docx, content, num_id, current_level)?;
579 }
580 }
581
582 self.list_level -= 1;
584 Ok(docx)
585 }
586
587 fn process_list_item_content(
589 &mut self,
590 mut docx: Docx,
591 content: &[Node],
592 num_id: usize,
593 level: usize,
594 ) -> Result<Docx> {
595 if content.is_empty() {
597 let empty_para = Paragraph::new()
598 .numbering(NumberingId::new(num_id), IndentLevel::new(level))
599 .add_run(Run::new().add_text(""));
600 return Ok(docx.add_paragraph(empty_para));
601 }
602
603 for block in content {
605 match block {
606 Node::Paragraph(inline) => {
607 let mut para = Paragraph::new()
608 .numbering(NumberingId::new(num_id), IndentLevel::new(level));
609
610 for node in inline {
612 let run = Run::new();
613 let run = self.process_inline_to_run(run, node)?;
614 if !run.children.is_empty() {
615 para = para.add_run(run);
616 }
617 }
618
619 docx = docx.add_paragraph(para);
620 }
621 Node::OrderedList { start: _, items: _ } | Node::UnorderedList(_) => {
623 docx = self.process_node(docx, block)?;
624 }
625 _ => {
626 docx = self.process_node(docx, block)?;
627 }
628 }
629 }
630
631 Ok(docx)
632 }
633
634 fn process_table(&self, mut docx: Docx, headers: &[Node], rows: &[Vec<Node>]) -> Result<Docx> {
636 let mut table = Table::new(vec![]).style("Table");
637
638 if !headers.is_empty() {
640 let mut cells = Vec::new();
641
642 for header_node in headers {
643 let mut table_cell = TableCell::new();
644 let mut para = Paragraph::new();
645
646 let run = Run::new();
647 let run = self.process_inline_to_run(run, header_node)?;
648 if !run.children.is_empty() {
649 para = para.add_run(run);
650 }
651
652 if !para.children.is_empty() {
653 table_cell = table_cell.add_paragraph(para);
654 }
655
656 cells.push(table_cell);
657 }
658
659 if !cells.is_empty() {
660 let header_row = TableRow::new(cells);
661 table = table.add_row(header_row);
662 }
663 }
664
665 for row in rows {
667 let mut cells = Vec::new();
668
669 for cell_node in row {
670 let mut table_cell = TableCell::new();
671 let mut para = Paragraph::new();
672
673 let run = Run::new();
674 let run = self.process_inline_to_run(run, cell_node)?;
675 if !run.children.is_empty() {
676 para = para.add_run(run);
677 }
678
679 if !para.children.is_empty() {
680 table_cell = table_cell.add_paragraph(para);
681 }
682
683 cells.push(table_cell);
684 }
685
686 if !cells.is_empty() {
687 let data_row = TableRow::new(cells);
688 table = table.add_row(data_row);
689 }
690 }
691
692 docx = docx.add_table(table);
694
695 Ok(docx)
696 }
697
698 pub fn generate_docx(&mut self, doc: &Node) -> Result<Vec<u8>> {
700 let mut docx = Docx::new();
702 docx = self.styles.initialize_styles(docx);
703
704 docx = self.process_node(docx, doc)?;
706
707 docx = self.numbering.initialize_numbering(docx);
709
710 let docx_built = docx.build();
712 let mut buffer = Vec::new();
713 docx_built
714 .pack(&mut Cursor::new(&mut buffer))
715 .map_err(|e| format!("Failed to pack DOCX: {e}"))?;
716
717 Ok(buffer)
718 }
719}
720
721impl FormatWriter for DocxWriter {
722 fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
723 self.list_level = 0;
724 self.list_numbering_count = 0;
725 self.generate_docx(document)
726 }
727
728 fn write_eco(&mut self, _document: &Node, _output: &mut EcoString) -> Result<()> {
729 Err("DOCX format does not support EcoString output".into())
730 }
731}