typlite/writer/docx/
writer.rs

1//! DOCX document writer implementation
2
3use base64::Engine;
4use cmark_writer::ast::{ListItem, Node};
5use docx_rs::*;
6use ecow::EcoString;
7use std::fs;
8use std::io::Cursor;
9
10use crate::Result;
11use crate::common::{
12    CenterNode, FigureNode, FormatWriter, HighlightNode, InlineNode, VerbatimNode,
13};
14
15use super::image_processor::DocxImageProcessor;
16use super::numbering::DocxNumbering;
17use super::styles::DocxStyles;
18
/// DOCX writer that generates DOCX directly from AST (without intermediate
/// representation)
pub struct DocxWriter {
    /// Style definitions installed into the document before any content is
    /// written.
    styles: DocxStyles,
    /// Allocates a numbering id for each list and installs the numbering
    /// definitions once all content has been processed.
    numbering: DocxNumbering,
    /// Current list nesting depth; `0` while outside of any list.
    list_level: usize,
    /// Reset to `0` at construction and at the start of `write_vec`; it is not
    /// read anywhere in this file.
    list_numbering_count: usize,
    /// Embeds image bytes into the document or into an individual run.
    image_processor: DocxImageProcessor,
}
28
29impl Default for DocxWriter {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35impl DocxWriter {
36    pub fn new() -> Self {
37        Self {
38            styles: DocxStyles::new(),
39            numbering: DocxNumbering::new(),
40            list_level: 0,
41            list_numbering_count: 0,
42            image_processor: DocxImageProcessor::new(),
43        }
44    }
45
46    /// Process image node
47    fn process_image(&self, docx: Docx, url: &str, alt_nodes: &[Node]) -> Result<Docx> {
48        // Build alt text
49        let alt_text = if !alt_nodes.is_empty() {
50            let mut text = String::new();
51            for node in alt_nodes {
52                if let Node::Text(content) = node {
53                    text.push_str(content);
54                }
55            }
56            Some(text)
57        } else {
58            None
59        };
60
61        // Try reading image file
62        if let Ok(img_data) = fs::read(url) {
63            Ok(self
64                .image_processor
65                .process_image_data(docx, &img_data, alt_text.as_deref(), None))
66        } else {
67            let placeholder = format!("[Image not found: {url}]");
68            let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
69            Ok(docx.add_paragraph(para))
70        }
71    }
72
73    /// Process figure node (image with caption)
74    fn process_figure(&mut self, mut docx: Docx, figure_node: &FigureNode) -> Result<Docx> {
75        // First handle the figure body (typically an image)
76        match &*figure_node.body {
77            Node::Paragraph(content) => {
78                for node in content {
79                    if let Node::Image {
80                        url,
81                        title: _,
82                        alt: _,
83                    } = node
84                    {
85                        // Process the image
86                        if let Ok(img_data) = fs::read(url.as_str()) {
87                            let alt_text = figure_node.caption.clone();
88                            // Add the image with caption
89                            docx = self.image_processor.process_image_data(
90                                docx,
91                                &img_data,
92                                Some(&alt_text),
93                                None,
94                            );
95
96                            // Add caption as a separate paragraph with Caption style
97                            if !figure_node.caption.is_empty() {
98                                let caption_text = format!("Figure: {}", figure_node.caption);
99                                let caption_para = Paragraph::new()
100                                    .style("Caption")
101                                    .add_run(Run::new().add_text(caption_text));
102                                docx = docx.add_paragraph(caption_para);
103                            }
104                        } else {
105                            // Image not found, show placeholder
106                            let placeholder = format!("[Image not found: {url}]");
107                            let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
108                            docx = docx.add_paragraph(para);
109
110                            // Still add caption
111                            if !figure_node.caption.is_empty() {
112                                let caption_para = Paragraph::new()
113                                    .style("Caption")
114                                    .add_run(Run::new().add_text(&figure_node.caption));
115                                docx = docx.add_paragraph(caption_para);
116                            }
117                        }
118                    } else {
119                        // Handle non-image content
120                        let mut para = Paragraph::new();
121                        let run = Run::new();
122                        let run = self.process_inline_to_run(run, node)?;
123                        if !run.children.is_empty() {
124                            para = para.add_run(run);
125                            docx = docx.add_paragraph(para);
126                        }
127
128                        // Add caption as a separate paragraph
129                        if !figure_node.caption.is_empty() {
130                            let caption_para = Paragraph::new()
131                                .style("Caption")
132                                .add_run(Run::new().add_text(&figure_node.caption));
133                            docx = docx.add_paragraph(caption_para);
134                        }
135                    }
136                }
137            }
138            // Handle other content types within figure
139            _ => {
140                // Process the content using standard node processing
141                docx = self.process_node(docx, &figure_node.body)?;
142
143                // Add caption as a separate paragraph
144                if !figure_node.caption.is_empty() {
145                    let caption_para = Paragraph::new()
146                        .style("Caption")
147                        .add_run(Run::new().add_text(&figure_node.caption));
148                    docx = docx.add_paragraph(caption_para);
149                }
150            }
151        }
152
153        Ok(docx)
154    }
155
156    /// Process inline element and add to Run
157    fn process_inline_to_run(&self, mut run: Run, node: &Node) -> Result<Run> {
158        match node {
159            Node::Text(text) => {
160                run = run.add_text(text);
161            }
162            Node::Strong(content) => {
163                run = run.style("Strong");
164                for child in content {
165                    run = self.process_inline_to_run(run, child)?;
166                }
167            }
168            Node::Emphasis(content) => {
169                run = run.style("Emphasis");
170                for child in content {
171                    run = self.process_inline_to_run(run, child)?;
172                }
173            }
174            Node::Strikethrough(content) => {
175                run = run.strike();
176                for child in content {
177                    run = self.process_inline_to_run(run, child)?;
178                }
179            }
180            Node::Link {
181                url: _,
182                title: _,
183                content,
184            } => {
185                // Hyperlinks need to be processed at paragraph level, only handle content here
186                run = run.style("Hyperlink");
187                for child in content {
188                    run = self.process_inline_to_run(run, child)?;
189                }
190            }
191            Node::Image {
192                url,
193                title: _,
194                alt: _,
195            } => {
196                if let Ok(img_data) = fs::read(url.as_str()) {
197                    run = self.image_processor.process_inline_image(run, &img_data)?;
198                } else {
199                    run = run.add_text(format!("[Image not found: {url}]"));
200                }
201            }
202            Node::HtmlElement(element) => {
203                // Handle special HTML elements
204                if element.tag == "img" && element.self_closing {
205                    let is_typst_block = element
206                        .attributes
207                        .iter()
208                        .any(|a| a.name == "alt" && a.value == "typst-block");
209
210                    let src = element
211                        .attributes
212                        .iter()
213                        .find(|a| a.name == "src")
214                        .map(|a| a.value.as_str())
215                        .unwrap_or("");
216
217                    if src.starts_with("data:image/") {
218                        run = self.image_processor.process_data_url_image(
219                            run,
220                            src,
221                            is_typst_block,
222                        )?;
223                    }
224                } else {
225                    // Standard element content processing
226                    for child in &element.children {
227                        run = self.process_inline_to_run(run, child)?;
228                    }
229                }
230            }
231            Node::InlineCode(code) => {
232                run = run.style("CodeInline").add_text(code);
233            }
234            Node::HardBreak => {
235                run = run.add_break(BreakType::TextWrapping);
236            }
237            Node::SoftBreak => {
238                run = run.add_text(" ");
239            }
240            node if node.is_custom_type::<HighlightNode>() => {
241                let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
242                run = run.highlight("yellow");
243                for child in &highlight_node.content {
244                    run = self.process_inline_to_run(run, child)?;
245                }
246            }
247            node if node.is_custom_type::<InlineNode>() => {
248                let inline_node = node.as_custom_type::<InlineNode>().unwrap();
249                for child in &inline_node.content {
250                    run = self.process_inline_to_run(run, child)?;
251                }
252            }
253            node if node.is_custom_type::<VerbatimNode>() => {
254                let node = node.as_custom_type::<VerbatimNode>().unwrap();
255                eprintln!("Warning: `m1verbatim` is ignored {:?}.", node.content);
256            }
257            // Other inline element types
258            _ => {
259                eprintln!("other inline element: {node:?}");
260            }
261        }
262
263        Ok(run)
264    }
265
266    /// Process paragraph and add to document
267    fn process_paragraph(
268        &self,
269        mut docx: Docx,
270        content: &[Node],
271        style: Option<&str>,
272    ) -> Result<Docx> {
273        let mut para = Paragraph::new();
274
275        // Apply style
276        if let Some(style_name) = style {
277            para = para.style(style_name);
278        }
279
280        // Extract all link nodes
281        let mut links = Vec::new();
282        for (i, node) in content.iter().enumerate() {
283            if let Node::Link {
284                url,
285                title: _,
286                content: _,
287            } = node
288            {
289                links.push((i, url.clone()));
290            }
291        }
292
293        // If no links, process paragraph normally
294        if links.is_empty() {
295            // Process paragraph content
296            for node in content {
297                let run = Run::new();
298                let run = self.process_inline_to_run(run, node)?;
299                if !run.children.is_empty() {
300                    para = para.add_run(run);
301                }
302            }
303        } else {
304            // If links exist, we need to process in segments
305            let mut last_idx = 0;
306            for (idx, url) in links {
307                // Process content before the link
308                for item in content.iter().take(idx).skip(last_idx) {
309                    let run = Run::new();
310                    let run = self.process_inline_to_run(run, item)?;
311                    if !run.children.is_empty() {
312                        para = para.add_run(run);
313                    }
314                }
315
316                // Process link
317                if let Node::Link {
318                    url: _,
319                    title: _,
320                    content: link_content,
321                } = &content[idx]
322                {
323                    let mut hyperlink_run = Run::new().style("Hyperlink");
324                    for child in link_content {
325                        hyperlink_run = self.process_inline_to_run(hyperlink_run, child)?;
326                    }
327
328                    // Create and add hyperlink
329                    if !hyperlink_run.children.is_empty() {
330                        let hyperlink =
331                            Hyperlink::new(&url, HyperlinkType::External).add_run(hyperlink_run);
332                        para = para.add_hyperlink(hyperlink);
333                    }
334                }
335
336                last_idx = idx + 1;
337            }
338
339            // Process content after the last link
340            for item in content.iter().skip(last_idx) {
341                let run = Run::new();
342                let run = self.process_inline_to_run(run, item)?;
343                if !run.children.is_empty() {
344                    para = para.add_run(run);
345                }
346            }
347        }
348
349        // Only add when paragraph has content
350        if !para.children.is_empty() {
351            docx = docx.add_paragraph(para);
352        }
353
354        Ok(docx)
355    }
356
357    /// Process node and add to document
358    fn process_node(&mut self, mut docx: Docx, node: &Node) -> Result<Docx> {
359        match node {
360            Node::Document(blocks) => {
361                for block in blocks {
362                    docx = self.process_node(docx, block)?;
363                }
364            }
365            Node::Paragraph(content) => {
366                docx = self.process_paragraph(docx, content, None)?;
367            }
368            Node::Heading {
369                level,
370                content,
371                heading_type: _,
372            } => {
373                // Determine heading style name
374                let style_name = match level {
375                    1 => "Heading1",
376                    2 => "Heading2",
377                    3 => "Heading3",
378                    4 => "Heading4",
379                    5 => "Heading5",
380                    _ => "Heading6",
381                };
382
383                docx = self.process_paragraph(docx, content, Some(style_name))?;
384            }
385            Node::BlockQuote(content) => {
386                for block in content {
387                    if let Node::Paragraph(inline) = block {
388                        docx = self.process_paragraph(docx, inline, Some("Blockquote"))?;
389                    } else {
390                        docx = self.process_node(docx, block)?;
391                    }
392                }
393            }
394            Node::CodeBlock {
395                language,
396                content,
397                block_type: _,
398            } => {
399                // Add language information
400                if let Some(lang) = language
401                    && !lang.is_empty()
402                {
403                    let lang_para = Paragraph::new()
404                        .style("CodeBlock")
405                        .add_run(Run::new().add_text(lang));
406                    docx = docx.add_paragraph(lang_para);
407                }
408
409                // Process code line by line, preserving line breaks
410                let lines: Vec<&str> = content.split('\n').collect();
411                for line in lines {
412                    let code_para = Paragraph::new()
413                        .style("CodeBlock")
414                        .add_run(Run::new().add_text(line));
415                    docx = docx.add_paragraph(code_para);
416                }
417            }
418            Node::OrderedList { start: _, items } => {
419                docx = self.process_ordered_list(docx, items)?;
420            }
421            Node::UnorderedList(items) => {
422                docx = self.process_unordered_list(docx, items)?;
423            }
424            Node::Table {
425                headers,
426                rows,
427                alignments: _,
428            } => {
429                docx = self.process_table(docx, headers, rows)?;
430            }
431            Node::Image { url, title: _, alt } => {
432                docx = self.process_image(docx, url, alt)?;
433            }
434            node if node.is_custom_type::<FigureNode>() => {
435                let figure_node = node.as_custom_type::<FigureNode>().unwrap();
436                docx = self.process_figure(docx, figure_node)?;
437            }
438            node if node.is_custom_type::<CenterNode>() => {
439                let center_node = node.as_custom_type::<CenterNode>().unwrap();
440                // Handle regular node but with center alignment
441                match &center_node.node {
442                    Node::Paragraph(content) => {
443                        docx = self.process_paragraph(docx, content, None)?;
444                        // Get the last paragraph and center it
445                        if let Some(DocumentChild::Paragraph(para)) =
446                            docx.document.children.last_mut()
447                        {
448                            para.property = para.property.clone().align(AlignmentType::Center);
449                        }
450                    }
451                    other => {
452                        docx = self.process_node(docx, other)?;
453                        // Get the last element and center it if it's a paragraph
454                        if let Some(DocumentChild::Paragraph(para)) =
455                            docx.document.children.last_mut()
456                        {
457                            para.property = para.property.clone().align(AlignmentType::Center);
458                        }
459                    }
460                }
461            }
462            node if node.is_custom_type::<crate::common::ExternalFrameNode>() => {
463                let external_frame = node
464                    .as_custom_type::<crate::common::ExternalFrameNode>()
465                    .unwrap();
466                let data = base64::engine::general_purpose::STANDARD
467                    .decode(&external_frame.svg)
468                    .map_err(|e| format!("Failed to decode SVG data: {e}"))?;
469
470                docx = self.image_processor.process_image_data(
471                    docx,
472                    &data,
473                    Some(&external_frame.alt_text),
474                    None,
475                );
476            }
477            node if node.is_custom_type::<HighlightNode>() => {
478                let highlight_node = node.as_custom_type::<HighlightNode>().unwrap();
479                // Handle HighlightNode at block level (convert to paragraph)
480                let mut para = Paragraph::new();
481                let mut run = Run::new().highlight("yellow");
482
483                for child in &highlight_node.content {
484                    run = self.process_inline_to_run(run, child)?;
485                }
486
487                if !run.children.is_empty() {
488                    para = para.add_run(run);
489                    docx = docx.add_paragraph(para);
490                }
491            }
492            node if node.is_custom_type::<InlineNode>() => {
493                let inline_node = node.as_custom_type::<InlineNode>().unwrap();
494                // Handle InlineNode at block level (convert to paragraph)
495                let mut para = Paragraph::new();
496                let mut run = Run::new();
497
498                for child in &inline_node.content {
499                    run = self.process_inline_to_run(run, child)?;
500                }
501
502                if !run.children.is_empty() {
503                    para = para.add_run(run);
504                    docx = docx.add_paragraph(para);
505                }
506            }
507            Node::ThematicBreak => {
508                // Add horizontal line as specially formatted paragraph
509                let hr_para = Paragraph::new()
510                    .style("HorizontalLine")
511                    .add_run(Run::new().add_text(""));
512                docx = docx.add_paragraph(hr_para);
513            }
514            // Inline elements should not be processed here individually
515            _ => {}
516        }
517
518        Ok(docx)
519    }
520
521    /// Process ordered list
522    fn process_ordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
523        // Enter deeper list level
524        self.list_level += 1;
525        let current_level = self.list_level - 1;
526
527        // Create new ordered list numbering definition
528        let (doc, num_id) = self.numbering.create_ordered_numbering(docx);
529        docx = doc;
530
531        // Process list items
532        for item in items {
533            if let ListItem::Ordered { content, .. } = item {
534                docx = self.process_list_item_content(docx, content, num_id, current_level)?;
535            }
536        }
537
538        // Exit list level
539        self.list_level -= 1;
540        Ok(docx)
541    }
542
543    /// Process unordered list
544    fn process_unordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
545        // Enter deeper list level
546        self.list_level += 1;
547        let current_level = self.list_level - 1;
548
549        // Create new unordered list numbering definition
550        let (doc, num_id) = self.numbering.create_unordered_numbering(docx);
551        docx = doc;
552
553        // Process list items
554        for item in items {
555            if let ListItem::Unordered { content } = item {
556                docx = self.process_list_item_content(docx, content, num_id, current_level)?;
557            }
558        }
559
560        // Exit list level
561        self.list_level -= 1;
562        Ok(docx)
563    }
564
565    /// Helper function to process list item content
566    fn process_list_item_content(
567        &mut self,
568        mut docx: Docx,
569        content: &[Node],
570        num_id: usize,
571        level: usize,
572    ) -> Result<Docx> {
573        // If content is empty, add empty paragraph
574        if content.is_empty() {
575            let empty_para = Paragraph::new()
576                .numbering(NumberingId::new(num_id), IndentLevel::new(level))
577                .add_run(Run::new().add_text(""));
578            return Ok(docx.add_paragraph(empty_para));
579        }
580
581        // Process content
582        for block in content {
583            match block {
584                Node::Paragraph(inline) => {
585                    let mut para = Paragraph::new()
586                        .numbering(NumberingId::new(num_id), IndentLevel::new(level));
587
588                    // Process paragraph content
589                    for node in inline {
590                        let run = Run::new();
591                        let run = self.process_inline_to_run(run, node)?;
592                        if !run.children.is_empty() {
593                            para = para.add_run(run);
594                        }
595                    }
596
597                    docx = docx.add_paragraph(para);
598                }
599                // Recursively process nested lists
600                Node::OrderedList { start: _, items: _ } | Node::UnorderedList(_) => {
601                    docx = self.process_node(docx, block)?;
602                }
603                _ => {
604                    docx = self.process_node(docx, block)?;
605                }
606            }
607        }
608
609        Ok(docx)
610    }
611
612    /// Process table
613    fn process_table(&self, mut docx: Docx, headers: &[Node], rows: &[Vec<Node>]) -> Result<Docx> {
614        let mut table = Table::new(vec![]).style("Table");
615
616        // Process table headers
617        if !headers.is_empty() {
618            let mut cells = Vec::new();
619
620            for header_node in headers {
621                let mut table_cell = TableCell::new();
622                let mut para = Paragraph::new();
623
624                let run = Run::new();
625                let run = self.process_inline_to_run(run, header_node)?;
626                if !run.children.is_empty() {
627                    para = para.add_run(run);
628                }
629
630                if !para.children.is_empty() {
631                    table_cell = table_cell.add_paragraph(para);
632                }
633
634                cells.push(table_cell);
635            }
636
637            if !cells.is_empty() {
638                let header_row = TableRow::new(cells);
639                table = table.add_row(header_row);
640            }
641        }
642
643        // Process table rows
644        for row in rows {
645            let mut cells = Vec::new();
646
647            for cell_node in row {
648                let mut table_cell = TableCell::new();
649                let mut para = Paragraph::new();
650
651                let run = Run::new();
652                let run = self.process_inline_to_run(run, cell_node)?;
653                if !run.children.is_empty() {
654                    para = para.add_run(run);
655                }
656
657                if !para.children.is_empty() {
658                    table_cell = table_cell.add_paragraph(para);
659                }
660
661                cells.push(table_cell);
662            }
663
664            if !cells.is_empty() {
665                let data_row = TableRow::new(cells);
666                table = table.add_row(data_row);
667            }
668        }
669
670        // Add table to document
671        docx = docx.add_table(table);
672
673        Ok(docx)
674    }
675
676    /// Generate DOCX document
677    pub fn generate_docx(&mut self, doc: &Node) -> Result<Vec<u8>> {
678        // Create DOCX document and initialize styles
679        let mut docx = Docx::new();
680        docx = self.styles.initialize_styles(docx);
681
682        // Process document content
683        docx = self.process_node(docx, doc)?;
684
685        // Initialize numbering definitions
686        docx = self.numbering.initialize_numbering(docx);
687
688        // Build and pack document
689        let docx_built = docx.build();
690        let mut buffer = Vec::new();
691        docx_built
692            .pack(&mut Cursor::new(&mut buffer))
693            .map_err(|e| format!("Failed to pack DOCX: {e}"))?;
694
695        Ok(buffer)
696    }
697}
698
699impl FormatWriter for DocxWriter {
700    fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
701        self.list_level = 0;
702        self.list_numbering_count = 0;
703        self.generate_docx(document)
704    }
705
706    fn write_eco(&mut self, _document: &Node, _output: &mut EcoString) -> Result<()> {
707        Err("DOCX format does not support EcoString output".into())
708    }
709}