// typlite/parser/table.rs
//! HTML table parsing module, processes the conversion of table elements

use cmark_writer::ast::Node;
use cmark_writer::gfm::TableAlignment;
use typst::html::{tag, HtmlElement, HtmlNode};
use typst::utils::PicoStr;

use crate::tags::md_tag;
use crate::Result;

use super::core::HtmlToAstParser;

/// Table parser.
///
/// A field-less marker type: all of the table conversion logic lives in
/// associated functions that take the HTML element (and, where conversion of
/// nested content is needed, the [`HtmlToAstParser`]) as explicit arguments.
pub struct TableParser;

impl TableParser {
    /// Convert an HTML table wrapper element to a CommonMark AST node.
    ///
    /// `element` is the wrapper produced for a table or grid; the actual
    /// `<table>` element is looked up inside it.
    ///
    /// Returns:
    /// - `Ok(None)` when no `<table>` element is found, when the table has no
    ///   header and no data cells, or when a table with spanning cells could
    ///   not be turned into a raw HTML node;
    /// - `Ok(Some(node))` with a raw HTML node when any cell carries a
    ///   `colspan`/`rowspan` attribute (those are not converted to a
    ///   Markdown table here);
    /// - `Ok(Some(Node::Table { .. }))` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates errors raised while converting cell children.
    pub fn convert_table(
        parser: &mut HtmlToAstParser,
        element: &HtmlElement,
    ) -> Result<Option<Node>> {
        let Some(table) = Self::find_real_table_element(element) else {
            return Ok(None);
        };

        // Tables with spanning cells fall back to a raw HTML element.
        // A failure to build that element is deliberately mapped to `None`
        // rather than propagated.
        if Self::table_has_complex_cells(table) {
            return Ok(parser.create_html_element(table).ok());
        }

        let mut headers = Vec::new();
        let mut rows = Vec::new();
        let mut is_header = true;

        Self::extract_table_content(parser, table, &mut headers, &mut rows, &mut is_header)?;
        Self::create_table_node(headers, rows)
    }

    /// Iterate over the direct children of `element` that are elements,
    /// skipping every other kind of HTML node.
    fn child_elements(element: &HtmlElement) -> impl Iterator<Item = &HtmlElement> {
        element.children.iter().filter_map(|node| match node {
            HtmlNode::Element(child) => Some(child),
            _ => None,
        })
    }

    /// Whether `element` is a table cell, i.e. a `<td>` or a `<th>`.
    fn is_cell(element: &HtmlElement) -> bool {
        element.tag == tag::td || element.tag == tag::th
    }

    /// Find the real `<table>` element in the HTML structure.
    ///
    /// A grid wrapper nests the table one level deeper
    /// (grid -> table wrapper -> `<table>`); any other wrapper is expected to
    /// hold the `<table>` as a direct child.
    fn find_real_table_element(element: &HtmlElement) -> Option<&HtmlElement> {
        if element.tag == md_tag::grid {
            Self::find_table_in_grid(element)
        } else {
            Self::find_table_direct(element)
        }
    }

    /// Find the first `<table>` nested inside a table wrapper that is itself
    /// a direct child of `grid_element`.
    fn find_table_in_grid(grid_element: &HtmlElement) -> Option<&HtmlElement> {
        Self::child_elements(grid_element)
            .filter(|wrapper| wrapper.tag == md_tag::table)
            .find_map(Self::find_table_direct)
    }

    /// Find the first direct child of `element` that is a `<table>`.
    fn find_table_direct(element: &HtmlElement) -> Option<&HtmlElement> {
        Self::child_elements(element).find(|child| child.tag == tag::table)
    }

    /// Extract header and data cells from the direct `<tr>` children of
    /// `table`.
    ///
    /// While `*is_header` is true the next `<tr>` feeds `headers`; the flag
    /// is then cleared so every later `<tr>` becomes a data row. Data rows
    /// without any cell are skipped.
    ///
    /// # Errors
    ///
    /// Propagates errors raised while converting cell children.
    fn extract_table_content(
        parser: &mut HtmlToAstParser,
        table: &HtmlElement,
        headers: &mut Vec<Vec<Node>>,
        rows: &mut Vec<Vec<Vec<Node>>>,
        is_header: &mut bool,
    ) -> Result<()> {
        for row_elem in Self::child_elements(table).filter(|row| row.tag == tag::tr) {
            let current_row = Self::process_table_row(parser, row_elem, *is_header, headers)?;

            // After the first row, treat remaining rows as data rows.
            if *is_header {
                *is_header = false;
            } else if !current_row.is_empty() {
                rows.push(current_row);
            }
        }
        Ok(())
    }

    /// Convert the cells of one `<tr>`.
    ///
    /// Both `<td>` and `<th>` children count as cells, matching the cell
    /// definition used by [`Self::table_has_complex_cells`]; a header row
    /// made of `<th>` cells would otherwise lose all of its content.
    ///
    /// When `is_header` is true, each cell's converted content is appended to
    /// `headers` and the returned row is empty; otherwise the converted cells
    /// are returned as the row.
    ///
    /// # Errors
    ///
    /// Propagates errors raised while converting cell children.
    fn process_table_row(
        parser: &mut HtmlToAstParser,
        row_elem: &HtmlElement,
        is_header: bool,
        headers: &mut Vec<Vec<Node>>,
    ) -> Result<Vec<Vec<Node>>> {
        let mut current_row = Vec::new();

        for cell in Self::child_elements(row_elem).filter(|cell| Self::is_cell(cell)) {
            let mut cell_content = Vec::new();
            parser.convert_children_into(&mut cell_content, cell)?;

            if is_header {
                headers.push(cell_content);
            } else {
                current_row.push(cell_content);
            }
        }

        Ok(current_row)
    }

    /// Check if the table has complex cells, i.e. whether any cell in a
    /// direct `<tr>` child of `table` carries a `colspan` or `rowspan`
    /// attribute (regardless of the attribute's value).
    fn table_has_complex_cells(table: &HtmlElement) -> bool {
        let colspan = PicoStr::constant("colspan");
        let rowspan = PicoStr::constant("rowspan");

        Self::child_elements(table)
            .filter(|row| row.tag == tag::tr)
            .any(|row| {
                Self::child_elements(row)
                    .filter(|cell| Self::is_cell(cell))
                    .any(|cell| {
                        cell.attrs.0.iter().any(|(name, _)| {
                            let name = name.into_inner();
                            name == colspan || name == rowspan
                        })
                    })
            })
    }

    /// Build the table node from the collected header cells and data rows.
    ///
    /// Returns `Ok(None)` when there are neither header cells nor data rows.
    /// Every column gets `TableAlignment::None`; the alignment list has one
    /// entry per header cell, and at least one entry.
    fn create_table_node(
        headers: Vec<Vec<Node>>,
        rows: Vec<Vec<Vec<Node>>>,
    ) -> Result<Option<Node>> {
        if headers.is_empty() && rows.is_empty() {
            return Ok(None);
        }

        // Create alignment array (default to None for all columns).
        let alignments = vec![TableAlignment::None; headers.len().max(1)];

        // NOTE(review): flattening splices every node of a cell directly into
        // its row, so a cell holding several nodes yields several entries and
        // an empty cell yields none — confirm this matches how the table node
        // expects cells to be represented.
        let flattened_headers = headers.into_iter().flatten().collect();
        let flattened_rows: Vec<_> = rows
            .into_iter()
            .map(|row| row.into_iter().flatten().collect())
            .collect();

        Ok(Some(Node::Table {
            headers: flattened_headers,
            rows: flattened_rows,
            alignments,
        }))
    }
}