// typlite/parser/table.rs
//! HTML table parsing: converts HTML table elements into CommonMark AST nodes.
use cmark_writer::ast::Node;
use cmark_writer::gfm::TableAlignment;
use typst::html::{tag, HtmlElement, HtmlNode};
use typst::utils::PicoStr;
use crate::tags::md_tag;
use crate::Result;
use super::core::HtmlToAstParser;
/// Table parser.
///
/// A stateless unit type: all of its functionality is exposed through
/// associated functions, with [`TableParser::convert_table`] as the public
/// entry point.
pub struct TableParser;
impl TableParser {
/// Converts an HTML table wrapper element into a CommonMark AST node.
///
/// The wrapper is first resolved to the `<table>` element it contains. A table
/// in which some cell carries a `colspan` or `rowspan` attribute is handed to
/// `parser.create_html_element` instead of being turned into a table node.
///
/// # Returns
///
/// - `Ok(Some(node))` with either the table node or the HTML fallback node.
/// - `Ok(None)` when no `<table>` is found, when the HTML fallback fails, or
///   when the table yields neither header cells nor rows.
///
/// # Errors
///
/// Propagates errors raised while extracting the table content.
pub fn convert_table(
    parser: &mut HtmlToAstParser,
    element: &HtmlElement,
) -> Result<Option<Node>> {
    let table = match Self::find_real_table_element(element) {
        Some(found) => found,
        None => return Ok(None),
    };

    if Self::table_has_complex_cells(table) {
        // Best effort: a failed fallback is reported as "no node", not as an error.
        return Ok(parser.create_html_element(table).ok());
    }

    let mut header_cells = Vec::new();
    let mut body_rows = Vec::new();
    // Passed by reference; the extractor clears the flag once the first row is consumed.
    let mut in_header = true;
    Self::extract_table_content(
        parser,
        table,
        &mut header_cells,
        &mut body_rows,
        &mut in_header,
    )?;

    Self::create_table_node(header_cells, body_rows)
}
/// Resolves a wrapper element to the `<table>` element it contains.
///
/// A `md_tag::grid` wrapper is searched two levels deep
/// (grid -> `md_tag::table` -> `<table>`); for any other element the
/// `<table>` is looked up among its direct children.
fn find_real_table_element(element: &HtmlElement) -> Option<&HtmlElement> {
    if element.tag != md_tag::grid {
        // Plain table wrapper: wrapper -> table
        return Self::find_table_direct(element);
    }

    // Grid wrapper: grid -> table wrapper -> table
    Self::find_table_in_grid(element)
}
/// Searches a grid wrapper for its nested `<table>` element.
///
/// Each direct `md_tag::table` child of `grid_element` is inspected in
/// document order, and the first `tag::table` element found among the direct
/// children of such a wrapper is returned. Returns `None` when there is none.
fn find_table_in_grid(grid_element: &HtmlElement) -> Option<&HtmlElement> {
    grid_element
        .children
        .iter()
        // Keep only the `md_tag::table` wrapper elements.
        .filter_map(|node| match node {
            HtmlNode::Element(wrapper) if wrapper.tag == md_tag::table => Some(wrapper),
            _ => None,
        })
        // Walk the wrappers' children lazily, preserving document order.
        .flat_map(|wrapper| wrapper.children.iter())
        .find_map(|node| match node {
            HtmlNode::Element(candidate) if candidate.tag == tag::table => Some(candidate),
            _ => None,
        })
}
/// Returns the first direct child of `element` that is a `<table>` element,
/// or `None` when no such child exists.
fn find_table_direct(element: &HtmlElement) -> Option<&HtmlElement> {
    element.children.iter().find_map(|node| match node {
        HtmlNode::Element(candidate) if candidate.tag == tag::table => Some(candidate),
        _ => None,
    })
}
/// Collects header cells and data rows from a `<table>` element or one of its
/// row groups.
///
/// Direct `<tr>` children are processed in document order. `<thead>`,
/// `<tbody>` and `<tfoot>` children are descended into, so rows wrapped in row
/// groups are collected as well instead of being silently skipped.
///
/// `is_header` tells whether the next `<tr>` is the header row: that row's
/// cells are appended to `headers` and the flag is cleared. Every later row
/// that contains at least one cell is pushed onto `rows`.
///
/// # Errors
///
/// Propagates any error raised while converting a cell's children.
fn extract_table_content(
    parser: &mut HtmlToAstParser,
    table: &HtmlElement,
    headers: &mut Vec<Vec<Node>>,
    rows: &mut Vec<Vec<Vec<Node>>>,
    is_header: &mut bool,
) -> Result<()> {
    for child in &table.children {
        let HtmlNode::Element(elem) = child else {
            continue;
        };

        if elem.tag == tag::tr {
            let current_row = Self::process_table_row(parser, elem, *is_header, headers)?;
            // After the first row, treat remaining rows as data rows.
            if *is_header {
                *is_header = false;
            } else if !current_row.is_empty() {
                rows.push(current_row);
            }
        } else if elem.tag == tag::thead || elem.tag == tag::tbody || elem.tag == tag::tfoot {
            // Row groups only wrap `<tr>` elements; share the header flag so
            // the first row found anywhere in the table becomes the header.
            Self::extract_table_content(parser, elem, headers, rows, is_header)?;
        }
    }

    Ok(())
}

/// Converts the cells of a single `<tr>` element.
///
/// Both `<td>` and `<th>` children count as cells; other children are
/// ignored. When `is_header` is `true`, each cell's converted content is
/// appended to `headers` and the returned row is empty. Otherwise the cells
/// are returned as the row, in document order.
///
/// # Errors
///
/// Propagates any error raised while converting a cell's children.
fn process_table_row(
    parser: &mut HtmlToAstParser,
    row_elem: &HtmlElement,
    is_header: bool,
    headers: &mut Vec<Vec<Node>>,
) -> Result<Vec<Vec<Node>>> {
    let mut current_row = Vec::new();

    for cell_node in &row_elem.children {
        let HtmlNode::Element(cell) = cell_node else {
            continue;
        };
        // Header cells (`<th>`) carry content just like data cells (`<td>`).
        if cell.tag != tag::td && cell.tag != tag::th {
            continue;
        }

        let mut cell_content = Vec::new();
        parser.convert_children_into(&mut cell_content, cell)?;

        if is_header {
            headers.push(cell_content);
        } else {
            current_row.push(cell_content);
        }
    }

    Ok(current_row)
}
/// Check if the table has complex cells (rowspan/colspan)
fn table_has_complex_cells(table: &HtmlElement) -> bool {
for row_node in &table.children {
if let HtmlNode::Element(row_elem) = row_node {
if row_elem.tag == tag::tr {
for cell_node in &row_elem.children {
if let HtmlNode::Element(cell) = cell_node {
if (cell.tag == tag::td || cell.tag == tag::th)
&& cell.attrs.0.iter().any(|(name, _)| {
let name = name.into_inner();
name == PicoStr::constant("colspan")
|| name == PicoStr::constant("rowspan")
})
{
return true;
}
}
}
}
}
}
false
}
/// Builds the `Node::Table` for the collected header cells and data rows.
///
/// Returns `Ok(None)` when both `headers` and `rows` are empty. Otherwise the
/// node lists of all cells are concatenated per row, and one
/// `TableAlignment::None` entry is emitted per header cell (at least one).
fn create_table_node(
    headers: Vec<Vec<Node>>,
    rows: Vec<Vec<Vec<Node>>>,
) -> Result<Option<Node>> {
    // Nothing to render: neither a header cell nor a data row was collected.
    if headers.is_empty() && rows.is_empty() {
        return Ok(None);
    }

    // Sized by the number of header cells; must be computed before `headers` is consumed.
    let alignments = vec![TableAlignment::None; headers.len().max(1)];

    // NOTE(review): each cell's node list is spliced into its row, so a cell
    // holding zero or several nodes changes the row length relative to
    // `alignments` — confirm that `Node::Table` tolerates this.
    let header_nodes = headers.into_iter().flatten().collect();
    let row_nodes: Vec<_> = rows
        .into_iter()
        .map(|cells| cells.into_iter().flatten().collect())
        .collect();

    Ok(Some(Node::Table {
        headers: header_nodes,
        rows: row_nodes,
        alignments,
    }))
}
}