1use std::sync::Arc;
4
5use cmark_writer::ast::{CustomNode, HtmlAttribute, HtmlElement as CmarkHtmlElement, Node};
6use cmark_writer::{CommonMarkWriter, WriteResult};
7use ecow::EcoString;
8use tinymist_project::LspWorld;
9use typst::html::{HtmlElement, HtmlNode, tag};
10
11use crate::Result;
12use crate::TypliteFeat;
13use crate::attributes::{AlertsAttr, HeadingAttr, RawAttr, TypliteAttrsParser, md_attr};
14use crate::common::{AlertNode, CenterNode, VerbatimNode};
15use crate::tags::md_tag;
16
17use super::{list::ListParser, table::TableParser};
18
/// Stateful converter that walks a Typst HTML tree and accumulates Markdown
/// AST nodes.
///
/// Block-level output is collected in `blocks`; inline output is staged in
/// `inline_buffer` until it is flushed into a block (for example a paragraph).
pub struct HtmlToAstParser {
    /// Counter that starts at 0 in [`HtmlToAstParser::new`].
    // NOTE(review): presumably numbers exported assets; it is not read in this
    // part of the file — confirm against the other `impl` blocks.
    pub asset_counter: usize,
    /// Feature configuration handed to the parser by its creator.
    pub feat: TypliteFeat,
    /// Shared handle to the world passed to [`HtmlToAstParser::new`].
    pub world: Arc<LspWorld>,
    /// Starts at 0 in [`HtmlToAstParser::new`].
    // NOTE(review): presumably the current list nesting depth, maintained by
    // the list parser — confirm.
    pub list_level: usize,
    /// Block-level nodes produced so far, in document order.
    pub blocks: Vec<Node>,
    /// Inline nodes waiting to be wrapped into a block node.
    pub inline_buffer: Vec<Node>,
}
28
29impl HtmlToAstParser {
30 pub fn new(feat: TypliteFeat, world: &Arc<LspWorld>) -> Self {
31 Self {
32 feat,
33 world: world.clone(),
34 asset_counter: 0,
35 list_level: 0,
36 blocks: Vec::new(),
37 inline_buffer: Vec::new(),
38 }
39 }
40
41 pub fn convert_element(&mut self, element: &HtmlElement) -> Result<()> {
42 match element.tag {
43 tag::head => Ok(()),
44
45 tag::html | tag::body | md_tag::doc => {
46 self.convert_children(element)?;
47 Ok(())
48 }
49
50 tag::p | tag::span | tag::div => {
51 self.convert_children(element)?;
52 Ok(())
53 }
54
55 tag::strong | md_tag::strong => self.convert_strong(element),
56 tag::em | md_tag::emph => self.convert_emphasis(element),
57
58 tag::br => {
59 self.inline_buffer.push(Node::HardBreak);
60 Ok(())
61 }
62
63 tag::ol => {
64 self.flush_inline_buffer();
65 let items = ListParser::convert_list(self, element);
66 self.blocks.push(Node::OrderedList {
67 start: 1,
68 items: items?,
69 });
70 Ok(())
71 }
72
73 tag::ul => {
74 self.flush_inline_buffer();
75 let items = ListParser::convert_list(self, element);
76 self.blocks.push(Node::UnorderedList(items?));
77 Ok(())
78 }
79
80 md_tag::parbreak => {
81 self.flush_inline_buffer();
82 Ok(())
83 }
84
85 md_tag::heading => {
86 self.flush_inline_buffer();
87 let attrs = HeadingAttr::parse(&element.attrs)?;
88 self.convert_children(element)?;
89 self.flush_inline_buffer_as_block(|content| {
90 Node::heading(attrs.level as u8 + 1, content)
91 });
92 Ok(())
93 }
94
95 md_tag::raw => {
96 let attrs = RawAttr::parse(&element.attrs)?;
97 if attrs.block {
98 self.flush_inline_buffer();
99 self.blocks
100 .push(Node::code_block(Some(attrs.lang), attrs.text));
101 } else {
102 self.inline_buffer.push(Node::InlineCode(attrs.text));
103 }
104 Ok(())
105 }
106
107 md_tag::quote => {
108 let prev_blocks = std::mem::take(&mut self.blocks);
109 self.flush_inline_buffer();
110 self.convert_children(element)?;
111 let content = Node::Paragraph(std::mem::take(&mut self.inline_buffer));
112 let mut quote = std::mem::take(&mut self.blocks);
113 quote.push(content);
114 self.blocks.clear();
115 self.blocks.extend(prev_blocks);
116 self.blocks.push(Node::BlockQuote(quote));
117 Ok(())
118 }
119
120 md_tag::figure => self.convert_figure(element),
121 md_tag::highlight => self.convert_highlight(element),
122 md_tag::strike => self.convert_strikethrough(element),
123 md_tag::link => self.convert_link(element),
124 md_tag::image => self.convert_image(element),
125
126 md_tag::linebreak => {
127 self.inline_buffer.push(Node::HardBreak);
128 Ok(())
129 }
130
131 md_tag::source => {
132 let src = self.convert_source(element);
133 self.inline_buffer.push(src);
134 Ok(())
135 }
136
137 md_tag::table | md_tag::grid => {
138 self.flush_inline_buffer();
139 if let Some(table) = TableParser::convert_table(self, element)? {
140 self.blocks.push(table);
141 }
142 Ok(())
143 }
144
145 md_tag::idoc => {
146 let src = self.convert_idoc(element);
147 self.inline_buffer.push(src);
148 Ok(())
149 }
150
151 md_tag::math_equation_inline | md_tag::math_equation_block => {
152 if element.tag == md_tag::math_equation_block {
153 self.flush_inline_buffer();
154 self.convert_children(element)?;
155 let content = std::mem::take(&mut self.inline_buffer);
156 self.blocks
157 .push(Node::Custom(Box::new(CenterNode::new(content))));
158 } else {
159 self.convert_children(element)?;
160 }
161 Ok(())
162 }
163
164 md_tag::alerts => {
165 self.flush_inline_buffer();
166 let attrs = AlertsAttr::parse(&element.attrs)?;
167 let prev_blocks = std::mem::take(&mut self.blocks);
168 self.flush_inline_buffer();
169 self.convert_children(element)?;
170 let content = Node::Paragraph(std::mem::take(&mut self.inline_buffer));
171 let mut quote = std::mem::take(&mut self.blocks);
172 quote.push(content);
173 self.blocks.clear();
174 self.blocks.extend(prev_blocks);
175 self.blocks.push(Node::Custom(Box::new(AlertNode {
176 content: quote,
177 class: attrs.class,
178 })));
179 Ok(())
180 }
181
182 md_tag::verbatim => {
183 self.inline_buffer.push(Node::Custom(Box::new(VerbatimNode {
184 content: element
185 .attrs
186 .0
187 .iter()
188 .find(|(name, _)| *name == md_attr::src)
189 .map(|(_, value)| value.clone())
190 .unwrap_or_default(),
191 })));
192 Ok(())
193 }
194
195 _ => {
196 let tag_name = element.tag.resolve().to_string();
197
198 if !tag_name.starts_with("m1") {
199 let html_element = self.create_html_element(element)?;
200 self.inline_buffer.push(html_element);
201 } else {
202 self.convert_children(element)?;
203 }
204 Ok(())
205 }
206 }
207 }
208
209 pub(crate) fn create_html_element(&mut self, element: &HtmlElement) -> Result<Node> {
211 let attributes = element
212 .attrs
213 .0
214 .iter()
215 .map(|(name, value)| HtmlAttribute {
216 name: name.resolve().to_string().into(),
217 value: value.clone(),
218 })
219 .collect();
220
221 let mut children = Vec::new();
222 self.convert_children_into(&mut children, element)?;
223
224 Ok(Node::HtmlElement(CmarkHtmlElement {
225 tag: element.tag.resolve().to_string().into(),
226 attributes,
227 children,
228 self_closing: element.children.is_empty(),
229 }))
230 }
231
232 pub fn flush_inline_buffer(&mut self) {
233 if !self.inline_buffer.is_empty() {
234 self.blocks
235 .push(Node::Paragraph(std::mem::take(&mut self.inline_buffer)));
236 }
237 }
238
239 pub fn flush_inline_buffer_as_block(&mut self, make_block: impl FnOnce(Vec<Node>) -> Node) {
240 if !self.inline_buffer.is_empty() {
241 self.blocks
242 .push(make_block(std::mem::take(&mut self.inline_buffer)));
243 }
244 }
245
246 pub fn convert_children(&mut self, element: &HtmlElement) -> Result<()> {
247 for child in &element.children {
248 match child {
249 HtmlNode::Text(text, _) => {
250 self.inline_buffer.push(Node::Text(text.clone()));
251 }
252 HtmlNode::Element(element) => {
253 self.convert_element(element)?;
254 }
255 HtmlNode::Frame(frame) => {
256 let res = self.convert_frame(frame);
257 self.inline_buffer.push(res);
258 }
259 HtmlNode::Tag(..) => {}
260 }
261 }
262 Ok(())
263 }
264
265 pub fn convert_children_into(
266 &mut self,
267 target: &mut Vec<Node>,
268 element: &HtmlElement,
269 ) -> Result<()> {
270 let prev_buffer = std::mem::take(&mut self.inline_buffer);
271 self.convert_children(element)?;
272 target.append(&mut self.inline_buffer);
273 self.inline_buffer = prev_buffer;
274 Ok(())
275 }
276}
277
/// Custom inline node wrapping comment text; its `CustomNode::write`
/// implementation emits the text as `<!-- text -->`.
#[derive(Debug, Clone)]
pub(crate) struct Comment(pub EcoString);
280
281impl CustomNode for Comment {
282 fn as_any(&self) -> &dyn std::any::Any {
283 self
284 }
285
286 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
287 self
288 }
289
290 fn write(&self, writer: &mut CommonMarkWriter) -> WriteResult<()> {
291 writer.write_str("<!-- ")?;
292 writer.write_str(&self.0)?;
293 writer.write_str(" -->")?;
294 Ok(())
295 }
296
297 fn clone_box(&self) -> Box<dyn CustomNode> {
298 Box::new(self.clone())
299 }
300
301 fn eq_box(&self, other: &dyn CustomNode) -> bool {
302 if let Some(other) = other.as_any().downcast_ref::<Comment>() {
303 self.0 == other.0
304 } else {
305 false
306 }
307 }
308
309 fn is_block(&self) -> bool {
310 false
311 }
312}
313
314impl HtmlToAstParser {
315 pub fn is_block_element(element: &HtmlElement) -> bool {
316 matches!(
317 element.tag,
318 tag::p
319 | tag::div
320 | tag::blockquote
321 | tag::h1
322 | tag::h2
323 | tag::h3
324 | tag::h4
325 | tag::h5
326 | tag::h6
327 | tag::hr
328 | tag::pre
329 | tag::table
330 | tag::section
331 | tag::article
332 | tag::header
333 | tag::footer
334 | tag::main
335 | tag::aside
336 | tag::nav
337 | tag::ul
338 | tag::ol
339 | md_tag::heading
340 | md_tag::quote
341 | md_tag::raw
342 | md_tag::parbreak
343 | md_tag::table
344 | md_tag::grid
345 | md_tag::figure
346 )
347 }
348
349 pub fn process_list_item_element(&mut self, element: &HtmlElement) -> Result<Vec<Node>> {
350 if element.tag == tag::ul || element.tag == tag::ol {
351 let items = super::list::ListParser::convert_list(self, element)?;
352 if element.tag == tag::ul {
353 return Ok(vec![Node::UnorderedList(items)]);
354 } else {
355 return Ok(vec![Node::OrderedList { start: 1, items }]);
356 }
357 }
358
359 let prev_blocks = std::mem::take(&mut self.blocks);
360 let prev_buffer = std::mem::take(&mut self.inline_buffer);
361
362 self.convert_element(element)?;
363 let mut result = Vec::new();
364
365 if !self.blocks.is_empty() {
366 result.extend(std::mem::take(&mut self.blocks));
367 } else if !self.inline_buffer.is_empty() {
368 if Self::is_block_element(element) {
369 result.push(Node::Paragraph(std::mem::take(&mut self.inline_buffer)));
370 } else {
371 result = std::mem::take(&mut self.inline_buffer);
372 }
373 }
374
375 self.blocks = prev_blocks;
376 self.inline_buffer = prev_buffer;
377
378 Ok(result)
379 }
380
381 pub fn parse(mut self, root: &HtmlElement) -> Result<Node> {
382 self.blocks.clear();
383 self.inline_buffer.clear();
384
385 self.convert_element(root)?;
386 self.flush_inline_buffer();
387
388 Ok(Node::Document(self.blocks))
389 }
390}