1use std::sync::Arc;
4
5use typst::diag::SourceDiagnostic;
6use typst_syntax::Span;
7
8use cmark_writer::WriteResult;
9use cmark_writer::ast::{CustomNode, HtmlAttribute, HtmlElement as CmarkHtmlElement, Node};
10use cmark_writer::writer::InlineWriterProxy;
11use ecow::EcoString;
12use tinymist_project::LspWorld;
13use typst_html::{HtmlElement, HtmlNode, tag};
14
15use crate::Result;
16use crate::TypliteFeat;
17use crate::attributes::{AlertsAttr, HeadingAttr, RawAttr, TypliteAttrsParser, VerbatimAttr};
18use crate::common::{AlertNode, BlockVerbatimNode, CenterNode, VerbatimNode};
19use crate::diagnostics::WarningCollector;
20use crate::tags::md_tag;
21
22use super::{list::ListParser, table::TableParser};
23
/// Stateful converter that walks a Typst HTML tree and accumulates Markdown AST nodes.
///
/// Output is collected in two places: `blocks` holds finished block-level nodes, while
/// `inline_buffer` holds inline nodes that have not yet been wrapped into a block.
pub struct HtmlToAstParser {
    /// Counter initialised to zero by `new`; not read in this file.
    /// NOTE(review): presumably numbers exported assets — confirm against the users.
    pub asset_counter: usize,
    /// Feature configuration; `wrap_info` is consulted when remapping warning spans.
    pub feat: TypliteFeat,
    /// Shared world handle, used when remapping spans for diagnostics.
    pub world: Arc<LspWorld>,
    /// Initialised to zero by `new`; not read in this file.
    /// NOTE(review): presumably the current list nesting depth — confirm.
    pub list_level: usize,
    /// Block-level nodes produced so far.
    pub blocks: Vec<Node>,
    /// Pending inline nodes, turned into a paragraph (or another block) on flush.
    pub inline_buffer: Vec<Node>,
    /// Sink for warnings recorded through `warn_at`.
    pub(crate) warnings: WarningCollector,
}
34
35impl HtmlToAstParser {
36 pub(crate) fn new(
37 feat: TypliteFeat,
38 world: &Arc<LspWorld>,
39 warnings: WarningCollector,
40 ) -> Self {
41 Self {
42 feat,
43 world: world.clone(),
44 asset_counter: 0,
45 list_level: 0,
46 blocks: Vec::new(),
47 inline_buffer: Vec::new(),
48 warnings,
49 }
50 }
51
52 pub fn convert_element(&mut self, element: &HtmlElement) -> Result<()> {
53 match element.tag {
54 tag::head => Ok(()),
55
56 tag::html | tag::body | md_tag::doc => {
57 self.convert_children(element)?;
58 Ok(())
59 }
60
61 tag::p | tag::span | tag::div => {
62 self.convert_children(element)?;
63 Ok(())
64 }
65
66 tag::strong | md_tag::strong => self.convert_strong(element),
67 tag::em | md_tag::emph => self.convert_emphasis(element),
68 tag::mark => self.convert_highlight(element),
69 tag::s => self.convert_strikethrough(element),
70
71 tag::br => {
72 self.inline_buffer.push(Node::HardBreak);
73 Ok(())
74 }
75
76 tag::ol => {
77 self.flush_inline_buffer();
78 let items = ListParser::convert_list(self, element);
79 self.blocks.push(Node::OrderedList {
80 start: 1,
81 items: items?,
82 });
83 Ok(())
84 }
85
86 tag::ul => {
87 self.flush_inline_buffer();
88 let items = ListParser::convert_list(self, element);
89 self.blocks.push(Node::UnorderedList(items?));
90 Ok(())
91 }
92
93 md_tag::parbreak => {
94 self.flush_inline_buffer();
95 Ok(())
96 }
97
98 md_tag::heading => {
99 self.flush_inline_buffer();
100 let attrs = HeadingAttr::parse(&element.attrs)?;
101 self.convert_children(element)?;
102 self.flush_inline_buffer_as_block(|content| {
103 Node::heading(attrs.level as u8 + 1, content)
104 });
105 Ok(())
106 }
107
108 md_tag::raw => {
109 let attrs = RawAttr::parse(&element.attrs)?;
110 if attrs.block {
111 self.flush_inline_buffer();
112 self.blocks
113 .push(Node::code_block(Some(attrs.lang), attrs.text));
114 } else {
115 self.inline_buffer.push(Node::InlineCode(attrs.text));
116 }
117 Ok(())
118 }
119
120 md_tag::quote => {
121 let prev_blocks = std::mem::take(&mut self.blocks);
122 self.flush_inline_buffer();
123 self.convert_children(element)?;
124 let content = Node::Paragraph(std::mem::take(&mut self.inline_buffer));
125 let mut quote = std::mem::take(&mut self.blocks);
126 quote.push(content);
127 self.blocks.clear();
128 self.blocks.extend(prev_blocks);
129 self.blocks.push(Node::BlockQuote(quote));
130 Ok(())
131 }
132
133 md_tag::figure => self.convert_figure(element),
134 md_tag::link => self.convert_link(element),
135 md_tag::image => self.convert_image(element),
136
137 md_tag::linebreak => {
138 self.inline_buffer.push(Node::HardBreak);
139 Ok(())
140 }
141
142 md_tag::source => {
143 let src = self.convert_source(element);
144 self.inline_buffer.push(src);
145 Ok(())
146 }
147
148 md_tag::table | md_tag::grid => {
149 self.flush_inline_buffer();
150 if let Some(table) = TableParser::convert_table(self, element)? {
151 self.blocks.push(table);
152 }
153 Ok(())
154 }
155
156 md_tag::idoc => {
157 let src = self.convert_idoc(element);
158 self.inline_buffer.push(src);
159 Ok(())
160 }
161
162 md_tag::math_equation_inline | md_tag::math_equation_block => {
163 if element.tag == md_tag::math_equation_block {
164 self.flush_inline_buffer();
165 self.convert_children(element)?;
166 let content = std::mem::take(&mut self.inline_buffer);
167 self.blocks
168 .push(Node::Custom(Box::new(CenterNode::new(content))));
169 } else {
170 self.convert_children(element)?;
171 }
172 Ok(())
173 }
174
175 md_tag::alerts => {
176 self.flush_inline_buffer();
177 let attrs = AlertsAttr::parse(&element.attrs)?;
178 let prev_blocks = std::mem::take(&mut self.blocks);
179 self.flush_inline_buffer();
180 self.convert_children(element)?;
181 let content = Node::Paragraph(std::mem::take(&mut self.inline_buffer));
182 let mut quote = std::mem::take(&mut self.blocks);
183 quote.push(content);
184 self.blocks.clear();
185 self.blocks.extend(prev_blocks);
186 self.blocks.push(Node::Custom(Box::new(AlertNode {
187 content: quote,
188 class: attrs.class,
189 })));
190 Ok(())
191 }
192
193 md_tag::verbatim => {
194 let attrs = VerbatimAttr::parse(&element.attrs)?;
195 if attrs.block {
196 self.flush_inline_buffer();
197 self.blocks.push(Node::Custom(Box::new(BlockVerbatimNode {
198 content: attrs.src,
199 })));
200 } else {
201 self.inline_buffer
202 .push(Node::Custom(Box::new(VerbatimNode { content: attrs.src })));
203 }
204 Ok(())
205 }
206
207 _ => {
208 let tag_name = element.tag.resolve().to_string();
209
210 if !tag_name.starts_with("m1") {
211 let html_element = self.create_html_element(element)?;
218 self.inline_buffer.push(html_element);
219 } else {
220 self.convert_children(element)?;
221 }
222 Ok(())
223 }
224 }
225 }
226
227 pub(crate) fn create_html_element(&mut self, element: &HtmlElement) -> Result<Node> {
229 let attributes = element
230 .attrs
231 .0
232 .iter()
233 .map(|(name, value)| HtmlAttribute {
234 name: name.resolve().to_string().into(),
235 value: value.clone(),
236 })
237 .collect();
238
239 let (inline_nodes, block_nodes) = self.capture_children(element)?;
240
241 let mut children = Vec::new();
242 if !inline_nodes.is_empty() {
243 children.extend(inline_nodes);
244 }
245 children.extend(block_nodes);
246
247 Ok(Node::HtmlElement(CmarkHtmlElement {
248 tag: element.tag.resolve().to_string().into(),
249 attributes,
250 children,
251 self_closing: element.children.is_empty(),
252 }))
253 }
254
255 pub fn flush_inline_buffer(&mut self) {
256 if !self.inline_buffer.is_empty() {
257 self.blocks
258 .push(Node::Paragraph(std::mem::take(&mut self.inline_buffer)));
259 }
260 }
261
262 pub fn flush_inline_buffer_as_block(&mut self, make_block: impl FnOnce(Vec<Node>) -> Node) {
263 if !self.inline_buffer.is_empty() {
264 self.blocks
265 .push(make_block(std::mem::take(&mut self.inline_buffer)));
266 }
267 }
268
269 pub fn convert_children(&mut self, element: &HtmlElement) -> Result<()> {
270 for child in &element.children {
271 match child {
272 HtmlNode::Text(text, _) => {
273 self.inline_buffer.push(Node::Text(text.clone()));
274 }
275 HtmlNode::Element(element) => {
276 self.convert_element(element)?;
277 }
278 HtmlNode::Frame(frame) => {
279 let res = self.convert_frame(&frame.inner);
280 self.inline_buffer.push(res);
281 }
282 HtmlNode::Tag(..) => {}
283 }
284 }
285 Ok(())
286 }
287
288 pub fn convert_children_into(
289 &mut self,
290 target: &mut Vec<Node>,
291 element: &HtmlElement,
292 ) -> Result<()> {
293 let prev_buffer = std::mem::take(&mut self.inline_buffer);
294 self.convert_children(element)?;
295 target.append(&mut self.inline_buffer);
296 self.inline_buffer = prev_buffer;
297 Ok(())
298 }
299
300 pub fn capture_children(&mut self, element: &HtmlElement) -> Result<(Vec<Node>, Vec<Node>)> {
302 let prev_buffer = std::mem::take(&mut self.inline_buffer);
303 let prev_blocks = std::mem::take(&mut self.blocks);
304
305 self.convert_children(element)?;
306
307 let inline = std::mem::take(&mut self.inline_buffer);
308 let blocks = std::mem::take(&mut self.blocks);
309
310 self.inline_buffer = prev_buffer;
311 self.blocks = prev_blocks;
312
313 Ok((inline, blocks))
314 }
315
316 pub(crate) fn warn_at(&mut self, span: Option<Span>, message: EcoString) {
317 let span = span.unwrap_or_else(Span::detached);
318 let span = self
319 .feat
320 .wrap_info
321 .as_ref()
322 .and_then(|info| self.remap_span_from_wrapper(span, info))
323 .unwrap_or(span);
324
325 let diag = SourceDiagnostic::warning(span, message);
326 self.warnings.extend(std::iter::once(diag));
327 }
328
329 fn remap_span_from_wrapper(&self, span: Span, info: &crate::WrapInfo) -> Option<Span> {
330 info.remap_span(self.world.as_ref(), span)
331 }
332}
333
/// Custom AST node wrapping the text of a comment.
///
/// Its `CustomNode` implementation writes the text inline between `<!-- ` and ` -->`.
#[derive(Debug, Clone)]
pub(crate) struct Comment(pub EcoString);
336
337impl CustomNode for Comment {
338 fn as_any(&self) -> &dyn std::any::Any {
339 self
340 }
341
342 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
343 self
344 }
345
346 fn write_inline(&self, writer: &mut InlineWriterProxy) -> WriteResult<()> {
347 writer.write_str("<!-- ")?;
348 writer.write_str(&self.0)?;
349 writer.write_str(" -->")?;
350 Ok(())
351 }
352
353 fn clone_box(&self) -> Box<dyn CustomNode> {
354 Box::new(self.clone())
355 }
356
357 fn eq_box(&self, other: &dyn CustomNode) -> bool {
358 if let Some(other) = other.as_any().downcast_ref::<Comment>() {
359 self.0 == other.0
360 } else {
361 false
362 }
363 }
364
365 fn is_block(&self) -> bool {
366 false
367 }
368}
369
370impl HtmlToAstParser {
371 pub fn is_block_element(element: &HtmlElement) -> bool {
372 matches!(
373 element.tag,
374 tag::p
375 | tag::div
376 | tag::blockquote
377 | tag::h1
378 | tag::h2
379 | tag::h3
380 | tag::h4
381 | tag::h5
382 | tag::h6
383 | tag::hr
384 | tag::pre
385 | tag::table
386 | tag::section
387 | tag::article
388 | tag::header
389 | tag::footer
390 | tag::main
391 | tag::aside
392 | tag::nav
393 | tag::ul
394 | tag::ol
395 | md_tag::heading
396 | md_tag::quote
397 | md_tag::raw
398 | md_tag::parbreak
399 | md_tag::table
400 | md_tag::grid
401 | md_tag::figure
402 ) || (element.tag == md_tag::verbatim && Self::is_verbatim_block(element))
403 }
404
405 fn is_verbatim_block(element: &HtmlElement) -> bool {
406 VerbatimAttr::parse(&element.attrs)
407 .map(|attrs| attrs.block)
408 .unwrap_or(false)
409 }
410
411 pub fn process_list_item_element(&mut self, element: &HtmlElement) -> Result<Vec<Node>> {
412 if element.tag == tag::ul || element.tag == tag::ol {
413 let items = super::list::ListParser::convert_list(self, element)?;
414 if element.tag == tag::ul {
415 return Ok(vec![Node::UnorderedList(items)]);
416 } else {
417 return Ok(vec![Node::OrderedList { start: 1, items }]);
418 }
419 }
420
421 let prev_blocks = std::mem::take(&mut self.blocks);
422 let prev_buffer = std::mem::take(&mut self.inline_buffer);
423
424 self.convert_element(element)?;
425 let mut result = Vec::new();
426
427 if !self.blocks.is_empty() {
428 result.extend(std::mem::take(&mut self.blocks));
429 } else if !self.inline_buffer.is_empty() {
430 if Self::is_block_element(element) {
431 result.push(Node::Paragraph(std::mem::take(&mut self.inline_buffer)));
432 } else {
433 result = std::mem::take(&mut self.inline_buffer);
434 }
435 }
436
437 self.blocks = prev_blocks;
438 self.inline_buffer = prev_buffer;
439
440 Ok(result)
441 }
442
443 pub fn parse(mut self, root: &HtmlElement) -> Result<Node> {
444 self.blocks.clear();
445 self.inline_buffer.clear();
446
447 self.convert_element(root)?;
448 self.flush_inline_buffer();
449
450 Ok(Node::Document(self.blocks))
451 }
452}