typlite/
lib.rs

1//! # Typlite
2
3// todo: remove me
4#![allow(missing_docs)]
5
6pub mod attributes;
7pub mod common;
8mod diagnostics;
9mod error;
10pub mod parser;
11pub mod tags;
12pub mod writer;
13
14use std::path::PathBuf;
15use std::str::FromStr;
16use std::sync::Arc;
17
18pub use error::*;
19
20use cmark_writer::ast::Node;
21use tinymist_project::base::ShadowApi;
22use tinymist_project::vfs::WorkspaceResolver;
23use tinymist_project::{EntryReader, LspWorld, TaskInputs};
24use tinymist_std::error::prelude::*;
25use typst::World;
26use typst::WorldExt;
27use typst::diag::SourceDiagnostic;
28use typst::foundations::Bytes;
29use typst_html::HtmlDocument;
30use typst_syntax::Span;
31use typst_syntax::VirtualPath;
32
33pub use crate::common::Format;
34use crate::diagnostics::WarningCollector;
35use crate::parser::HtmlToAstParser;
36use crate::writer::WriterFactory;
37use typst_syntax::FileId;
38
39use crate::tinymist_std::typst::LazyHash;
40use crate::tinymist_std::typst::foundations::Value::Str;
41
42/// The result type for typlite.
43pub type Result<T, Err = Error> = std::result::Result<T, Err>;
44
45pub use cmark_writer::ast;
46pub use tinymist_project::CompileOnceArgs;
47pub use tinymist_std;
48
49#[derive(Clone)]
50pub struct MarkdownDocument {
51    pub base: HtmlDocument,
52    world: Arc<LspWorld>,
53    feat: TypliteFeat,
54    ast: Option<Node>,
55    warnings: WarningCollector,
56}
57
58impl MarkdownDocument {
59    /// Create a new MarkdownDocument instance
60    pub fn new(base: HtmlDocument, world: Arc<LspWorld>, feat: TypliteFeat) -> Self {
61        Self {
62            base,
63            world,
64            feat,
65            ast: None,
66            warnings: WarningCollector::default(),
67        }
68    }
69
70    /// Create a MarkdownDocument instance with pre-parsed AST
71    pub fn with_ast(
72        base: HtmlDocument,
73        world: Arc<LspWorld>,
74        feat: TypliteFeat,
75        ast: Node,
76    ) -> Self {
77        Self {
78            base,
79            world,
80            feat,
81            ast: Some(ast),
82            warnings: WarningCollector::default(),
83        }
84    }
85
86    /// Replace the backing warning collector, preserving shared state with
87    /// other components of the pipeline.
88    pub(crate) fn with_warning_collector(mut self, collector: WarningCollector) -> Self {
89        self.warnings = collector;
90        self
91    }
92
93    /// Get a snapshot of all collected warnings so far.
94    pub fn warnings(&self) -> Vec<SourceDiagnostic> {
95        let warnings = self.warnings.snapshot();
96        if let Some(info) = &self.feat.wrap_info {
97            warnings
98                .into_iter()
99                .filter_map(|diag| self.remap_diagnostic(diag, info))
100                .collect()
101        } else {
102            warnings
103        }
104    }
105
106    /// Internal accessor for sharing the collector with the parser.
107    fn warning_collector(&self) -> WarningCollector {
108        self.warnings.clone()
109    }
110
111    fn remap_diagnostic(
112        &self,
113        mut diagnostic: SourceDiagnostic,
114        info: &WrapInfo,
115    ) -> Option<SourceDiagnostic> {
116        if let Some(span) = info.remap_span(self.world.as_ref(), diagnostic.span) {
117            diagnostic.span = span;
118        } else {
119            return None;
120        }
121
122        diagnostic.trace = diagnostic
123            .trace
124            .into_iter()
125            .filter_map(
126                |mut spanned| match info.remap_span(self.world.as_ref(), spanned.span) {
127                    Some(span) => {
128                        spanned.span = span;
129                        Some(spanned)
130                    }
131                    None => None,
132                },
133            )
134            .collect();
135
136        Some(diagnostic)
137    }
138
139    /// Parse HTML document to AST
140    pub fn parse(&self) -> tinymist_std::Result<Node> {
141        if let Some(ast) = &self.ast {
142            return Ok(ast.clone());
143        }
144        let parser = HtmlToAstParser::new(self.feat.clone(), &self.world, self.warning_collector());
145        parser.parse(&self.base.root).context_ut("failed to parse")
146    }
147
148    /// Convert content to markdown string
149    pub fn to_md_string(&self) -> tinymist_std::Result<ecow::EcoString> {
150        let mut output = ecow::EcoString::new();
151        let ast = self.parse()?;
152
153        let mut writer = WriterFactory::create(Format::Md);
154        writer
155            .write_eco(&ast, &mut output)
156            .context_ut("failed to write")?;
157
158        Ok(output)
159    }
160
161    /// Convert content to plain text string
162    pub fn to_text_string(&self) -> tinymist_std::Result<ecow::EcoString> {
163        let mut output = ecow::EcoString::new();
164        let ast = self.parse()?;
165
166        let mut writer = WriterFactory::create(Format::Text);
167        writer
168            .write_eco(&ast, &mut output)
169            .context_ut("failed to write")?;
170
171        Ok(output)
172    }
173
174    /// Convert the content to a LaTeX string.
175    pub fn to_tex_string(&self) -> tinymist_std::Result<ecow::EcoString> {
176        let mut output = ecow::EcoString::new();
177        let ast = self.parse()?;
178
179        let mut writer = WriterFactory::create(Format::LaTeX);
180        writer
181            .write_eco(&ast, &mut output)
182            .context_ut("failed to write")?;
183
184        Ok(output)
185    }
186
187    /// Convert the content to a DOCX document
188    #[cfg(feature = "docx")]
189    pub fn to_docx(&self) -> tinymist_std::Result<Vec<u8>> {
190        let ast = self.parse()?;
191
192        let mut writer = WriterFactory::create(Format::Docx);
193        writer.write_vec(&ast).context_ut("failed to write")
194    }
195}
196
/// A color theme for rendering the content. The valid values can be checked in [color-scheme](https://developer.mozilla.org/en-US/docs/Web/CSS/color-scheme).
///
/// The enum is a plain, fieldless value type, so besides being `Copy` it can
/// be compared for equality and used as a key in hashed collections.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ColorTheme {
    /// The light color theme; this is the default.
    #[default]
    Light,
    /// The dark color theme.
    Dark,
}
204
205#[derive(Debug, Clone)]
206pub struct WrapInfo {
207    /// The synthetic wrapper file that hosts the original Typst source.
208    pub wrap_file_id: FileId,
209    /// The user's actual Typst source file.
210    pub original_file_id: FileId,
211    /// Number of UTF-8 bytes injected ahead of the original source.
212    pub prefix_len_bytes: usize,
213}
214
215impl WrapInfo {
216    /// Translate a span from the wrapper file back into the original file.
217    pub fn remap_span(&self, world: &dyn typst::World, span: Span) -> Option<Span> {
218        if span.id() != Some(self.wrap_file_id) {
219            return Some(span);
220        }
221
222        let range = world.range(span)?;
223        let start = range.start.checked_sub(self.prefix_len_bytes)?;
224        let end = range.end.checked_sub(self.prefix_len_bytes)?;
225
226        let original_source = world.source(self.original_file_id).ok()?;
227        let original_len = original_source.lines().len_bytes();
228
229        if start >= original_len || end > original_len {
230            return None;
231        }
232
233        Some(Span::from_range(self.original_file_id, start..end))
234    }
235}
236
237#[derive(Debug, Default, Clone)]
238pub struct TypliteFeat {
239    /// The preferred color theme.
240    pub color_theme: Option<ColorTheme>,
241    /// The path of external assets directory.
242    pub assets_path: Option<PathBuf>,
243    /// Allows GFM (GitHub Flavored Markdown) markups.
244    pub gfm: bool,
245    /// Annotate the elements for identification.
246    pub annotate_elem: bool,
247    /// Embed errors in the output instead of yielding them.
248    pub soft_error: bool,
249    /// Remove HTML tags from the output.
250    pub remove_html: bool,
251    /// The target to convert
252    pub target: Format,
253    /// Import context for code examples (e.g., "#import \"/path/to/file.typ\":
254    /// *")
255    pub import_context: Option<String>,
256    /// Specifies the package to process markup.
257    ///
258    /// ## `article` function
259    ///
260    /// The article function is used to wrap the typst content during
261    /// compilation.
262    ///
263    /// typlite exactly uses the `#article` function to process the content as
264    /// follow:
265    ///
266    /// ```typst
267    /// #import "@local/processor": article
268    /// #article(include "the-processed-content.typ")
269    /// ```
270    ///
271    /// It resembles the regular typst show rule function, like `#show:
272    /// article`.
273    pub processor: Option<String>,
274    /// Optional mapping from the wrapper file back to the original source.
275    pub wrap_info: Option<WrapInfo>,
276}
277
278impl TypliteFeat {
279    pub fn prepare_world(
280        &self,
281        world: &LspWorld,
282        format: Format,
283    ) -> tinymist_std::Result<(LspWorld, Option<WrapInfo>)> {
284        let entry = world.entry_state();
285        let main = entry.main();
286        let current = main.context("no main file in workspace")?;
287
288        if WorkspaceResolver::is_package_file(current) {
289            bail!("package file is not supported");
290        }
291
292        let wrap_main_id = current.join("__wrap_md_main.typ");
293
294        let (main_id, main_content) = match self.processor.as_ref() {
295            None => (wrap_main_id, None),
296            Some(processor) => {
297                let main_id = current.join("__md_main.typ");
298                let content = format!(
299                    r#"#import {processor:?}: article
300#article(include "__wrap_md_main.typ")"#
301                );
302
303                (main_id, Some(Bytes::from_string(content)))
304            }
305        };
306
307        // Start with existing inputs from the world (CLI inputs)
308        let mut dict = (**world.inputs()).clone();
309
310        // Add typlite-specific inputs
311        dict.insert("x-target".into(), Str("md".into()));
312        if format == Format::Text || self.remove_html {
313            dict.insert("x-remove-html".into(), Str("true".into()));
314        }
315
316        let task_inputs = TaskInputs {
317            entry: Some(entry.select_in_workspace(main_id.vpath().as_rooted_path())),
318            inputs: Some(Arc::new(LazyHash::new(dict))),
319        };
320
321        let mut world = world.task(task_inputs).html_task().into_owned();
322
323        let markdown_id = FileId::new(
324            Some(
325                typst_syntax::package::PackageSpec::from_str("@local/_markdown:0.1.0")
326                    .context_ut("failed to import markdown package")?,
327            ),
328            VirtualPath::new("lib.typ"),
329        );
330
331        world
332            .map_shadow_by_id(
333                markdown_id.join("typst.toml"),
334                Bytes::from_string(include_str!("markdown-typst.toml")),
335            )
336            .context_ut("cannot map markdown-typst.toml")?;
337        world
338            .map_shadow_by_id(
339                markdown_id,
340                Bytes::from_string(include_str!("markdown.typ")),
341            )
342            .context_ut("cannot map markdown.typ")?;
343        let original_source = world
344            .source(current)
345            .context_ut("cannot fetch main source")?
346            .text()
347            .to_owned();
348
349        const WRAP_PREFIX: &str =
350            "#import \"@local/_markdown:0.1.0\": md-doc, example; #show: md-doc\n";
351        let wrap_content = format!("{WRAP_PREFIX}{original_source}");
352
353        world
354            .map_shadow_by_id(wrap_main_id, Bytes::from_string(wrap_content))
355            .context_ut("cannot map source for main file")?;
356
357        if let Some(main_content) = main_content {
358            world
359                .map_shadow_by_id(main_id, main_content)
360                .context_ut("cannot map source for main file")?;
361        }
362
363        let wrap_info = Some(WrapInfo {
364            wrap_file_id: wrap_main_id,
365            original_file_id: current,
366            prefix_len_bytes: WRAP_PREFIX.len(),
367        });
368
369        Ok((world, wrap_info))
370    }
371}
372
373/// Task builder for converting a typst document to Markdown.
374pub struct Typlite {
375    /// The universe to use for the conversion.
376    world: Arc<LspWorld>,
377    /// Features for the conversion.
378    feat: TypliteFeat,
379    /// The format to use for the conversion.
380    format: Format,
381}
382
383impl Typlite {
384    /// Creates a new Typlite instance from a [`World`].
385    pub fn new(world: Arc<LspWorld>) -> Self {
386        Self {
387            world,
388            feat: Default::default(),
389            format: Format::Md,
390        }
391    }
392
393    /// Sets conversion features
394    pub fn with_feature(mut self, feat: TypliteFeat) -> Self {
395        self.feat = feat;
396        self
397    }
398
399    pub fn with_format(mut self, format: Format) -> Self {
400        self.format = format;
401        self
402    }
403
404    /// Convert the content to a markdown string.
405    pub fn convert(self) -> tinymist_std::Result<ecow::EcoString> {
406        match self.format {
407            Format::Md => self.convert_doc(Format::Md)?.to_md_string(),
408            Format::LaTeX => self.convert_doc(Format::LaTeX)?.to_tex_string(),
409            Format::Text => self.convert_doc(Format::Text)?.to_text_string(),
410            #[cfg(feature = "docx")]
411            Format::Docx => bail!("docx format is not supported"),
412        }
413    }
414
415    /// Convert the content to a DOCX document
416    #[cfg(feature = "docx")]
417    pub fn to_docx(self) -> tinymist_std::Result<Vec<u8>> {
418        if self.format != Format::Docx {
419            bail!("format is not DOCX");
420        }
421        self.convert_doc(Format::Docx)?.to_docx()
422    }
423
424    /// Convert the content to a markdown document.
425    pub fn convert_doc(mut self, format: Format) -> tinymist_std::Result<MarkdownDocument> {
426        let (prepared_world, wrap_info) = self.feat.prepare_world(&self.world, format)?;
427        self.feat.wrap_info = wrap_info;
428        let feat = self.feat.clone();
429        let world = Arc::new(prepared_world);
430        Self::convert_doc_prepared(feat, format, world)
431    }
432
433    /// Convert the content to a markdown document.
434    pub fn convert_doc_prepared(
435        feat: TypliteFeat,
436        format: Format,
437        world: Arc<LspWorld>,
438    ) -> tinymist_std::Result<MarkdownDocument> {
439        let compiled = typst::compile(&world);
440        let collector = WarningCollector::default();
441        collector.extend(
442            compiled
443                .warnings
444                .iter()
445                .filter(|&diag| {
446                    diag.message.as_str()
447                        != "html export is under active development and incomplete"
448                })
449                .cloned(),
450        );
451        let base = compiled.output?;
452        let mut feat = feat;
453        feat.target = format;
454        Ok(MarkdownDocument::new(base, world.clone(), feat).with_warning_collector(collector))
455    }
456}
457
458#[cfg(test)]
459mod tests;