tinymist_l10n/
lib.rs

1//! Tinymist's localization library.
2
3mod msg_parser;
4
5use core::panic;
6use std::{
7    borrow::Cow,
8    collections::HashSet,
9    path::Path,
10    sync::{OnceLock, RwLock},
11};
12
13use rayon::{
14    iter::{IntoParallelRefMutIterator, ParallelIterator},
15    str::ParallelString,
16};
17use rustc_hash::FxHashMap;
18
19/// A map of translations.
20pub type TranslationMap = FxHashMap<String, String>;
21/// A set of translation maps.
22pub type TranslationMapSet = FxHashMap<String, TranslationMap>;
23
24static ALL_TRANSLATIONS: OnceLock<TranslationMapSet> = OnceLock::new();
25static LOCALE_TRANSLATIONS: RwLock<Option<&'static TranslationMap>> = RwLock::new(Option::None);
26
27/// Sets the current translations. It can only be called once.
28pub fn set_translations(translations: TranslationMapSet) {
29    let new_translations = ALL_TRANSLATIONS.set(translations);
30
31    if let Err(new_translations) = new_translations {
32        eprintln!("cannot set translations: len = {}", new_translations.len());
33    }
34}
35
36/// Sets the current locale.
37pub fn set_locale(locale: &str) -> Option<()> {
38    let translations = ALL_TRANSLATIONS.get()?;
39    let lower_locale = locale.to_lowercase();
40    let locale = lower_locale.as_str();
41    let translations = translations.get(locale).or_else(|| {
42        // Tries s to find a language that starts with the locale and follow a hyphen.
43        translations
44            .iter()
45            .find(|(k, _)| locale.starts_with(*k) && locale.chars().nth(k.len()) == Some('-'))
46            .map(|(_, v)| v)
47    })?;
48
49    *LOCALE_TRANSLATIONS.write().unwrap() = Some(translations);
50
51    Some(())
52}
53
54/// Loads a TOML string into a map of translations.
55pub fn load_translations(input: &str) -> anyhow::Result<TranslationMapSet> {
56    let mut translations = deserialize(input, false)?;
57    translations.par_iter_mut().for_each(|(_, v)| {
58        v.par_iter_mut().for_each(|(_, v)| {
59            if !v.starts_with('"') {
60                return;
61            }
62
63            *v = serde_json::from_str::<String>(v)
64                .unwrap_or_else(|e| panic!("cannot parse translation message: {e}, message: {v}"));
65        });
66    });
67
68    Ok(translations)
69}
70
71/// Updates disk translations with new key-value pairs.
72pub fn update_disk_translations(
73    mut key_values: Vec<(String, String)>,
74    output: &Path,
75) -> anyhow::Result<()> {
76    key_values.sort_by(|(key_x, _), (key_y, _)| key_x.cmp(key_y));
77
78    // Reads and parses existing translations
79    let mut translations = match std::fs::read_to_string(output) {
80        Ok(existing_translations) => deserialize(&existing_translations, true)?,
81        Err(e) if e.kind() == std::io::ErrorKind::NotFound => TranslationMapSet::default(),
82        Err(e) => Err(e)?,
83    };
84
85    // Removes unused translations
86    update_translations(key_values, &mut translations);
87
88    // Writes translations
89    let result = serialize_translations(translations);
90    std::fs::write(output, result)?;
91    Ok(())
92}
93
94/// Updates a map of translations with new key-value pairs.
95pub fn update_translations(
96    key_values: Vec<(String, String)>,
97    translations: &mut TranslationMapSet,
98) {
99    let used = key_values.iter().map(|e| &e.0).collect::<HashSet<_>>();
100    translations.retain(|k, _| used.contains(k));
101
102    // Updates translations
103    let en = "en".to_owned();
104    for (key, value) in key_values {
105        translations
106            .entry(key)
107            .or_default()
108            .insert(en.clone(), value);
109    }
110}
111
112/// Writes a map of translations to a TOML string.
113pub fn serialize_translations(translations: TranslationMapSet) -> String {
114    let mut result = String::new();
115
116    result.push_str("\n# The translations are partially generated by copilot\n");
117
118    let mut translations = translations.into_iter().collect::<Vec<_>>();
119    translations.sort_by(|a, b| a.0.cmp(&b.0));
120
121    for (key, mut data) in translations {
122        result.push_str(&format!("\n[{key}]\n"));
123
124        let en = data.remove("en").expect("en translation is missing");
125        result.push_str(&format!("en = {en}\n"));
126
127        // sort by lang
128        let mut data = data.into_iter().collect::<Vec<_>>();
129        data.sort_by(|a, b| a.0.cmp(&b.0));
130
131        for (lang, value) in data {
132            result.push_str(&format!("{lang} = {value}\n"));
133        }
134    }
135
136    result
137}
138
/// Tries to translate a string to the current language.
///
/// * `t!(key, message)` forwards to `t_without_args`.
/// * `t!(key, message, name = value, ...)` forwards to `t_with_args`, passing
///   each argument as a `(stringify!(name), value)` pair.
///
/// Both forms accept an optional trailing comma.
#[macro_export]
macro_rules! t {
    ($key:expr, $message:expr $(,)?) => {
        $crate::t_without_args($key, $message)
    };
    ($key:expr, $message:expr $(, $arg_key:ident = $arg_value:expr)+ $(,)?) => {
        $crate::t_with_args($key, $message, &[$((stringify!($arg_key), $arg_value)),*])
    };
}
149
/// Returns early from the enclosing function with a translated error.
///
/// The message is translated through [`t!`] with the given key, default
/// message and optional `name = value` arguments. The error also records the
/// `file:line:column` location produced by `file!`, `line!` and `column!`.
#[macro_export]
macro_rules! bail {
    ($key:expr, $message:expr $(, $arg_key:ident = $arg_value:expr)* $(,)?) => {{
        let msg = $crate::t!($key, $message $(, $arg_key = $arg_value)*);
        let location = concat!(file!(), ":", line!(), ":", column!());
        return Err(tinymist_std::error::prelude::_msg(location, msg.into()));
    }};
}
158
159/// Tries to get a translation for a key.
160fn find_message(key: &'static str, message: &'static str) -> &'static str {
161    let Some(translations) = LOCALE_TRANSLATIONS.read().unwrap().as_ref().copied() else {
162        return message;
163    };
164
165    translations.get(key).map(String::as_str).unwrap_or(message)
166}
167
168/// Tries to translate a string to the current language.
169pub fn t_without_args(key: &'static str, message: &'static str) -> Cow<'static, str> {
170    Cow::Borrowed(find_message(key, message))
171}
172
/// An argument for a translation.
///
/// Derives `Debug`, `Clone` and `PartialEq` so that arguments can be logged,
/// duplicated and compared (e.g. in tests).
#[derive(Debug, Clone, PartialEq)]
pub enum Arg<'a> {
    /// A string argument.
    Str(Cow<'a, str>),
    /// An integer argument.
    Int(i64),
    /// A float argument.
    Float(f64),
}
182
183impl<'a> From<&'a String> for Arg<'a> {
184    fn from(s: &'a String) -> Self {
185        Arg::Str(Cow::Borrowed(s.as_str()))
186    }
187}
188
189impl<'a> From<&'a str> for Arg<'a> {
190    fn from(s: &'a str) -> Self {
191        Arg::Str(Cow::Borrowed(s))
192    }
193}
194
195/// Converts an object to an argument of debug message.
196pub trait DebugL10n {
197    /// Returns a debug string for the current language.
198    fn debug_l10n(&self) -> Arg<'_>;
199}
200
201impl<T: std::fmt::Debug> DebugL10n for T {
202    fn debug_l10n(&self) -> Arg<'static> {
203        Arg::Str(Cow::Owned(format!("{self:?}")))
204    }
205}
206
207/// Translates a message with placeholder substitution.
208///
209/// Substitutes placeholders in the format `{key}` with provided argument values.
210/// Escaped braces `{{` and `}}` are converted to literal `{` and `}`.
211///
212/// If a placeholder argument is not found, the placeholder is output as-is.
213/// If a placeholder is malformed (missing closing brace), the literal characters are output.
214///
215/// # Arguments
216///
217/// * `key` - Translation key for lookup (used only for translation retrieval)
218/// * `message` - Default message with placeholders
219/// * `args` - Slice of key-value pairs for substitution
220pub fn t_with_args(
221    key: &'static str,
222    message: &'static str,
223    args: &[(&'static str, Arg)],
224) -> Cow<'static, str> {
225    let message = find_message(key, message);
226    let result = msg_parser::parse_message(message, |arg_name| {
227        args.iter()
228            .find(|(k, _)| k == &arg_name)
229            .map(|(_, v)| match v {
230                Arg::Str(s) => s.clone(),
231                Arg::Int(i) => i.to_string().into(),
232                Arg::Float(f) => f.to_string().into(),
233            })
234    });
235    Cow::Owned(result)
236}
237
238/// Deserializes a TOML string into a map of translations.
239pub fn deserialize(input: &str, key_first: bool) -> anyhow::Result<TranslationMapSet> {
240    let lines = input
241        .par_split('\n')
242        .map(|line| line.trim())
243        .filter(|line| !line.starts_with('#') && !line.is_empty())
244        .collect::<Vec<_>>();
245
246    let mut translations = FxHashMap::default();
247    let mut key = String::new();
248
249    for line in lines {
250        if line.starts_with('[') {
251            key = line[1..line.len() - 1].to_string();
252        } else {
253            let equal_index = line.find('=').map_or_else(
254                || {
255                    Err(anyhow::anyhow!(
256                        "cannot find equal sign in translation line: {line}"
257                    ))
258                },
259                Ok,
260            )?;
261            let lang = line[..equal_index].trim().to_string();
262            let value = line[equal_index + 1..].trim().to_string();
263
264            if key_first {
265                translations
266                    .entry(key.clone())
267                    .or_insert_with(FxHashMap::default)
268                    .insert(lang, value);
269            } else {
270                translations
271                    .entry(lang)
272                    .or_insert_with(FxHashMap::default)
273                    .insert(key.clone(), value);
274            }
275        }
276    }
277
278    Ok(translations)
279}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty placeholder `{}` is kept verbatim.
    #[test]
    fn test_empty_placeholder() {
        let rendered = t_with_args("test.key", "Result: {} end", &[("x", "value".into())]);
        assert_eq!(rendered, "Result: {} end");
    }

    /// Adjacent placeholders are substituted independently.
    #[test]
    fn test_consecutive_placeholders() {
        let rendered = t_with_args("test.key", "{x}{y}", &[("x", "A".into()), ("y", "B".into())]);
        assert_eq!(rendered, "AB");
    }

    /// A placeholder without a matching argument is kept verbatim.
    #[test]
    fn test_placeholder_missing_arg() {
        let rendered = t_with_args("test.key", "Hello {name} world", &[]);
        assert_eq!(rendered, "Hello {name} world");
    }

    /// Integer arguments are rendered in decimal.
    #[test]
    fn test_int_argument() {
        let rendered = t_with_args(
            "test.key",
            "You have {count} items",
            &[("count", Arg::Int(42))],
        );
        assert_eq!(rendered, "You have 42 items");
    }

    /// Float arguments are rendered with their `to_string` form.
    #[test]
    fn test_float_argument() {
        let rendered = t_with_args(
            "test.key",
            "Price: {price} dollars",
            &[("price", Arg::Float(9.15))],
        );
        assert_eq!(rendered, "Price: 9.15 dollars");
    }

    /// A placeholder that lacks its closing brace is output as-is.
    #[test]
    fn test_malformed_placeholder_no_closing_brace() {
        let rendered = t_with_args("test.key", "Hello {name world", &[("name", "John".into())]);
        assert_eq!(rendered, "Hello {name world");
    }

    /// `{{` is output as a single `{`.
    #[test]
    fn test_escaped_opening_brace() {
        let rendered = t_with_args("test.key", "Use {{ to escape", &[]);
        assert_eq!(rendered, "Use { to escape");
    }

    /// `}}` is output as a single `}`.
    #[test]
    fn test_escaped_closing_brace() {
        let rendered = t_with_args("test.key", "Close with }}", &[]);
        assert_eq!(rendered, "Close with }");
    }

    /// `{{arg}}` is output as the literal `{arg}`, not treated as a placeholder.
    #[test]
    fn test_escaped_braces() {
        let rendered = t_with_args("test.key", "Pattern: {{arg}}", &[("arg", "value".into())]);
        assert_eq!(rendered, "Pattern: {arg}");
    }

    /// Escape sequences and a real placeholder can be mixed in one message.
    #[test]
    fn test_multiple_escaped_braces() {
        let rendered = t_with_args("test.key", "{{ {x} }}", &[("x", "value".into())]);
        assert_eq!(rendered, "{ value }");
    }
}