anki/rslib/src/i18n.rs
Damien Elmes 77c9db5bba plural rules and decimal separator should use bundle's language
Instead of providing the list of languages in preferred order, when
creating a bundle we need to specify the bundle language as the first
language, so that the correct plural rules are used. Fluent's docs
are misleading here; I will submit a PR to fix them.

The old behaviour caused:
https://forums.ankiweb.net/t/bug-in-review-intervals-for-some-languages-in-number-of-cards/5744
2020-12-14 14:23:49 +10:00

584 lines
17 KiB
Rust

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::err::Result;
use crate::log::{error, Logger};
use fluent::{concurrent::FluentBundle, FluentArgs, FluentResource, FluentValue};
use num_format::Locale;
use serde::Serialize;
use std::borrow::Cow;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use unic_langid::LanguageIdentifier;
include!(concat!(env!("OUT_DIR"), "/fluent_keys.rs"));
pub use crate::fluent_proto::FluentString as TR;
pub use fluent::fluent_args as tr_args;
/// Helper for creating args with &strs.
///
/// Takes a comma-separated list of `key => value` pairs and evaluates to a
/// `fluent::FluentArgs`. Each value is converted with `to_string()` before
/// being added, so anything implementing `ToString` can be passed.
/// A trailing comma after the last pair is accepted.
#[macro_export]
macro_rules! tr_strs {
    ( $($key:expr => $value:expr),* $(,)? ) => {
        {
            let mut args: fluent::FluentArgs = fluent::FluentArgs::new();
            $(
                args.add($key, $value.to_string().into());
            )*
            args
        }
    };
}
pub use tr_strs;
/// Locate the folder holding external ftl files for `lang`.
///
/// With a language given, the fully qualified folder (eg `en_GB`) is
/// preferred, then the bare language folder (eg `en`). With no language,
/// the `templates` fallback folder is used. `None` is returned when the
/// candidate folder(s) cannot be stat'ed.
fn lang_folder(lang: &Option<LanguageIdentifier>, ftl_root_folder: &Path) -> Option<PathBuf> {
    // keep the path only if its metadata can be read
    let if_present = |candidate: PathBuf| Some(candidate).filter(|p| fs::metadata(p).is_ok());

    let lang = match lang {
        Some(lang) => lang,
        // fallback folder
        None => return if_present(ftl_root_folder.join("templates")),
    };

    if let Some(region) = lang.region {
        let qualified = ftl_root_folder.join(format!("{}_{}", lang.language, region));
        if let Some(found) = if_present(qualified) {
            return Some(found);
        }
    }

    if_present(ftl_root_folder.join(lang.language.to_string()))
}
// Variant used when the "translations" feature is enabled: embeds the
// named file from the build output directory (OUT_DIR) into the binary
// as a string at compile time.
#[cfg(feature = "translations")]
macro_rules! ftl_path {
( $fname: expr ) => {
include_str!(concat!(env!("OUT_DIR"), "/", $fname))
};
}
// Variant used when the "translations" feature is disabled: only the
// literal "template.ftl" is embedded from OUT_DIR; any other file name
// expands to an empty string.
#[cfg(not(feature = "translations"))]
macro_rules! ftl_path {
// the template is always included
( "template.ftl" ) => {
include_str!(concat!(env!("OUT_DIR"), "/template.ftl"))
};
( $fname: expr ) => {
"" // translations not included
};
}
/// Get the template/English resource text.
/// The text is the contents of `template.ftl`, embedded at compile time
/// via `ftl_path!`.
fn ftl_template_text() -> &'static str {
ftl_path!("template.ftl")
}
/// Return the translation text bundled in the binary for `lang`.
///
/// The language code selects the file; for `en`, `zh` and `pt` the region
/// is consulted as well. `None` is returned for languages without a bundled
/// file, and for English regions other than GB/AU, so that the fallback
/// language is used instead.
fn ftl_localized_text(lang: &LanguageIdentifier) -> Option<&'static str> {
    let region = lang.region.as_ref().map(|region| region.as_str());
    let text = match (lang.language.as_str(), region) {
        // region-sensitive languages
        ("en", Some("GB")) | ("en", Some("AU")) => ftl_path!("en-GB.ftl"),
        // use fallback language instead
        ("en", _) => return None,
        ("zh", Some("TW")) | ("zh", Some("HK")) => ftl_path!("zh-TW.ftl"),
        ("zh", _) => ftl_path!("zh-CN.ftl"),
        ("pt", Some("PT")) => ftl_path!("pt-PT.ftl"),
        ("pt", _) => ftl_path!("pt-BR.ftl"),
        // languages whose file name carries a fixed region
        ("ga", _) => ftl_path!("ga-IE.ftl"),
        ("hy", _) => ftl_path!("hy-AM.ftl"),
        ("nb", _) => ftl_path!("nb-NO.ftl"),
        ("sv", _) => ftl_path!("sv-SE.ftl"),
        // everything else maps directly onto the language code
        ("jbo", _) => ftl_path!("jbo.ftl"),
        ("kab", _) => ftl_path!("kab.ftl"),
        ("af", _) => ftl_path!("af.ftl"),
        ("ar", _) => ftl_path!("ar.ftl"),
        ("bg", _) => ftl_path!("bg.ftl"),
        ("ca", _) => ftl_path!("ca.ftl"),
        ("cs", _) => ftl_path!("cs.ftl"),
        ("da", _) => ftl_path!("da.ftl"),
        ("de", _) => ftl_path!("de.ftl"),
        ("el", _) => ftl_path!("el.ftl"),
        ("eo", _) => ftl_path!("eo.ftl"),
        ("es", _) => ftl_path!("es.ftl"),
        ("et", _) => ftl_path!("et.ftl"),
        ("eu", _) => ftl_path!("eu.ftl"),
        ("fa", _) => ftl_path!("fa.ftl"),
        ("fi", _) => ftl_path!("fi.ftl"),
        ("fr", _) => ftl_path!("fr.ftl"),
        ("gl", _) => ftl_path!("gl.ftl"),
        ("he", _) => ftl_path!("he.ftl"),
        ("hr", _) => ftl_path!("hr.ftl"),
        ("hu", _) => ftl_path!("hu.ftl"),
        ("it", _) => ftl_path!("it.ftl"),
        ("ja", _) => ftl_path!("ja.ftl"),
        ("ko", _) => ftl_path!("ko.ftl"),
        ("la", _) => ftl_path!("la.ftl"),
        ("mn", _) => ftl_path!("mn.ftl"),
        ("mr", _) => ftl_path!("mr.ftl"),
        ("ms", _) => ftl_path!("ms.ftl"),
        ("nl", _) => ftl_path!("nl.ftl"),
        ("oc", _) => ftl_path!("oc.ftl"),
        ("pl", _) => ftl_path!("pl.ftl"),
        ("ro", _) => ftl_path!("ro.ftl"),
        ("ru", _) => ftl_path!("ru.ftl"),
        ("sk", _) => ftl_path!("sk.ftl"),
        ("sl", _) => ftl_path!("sl.ftl"),
        ("sr", _) => ftl_path!("sr.ftl"),
        ("th", _) => ftl_path!("th.ftl"),
        ("tr", _) => ftl_path!("tr.ftl"),
        ("uk", _) => ftl_path!("uk.ftl"),
        ("vi", _) => ftl_path!("vi.ftl"),
        _ => return None,
    };
    Some(text)
}
/// Return the text from any .ftl files in the given folder.
///
/// Only entries whose (UTF-8) file name ends in `.ftl` are read. Files are
/// concatenated in file-name order, since the order `read_dir` yields
/// entries in is platform-dependent. A newline is appended after any file
/// that does not end with one, so the last line of one file can never be
/// fused with the first line of the next.
///
/// Returns an error if the folder cannot be listed or a matching entry
/// cannot be read as UTF-8 text.
fn ftl_external_text(folder: &Path) -> Result<String> {
    let mut paths = Vec::new();
    for entry in fs::read_dir(folder)? {
        let entry = entry?;
        // names that are not valid UTF-8 are skipped
        let is_ftl = entry
            .file_name()
            .to_str()
            .map_or(false, |name| name.ends_with(".ftl"));
        if is_ftl {
            paths.push(entry.path());
        }
    }
    paths.sort();

    let mut buf = String::new();
    for path in paths {
        buf.push_str(&fs::read_to_string(path)?);
        // keep files separated; an empty result stays empty
        if !buf.is_empty() && !buf.ends_with('\n') {
            buf.push('\n');
        }
    }
    Ok(buf)
}
/// Some sample text for testing purposes.
/// These English strings are appended to the extra text of the template
/// bundle (the one created without a language) when compiled in test mode.
fn test_en_text() -> &'static str {
"
valid-key = a valid key
only-in-english = not translated
two-args-key = two args: {$one} and {$two}
plural = You have {$hats ->
[one] 1 hat
*[other] {$hats} hats
}.
"
}
/// Sample Japanese strings for testing purposes.
/// Appended to the extra text of bundles whose language is `ja` when
/// compiled in test mode. `only-in-english` is deliberately absent.
fn test_jp_text() -> &'static str {
"
valid-key = キー
two-args-key = {$one}と{$two}
"
}
/// Sample "Polish" string for testing purposes.
/// Appended to the extra text of bundles whose language is `pl` when
/// compiled in test mode; used by the decimal separator test.
fn test_pl_text() -> &'static str {
"
one-arg-key = fake Polish {$one}
"
}
/// Build a bundle for `locales` from `text`, layering `extra_text` on top.
///
/// `None` is returned (after logging) when `text` fails to parse or when
/// adding it to the bundle reports an error. `extra_text` may contain
/// resources loaded from the filesystem at runtime; it is added in
/// overriding mode. If `extra_text` fails to parse, the error is logged,
/// none of it is applied, and the bundle is still returned.
fn get_bundle(
    text: &str,
    extra_text: String,
    locales: &[LanguageIdentifier],
    log: &Logger,
) -> Option<FluentBundle<FluentResource>> {
    let primary = match FluentResource::try_new(text.into()) {
        Ok(resource) => resource,
        Err(e) => {
            error!(log, "Unable to parse translations file: {:?}", e);
            return None;
        }
    };

    let mut bundle: FluentBundle<FluentResource> = FluentBundle::new(locales);
    if let Err(e) = bundle.add_resource(primary) {
        error!(log, "Duplicate key detected in translation file: {:?}", e);
        return None;
    }

    if !extra_text.is_empty() {
        match FluentResource::try_new(extra_text) {
            Ok(overrides) => {
                bundle.add_resource_overriding(overrides);
            }
            Err((_partial, e)) => {
                error!(log, "Unable to parse translations file: {:?}", e);
            }
        }
    }

    // disable isolation characters in test mode
    if cfg!(test) {
        bundle.set_use_isolating(false);
    }

    // add numeric formatter
    set_bundle_formatter_for_langs(&mut bundle, locales);

    Some(bundle)
}
/// Get a bundle that includes any filesystem overrides.
///
/// External ftl text is read from the folder `lang_folder` selects for
/// `lang` under `ftl_root_folder`; a read failure is logged and treated as
/// no extra text. The bundle's locale list is `lang` (when given) followed
/// by `en-US`, so the bundle's own language always comes first.
fn get_bundle_with_extra(
    text: &str,
    lang: Option<LanguageIdentifier>,
    ftl_root_folder: &Path,
    log: &Logger,
) -> Option<FluentBundle<FluentResource>> {
    let mut extra_text = match lang_folder(&lang, ftl_root_folder) {
        Some(folder) => match ftl_external_text(&folder) {
            Ok(contents) => contents,
            Err(e) => {
                error!(log, "Error reading external FTL files: {:?}", e);
                String::new()
            }
        },
        None => String::new(),
    };

    if cfg!(test) {
        // inject some test strings in test mode
        let sample = match &lang {
            None => test_en_text(),
            Some(l) if l.language == "ja" => test_jp_text(),
            Some(l) if l.language == "pl" => test_pl_text(),
            Some(_) => "",
        };
        extra_text.push_str(sample);
    }

    let mut locales: Vec<LanguageIdentifier> = lang.into_iter().collect();
    locales.push("en-US".parse().unwrap());

    get_bundle(text, extra_text, &locales, log)
}
/// Handle for looking up translated strings.
/// Cloning is cheap with respect to the translation data: clones share the
/// same `I18nInner` through the `Arc`.
#[derive(Clone)]
pub struct I18n {
// bundles and associated data, shared between clones behind a mutex
inner: Arc<Mutex<I18nInner>>,
// used to report translation errors
log: Logger,
}
impl I18n {
pub fn new<S: AsRef<str>, P: Into<PathBuf>>(
locale_codes: &[S],
ftl_root_folder: P,
log: Logger,
) -> Self {
let ftl_root_folder = ftl_root_folder.into();
let mut input_langs = vec![];
let mut bundles = Vec::with_capacity(locale_codes.len() + 1);
let mut resource_text = vec![];
for code in locale_codes {
let code = code.as_ref();
if let Ok(lang) = code.parse::<LanguageIdentifier>() {
input_langs.push(lang.clone());
if lang.language == "en" {
// if English was listed, any further preferences are skipped,
// as the template has 100% coverage, and we need to ensure
// it is tried prior to any other langs.
break;
}
}
}
let mut output_langs = vec![];
for lang in input_langs {
// if the language is bundled in the binary
if let Some(text) = ftl_localized_text(&lang) {
if let Some(bundle) =
get_bundle_with_extra(text, Some(lang.clone()), &ftl_root_folder, &log)
{
resource_text.push(text);
bundles.push(bundle);
output_langs.push(lang);
} else {
error!(log, "Failed to create bundle for {:?}", lang.language)
}
}
}
// add English templates
let template_text = ftl_template_text();
let template_lang = "en-US".parse().unwrap();
let template_bundle =
get_bundle_with_extra(template_text, None, &ftl_root_folder, &log).unwrap();
resource_text.push(template_text);
bundles.push(template_bundle);
output_langs.push(template_lang);
Self {
inner: Arc::new(Mutex::new(I18nInner {
bundles,
langs: output_langs,
resource_text,
})),
log,
}
}
/// Get translation with zero arguments.
pub fn tr(&self, key: TR) -> Cow<str> {
let key = FLUENT_KEYS[key as usize];
self.tr_(key, None)
}
/// Get translation with one or more arguments.
pub fn trn(&self, key: TR, args: FluentArgs) -> String {
let key = FLUENT_KEYS[key as usize];
self.tr_(key, Some(args)).into()
}
fn tr_<'a>(&'a self, key: &str, args: Option<FluentArgs>) -> Cow<'a, str> {
for bundle in &self.inner.lock().unwrap().bundles {
let msg = match bundle.get_message(key) {
Some(msg) => msg,
// not translated in this bundle
None => continue,
};
let pat = match msg.value {
Some(val) => val,
// empty value
None => continue,
};
let mut errs = vec![];
let out = bundle.format_pattern(pat, args.as_ref(), &mut errs);
if !errs.is_empty() {
error!(self.log, "Error(s) in translation '{}': {:?}", key, errs);
}
// clone so we can discard args
return out.to_string().into();
}
// return the key name if it was missing
key.to_string().into()
}
/// Return text from configured locales for use with the JS Fluent implementation.
pub fn resources_for_js(&self) -> ResourcesForJavascript {
let inner = self.inner.lock().unwrap();
ResourcesForJavascript {
langs: inner.langs.iter().map(ToString::to_string).collect(),
resources: inner.resource_text.clone(),
}
}
}
/// Data shared between clones of `I18n`.
/// `I18n::new` pushes to the three vectors together, so entries at the same
/// index describe the same language.
struct I18nInner {
// bundles in preferred language order, with template English as the
// last element
bundles: Vec<FluentBundle<FluentResource>>,
// language of each bundle; the last entry is en-US for the template
langs: Vec<LanguageIdentifier>,
// the bundled (compiled-in) text each bundle was created from
resource_text: Vec<&'static str>,
}
// Simple number formatting implementation.
// Installs a formatter on the bundle that writes decimals with either a
// comma or a period, depending on what the given languages call for.
fn set_bundle_formatter_for_langs<T>(bundle: &mut FluentBundle<T>, langs: &[LanguageIdentifier]) {
    let formatter = match want_comma_as_decimal_separator(langs) {
        true => format_decimal_with_comma,
        false => format_decimal_with_period,
    };
    bundle.set_formatter(Some(formatter));
}
fn first_available_num_format_locale(langs: &[LanguageIdentifier]) -> Option<Locale> {
for lang in langs {
if let Some(locale) = num_format_locale(lang) {
return Some(locale);
}
}
None
}
// Find the num_format locale matching a language identifier: the
// "language_region" name is tried first when a region is present, and the
// bare language name is used otherwise or when that lookup fails.
fn num_format_locale(lang: &LanguageIdentifier) -> Option<Locale> {
    let with_region = lang
        .region
        .and_then(|region| Locale::from_name(format!("{}_{}", lang.language, region)).ok());
    with_region.or_else(|| Locale::from_name(lang.language.as_str()).ok())
}
// True if the first language with a known num_format locale uses "," as
// its decimal separator. When no locale can be found, a period is assumed.
fn want_comma_as_decimal_separator(langs: &[LanguageIdentifier]) -> bool {
    first_available_num_format_locale(langs).map_or(false, |locale| locale.decimal() == ",")
}
// Bundle formatter that renders numbers with "," as the decimal separator.
// The memoizer argument is unused. Returns None for non-numeric values.
fn format_decimal_with_comma(
val: &fluent::FluentValue,
_intl: &intl_memoizer::concurrent::IntlLangMemoizer,
) -> Option<String> {
format_number_values(val, Some(","))
}
// Bundle formatter that renders numbers with "." as the decimal separator.
// The memoizer argument is unused. Returns None for non-numeric values.
fn format_decimal_with_period(
val: &fluent::FluentValue,
_intl: &intl_memoizer::concurrent::IntlLangMemoizer,
) -> Option<String> {
format_number_values(val, None)
}
// Format a numeric value with at most two fraction digits.
//
// Trailing zeros are removed, then added back as needed to satisfy the
// number's minimum_fraction_digits option. A dangling '.' is dropped, and
// if alt_separator is given it replaces the '.' in the output.
// Returns None for values that are not numbers.
#[inline]
fn format_number_values(
    val: &fluent::FluentValue,
    alt_separator: Option<&'static str>,
) -> Option<String> {
    match val {
        FluentValue::Number(num) => {
            // create a string with desired maximum digits
            let max_frac_digits = 2;
            let with_max_precision = format!(
                "{number:.precision$}",
                number = num.value,
                precision = max_frac_digits
            );

            // remove any excess trailing zeros
            let mut text: Cow<str> = with_max_precision.trim_end_matches('0').into();

            // adding back any required to meet minimum_fraction_digits
            if let Some(minfd) = num.options.minimum_fraction_digits {
                // non-finite values (NaN, inf) contain no '.', and are left as is
                if let Some(pos) = text.find('.') {
                    let frac_num = text.len() - pos - 1;
                    // the value may already have more digits than the minimum,
                    // so a plain subtraction could underflow
                    let zeros_needed = minfd.saturating_sub(frac_num);
                    if zeros_needed > 0 {
                        text = format!("{}{}", text, "0".repeat(zeros_needed)).into();
                    }
                }
            }

            // lop off any trailing '.'
            let result = text.trim_end_matches('.');

            Some(match alt_separator {
                Some(sep) => result.replace('.', sep),
                None => result.to_string(),
            })
        }
        _ => None,
    }
}
/// Serializable snapshot of the configured languages and their bundled
/// text, as produced by `I18n::resources_for_js`.
#[derive(Serialize)]
pub struct ResourcesForJavascript {
// language identifiers rendered as strings
langs: Vec<String>,
// bundled ftl text, one entry per language
resources: Vec<&'static str>,
}
#[cfg(test)]
mod test {
use super::*;
use crate::log;
use std::path::PathBuf;
use unic_langid::langid;
// The decimal separator is derived from the language's num_format locale.
#[test]
fn numbers() {
assert_eq!(want_comma_as_decimal_separator(&[langid!("en-US")]), false);
assert_eq!(want_comma_as_decimal_separator(&[langid!("pl-PL")]), true);
}
// End-to-end lookups using the sample strings injected in test mode.
// Requires the TEST_SRCDIR environment variable to be set; it is used as
// the root folder for external ftl files.
#[test]
fn i18n() {
let ftl_dir = PathBuf::from(std::env::var("TEST_SRCDIR").unwrap());
let log = log::terminal();
// English template
let i18n = I18n::new(&["zz"], &ftl_dir, log.clone());
assert_eq!(i18n.tr_("valid-key", None), "a valid key");
// unknown keys are returned unchanged
assert_eq!(i18n.tr_("invalid-key", None), "invalid-key");
assert_eq!(
i18n.tr_("two-args-key", Some(tr_args!["one"=>1.1, "two"=>"2"])),
"two args: 1.1 and 2"
);
// plural selection
assert_eq!(
i18n.tr_("plural", Some(tr_args!["hats"=>1.0])),
"You have 1 hat."
);
assert_eq!(
i18n.tr_("plural", Some(tr_args!["hats"=>1.1])),
"You have 1.1 hats."
);
assert_eq!(
i18n.tr_("plural", Some(tr_args!["hats"=>3])),
"You have 3 hats."
);
// Another language
let i18n = I18n::new(&["ja_JP"], &ftl_dir, log.clone());
assert_eq!(i18n.tr_("valid-key", None), "キー");
// keys missing from the Japanese text fall back on the English template
assert_eq!(i18n.tr_("only-in-english", None), "not translated");
assert_eq!(i18n.tr_("invalid-key", None), "invalid-key");
assert_eq!(
i18n.tr_("two-args-key", Some(tr_args!["one"=>1, "two"=>"2"])),
"1と2"
);
// Decimal separator
let i18n = I18n::new(&["pl-PL"], &ftl_dir, log.clone());
// Polish will use a comma if the string is translated
assert_eq!(
i18n.tr_("one-arg-key", Some(tr_args!["one"=>2.07])),
"fake Polish 2,07"
);
// but if it falls back on English, it will use an English separator
assert_eq!(
i18n.tr_("two-args-key", Some(tr_args!["one"=>1, "two"=>2.07])),
"two args: 1 and 2.07"
);
}
}