// Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::borrow::Cow; use lazy_static::lazy_static; use regex::{Captures, Regex}; use crate::{cloze::expand_clozes_to_reveal_latex, media::files::sha1_of_data, text::strip_html}; lazy_static! { static ref LATEX: Regex = Regex::new( r#"(?xsi) \[latex\](.+?)\[/latex\] # 1 - standard latex | \[\$\](.+?)\[/\$\] # 2 - inline math | \[\$\$\](.+?)\[/\$\$\] # 3 - math environment "# ) .unwrap(); static ref LATEX_NEWLINES: Regex = Regex::new( r#"(?xi) |
"# ) .unwrap(); } pub(crate) fn contains_latex(text: &str) -> bool { LATEX.is_match(text) } #[derive(Debug, PartialEq)] pub struct ExtractedLatex { pub fname: String, pub latex: String, } /// Expand any cloze deletions, then extract LaTeX. pub(crate) fn extract_latex_expanding_clozes( text: &str, svg: bool, ) -> (String, Vec) { let text: Cow = if text.contains("{{c") { expand_clozes_to_reveal_latex(text).into() } else { text.into() }; extract_latex(&text, svg) } /// Extract LaTeX from the provided text. /// Expects cloze deletions to already be expanded. pub(crate) fn extract_latex(text: &str, svg: bool) -> (String, Vec) { let mut extracted = vec![]; let new_text = LATEX.replace_all(text, |caps: &Captures| { let latex = match (caps.get(1), caps.get(2), caps.get(3)) { (Some(m), _, _) => m.as_str().into(), (_, Some(m), _) => format!("${}$", m.as_str()), (_, _, Some(m)) => format!(r"\begin{{displaymath}}{}\end{{displaymath}}", m.as_str()), _ => unreachable!(), }; let latex_text = strip_html_for_latex(&latex); let fname = fname_for_latex(&latex_text, svg); let img_link = image_link_for_fname(&latex_text, &fname); extracted.push(ExtractedLatex { fname, latex: latex_text.into(), }); img_link }); (new_text.into(), extracted) } fn strip_html_for_latex(html: &str) -> Cow { let mut out: Cow = html.into(); if let Cow::Owned(o) = LATEX_NEWLINES.replace_all(html, "\n") { out = o.into(); } if let Cow::Owned(o) = strip_html(out.as_ref()) { out = o.into(); } out } fn fname_for_latex(latex: &str, svg: bool) -> String { let ext = if svg { "svg" } else { "png" }; let csum = hex::encode(sha1_of_data(latex.as_bytes())); format!("latex-{}.{}", csum, ext) } fn image_link_for_fname(src: &str, fname: &str) -> String { format!( "\"{}\"", htmlescape::encode_attribute(src), fname ) } #[cfg(test)] mod test { use crate::latex::{extract_latex, ExtractedLatex}; #[test] fn latex() { let fname = "latex-ef30b3f4141c33a5bf7044b0d1961d3399c05d50.png"; assert_eq!( extract_latex("a[latex]one
and
two[/latex]b", false), ( format!( "a\"one
and
two\"b", fname ), vec![ExtractedLatex { fname: fname.into(), latex: "one\nand\ntwo".into() }] ) ); assert_eq!( extract_latex("[$]hello  world[/$]", true).1, vec![ExtractedLatex { fname: "latex-060219fbf3ddb74306abddaf4504276ad793b029.svg".to_string(), latex: "$hello world$".to_string() }] ); assert_eq!( extract_latex("[$$]math & stuff[/$$]", false).1, vec![ExtractedLatex { fname: "latex-8899f3f849ffdef6e4e9f2f34a923a1f608ebc07.png".to_string(), latex: r"\begin{displaymath}math & stuff\end{displaymath}".to_string() }] ); } }