diff --git a/rslib/src/cloze.rs b/rslib/src/cloze.rs new file mode 100644 index 000000000..c14624c34 --- /dev/null +++ b/rslib/src/cloze.rs @@ -0,0 +1,139 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use crate::template::RenderContext; +use crate::text::strip_html; +use lazy_static::lazy_static; +use regex::Captures; +use regex::Regex; +use std::borrow::Cow; +use std::collections::HashSet; + +lazy_static! { + static ref CLOZE: Regex = Regex::new( + r#"(?xsi) + \{\{ + c(\d+):: # 1 = cloze number + (.*?) # 2 = clozed text + (?: + ::(.*?) # 3 = optional hint + )? + \}\} + "# + ) + .unwrap(); + static ref MATHJAX: Regex = Regex::new( + r#"(?xsi) + (\\[(\[]) # 1 = mathjax opening tag + (.*?) # 2 = inner content + (\\[])]) # 3 = mathjax closing tag + "# + ) + .unwrap(); +} + +mod cloze_caps { + // cloze ordinal + pub const ORD: usize = 1; + // the occluded text + pub const TEXT: usize = 2; + // optional hint + pub const HINT: usize = 3; +} + +mod mathjax_caps { + pub const OPENING_TAG: usize = 1; + pub const INNER_TEXT: usize = 2; + pub const CLOSING_TAG: usize = 3; +} + +pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow { + let mut cloze_ord_was_in_text = false; + + let output = CLOZE.replace_all(text, |caps: &Captures| { + let captured_ord = caps + .get(cloze_caps::ORD) + .unwrap() + .as_str() + .parse() + .unwrap_or(0); + + if captured_ord != cloze_ord { + // other cloze deletions are unchanged + return caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned(); + } else { + cloze_ord_was_in_text = true; + } + + let replacement; + if question { + // hint provided? + if let Some(hint) = caps.get(cloze_caps::HINT) { + replacement = format!("[{}]", hint.as_str()); + } else { + replacement = "[...]".to_string() + } + } else { + replacement = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned(); + } + + format!("{}", replacement) + }); + + if !cloze_ord_was_in_text { + return "".into(); + } + + // if no cloze deletions are found, Anki returns an empty string + match output { + Cow::Borrowed(_) => "".into(), + other => other, + } +} + +pub fn cloze_numbers_in_string(html: &str) -> HashSet { + let mut hash = HashSet::with_capacity(4); + for cap in CLOZE.captures_iter(html) { + if let Ok(n) = cap[1].parse() { + hash.insert(n); + } + } + hash +} + +fn strip_html_inside_mathjax(text: &str) -> Cow { + MATHJAX.replace_all(text, |caps: &Captures| -> String { + format!( + "{}{}{}", + caps.get(mathjax_caps::OPENING_TAG).unwrap().as_str(), + strip_html(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str()).as_ref(), + caps.get(mathjax_caps::CLOSING_TAG).unwrap().as_str() + ) + }) +} + +pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> { + strip_html_inside_mathjax( + reveal_cloze_text(text, context.card_ord + 1, context.question_side).as_ref(), + ) + .into_owned() + .into() +} + +#[cfg(test)] +mod test { + use crate::cloze::cloze_numbers_in_string; + use std::collections::HashSet; + + #[test] + fn test_cloze() { + assert_eq!( + cloze_numbers_in_string("test"), + vec![].into_iter().collect::>() + ); + assert_eq!( + cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"), + vec![1, 2].into_iter().collect::>() + ); + } +} diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index 2d33dff60..eb2150b6b 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -4,6 +4,7 @@ mod backend_proto; pub mod backend; +pub mod cloze; pub mod err; pub mod sched; pub mod template; diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs index 4519a480e..fc9e27532 100644 --- a/rslib/src/template_filters.rs +++ b/rslib/src/template_filters.rs @@ -1,6 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use crate::cloze::cloze_filter; use crate::template::RenderContext; use crate::text::strip_html; use blake3::Hasher; @@ -93,110 +94,6 @@ fn apply_filter<'a>( ) } -// Cloze filter -//---------------------------------------- - -lazy_static! { - static ref CLOZE: Regex = Regex::new( - r#"(?xsi) - \{\{ - c(\d+):: # 1 = cloze number - (.*?) # 2 = clozed text - (?: - ::(.*?) # 3 = optional hint - )? - \}\} - "# - ) - .unwrap(); - static ref MATHJAX: Regex = Regex::new( - r#"(?xsi) - (\\[(\[]) # 1 = mathjax opening tag - (.*?) # 2 = inner content - (\\[])]) # 3 = mathjax closing tag - "# - ) - .unwrap(); -} - -mod cloze_caps { - // cloze ordinal - pub const ORD: usize = 1; - // the occluded text - pub const TEXT: usize = 2; - // optional hint - pub const HINT: usize = 3; -} - -mod mathjax_caps { - pub const OPENING_TAG: usize = 1; - pub const INNER_TEXT: usize = 2; - pub const CLOSING_TAG: usize = 3; -} - -fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow { - let mut cloze_ord_was_in_text = false; - - let output = CLOZE.replace_all(text, |caps: &Captures| { - let captured_ord = caps - .get(cloze_caps::ORD) - .unwrap() - .as_str() - .parse() - .unwrap_or(0); - - if captured_ord != cloze_ord { - // other cloze deletions are unchanged - return caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned(); - } else { - cloze_ord_was_in_text = true; - } - - let replacement; - if question { - // hint provided? - if let Some(hint) = caps.get(cloze_caps::HINT) { - replacement = format!("[{}]", hint.as_str()); - } else { - replacement = "[...]".to_string() - } - } else { - replacement = caps.get(cloze_caps::TEXT).unwrap().as_str().to_owned(); - } - - format!("{}", replacement) - }); - - if !cloze_ord_was_in_text { - return "".into(); - } - - // if no cloze deletions are found, Anki returns an empty string - match output { - Cow::Borrowed(_) => "".into(), - other => other, - } -} - -fn strip_html_inside_mathjax(text: &str) -> Cow { - MATHJAX.replace_all(text, |caps: &Captures| -> String { - format!( - "{}{}{}", - caps.get(mathjax_caps::OPENING_TAG).unwrap().as_str(), - strip_html(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str()).as_ref(), - caps.get(mathjax_caps::CLOSING_TAG).unwrap().as_str() - ) - }) -} - -fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> { - strip_html_inside_mathjax( - reveal_cloze_text(text, context.card_ord + 1, context.question_side).as_ref(), - ) - .into_owned() - .into() -} - // Ruby filters //---------------------------------------- diff --git a/rslib/src/text.rs b/rslib/src/text.rs index f0a97a192..1b133b49a 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -5,7 +5,6 @@ use htmlescape; use lazy_static::lazy_static; use regex::{Captures, Regex}; use std::borrow::Cow; -use std::collections::HashSet; use std::ptr; #[derive(Debug, PartialEq)] @@ -45,10 +44,6 @@ lazy_static! { (.*?) # 3 - field text \[/anki:tts\] "#).unwrap(); - - static ref CLOZED_TEXT: Regex = Regex::new( - r"(?s)\{\{c(\d+)::.+?\}\}" - ).unwrap(); } pub fn strip_html(html: &str) -> Cow { @@ -148,23 +143,11 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow { without_html.into_owned().into() } -pub fn cloze_numbers_in_string(html: &str) -> HashSet { - let mut hash = HashSet::with_capacity(4); - for cap in CLOZED_TEXT.captures_iter(html) { - if let Ok(n) = cap[1].parse() { - hash.insert(n); - } - } - hash -} - #[cfg(test)] mod test { use crate::text::{ - cloze_numbers_in_string, extract_av_tags, strip_av_tags, strip_html, - strip_html_preserving_image_filenames, AVTag, + extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_image_filenames, AVTag, }; - use std::collections::HashSet; #[test] fn test_stripping() { @@ -183,18 +166,6 @@ mod test { assert_eq!(strip_html_preserving_image_filenames(""), ""); } - #[test] - fn test_cloze() { - assert_eq!( - cloze_numbers_in_string("test"), - vec![].into_iter().collect::>() - ); - assert_eq!( - cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"), - vec![1, 2].into_iter().collect::>() - ); - } - #[test] fn test_audio() { let s =