move cloze-related code into a separate file

This commit is contained in:
Damien Elmes 2020-01-27 20:41:23 +10:00
parent 23f13a312b
commit 9ad80f4d2c
4 changed files with 142 additions and 134 deletions

139
rslib/src/cloze.rs Normal file
View File

@ -0,0 +1,139 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::template::RenderContext;
use crate::text::strip_html;
use lazy_static::lazy_static;
use regex::Captures;
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashSet;
lazy_static! {
// Matches a single cloze deletion written as {{cN::text}} or
// {{cN::text::hint}}. Pattern flags: x = whitespace and # comments inside
// the pattern are ignored, s = `.` also matches newlines, i = matching is
// case-insensitive. The capture group numbers are named in `cloze_caps`.
static ref CLOZE: Regex = Regex::new(
r#"(?xsi)
\{\{
c(\d+):: # 1 = cloze number
(.*?) # 2 = clozed text
(?:
::(.*?) # 3 = optional hint
)?
\}\}
"#
)
.unwrap();
// Matches a MathJax span: an opening `\(` or `\[`, the shortest possible
// run of content, then a closing `\]` or `\)`. The pattern does not require
// the opening and closing delimiters to be of the same kind. The capture
// group numbers are named in `mathjax_caps`.
static ref MATHJAX: Regex = Regex::new(
r#"(?xsi)
(\\[(\[]) # 1 = mathjax opening tag
(.*?) # 2 = inner content
(\\[])]) # 3 = mathjax closing tag
"#
)
.unwrap();
}
/// Capture group indices of the `CLOZE` regex.
mod cloze_caps {
    /// Group holding the cloze ordinal (the digits following `c`).
    pub const ORD: usize = 1;
    /// Group holding the occluded text.
    pub const TEXT: usize = 2;
    /// Group holding the hint; it only takes part in a match when a hint was written.
    pub const HINT: usize = 3;
}
/// Capture group indices of the `MATHJAX` regex.
mod mathjax_caps {
    /// Group holding the opening delimiter.
    pub const OPENING_TAG: usize = 1;
    /// Group holding the content between the delimiters.
    pub const INNER_TEXT: usize = 2;
    /// Group holding the closing delimiter.
    pub const CLOSING_TAG: usize = 3;
}
/// Render the cloze deletions in `text` for the card with ordinal `cloze_ord`.
///
/// Deletions whose ordinal equals `cloze_ord` are wrapped in
/// `<span class=cloze>…</span>`. When `question` is true the span shows the
/// hint in square brackets, or `[...]` when no hint was written; otherwise
/// it shows the clozed text. Deletions with any other ordinal are replaced
/// by their plain clozed text.
///
/// Returns an empty string when `text` holds no deletion with ordinal
/// `cloze_ord`, which includes the case of no deletions at all.
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
    let mut cloze_ord_was_in_text = false;
    let output = CLOZE.replace_all(text, |caps: &Captures| {
        // an ordinal that does not parse as u16 is treated as 0
        let captured_ord: u16 = caps[cloze_caps::ORD].parse().unwrap_or(0);
        let clozed_text = &caps[cloze_caps::TEXT];
        if captured_ord != cloze_ord {
            // other cloze deletions are shown as plain text
            return clozed_text.to_owned();
        }
        cloze_ord_was_in_text = true;

        let replacement = if question {
            // hint provided?
            match caps.get(cloze_caps::HINT) {
                Some(hint) => format!("[{}]", hint.as_str()),
                None => "[...]".to_string(),
            }
        } else {
            clozed_text.to_owned()
        };
        format!("<span class=cloze>{}</span>", replacement)
    });

    // The flag is only ever set inside the replacement closure, so when it is
    // set at least one replacement happened and `output` is an owned string;
    // no separate check for an untouched (borrowed) result is needed.
    if cloze_ord_was_in_text {
        output
    } else {
        // if the requested cloze deletion is not found, Anki returns an empty string
        "".into()
    }
}
/// Collect the ordinals of every cloze deletion that `CLOZE` finds in `html`.
///
/// Ordinals that do not parse as `u16` are skipped.
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
    let mut ordinals = HashSet::with_capacity(4);
    for caps in CLOZE.captures_iter(html) {
        // group 1 is the cloze ordinal
        let parsed: Option<u16> = caps[1].parse().ok();
        if let Some(ordinal) = parsed {
            ordinals.insert(ordinal);
        }
    }
    ordinals
}
/// Strip HTML from the content of every MathJax span in `text`.
///
/// Each match of `MATHJAX` is rebuilt from its opening delimiter, its inner
/// content passed through `strip_html`, and its closing delimiter. Text that
/// is not part of a match is left as it is.
fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
    MATHJAX.replace_all(text, |caps: &Captures| -> String {
        let opening = &caps[mathjax_caps::OPENING_TAG];
        let inner = strip_html(&caps[mathjax_caps::INNER_TEXT]);
        let closing = &caps[mathjax_caps::CLOSING_TAG];
        format!("{}{}{}", opening, inner, closing)
    })
}
/// Render the cloze deletions in `text` for the card described by `context`.
///
/// The cloze ordinal is `context.card_ord + 1`, and `context.question_side`
/// selects between question and answer rendering. HTML inside MathJax spans
/// is stripped from the rendered text.
pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> {
    let cloze_ord = context.card_ord + 1;
    let revealed = reveal_cloze_text(text, cloze_ord, context.question_side);
    // The stripped text borrows from the local `revealed`, so it has to be
    // turned into an owned string before it can be returned.
    let stripped = strip_html_inside_mathjax(revealed.as_ref()).into_owned();
    Cow::Owned(stripped)
}
#[cfg(test)]
mod test {
    use crate::cloze::cloze_numbers_in_string;
    use std::collections::HashSet;

    /// Checks which cloze ordinals are reported for a given string.
    #[test]
    fn test_cloze() {
        // text without cloze deletions yields no ordinals
        let none: HashSet<u16> = HashSet::new();
        assert_eq!(cloze_numbers_in_string("test"), none);

        // both ordinals are reported; the trailing "{{" is not a deletion
        let both: HashSet<u16> = [1, 2].iter().cloned().collect();
        assert_eq!(cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"), both);
    }
}

View File

@ -4,6 +4,7 @@
mod backend_proto;
pub mod backend;
pub mod cloze;
pub mod err;
pub mod sched;
pub mod template;

View File

@ -1,6 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::cloze::cloze_filter;
use crate::template::RenderContext;
use crate::text::strip_html;
use blake3::Hasher;
@ -93,110 +94,6 @@ fn apply_filter<'a>(
)
}
// Cloze filter
//----------------------------------------
lazy_static! {
// Matches a single cloze deletion written as {{cN::text}} or
// {{cN::text::hint}}. Pattern flags: x = whitespace and # comments inside
// the pattern are ignored, s = `.` also matches newlines, i = matching is
// case-insensitive. The capture group numbers are named in `cloze_caps`.
static ref CLOZE: Regex = Regex::new(
r#"(?xsi)
\{\{
c(\d+):: # 1 = cloze number
(.*?) # 2 = clozed text
(?:
::(.*?) # 3 = optional hint
)?
\}\}
"#
)
.unwrap();
// Matches a MathJax span: an opening `\(` or `\[`, the shortest possible
// run of content, then a closing `\]` or `\)`. The pattern does not require
// the opening and closing delimiters to be of the same kind. The capture
// group numbers are named in `mathjax_caps`.
static ref MATHJAX: Regex = Regex::new(
r#"(?xsi)
(\\[(\[]) # 1 = mathjax opening tag
(.*?) # 2 = inner content
(\\[])]) # 3 = mathjax closing tag
"#
)
.unwrap();
}
/// Capture group indices of the `CLOZE` regex.
mod cloze_caps {
    /// Group holding the cloze ordinal (the digits following `c`).
    pub const ORD: usize = 1;
    /// Group holding the occluded text.
    pub const TEXT: usize = 2;
    /// Group holding the hint; it only takes part in a match when a hint was written.
    pub const HINT: usize = 3;
}
/// Capture group indices of the `MATHJAX` regex.
mod mathjax_caps {
    /// Group holding the opening delimiter.
    pub const OPENING_TAG: usize = 1;
    /// Group holding the content between the delimiters.
    pub const INNER_TEXT: usize = 2;
    /// Group holding the closing delimiter.
    pub const CLOSING_TAG: usize = 3;
}
/// Render the cloze deletions in `text` for the card with ordinal `cloze_ord`.
///
/// Deletions whose ordinal equals `cloze_ord` are wrapped in
/// `<span class=cloze>…</span>`. When `question` is true the span shows the
/// hint in square brackets, or `[...]` when no hint was written; otherwise
/// it shows the clozed text. Deletions with any other ordinal are replaced
/// by their plain clozed text.
///
/// Returns an empty string when `text` holds no deletion with ordinal
/// `cloze_ord`, which includes the case of no deletions at all.
fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
    let mut cloze_ord_was_in_text = false;
    let output = CLOZE.replace_all(text, |caps: &Captures| {
        // an ordinal that does not parse as u16 is treated as 0
        let captured_ord: u16 = caps[cloze_caps::ORD].parse().unwrap_or(0);
        let clozed_text = &caps[cloze_caps::TEXT];
        if captured_ord != cloze_ord {
            // other cloze deletions are shown as plain text
            return clozed_text.to_owned();
        }
        cloze_ord_was_in_text = true;

        let replacement = if question {
            // hint provided?
            match caps.get(cloze_caps::HINT) {
                Some(hint) => format!("[{}]", hint.as_str()),
                None => "[...]".to_string(),
            }
        } else {
            clozed_text.to_owned()
        };
        format!("<span class=cloze>{}</span>", replacement)
    });

    // The flag is only ever set inside the replacement closure, so when it is
    // set at least one replacement happened and `output` is an owned string;
    // no separate check for an untouched (borrowed) result is needed.
    if cloze_ord_was_in_text {
        output
    } else {
        // if the requested cloze deletion is not found, Anki returns an empty string
        "".into()
    }
}
/// Strip HTML from the content of every MathJax span in `text`.
///
/// Each match of `MATHJAX` is rebuilt from its opening delimiter, its inner
/// content passed through `strip_html`, and its closing delimiter. Text that
/// is not part of a match is left as it is.
fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
    MATHJAX.replace_all(text, |caps: &Captures| -> String {
        let opening = &caps[mathjax_caps::OPENING_TAG];
        let inner = strip_html(&caps[mathjax_caps::INNER_TEXT]);
        let closing = &caps[mathjax_caps::CLOSING_TAG];
        format!("{}{}{}", opening, inner, closing)
    })
}
/// Render the cloze deletions in `text` for the card described by `context`.
///
/// The cloze ordinal is `context.card_ord + 1`, and `context.question_side`
/// selects between question and answer rendering. HTML inside MathJax spans
/// is stripped from the rendered text.
fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> {
    let cloze_ord = context.card_ord + 1;
    let revealed = reveal_cloze_text(text, cloze_ord, context.question_side);
    // The stripped text borrows from the local `revealed`, so it has to be
    // turned into an owned string before it can be returned.
    let stripped = strip_html_inside_mathjax(revealed.as_ref()).into_owned();
    Cow::Owned(stripped)
}
// Ruby filters
//----------------------------------------

View File

@ -5,7 +5,6 @@ use htmlescape;
use lazy_static::lazy_static;
use regex::{Captures, Regex};
use std::borrow::Cow;
use std::collections::HashSet;
use std::ptr;
#[derive(Debug, PartialEq)]
@ -45,10 +44,6 @@ lazy_static! {
(.*?) # 3 - field text
\[/anki:tts\]
"#).unwrap();
static ref CLOZED_TEXT: Regex = Regex::new(
r"(?s)\{\{c(\d+)::.+?\}\}"
).unwrap();
}
pub fn strip_html(html: &str) -> Cow<str> {
@ -148,23 +143,11 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
without_html.into_owned().into()
}
/// Collect the ordinals of every cloze deletion that `CLOZED_TEXT` finds in `html`.
///
/// Ordinals that do not parse as `u16` are skipped.
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
    let mut ordinals = HashSet::with_capacity(4);
    for caps in CLOZED_TEXT.captures_iter(html) {
        // group 1 is the cloze ordinal
        let parsed: Option<u16> = caps[1].parse().ok();
        if let Some(ordinal) = parsed {
            ordinals.insert(ordinal);
        }
    }
    ordinals
}
#[cfg(test)]
mod test {
use crate::text::{
cloze_numbers_in_string, extract_av_tags, strip_av_tags, strip_html,
strip_html_preserving_image_filenames, AVTag,
extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_image_filenames, AVTag,
};
use std::collections::HashSet;
#[test]
fn test_stripping() {
@ -183,18 +166,6 @@ mod test {
assert_eq!(strip_html_preserving_image_filenames("<html>"), "");
}
/// Checks which cloze ordinals are reported for a given string.
#[test]
fn test_cloze() {
    // text without cloze deletions yields no ordinals
    let none: HashSet<u16> = HashSet::new();
    assert_eq!(cloze_numbers_in_string("test"), none);

    // both ordinals are reported; the trailing "{{" is not a deletion
    let both: HashSet<u16> = [1, 2].iter().cloned().collect();
    assert_eq!(cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"), both);
}
#[test]
fn test_audio() {
let s =