14940a617b
* Fix IO groups breaking upon editing * Emit change signal after group/ungroup
611 lines
19 KiB
Rust
611 lines
19 KiB
Rust
// Copyright: Ankitects Pty Ltd and contributors
|
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
|
|
|
use std::borrow::Cow;
|
|
use std::collections::HashMap;
|
|
use std::collections::HashSet;
|
|
use std::fmt::Write;
|
|
|
|
use anki_proto::image_occlusion::get_image_occlusion_note_response::ImageOcclusion;
|
|
use anki_proto::image_occlusion::get_image_occlusion_note_response::ImageOcclusionShape;
|
|
use htmlescape::encode_attribute;
|
|
use lazy_static::lazy_static;
|
|
use nom::branch::alt;
|
|
use nom::bytes::complete::tag;
|
|
use nom::bytes::complete::take_while;
|
|
use nom::combinator::map;
|
|
use nom::IResult;
|
|
use regex::Captures;
|
|
use regex::Regex;
|
|
|
|
use crate::image_occlusion::imageocclusion::get_image_cloze_data;
|
|
use crate::image_occlusion::imageocclusion::parse_image_cloze;
|
|
use crate::latex::contains_latex;
|
|
use crate::template::RenderContext;
|
|
use crate::text::strip_html_preserving_entities;
|
|
|
|
lazy_static! {
    /// Matches one MathJax expression delimited by `\(`/`\[` and `\)`/`\]`.
    ///
    /// Flags: `x` (verbose, so the layout and `#` comments below are ignored),
    /// `s` (`.` also matches newlines) and `i` (case-insensitive). The capture
    /// group numbers are mirrored by the constants in `mathjax_caps`.
    static ref MATHJAX: Regex = Regex::new(
        r"(?xsi)
            (\\[(\[])       # 1 = mathjax opening tag
            (.*?)           # 2 = inner content
            (\\[])])        # 3 = mathjax closing tag
           "
    )
    .unwrap();
}
|
|
|
|
/// Capture-group indices for the `MATHJAX` regex.
mod mathjax_caps {
    /// Group holding the opening delimiter.
    pub const OPENING_TAG: usize = 1;
    /// Group holding the text between the delimiters.
    pub const INNER_TEXT: usize = 2;
    /// Group holding the closing delimiter.
    pub const CLOSING_TAG: usize = 3;
}
|
|
|
|
/// A lexical unit produced by `tokenize`.
#[derive(Debug)]
enum Token<'a> {
    /// An opening marker of the form `{{c<N>::`, carrying the ordinal `N`.
    OpenCloze(u16),
    /// A run of text containing no opening or closing marker.
    Text(&'a str),
    /// A closing `}}` marker.
    CloseCloze,
}
|
|
|
|
/// Tokenize string
|
|
fn tokenize(mut text: &str) -> impl Iterator<Item = Token> {
|
|
fn open_cloze(text: &str) -> IResult<&str, Token> {
|
|
// opening brackets and 'c'
|
|
let (text, _opening_brackets_and_c) = tag("{{c")(text)?;
|
|
// following number
|
|
let (text, digits) = take_while(|c: char| c.is_ascii_digit())(text)?;
|
|
let digits: u16 = match digits.parse() {
|
|
Ok(digits) => digits,
|
|
Err(_) => {
|
|
// not a valid number; fail to recognize
|
|
return Err(nom::Err::Error(nom::error::make_error(
|
|
text,
|
|
nom::error::ErrorKind::Digit,
|
|
)));
|
|
}
|
|
};
|
|
// ::
|
|
let (text, _colons) = tag("::")(text)?;
|
|
Ok((text, Token::OpenCloze(digits)))
|
|
}
|
|
|
|
fn close_cloze(text: &str) -> IResult<&str, Token> {
|
|
map(tag("}}"), |_| Token::CloseCloze)(text)
|
|
}
|
|
|
|
/// Match a run of text until an open/close marker is encountered.
|
|
fn normal_text(text: &str) -> IResult<&str, Token> {
|
|
if text.is_empty() {
|
|
return Err(nom::Err::Error(nom::error::make_error(
|
|
text,
|
|
nom::error::ErrorKind::Eof,
|
|
)));
|
|
}
|
|
let mut other_token = alt((open_cloze, close_cloze));
|
|
// start with the no-match case
|
|
let mut index = text.len();
|
|
for (idx, _) in text.char_indices() {
|
|
if other_token(&text[idx..]).is_ok() {
|
|
index = idx;
|
|
break;
|
|
}
|
|
}
|
|
Ok((&text[index..], Token::Text(&text[0..index])))
|
|
}
|
|
|
|
std::iter::from_fn(move || {
|
|
if text.is_empty() {
|
|
None
|
|
} else {
|
|
let (remaining_text, token) =
|
|
alt((open_cloze, close_cloze, normal_text))(text).unwrap();
|
|
text = remaining_text;
|
|
Some(token)
|
|
}
|
|
})
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
enum TextOrCloze<'a> {
|
|
Text(&'a str),
|
|
Cloze(ExtractedCloze<'a>),
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct ExtractedCloze<'a> {
|
|
ordinal: u16,
|
|
nodes: Vec<TextOrCloze<'a>>,
|
|
hint: Option<&'a str>,
|
|
}
|
|
|
|
impl ExtractedCloze<'_> {
|
|
/// Return the cloze's hint, or "..." if none was provided.
|
|
fn hint(&self) -> &str {
|
|
self.hint.unwrap_or("...")
|
|
}
|
|
|
|
fn clozed_text(&self) -> Cow<str> {
|
|
// happy efficient path?
|
|
if self.nodes.len() == 1 {
|
|
if let TextOrCloze::Text(text) = self.nodes.last().unwrap() {
|
|
return (*text).into();
|
|
}
|
|
}
|
|
|
|
let mut buf = String::new();
|
|
for node in &self.nodes {
|
|
match node {
|
|
TextOrCloze::Text(text) => buf.push_str(text),
|
|
TextOrCloze::Cloze(cloze) => buf.push_str(&cloze.clozed_text()),
|
|
}
|
|
}
|
|
|
|
buf.into()
|
|
}
|
|
|
|
/// If cloze starts with image-occlusion:, return the text following that.
|
|
fn image_occlusion(&self) -> Option<&str> {
|
|
let Some(first_node) = self.nodes.get(0) else {
|
|
return None;
|
|
};
|
|
let TextOrCloze::Text(text) = first_node else {
|
|
return None;
|
|
};
|
|
text.strip_prefix("image-occlusion:")
|
|
}
|
|
}
|
|
|
|
fn parse_text_with_clozes(text: &str) -> Vec<TextOrCloze<'_>> {
|
|
let mut open_clozes: Vec<ExtractedCloze> = vec![];
|
|
let mut output = vec![];
|
|
for token in tokenize(text) {
|
|
match token {
|
|
Token::OpenCloze(ordinal) => open_clozes.push(ExtractedCloze {
|
|
ordinal,
|
|
nodes: Vec::with_capacity(1), // common case
|
|
hint: None,
|
|
}),
|
|
Token::Text(mut text) => {
|
|
if let Some(cloze) = open_clozes.last_mut() {
|
|
// extract hint if found
|
|
if let Some((head, tail)) = text.split_once("::") {
|
|
text = head;
|
|
cloze.hint = Some(tail);
|
|
}
|
|
cloze.nodes.push(TextOrCloze::Text(text));
|
|
} else {
|
|
output.push(TextOrCloze::Text(text));
|
|
}
|
|
}
|
|
Token::CloseCloze => {
|
|
// take the currently active cloze
|
|
if let Some(cloze) = open_clozes.pop() {
|
|
let target = if let Some(outer_cloze) = open_clozes.last_mut() {
|
|
// and place it into the cloze layer above
|
|
&mut outer_cloze.nodes
|
|
} else {
|
|
// or the top level if no other clozes active
|
|
&mut output
|
|
};
|
|
target.push(TextOrCloze::Cloze(cloze));
|
|
} else {
|
|
// closing marker outside of any clozes
|
|
output.push(TextOrCloze::Text("}}"))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
output
|
|
}
|
|
|
|
fn reveal_cloze_text_in_nodes(
|
|
node: &TextOrCloze,
|
|
cloze_ord: u16,
|
|
question: bool,
|
|
output: &mut Vec<String>,
|
|
) {
|
|
if let TextOrCloze::Cloze(cloze) = node {
|
|
if cloze.ordinal == cloze_ord {
|
|
if question {
|
|
output.push(cloze.hint().into())
|
|
} else {
|
|
output.push(cloze.clozed_text().into())
|
|
}
|
|
}
|
|
for node in &cloze.nodes {
|
|
reveal_cloze_text_in_nodes(node, cloze_ord, question, output);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn reveal_cloze(
|
|
cloze: &ExtractedCloze,
|
|
cloze_ord: u16,
|
|
question: bool,
|
|
active_cloze_found_in_text: &mut bool,
|
|
buf: &mut String,
|
|
) {
|
|
let active = cloze.ordinal == cloze_ord;
|
|
*active_cloze_found_in_text |= active;
|
|
if let Some(image_occlusion_text) = cloze.image_occlusion() {
|
|
buf.push_str(&render_image_occlusion(
|
|
image_occlusion_text,
|
|
question,
|
|
active,
|
|
cloze.ordinal,
|
|
));
|
|
return;
|
|
}
|
|
match (question, active) {
|
|
(true, true) => {
|
|
// question side with active cloze; all inner content is elided
|
|
let mut content_buf = String::new();
|
|
for node in &cloze.nodes {
|
|
match node {
|
|
TextOrCloze::Text(text) => content_buf.push_str(text),
|
|
TextOrCloze::Cloze(cloze) => reveal_cloze(
|
|
cloze,
|
|
cloze_ord,
|
|
question,
|
|
active_cloze_found_in_text,
|
|
&mut content_buf,
|
|
),
|
|
}
|
|
}
|
|
write!(
|
|
buf,
|
|
r#"<span class="cloze" data-cloze="{}" data-ordinal="{}">[{}]</span>"#,
|
|
encode_attribute(&content_buf),
|
|
cloze.ordinal,
|
|
cloze.hint()
|
|
)
|
|
.unwrap();
|
|
}
|
|
(false, true) => {
|
|
write!(
|
|
buf,
|
|
r#"<span class="cloze" data-ordinal="{}">"#,
|
|
cloze.ordinal
|
|
)
|
|
.unwrap();
|
|
for node in &cloze.nodes {
|
|
match node {
|
|
TextOrCloze::Text(text) => buf.push_str(text),
|
|
TextOrCloze::Cloze(cloze) => {
|
|
reveal_cloze(cloze, cloze_ord, question, active_cloze_found_in_text, buf)
|
|
}
|
|
}
|
|
}
|
|
buf.push_str("</span>");
|
|
}
|
|
(_, false) => {
|
|
// question or answer side inactive cloze; text shown, children may be active
|
|
write!(
|
|
buf,
|
|
r#"<span class="cloze-inactive" data-ordinal="{}">"#,
|
|
cloze.ordinal
|
|
)
|
|
.unwrap();
|
|
for node in &cloze.nodes {
|
|
match node {
|
|
TextOrCloze::Text(text) => buf.push_str(text),
|
|
TextOrCloze::Cloze(cloze) => {
|
|
reveal_cloze(cloze, cloze_ord, question, active_cloze_found_in_text, buf)
|
|
}
|
|
}
|
|
}
|
|
buf.push_str("</span>")
|
|
}
|
|
}
|
|
}
|
|
|
|
fn render_image_occlusion(text: &str, question_side: bool, active: bool, ordinal: u16) -> String {
|
|
if (question_side && active) || ordinal == 0 {
|
|
format!(
|
|
r#"<div class="cloze" data-ordinal="{}" {}></div>"#,
|
|
ordinal,
|
|
&get_image_cloze_data(text)
|
|
)
|
|
} else if !active {
|
|
format!(
|
|
r#"<div class="cloze-inactive" data-ordinal="{}" {}></div>"#,
|
|
ordinal,
|
|
&get_image_cloze_data(text)
|
|
)
|
|
} else {
|
|
"".into()
|
|
}
|
|
}
|
|
|
|
pub fn parse_image_occlusions(text: &str) -> Vec<ImageOcclusion> {
|
|
let mut occlusions: HashMap<u16, Vec<ImageOcclusionShape>> = HashMap::new();
|
|
for node in parse_text_with_clozes(text) {
|
|
if let TextOrCloze::Cloze(cloze) = node {
|
|
if cloze.image_occlusion().is_some() {
|
|
if let Some(shape) = parse_image_cloze(cloze.image_occlusion().unwrap()) {
|
|
occlusions.entry(cloze.ordinal).or_default().push(shape);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
occlusions
|
|
.values()
|
|
.map(|v| ImageOcclusion { shapes: v.to_vec() })
|
|
.collect()
|
|
}
|
|
|
|
pub fn reveal_cloze_text(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
|
|
let mut buf = String::new();
|
|
let mut active_cloze_found_in_text = false;
|
|
for node in &parse_text_with_clozes(text) {
|
|
match node {
|
|
// top-level text is indiscriminately added
|
|
TextOrCloze::Text(text) => buf.push_str(text),
|
|
TextOrCloze::Cloze(cloze) => reveal_cloze(
|
|
cloze,
|
|
cloze_ord,
|
|
question,
|
|
&mut active_cloze_found_in_text,
|
|
&mut buf,
|
|
),
|
|
}
|
|
}
|
|
if active_cloze_found_in_text {
|
|
buf.into()
|
|
} else {
|
|
Cow::from("")
|
|
}
|
|
}
|
|
|
|
pub fn reveal_cloze_text_only(text: &str, cloze_ord: u16, question: bool) -> Cow<str> {
|
|
let mut output = Vec::new();
|
|
for node in &parse_text_with_clozes(text) {
|
|
reveal_cloze_text_in_nodes(node, cloze_ord, question, &mut output);
|
|
}
|
|
output.join(", ").into()
|
|
}
|
|
|
|
pub fn extract_cloze_for_typing(text: &str, cloze_ord: u16) -> Cow<str> {
|
|
let mut output = Vec::new();
|
|
for node in &parse_text_with_clozes(text) {
|
|
reveal_cloze_text_in_nodes(node, cloze_ord, false, &mut output);
|
|
}
|
|
if output.is_empty() {
|
|
"".into()
|
|
} else if output.iter().min() == output.iter().max() {
|
|
// If all matches are identical text, they get collapsed into a single entry
|
|
output.pop().unwrap().into()
|
|
} else {
|
|
output.join(", ").into()
|
|
}
|
|
}
|
|
|
|
/// If text contains any LaTeX tags, render the front and back
|
|
/// of each cloze deletion so that LaTeX can be generated. If
|
|
/// no LaTeX is found, returns an empty string.
|
|
pub fn expand_clozes_to_reveal_latex(text: &str) -> String {
|
|
if !contains_latex(text) {
|
|
return "".into();
|
|
}
|
|
let ords = cloze_numbers_in_string(text);
|
|
let mut buf = String::new();
|
|
for ord in ords {
|
|
buf += reveal_cloze_text(text, ord, true).as_ref();
|
|
buf += reveal_cloze_text(text, ord, false).as_ref();
|
|
}
|
|
|
|
buf
|
|
}
|
|
|
|
pub(crate) fn contains_cloze(text: &str) -> bool {
|
|
parse_text_with_clozes(text)
|
|
.iter()
|
|
.any(|node| matches!(node, TextOrCloze::Cloze(e) if e.ordinal != 0))
|
|
}
|
|
|
|
pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
|
|
let mut set = HashSet::with_capacity(4);
|
|
add_cloze_numbers_in_string(html, &mut set);
|
|
set
|
|
}
|
|
|
|
fn add_cloze_numbers_in_text_with_clozes(nodes: &[TextOrCloze], set: &mut HashSet<u16>) {
|
|
for node in nodes {
|
|
if let TextOrCloze::Cloze(cloze) = node {
|
|
if !(cloze.image_occlusion().is_some() && cloze.ordinal == 0) {
|
|
set.insert(cloze.ordinal);
|
|
add_cloze_numbers_in_text_with_clozes(&cloze.nodes, set);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::implicit_hasher)]
|
|
pub fn add_cloze_numbers_in_string(field: &str, set: &mut HashSet<u16>) {
|
|
add_cloze_numbers_in_text_with_clozes(&parse_text_with_clozes(field), set)
|
|
}
|
|
|
|
fn strip_html_inside_mathjax(text: &str) -> Cow<str> {
|
|
MATHJAX.replace_all(text, |caps: &Captures| -> String {
|
|
format!(
|
|
"{}{}{}",
|
|
caps.get(mathjax_caps::OPENING_TAG).unwrap().as_str(),
|
|
strip_html_preserving_entities(caps.get(mathjax_caps::INNER_TEXT).unwrap().as_str())
|
|
.as_ref(),
|
|
caps.get(mathjax_caps::CLOSING_TAG).unwrap().as_str()
|
|
)
|
|
})
|
|
}
|
|
|
|
pub(crate) fn cloze_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> {
|
|
strip_html_inside_mathjax(
|
|
reveal_cloze_text(text, context.card_ord + 1, context.frontside.is_none()).as_ref(),
|
|
)
|
|
.into_owned()
|
|
.into()
|
|
}
|
|
|
|
pub(crate) fn cloze_only_filter<'a>(text: &'a str, context: &RenderContext) -> Cow<'a, str> {
|
|
reveal_cloze_text_only(text, context.card_ord + 1, context.frontside.is_none())
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use std::collections::HashSet;

    use super::*;
    use crate::text::strip_html;

    /// Ordinal discovery, and LaTeX expansion of both card sides.
    #[test]
    fn cloze() {
        assert_eq!(
            cloze_numbers_in_string("test"),
            vec![].into_iter().collect::<HashSet<u16>>()
        );
        assert_eq!(
            cloze_numbers_in_string("{{c2::te}}{{c1::s}}t{{"),
            vec![1, 2].into_iter().collect::<HashSet<u16>>()
        );

        assert_eq!(
            expand_clozes_to_reveal_latex("{{c1::foo}} {{c2::bar::baz}}"),
            "".to_string()
        );

        let expanded = expand_clozes_to_reveal_latex("[latex]{{c1::foo}} {{c2::bar::baz}}[/latex]");
        let expanded = strip_html(expanded.as_ref());
        assert!(expanded.contains("foo [baz]"));
        assert!(expanded.contains("[...] bar"));
        assert!(expanded.contains("foo bar"));
    }

    /// The text-only view returns hints on the question side and content on
    /// the answer side.
    #[test]
    fn cloze_only() {
        assert_eq!(reveal_cloze_text_only("foo", 1, true), "");
        assert_eq!(reveal_cloze_text_only("foo {{c1::bar}}", 1, true), "...");
        assert_eq!(
            reveal_cloze_text_only("foo {{c1::bar::baz}}", 1, true),
            "baz"
        );
        assert_eq!(reveal_cloze_text_only("foo {{c1::bar}}", 1, false), "bar");
        assert_eq!(reveal_cloze_text_only("foo {{c1::bar}}", 2, false), "");
        assert_eq!(
            reveal_cloze_text_only("{{c1::foo}} {{c1::bar}}", 1, false),
            "foo, bar"
        );
    }

    /// Identical answers collapse to one entry; differing ones are joined.
    #[test]
    fn clozes_for_typing() {
        assert_eq!(extract_cloze_for_typing("{{c2::foo}}", 1), "");
        assert_eq!(
            extract_cloze_for_typing("{{c1::foo}} {{c1::bar}} {{c1::foo}}", 1),
            "foo, bar, foo"
        );
        assert_eq!(
            extract_cloze_for_typing("{{c1::foo}} {{c1::foo}} {{c1::foo}}", 1),
            "foo"
        );
    }

    /// Nested clozes, compared after stripping the generated markup.
    #[test]
    fn nested_cloze_plain_text() {
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, true).as_ref()),
            "foo [...]"
        );
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, false).as_ref()),
            "foo bar baz"
        );
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true).as_ref()),
            "foo bar [...]"
        );
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false).as_ref()),
            "foo bar baz"
        );
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true).as_ref()),
            "foo [qux]"
        );
        assert_eq!(
            strip_html(reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false).as_ref()),
            "foo bar baz"
        );
    }

    /// Nested clozes, compared against the exact generated markup.
    #[test]
    fn nested_cloze_html() {
        assert_eq!(
            cloze_numbers_in_string("{{c2::te{{c1::s}}}}t{{"),
            vec![1, 2].into_iter().collect::<HashSet<u16>>()
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, true),
            format!(
                r#"foo <span class="cloze" data-cloze="{}" data-ordinal="1">[...]</span>"#,
                htmlescape::encode_attribute(
                    r#"bar <span class="cloze-inactive" data-ordinal="2">baz</span>"#
                )
            )
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}}}", 1, false),
            r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze-inactive" data-ordinal="2">baz</span></span>"#
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, true),
            r#"foo <span class="cloze-inactive" data-ordinal="1">bar <span class="cloze" data-cloze="baz" data-ordinal="2">[...]</span></span>"#
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 2, false),
            r#"foo <span class="cloze-inactive" data-ordinal="1">bar <span class="cloze" data-ordinal="2">baz</span></span>"#
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, true),
            format!(
                r#"foo <span class="cloze" data-cloze="{}" data-ordinal="1">[qux]</span>"#,
                htmlescape::encode_attribute(
                    r#"bar <span class="cloze-inactive" data-ordinal="2">baz</span>"#
                )
            )
        );
        assert_eq!(
            reveal_cloze_text("foo {{c1::bar {{c2::baz}}::qux}}", 1, false),
            r#"foo <span class="cloze" data-ordinal="1">bar <span class="cloze-inactive" data-ordinal="2">baz</span></span>"#
        );
    }

    /// Tags inside MathJax are removed while HTML entities are kept.
    #[test]
    fn mathjax_html() {
        // escaped angle brackets should be preserved
        assert_eq!(
            strip_html_inside_mathjax(r"\(<foo>&lt;&gt;</foo>\)"),
            r"\(&lt;&gt;\)"
        );
    }

    /// Multi-byte characters must not cause slicing panics in the tokenizer.
    #[test]
    fn non_latin() {
        assert!(cloze_numbers_in_string("öaöaöööaö").is_empty());
    }

    /// An active image occlusion on the question side renders a `cloze` div.
    #[test]
    fn image_cloze() {
        assert_eq!(
            reveal_cloze_text(
                "{{c1::image-occlusion:rect:left=10.0:top=20:width=30:height=10}}",
                1,
                true
            ),
            r#"<div class="cloze" data-ordinal="1" data-shape="rect" data-left="10.0" data-top="20" data-width="30" data-height="10" ></div>"#
        );
    }
}
|