// Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::borrow::Cow; use difflib::sequencematcher::Opcode; use difflib::sequencematcher::SequenceMatcher; use itertools::Itertools; use lazy_static::lazy_static; use regex::Regex; use unic_ucd_category::GeneralCategory; use crate::card_rendering::strip_av_tags; use crate::text::normalize_to_nfc; use crate::text::strip_html; lazy_static! { static ref LINEBREAKS: Regex = Regex::new( r#"(?six) ( \n | | )+ "# ) .unwrap(); } struct DiffContext { expected: Vec, provided: Vec, } impl DiffContext { fn new(expected: &str, provided: &str) -> Self { DiffContext { provided: prepare_provided(provided).chars().collect_vec(), expected: prepare_expected(expected).chars().collect_vec(), } } fn slice_expected(&self, opcode: &Opcode) -> String { self.expected[opcode.second_start..opcode.second_end] .iter() .cloned() .collect() } fn slice_provided(&self, opcode: &Opcode) -> String { self.provided[opcode.first_start..opcode.first_end] .iter() .cloned() .collect() } fn to_tokens(&self) -> DiffOutput { let mut matcher = SequenceMatcher::new(&self.provided, &self.expected); let opcodes = matcher.get_opcodes(); let mut provided = vec![]; let mut expected = vec![]; for opcode in opcodes { match opcode.tag.as_str() { "equal" => { provided.push(DiffToken::good(self.slice_provided(&opcode))); expected.push(DiffToken::good(self.slice_expected(&opcode))); } "delete" => { provided.push(DiffToken::bad(self.slice_provided(&opcode))); } "insert" => { provided.push(DiffToken::missing(self.slice_expected(&opcode))); expected.push(DiffToken::missing(self.slice_expected(&opcode))); } "replace" => { provided.push(DiffToken::bad(self.slice_provided(&opcode))); expected.push(DiffToken::missing(self.slice_expected(&opcode))); } _ => unreachable!(), } } DiffOutput { provided, expected } } fn to_html(&self) -> String { let output = self.to_tokens(); let provided = render_tokens(&output.provided); let expected = render_tokens(&output.expected); format!( "{}", if self.provided.is_empty() { self.expected.iter().collect() } else if no_mistakes(&output.expected) { provided } else { format!("{provided}

{expected}") } ) } } fn no_mistakes(tokens: &[DiffToken]) -> bool { tokens.iter().all(|v| v.kind == DiffTokenKind::Good) } fn prepare_expected(expected: &str) -> String { let without_av = strip_av_tags(expected); let without_newlines = LINEBREAKS.replace_all(&without_av, " "); let without_html = strip_html(&without_newlines); let without_outer_whitespace = without_html.trim(); normalize_to_nfc(without_outer_whitespace).into() } fn prepare_provided(provided: &str) -> String { normalize_to_nfc(provided).into() } #[derive(Debug, PartialEq, Eq)] enum DiffTokenKind { Good, Bad, Missing, } #[derive(Debug, PartialEq, Eq)] struct DiffToken { kind: DiffTokenKind, text: String, } impl DiffToken { fn bad(text: String) -> Self { Self { kind: DiffTokenKind::Bad, text, } } fn good(text: String) -> Self { Self { kind: DiffTokenKind::Good, text, } } fn missing(text: String) -> Self { Self { kind: DiffTokenKind::Missing, text, } } } #[derive(Debug, PartialEq, Eq)] struct DiffOutput { provided: Vec, expected: Vec, } pub fn compare_answer(expected: &str, provided: &str) -> String { DiffContext::new(expected, provided).to_html() } fn render_tokens(tokens: &[DiffToken]) -> String { let text_tokens: Vec<_> = tokens .iter() .map(|token| { let text = with_isolated_leading_mark(&token.text); let encoded = htmlescape::encode_minimal(&text); let class = match token.kind { DiffTokenKind::Good => "typeGood", DiffTokenKind::Bad => "typeBad", DiffTokenKind::Missing => "typeMissed", }; format!("{encoded}") }) .collect(); text_tokens.join("") } /// If text begins with a mark character, prefix it with a non-breaking /// space to prevent the mark from joining to the previous token. fn with_isolated_leading_mark(text: &str) -> Cow { if let Some(ch) = text.chars().next() { if GeneralCategory::of(ch).is_mark() { return format!("\u{a0}{text}").into(); } } text.into() } #[cfg(test)] mod test { use super::*; macro_rules! token_factory { ($name:ident) => { fn $name(text: &str) -> DiffToken { DiffToken::$name(String::from(text)) } }; } token_factory!(bad); token_factory!(good); token_factory!(missing); #[test] fn tokens() { let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer"); let output = ctx.to_tokens(); assert_eq!( output.provided, vec![ bad("y"), good(" ahora q"), bad("e"), good(" vamos"), missing(" "), good("a hacer"), missing("?"), ] ); assert_eq!( output.expected, vec![ missing("¿Y"), good(" ahora q"), missing("ué"), good(" vamos"), missing(" "), good("a hacer"), missing("?"), ] ); } #[test] fn html_and_media() { let ctx = DiffContext::new("[sound:foo.mp3]1  2", "1 2"); // the spacing is handled by wrapping html output in white-space: pre-wrap assert_eq!(ctx.to_tokens().expected, &[good("1 2")]); } #[test] fn missed_chars_only_shown_in_provided_when_after_good() { let ctx = DiffContext::new("1", "23"); assert_eq!(ctx.to_tokens().provided, &[bad("23")]); let ctx = DiffContext::new("12", "1"); assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("2"),]); } #[test] fn handles_certain_unicode_as_expected() { // this was not parsed as expected with dissimilar 1.0.4 let ctx = DiffContext::new("쓰다듬다", "스다뜸다"); assert_eq!( ctx.to_tokens().provided, &[bad("스"), good("다"), bad("뜸"), good("다"),] ); } #[test] fn does_not_panic_with_certain_unicode() { // this was causing a panic with dissimilar 1.0.4 let ctx = DiffContext::new( "Сущность должна быть ответственна только за одно дело", concat!( "Single responsibility Сущность выполняет только одну задачу.", "Повод для изменения сущности только один." ), ); ctx.to_tokens(); } #[test] fn whitespace_is_trimmed() { assert_eq!(prepare_expected("
foo
"), "foo"); } }