2022-07-22 11:20:04 +02:00
|
|
|
|
// Copyright: Ankitects Pty Ltd and contributors
|
|
|
|
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
|
|
|
|
|
|
|
|
|
use std::borrow::Cow;
|
|
|
|
|
|
2023-01-18 12:39:55 +01:00
|
|
|
|
use difflib::sequencematcher::Opcode;
|
|
|
|
|
use difflib::sequencematcher::SequenceMatcher;
|
2023-01-16 00:49:34 +01:00
|
|
|
|
use itertools::Itertools;
|
2022-07-22 11:20:04 +02:00
|
|
|
|
use lazy_static::lazy_static;
|
|
|
|
|
use regex::Regex;
|
|
|
|
|
use unic_ucd_category::GeneralCategory;
|
|
|
|
|
|
2023-01-18 12:39:55 +01:00
|
|
|
|
use crate::card_rendering::strip_av_tags;
|
|
|
|
|
use crate::text::normalize_to_nfc;
|
|
|
|
|
use crate::text::strip_html;
|
2022-07-22 11:20:04 +02:00
|
|
|
|
|
|
|
|
|
lazy_static! {
    /// Matches a run of one or more line-break markers: a literal newline, a
    /// `<br>` tag (optionally self-closing), or an opening/closing `<div>`.
    ///
    /// The `x` flag makes whitespace inside the pattern insignificant, which
    /// is why the alternatives can be laid out one per line; `i` makes the
    /// tag names case-insensitive.
    static ref LINEBREAKS: Regex = Regex::new(
        r"(?six)
        (
            \n
            |
            <br\s?/?>
            |
            </?div>
        )+
        "
    )
    .unwrap();
}
|
|
|
|
|
|
|
|
|
|
/// The two texts being compared, held as `char` vectors so that diff ranges
/// index whole characters rather than bytes.
struct DiffContext {
    /// Characters of the answer the card expects.
    expected: Vec<char>,
    /// Characters of the answer that was typed in.
    provided: Vec<char>,
}
|
|
|
|
|
|
|
|
|
|
impl DiffContext {
|
|
|
|
|
fn new(expected: &str, provided: &str) -> Self {
|
|
|
|
|
DiffContext {
|
2023-01-16 00:49:34 +01:00
|
|
|
|
provided: prepare_provided(provided).chars().collect_vec(),
|
|
|
|
|
expected: prepare_expected(expected).chars().collect_vec(),
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-16 00:49:34 +01:00
|
|
|
|
fn slice_expected(&self, opcode: &Opcode) -> String {
|
|
|
|
|
self.expected[opcode.second_start..opcode.second_end]
|
|
|
|
|
.iter()
|
|
|
|
|
.cloned()
|
|
|
|
|
.collect()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn slice_provided(&self, opcode: &Opcode) -> String {
|
|
|
|
|
self.provided[opcode.first_start..opcode.first_end]
|
|
|
|
|
.iter()
|
|
|
|
|
.cloned()
|
|
|
|
|
.collect()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn to_tokens(&self) -> DiffOutput {
|
|
|
|
|
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
|
|
|
|
|
let opcodes = matcher.get_opcodes();
|
2022-07-22 11:20:04 +02:00
|
|
|
|
let mut provided = vec![];
|
|
|
|
|
let mut expected = vec![];
|
2023-01-16 00:49:34 +01:00
|
|
|
|
for opcode in opcodes {
|
|
|
|
|
match opcode.tag.as_str() {
|
|
|
|
|
"equal" => {
|
|
|
|
|
provided.push(DiffToken::good(self.slice_provided(&opcode)));
|
|
|
|
|
expected.push(DiffToken::good(self.slice_expected(&opcode)));
|
|
|
|
|
}
|
|
|
|
|
"delete" => {
|
|
|
|
|
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
2023-01-16 00:49:34 +01:00
|
|
|
|
"insert" => {
|
2023-02-02 10:13:11 +01:00
|
|
|
|
let expected_str = self.slice_expected(&opcode);
|
2023-02-10 05:40:45 +01:00
|
|
|
|
provided.push(DiffToken::missing("-".repeat(expected_str.chars().count())));
|
2023-02-02 10:13:11 +01:00
|
|
|
|
expected.push(DiffToken::missing(expected_str));
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
2023-01-16 00:49:34 +01:00
|
|
|
|
"replace" => {
|
|
|
|
|
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
|
|
|
|
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
2023-01-16 00:49:34 +01:00
|
|
|
|
_ => unreachable!(),
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
DiffOutput { provided, expected }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn to_html(&self) -> String {
|
|
|
|
|
let output = self.to_tokens();
|
|
|
|
|
let provided = render_tokens(&output.provided);
|
|
|
|
|
let expected = render_tokens(&output.expected);
|
|
|
|
|
format!(
|
2022-07-22 12:29:39 +02:00
|
|
|
|
"<code id=typeans>{}</code>",
|
2023-02-02 09:01:23 +01:00
|
|
|
|
if self.provided.is_empty() {
|
|
|
|
|
self.expected.iter().collect()
|
2023-02-02 09:47:30 +01:00
|
|
|
|
} else if self.provided == self.expected {
|
2022-07-22 11:20:04 +02:00
|
|
|
|
provided
|
|
|
|
|
} else {
|
|
|
|
|
format!("{provided}<br><span id=typearrow>↓</span><br>{expected}")
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn prepare_expected(expected: &str) -> String {
|
|
|
|
|
let without_av = strip_av_tags(expected);
|
|
|
|
|
let without_newlines = LINEBREAKS.replace_all(&without_av, " ");
|
|
|
|
|
let without_html = strip_html(&without_newlines);
|
2023-01-23 08:18:53 +01:00
|
|
|
|
let without_outer_whitespace = without_html.trim();
|
|
|
|
|
normalize_to_nfc(without_outer_whitespace).into()
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn prepare_provided(provided: &str) -> String {
|
|
|
|
|
normalize_to_nfc(provided).into()
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-24 03:12:58 +02:00
|
|
|
|
/// Classification of one segment of a diff; decides which CSS class the
/// segment is rendered with.
#[derive(Debug, PartialEq, Eq)]
enum DiffTokenKind {
    /// The segment is identical on both sides.
    Good,
    /// Typed text that does not match the expected answer.
    Bad,
    /// Expected text that was not typed (or its placeholder on the typed side).
    Missing,
}
|
|
|
|
|
|
2022-09-24 03:12:58 +02:00
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2023-01-16 00:49:34 +01:00
|
|
|
|
struct DiffToken {
|
2022-07-22 11:20:04 +02:00
|
|
|
|
kind: DiffTokenKind,
|
2023-01-16 00:49:34 +01:00
|
|
|
|
text: String,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl DiffToken {
|
|
|
|
|
fn bad(text: String) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
kind: DiffTokenKind::Bad,
|
|
|
|
|
text,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn good(text: String) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
kind: DiffTokenKind::Good,
|
|
|
|
|
text,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn missing(text: String) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
kind: DiffTokenKind::Missing,
|
|
|
|
|
text,
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-09-24 03:12:58 +02:00
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2023-01-16 00:49:34 +01:00
|
|
|
|
struct DiffOutput {
|
|
|
|
|
provided: Vec<DiffToken>,
|
|
|
|
|
expected: Vec<DiffToken>,
|
2022-07-22 11:20:04 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
|
|
|
|
DiffContext::new(expected, provided).to_html()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn render_tokens(tokens: &[DiffToken]) -> String {
|
|
|
|
|
let text_tokens: Vec<_> = tokens
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|token| {
|
|
|
|
|
let text = with_isolated_leading_mark(&token.text);
|
|
|
|
|
let encoded = htmlescape::encode_minimal(&text);
|
|
|
|
|
let class = match token.kind {
|
|
|
|
|
DiffTokenKind::Good => "typeGood",
|
|
|
|
|
DiffTokenKind::Bad => "typeBad",
|
|
|
|
|
DiffTokenKind::Missing => "typeMissed",
|
|
|
|
|
};
|
|
|
|
|
format!("<span class={class}>{encoded}</span>")
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
text_tokens.join("")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// If text begins with a mark character, prefix it with a non-breaking
|
|
|
|
|
/// space to prevent the mark from joining to the previous token.
|
|
|
|
|
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
|
|
|
|
|
if let Some(ch) = text.chars().next() {
|
|
|
|
|
if GeneralCategory::of(ch).is_mark() {
|
|
|
|
|
return format!("\u{a0}{text}").into();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
text.into()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Shorthand for [`DiffToken::bad`] that accepts a `&str`.
    fn bad(text: &str) -> DiffToken {
        DiffToken::bad(String::from(text))
    }

    /// Shorthand for [`DiffToken::good`] that accepts a `&str`.
    fn good(text: &str) -> DiffToken {
        DiffToken::good(String::from(text))
    }

    /// Shorthand for [`DiffToken::missing`] that accepts a `&str`.
    fn missing(text: &str) -> DiffToken {
        DiffToken::missing(String::from(text))
    }

    #[test]
    fn tokens() {
        let diff = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
        let DiffOutput { provided, expected } = diff.to_tokens();

        let want_provided = vec![
            bad("y"),
            good(" ahora q"),
            bad("e"),
            good(" vamos"),
            missing("-"),
            good("a hacer"),
            missing("-"),
        ];
        assert_eq!(provided, want_provided);

        let want_expected = vec![
            missing("¿Y"),
            good(" ahora q"),
            missing("ué"),
            good(" vamos"),
            missing(" "),
            good("a hacer"),
            missing("?"),
        ];
        assert_eq!(expected, want_expected);
    }

    #[test]
    fn html_and_media() {
        let diff = DiffContext::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
        // the spacing is handled by wrapping html output in white-space: pre-wrap
        assert_eq!(diff.to_tokens().expected, vec![good("1 2")]);
    }

    #[test]
    fn missed_chars_only_shown_in_provided_when_after_good() {
        let diff = DiffContext::new("1", "23");
        assert_eq!(diff.to_tokens().provided, vec![bad("23")]);

        let diff = DiffContext::new("12", "1");
        assert_eq!(diff.to_tokens().provided, vec![good("1"), missing("-")]);
    }

    #[test]
    fn missed_chars_counted_correctly() {
        let diff = DiffContext::new("нос", "нс");
        let want = vec![good("н"), missing("-"), good("с")];
        assert_eq!(diff.to_tokens().provided, want);
    }

    #[test]
    fn handles_certain_unicode_as_expected() {
        // this was not parsed as expected with dissimilar 1.0.4
        let diff = DiffContext::new("쓰다듬다", "스다뜸다");
        let want = vec![bad("스"), good("다"), bad("뜸"), good("다")];
        assert_eq!(diff.to_tokens().provided, want);
    }

    #[test]
    fn does_not_panic_with_certain_unicode() {
        // this was causing a panic with dissimilar 1.0.4
        let expected = "Сущность должна быть ответственна только за одно дело";
        let provided = concat!(
            "Single responsibility Сущность выполняет только одну задачу.",
            "Повод для изменения сущности только один."
        );
        DiffContext::new(expected, provided).to_tokens();
    }

    #[test]
    fn whitespace_is_trimmed() {
        let prepared = prepare_expected("<div>foo</div>");
        assert_eq!(prepared, "foo");
    }

    #[test]
    fn empty_input_shows_as_code() {
        let html = DiffContext::new("123", "").to_html();
        assert_eq!(html, "<code id=typeans>123</code>");
    }

    #[test]
    fn correct_input_is_collapsed() {
        let html = DiffContext::new("123", "123").to_html();
        assert_eq!(
            html,
            "<code id=typeans><span class=typeGood>123</span></code>"
        );
    }

    #[test]
    fn incorrect_input_is_not_collapsed() {
        let html = DiffContext::new("123", "1123").to_html();
        assert_eq!(
            html,
            "<code id=typeans><span class=typeBad>1</span><span class=typeGood>123</span><br><span id=typearrow>↓</span><br><span class=typeGood>123</span></code>"
        );
    }
}
|