Partially migrate type answer to backend

Partially completes #1068, and will allow mobile clients to drop
their separate diff-match-patch imports. Does not yet try to handle
case folding or combining-char stripping, and leaves some of the outer
HTML wrapping up to the frontend for now.

The logic for rendering the provided string has changed: missing chars
are now only inserted if they follow a correct section, and the original
text is shown instead of hyphens. This is an experiment, and can be
changed if it's not well received.
This commit is contained in:
Damien Elmes 2022-07-22 19:20:04 +10:00
parent 173a5bfed5
commit 1e0be26b7e
19 changed files with 393 additions and 94 deletions

7
Cargo.lock generated
View File

@ -55,6 +55,7 @@ dependencies = [
"coarsetime", "coarsetime",
"criterion", "criterion",
"csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)", "csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"dissimilar",
"env_logger", "env_logger",
"flate2", "flate2",
"fluent", "fluent",
@ -586,6 +587,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "dissimilar"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5"
[[package]] [[package]]
name = "dtoa" name = "dtoa"
version = "0.4.8" version = "0.4.8"

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -431,6 +431,16 @@ def raze_fetch_remote_crates():
build_file = Label("//cargo/remote:BUILD.dirs-sys-next-0.1.2.bazel"), build_file = Label("//cargo/remote:BUILD.dirs-sys-next-0.1.2.bazel"),
) )
maybe(
http_archive,
name = "raze__dissimilar__1_0_4",
url = "https://crates.io/api/v1/crates/dissimilar/1.0.4/download",
type = "tar.gz",
sha256 = "8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5",
strip_prefix = "dissimilar-1.0.4",
build_file = Label("//cargo/remote:BUILD.dissimilar-1.0.4.bazel"),
)
maybe( maybe(
http_archive, http_archive,
name = "raze__dtoa__0_4_8", name = "raze__dtoa__0_4_8",

View File

@ -341,6 +341,15 @@
"license_file": null, "license_file": null,
"description": "system-level helper functions for the dirs and directories crates" "description": "system-level helper functions for the dirs and directories crates"
}, },
{
"name": "dissimilar",
"version": "1.0.4",
"authors": "David Tolnay <dtolnay@gmail.com>",
"repository": "https://github.com/dtolnay/dissimilar",
"license": "Apache-2.0",
"license_file": null,
"description": "Diff library with semantic cleanup, based on Google's diff-match-patch"
},
{ {
"name": "either", "name": "either",
"version": "1.6.1", "version": "1.6.1",

View File

@ -0,0 +1,58 @@
"""
@generated
cargo-raze crate build file.
DO NOT EDIT! Replaced on runs of cargo-raze
"""
# buildifier: disable=load
load("@bazel_skylib//lib:selects.bzl", "selects")
# buildifier: disable=load
load(
"@rules_rust//rust:defs.bzl",
"rust_binary",
"rust_library",
"rust_proc_macro",
"rust_test",
)
package(default_visibility = [
# Public for visibility by "@raze__crate__version//" targets.
#
# Prefer access through "//cargo", which limits external
# visibility to explicit Cargo.toml dependencies.
"//visibility:public",
])
licenses([
"notice", # Apache-2.0 from expression "Apache-2.0"
])
# Generated Targets
# Unsupported target "bench" with type "bench" omitted
rust_library(
name = "dissimilar",
srcs = glob(["**/*.rs"]),
crate_features = [
],
crate_root = "src/lib.rs",
data = [],
edition = "2018",
rustc_flags = [
"--cap-lints=allow",
],
tags = [
"cargo-raze",
"crate-name=dissimilar",
"manual",
],
version = "1.0.4",
# buildifier: leave-alone
deps = [
],
)
# Unsupported target "test" with type "test" omitted

View File

@ -26,6 +26,7 @@ service CardRenderingService {
rpc EncodeIriPaths(generic.String) returns (generic.String); rpc EncodeIriPaths(generic.String) returns (generic.String);
rpc DecodeIriPaths(generic.String) returns (generic.String); rpc DecodeIriPaths(generic.String) returns (generic.String);
rpc StripHtml(StripHtmlRequest) returns (generic.String); rpc StripHtml(StripHtmlRequest) returns (generic.String);
rpc CompareAnswer(CompareAnswerRequest) returns (generic.String);
} }
message ExtractAVTagsRequest { message ExtractAVTagsRequest {
@ -132,3 +133,8 @@ message StripHtmlRequest {
string text = 1; string text = 1;
Mode mode = 2; Mode mode = 2;
} }
// Input for CardRenderingService.CompareAnswer, which returns the rendered
// comparison as a generic.String.
message CompareAnswerRequest {
// The correct answer to compare against.
string expected = 1;
// The answer that was typed in.
string provided = 2;
}

View File

@ -1213,6 +1213,9 @@ class Collection(DeprecatedNamesMixin):
"Not intended for public consumption at this time." "Not intended for public consumption at this time."
return self._backend.render_markdown(markdown=text, sanitize=sanitize) return self._backend.render_markdown(markdown=text, sanitize=sanitize)
def compare_answer(self, expected: str, provided: str) -> str:
return self._backend.compare_answer(expected=expected, provided=provided)
# Timeboxing # Timeboxing
########################################################################## ##########################################################################
# fixme: there doesn't seem to be a good reason why this code is in main.py # fixme: there doesn't seem to be a good reason why this code is in main.py

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -571,7 +571,7 @@ class CardLayout(QDialog):
hadHR = origLen != len(txt) hadHR = origLen != len(txt)
def answerRepl(match: Match) -> str: def answerRepl(match: Match) -> str:
res = self.mw.reviewer.correct("example", "sample") res = self.mw.col.compare_answer("example", "sample")
if hadHR: if hadHR:
res = f"<hr id=answer>{res}" res = f"<hr id=answer>{res}"
return res return res

View File

@ -3,12 +3,9 @@
from __future__ import annotations from __future__ import annotations
import difflib
import html
import json import json
import random import random
import re import re
import unicodedata as ucd
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum, auto from enum import Enum, auto
from typing import Any, Callable, Literal, Match, Sequence, cast from typing import Any, Callable, Literal, Match, Sequence, cast
@ -24,7 +21,6 @@ from anki.scheduler.v3 import CardAnswer, NextStates, QueuedCards
from anki.scheduler.v3 import Scheduler as V3Scheduler from anki.scheduler.v3 import Scheduler as V3Scheduler
from anki.tags import MARKED_TAG from anki.tags import MARKED_TAG
from anki.types import assert_exhaustive from anki.types import assert_exhaustive
from anki.utils import strip_html
from aqt import AnkiQt, gui_hooks from aqt import AnkiQt, gui_hooks
from aqt.browser.card_info import PreviousReviewerCardInfo, ReviewerCardInfo from aqt.browser.card_info import PreviousReviewerCardInfo, ReviewerCardInfo
from aqt.deckoptions import confirm_deck_then_display_options from aqt.deckoptions import confirm_deck_then_display_options
@ -597,17 +593,10 @@ class Reviewer:
buf = buf.replace("<hr id=answer>", "") buf = buf.replace("<hr id=answer>", "")
hadHR = len(buf) != origSize hadHR = len(buf) != origSize
# munge correct value # munge correct value
cor = self.mw.col.media.strip(self.typeCorrect) expected = self.typeCorrect
cor = re.sub("(\n|<br ?/?>|</?div>)+", " ", cor) provided = self.typedAnswer
cor = strip_html(cor)
# ensure we don't chomp multiple whitespace
cor = cor.replace(" ", "&nbsp;")
cor = html.unescape(cor)
cor = cor.replace("\xa0", " ")
cor = cor.strip()
given = self.typedAnswer
# compare with typed answer # compare with typed answer
res = self.correct(given, cor, showBad=False) output = self.mw.col.compare_answer(expected, provided)
# and update the type answer area # and update the type answer area
def repl(match: Match) -> str: def repl(match: Match) -> str:
# can't pass a string in directly, and can't use re.escape as it # can't pass a string in directly, and can't use re.escape as it
@ -616,7 +605,7 @@ class Reviewer:
<span style="font-family: '{}'; font-size: {}px">{}</span>""".format( <span style="font-family: '{}'; font-size: {}px">{}</span>""".format(
self.typeFont, self.typeFont,
self.typeSize, self.typeSize,
res, output,
) )
if hadHR: if hadHR:
# a hack to ensure the q/a separator falls before the answer # a hack to ensure the q/a separator falls before the answer
@ -644,84 +633,6 @@ class Reviewer:
txt = ", ".join(matches) txt = ", ".join(matches)
return txt return txt
def tokenizeComparison(
self, given: str, correct: str
) -> tuple[list[tuple[bool, str]], list[tuple[bool, str]]]:
# compare in NFC form so accents appear correct
given = ucd.normalize("NFC", given)
correct = ucd.normalize("NFC", correct)
s = difflib.SequenceMatcher(None, given, correct, autojunk=False)
givenElems: list[tuple[bool, str]] = []
correctElems: list[tuple[bool, str]] = []
givenPoint = 0
correctPoint = 0
offby = 0
def logBad(old: int, new: int, s: str, array: list[tuple[bool, str]]) -> None:
if old != new:
array.append((False, s[old:new]))
def logGood(
start: int, cnt: int, s: str, array: list[tuple[bool, str]]
) -> None:
if cnt:
array.append((True, s[start : start + cnt]))
for x, y, cnt in s.get_matching_blocks():
# if anything was missed in correct, pad given
if cnt and y - offby > x:
givenElems.append((False, "-" * (y - x - offby)))
offby = y - x
# log any proceeding bad elems
logBad(givenPoint, x, given, givenElems)
logBad(correctPoint, y, correct, correctElems)
givenPoint = x + cnt
correctPoint = y + cnt
# log the match
logGood(x, cnt, given, givenElems)
logGood(y, cnt, correct, correctElems)
return givenElems, correctElems
def correct(self, given: str, correct: str, showBad: bool = True) -> str:
"Diff-corrects the typed-in answer."
givenElems, correctElems = self.tokenizeComparison(given, correct)
def good(s: str) -> str:
return f"<span class=typeGood>{html.escape(s)}</span>"
def bad(s: str) -> str:
return f"<span class=typeBad>{html.escape(s)}</span>"
def missed(s: str) -> str:
return f"<span class=typeMissed>{html.escape(s)}</span>"
if given == correct:
res = good(given)
else:
res = ""
for ok, txt in givenElems:
txt = self._noLoneMarks(txt)
if ok:
res += good(txt)
else:
res += bad(txt)
res += "<br><span id=typearrow>&darr;</span><br>"
for ok, txt in correctElems:
txt = self._noLoneMarks(txt)
if ok:
res += good(txt)
else:
res += missed(txt)
res = f"<div><code id=typeans>{res}</code></div>"
return res
def _noLoneMarks(self, s: str) -> str:
# ensure a combining character at the start does not join to
# previous text
if s and ucd.category(s[0]).startswith("M"):
return f"\xa0{s}"
return s
def _getTypedAnswer(self) -> None: def _getTypedAnswer(self) -> None:
self.web.evalWithCallback("getTypedAnswer();", self._onTypedAnswer) self.web.evalWithCallback("getTypedAnswer();", self._onTypedAnswer)

View File

@ -77,6 +77,7 @@ rust_library(
"//rslib/cargo:chrono", "//rslib/cargo:chrono",
"//rslib/cargo:coarsetime", "//rslib/cargo:coarsetime",
"//rslib/cargo:csv", "//rslib/cargo:csv",
"//rslib/cargo:dissimilar",
"//rslib/cargo:flate2", "//rslib/cargo:flate2",
"//rslib/cargo:fluent", "//rslib/cargo:fluent",
"//rslib/cargo:fnv", "//rslib/cargo:fnv",

View File

@ -101,3 +101,4 @@ id_tree = "1.8.0"
zstd = { version="0.10.0", features=["zstdmt"] } zstd = { version="0.10.0", features=["zstdmt"] }
num_cpus = "1.13.1" num_cpus = "1.13.1"
csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" } csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" }
dissimilar = "1.0.4"

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -75,6 +75,15 @@ alias(
], ],
) )
alias(
name = "dissimilar",
actual = "@raze__dissimilar__1_0_4//:dissimilar",
tags = [
"cargo-raze",
"manual",
],
)
alias( alias(
name = "env_logger", name = "env_logger",
actual = "@raze__env_logger__0_9_0//:env_logger", actual = "@raze__env_logger__0_9_0//:env_logger",

View File

@ -15,6 +15,7 @@ use crate::{
decode_iri_paths, encode_iri_paths, sanitize_html_no_images, strip_html, decode_iri_paths, encode_iri_paths, sanitize_html_no_images, strip_html,
strip_html_preserving_media_filenames, strip_html_preserving_media_filenames,
}, },
typeanswer::compare_answer,
}; };
impl CardRenderingService for Backend { impl CardRenderingService for Backend {
@ -147,6 +148,10 @@ impl CardRenderingService for Backend {
.to_string() .to_string()
.into()) .into())
} }
fn compare_answer(&self, input: pb::CompareAnswerRequest) -> Result<pb::String> {
Ok(compare_answer(&input.expected, &input.provided).into())
}
} }
fn rendered_nodes_to_proto(nodes: Vec<RenderedNode>) -> Vec<pb::RenderedTemplateNode> { fn rendered_nodes_to_proto(nodes: Vec<RenderedNode>) -> Vec<pb::RenderedTemplateNode> {

View File

@ -43,6 +43,7 @@ pub mod template_filters;
pub(crate) mod tests; pub(crate) mod tests;
pub mod text; pub mod text;
pub mod timestamp; pub mod timestamp;
mod typeanswer;
pub mod types; pub mod types;
pub mod undo; pub mod undo;
pub mod version; pub mod version;

233
rslib/src/typeanswer.rs Normal file
View File

@ -0,0 +1,233 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
// FIXME: space to nbsp in output, or pre-wrap
use std::borrow::Cow;
use dissimilar::Chunk;
use lazy_static::lazy_static;
use regex::Regex;
use unic_ucd_category::GeneralCategory;
use crate::{
card_rendering::strip_av_tags,
text::{normalize_to_nfc, strip_html},
};
// Matches a run of one or more line-break markers: a literal newline, a
// `<br>` tag (optionally written `<br/>` or `<br />`), or an opening or
// closing `<div>` tag.
//
// Flags: `x` makes whitespace inside the pattern insignificant (which is why
// the optional space is spelled `\s?`), `i` matches the tag names
// case-insensitively, and `s` lets `.` match newlines (the pattern currently
// contains no `.`).
lazy_static! {
static ref LINEBREAKS: Regex = Regex::new(
r#"(?six)
(
\n
|
<br\s?/?>
|
</?div>
)+
"#
)
.unwrap();
}
/// The pair of strings being compared, stored in the cleaned-up form
/// produced by `DiffContext::new`.
struct DiffContext {
/// The correct answer, after `prepare_expected` has been applied.
expected: String,
/// The typed answer, after `prepare_provided` has been applied.
provided: String,
}
impl DiffContext {
fn new(expected: &str, provided: &str) -> Self {
DiffContext {
expected: prepare_expected(expected),
provided: prepare_provided(provided),
}
}
fn to_tokens(&self) -> DiffOutput<'_> {
let chunks = dissimilar::diff(&self.provided, &self.expected);
let mut provided = vec![];
let mut expected = vec![];
for chunk in chunks {
match chunk {
Chunk::Equal(text) => {
provided.push(DiffToken {
kind: DiffTokenKind::Good,
text: text.into(),
});
expected.push(DiffToken {
kind: DiffTokenKind::Good,
text: text.into(),
});
}
Chunk::Delete(text) => {
provided.push(DiffToken {
kind: DiffTokenKind::Bad,
text: text.into(),
});
}
Chunk::Insert(text) => {
// If the proceeding text was correct, indicate text was missing
if provided
.last()
.map(|v| v.kind == DiffTokenKind::Good)
.unwrap_or_default()
{
provided.push(DiffToken {
kind: DiffTokenKind::Missing,
text: text.into(),
});
}
expected.push(DiffToken {
kind: DiffTokenKind::Missing,
text: text.into(),
});
}
}
}
DiffOutput { provided, expected }
}
fn to_html(&self) -> String {
let output = self.to_tokens();
let provided = render_tokens(&output.provided);
let expected = render_tokens(&output.expected);
format!(
"<div style='white-space: pre-wrap;'>{}</div>",
if no_mistakes(&output.expected) {
provided
} else {
format!("{provided}<br><span id=typearrow>&darr;</span><br>{expected}")
}
)
}
}
/// Returns `true` when no token has a kind other than `Good`; an empty
/// slice therefore counts as having no mistakes.
fn no_mistakes(tokens: &[DiffToken]) -> bool {
    let has_mistake = tokens
        .iter()
        .any(|token| token.kind != DiffTokenKind::Good);
    !has_mistake
}
/// Cleans up the expected answer before diffing: AV tags are stripped, each
/// run of line-break markers matched by `LINEBREAKS` becomes a single space,
/// remaining HTML is stripped, and the result is normalized to NFC.
fn prepare_expected(expected: &str) -> String {
    let text = strip_av_tags(expected);
    let text = LINEBREAKS.replace_all(&text, " ");
    let text = strip_html(&text);
    normalize_to_nfc(&text).into()
}
/// Cleans up the typed answer before diffing; the only step is NFC
/// normalization.
fn prepare_provided(provided: &str) -> String {
    let normalized = normalize_to_nfc(provided);
    normalized.into()
}
/// How a run of text in the comparison is classified.
///
/// The enum is a plain tag without payload, so it is `Copy` and has total
/// equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DiffTokenKind {
    /// Text present in both the typed and the expected answer.
    Good,
    /// Text that was typed but is not part of the expected answer.
    Bad,
    /// Text of the expected answer that was not typed.
    Missing,
}
/// A run of text together with its classification.
#[derive(Debug, PartialEq)]
struct DiffToken<'a> {
/// Whether the text was correct, wrong, or missing.
kind: DiffTokenKind,
/// The text of the run; `to_tokens` fills this with slices of the diffed
/// strings.
text: Cow<'a, str>,
}
/// The result of `DiffContext::to_tokens`: one token list per side of the
/// comparison.
#[derive(Debug, PartialEq)]
struct DiffOutput<'a> {
/// Tokens describing the typed answer.
provided: Vec<DiffToken<'a>>,
/// Tokens describing the expected answer.
expected: Vec<DiffToken<'a>>,
}
/// Compares the typed answer `provided` against the correct answer
/// `expected` and returns the comparison rendered as HTML.
pub fn compare_answer(expected: &str, provided: &str) -> String {
    let context = DiffContext::new(expected, provided);
    context.to_html()
}
/// Renders each token as a `<span>` whose class reflects its kind
/// (`typeGood`, `typeBad` or `typeMissed`) and concatenates the spans.
/// Token text is HTML-escaped, and a leading mark character is isolated
/// first so it cannot attach to the preceding span.
fn render_tokens(tokens: &[DiffToken]) -> String {
    let mut html = String::new();
    for token in tokens {
        let class = match token.kind {
            DiffTokenKind::Good => "typeGood",
            DiffTokenKind::Bad => "typeBad",
            DiffTokenKind::Missing => "typeMissed",
        };
        let isolated = with_isolated_leading_mark(&token.text);
        let encoded = htmlescape::encode_minimal(&isolated);
        html.push_str(&format!("<span class={class}>{encoded}</span>"));
    }
    html
}
/// Guards against a leading mark character attaching itself to whatever
/// precedes the token: when the first character of `text` is in a mark
/// category, a copy prefixed with a non-breaking space is returned;
/// otherwise `text` is returned borrowed and unchanged.
fn with_isolated_leading_mark(text: &str) -> Cow<str> {
    match text.chars().next() {
        Some(first) if GeneralCategory::of(first).is_mark() => {
            Cow::Owned(format!("\u{a0}{text}"))
        }
        _ => Cow::Borrowed(text),
    }
}
#[cfg(test)]
mod test {
    use DiffTokenKind::*;

    use super::*;

    /// Shorthand for building the `DiffToken` values used in assertions.
    macro_rules! token {
        ($kind:ident, $text:expr) => {
            DiffToken {
                kind: $kind,
                text: $text.into(),
            }
        };
    }

    /// Checks both token lists for an answer with wrong, missing and correct
    /// sections.
    #[test]
    fn tokens() {
        let ctx = DiffContext::new("¿Y ahora qué vamos a hacer?", "y ahora qe vamosa hacer");
        let output = ctx.to_tokens();
        assert_eq!(
            output.provided,
            vec![
                token!(Bad, "y"),
                token!(Good, " ahora q"),
                token!(Bad, "e"),
                token!(Good, " vamos"),
                token!(Missing, " "),
                token!(Good, "a hacer"),
                token!(Missing, "?"),
            ]
        );
        assert_eq!(
            output.expected,
            vec![
                token!(Missing, "¿Y"),
                token!(Good, " ahora q"),
                // "qe" was typed for "qué": the expected side is missing "ué"
                token!(Missing, "ué"),
                token!(Good, " vamos"),
                token!(Missing, " "),
                token!(Good, "a hacer"),
                token!(Missing, "?"),
            ]
        );
    }

    /// AV tags and HTML in the expected answer must not take part in the diff.
    #[test]
    fn html_and_media() {
        // NOTE(review): if `strip_html` decodes `&nbsp;` to a plain space, the
        // prepared expected text contains two consecutive spaces, and the
        // provided string and the token text below would both need two spaces
        // as well - confirm against `strip_html`.
        let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> &nbsp;2", "1 2");
        // the spacing is handled by wrapping html output in white-space: pre-wrap
        assert_eq!(ctx.to_tokens().expected, &[token!(Good, "1 2")]);
    }

    /// Missing text is only mirrored into the provided list when it directly
    /// follows a correct section.
    #[test]
    fn missed_chars_only_shown_in_provided_when_after_good() {
        let ctx = DiffContext::new("1", "23");
        assert_eq!(ctx.to_tokens().provided, &[token!(Bad, "23")]);
        let ctx = DiffContext::new("12", "1");
        assert_eq!(
            ctx.to_tokens().provided,
            &[token!(Good, "1"), token!(Missing, "2"),]
        );
    }
}