include LaTeX png/svg files when checking for unused media
This commit is contained in:
parent
4cca3ecef5
commit
c890ef871e
@ -1,8 +1,9 @@
|
||||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use crate::latex::contains_latex;
|
||||
use crate::template::RenderContext;
|
||||
use crate::text::{contains_latex, strip_html};
|
||||
use crate::text::strip_html;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Captures;
|
||||
use regex::Regex;
|
||||
|
122
rslib/src/latex.rs
Normal file
122
rslib/src/latex.rs
Normal file
@ -0,0 +1,122 @@
|
||||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use crate::media::files::sha1_of_data;
|
||||
use crate::text::strip_html;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::{Captures, Regex};
|
||||
use std::borrow::Cow;
|
||||
|
||||
lazy_static! {
    // Matches the three LaTeX markup forms handled by this module. Exactly one
    // of the capture groups is set for any given match:
    //   1 - [latex]...[/latex]
    //   2 - [$]...[/$]
    //   3 - [$$]...[/$$]
    // Flags: x = verbose, s = `.` also matches newlines, i = case-insensitive.
    static ref LATEX: Regex = Regex::new(
        r#"(?xsi)
            \[latex\](.+?)\[/latex\]     # 1 - standard latex
            |
            \[\$\](.+?)\[/\$\]           # 2 - inline math
            |
            \[\$\$\](.+?)\[/\$\$\]       # 3 - math environment
            "#
    )
    .unwrap();

    // HTML constructs that are turned into a newline before the remaining
    // markup is stripped from a LaTeX snippet.
    //
    // The space in `<br />` is written as `\x20`: in verbose (`x`) mode the
    // regex crate ignores literal whitespace everywhere in the pattern, so a
    // plain `( /)?` would silently degrade to `(/)?` and never match `<br />`.
    static ref LATEX_NEWLINES: Regex = Regex::new(
        r#"(?xi)
            <br(\x20/)?>
            |
            <div>
            "#
    )
    .unwrap();
}
|
||||
|
||||
pub(crate) fn contains_latex(text: &str) -> bool {
|
||||
LATEX.is_match(text)
|
||||
}
|
||||
|
||||
/// A single LaTeX snippet found in a piece of text, as produced by
/// `extract_latex`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedLatex {
    /// Name of the image file associated with the snippet
    /// (`latex-<checksum>.png` or `latex-<checksum>.svg`).
    pub fname: String,
    /// The LaTeX source after HTML has been stripped from it.
    pub latex: String,
}
|
||||
|
||||
pub(crate) fn extract_latex(text: &str, svg: bool) -> (String, Vec<ExtractedLatex>) {
|
||||
let mut extracted = vec![];
|
||||
|
||||
let new_text = LATEX.replace_all(text, |caps: &Captures| {
|
||||
let latex = match (caps.get(1), caps.get(2), caps.get(3)) {
|
||||
(Some(m), _, _) => m.as_str().into(),
|
||||
(_, Some(m), _) => format!("${}$", m.as_str()),
|
||||
(_, _, Some(m)) => format!(r"\begin{{displaymath}}{}\end{{displaymath}}", m.as_str()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let latex_text = strip_html_for_latex(&latex);
|
||||
let fname = fname_for_latex(&latex_text, svg);
|
||||
let img_link = image_link_for_fname(&fname);
|
||||
extracted.push(ExtractedLatex {
|
||||
fname,
|
||||
latex: latex_text.into(),
|
||||
});
|
||||
|
||||
img_link
|
||||
});
|
||||
|
||||
(new_text.into(), extracted)
|
||||
}
|
||||
|
||||
fn strip_html_for_latex(html: &str) -> Cow<str> {
|
||||
let mut out: Cow<str> = html.into();
|
||||
if let Cow::Owned(o) = LATEX_NEWLINES.replace_all(html, "\n") {
|
||||
out = o.into();
|
||||
}
|
||||
if let Cow::Owned(o) = strip_html(out.as_ref()) {
|
||||
out = o.into();
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
fn fname_for_latex(latex: &str, svg: bool) -> String {
|
||||
let ext = if svg { "svg" } else { "png" };
|
||||
let csum = hex::encode(sha1_of_data(latex.as_bytes()));
|
||||
|
||||
format!("latex-{}.{}", csum, ext)
|
||||
}
|
||||
|
||||
/// Returns the `<img class=latex …>` tag that references `fname`.
///
/// `fname` is inserted verbatim; no HTML escaping is applied.
fn image_link_for_fname(fname: &str) -> String {
    format!("<img class=latex src=\"{}\">", fname)
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::latex::{extract_latex, ExtractedLatex};

    // NOTE(review): this text was recovered from a rendered diff page; HTML
    // entities inside the input literals below may have been decoded by the
    // renderer. Confirm the literals against the repository before relying on
    // the expected checksums.
    #[test]
    fn latex() {
        // [latex] form: <br> and <div> become newlines, and the section is
        // replaced by an <img> tag in the returned text.
        let fname = "latex-ef30b3f4141c33a5bf7044b0d1961d3399c05d50.png";
        assert_eq!(
            extract_latex("a[latex]one<br>and<div>two[/latex]b", false),
            (
                format!("a<img class=latex src=\"{}\">b", fname),
                vec![ExtractedLatex {
                    fname: fname.into(),
                    latex: "one\nand\ntwo".into()
                }]
            )
        );

        // [$] form: wrapped in $...$, HTML tags removed, svg extension.
        assert_eq!(
            extract_latex("[$]<b>hello</b> world[/$]", true).1,
            vec![ExtractedLatex {
                fname: "latex-060219fbf3ddb74306abddaf4504276ad793b029.svg".to_string(),
                latex: "$hello world$".to_string()
            }]
        );

        // [$$] form: wrapped in a displaymath environment.
        assert_eq!(
            extract_latex("[$$]math & stuff[/$$]", false).1,
            vec![ExtractedLatex {
                fname: "latex-8899f3f849ffdef6e4e9f2f34a923a1f608ebc07.png".to_string(),
                latex: r"\begin{displaymath}math & stuff\end{displaymath}".to_string()
            }]
        );
    }
}
|
@ -12,6 +12,7 @@ pub fn version() -> &'static str {
|
||||
pub mod backend;
|
||||
pub mod cloze;
|
||||
pub mod err;
|
||||
pub mod latex;
|
||||
pub mod media;
|
||||
pub mod sched;
|
||||
pub mod template;
|
||||
|
@ -1,7 +1,9 @@
|
||||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use crate::cloze::expand_clozes_to_reveal_latex;
|
||||
use crate::err::{AnkiError, Result};
|
||||
use crate::latex::extract_latex;
|
||||
use crate::media::col::{
|
||||
for_every_note, get_note_types, mark_collection_modified, open_or_create_collection_db,
|
||||
set_note, Note,
|
||||
@ -223,20 +225,19 @@ where
|
||||
if self.checked % 10 == 0 {
|
||||
self.maybe_fire_progress_cb()?;
|
||||
}
|
||||
if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
|
||||
// note was modified, needs saving
|
||||
set_note(
|
||||
&trx,
|
||||
note,
|
||||
note_types
|
||||
let nt = note_types
|
||||
.get(¬e.mid)
|
||||
.ok_or_else(|| AnkiError::DBError {
|
||||
info: "missing note type".to_string(),
|
||||
})?,
|
||||
)?;
|
||||
})?;
|
||||
if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
|
||||
// note was modified, needs saving
|
||||
set_note(&trx, note, nt)?;
|
||||
collection_modified = true;
|
||||
}
|
||||
|
||||
// extract latex
|
||||
extract_latex_refs(note, &mut referenced_files, nt.latex_uses_svg());
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
@ -320,6 +321,20 @@ fn find_unused_and_missing(
|
||||
(unused, references.into_iter().collect())
|
||||
}
|
||||
|
||||
fn extract_latex_refs(note: &Note, seen_files: &mut HashSet<String>, svg: bool) {
|
||||
for field in note.fields() {
|
||||
let field_text: Cow<str> = if field.contains("{{c") {
|
||||
expand_clozes_to_reveal_latex(field).into()
|
||||
} else {
|
||||
field.into()
|
||||
};
|
||||
let (_, extracted) = extract_latex(field_text.as_ref(), svg);
|
||||
for e in extracted {
|
||||
seen_files.insert(e.fname);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::err::Result;
|
||||
|
@ -64,6 +64,15 @@ pub(super) struct NoteType {
|
||||
id: ObjID,
|
||||
#[serde(rename = "sortf")]
|
||||
sort_field_idx: u16,
|
||||
|
||||
#[serde(rename = "latexsvg", default)]
|
||||
latex_svg: bool,
|
||||
}
|
||||
|
||||
impl NoteType {
|
||||
pub fn latex_uses_svg(&self) -> bool {
|
||||
self.latex_svg
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn get_note_types(db: &Connection) -> Result<HashMap<ObjID, NoteType>> {
|
||||
|
@ -70,25 +70,26 @@ lazy_static! {
|
||||
(.*?) # 3 - field text
|
||||
\[/anki:tts\]
|
||||
"#).unwrap();
|
||||
|
||||
static ref LATEX: Regex = Regex::new(
|
||||
r#"(?xsi)
|
||||
\[latex\](.+?)\[/latex\] # 1 - standard latex
|
||||
|
|
||||
\[\$\](.+?)\[/\$\] # 2 - inline math
|
||||
|
|
||||
\[\$\$\](.+?)\[/\$\$\] # 3 - math environment
|
||||
"#).unwrap();
|
||||
}
|
||||
|
||||
pub fn strip_html(html: &str) -> Cow<str> {
|
||||
HTML.replace_all(html, "")
|
||||
let mut out: Cow<str> = html.into();
|
||||
|
||||
if let Cow::Owned(o) = HTML.replace_all(html, "") {
|
||||
out = o.into();
|
||||
}
|
||||
|
||||
if let Cow::Owned(o) = decode_entities(out.as_ref()) {
|
||||
out = o.into();
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub fn decode_entities(html: &str) -> Cow<str> {
|
||||
if html.contains('&') {
|
||||
match htmlescape::decode_html(html) {
|
||||
Ok(text) => text,
|
||||
Ok(text) => text.replace("\u{a0}", " "),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
.into()
|
||||
@ -211,10 +212,6 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
|
||||
without_html.into_owned().into()
|
||||
}
|
||||
|
||||
pub(crate) fn contains_latex(text: &str) -> bool {
|
||||
LATEX.is_match(text)
|
||||
}
|
||||
|
||||
pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
|
||||
if !is_nfc(s) {
|
||||
s.chars().nfc().collect::<String>().into()
|
||||
|
Loading…
Reference in New Issue
Block a user