include LaTeX png/svg files when checking for unused media
This commit is contained in:
parent
4cca3ecef5
commit
c890ef871e
@ -1,8 +1,9 @@
|
|||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use crate::latex::contains_latex;
|
||||||
use crate::template::RenderContext;
|
use crate::template::RenderContext;
|
||||||
use crate::text::{contains_latex, strip_html};
|
use crate::text::strip_html;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use regex::Captures;
|
use regex::Captures;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
122
rslib/src/latex.rs
Normal file
122
rslib/src/latex.rs
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use crate::media::files::sha1_of_data;
|
||||||
|
use crate::text::strip_html;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::{Captures, Regex};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
lazy_static! {
    /// Matches the three LaTeX markup forms. Exactly one capture group is
    /// set per match, identifying which form was found. The `s` flag lets
    /// `.` span newlines and `i` makes the tags case-insensitive.
    static ref LATEX: Regex = Regex::new(
        r#"(?xsi)
            \[latex\](.+?)\[/latex\]     # 1 - standard latex
            |
            \[\$\](.+?)\[/\$\]           # 2 - inline math
            |
            \[\$\$\](.+?)\[/\$\$\]       # 3 - math environment
        "#
    )
    .unwrap();
    /// Matches the HTML tags that are turned into newlines before the
    /// remaining HTML is stripped from a LaTeX snippet.
    ///
    /// Verbose (`x`) mode ignores literal whitespace in the pattern, so the
    /// space in `<br />` has to be written as `\x20`; a bare space would
    /// silently turn the pattern into `<br(/)?>`.
    static ref LATEX_NEWLINES: Regex = Regex::new(
        r#"(?xi)
            <br(\x20/)?>     # <br> or <br />
            |
            <div>
        "#
    )
    .unwrap();
}
|
||||||
|
|
||||||
|
pub(crate) fn contains_latex(text: &str) -> bool {
|
||||||
|
LATEX.is_match(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A LaTeX snippet pulled out of a piece of text, paired with the name of
/// the image file that stands in for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedLatex {
    /// File name of the image that replaces the snippet.
    pub fname: String,
    /// The LaTeX source of the snippet.
    pub latex: String,
}
|
||||||
|
|
||||||
|
pub(crate) fn extract_latex(text: &str, svg: bool) -> (String, Vec<ExtractedLatex>) {
|
||||||
|
let mut extracted = vec![];
|
||||||
|
|
||||||
|
let new_text = LATEX.replace_all(text, |caps: &Captures| {
|
||||||
|
let latex = match (caps.get(1), caps.get(2), caps.get(3)) {
|
||||||
|
(Some(m), _, _) => m.as_str().into(),
|
||||||
|
(_, Some(m), _) => format!("${}$", m.as_str()),
|
||||||
|
(_, _, Some(m)) => format!(r"\begin{{displaymath}}{}\end{{displaymath}}", m.as_str()),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let latex_text = strip_html_for_latex(&latex);
|
||||||
|
let fname = fname_for_latex(&latex_text, svg);
|
||||||
|
let img_link = image_link_for_fname(&fname);
|
||||||
|
extracted.push(ExtractedLatex {
|
||||||
|
fname,
|
||||||
|
latex: latex_text.into(),
|
||||||
|
});
|
||||||
|
|
||||||
|
img_link
|
||||||
|
});
|
||||||
|
|
||||||
|
(new_text.into(), extracted)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn strip_html_for_latex(html: &str) -> Cow<str> {
|
||||||
|
let mut out: Cow<str> = html.into();
|
||||||
|
if let Cow::Owned(o) = LATEX_NEWLINES.replace_all(html, "\n") {
|
||||||
|
out = o.into();
|
||||||
|
}
|
||||||
|
if let Cow::Owned(o) = strip_html(out.as_ref()) {
|
||||||
|
out = o.into();
|
||||||
|
}
|
||||||
|
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fname_for_latex(latex: &str, svg: bool) -> String {
|
||||||
|
let ext = if svg { "svg" } else { "png" };
|
||||||
|
let csum = hex::encode(sha1_of_data(latex.as_bytes()));
|
||||||
|
|
||||||
|
format!("latex-{}.{}", csum, ext)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the `<img>` tag (with class `latex`) whose `src` is `fname`.
/// The file name is inserted as-is, without escaping.
fn image_link_for_fname(fname: &str) -> String {
    ["<img class=latex src=\"", fname, "\">"].concat()
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use crate::latex::{extract_latex, ExtractedLatex};

    /// Builds the record expected for one extracted snippet.
    fn expected(fname: &str, latex: &str) -> ExtractedLatex {
        ExtractedLatex {
            fname: fname.to_string(),
            latex: latex.to_string(),
        }
    }

    #[test]
    fn latex() {
        // [latex] block: <br> and <div> become newlines, and the span is
        // replaced by an image link in the surrounding text.
        let png = "latex-ef30b3f4141c33a5bf7044b0d1961d3399c05d50.png";
        let (text, found) = extract_latex("a[latex]one<br>and<div>two[/latex]b", false);
        assert_eq!(text, format!("a<img class=latex src=\"{}\">b", png));
        assert_eq!(found, vec![expected(png, "one\nand\ntwo")]);

        // Inline math: HTML is stripped, the body is wrapped in `$`, and the
        // svg flag selects the extension.
        let (_, found) = extract_latex("[$]<b>hello</b> world[/$]", true);
        assert_eq!(
            found,
            vec![expected(
                "latex-060219fbf3ddb74306abddaf4504276ad793b029.svg",
                "$hello world$"
            )]
        );

        // Math environment: entities are decoded and the body is wrapped in
        // displaymath.
        let (_, found) = extract_latex("[$$]math &amp; stuff[/$$]", false);
        assert_eq!(
            found,
            vec![expected(
                "latex-8899f3f849ffdef6e4e9f2f34a923a1f608ebc07.png",
                r"\begin{displaymath}math & stuff\end{displaymath}"
            )]
        );
    }
}
|
@ -12,6 +12,7 @@ pub fn version() -> &'static str {
|
|||||||
pub mod backend;
|
pub mod backend;
|
||||||
pub mod cloze;
|
pub mod cloze;
|
||||||
pub mod err;
|
pub mod err;
|
||||||
|
pub mod latex;
|
||||||
pub mod media;
|
pub mod media;
|
||||||
pub mod sched;
|
pub mod sched;
|
||||||
pub mod template;
|
pub mod template;
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use crate::cloze::expand_clozes_to_reveal_latex;
|
||||||
use crate::err::{AnkiError, Result};
|
use crate::err::{AnkiError, Result};
|
||||||
|
use crate::latex::extract_latex;
|
||||||
use crate::media::col::{
|
use crate::media::col::{
|
||||||
for_every_note, get_note_types, mark_collection_modified, open_or_create_collection_db,
|
for_every_note, get_note_types, mark_collection_modified, open_or_create_collection_db,
|
||||||
set_note, Note,
|
set_note, Note,
|
||||||
@ -223,20 +225,19 @@ where
|
|||||||
if self.checked % 10 == 0 {
|
if self.checked % 10 == 0 {
|
||||||
self.maybe_fire_progress_cb()?;
|
self.maybe_fire_progress_cb()?;
|
||||||
}
|
}
|
||||||
if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
|
let nt = note_types
|
||||||
// note was modified, needs saving
|
|
||||||
set_note(
|
|
||||||
&trx,
|
|
||||||
note,
|
|
||||||
note_types
|
|
||||||
.get(¬e.mid)
|
.get(¬e.mid)
|
||||||
.ok_or_else(|| AnkiError::DBError {
|
.ok_or_else(|| AnkiError::DBError {
|
||||||
info: "missing note type".to_string(),
|
info: "missing note type".to_string(),
|
||||||
})?,
|
})?;
|
||||||
)?;
|
if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
|
||||||
|
// note was modified, needs saving
|
||||||
|
set_note(&trx, note, nt)?;
|
||||||
collection_modified = true;
|
collection_modified = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extract latex
|
||||||
|
extract_latex_refs(note, &mut referenced_files, nt.latex_uses_svg());
|
||||||
Ok(())
|
Ok(())
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@ -320,6 +321,20 @@ fn find_unused_and_missing(
|
|||||||
(unused, references.into_iter().collect())
|
(unused, references.into_iter().collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn extract_latex_refs(note: &Note, seen_files: &mut HashSet<String>, svg: bool) {
|
||||||
|
for field in note.fields() {
|
||||||
|
let field_text: Cow<str> = if field.contains("{{c") {
|
||||||
|
expand_clozes_to_reveal_latex(field).into()
|
||||||
|
} else {
|
||||||
|
field.into()
|
||||||
|
};
|
||||||
|
let (_, extracted) = extract_latex(field_text.as_ref(), svg);
|
||||||
|
for e in extracted {
|
||||||
|
seen_files.insert(e.fname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use crate::err::Result;
|
use crate::err::Result;
|
||||||
|
@ -64,6 +64,15 @@ pub(super) struct NoteType {
|
|||||||
id: ObjID,
|
id: ObjID,
|
||||||
#[serde(rename = "sortf")]
|
#[serde(rename = "sortf")]
|
||||||
sort_field_idx: u16,
|
sort_field_idx: u16,
|
||||||
|
|
||||||
|
#[serde(rename = "latexsvg", default)]
|
||||||
|
latex_svg: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NoteType {
|
||||||
|
pub fn latex_uses_svg(&self) -> bool {
|
||||||
|
self.latex_svg
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn get_note_types(db: &Connection) -> Result<HashMap<ObjID, NoteType>> {
|
pub(super) fn get_note_types(db: &Connection) -> Result<HashMap<ObjID, NoteType>> {
|
||||||
|
@ -70,25 +70,26 @@ lazy_static! {
|
|||||||
(.*?) # 3 - field text
|
(.*?) # 3 - field text
|
||||||
\[/anki:tts\]
|
\[/anki:tts\]
|
||||||
"#).unwrap();
|
"#).unwrap();
|
||||||
|
|
||||||
static ref LATEX: Regex = Regex::new(
|
|
||||||
r#"(?xsi)
|
|
||||||
\[latex\](.+?)\[/latex\] # 1 - standard latex
|
|
||||||
|
|
|
||||||
\[\$\](.+?)\[/\$\] # 2 - inline math
|
|
||||||
|
|
|
||||||
\[\$\$\](.+?)\[/\$\$\] # 3 - math environment
|
|
||||||
"#).unwrap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn strip_html(html: &str) -> Cow<str> {
|
pub fn strip_html(html: &str) -> Cow<str> {
|
||||||
HTML.replace_all(html, "")
|
let mut out: Cow<str> = html.into();
|
||||||
|
|
||||||
|
if let Cow::Owned(o) = HTML.replace_all(html, "") {
|
||||||
|
out = o.into();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Cow::Owned(o) = decode_entities(out.as_ref()) {
|
||||||
|
out = o.into();
|
||||||
|
}
|
||||||
|
|
||||||
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_entities(html: &str) -> Cow<str> {
|
pub fn decode_entities(html: &str) -> Cow<str> {
|
||||||
if html.contains('&') {
|
if html.contains('&') {
|
||||||
match htmlescape::decode_html(html) {
|
match htmlescape::decode_html(html) {
|
||||||
Ok(text) => text,
|
Ok(text) => text.replace("\u{a0}", " "),
|
||||||
Err(e) => format!("{:?}", e),
|
Err(e) => format!("{:?}", e),
|
||||||
}
|
}
|
||||||
.into()
|
.into()
|
||||||
@ -211,10 +212,6 @@ pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
|
|||||||
without_html.into_owned().into()
|
without_html.into_owned().into()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn contains_latex(text: &str) -> bool {
|
|
||||||
LATEX.is_match(text)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
|
pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
|
||||||
if !is_nfc(s) {
|
if !is_nfc(s) {
|
||||||
s.chars().nfc().collect::<String>().into()
|
s.chars().nfc().collect::<String>().into()
|
||||||
|
Loading…
Reference in New Issue
Block a user