Add audio & object tags to media check

Makes the media check recognize files in <audio> and <object> tags as used.

Both tags have long been honored by the WebView (verified in Anki and AnkiDroid) and are extremely useful to anyone familiar with web development.
This commit is contained in:
Andreas Reis 2020-10-14 01:56:57 +02:00
parent 1e37e6cabd
commit 6e9aaad11e
2 changed files with 14 additions and 10 deletions

View File

@ -34,13 +34,17 @@ def media_paths_from_col_path(col_path: str) -> Tuple[str, str]:
class MediaManager:
soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
imgRegexps = [
htmlRegexps = [
# src element quoted case
r"(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
r"(?i)(<[img|audio][^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case
r"(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
r"(?i)(<[img|audio][^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
# data attribute quoted case
r"(?i)(<object[^>]* data=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case
r"(?i)(<object[^>]* data=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
]
regexps = soundRegexps + imgRegexps
regexps = soundRegexps + htmlRegexps
def __init__(self, col: anki.collection.Collection, server: bool) -> None:
self.col = col.weakref()
@ -173,7 +177,7 @@ class MediaManager:
return tag
return tag.replace(fname, fn(fname))
for reg in self.imgRegexps:
for reg in self.htmlRegexps:
string = re.sub(reg, repl, string)
return string

View File

@ -32,10 +32,10 @@ lazy_static! {
))
.unwrap();
static ref IMG_TAG: Regex = Regex::new(
static ref HTML_TAGS: Regex = Regex::new(
r#"(?xsi)
# the start of the image tag
<img[^>]+src=
# the start of the image, audio, or object tag
<\b(?:img|audio|object)\b[^>]+\b(?:src|data)\b=
(?:
# 1: double-quoted filename
"
@ -149,7 +149,7 @@ pub(crate) struct MediaRef<'a> {
pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
let mut out = vec![];
for caps in IMG_TAG.captures_iter(text) {
for caps in HTML_TAGS.captures_iter(text) {
let fname = caps
.get(1)
.or_else(|| caps.get(2))
@ -214,7 +214,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
}
pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
let without_fnames = IMG_TAG.replace_all(html, r" ${1}${2}${3} ");
let without_fnames = HTML_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = HTML.replace_all(&without_fnames, "");
// no changes?
if let Cow::Borrowed(b) = without_html {