921e8c7985
Closes #2526
648 lines
19 KiB
Rust
648 lines
19 KiB
Rust
// Copyright: Ankitects Pty Ltd and contributors
|
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
|
|
|
use std::borrow::Cow;
|
|
|
|
use lazy_static::lazy_static;
|
|
use percent_encoding_iri::percent_decode_str;
|
|
use percent_encoding_iri::utf8_percent_encode;
|
|
use percent_encoding_iri::AsciiSet;
|
|
use percent_encoding_iri::CONTROLS;
|
|
use regex::Captures;
|
|
use regex::Regex;
|
|
use unicase::eq as uni_eq;
|
|
use unicode_normalization::char::is_combining_mark;
|
|
use unicode_normalization::is_nfc;
|
|
use unicode_normalization::is_nfkd_quick;
|
|
use unicode_normalization::IsNormalized;
|
|
use unicode_normalization::UnicodeNormalization;
|
|
|
|
/// Whitespace trimming for values that should keep their own type, so that a
/// borrowed [Cow] can stay borrowed.
pub trait Trimming {
    /// Returns [self] with leading and trailing whitespace removed.
    fn trim(self) -> Self;
}

impl Trimming for Cow<'_, str> {
    fn trim(self) -> Self {
        match self {
            // trimming a borrowed string only narrows the borrow
            Cow::Borrowed(text) => Cow::Borrowed(text.trim()),
            Cow::Owned(text) => {
                let trimmed = text.as_str().trim();
                if trimmed.len() == text.len() {
                    // nothing was removed, so the existing allocation is reused
                    Cow::Owned(text)
                } else {
                    Cow::Owned(trimmed.to_owned())
                }
            }
        }
    }
}
|
|
|
|
/// Helpers for chaining transformations over a [Cow] while keeping track of
/// whether anything was actually changed.
pub(crate) trait CowMapping<'a, B: ?Sized + 'a + ToOwned> {
    /// Returns [self]
    /// - unchanged, if the given function returns [Cow::Borrowed]
    /// - with the new value, if the given function returns [Cow::Owned]
    fn map_cow(self, f: impl FnOnce(&B) -> Cow<B>) -> Self;
    /// Returns the owned value, or [None] if [self] is [Cow::Borrowed].
    fn get_owned(self) -> Option<B::Owned>;
}

impl<'a, B: ?Sized + 'a + ToOwned> CowMapping<'a, B> for Cow<'a, B> {
    fn map_cow(self, f: impl FnOnce(&B) -> Cow<B>) -> Self {
        if let Cow::Owned(o) = f(&self) {
            Cow::Owned(o)
        } else {
            // a borrowed result means "no change", so the input is passed on,
            // whether it was itself borrowed or owned
            self
        }
    }

    fn get_owned(self) -> Option<B::Owned> {
        match self {
            Cow::Borrowed(_) => None,
            Cow::Owned(s) => Some(s),
        }
    }
}
|
|
|
|
/// Returns `s` without a leading U+FEFF byte order mark, if it has one.
/// Only a single leading mark is removed.
pub(crate) fn strip_utf8_bom(s: &str) -> &str {
    s.strip_prefix('\u{feff}').unwrap_or(s)
}
|
|
|
|
/// An audio/video tag: either a reference to a media file, or a
/// text-to-speech request.
#[derive(Debug, PartialEq)]
pub enum AvTag {
    /// A sound or video file, identified by a string.
    // NOTE(review): presumably the filename from a `[sound:...]` tag - confirm
    // against the code that constructs this.
    SoundOrVideo(String),
    /// A text-to-speech request.
    // NOTE(review): the fields presumably mirror the arguments and field text
    // of an `[anki:tts]` tag - confirm against the parser.
    TextToSpeech {
        field_text: String,
        lang: String,
        voices: Vec<String>,
        speed: f32,
        other_args: Vec<String>,
    },
}
|
|
|
|
lazy_static! {
    /// Matches HTML comments, whole style and script elements (including
    /// their content), and any other tag.
    static ref HTML: Regex = Regex::new(concat!(
        "(?si)",
        // wrapped text
        r"(<!--.*?-->)|(<style.*?>.*?</style>)|(<script.*?>.*?</script>)",
        // html tags
        r"|(<.*?>)",
    ))
    .unwrap();

    /// Matches attribute-less opening and closing tags of `br` and the
    /// listed block-level elements.
    static ref HTML_LINEBREAK_TAGS: Regex = Regex::new(
        r#"(?xsi)
            </?
            (?:
                br|address|article|aside|blockquote|canvas|dd|div
                |dl|dt|fieldset|figcaption|figure|footer|form
                |h[1-6]|header|hr|li|main|nav|noscript|ol
                |output|p|pre|section|table|tfoot|ul|video
            )
            >
        "#
    ).unwrap();

    /// Matches img, audio and object tags that have a src or data attribute.
    /// The filename is captured in group 1, 2 or 3, depending on how it was
    /// quoted.
    pub static ref HTML_MEDIA_TAGS: Regex = Regex::new(
        r#"(?xsi)
            # the start of the image, audio, or object tag
            <\b(?:img|audio|object)\b[^>]+\b(?:src|data)\b=
            (?:
                    # 1: double-quoted filename
                    "
                    ([^"]+?)
                    "
                    [^>]*>
                |
                    # 2: single-quoted filename
                    '
                    ([^']+?)
                    '
                    [^>]*>
                |
                    # 3: unquoted filename
                    ([^ >]+?)
                    (?:
                        # then either a space and the rest
                        \x20[^>]*>
                        |
                        # or the tag immediately ends
                        >
                    )
            )
        "#
    ).unwrap();

    // videos are also in sound tags
    static ref AV_TAGS: Regex = Regex::new(
        r#"(?xs)
            \[sound:(.+?)\]     # 1 - the filename in a sound tag
            |
            \[anki:tts\]
                \[(.*?)\]       # 2 - arguments to tts call
                (.*?)           # 3 - field text
            \[/anki:tts\]
        "#).unwrap();

    /// Matches `<br>` variants, `<div>` and newlines.
    static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)<br\s*/?>|<div>|\n"#).unwrap();

    /// Matches `[[type:...]]` tags.
    static ref TYPE_TAG: Regex = Regex::new(r"\[\[type:[^]]+\]\]").unwrap();
    /// Matches `[sound:...]` tags, capturing the filename in group 1.
    pub(crate) static ref SOUND_TAG: Regex = Regex::new(r"\[sound:([^]]+)\]").unwrap();

    /// Files included in CSS with a leading underscore.
    static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new(
        r#"(?xi)
            (?:@import\s+           # import statement with a bare
                "(_[^"]*.css)"      # double quoted
                |                   # or
                '(_[^']*.css)'      # single quoted css filename
            )
            |                       # or
            (?:url\(\s*             # a url function with a
                "(_[^"]+)"          # double quoted
                |                   # or
                '(_[^']+)'          # single quoted
                |                   # or
                (_.+)               # unquoted filename
            \s*\))
        "#).unwrap();

    /// Strings, src and data attributes with a leading underscore.
    static ref UNDERSCORED_REFERENCES: Regex = Regex::new(
        r#"(?x)
                \[sound:(_[^]]+)\]  # a filename in an Anki sound tag
            |                       # or
                "(_[^"]+)"          # a double quoted
            |                       # or
                '(_[^']+)'          # single quoted string
            |                       # or
                \b(?:src|data)      # a 'src' or 'data' attribute
                =                   # followed by
                (_[^ >]+)           # an unquoted value
        "#).unwrap();
}
|
|
|
|
pub fn is_html(text: impl AsRef<str>) -> bool {
|
|
HTML.is_match(text.as_ref())
|
|
}
|
|
|
|
pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> {
|
|
let (html_stripper, sound_rep): (fn(&str) -> Cow<str>, _) = if preserve_media_filenames {
|
|
(strip_html_preserving_media_filenames, "$1")
|
|
} else {
|
|
(strip_html, "")
|
|
};
|
|
PERSISTENT_HTML_SPACERS
|
|
.replace_all(html, " ")
|
|
.map_cow(|s| TYPE_TAG.replace_all(s, ""))
|
|
.map_cow(|s| SOUND_TAG.replace_all(s, sound_rep))
|
|
.map_cow(html_stripper)
|
|
.trim()
|
|
}
|
|
|
|
pub fn strip_html(html: &str) -> Cow<str> {
|
|
strip_html_preserving_entities(html).map_cow(decode_entities)
|
|
}
|
|
|
|
pub fn strip_html_preserving_entities(html: &str) -> Cow<str> {
|
|
HTML.replace_all(html, "")
|
|
}
|
|
|
|
pub fn decode_entities(html: &str) -> Cow<str> {
|
|
if html.contains('&') {
|
|
match htmlescape::decode_html(html) {
|
|
Ok(text) => text.replace('\u{a0}', " ").into(),
|
|
Err(_) => html.into(),
|
|
}
|
|
} else {
|
|
// nothing to do
|
|
html.into()
|
|
}
|
|
}
|
|
|
|
/// Replaces every `\n` in `text` with a space, borrowing the input if it
/// contains no newline.
pub(crate) fn newlines_to_spaces(text: &str) -> Cow<str> {
    if text.contains('\n') {
        text.replace('\n', " ").into()
    } else {
        text.into()
    }
}
|
|
|
|
pub fn strip_html_for_tts(html: &str) -> Cow<str> {
|
|
HTML_LINEBREAK_TAGS
|
|
.replace_all(html, " ")
|
|
.map_cow(strip_html)
|
|
}
|
|
|
|
/// Truncate a String on a valid UTF8 boundary.
///
/// `s` is shortened to at most `max` bytes. If `max` falls inside a
/// multi-byte character, that whole character is dropped as well. Strings
/// that already fit are left untouched.
pub(crate) fn truncate_to_char_boundary(s: &mut String, mut max: usize) {
    if max >= s.len() {
        return;
    }
    // index 0 is always a boundary, so this cannot underflow
    while !s.is_char_boundary(max) {
        max -= 1;
    }
    s.truncate(max);
}
|
|
|
|
/// A reference to a media file found in a piece of text.
#[derive(Debug)]
pub(crate) struct MediaRef<'a> {
    /// The complete tag the filename was found in.
    pub full_ref: &'a str,
    /// The filename exactly as written in the tag.
    pub fname: &'a str,
    /// audio files may have things like `&amp;` that need decoding
    pub fname_decoded: Cow<'a, str>,
}
|
|
|
|
pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
|
|
let mut out = vec![];
|
|
|
|
for caps in HTML_MEDIA_TAGS.captures_iter(text) {
|
|
let fname = caps
|
|
.get(1)
|
|
.or_else(|| caps.get(2))
|
|
.or_else(|| caps.get(3))
|
|
.unwrap()
|
|
.as_str();
|
|
let fname_decoded = decode_entities(fname);
|
|
out.push(MediaRef {
|
|
full_ref: caps.get(0).unwrap().as_str(),
|
|
fname,
|
|
fname_decoded,
|
|
});
|
|
}
|
|
|
|
for caps in AV_TAGS.captures_iter(text) {
|
|
if let Some(m) = caps.get(1) {
|
|
let fname = m.as_str();
|
|
let fname_decoded = decode_entities(fname);
|
|
out.push(MediaRef {
|
|
full_ref: caps.get(0).unwrap().as_str(),
|
|
fname,
|
|
fname_decoded,
|
|
});
|
|
}
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
/// Calls `replacer` for every media reference in `text`, and optionally
|
|
/// replaces it with something else. [None] if no reference was found.
|
|
pub fn replace_media_refs(
|
|
text: &str,
|
|
mut replacer: impl FnMut(&str) -> Option<String>,
|
|
) -> Option<String> {
|
|
let mut rep = |caps: &Captures| {
|
|
let whole_match = caps.get(0).unwrap().as_str();
|
|
let old_name = caps.iter().skip(1).find_map(|g| g).unwrap().as_str();
|
|
let old_name_decoded = decode_entities(old_name);
|
|
|
|
if let Some(mut new_name) = replacer(&old_name_decoded) {
|
|
if matches!(old_name_decoded, Cow::Owned(_)) {
|
|
new_name = htmlescape::encode_minimal(&new_name);
|
|
}
|
|
whole_match.replace(old_name, &new_name)
|
|
} else {
|
|
whole_match.to_owned()
|
|
}
|
|
};
|
|
|
|
HTML_MEDIA_TAGS
|
|
.replace_all(text, &mut rep)
|
|
.map_cow(|s| AV_TAGS.replace_all(s, &mut rep))
|
|
.get_owned()
|
|
}
|
|
|
|
pub(crate) fn extract_underscored_css_imports(text: &str) -> Vec<&str> {
|
|
UNDERSCORED_CSS_IMPORTS
|
|
.captures_iter(text)
|
|
.map(extract_match)
|
|
.collect()
|
|
}
|
|
|
|
pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> {
|
|
UNDERSCORED_REFERENCES
|
|
.captures_iter(text)
|
|
.map(extract_match)
|
|
.collect()
|
|
}
|
|
|
|
/// Returns the first matching group as a str. This is intended for regexes
|
|
/// where exactly one group matches, and will panic for matches without matching
|
|
/// groups.
|
|
fn extract_match(caps: Captures) -> &str {
|
|
caps.iter().skip(1).find_map(|g| g).unwrap().as_str()
|
|
}
|
|
|
|
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
|
|
HTML_MEDIA_TAGS
|
|
.replace_all(html, r" ${1}${2}${3} ")
|
|
.map_cow(strip_html)
|
|
}
|
|
|
|
pub fn contains_media_tag(html: &str) -> bool {
|
|
HTML_MEDIA_TAGS.is_match(html)
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
pub(crate) fn sanitize_html(html: &str) -> String {
|
|
ammonia::clean(html)
|
|
}
|
|
|
|
pub(crate) fn sanitize_html_no_images(html: &str) -> String {
|
|
ammonia::Builder::default()
|
|
.rm_tags(&["img"])
|
|
.clean(html)
|
|
.to_string()
|
|
}
|
|
|
|
pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
|
|
if !is_nfc(s) {
|
|
s.chars().nfc().collect::<String>().into()
|
|
} else {
|
|
s.into()
|
|
}
|
|
}
|
|
|
|
pub(crate) fn ensure_string_in_nfc(s: &mut String) {
|
|
if !is_nfc(s) {
|
|
*s = s.chars().nfc().collect()
|
|
}
|
|
}
|
|
|
|
/// Convert provided string to NFKD form and strip combining characters.
|
|
pub(crate) fn without_combining(s: &str) -> Cow<str> {
|
|
// if the string is already normalized
|
|
if matches!(is_nfkd_quick(s.chars()), IsNormalized::Yes) {
|
|
// and no combining characters found, return unchanged
|
|
if !s.chars().any(is_combining_mark) {
|
|
return s.into();
|
|
}
|
|
}
|
|
|
|
// we need to create a new string without the combining marks
|
|
s.chars()
|
|
.nfkd()
|
|
.filter(|c| !is_combining_mark(*c))
|
|
.collect::<String>()
|
|
.into()
|
|
}
|
|
|
|
/// Check if string contains an unescaped wildcard.
|
|
pub(crate) fn is_glob(txt: &str) -> bool {
|
|
// even number of \s followed by a wildcard
|
|
lazy_static! {
|
|
static ref RE: Regex = Regex::new(
|
|
r#"(?x)
|
|
(?:^|[^\\]) # not a backslash
|
|
(?:\\\\)* # even number of backslashes
|
|
[*_] # wildcard
|
|
"#
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
RE.is_match(txt)
|
|
}
|
|
|
|
/// Convert to a RegEx respecting Anki wildcards.
|
|
pub(crate) fn to_re(txt: &str) -> Cow<str> {
|
|
to_custom_re(txt, ".")
|
|
}
|
|
|
|
/// Convert Anki style to RegEx using the provided wildcard.
|
|
pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
|
|
lazy_static! {
|
|
static ref RE: Regex = Regex::new(r"\\?.").unwrap();
|
|
}
|
|
RE.replace_all(txt, |caps: &Captures| {
|
|
let s = &caps[0];
|
|
match s {
|
|
r"\\" | r"\*" => s.to_string(),
|
|
r"\_" => "_".to_string(),
|
|
"*" => format!("{}*", wildcard),
|
|
"_" => wildcard.to_string(),
|
|
s => regex::escape(s),
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Convert to SQL respecting Anki wildcards.
|
|
pub(crate) fn to_sql(txt: &str) -> Cow<str> {
|
|
// escape sequences and unescaped special characters which need conversion
|
|
lazy_static! {
|
|
static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
|
|
}
|
|
RE.replace_all(txt, |caps: &Captures| {
|
|
let s = &caps[0];
|
|
match s {
|
|
r"\\" => r"\\",
|
|
r"\*" => "*",
|
|
"*" => "%",
|
|
"%" => r"\%",
|
|
_ => unreachable!(),
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Unescape everything.
|
|
pub(crate) fn to_text(txt: &str) -> Cow<str> {
|
|
lazy_static! {
|
|
static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
|
|
}
|
|
RE.replace_all(txt, "$1")
|
|
}
|
|
|
|
/// Escape Anki wildcards and the backslash for escaping them: \*_
|
|
pub(crate) fn escape_anki_wildcards(txt: &str) -> String {
|
|
lazy_static! {
|
|
static ref RE: Regex = Regex::new(r"[\\*_]").unwrap();
|
|
}
|
|
RE.replace_all(txt, r"\$0").into()
|
|
}
|
|
|
|
/// Escape Anki wildcards unless it's _*
|
|
pub(crate) fn escape_anki_wildcards_for_search_node(txt: &str) -> String {
|
|
if txt == "_*" {
|
|
txt.to_string()
|
|
} else {
|
|
escape_anki_wildcards(txt)
|
|
}
|
|
}
|
|
|
|
/// Return a function to match input against `search`,
|
|
/// which may contain wildcards.
|
|
pub(crate) fn glob_matcher(search: &str) -> impl Fn(&str) -> bool + '_ {
|
|
let mut regex = None;
|
|
let mut cow = None;
|
|
if is_glob(search) {
|
|
regex = Some(Regex::new(&format!("^(?i){}$", to_re(search))).unwrap());
|
|
} else {
|
|
cow = Some(to_text(search));
|
|
}
|
|
|
|
move |text| {
|
|
if let Some(r) = ®ex {
|
|
r.is_match(text)
|
|
} else {
|
|
uni_eq(text, cow.as_ref().unwrap())
|
|
}
|
|
}
|
|
}
|
|
|
|
lazy_static! {
    /// Matches text starting with `http://` or `https://`, in any letter
    /// case.
    pub(crate) static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
}
|
|
|
|
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
|
|
const FRAGMENT_QUERY_UNION: &AsciiSet = &CONTROLS
|
|
.add(b' ')
|
|
.add(b'"')
|
|
.add(b'<')
|
|
.add(b'>')
|
|
.add(b'`')
|
|
.add(b'#');
|
|
|
|
/// IRI-encode unescaped local paths in HTML fragment.
|
|
pub(crate) fn encode_iri_paths(unescaped_html: &str) -> Cow<str> {
|
|
transform_html_paths(unescaped_html, |fname| {
|
|
utf8_percent_encode(fname, FRAGMENT_QUERY_UNION).into()
|
|
})
|
|
}
|
|
|
|
/// URI-decode escaped local paths in HTML fragment.
|
|
pub(crate) fn decode_iri_paths(escaped_html: &str) -> Cow<str> {
|
|
transform_html_paths(escaped_html, |fname| {
|
|
percent_decode_str(fname).decode_utf8_lossy()
|
|
})
|
|
}
|
|
|
|
/// Apply a transform to local filename references in tags like IMG.
|
|
/// Required at display time, as Anki unfortunately stores the references
|
|
/// in unencoded form in the database.
|
|
fn transform_html_paths<F>(html: &str, transform: F) -> Cow<str>
|
|
where
|
|
F: Fn(&str) -> Cow<str>,
|
|
{
|
|
HTML_MEDIA_TAGS.replace_all(html, |caps: &Captures| {
|
|
let fname = caps
|
|
.get(1)
|
|
.or_else(|| caps.get(2))
|
|
.or_else(|| caps.get(3))
|
|
.unwrap()
|
|
.as_str();
|
|
let full = caps.get(0).unwrap().as_str();
|
|
if REMOTE_FILENAME.is_match(fname) {
|
|
full.into()
|
|
} else {
|
|
full.replace(fname, &transform(fname))
|
|
}
|
|
})
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use std::borrow::Cow;

    use super::*;

    #[test]
    fn stripping() {
        assert_eq!(strip_html("test"), "test");
        assert_eq!(strip_html("t<b>e</b>st"), "test");
        assert_eq!(strip_html("so<SCRIPT>t<b>e</b>st</script>me"), "some");

        assert_eq!(
            strip_html_preserving_media_filenames("<img src=foo.jpg>"),
            " foo.jpg "
        );
        assert_eq!(
            strip_html_preserving_media_filenames("<img src='foo.jpg'><html>"),
            " foo.jpg "
        );
        assert_eq!(strip_html_preserving_media_filenames("<html>"), "");
    }

    #[test]
    fn combining() {
        assert!(matches!(without_combining("test"), Cow::Borrowed(_)));
        assert!(matches!(without_combining("Über"), Cow::Owned(_)));
    }

    #[test]
    fn conversion() {
        assert_eq!(&to_re(r"[te\*st]"), r"\[te\*st\]");
        assert_eq!(&to_custom_re("f_o*", r"\d"), r"f\do\d*");
        assert_eq!(&to_sql("%f_o*"), r"\%f_o%");
        assert_eq!(&to_text(r"\*\_*_"), "*_*_");
        assert!(is_glob(r"\\\\_"));
        assert!(!is_glob(r"\\\_"));
        assert!(glob_matcher(r"foo\*bar*")("foo*bar123"));
    }

    #[test]
    fn extracting() {
        assert_eq!(
            extract_underscored_css_imports(concat!(
                "@IMPORT '_foo.css'\n",
                "@import \"_bar.css\"\n",
                "@import '_baz.css'\n",
                "@import 'nope.css'\n",
                "url(_foo.css)\n",
                "URL(\"_bar.css\")\n",
                "@import url('_baz.css')\n",
                "url('nope.css')\n",
            )),
            vec!["_foo.css", "_bar.css", "_baz.css", "_foo.css", "_bar.css", "_baz.css",]
        );
        assert_eq!(
            extract_underscored_references(concat!(
                "<img src=\"_foo.jpg\">",
                "<object data=\"_bar\">",
                "\"_baz.js\"",
                "\"nope.js\"",
                "<img src=_foo.jpg>",
                "<object data=_bar>",
                "'_baz.js'",
            )),
            vec!["_foo.jpg", "_bar", "_baz.js", "_foo.jpg", "_bar", "_baz.js",]
        );
    }

    #[test]
    fn replacing() {
        assert_eq!(
            &replace_media_refs("<img src=foo.jpg>[sound:bar.mp3]<img src=baz.jpg>", |s| {
                (s != "baz.jpg").then(|| "spam".to_string())
            })
            .unwrap(),
            "<img src=spam>[sound:spam]<img src=baz.jpg>",
        );
    }

    #[test]
    fn truncate() {
        let mut s = "日本語".to_string();
        truncate_to_char_boundary(&mut s, 6);
        assert_eq!(&s, "日本");
        let mut s = "日本語".to_string();
        truncate_to_char_boundary(&mut s, 1);
        assert_eq!(&s, "");
    }

    #[test]
    fn iri_encoding() {
        for (input, output) in [
            ("foo.jpg", "foo.jpg"),
            ("bar baz", "bar%20baz"),
            ("sub/path.jpg", "sub/path.jpg"),
            ("日本語", "日本語"),
            ("a=b", "a=b"),
            ("a&b", "a&b"),
        ] {
            assert_eq!(
                &encode_iri_paths(&format!("<img src=\"{input}\">")),
                &format!("<img src=\"{output}\">")
            );
        }
    }
}
|