diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index a581234e5..a1ebd645f 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -65,7 +65,7 @@ lukkea
David Allison
Tsung-Han Yu
Piotr Kubowicz
-RumovZ
+RumovZ
Cecini
Krish Shah
ianki
diff --git a/pylib/tests/test_find.py b/pylib/tests/test_find.py
index d9b7f76f2..e7adfafec 100644
--- a/pylib/tests/test_find.py
+++ b/pylib/tests/test_find.py
@@ -48,8 +48,10 @@ def test_findCards():
# tag searches
assert len(col.findCards("tag:*")) == 5
assert len(col.findCards("tag:\\*")) == 1
- assert len(col.findCards("tag:%")) == 5
- assert len(col.findCards("tag:\\%")) == 1
+ assert len(col.findCards("tag:%")) == 1
+ assert len(col.findCards("tag:sheep_goat")) == 0
+ assert len(col.findCards('"tag:sheep goat"')) == 0
+ assert len(col.findCards('"tag:* *"')) == 0
assert len(col.findCards("tag:animal_1")) == 2
assert len(col.findCards("tag:animal\\_1")) == 1
assert not col.findCards("tag:donkey")
diff --git a/qt/aqt/browser.py b/qt/aqt/browser.py
index a8ba379c2..967af003c 100644
--- a/qt/aqt/browser.py
+++ b/qt/aqt/browser.py
@@ -1207,7 +1207,7 @@ QTableView {{ gridline-color: {grid} }}
if i % 2 == 0:
txt += a + ":"
else:
- txt += re.sub("[*%_]", r"\\\g<0>", a)
+ txt += re.sub(r"[*_\\]", r"\\\g<0>", a)
for c in ' ()"':
if c in txt:
txt = '"{}"'.format(txt.replace('"', '\\"'))
diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs
index 6d3cd038c..3fb20c3e7 100644
--- a/rslib/src/search/parser.rs
+++ b/rslib/src/search/parser.rs
@@ -9,17 +9,15 @@ use crate::{
use lazy_static::lazy_static;
use nom::{
branch::alt,
- bytes::complete::{escaped, is_not, tag, take_while1},
- character::complete::{anychar, char, one_of},
- combinator::{all_consuming, map, map_res},
- sequence::{delimited, preceded, tuple},
+ bytes::complete::{escaped, is_not, tag},
+ character::complete::{anychar, char, none_of, one_of},
+ combinator::{all_consuming, map, map_res, verify},
+ sequence::{delimited, preceded, separated_pair},
{multi::many0, IResult},
};
-use regex::Regex;
+use regex::{Captures, Regex};
use std::{borrow::Cow, num};
-// fixme: need to preserve \ when used twice in string
-
struct ParseError {}
impl From for ParseError {
@@ -63,7 +61,7 @@ pub(super) enum SearchNode<'a> {
},
AddedInDays(u32),
EditedInDays(u32),
- CardTemplate(TemplateKind),
+ CardTemplate(TemplateKind<'a>),
Deck(Cow<'a, str>),
DeckID(DeckID),
NoteTypeID(NoteTypeID),
@@ -75,12 +73,12 @@ pub(super) enum SearchNode<'a> {
Tag(Cow<'a, str>),
Duplicates {
note_type_id: NoteTypeID,
- text: String,
+ text: Cow<'a, str>,
},
State(StateKind),
Flag(u8),
- NoteIDs(Cow<'a, str>),
- CardIDs(Cow<'a, str>),
+ NoteIDs(&'a str),
+ CardIDs(&'a str),
Property {
operator: String,
kind: PropertyKind,
@@ -113,9 +111,9 @@ pub(super) enum StateKind {
}
#[derive(Debug, PartialEq)]
-pub(super) enum TemplateKind {
+pub(super) enum TemplateKind<'a> {
Ordinal(u16),
- Name(String),
+ Name(Cow<'a, str>),
}
/// Parse the input string into a list of nodes.
@@ -127,7 +125,6 @@ pub(super) fn parse(input: &str) -> Result> {
let (_, nodes) =
all_consuming(group_inner)(input).map_err(|_e| AnkiError::SearchError(None))?;
-
Ok(nodes)
}
@@ -205,32 +202,21 @@ fn text(s: &str) -> IResult<&str, Node> {
/// Determine if text is a qualified search, and handle escaped chars.
fn search_node_for_text(s: &str) -> ParseResult {
- let mut it = s.splitn(2, ':');
- let (head, tail) = (
- unescape_quotes(it.next().unwrap()),
- it.next().map(unescape_quotes),
- );
-
- if let Some(tail) = tail {
- search_node_for_text_with_argument(head, tail)
+ let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
+ if tail.is_empty() {
+ Ok(SearchNode::UnqualifiedText(unescape(head)?))
} else {
- Ok(SearchNode::UnqualifiedText(head))
+ search_node_for_text_with_argument(head, &tail[1..])
}
}
-/// \" -> "
-fn unescape_quotes(s: &str) -> Cow {
- if s.find(r#"\""#).is_some() {
- s.replace(r#"\""#, "\"").into()
- } else {
- s.into()
- }
-}
-
-/// Unquoted text, terminated by a space or )
+/// Unquoted text, terminated by whitespace or unescaped ", ( or )
fn unquoted_term(s: &str) -> IResult<&str, Node> {
map_res(
- take_while1(|c| c != ' ' && c != ')' && c != '"'),
+ verify(
+ escaped(is_not("\"() \u{3000}\\"), '\\', none_of(" \u{3000}")),
+ |s: &str| !s.is_empty(),
+ ),
|text: &str| -> ParseResult {
Ok(if text.eq_ignore_ascii_case("or") {
Node::Or
@@ -256,57 +242,66 @@ fn quoted_term_str(s: &str) -> IResult<&str, &str> {
/// Quoted text, terminated by a non-escaped double quote
fn quoted_term_inner(s: &str) -> IResult<&str, &str> {
- escaped(is_not(r#""\"#), '\\', anychar)(s)
+ verify(escaped(is_not(r#""\"#), '\\', anychar), |s: &str| {
+ !s.is_empty()
+ })(s)
}
/// eg deck:"foo bar" - quotes must come after the :
fn partially_quoted_term(s: &str) -> IResult<&str, Node> {
- let term = take_while1(|c| c != ' ' && c != ')' && c != ':');
- let (s, (term, _, quoted_val)) = tuple((term, char(':'), quoted_term_str))(s)?;
- let quoted_val = unescape_quotes(quoted_val);
-
- match search_node_for_text_with_argument(term.into(), quoted_val) {
- Ok(search) => Ok((s, Node::Search(search))),
- Err(_) => Err(nom::Err::Failure((s, nom::error::ErrorKind::NoneOf))),
- }
+ map_res(
+ separated_pair(
+ verify(
+ escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(": \u{3000}")),
+ |s: &str| !s.is_empty(),
+ ),
+ char(':'),
+ quoted_term_str,
+ ),
+ |p| match search_node_for_text_with_argument(p.0, p.1) {
+ Ok(search) => Ok(Node::Search(search)),
+ Err(e) => Err(e),
+ },
+ )(s)
}
/// Convert a colon-separated key/val pair into the relevant search type.
fn search_node_for_text_with_argument<'a>(
- key: Cow<'a, str>,
- val: Cow<'a, str>,
+ key: &'a str,
+ val: &'a str,
) -> ParseResult> {
Ok(match key.to_ascii_lowercase().as_str() {
"added" => SearchNode::AddedInDays(val.parse()?),
"edited" => SearchNode::EditedInDays(val.parse()?),
- "deck" => SearchNode::Deck(val),
- "note" => SearchNode::NoteType(val),
- "tag" => SearchNode::Tag(val),
+ "deck" => SearchNode::Deck(unescape(val)?),
+ "note" => SearchNode::NoteType(unescape(val)?),
+ "tag" => SearchNode::Tag(unescape(val)?),
"mid" => SearchNode::NoteTypeID(val.parse()?),
"nid" => SearchNode::NoteIDs(check_id_list(val)?),
"cid" => SearchNode::CardIDs(check_id_list(val)?),
"did" => SearchNode::DeckID(val.parse()?),
- "card" => parse_template(val.as_ref()),
- "is" => parse_state(val.as_ref())?,
- "flag" => parse_flag(val.as_ref())?,
- "rated" => parse_rated(val.as_ref())?,
- "dupe" => parse_dupes(val.as_ref())?,
- "prop" => parse_prop(val.as_ref())?,
- "re" => SearchNode::Regex(val),
- "nc" => SearchNode::NoCombining(val),
- "w" => SearchNode::WordBoundary(val),
+ "card" => parse_template(val)?,
+ "is" => parse_state(val)?,
+ "flag" => parse_flag(val)?,
+ "rated" => parse_rated(val)?,
+ "dupe" => parse_dupes(val)?,
+ "prop" => parse_prop(val)?,
+ "re" => SearchNode::Regex(unescape_quotes(val)),
+ "r" => SearchNode::UnqualifiedText(unescape_raw(val)),
+ "nc" => SearchNode::NoCombining(unescape(val)?),
+ "w" => SearchNode::WordBoundary(unescape(val)?),
// anything else is a field search
- _ => parse_single_field(key.as_ref(), val.as_ref()),
+ _ => parse_single_field(key, val)?,
})
}
/// ensure a list of ids contains only numbers and commas, returning unchanged if true
/// used by nid: and cid:
-fn check_id_list(s: Cow) -> ParseResult> {
+fn check_id_list(s: &str) -> ParseResult<&str> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^(\d+,)*\d+$").unwrap();
}
- if RE.is_match(s.as_ref()) {
+ if RE.is_match(s) {
Ok(s)
} else {
Err(ParseError {})
@@ -360,13 +355,13 @@ fn parse_rated(val: &str) -> ParseResult> {
}
/// eg dupes:1231,hello
-fn parse_dupes(val: &str) -> ParseResult> {
+fn parse_dupes(val: &str) -> ParseResult {
let mut it = val.splitn(2, ',');
let mid: NoteTypeID = it.next().unwrap().parse()?;
let text = it.next().ok_or(ParseError {})?;
Ok(SearchNode::Duplicates {
note_type_id: mid,
- text: text.into(),
+ text: unescape_quotes(text),
})
}
@@ -411,27 +406,116 @@ fn parse_prop(val: &str) -> ParseResult> {
})
}
-fn parse_template(val: &str) -> SearchNode<'static> {
- SearchNode::CardTemplate(match val.parse::() {
+fn parse_template(val: &str) -> ParseResult {
+ Ok(SearchNode::CardTemplate(match val.parse::() {
Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
- Err(_) => TemplateKind::Name(val.into()),
+ Err(_) => TemplateKind::Name(unescape(val)?),
+ }))
+}
+
+fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult> {
+ Ok(if val.starts_with("re:") {
+ SearchNode::SingleField {
+ field: unescape(key)?,
+ text: unescape_quotes(&val[3..]),
+ is_re: true,
+ }
+ } else if val.starts_with("r:") {
+ SearchNode::SingleField {
+ field: unescape(key)?,
+ text: unescape_raw(&val[2..]),
+ is_re: false,
+ }
+ } else {
+ SearchNode::SingleField {
+ field: unescape(key)?,
+ text: unescape(val)?,
+ is_re: false,
+ }
})
}
-fn parse_single_field(key: &str, mut val: &str) -> SearchNode<'static> {
- let is_re = if val.starts_with("re:") {
- val = val.trim_start_matches("re:");
- true
+/// For strings without unescaped ", convert \" to "
+fn unescape_quotes(s: &str) -> Cow {
+ if s.contains('"') {
+ s.replace(r#"\""#, "\"").into()
} else {
- false
- };
- SearchNode::SingleField {
- field: key.to_string().into(),
- text: val.to_string().into(),
- is_re,
+ s.into()
}
}
+/// Unescape quotes but escape wildcards and backslashes.
+fn unescape_raw(s: &str) -> Cow {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r#"\\"?|\*|_"#).unwrap();
+ }
+ RE.replace_all(&s, |caps: &Captures| match &caps[0] {
+ r"\" => r"\\",
+ "\\\"" => "\"",
+ r"*" => r"\*",
+ r"_" => r"\_",
+ _ => unreachable!(),
+ })
+}
+
+/// Unescape chars with special meaning to the parser.
+fn unescape(txt: &str) -> ParseResult> {
+ if is_invalid_escape(txt) {
+ Err(ParseError {})
+ } else if is_parser_escape(txt) {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r#"\\[\\":()-]"#).unwrap();
+ }
+ Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
+ r"\\" => r"\\",
+ "\\\"" => "\"",
+ r"\:" => ":",
+ r"\(" => "(",
+ r"\)" => ")",
+ r"\-" => "-",
+ _ => unreachable!(),
+ }))
+ } else {
+ Ok(txt.into())
+ }
+}
+
+/// Check string for invalid escape sequences.
+fn is_invalid_escape(txt: &str) -> bool {
+ // odd number of backslashes not followed by an escapable character
+ lazy_static! {
+ static ref RE: Regex = Regex::new(
+ r#"(?x)
+ (?:^|[^\\]) # not a backslash
+ (?:\\\\)* # even number of backslashes
+ \\ # single backslash
+ (?:[^\\":*_()-]|$) # anything but an escapable char
+ "#
+ )
+ .unwrap();
+ }
+
+ RE.is_match(txt)
+}
+
+/// Check string for escape sequences handled by the parser: ":()-
+fn is_parser_escape(txt: &str) -> bool {
+ // odd number of backslashes followed by a char with special meaning to the parser
+ lazy_static! {
+ static ref RE: Regex = Regex::new(
+ r#"(?x)
+ (?:^|[^\\]) # not a backslash
+ (?:\\\\)* # even number of backslashes
+ \\ # single backslash
+ [":()-] # parser escape
+ "#
+ )
+ .unwrap();
+ }
+
+ RE.is_match(txt)
+}
+
#[cfg(test)]
mod test {
use super::*;
@@ -497,7 +581,7 @@ mod test {
})]
);
- // partially quoted text should handle escaping the same way
+ // escaping is independent of quotation
assert_eq!(
parse(r#""field:va\"lue""#)?,
vec![Search(SingleField {
@@ -507,13 +591,67 @@ mod test {
})]
);
assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:"va\"lue""#)?,);
+ assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:va\"lue"#)?,);
- // any character should be escapable in quotes
+ // only \":()-*_ are escapable
+ assert!(parse(r"\").is_err());
+ assert!(parse(r"\a").is_err());
+ assert!(parse(r"\%").is_err());
+
+ // parser unescapes ":()-
assert_eq!(
- parse(r#""re:\btest""#)?,
- vec![Search(Regex(r"\btest".into()))]
+ parse(r#"\"\:\(\)\-"#)?,
+ vec![Search(UnqualifiedText(r#"":()-"#.into())),]
);
+ // parser doesn't unescape \*_
+ assert_eq!(
+ parse(r#"\\\*\_"#)?,
+ vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),]
+ );
+
+ // escaping parentheses is optional (only) inside quotes
+ assert_eq!(parse(r#""\)\(""#), parse(r#"")(""#));
+ assert!(parse(")(").is_err());
+
+ // escaping : is optional if it is preceded by another :
+ assert!(parse(":test").is_err());
+ assert!(parse(":").is_err());
+ assert_eq!(parse("field:val:ue"), parse(r"field:val\:ue"));
+ assert_eq!(parse(r#""field:val:ue""#), parse(r"field:val\:ue"));
+ assert_eq!(parse(r#"field:"val:ue""#), parse(r"field:val\:ue"));
+
+ // escaping - is optional if it cannot be mistaken for a negator
+ assert_eq!(parse("-"), parse(r"\-"));
+ assert_eq!(parse("A-"), parse(r"A\-"));
+ assert_eq!(parse(r#""-A""#), parse(r"\-A"));
+ assert_ne!(parse("-A"), parse(r"\-A"));
+
+ // any character should be escapable on the right side of re:
+ assert_eq!(
+ parse(r#""re:\btest\%""#)?,
+ vec![Search(Regex(r"\btest\%".into()))]
+ );
+
+ // treat all chars as literals in raw searches
+ assert_eq!(parse(r"r:\*_"), parse(r"\\\*\_"));
+ assert_eq!(parse(r"field:r:\*_"), parse(r"field:\\\*\_"));
+
+ // no exceptions for escaping "
+ assert_eq!(
+ parse(r#"re:te\"st"#)?,
+ vec![Search(Regex(r#"te"st"#.into()))]
+ );
+ assert!(parse(r#"re:te"st"#).is_err());
+ assert_eq!(
+ parse(r#"r:te\"st"#)?,
+ vec![Search(UnqualifiedText(r#"te"st"#.into()))]
+ );
+ assert!(parse(r#"r:te"st"#).is_err());
+
+ // spaces are optional if node separation is clear
+ assert_eq!(parse(r#"a"b"(c)"#)?, parse("a b (c)")?);
+
assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
assert_eq!(
parse("card:front")?,
diff --git a/rslib/src/search/sqlwriter.rs b/rslib/src/search/sqlwriter.rs
index b678bbd33..5137255a2 100644
--- a/rslib/src/search/sqlwriter.rs
+++ b/rslib/src/search/sqlwriter.rs
@@ -9,12 +9,12 @@ use crate::{
err::Result,
notes::field_checksum,
notetype::NoteTypeID,
- text::{matches_wildcard, text_to_re},
- text::{normalize_to_nfc, strip_html_preserving_media_filenames, without_combining},
+ text::{
+ escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames,
+ to_custom_re, to_re, to_sql, to_text, without_combining,
+ },
timestamp::TimestampSecs,
};
-use lazy_static::lazy_static;
-use regex::Regex;
use std::{borrow::Cow, fmt::Write};
pub(crate) struct SqlWriter<'a> {
@@ -119,7 +119,7 @@ impl SqlWriter<'_> {
// note fields related
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
SearchNode::SingleField { field, text, is_re } => {
- self.write_single_field(field.as_ref(), &self.norm_note(text), *is_re)?
+ self.write_single_field(&norm(field), &self.norm_note(text), *is_re)?
}
SearchNode::Duplicates { note_type_id, text } => {
self.write_dupes(*note_type_id, &self.norm_note(text))
@@ -132,11 +132,9 @@ impl SqlWriter<'_> {
SearchNode::AddedInDays(days) => self.write_added(*days)?,
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
SearchNode::CardTemplate(template) => match template {
- TemplateKind::Ordinal(_) => {
- self.write_template(template)?;
- }
+ TemplateKind::Ordinal(_) => self.write_template(template)?,
TemplateKind::Name(name) => {
- self.write_template(&TemplateKind::Name(norm(name).into()))?;
+ self.write_template(&TemplateKind::Name(norm(name).into()))?
}
},
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
@@ -148,6 +146,7 @@ impl SqlWriter<'_> {
}
SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
+
SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
SearchNode::State(state) => self.write_state(state)?,
SearchNode::Flag(flag) => {
@@ -167,7 +166,7 @@ impl SqlWriter<'_> {
fn write_unqualified(&mut self, text: &str) {
// implicitly wrap in %
- let text = format!("%{}%", convert_glob_char(text));
+ let text = format!("%{}%", &to_sql(text));
self.args.push(text);
write!(
self.sql,
@@ -178,7 +177,7 @@ impl SqlWriter<'_> {
}
fn write_no_combining(&mut self, text: &str) {
- let text = format!("%{}%", without_combining(text));
+ let text = format!("%{}%", without_combining(&to_sql(text)));
self.args.push(text);
write!(
self.sql,
@@ -192,27 +191,27 @@ impl SqlWriter<'_> {
}
fn write_tag(&mut self, text: &str) -> Result<()> {
- match text {
- "none" => {
- write!(self.sql, "n.tags = ''").unwrap();
- }
- "*" | "%" => {
- write!(self.sql, "true").unwrap();
- }
- text => {
- if let Some(re_glob) = glob_to_re(text) {
- // text contains a wildcard
- let re_glob = format!("(?i).* {} .*", re_glob);
- write!(self.sql, "n.tags regexp ?").unwrap();
- self.args.push(re_glob);
- } else if let Some(tag) = self.col.storage.preferred_tag_case(&text)? {
- write!(self.sql, "n.tags like ?").unwrap();
- self.args.push(format!("% {} %", tag));
- } else {
- write!(self.sql, "false").unwrap();
+ if text.contains(" ") {
+ write!(self.sql, "false").unwrap();
+ } else {
+ match text {
+ "none" => write!(self.sql, "n.tags = ''").unwrap(),
+ "*" => write!(self.sql, "true").unwrap(),
+ s => {
+ if is_glob(s) {
+ write!(self.sql, "n.tags regexp ?").unwrap();
+ let re = &to_custom_re(s, r"\S");
+ self.args.push(format!("(?i).* {} .*", re));
+ } else if let Some(tag) = self.col.storage.preferred_tag_case(&to_text(s))? {
+ write!(self.sql, "n.tags like ? escape '\\'").unwrap();
+ self.args.push(format!("% {} %", escape_sql(&tag)));
+ } else {
+ write!(self.sql, "false").unwrap();
+ }
}
}
}
+
Ok(())
}
@@ -312,18 +311,20 @@ impl SqlWriter<'_> {
// rewrite "current" to the current deck name
let native_deck = if deck == "current" {
let current_did = self.col.get_current_deck_id();
- self.col
- .storage
- .get_deck(current_did)?
- .map(|d| d.name)
- .unwrap_or_else(|| "Default".into())
+ regex::escape(
+ self.col
+ .storage
+ .get_deck(current_did)?
+ .map(|d| d.name)
+ .unwrap_or_else(|| "Default".into())
+ .as_str(),
+ )
} else {
- human_deck_name_to_native(deck)
+ human_deck_name_to_native(&to_re(deck))
};
// convert to a regex that includes child decks
- let re = text_to_re(&native_deck);
- self.args.push(format!("(?i)^{}($|\x1f)", re));
+ self.args.push(format!("(?i)^{}($|\x1f)", native_deck));
let arg_idx = self.args.len();
self.sql.push_str(&format!(concat!(
"(c.did in (select id from decks where name regexp ?{n})",
@@ -341,8 +342,8 @@ impl SqlWriter<'_> {
write!(self.sql, "c.ord = {}", n).unwrap();
}
TemplateKind::Name(name) => {
- if let Some(re) = glob_to_re(name) {
- let re = format!("(?i){}", re);
+ if is_glob(name) {
+ let re = format!("(?i){}", to_re(name));
self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
);
@@ -351,7 +352,7 @@ impl SqlWriter<'_> {
self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
);
- self.args.push(name.to_string());
+ self.args.push(to_text(name).into());
}
}
};
@@ -359,15 +360,15 @@ impl SqlWriter<'_> {
}
fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
- if let Some(re) = glob_to_re(nt_name) {
- let re = format!("(?i){}", re);
+ if is_glob(nt_name) {
+ let re = format!("(?i){}", to_re(nt_name));
self.sql
.push_str("n.mid in (select id from notetypes where name regexp ?)");
self.args.push(re);
} else {
self.sql
.push_str("n.mid in (select id from notetypes where name = ?)");
- self.args.push(nt_name.to_string());
+ self.args.push(to_text(nt_name).into());
}
Ok(())
}
@@ -378,7 +379,7 @@ impl SqlWriter<'_> {
let mut field_map = vec![];
for nt in note_types.values() {
for field in &nt.fields {
- if matches_wildcard(&field.name, field_name) {
+ if matches_glob(&field.name, field_name) {
field_map.push((nt.id, field.ord));
}
}
@@ -401,7 +402,7 @@ impl SqlWriter<'_> {
} else {
cmp = "like";
cmp_trailer = "escape '\\'";
- self.args.push(convert_glob_char(val).into())
+ self.args.push(to_sql(val).into())
}
let arg_idx = self.args.len();
@@ -455,29 +456,10 @@ impl SqlWriter<'_> {
}
fn write_word_boundary(&mut self, word: &str) {
- // fixme: need to escape in the no-glob case as well
- let re = text_to_re(word);
- self.write_regex(&format!(r"\b{}\b", re))
+ self.write_regex(&format!(r"\b{}\b", to_re(word)));
}
}
-/// Replace * with %, leaving \* alone.
-fn convert_glob_char(val: &str) -> Cow {
- lazy_static! {
- static ref RE: Regex = Regex::new(r"(^|[^\\])\*").unwrap();
- }
- RE.replace_all(val, "${1}%")
-}
-
-/// Convert a string with _, % or * characters into a regex.
-/// If string contains no globbing characters, return None.
-fn glob_to_re(glob: &str) -> Option {
- if !glob.contains(|c| c == '_' || c == '*' || c == '%') {
- return None;
- }
- Some(text_to_re(glob))
-}
-
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum RequiredTable {
Notes,
@@ -601,10 +583,9 @@ mod test {
vec!["%te%st%".into()]
)
);
- assert_eq!(s(ctx, "te%st").1, vec!["%te%st%".to_string()]);
- // user should be able to escape sql wildcards
- assert_eq!(s(ctx, r#"te\%s\_t"#).1, vec!["%te\\%s\\_t%".to_string()]);
- assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te\\*s\\_t%".to_string()]);
+ assert_eq!(s(ctx, "te%st").1, vec![r"%te\%st%".to_string()]);
+ // user should be able to escape wildcards
+ assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te*s\\_t%".to_string()]);
// qualified search
assert_eq!(
@@ -673,23 +654,26 @@ mod test {
)
);
- // unregistered tag short circuits
- assert_eq!(s(ctx, r"tag:one"), ("(false)".into(), vec![]));
-
// if registered, searches with canonical
ctx.transact(None, |col| col.register_tag("One", Usn(-1)))
.unwrap();
assert_eq!(
s(ctx, r"tag:one"),
- ("(n.tags like ?)".into(), vec![r"% One %".into()])
+ (
+ "(n.tags like ? escape '\\')".into(),
+ vec![r"% One %".into()]
+ )
);
+ // unregistered tags without wildcards won't match
+ assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![]));
+
// wildcards force a regexp search
assert_eq!(
- s(ctx, r"tag:o*n\*et%w\%oth_re\_e"),
+ s(ctx, r"tag:o*n\*et%w%oth_re\_e"),
(
"(n.tags regexp ?)".into(),
- vec![r"(?i).* o.*n\*et.*w%oth.re_e .*".into()]
+ vec![r"(?i).* o\S*n\*et%w%oth\Sre_e .*".into()]
)
);
assert_eq!(s(ctx, "tag:none"), ("(n.tags = '')".into(), vec![]));
@@ -803,12 +787,4 @@ mod test {
RequiredTable::Notes
);
}
-
- #[test]
- fn convert_glob() {
- assert_eq!(&convert_glob_char("foo*bar"), "foo%bar");
- assert_eq!(&convert_glob_char("*bar"), "%bar");
- assert_eq!(&convert_glob_char("\n*bar"), "\n%bar");
- assert_eq!(&convert_glob_char(r"\*bar"), r"\*bar");
- }
}
diff --git a/rslib/src/text.rs b/rslib/src/text.rs
index f29d251dd..934fa64c0 100644
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@@ -240,17 +240,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
}
}
-/// True if search is equal to text, folding case.
-/// Supports '*' to match 0 or more characters.
-pub(crate) fn matches_wildcard(text: &str, search: &str) -> bool {
- if search.contains('*') {
- let search = format!("^(?i){}$", regex::escape(search).replace(r"\*", ".*"));
- Regex::new(&search).unwrap().is_match(text)
- } else {
- uni_eq(text, search)
- }
-}
-
/// Convert provided string to NFKD form and strip combining characters.
pub(crate) fn without_combining(s: &str) -> Cow {
// if the string is already normalized
@@ -301,9 +290,91 @@ pub(crate) fn text_to_re(glob: &str) -> String {
text2.into()
}
+/// Check if string contains an unescaped wildcard.
+pub(crate) fn is_glob(txt: &str) -> bool {
+ // even number of backslashes followed by a wildcard
+ lazy_static! {
+ static ref RE: Regex = Regex::new(
+ r#"(?x)
+ (?:^|[^\\]) # not a backslash
+ (?:\\\\)* # even number of backslashes
+ [*_] # wildcard
+ "#
+ )
+ .unwrap();
+ }
+
+ RE.is_match(txt)
+}
+
+/// Convert to a RegEx respecting Anki wildcards.
+pub(crate) fn to_re(txt: &str) -> Cow {
+ to_custom_re(txt, ".")
+}
+
+/// Convert Anki style to RegEx using the provided wildcard.
+pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"\\?.").unwrap();
+ }
+ RE.replace_all(&txt, |caps: &Captures| {
+ let s = &caps[0];
+ match s {
+ r"\\" | r"\*" => s.to_string(),
+ r"\_" => "_".to_string(),
+ "*" => format!("{}*", wildcard),
+ "_" => wildcard.to_string(),
+ s => regex::escape(s),
+ }
+ })
+}
+
+/// Convert to SQL respecting Anki wildcards.
+pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> {
+ // escape sequences and unescaped special characters which need conversion
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
+ }
+ RE.replace_all(&txt, |caps: &Captures| {
+ let s = &caps[0];
+ match s {
+ r"\\" => r"\\",
+ r"\*" => "*",
+ "*" => "%",
+ "%" => r"\%",
+ _ => unreachable!(),
+ }
+ })
+}
+
+/// Unescape everything.
+pub(crate) fn to_text(txt: &str) -> Cow {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
+ }
+ RE.replace_all(&txt, "$1")
+}
+
+/// Escape characters special to SQL: \%_
+pub(crate) fn escape_sql(txt: &str) -> Cow {
+ lazy_static! {
+ static ref RE: Regex = Regex::new(r"[\\%_]").unwrap();
+ }
+ RE.replace_all(&txt, r"\$0")
+}
+
+/// Compare text with a possible glob, folding case.
+pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
+ if is_glob(search) {
+ let search = format!("^(?i){}$", to_re(search));
+ Regex::new(&search).unwrap().is_match(text)
+ } else {
+ uni_eq(text, &to_text(search))
+ }
+}
+
#[cfg(test)]
mod test {
- use super::matches_wildcard;
use crate::text::without_combining;
use crate::text::{
extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_media_filenames, AVTag,
@@ -351,15 +422,6 @@ mod test {
);
}
- #[test]
- fn wildcard() {
- assert_eq!(matches_wildcard("foo", "bar"), false);
- assert_eq!(matches_wildcard("foo", "Foo"), true);
- assert_eq!(matches_wildcard("foo", "F*"), true);
- assert_eq!(matches_wildcard("foo", "F*oo"), true);
- assert_eq!(matches_wildcard("foo", "b*"), false);
- }
-
#[test]
fn combining() {
assert!(matches!(without_combining("test"), Cow::Borrowed(_)));