diff --git a/CONTRIBUTORS b/CONTRIBUTORS index a581234e5..a1ebd645f 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -65,7 +65,7 @@ lukkea David Allison Tsung-Han Yu Piotr Kubowicz -RumovZ +RumovZ Cecini Krish Shah ianki diff --git a/pylib/tests/test_find.py b/pylib/tests/test_find.py index d9b7f76f2..e7adfafec 100644 --- a/pylib/tests/test_find.py +++ b/pylib/tests/test_find.py @@ -48,8 +48,10 @@ def test_findCards(): # tag searches assert len(col.findCards("tag:*")) == 5 assert len(col.findCards("tag:\\*")) == 1 - assert len(col.findCards("tag:%")) == 5 - assert len(col.findCards("tag:\\%")) == 1 + assert len(col.findCards("tag:%")) == 1 + assert len(col.findCards("tag:sheep_goat")) == 0 + assert len(col.findCards('"tag:sheep goat"')) == 0 + assert len(col.findCards('"tag:* *"')) == 0 assert len(col.findCards("tag:animal_1")) == 2 assert len(col.findCards("tag:animal\\_1")) == 1 assert not col.findCards("tag:donkey") diff --git a/qt/aqt/browser.py b/qt/aqt/browser.py index a8ba379c2..967af003c 100644 --- a/qt/aqt/browser.py +++ b/qt/aqt/browser.py @@ -1207,7 +1207,7 @@ QTableView {{ gridline-color: {grid} }} if i % 2 == 0: txt += a + ":" else: - txt += re.sub("[*%_]", r"\\\g<0>", a) + txt += re.sub(r"[*_\\]", r"\\\g<0>", a) for c in '  ()"': if c in txt: txt = '"{}"'.format(txt.replace('"', '\\"')) diff --git a/rslib/src/search/parser.rs b/rslib/src/search/parser.rs index 6d3cd038c..3fb20c3e7 100644 --- a/rslib/src/search/parser.rs +++ b/rslib/src/search/parser.rs @@ -9,17 +9,15 @@ use crate::{ use lazy_static::lazy_static; use nom::{ branch::alt, - bytes::complete::{escaped, is_not, tag, take_while1}, - character::complete::{anychar, char, one_of}, - combinator::{all_consuming, map, map_res}, - sequence::{delimited, preceded, tuple}, + bytes::complete::{escaped, is_not, tag}, + character::complete::{anychar, char, none_of, one_of}, + combinator::{all_consuming, map, map_res, verify}, + sequence::{delimited, preceded, separated_pair}, {multi::many0, IResult}, }; -use regex::Regex; +use regex::{Captures, Regex}; use std::{borrow::Cow, num}; -// fixme: need to preserve \ when used twice in string - struct ParseError {} impl From for ParseError { @@ -63,7 +61,7 @@ pub(super) enum SearchNode<'a> { }, AddedInDays(u32), EditedInDays(u32), - CardTemplate(TemplateKind), + CardTemplate(TemplateKind<'a>), Deck(Cow<'a, str>), DeckID(DeckID), NoteTypeID(NoteTypeID), @@ -75,12 +73,12 @@ pub(super) enum SearchNode<'a> { Tag(Cow<'a, str>), Duplicates { note_type_id: NoteTypeID, - text: String, + text: Cow<'a, str>, }, State(StateKind), Flag(u8), - NoteIDs(Cow<'a, str>), - CardIDs(Cow<'a, str>), + NoteIDs(&'a str), + CardIDs(&'a str), Property { operator: String, kind: PropertyKind, @@ -113,9 +111,9 @@ pub(super) enum StateKind { } #[derive(Debug, PartialEq)] -pub(super) enum TemplateKind { +pub(super) enum TemplateKind<'a> { Ordinal(u16), - Name(String), + Name(Cow<'a, str>), } /// Parse the input string into a list of nodes. @@ -127,7 +125,6 @@ pub(super) fn parse(input: &str) -> Result> { let (_, nodes) = all_consuming(group_inner)(input).map_err(|_e| AnkiError::SearchError(None))?; - Ok(nodes) } @@ -205,32 +202,21 @@ fn text(s: &str) -> IResult<&str, Node> { /// Determine if text is a qualified search, and handle escaped chars. fn search_node_for_text(s: &str) -> ParseResult { - let mut it = s.splitn(2, ':'); - let (head, tail) = ( - unescape_quotes(it.next().unwrap()), - it.next().map(unescape_quotes), - ); - - if let Some(tail) = tail { - search_node_for_text_with_argument(head, tail) + let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?; + if tail.is_empty() { + Ok(SearchNode::UnqualifiedText(unescape(head)?)) } else { - Ok(SearchNode::UnqualifiedText(head)) + search_node_for_text_with_argument(head, &tail[1..]) } } -/// \" -> " -fn unescape_quotes(s: &str) -> Cow { - if s.find(r#"\""#).is_some() { - s.replace(r#"\""#, "\"").into() - } else { - s.into() - } -} - -/// Unquoted text, terminated by a space or ) +/// Unquoted text, terminated by whitespace or unescaped ", ( or ) fn unquoted_term(s: &str) -> IResult<&str, Node> { map_res( - take_while1(|c| c != ' ' && c != ')' && c != '"'), + verify( + escaped(is_not("\"() \u{3000}\\"), '\\', none_of(" \u{3000}")), + |s: &str| !s.is_empty(), + ), |text: &str| -> ParseResult { Ok(if text.eq_ignore_ascii_case("or") { Node::Or @@ -256,57 +242,66 @@ fn quoted_term_str(s: &str) -> IResult<&str, &str> { /// Quoted text, terminated by a non-escaped double quote fn quoted_term_inner(s: &str) -> IResult<&str, &str> { - escaped(is_not(r#""\"#), '\\', anychar)(s) + verify(escaped(is_not(r#""\"#), '\\', anychar), |s: &str| { + !s.is_empty() + })(s) } /// eg deck:"foo bar" - quotes must come after the : fn partially_quoted_term(s: &str) -> IResult<&str, Node> { - let term = take_while1(|c| c != ' ' && c != ')' && c != ':'); - let (s, (term, _, quoted_val)) = tuple((term, char(':'), quoted_term_str))(s)?; - let quoted_val = unescape_quotes(quoted_val); - - match search_node_for_text_with_argument(term.into(), quoted_val) { - Ok(search) => Ok((s, Node::Search(search))), - Err(_) => Err(nom::Err::Failure((s, nom::error::ErrorKind::NoneOf))), - } + map_res( + separated_pair( + verify( + escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(": \u{3000}")), + |s: &str| !s.is_empty(), + ), + char(':'), + quoted_term_str, + ), + |p| match search_node_for_text_with_argument(p.0, p.1) { + Ok(search) => Ok(Node::Search(search)), + Err(e) => Err(e), + }, + )(s) } /// Convert a colon-separated key/val pair into the relevant search type. fn search_node_for_text_with_argument<'a>( - key: Cow<'a, str>, - val: Cow<'a, str>, + key: &'a str, + val: &'a str, ) -> ParseResult> { Ok(match key.to_ascii_lowercase().as_str() { "added" => SearchNode::AddedInDays(val.parse()?), "edited" => SearchNode::EditedInDays(val.parse()?), - "deck" => SearchNode::Deck(val), - "note" => SearchNode::NoteType(val), - "tag" => SearchNode::Tag(val), + "deck" => SearchNode::Deck(unescape(val)?), + "note" => SearchNode::NoteType(unescape(val)?), + "tag" => SearchNode::Tag(unescape(val)?), "mid" => SearchNode::NoteTypeID(val.parse()?), "nid" => SearchNode::NoteIDs(check_id_list(val)?), "cid" => SearchNode::CardIDs(check_id_list(val)?), "did" => SearchNode::DeckID(val.parse()?), - "card" => parse_template(val.as_ref()), - "is" => parse_state(val.as_ref())?, - "flag" => parse_flag(val.as_ref())?, - "rated" => parse_rated(val.as_ref())?, - "dupe" => parse_dupes(val.as_ref())?, - "prop" => parse_prop(val.as_ref())?, - "re" => SearchNode::Regex(val), - "nc" => SearchNode::NoCombining(val), - "w" => SearchNode::WordBoundary(val), + "card" => parse_template(val)?, + "is" => parse_state(val)?, + "flag" => parse_flag(val)?, + "rated" => parse_rated(val)?, + "dupe" => parse_dupes(val)?, + "prop" => parse_prop(val)?, + "re" => SearchNode::Regex(unescape_quotes(val)), + "r" => SearchNode::UnqualifiedText(unescape_raw(val)), + "nc" => SearchNode::NoCombining(unescape(val)?), + "w" => SearchNode::WordBoundary(unescape(val)?), // anything else is a field search - _ => parse_single_field(key.as_ref(), val.as_ref()), + _ => parse_single_field(key, val)?, }) } /// ensure a list of ids contains only numbers and commas, returning unchanged if true /// used by nid: and cid: -fn check_id_list(s: Cow) -> ParseResult> { +fn check_id_list(s: &str) -> ParseResult<&str> { lazy_static! { static ref RE: Regex = Regex::new(r"^(\d+,)*\d+$").unwrap(); } - if RE.is_match(s.as_ref()) { + if RE.is_match(s) { Ok(s) } else { Err(ParseError {}) @@ -360,13 +355,13 @@ fn parse_rated(val: &str) -> ParseResult> { } /// eg dupes:1231,hello -fn parse_dupes(val: &str) -> ParseResult> { +fn parse_dupes(val: &str) -> ParseResult { let mut it = val.splitn(2, ','); let mid: NoteTypeID = it.next().unwrap().parse()?; let text = it.next().ok_or(ParseError {})?; Ok(SearchNode::Duplicates { note_type_id: mid, - text: text.into(), + text: unescape_quotes(text), }) } @@ -411,27 +406,116 @@ fn parse_prop(val: &str) -> ParseResult> { }) } -fn parse_template(val: &str) -> SearchNode<'static> { - SearchNode::CardTemplate(match val.parse::() { +fn parse_template(val: &str) -> ParseResult { + Ok(SearchNode::CardTemplate(match val.parse::() { Ok(n) => TemplateKind::Ordinal(n.max(1) - 1), - Err(_) => TemplateKind::Name(val.into()), + Err(_) => TemplateKind::Name(unescape(val)?), + })) +} + +fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult> { + Ok(if val.starts_with("re:") { + SearchNode::SingleField { + field: unescape(key)?, + text: unescape_quotes(&val[3..]), + is_re: true, + } + } else if val.starts_with("r:") { + SearchNode::SingleField { + field: unescape(key)?, + text: unescape_raw(&val[2..]), + is_re: false, + } + } else { + SearchNode::SingleField { + field: unescape(key)?, + text: unescape(val)?, + is_re: false, + } }) } -fn parse_single_field(key: &str, mut val: &str) -> SearchNode<'static> { - let is_re = if val.starts_with("re:") { - val = val.trim_start_matches("re:"); - true +/// For strings without unescaped ", convert \" to " +fn unescape_quotes(s: &str) -> Cow { + if s.contains('"') { + s.replace(r#"\""#, "\"").into() } else { - false - }; - SearchNode::SingleField { - field: key.to_string().into(), - text: val.to_string().into(), - is_re, + s.into() } } +/// Unescape quotes but escape wildcards and \s. +fn unescape_raw(s: &str) -> Cow { + lazy_static! { + static ref RE: Regex = Regex::new(r#"\\"?|\*|_"#).unwrap(); + } + RE.replace_all(&s, |caps: &Captures| match &caps[0] { + r"\" => r"\\", + "\\\"" => "\"", + r"*" => r"\*", + r"_" => r"\_", + _ => unreachable!(), + }) +} + +/// Unescape chars with special meaning to the parser. +fn unescape(txt: &str) -> ParseResult> { + if is_invalid_escape(txt) { + Err(ParseError {}) + } else if is_parser_escape(txt) { + lazy_static! { + static ref RE: Regex = Regex::new(r#"\\[\\":()-]"#).unwrap(); + } + Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] { + r"\\" => r"\\", + "\\\"" => "\"", + r"\:" => ":", + r"\(" => "(", + r"\)" => ")", + r"\-" => "-", + _ => unreachable!(), + })) + } else { + Ok(txt.into()) + } +} + +/// Check string for invalid escape sequences. +fn is_invalid_escape(txt: &str) -> bool { + // odd number of \s not followed by an escapable character + lazy_static! { + static ref RE: Regex = Regex::new( + r#"(?x) + (?:^|[^\\]) # not a backslash + (?:\\\\)* # even number of backslashes + \\ # single backslash + (?:[^\\":*_()-]|$) # anything but an escapable char + "# + ) + .unwrap(); + } + + RE.is_match(txt) +} + +/// Check string for escape sequences handled by the parser: ":() +fn is_parser_escape(txt: &str) -> bool { + // odd number of \s followed by a char with special meaning to the parser + lazy_static! { + static ref RE: Regex = Regex::new( + r#"(?x) + (?:^|[^\\]) # not a backslash + (?:\\\\)* # even number of backslashes + \\ # single backslash + [":()-] # parser escape + "# + ) + .unwrap(); + } + + RE.is_match(txt) +} + #[cfg(test)] mod test { use super::*; @@ -497,7 +581,7 @@ mod test { })] ); - // partially quoted text should handle escaping the same way + // escaping is independent of quotation assert_eq!( parse(r#""field:va\"lue""#)?, vec![Search(SingleField { @@ -507,13 +591,67 @@ mod test { })] ); assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:"va\"lue""#)?,); + assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:va\"lue"#)?,); - // any character should be escapable in quotes + // only \":()-*_ are escapable + assert!(parse(r"\").is_err()); + assert!(parse(r"\a").is_err()); + assert!(parse(r"\%").is_err()); + + // parser unescapes ":()- assert_eq!( - parse(r#""re:\btest""#)?, - vec![Search(Regex(r"\btest".into()))] + parse(r#"\"\:\(\)\-"#)?, + vec![Search(UnqualifiedText(r#"":()-"#.into())),] ); + // parser doesn't unescape unescape \*_ + assert_eq!( + parse(r#"\\\*\_"#)?, + vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),] + ); + + // escaping parentheses is optional (only) inside quotes + assert_eq!(parse(r#""\)\(""#), parse(r#"")(""#)); + assert!(parse(")(").is_err()); + + // escaping : is optional if it is preceded by another : + assert!(parse(":test").is_err()); + assert!(parse(":").is_err()); + assert_eq!(parse("field:val:ue"), parse(r"field:val\:ue")); + assert_eq!(parse(r#""field:val:ue""#), parse(r"field:val\:ue")); + assert_eq!(parse(r#"field:"val:ue""#), parse(r"field:val\:ue")); + + // escaping - is optional if it cannot be mistaken for a negator + assert_eq!(parse("-"), parse(r"\-")); + assert_eq!(parse("A-"), parse(r"A\-")); + assert_eq!(parse(r#""-A""#), parse(r"\-A")); + assert_ne!(parse("-A"), parse(r"\-A")); + + // any character should be escapable on the right side of re: + assert_eq!( + parse(r#""re:\btest\%""#)?, + vec![Search(Regex(r"\btest\%".into()))] + ); + + // treat all chars as literals in raw searches + assert_eq!(parse(r"r:\*_"), parse(r"\\\*\_")); + assert_eq!(parse(r"field:r:\*_"), parse(r"field:\\\*\_")); + + // no exceptions for escaping " + assert_eq!( + parse(r#"re:te\"st"#)?, + vec![Search(Regex(r#"te"st"#.into()))] + ); + assert!(parse(r#"re:te"st"#).is_err()); + assert_eq!( + parse(r#"r:te\"st"#)?, + vec![Search(UnqualifiedText(r#"te"st"#.into()))] + ); + assert!(parse(r#"r:te"st"#).is_err()); + + // spaces are optional if node separation is clear + assert_eq!(parse(r#"a"b"(c)"#)?, parse("a b (c)")?); + assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]); assert_eq!( parse("card:front")?, diff --git a/rslib/src/search/sqlwriter.rs b/rslib/src/search/sqlwriter.rs index b678bbd33..5137255a2 100644 --- a/rslib/src/search/sqlwriter.rs +++ b/rslib/src/search/sqlwriter.rs @@ -9,12 +9,12 @@ use crate::{ err::Result, notes::field_checksum, notetype::NoteTypeID, - text::{matches_wildcard, text_to_re}, - text::{normalize_to_nfc, strip_html_preserving_media_filenames, without_combining}, + text::{ + escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames, + to_custom_re, to_re, to_sql, to_text, without_combining, + }, timestamp::TimestampSecs, }; -use lazy_static::lazy_static; -use regex::Regex; use std::{borrow::Cow, fmt::Write}; pub(crate) struct SqlWriter<'a> { @@ -119,7 +119,7 @@ impl SqlWriter<'_> { // note fields related SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)), SearchNode::SingleField { field, text, is_re } => { - self.write_single_field(field.as_ref(), &self.norm_note(text), *is_re)? + self.write_single_field(&norm(field), &self.norm_note(text), *is_re)? } SearchNode::Duplicates { note_type_id, text } => { self.write_dupes(*note_type_id, &self.norm_note(text)) @@ -132,11 +132,9 @@ impl SqlWriter<'_> { SearchNode::AddedInDays(days) => self.write_added(*days)?, SearchNode::EditedInDays(days) => self.write_edited(*days)?, SearchNode::CardTemplate(template) => match template { - TemplateKind::Ordinal(_) => { - self.write_template(template)?; - } + TemplateKind::Ordinal(_) => self.write_template(template)?, TemplateKind::Name(name) => { - self.write_template(&TemplateKind::Name(norm(name).into()))?; + self.write_template(&TemplateKind::Name(norm(name).into()))? } }, SearchNode::Deck(deck) => self.write_deck(&norm(deck))?, @@ -148,6 +146,7 @@ impl SqlWriter<'_> { } SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?, SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?, + SearchNode::Tag(tag) => self.write_tag(&norm(tag))?, SearchNode::State(state) => self.write_state(state)?, SearchNode::Flag(flag) => { @@ -167,7 +166,7 @@ impl SqlWriter<'_> { fn write_unqualified(&mut self, text: &str) { // implicitly wrap in % - let text = format!("%{}%", convert_glob_char(text)); + let text = format!("%{}%", &to_sql(text)); self.args.push(text); write!( self.sql, @@ -178,7 +177,7 @@ impl SqlWriter<'_> { } fn write_no_combining(&mut self, text: &str) { - let text = format!("%{}%", without_combining(text)); + let text = format!("%{}%", without_combining(&to_sql(text))); self.args.push(text); write!( self.sql, @@ -192,27 +191,27 @@ impl SqlWriter<'_> { } fn write_tag(&mut self, text: &str) -> Result<()> { - match text { - "none" => { - write!(self.sql, "n.tags = ''").unwrap(); - } - "*" | "%" => { - write!(self.sql, "true").unwrap(); - } - text => { - if let Some(re_glob) = glob_to_re(text) { - // text contains a wildcard - let re_glob = format!("(?i).* {} .*", re_glob); - write!(self.sql, "n.tags regexp ?").unwrap(); - self.args.push(re_glob); - } else if let Some(tag) = self.col.storage.preferred_tag_case(&text)? { - write!(self.sql, "n.tags like ?").unwrap(); - self.args.push(format!("% {} %", tag)); - } else { - write!(self.sql, "false").unwrap(); + if text.contains(" ") { + write!(self.sql, "false").unwrap(); + } else { + match text { + "none" => write!(self.sql, "n.tags = ''").unwrap(), + "*" => write!(self.sql, "true").unwrap(), + s => { + if is_glob(s) { + write!(self.sql, "n.tags regexp ?").unwrap(); + let re = &to_custom_re(s, r"\S"); + self.args.push(format!("(?i).* {} .*", re)); + } else if let Some(tag) = self.col.storage.preferred_tag_case(&to_text(s))? { + write!(self.sql, "n.tags like ? escape '\\'").unwrap(); + self.args.push(format!("% {} %", escape_sql(&tag))); + } else { + write!(self.sql, "false").unwrap(); + } } } } + Ok(()) } @@ -312,18 +311,20 @@ impl SqlWriter<'_> { // rewrite "current" to the current deck name let native_deck = if deck == "current" { let current_did = self.col.get_current_deck_id(); - self.col - .storage - .get_deck(current_did)? - .map(|d| d.name) - .unwrap_or_else(|| "Default".into()) + regex::escape( + self.col + .storage + .get_deck(current_did)? + .map(|d| d.name) + .unwrap_or_else(|| "Default".into()) + .as_str(), + ) } else { - human_deck_name_to_native(deck) + human_deck_name_to_native(&to_re(deck)) }; // convert to a regex that includes child decks - let re = text_to_re(&native_deck); - self.args.push(format!("(?i)^{}($|\x1f)", re)); + self.args.push(format!("(?i)^{}($|\x1f)", native_deck)); let arg_idx = self.args.len(); self.sql.push_str(&format!(concat!( "(c.did in (select id from decks where name regexp ?{n})", @@ -341,8 +342,8 @@ impl SqlWriter<'_> { write!(self.sql, "c.ord = {}", n).unwrap(); } TemplateKind::Name(name) => { - if let Some(re) = glob_to_re(name) { - let re = format!("(?i){}", re); + if is_glob(name) { + let re = format!("(?i){}", to_re(name)); self.sql.push_str( "(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)", ); @@ -351,7 +352,7 @@ impl SqlWriter<'_> { self.sql.push_str( "(n.mid,c.ord) in (select ntid,ord from templates where name = ?)", ); - self.args.push(name.to_string()); + self.args.push(to_text(name).into()); } } }; @@ -359,15 +360,15 @@ impl SqlWriter<'_> { } fn write_note_type(&mut self, nt_name: &str) -> Result<()> { - if let Some(re) = glob_to_re(nt_name) { - let re = format!("(?i){}", re); + if is_glob(nt_name) { + let re = format!("(?i){}", to_re(nt_name)); self.sql .push_str("n.mid in (select id from notetypes where name regexp ?)"); self.args.push(re); } else { self.sql .push_str("n.mid in (select id from notetypes where name = ?)"); - self.args.push(nt_name.to_string()); + self.args.push(to_text(nt_name).into()); } Ok(()) } @@ -378,7 +379,7 @@ impl SqlWriter<'_> { let mut field_map = vec![]; for nt in note_types.values() { for field in &nt.fields { - if matches_wildcard(&field.name, field_name) { + if matches_glob(&field.name, field_name) { field_map.push((nt.id, field.ord)); } } @@ -401,7 +402,7 @@ impl SqlWriter<'_> { } else { cmp = "like"; cmp_trailer = "escape '\\'"; - self.args.push(convert_glob_char(val).into()) + self.args.push(to_sql(val).into()) } let arg_idx = self.args.len(); @@ -455,29 +456,10 @@ impl SqlWriter<'_> { } fn write_word_boundary(&mut self, word: &str) { - // fixme: need to escape in the no-glob case as well - let re = text_to_re(word); - self.write_regex(&format!(r"\b{}\b", re)) + self.write_regex(&format!(r"\b{}\b", to_re(word))); } } -/// Replace * with %, leaving \* alone. -fn convert_glob_char(val: &str) -> Cow { - lazy_static! { - static ref RE: Regex = Regex::new(r"(^|[^\\])\*").unwrap(); - } - RE.replace_all(val, "${1}%") -} - -/// Convert a string with _, % or * characters into a regex. -/// If string contains no globbing characters, return None. -fn glob_to_re(glob: &str) -> Option { - if !glob.contains(|c| c == '_' || c == '*' || c == '%') { - return None; - } - Some(text_to_re(glob)) -} - #[derive(Debug, PartialEq, Clone, Copy)] pub enum RequiredTable { Notes, @@ -601,10 +583,9 @@ mod test { vec!["%te%st%".into()] ) ); - assert_eq!(s(ctx, "te%st").1, vec!["%te%st%".to_string()]); - // user should be able to escape sql wildcards - assert_eq!(s(ctx, r#"te\%s\_t"#).1, vec!["%te\\%s\\_t%".to_string()]); - assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te\\*s\\_t%".to_string()]); + assert_eq!(s(ctx, "te%st").1, vec![r"%te\%st%".to_string()]); + // user should be able to escape wildcards + assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te*s\\_t%".to_string()]); // qualified search assert_eq!( @@ -673,23 +654,26 @@ mod test { ) ); - // unregistered tag short circuits - assert_eq!(s(ctx, r"tag:one"), ("(false)".into(), vec![])); - // if registered, searches with canonical ctx.transact(None, |col| col.register_tag("One", Usn(-1))) .unwrap(); assert_eq!( s(ctx, r"tag:one"), - ("(n.tags like ?)".into(), vec![r"% One %".into()]) + ( + "(n.tags like ? escape '\\')".into(), + vec![r"% One %".into()] + ) ); + // unregistered tags without wildcards won't match + assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![])); + // wildcards force a regexp search assert_eq!( - s(ctx, r"tag:o*n\*et%w\%oth_re\_e"), + s(ctx, r"tag:o*n\*et%w%oth_re\_e"), ( "(n.tags regexp ?)".into(), - vec![r"(?i).* o.*n\*et.*w%oth.re_e .*".into()] + vec![r"(?i).* o\S*n\*et%w%oth\Sre_e .*".into()] ) ); assert_eq!(s(ctx, "tag:none"), ("(n.tags = '')".into(), vec![])); @@ -803,12 +787,4 @@ mod test { RequiredTable::Notes ); } - - #[test] - fn convert_glob() { - assert_eq!(&convert_glob_char("foo*bar"), "foo%bar"); - assert_eq!(&convert_glob_char("*bar"), "%bar"); - assert_eq!(&convert_glob_char("\n*bar"), "\n%bar"); - assert_eq!(&convert_glob_char(r"\*bar"), r"\*bar"); - } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index f29d251dd..934fa64c0 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -240,17 +240,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) { } } -/// True if search is equal to text, folding case. -/// Supports '*' to match 0 or more characters. -pub(crate) fn matches_wildcard(text: &str, search: &str) -> bool { - if search.contains('*') { - let search = format!("^(?i){}$", regex::escape(search).replace(r"\*", ".*")); - Regex::new(&search).unwrap().is_match(text) - } else { - uni_eq(text, search) - } -} - /// Convert provided string to NFKD form and strip combining characters. pub(crate) fn without_combining(s: &str) -> Cow { // if the string is already normalized @@ -301,9 +290,91 @@ pub(crate) fn text_to_re(glob: &str) -> String { text2.into() } +/// Check if string contains an unescaped wildcard. +pub(crate) fn is_glob(txt: &str) -> bool { + // even number of \s followed by a wildcard + lazy_static! { + static ref RE: Regex = Regex::new( + r#"(?x) + (?:^|[^\\]) # not a backslash + (?:\\\\)* # even number of backslashes + [*_] # wildcard + "# + ) + .unwrap(); + } + + RE.is_match(txt) +} + +/// Convert to a RegEx respecting Anki wildcards. +pub(crate) fn to_re(txt: &str) -> Cow { + to_custom_re(txt, ".") +} + +/// Convert Anki style to RegEx using the provided wildcard. +pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> { + lazy_static! { + static ref RE: Regex = Regex::new(r"\\?.").unwrap(); + } + RE.replace_all(&txt, |caps: &Captures| { + let s = &caps[0]; + match s { + r"\\" | r"\*" => s.to_string(), + r"\_" => "_".to_string(), + "*" => format!("{}*", wildcard), + "_" => wildcard.to_string(), + s => regex::escape(s), + } + }) +} + +/// Convert to SQL respecting Anki wildcards. +pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> { + // escape sequences and unescaped special characters which need conversion + lazy_static! { + static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap(); + } + RE.replace_all(&txt, |caps: &Captures| { + let s = &caps[0]; + match s { + r"\\" => r"\\", + r"\*" => "*", + "*" => "%", + "%" => r"\%", + _ => unreachable!(), + } + }) +} + +/// Unescape everything. +pub(crate) fn to_text(txt: &str) -> Cow { + lazy_static! { + static ref RE: Regex = Regex::new(r"\\(.)").unwrap(); + } + RE.replace_all(&txt, "$1") +} + +/// Escape characters special to SQL: \%_ +pub(crate) fn escape_sql(txt: &str) -> Cow { + lazy_static! { + static ref RE: Regex = Regex::new(r"[\\%_]").unwrap(); + } + RE.replace_all(&txt, r"\$0") +} + +/// Compare text with a possible glob, folding case. +pub(crate) fn matches_glob(text: &str, search: &str) -> bool { + if is_glob(search) { + let search = format!("^(?i){}$", to_re(search)); + Regex::new(&search).unwrap().is_match(text) + } else { + uni_eq(text, &to_text(search)) + } +} + #[cfg(test)] mod test { - use super::matches_wildcard; use crate::text::without_combining; use crate::text::{ extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_media_filenames, AVTag, @@ -351,15 +422,6 @@ mod test { ); } - #[test] - fn wildcard() { - assert_eq!(matches_wildcard("foo", "bar"), false); - assert_eq!(matches_wildcard("foo", "Foo"), true); - assert_eq!(matches_wildcard("foo", "F*"), true); - assert_eq!(matches_wildcard("foo", "F*oo"), true); - assert_eq!(matches_wildcard("foo", "b*"), false); - } - #[test] fn combining() { assert!(matches!(without_combining("test"), Cow::Borrowed(_)));