Split unescaping between parser and writer

* Unescape wildcards in the writer instead of the parser.
* Move text conversion functions to text.rs.
* Implicitly normalize when converting text.
* Revert to using the collection when comparing tags, but add escape support.
2020-11-17 12:49:37 +01:00 · 2020-11-17 12:49:37 +01:00 · 8c02c6e205
commit 8c02c6e205
parent 7c5cf6d18b
3 changed files with 241 additions and 171 deletions
--- a/rslib/src/search/parser.rs
+++ b/rslib/src/search/parser.rs
@ -40,12 +40,6 @@ impl<I> From<nom::Err<(I, nom::error::ErrorKind)>> for ParseError {

 type ParseResult<T> = std::result::Result<T, ParseError>;

-#[derive(Debug, PartialEq)]
-pub(super) enum OptionalRe<'a> {
-    Text(Cow<'a, str>),
-    Re(Cow<'a, str>),
-}
-
 #[derive(Debug, PartialEq)]
 pub(super) enum Node<'a> {
    And,
@ -61,22 +55,22 @@ pub(super) enum SearchNode<'a> {
    UnqualifiedText(Cow<'a, str>),
    // foo:bar, where foo doesn't match a term below
    SingleField {
-        field: OptionalRe<'a>,
+        field: Cow<'a, str>,
        text: Cow<'a, str>,
        is_re: bool,
    },
    AddedInDays(u32),
    EditedInDays(u32),
    CardTemplate(TemplateKind<'a>),
-    Deck(String),
+    Deck(Cow<'a, str>),
    DeckID(DeckID),
    NoteTypeID(NoteTypeID),
-    NoteType(OptionalRe<'a>),
+    NoteType(Cow<'a, str>),
    Rated {
        days: u32,
        ease: Option<u8>,
    },
-    Tag(String),
+    Tag(Cow<'a, str>),
    Duplicates {
        note_type_id: NoteTypeID,
        text: Cow<'a, str>,
@ -92,7 +86,7 @@ pub(super) enum SearchNode<'a> {
    WholeCollection,
    Regex(Cow<'a, str>),
    NoCombining(Cow<'a, str>),
-    WordBoundary(String),
+    WordBoundary(Cow<'a, str>),
 }

 #[derive(Debug, PartialEq)]
@ -119,7 +113,7 @@ pub(super) enum StateKind {
 #[derive(Debug, PartialEq)]
 pub(super) enum TemplateKind<'a> {
    Ordinal(u16),
-    Name(OptionalRe<'a>),
+    Name(Cow<'a, str>),
 }

 /// Parse the input string into a list of nodes.
@ -210,7 +204,7 @@ fn text(s: &str) -> IResult<&str, Node> {
 fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
    let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
    if tail.is_empty() {
-        Ok(SearchNode::UnqualifiedText(unescape_to_glob(head)?))
+        Ok(SearchNode::UnqualifiedText(unescape(head)?))
    } else {
        search_node_for_text_with_argument(head, &tail[1..])
    }
@ -279,9 +273,9 @@ fn search_node_for_text_with_argument<'a>(
    Ok(match key.to_ascii_lowercase().as_str() {
        "added" => SearchNode::AddedInDays(val.parse()?),
        "edited" => SearchNode::EditedInDays(val.parse()?),
-        "deck" => SearchNode::Deck(unescape_to_enforced_re(val, ".")?),
-        "note" => SearchNode::NoteType(unescape_to_re(val)?),
-        "tag" => SearchNode::Tag(unescape_to_enforced_re(val, r"\S")?),
+        "deck" => SearchNode::Deck(unescape(val)?),
+        "note" => SearchNode::NoteType(unescape(val)?),
+        "tag" => SearchNode::Tag(unescape(val)?),
        "mid" => SearchNode::NoteTypeID(val.parse()?),
        "nid" => SearchNode::NoteIDs(check_id_list(val)?),
        "cid" => SearchNode::CardIDs(check_id_list(val)?),
@ -293,8 +287,8 @@ fn search_node_for_text_with_argument<'a>(
        "dupe" => parse_dupes(val)?,
        "prop" => parse_prop(val)?,
        "re" => SearchNode::Regex(unescape_quotes(val)),
-        "nc" => SearchNode::NoCombining(unescape_to_glob(val)?),
-        "w" => SearchNode::WordBoundary(unescape_to_enforced_re(val, ".")?),
+        "nc" => SearchNode::NoCombining(unescape(val)?),
+        "w" => SearchNode::WordBoundary(unescape(val)?),
        // anything else is a field search
        _ => parse_single_field(key, val)?,
    })
@ -414,21 +408,21 @@ fn parse_prop(val: &str) -> ParseResult<SearchNode<'static>> {
 fn parse_template(val: &str) -> ParseResult<SearchNode> {
    Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
        Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
-        Err(_) => TemplateKind::Name(unescape_to_re(val)?),
+        Err(_) => TemplateKind::Name(unescape(val)?),
    }))
 }

 fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
    Ok(if val.starts_with("re:") {
        SearchNode::SingleField {
-            field: unescape_to_re(key)?,
+            field: unescape(key)?,
            text: unescape_quotes(&val[3..]),
            is_re: true,
        }
    } else {
        SearchNode::SingleField {
-            field: unescape_to_re(key)?,
-            text: unescape_to_glob(val)?,
+            field: unescape(key)?,
+            text: unescape(val)?,
            is_re: false,
        }
    })
@ -443,6 +437,26 @@ fn unescape_quotes(s: &str) -> Cow<str> {
    }
 }

+fn unescape(txt: &str) -> ParseResult<Cow<str>> {
+    if is_invalid_escape(txt) {
+        Err(ParseError {})
+    } else if is_parser_escape(txt) {
+        lazy_static! {
+            static ref RE: Regex = Regex::new(r#"\\[\\":()]"#).unwrap();
+        }
+        Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
+            r"\\" => r"\\",
+            "\\\"" => "\"",
+            r"\:" => ":",
+            r"\(" => "(",
+            r"\)" => ")",
+            _ => unreachable!(),
+        }))
+    } else {
+        Ok(txt.into())
+    }
+}
+
 /// Check string for invalid escape sequences.
 fn is_invalid_escape(txt: &str) -> bool {
    // odd number of \s not followed by an escapable character
@ -461,77 +475,22 @@ fn is_invalid_escape(txt: &str) -> bool {
    RE.is_match(txt)
 }

-/// Handle escaped characters and convert Anki wildcards to SQL wildcards.
-/// Return error if there is an undefined escape sequence.
-fn unescape_to_glob(txt: &str) -> ParseResult<Cow<str>> {
-    if is_invalid_escape(txt) {
-        Err(ParseError {})
-    } else {
-        // escape sequences and unescaped special characters which need conversion
-        lazy_static! {
-            static ref RE: Regex = Regex::new(r"\\.|[*%]").unwrap();
-        }
-        Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
-            r"\\" => r"\\",
-            "\\\"" => "\"",
-            r"\:" => ":",
-            r"\*" => "*",
-            r"\_" => r"\_",
-            r"\(" => "(",
-            r"\)" => ")",
-            "*" => "%",
-            "%" => r"\%",
-            _ => unreachable!(),
-        }))
+/// Check string for escape sequences handled by the parser: ":()
+fn is_parser_escape(txt: &str) -> bool {
+    // odd number of \s followed by a char with special meaning to the parser
+    lazy_static! {
+        static ref RE: Regex = Regex::new(
+            r#"(?x)
+            (?:^|[^\\])     # not a backslash
+            (?:\\\\)*       # even number of backslashes
+            \\              # single backslash
+            [":()]          # parser escape
+            "#
+        )
+        .unwrap();
    }
-}

-/// Handle escaped characters and convert to regex if there are wildcards.
-/// Return error if there is an undefined escape sequence.
-fn unescape_to_re(txt: &str) -> ParseResult<OptionalRe> {
-    unescape_to_custom_re(txt, ".")
-}
-
-/// Handle escaped characters and if there are wildcards, convert to a regex using the given wildcard.
-/// Return error if there is an undefined escape sequence.
-fn unescape_to_custom_re<'a>(txt: &'a str, wildcard: &str) -> ParseResult<OptionalRe<'a>> {
-    if is_invalid_escape(txt) {
-        Err(ParseError {})
-    } else {
-        lazy_static! {
-            static ref WILDCARD: Regex = Regex::new(r"(^|[^\\])(\\\\)*[*_]").unwrap();
-            static ref MAYBE_ESCAPED: Regex = Regex::new(r"\\?.").unwrap();
-            static ref ESCAPED: Regex = Regex::new(r"\\(.)").unwrap();
-        }
-        if WILDCARD.is_match(txt) {
-            Ok(OptionalRe::Re(MAYBE_ESCAPED.replace_all(
-                &txt,
-                |caps: &Captures| {
-                    let s = &caps[0];
-                    match s {
-                        "\\" | r"\*" | r"\(" | r"\)" => s.to_string(),
-                        "\\\"" => "\"".to_string(),
-                        r"\:" => ":".to_string(),
-                        "*" => format!("{}*", wildcard),
-                        "_" => wildcard.to_string(),
-                        r"\_" => "_".to_string(),
-                        s => regex::escape(s),
-                    }
-                },
-            )))
-        } else {
-            Ok(OptionalRe::Text(ESCAPED.replace_all(&txt, "$1")))
-        }
-    }
-}
-
-/// Handle escaped characters and convert to regex.
-/// Return error if there is an undefined escape sequence.
-fn unescape_to_enforced_re(txt: &str, wildcard: &str) -> ParseResult<String> {
-    Ok(match unescape_to_custom_re(txt, wildcard)? {
-        OptionalRe::Text(s) => regex::escape(s.as_ref()),
-        OptionalRe::Re(s) => s.to_string(),
-    })
+    RE.is_match(txt)
 }

 #[cfg(test)]
@ -541,7 +500,6 @@ mod test {
    #[test]
    fn parsing() -> Result<()> {
        use Node::*;
-        use OptionalRe::*;
        use SearchNode::*;

        assert_eq!(parse("")?, vec![Search(SearchNode::WholeCollection)]);
@ -581,7 +539,7 @@ mod test {
                    Search(UnqualifiedText("world".into())),
                    And,
                    Search(SingleField {
-                        field: Text("foo".into()),
+                        field: "foo".into(),
                        text: "bar baz".into(),
                        is_re: false,
                    })
@ -594,7 +552,7 @@ mod test {
        assert_eq!(
            parse("foo:re:bar")?,
            vec![Search(SingleField {
-                field: Text("foo".into()),
+                field: "foo".into(),
                text: "bar".into(),
                is_re: true
            })]
@ -604,7 +562,7 @@ mod test {
        assert_eq!(
            parse(r#""field:va\"lue""#)?,
            vec![Search(SingleField {
-                field: Text("field".into()),
+                field: "field".into(),
                text: "va\"lue".into(),
                is_re: false
            })]
@ -616,9 +574,17 @@ mod test {
        assert!(parse(r"\").is_err());
        assert!(parse(r"\a").is_err());
        assert!(parse(r"\%").is_err());
+
+        // parser unescapes ":()
        assert_eq!(
-            parse(r#"\\\"\:\(\)\*\_"#)?,
-            vec![Search(UnqualifiedText(r#"\\":()*\_"#.into())),]
+            parse(r#"\"\:\(\)"#)?,
+            vec![Search(UnqualifiedText(r#"":()"#.into())),]
+        );
+
+        // parser doesn't unescape \*_
+        assert_eq!(
+            parse(r#"\\\*\_"#)?,
+            vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),]
        );

        // escaping parentheses is optional (only) inside quotes
@ -651,9 +617,7 @@ mod test {
        assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
        assert_eq!(
            parse("card:front")?,
-            vec![Search(CardTemplate(TemplateKind::Name(Text(
-                "front".into()
-            ))))]
+            vec![Search(CardTemplate(TemplateKind::Name("front".into())))]
        );
        assert_eq!(
            parse("card:3")?,
@ -670,15 +634,8 @@ mod test {
            vec![Search(Deck("default one".into()))]
        );

-        assert_eq!(
-            parse("note:basic")?,
-            vec![Search(NoteType(Text("basic".into())))]
-        );
-        assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".to_string()))]);
-        // wildcards in tags don't match whitespace
-        assert_eq!(parse("tag:ha_d")?, vec![Search(Tag(r"ha\Sd".to_string()))]);
-        assert_eq!(parse("tag:h*d")?, vec![Search(Tag(r"h\S*d".to_string()))]);
-
+        assert_eq!(parse("note:basic")?, vec![Search(NoteType("basic".into()))]);
+        assert_eq!(parse("tag:hard")?, vec![Search(Tag("hard".into()))]);
        assert_eq!(
            parse("nid:1237123712,2,3")?,
            vec![Search(NoteIDs("1237123712,2,3".into()))]
--- a/rslib/src/search/sqlwriter.rs
+++ b/rslib/src/search/sqlwriter.rs
@ -1,7 +1,7 @@
 // Copyright: Ankitects Pty Ltd and contributors
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

-use super::parser::{Node, OptionalRe, PropertyKind, SearchNode, StateKind, TemplateKind};
+use super::parser::{Node, PropertyKind, SearchNode, StateKind, TemplateKind};
 use crate::{
    card::{CardQueue, CardType},
    collection::Collection,
@ -9,12 +9,24 @@ use crate::{
    err::Result,
    notes::field_checksum,
    notetype::NoteTypeID,
-    text::{normalize_to_nfc, strip_html_preserving_image_filenames, without_combining},
+    text::{
+        escape_sql, is_glob, normalize_to_nfc, strip_html_preserving_image_filenames, to_custom_re,
+        to_re, to_sql, to_text, without_combining,
+    },
    timestamp::TimestampSecs,
 };
 use regex::Regex;
 use std::{borrow::Cow, fmt::Write};
 use unicase::eq as uni_eq;
+use ConversionMode as CM;
+
+enum ConversionMode<'a> {
+    OnlyNorm,
+    Regex,
+    CustomRe(&'a str),
+    Sql,
+    Text,
+}

 pub(crate) struct SqlWriter<'a> {
    col: &'a mut Collection,
@ -116,22 +128,20 @@ impl SqlWriter<'_> {
        use normalize_to_nfc as norm;
        match node {
            // note fields related
-            SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
+            SearchNode::UnqualifiedText(text) => self.write_unqualified(text),
            SearchNode::SingleField { field, text, is_re } => {
-                self.write_single_field(field, &self.norm_note(text), *is_re)?
+                self.write_single_field(field, text, *is_re)?
            }
-            SearchNode::Duplicates { note_type_id, text } => {
-                self.write_dupes(*note_type_id, &self.norm_note(text))
-            }
-            SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
-            SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
-            SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
+            SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text),
+            SearchNode::Regex(re) => self.write_regex(re),
+            SearchNode::NoCombining(text) => self.write_no_combining(text),
+            SearchNode::WordBoundary(text) => self.write_word_boundary(text),

            // other
            SearchNode::AddedInDays(days) => self.write_added(*days)?,
            SearchNode::EditedInDays(days) => self.write_edited(*days)?,
-            // fixme: normalise in name case?
            SearchNode::CardTemplate(template) => self.write_template(template)?,
+            // fixme: always norm?
            SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
            SearchNode::NoteTypeID(ntid) => {
                write!(self.sql, "n.mid = {}", ntid).unwrap();
@ -139,11 +149,9 @@ impl SqlWriter<'_> {
            SearchNode::DeckID(did) => {
                write!(self.sql, "c.did = {}", did).unwrap();
            }
-            // fixme: normalise?
            SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
            SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,

-            // fixme: normalise?
            SearchNode::Tag(tag) => self.write_tag(tag)?,
            SearchNode::State(state) => self.write_state(state)?,
            SearchNode::Flag(flag) => {
@ -163,7 +171,7 @@ impl SqlWriter<'_> {

    fn write_unqualified(&mut self, text: &str) {
        // implicitly wrap in %
-        let text = format!("%{}%", text);
+        let text = format!("%{}%", &self.convert(CM::Sql, text));
        self.args.push(text);
        write!(
            self.sql,
@ -174,7 +182,7 @@ impl SqlWriter<'_> {
    }

    fn write_no_combining(&mut self, text: &str) {
-        let text = format!("%{}%", without_combining(text));
+        let text = format!("%{}%", without_combining(&self.convert(CM::Sql, text)));
        self.args.push(text);
        write!(
            self.sql,
@ -187,16 +195,28 @@ impl SqlWriter<'_> {
        .unwrap();
    }

-    fn write_tag(&mut self, s: &String) -> Result<()> {
-        if s.contains(" ") {
+    fn write_tag(&mut self, text: &str) -> Result<()> {
+        if text.contains(" ") {
            write!(self.sql, "false").unwrap();
        } else {
-            match s.as_str() {
+            match text {
                "none" => write!(self.sql, "n.tags = ''").unwrap(),
-                r"\S*" => write!(self.sql, "true").unwrap(),
-                _ => {
-                    write!(self.sql, "n.tags regexp ?").unwrap();
-                    self.args.push(format!("(?i).* {} .*", s));
+                "*" => write!(self.sql, "true").unwrap(),
+                s => {
+                    if is_glob(s) {
+                        write!(self.sql, "n.tags regexp ?").unwrap();
+                        let re = &self.convert(CM::CustomRe(r"\S"), s);
+                        self.args.push(format!("(?i).* {} .*", re));
+                    } else if let Some(tag) = self
+                        .col
+                        .storage
+                        .preferred_tag_case(&self.convert(CM::Text, s))?
+                    {
+                        write!(self.sql, "n.tags like ? escape '\\'").unwrap();
+                        self.args.push(format!("% {} %", escape_sql(&tag)));
+                    } else {
+                        write!(self.sql, "false").unwrap();
+                    }
                }
            }
        }
@ -294,7 +314,7 @@ impl SqlWriter<'_> {

    fn write_deck(&mut self, deck: &str) -> Result<()> {
        match deck {
-            ".*" => write!(self.sql, "true").unwrap(),
+            "*" => write!(self.sql, "true").unwrap(),
            "filtered" => write!(self.sql, "c.odid != 0").unwrap(),
            deck => {
                // rewrite "current" to the current deck name
@ -309,7 +329,7 @@ impl SqlWriter<'_> {
                            .as_str(),
                    )
                } else {
-                    human_deck_name_to_native(deck)
+                    human_deck_name_to_native(&self.convert(CM::Regex, deck))
                };

                // convert to a regex that includes child decks
@ -330,54 +350,45 @@ impl SqlWriter<'_> {
            TemplateKind::Ordinal(n) => {
                write!(self.sql, "c.ord = {}", n).unwrap();
            }
-            TemplateKind::Name(name) => match name {
-                OptionalRe::Re(s) => {
-                    let re = format!("(?i){}", s);
+            TemplateKind::Name(name) => {
+                if is_glob(name) {
+                    let re = format!("(?i){}", self.convert(CM::Regex, name));
                    self.sql.push_str(
                        "(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
                    );
                    self.args.push(re);
-                }
-                OptionalRe::Text(s) => {
+                } else {
                    self.sql.push_str(
                        "(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
                    );
-                    self.args.push(s.to_string());
+                    self.args.push(self.convert(CM::Text, name).into());
                }
-            },
+            }
        };
        Ok(())
    }

-    fn write_note_type(&mut self, nt_name: &OptionalRe) -> Result<()> {
-        match nt_name {
-            OptionalRe::Re(s) => {
-                let re = format!("(?i){}", s);
-                self.sql
-                    .push_str("n.mid in (select id from notetypes where name regexp ?)");
-                self.args.push(re);
-            }
-            OptionalRe::Text(s) => {
-                self.sql
-                    .push_str("n.mid in (select id from notetypes where name = ?)");
-                self.args.push(s.to_string());
-            }
+    fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
+        if is_glob(nt_name) {
+            let re = format!("(?i){}", self.convert(CM::Regex, nt_name));
+            self.sql
+                .push_str("n.mid in (select id from notetypes where name regexp ?)");
+            self.args.push(re);
+        } else {
+            self.sql
+                .push_str("n.mid in (select id from notetypes where name = ?)");
+            self.args.push(self.convert(CM::Text, nt_name).into());
        }
        Ok(())
    }

-    fn write_single_field(
-        &mut self,
-        field_name: &OptionalRe,
-        val: &str,
-        is_re: bool,
-    ) -> Result<()> {
+    fn write_single_field(&mut self, field_name: &str, val: &str, is_re: bool) -> Result<()> {
        let note_types = self.col.get_all_notetypes()?;

        let mut field_map = vec![];
        for nt in note_types.values() {
            for field in &nt.fields {
-                if matches_string_variant(&field.name, field_name) {
+                if self.matches_glob(&field.name, field_name) {
                    field_map.push((nt.id, field.ord));
                }
            }
@ -396,11 +407,12 @@ impl SqlWriter<'_> {
        if is_re {
            cmp = "regexp";
            cmp_trailer = "";
-            self.args.push(format!("(?i){}", val));
+            self.args
+                .push(format!("(?i){}", self.convert(CM::OnlyNorm, val)));
        } else {
            cmp = "like";
            cmp_trailer = "escape '\\'";
-            self.args.push(val.into())
+            self.args.push(self.convert(CM::Sql, val).into())
        }

        let arg_idx = self.args.len();
@ -423,6 +435,7 @@ impl SqlWriter<'_> {
    }

    fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
+        let text = &self.convert(CM::OnlyNorm, text);
        let text_nohtml = strip_html_preserving_image_filenames(text);
        let csum = field_checksum(text_nohtml.as_ref());
        write!(
@ -450,19 +463,39 @@ impl SqlWriter<'_> {

    fn write_regex(&mut self, word: &str) {
        self.sql.push_str("n.flds regexp ?");
-        self.args.push(format!(r"(?i){}", word));
+        self.args
+            .push(format!(r"(?i){}", self.convert(CM::OnlyNorm, word)));
    }

    fn write_word_boundary(&mut self, word: &str) {
-        self.write_regex(&format!(r"\b{}\b", word))
+        self.sql.push_str("n.flds regexp ?");
+        self.args
+            .push(format!(r"(?i)\b{}\b", self.convert(CM::Regex, word)));
    }
-}

-/// True if the content of search is equal to text, folding case.
-fn matches_string_variant(text: &str, search: &OptionalRe) -> bool {
-    match search {
-        OptionalRe::Re(s) => Regex::new(&format!("^(?i){}$", s)).unwrap().is_match(text),
-        OptionalRe::Text(s) => uni_eq(text, s),
+    /// Convert text with the function matching the given mode, then pass the result through `norm_note`.
+    fn convert<'a>(&self, mode: ConversionMode, txt: &'a str) -> Cow<'a, str> {
+        let txt = match mode {
+            CM::OnlyNorm => txt.into(),
+            CM::Regex => to_re(txt),
+            CM::CustomRe(wildcard) => to_custom_re(txt, wildcard),
+            CM::Sql => to_sql(txt),
+            CM::Text => to_text(txt),
+        };
+        match txt {
+            Cow::Borrowed(s) => self.norm_note(s),
+            Cow::Owned(s) => self.norm_note(&s).to_string().into(),
+        }
+    }
+
+    /// Compare text with a possible glob, folding case.
+    fn matches_glob(&self, text: &str, search: &str) -> bool {
+        if is_glob(search) {
+            let search = format!("^(?i){}$", self.convert(CM::Regex, search));
+            Regex::new(&search).unwrap().is_match(text)
+        } else {
+            uni_eq(text, &self.convert(CM::Text, search))
+        }
    }
 }

@ -665,9 +698,15 @@ mod test {
            .unwrap();
        assert_eq!(
            s(ctx, r"tag:one"),
-            ("(n.tags regexp ?)".into(), vec![r"(?i).* one .*".into()])
+            (
+                "(n.tags like ? escape '\\')".into(),
+                vec![r"% One %".into()]
+            )
        );

+        // unregistered tags without wildcards won't match
+        assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![]));
+
        // wildcards force a regexp search
        assert_eq!(
            s(ctx, r"tag:o*n\*et%w%oth_re\_e"),
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@ -289,6 +289,80 @@ pub(crate) fn text_to_re(glob: &str) -> String {
    text2.into()
 }

+/// Check if string contains an unescaped wildcard.
+pub(crate) fn is_glob(txt: &str) -> bool {
+    // even number of \s followed by a wildcard
+    lazy_static! {
+        static ref RE: Regex = Regex::new(
+            r#"(?x)
+            (?:^|[^\\])     # not a backslash
+            (?:\\\\)*       # even number of backslashes
+            [*_]            # wildcard
+            "#
+        )
+        .unwrap();
+    }
+
+    RE.is_match(txt)
+}
+
+/// Convert to a RegEx respecting Anki wildcards.
+pub(crate) fn to_re(txt: &str) -> Cow<str> {
+    to_custom_re(txt, ".")
+}
+
+/// Convert Anki style to RegEx using the provided wildcard.
+pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
+    // escape sequences and unescaped special characters which need conversion
+    lazy_static! {
+        static ref RE: Regex = Regex::new(r"\\.|[*_]").unwrap();
+    }
+    RE.replace_all(&txt, |caps: &Captures| {
+        let s = &caps[0];
+        match s {
+            r"\\" | r"\*" => s.to_string(),
+            r"\_" => "_".to_string(),
+            "*" => format!("{}*", wildcard),
+            "_" => wildcard.to_string(),
+            s => regex::escape(s),
+        }
+    })
+}
+
+/// Convert to SQL respecting Anki wildcards.
+pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> {
+    // escape sequences and unescaped special characters which need conversion
+    lazy_static! {
+        static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
+    }
+    RE.replace_all(&txt, |caps: &Captures| {
+        let s = &caps[0];
+        match s {
+            r"\\" => r"\\",
+            r"\*" => "*",
+            "*" => "%",
+            "%" => r"\%",
+            _ => unreachable!(),
+        }
+    })
+}
+
+/// Unescape everything.
+pub(crate) fn to_text(txt: &str) -> Cow<str> {
+    lazy_static! {
+        static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
+    }
+    RE.replace_all(&txt, "$1")
+}
+
+/// Escape characters special to SQL: \%_
+pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
+    lazy_static! {
+        static ref RE: Regex = Regex::new(r"[\\%_]").unwrap();
+    }
+    RE.replace_all(&txt, r"\$0")
+}
+
 #[cfg(test)]
 mod test {
    use crate::text::without_combining;