Merge pull request #823 from RumovZ/rework-search-parser

Rework search parser
2020-11-20 16:22:12 +10:00 · 2020-11-20 16:22:12 +10:00 · e23d40e850
commit e23d40e850
parent f7fcdca54c cb2c19aced
6 changed files with 365 additions and 187 deletions
--- a/pylib/tests/test_find.py
+++ b/pylib/tests/test_find.py
@ -48,8 +48,10 @@ def test_findCards():
    # tag searches
    assert len(col.findCards("tag:*")) == 5
    assert len(col.findCards("tag:\\*")) == 1
-    assert len(col.findCards("tag:%")) == 5
+    assert len(col.findCards("tag:%")) == 1
-    assert len(col.findCards("tag:\\%")) == 1
+    assert len(col.findCards("tag:sheep_goat")) == 0
    assert len(col.findCards('"tag:sheep goat"')) == 0
    assert len(col.findCards('"tag:* *"')) == 0
    assert len(col.findCards("tag:animal_1")) == 2
    assert len(col.findCards("tag:animal\\_1")) == 1
    assert not col.findCards("tag:donkey")
--- a/qt/aqt/browser.py
+++ b/qt/aqt/browser.py
@ -1207,7 +1207,7 @@ QTableView {{ gridline-color: {grid} }}
                if i % 2 == 0:
                    txt += a + ":"
                else:
-                    txt += re.sub("[*%_]", r"\\\g<0>", a)
+                    txt += re.sub(r"[*_\\]", r"\\\g<0>", a)
                    for c in ' 　()"':
                        if c in txt:
                            txt = '"{}"'.format(txt.replace('"', '\\"'))
--- a/rslib/src/search/parser.rs
+++ b/rslib/src/search/parser.rs
@ -9,17 +9,15 @@ use crate::{
 use lazy_static::lazy_static;
 use nom::{
    branch::alt,
-    bytes::complete::{escaped, is_not, tag, take_while1},
+    bytes::complete::{escaped, is_not, tag},
-    character::complete::{anychar, char, one_of},
+    character::complete::{anychar, char, none_of, one_of},
-    combinator::{all_consuming, map, map_res},
+    combinator::{all_consuming, map, map_res, verify},
-    sequence::{delimited, preceded, tuple},
+    sequence::{delimited, preceded, separated_pair},
    {multi::many0, IResult},
 };
-use regex::Regex;
+use regex::{Captures, Regex};
 use std::{borrow::Cow, num};
 // fixme: need to preserve \ when used twice in string
 struct ParseError {}
 impl From<num::ParseIntError> for ParseError {
@ -63,7 +61,7 @@ pub(super) enum SearchNode<'a> {
    },
    AddedInDays(u32),
    EditedInDays(u32),
-    CardTemplate(TemplateKind),
+    CardTemplate(TemplateKind<'a>),
    Deck(Cow<'a, str>),
    DeckID(DeckID),
    NoteTypeID(NoteTypeID),
@ -75,12 +73,12 @@ pub(super) enum SearchNode<'a> {
    Tag(Cow<'a, str>),
    Duplicates {
        note_type_id: NoteTypeID,
-        text: String,
+        text: Cow<'a, str>,
    },
    State(StateKind),
    Flag(u8),
-    NoteIDs(Cow<'a, str>),
+    NoteIDs(&'a str),
-    CardIDs(Cow<'a, str>),
+    CardIDs(&'a str),
    Property {
        operator: String,
        kind: PropertyKind,
@ -113,9 +111,9 @@ pub(super) enum StateKind {
 }
 #[derive(Debug, PartialEq)]
-pub(super) enum TemplateKind {
+pub(super) enum TemplateKind<'a> {
    Ordinal(u16),
-    Name(String),
+    Name(Cow<'a, str>),
 }
 /// Parse the input string into a list of nodes.
@ -127,7 +125,6 @@ pub(super) fn parse(input: &str) -> Result<Vec<Node>> {
    let (_, nodes) =
        all_consuming(group_inner)(input).map_err(|_e| AnkiError::SearchError(None))?;
    Ok(nodes)
 }
@ -205,32 +202,21 @@ fn text(s: &str) -> IResult<&str, Node> {
 /// Determine if text is a qualified search, and handle escaped chars.
 fn search_node_for_text(s: &str) -> ParseResult<SearchNode> {
-    let mut it = s.splitn(2, ':');
+    let (tail, head) = escaped(is_not(r":\"), '\\', anychar)(s)?;
-    let (head, tail) = (
+    if tail.is_empty() {
-        unescape_quotes(it.next().unwrap()),
+        Ok(SearchNode::UnqualifiedText(unescape(head)?))
        it.next().map(unescape_quotes),
    );
    if let Some(tail) = tail {
        search_node_for_text_with_argument(head, tail)
    } else {
-        Ok(SearchNode::UnqualifiedText(head))
+        search_node_for_text_with_argument(head, &tail[1..])
    }
 }
-/// \" -> "
+/// Unquoted text, terminated by whitespace or unescaped ", ( or )
 /// Turn every escaped quote (`\"`) in `s` into a bare `"`.
 ///
 /// The input is borrowed unchanged when it contains no escaped quote, so
 /// an allocation only happens when a replacement is actually needed.
 fn unescape_quotes(s: &str) -> Cow<str> {
    const ESCAPED_QUOTE: &str = r#"\""#;
    if s.contains(ESCAPED_QUOTE) {
        Cow::Owned(s.replace(ESCAPED_QUOTE, "\""))
    } else {
        Cow::Borrowed(s)
    }
 }
 /// Unquoted text, terminated by a space or )
 fn unquoted_term(s: &str) -> IResult<&str, Node> {
    map_res(
-        take_while1(|c| c != ' ' && c != ')' && c != '"'),
+        verify(
            escaped(is_not("\"() \u{3000}\\"), '\\', none_of(" \u{3000}")),
            |s: &str| !s.is_empty(),
        ),
        |text: &str| -> ParseResult<Node> {
            Ok(if text.eq_ignore_ascii_case("or") {
                Node::Or
@ -256,57 +242,66 @@ fn quoted_term_str(s: &str) -> IResult<&str, &str> {
 /// Quoted text, terminated by a non-escaped double quote
 fn quoted_term_inner(s: &str) -> IResult<&str, &str> {
-    escaped(is_not(r#""\"#), '\\', anychar)(s)
+    verify(escaped(is_not(r#""\"#), '\\', anychar), |s: &str| {
        !s.is_empty()
    })(s)
 }
 /// eg deck:"foo bar" - quotes must come after the :
 fn partially_quoted_term(s: &str) -> IResult<&str, Node> {
-    let term = take_while1(|c| c != ' ' && c != ')' && c != ':');
+    map_res(
-    let (s, (term, _, quoted_val)) = tuple((term, char(':'), quoted_term_str))(s)?;
+        separated_pair(
-    let quoted_val = unescape_quotes(quoted_val);
+            verify(
-
+                escaped(is_not("\"(): \u{3000}\\"), '\\', none_of(": \u{3000}")),
-    match search_node_for_text_with_argument(term.into(), quoted_val) {
+                |s: &str| !s.is_empty(),
-        Ok(search) => Ok((s, Node::Search(search))),
+            ),
-        Err(_) => Err(nom::Err::Failure((s, nom::error::ErrorKind::NoneOf))),
+            char(':'),
-    }
+            quoted_term_str,
        ),
        |p| match search_node_for_text_with_argument(p.0, p.1) {
            Ok(search) => Ok(Node::Search(search)),
            Err(e) => Err(e),
        },
    )(s)
 }
 /// Convert a colon-separated key/val pair into the relevant search type.
 fn search_node_for_text_with_argument<'a>(
-    key: Cow<'a, str>,
+    key: &'a str,
-    val: Cow<'a, str>,
+    val: &'a str,
 ) -> ParseResult<SearchNode<'a>> {
    Ok(match key.to_ascii_lowercase().as_str() {
        "added" => SearchNode::AddedInDays(val.parse()?),
        "edited" => SearchNode::EditedInDays(val.parse()?),
-        "deck" => SearchNode::Deck(val),
+        "deck" => SearchNode::Deck(unescape(val)?),
-        "note" => SearchNode::NoteType(val),
+        "note" => SearchNode::NoteType(unescape(val)?),
-        "tag" => SearchNode::Tag(val),
+        "tag" => SearchNode::Tag(unescape(val)?),
        "mid" => SearchNode::NoteTypeID(val.parse()?),
        "nid" => SearchNode::NoteIDs(check_id_list(val)?),
        "cid" => SearchNode::CardIDs(check_id_list(val)?),
        "did" => SearchNode::DeckID(val.parse()?),
-        "card" => parse_template(val.as_ref()),
+        "card" => parse_template(val)?,
-        "is" => parse_state(val.as_ref())?,
+        "is" => parse_state(val)?,
-        "flag" => parse_flag(val.as_ref())?,
+        "flag" => parse_flag(val)?,
-        "rated" => parse_rated(val.as_ref())?,
+        "rated" => parse_rated(val)?,
-        "dupe" => parse_dupes(val.as_ref())?,
+        "dupe" => parse_dupes(val)?,
-        "prop" => parse_prop(val.as_ref())?,
+        "prop" => parse_prop(val)?,
-        "re" => SearchNode::Regex(val),
+        "re" => SearchNode::Regex(unescape_quotes(val)),
-        "nc" => SearchNode::NoCombining(val),
+        "r" => SearchNode::UnqualifiedText(unescape_raw(val)),
-        "w" => SearchNode::WordBoundary(val),
+        "nc" => SearchNode::NoCombining(unescape(val)?),
        "w" => SearchNode::WordBoundary(unescape(val)?),
        // anything else is a field search
-        _ => parse_single_field(key.as_ref(), val.as_ref()),
+        _ => parse_single_field(key, val)?,
    })
 }
 /// ensure a list of ids contains only numbers and commas, returning unchanged if true
 /// used by nid: and cid:
-fn check_id_list(s: Cow<str>) -> ParseResult<Cow<str>> {
+fn check_id_list(s: &str) -> ParseResult<&str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"^(\d+,)*\d+$").unwrap();
    }
-    if RE.is_match(s.as_ref()) {
+    if RE.is_match(s) {
        Ok(s)
    } else {
        Err(ParseError {})
@ -360,13 +355,13 @@ fn parse_rated(val: &str) -> ParseResult<SearchNode<'static>> {
 }
 /// eg dupe:1231,hello
-fn parse_dupes(val: &str) -> ParseResult<SearchNode<'static>> {
+fn parse_dupes(val: &str) -> ParseResult<SearchNode> {
    let mut it = val.splitn(2, ',');
    let mid: NoteTypeID = it.next().unwrap().parse()?;
    let text = it.next().ok_or(ParseError {})?;
    Ok(SearchNode::Duplicates {
        note_type_id: mid,
-        text: text.into(),
+        text: unescape_quotes(text),
    })
 }
@ -411,27 +406,116 @@ fn parse_prop(val: &str) -> ParseResult<SearchNode<'static>> {
    })
 }
-fn parse_template(val: &str) -> SearchNode<'static> {
+fn parse_template(val: &str) -> ParseResult<SearchNode> {
-    SearchNode::CardTemplate(match val.parse::<u16>() {
+    Ok(SearchNode::CardTemplate(match val.parse::<u16>() {
        Ok(n) => TemplateKind::Ordinal(n.max(1) - 1),
-        Err(_) => TemplateKind::Name(val.into()),
+        Err(_) => TemplateKind::Name(unescape(val)?),
    }))
 }
 /// Build a field-restricted search node from a `key:val` pair.
 ///
 /// The field name always goes through `unescape`. The value is handled
 /// according to its prefix:
 /// - `re:` marks a regular expression; the rest goes through `unescape_quotes`.
 /// - `r:` marks raw text; the rest goes through `unescape_raw`.
 /// - anything else goes through `unescape`.
 ///
 /// Returns an error if `unescape` rejects the field name or the value.
 fn parse_single_field<'a>(key: &'a str, val: &'a str) -> ParseResult<SearchNode<'a>> {
    const RE_PREFIX: &str = "re:";
    const RAW_PREFIX: &str = "r:";

    let field = unescape(key)?;
    let (text, is_re) = if val.starts_with(RE_PREFIX) {
        (unescape_quotes(&val[RE_PREFIX.len()..]), true)
    } else if val.starts_with(RAW_PREFIX) {
        (unescape_raw(&val[RAW_PREFIX.len()..]), false)
    } else {
        (unescape(val)?, false)
    };
    Ok(SearchNode::SingleField { field, text, is_re })
 }
-fn parse_single_field(key: &str, mut val: &str) -> SearchNode<'static> {
+/// For strings without unescaped ", convert \" to "
-    let is_re = if val.starts_with("re:") {
+fn unescape_quotes(s: &str) -> Cow<str> {
-        val = val.trim_start_matches("re:");
+    if s.contains('"') {
-        true
+        s.replace(r#"\""#, "\"").into()
    } else {
-        false
+        s.into()
    };
    SearchNode::SingleField {
        field: key.to_string().into(),
        text: val.to_string().into(),
        is_re,
    }
 }
 /// Unescape quotes but escape wildcards and \s.
 fn unescape_raw(s: &str) -> Cow<str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r#"\\"?|\*|_"#).unwrap();
    }
    RE.replace_all(&s, |caps: &Captures| match &caps[0] {
        r"\" => r"\\",
        "\\\"" => "\"",
        r"*" => r"\*",
        r"_" => r"\_",
        _ => unreachable!(),
    })
 }
 /// Unescape chars with special meaning to the parser.
 fn unescape(txt: &str) -> ParseResult<Cow<str>> {
    if is_invalid_escape(txt) {
        Err(ParseError {})
    } else if is_parser_escape(txt) {
        lazy_static! {
            static ref RE: Regex = Regex::new(r#"\\[\\":()-]"#).unwrap();
        }
        Ok(RE.replace_all(&txt, |caps: &Captures| match &caps[0] {
            r"\\" => r"\\",
            "\\\"" => "\"",
            r"\:" => ":",
            r"\(" => "(",
            r"\)" => ")",
            r"\-" => "-",
            _ => unreachable!(),
        }))
    } else {
        Ok(txt.into())
    }
 }
 /// Check string for invalid escape sequences.
 ///
 /// Returns `true` if `txt` contains a backslash that is not itself escaped
 /// and is either the last character or followed by something other than
 /// one of `\ " : * _ ( ) -`.
 fn is_invalid_escape(txt: &str) -> bool {
    // odd number of \s not followed by an escapable character
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r#"(?x)
            (?:^|[^\\])         # not a backslash
            (?:\\\\)*           # even number of backslashes
            \\                  # single backslash
            (?:[^\\":*_()-]|$)  # anything but an escapable char
            "#
        )
        .unwrap();
    }
    RE.is_match(txt)
 }
 /// Check string for escape sequences handled by the parser: ":()-
 ///
 /// Returns `true` if `txt` contains a backslash that is not itself escaped
 /// and is followed by one of `"`, `:`, `(`, `)` or `-`.
 fn is_parser_escape(txt: &str) -> bool {
    // odd number of \s followed by a char with special meaning to the parser
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r#"(?x)
            (?:^|[^\\])     # not a backslash
            (?:\\\\)*       # even number of backslashes
            \\              # single backslash
            [":()-]         # parser escape
            "#
        )
        .unwrap();
    }
    RE.is_match(txt)
 }
 #[cfg(test)]
 mod test {
    use super::*;
@ -497,7 +581,7 @@ mod test {
            })]
        );
-        // partially quoted text should handle escaping the same way
+        // escaping is independent of quotation
        assert_eq!(
            parse(r#""field:va\"lue""#)?,
            vec![Search(SingleField {
@ -507,13 +591,67 @@ mod test {
            })]
        );
        assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:"va\"lue""#)?,);
        assert_eq!(parse(r#""field:va\"lue""#)?, parse(r#"field:va\"lue"#)?,);
-        // any character should be escapable in quotes
+        // only \":()-*_ are escapable
        assert!(parse(r"\").is_err());
        assert!(parse(r"\a").is_err());
        assert!(parse(r"\%").is_err());
        // parser unescapes ":()-
        assert_eq!(
-            parse(r#""re:\btest""#)?,
+            parse(r#"\"\:\(\)\-"#)?,
-            vec![Search(Regex(r"\btest".into()))]
+            vec![Search(UnqualifiedText(r#"":()-"#.into())),]
        );
        // parser doesn't unescape \*_
        assert_eq!(
            parse(r#"\\\*\_"#)?,
            vec![Search(UnqualifiedText(r#"\\\*\_"#.into())),]
        );
        // escaping parentheses is optional (only) inside quotes
        assert_eq!(parse(r#""\)\(""#), parse(r#"")(""#));
        assert!(parse(")(").is_err());
        // escaping : is optional if it is preceded by another :
        assert!(parse(":test").is_err());
        assert!(parse(":").is_err());
        assert_eq!(parse("field:val:ue"), parse(r"field:val\:ue"));
        assert_eq!(parse(r#""field:val:ue""#), parse(r"field:val\:ue"));
        assert_eq!(parse(r#"field:"val:ue""#), parse(r"field:val\:ue"));
        // escaping - is optional if it cannot be mistaken for a negator
        assert_eq!(parse("-"), parse(r"\-"));
        assert_eq!(parse("A-"), parse(r"A\-"));
        assert_eq!(parse(r#""-A""#), parse(r"\-A"));
        assert_ne!(parse("-A"), parse(r"\-A"));
        // any character should be escapable on the right side of re:
        assert_eq!(
            parse(r#""re:\btest\%""#)?,
            vec![Search(Regex(r"\btest\%".into()))]
        );
        // treat all chars as literals in raw searches
        assert_eq!(parse(r"r:\*_"), parse(r"\\\*\_"));
        assert_eq!(parse(r"field:r:\*_"), parse(r"field:\\\*\_"));
        // no exceptions for escaping "
        assert_eq!(
            parse(r#"re:te\"st"#)?,
            vec![Search(Regex(r#"te"st"#.into()))]
        );
        assert!(parse(r#"re:te"st"#).is_err());
        assert_eq!(
            parse(r#"r:te\"st"#)?,
            vec![Search(UnqualifiedText(r#"te"st"#.into()))]
        );
        assert!(parse(r#"r:te"st"#).is_err());
        // spaces are optional if node separation is clear
        assert_eq!(parse(r#"a"b"(c)"#)?, parse("a b (c)")?);
        assert_eq!(parse("added:3")?, vec![Search(AddedInDays(3))]);
        assert_eq!(
            parse("card:front")?,
--- a/rslib/src/search/sqlwriter.rs
+++ b/rslib/src/search/sqlwriter.rs
@ -9,12 +9,12 @@ use crate::{
    err::Result,
    notes::field_checksum,
    notetype::NoteTypeID,
-    text::{matches_wildcard, text_to_re},
+    text::{
-    text::{normalize_to_nfc, strip_html_preserving_media_filenames, without_combining},
+        escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames,
        to_custom_re, to_re, to_sql, to_text, without_combining,
    },
    timestamp::TimestampSecs,
 };
 use lazy_static::lazy_static;
 use regex::Regex;
 use std::{borrow::Cow, fmt::Write};
 pub(crate) struct SqlWriter<'a> {
@ -119,7 +119,7 @@ impl SqlWriter<'_> {
            // note fields related
            SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
            SearchNode::SingleField { field, text, is_re } => {
-                self.write_single_field(field.as_ref(), &self.norm_note(text), *is_re)?
+                self.write_single_field(&norm(field), &self.norm_note(text), *is_re)?
            }
            SearchNode::Duplicates { note_type_id, text } => {
                self.write_dupes(*note_type_id, &self.norm_note(text))
@ -132,11 +132,9 @@ impl SqlWriter<'_> {
            SearchNode::AddedInDays(days) => self.write_added(*days)?,
            SearchNode::EditedInDays(days) => self.write_edited(*days)?,
            SearchNode::CardTemplate(template) => match template {
-                TemplateKind::Ordinal(_) => {
+                TemplateKind::Ordinal(_) => self.write_template(template)?,
                    self.write_template(template)?;
                }
                TemplateKind::Name(name) => {
-                    self.write_template(&TemplateKind::Name(norm(name).into()))?;
+                    self.write_template(&TemplateKind::Name(norm(name).into()))?
                }
            },
            SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
@ -148,6 +146,7 @@ impl SqlWriter<'_> {
            }
            SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
            SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
            SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
            SearchNode::State(state) => self.write_state(state)?,
            SearchNode::Flag(flag) => {
@ -167,7 +166,7 @@ impl SqlWriter<'_> {
    fn write_unqualified(&mut self, text: &str) {
        // implicitly wrap in %
-        let text = format!("%{}%", convert_glob_char(text));
+        let text = format!("%{}%", &to_sql(text));
        self.args.push(text);
        write!(
            self.sql,
@ -178,7 +177,7 @@ impl SqlWriter<'_> {
    }
    fn write_no_combining(&mut self, text: &str) {
-        let text = format!("%{}%", without_combining(text));
+        let text = format!("%{}%", without_combining(&to_sql(text)));
        self.args.push(text);
        write!(
            self.sql,
@ -192,27 +191,27 @@ impl SqlWriter<'_> {
    }
    fn write_tag(&mut self, text: &str) -> Result<()> {
-        match text {
+        if text.contains(" ") {
-            "none" => {
+            write!(self.sql, "false").unwrap();
-                write!(self.sql, "n.tags = ''").unwrap();
+        } else {
-            }
+            match text {
-            "*" | "%" => {
+                "none" => write!(self.sql, "n.tags = ''").unwrap(),
-                write!(self.sql, "true").unwrap();
+                "*" => write!(self.sql, "true").unwrap(),
-            }
+                s => {
-            text => {
+                    if is_glob(s) {
-                if let Some(re_glob) = glob_to_re(text) {
+                        write!(self.sql, "n.tags regexp ?").unwrap();
-                    // text contains a wildcard
+                        let re = &to_custom_re(s, r"\S");
-                    let re_glob = format!("(?i).* {} .*", re_glob);
+                        self.args.push(format!("(?i).* {} .*", re));
-                    write!(self.sql, "n.tags regexp ?").unwrap();
+                    } else if let Some(tag) = self.col.storage.preferred_tag_case(&to_text(s))? {
-                    self.args.push(re_glob);
+                        write!(self.sql, "n.tags like ? escape '\\'").unwrap();
-                } else if let Some(tag) = self.col.storage.preferred_tag_case(&text)? {
+                        self.args.push(format!("% {} %", escape_sql(&tag)));
-                    write!(self.sql, "n.tags like ?").unwrap();
+                    } else {
-                    self.args.push(format!("% {} %", tag));
+                        write!(self.sql, "false").unwrap();
-                } else {
+                    }
                    write!(self.sql, "false").unwrap();
                }
            }
        }
        Ok(())
    }
@ -312,18 +311,20 @@ impl SqlWriter<'_> {
                // rewrite "current" to the current deck name
                let native_deck = if deck == "current" {
                    let current_did = self.col.get_current_deck_id();
-                    self.col
+                    regex::escape(
-                        .storage
+                        self.col
-                        .get_deck(current_did)?
+                            .storage
-                        .map(|d| d.name)
+                            .get_deck(current_did)?
-                        .unwrap_or_else(|| "Default".into())
+                            .map(|d| d.name)
                            .unwrap_or_else(|| "Default".into())
                            .as_str(),
                    )
                } else {
-                    human_deck_name_to_native(deck)
+                    human_deck_name_to_native(&to_re(deck))
                };
                // convert to a regex that includes child decks
-                let re = text_to_re(&native_deck);
+                self.args.push(format!("(?i)^{}($|\x1f)", native_deck));
                self.args.push(format!("(?i)^{}($|\x1f)", re));
                let arg_idx = self.args.len();
                self.sql.push_str(&format!(concat!(
                    "(c.did in (select id from decks where name regexp ?{n})",
@ -341,8 +342,8 @@ impl SqlWriter<'_> {
                write!(self.sql, "c.ord = {}", n).unwrap();
            }
            TemplateKind::Name(name) => {
-                if let Some(re) = glob_to_re(name) {
+                if is_glob(name) {
-                    let re = format!("(?i){}", re);
+                    let re = format!("(?i){}", to_re(name));
                    self.sql.push_str(
                        "(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
                    );
@ -351,7 +352,7 @@ impl SqlWriter<'_> {
                    self.sql.push_str(
                        "(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
                    );
-                    self.args.push(name.to_string());
+                    self.args.push(to_text(name).into());
                }
            }
        };
@ -359,15 +360,15 @@ impl SqlWriter<'_> {
    }
    fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
-        if let Some(re) = glob_to_re(nt_name) {
+        if is_glob(nt_name) {
-            let re = format!("(?i){}", re);
+            let re = format!("(?i){}", to_re(nt_name));
            self.sql
                .push_str("n.mid in (select id from notetypes where name regexp ?)");
            self.args.push(re);
        } else {
            self.sql
                .push_str("n.mid in (select id from notetypes where name = ?)");
-            self.args.push(nt_name.to_string());
+            self.args.push(to_text(nt_name).into());
        }
        Ok(())
    }
@ -378,7 +379,7 @@ impl SqlWriter<'_> {
        let mut field_map = vec![];
        for nt in note_types.values() {
            for field in &nt.fields {
-                if matches_wildcard(&field.name, field_name) {
+                if matches_glob(&field.name, field_name) {
                    field_map.push((nt.id, field.ord));
                }
            }
@ -401,7 +402,7 @@ impl SqlWriter<'_> {
        } else {
            cmp = "like";
            cmp_trailer = "escape '\\'";
-            self.args.push(convert_glob_char(val).into())
+            self.args.push(to_sql(val).into())
        }
        let arg_idx = self.args.len();
@ -455,29 +456,10 @@ impl SqlWriter<'_> {
    }
    fn write_word_boundary(&mut self, word: &str) {
-        // fixme: need to escape in the no-glob case as well
+        self.write_regex(&format!(r"\b{}\b", to_re(word)));
        let re = text_to_re(word);
        self.write_regex(&format!(r"\b{}\b", re))
    }
 }
 /// Replace each unescaped `*` with `%`, leaving escape sequences such as
 /// `\*` alone.
 ///
 /// The text is scanned once, left to right. A backslash and the character
 /// after it are copied through together, so consecutive wildcards (`**`)
 /// are each converted, and a `*` that follows an escaped backslash (`\\*`)
 /// is still treated as a wildcard. Input without any `*` is returned
 /// borrowed.
 fn convert_glob_char(val: &str) -> Cow<str> {
    if !val.contains('*') {
        return Cow::Borrowed(val);
    }
    let mut converted = String::with_capacity(val.len());
    let mut chars = val.chars();
    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                // copy the escape sequence verbatim so the escaped char is
                // never taken for a wildcard
                converted.push(c);
                if let Some(escaped) = chars.next() {
                    converted.push(escaped);
                }
            }
            '*' => converted.push('%'),
            other => converted.push(other),
        }
    }
    Cow::Owned(converted)
 }
 /// Turn a glob into a regex via `text_to_re`, but only when it contains at
 /// least one of the globbing characters `_`, `%` or `*`.
 /// Returns `None` for text without any of them.
 fn glob_to_re(glob: &str) -> Option<String> {
    let has_glob_char = glob.chars().any(|c| c == '_' || c == '*' || c == '%');
    if has_glob_char {
        Some(text_to_re(glob))
    } else {
        None
    }
 }
 #[derive(Debug, PartialEq, Clone, Copy)]
 pub enum RequiredTable {
    Notes,
@ -601,10 +583,9 @@ mod test {
                vec!["%te%st%".into()]
            )
        );
-        assert_eq!(s(ctx, "te%st").1, vec!["%te%st%".to_string()]);
+        assert_eq!(s(ctx, "te%st").1, vec![r"%te\%st%".to_string()]);
-        // user should be able to escape sql wildcards
+        // user should be able to escape wildcards
-        assert_eq!(s(ctx, r#"te\%s\_t"#).1, vec!["%te\\%s\\_t%".to_string()]);
+        assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te*s\\_t%".to_string()]);
        assert_eq!(s(ctx, r#"te\*s\_t"#).1, vec!["%te\\*s\\_t%".to_string()]);
        // qualified search
        assert_eq!(
@ -673,23 +654,26 @@ mod test {
            )
        );
        // unregistered tag short circuits
        assert_eq!(s(ctx, r"tag:one"), ("(false)".into(), vec![]));
        // if registered, searches with canonical
        ctx.transact(None, |col| col.register_tag("One", Usn(-1)))
            .unwrap();
        assert_eq!(
            s(ctx, r"tag:one"),
-            ("(n.tags like ?)".into(), vec![r"% One %".into()])
+            (
                "(n.tags like ? escape '\\')".into(),
                vec![r"% One %".into()]
            )
        );
        // unregistered tags without wildcards won't match
        assert_eq!(s(ctx, "tag:unknown"), ("(false)".into(), vec![]));
        // wildcards force a regexp search
        assert_eq!(
-            s(ctx, r"tag:o*n\*et%w\%oth_re\_e"),
+            s(ctx, r"tag:o*n\*et%w%oth_re\_e"),
            (
                "(n.tags regexp ?)".into(),
-                vec![r"(?i).* o.*n\*et.*w%oth.re_e .*".into()]
+                vec![r"(?i).* o\S*n\*et%w%oth\Sre_e .*".into()]
            )
        );
        assert_eq!(s(ctx, "tag:none"), ("(n.tags = '')".into(), vec![]));
@ -803,12 +787,4 @@ mod test {
            RequiredTable::Notes
        );
    }
    // convert_glob_char turns a * at the start, in the middle or after a
    // newline into %, and leaves the escaped form \* untouched
    #[test]
    fn convert_glob() {
        assert_eq!(&convert_glob_char("foo*bar"), "foo%bar");
        assert_eq!(&convert_glob_char("*bar"), "%bar");
        assert_eq!(&convert_glob_char("\n*bar"), "\n%bar");
        assert_eq!(&convert_glob_char(r"\*bar"), r"\*bar");
    }
 }
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@ -240,17 +240,6 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
    }
 }
 /// Case-insensitively compare `text` against `search`.
 /// A `*` in `search` stands for any run of zero or more characters; every
 /// other character is matched literally.
 pub(crate) fn matches_wildcard(text: &str, search: &str) -> bool {
    if !search.contains('*') {
        return uni_eq(text, search);
    }
    // escape everything, then turn the escaped stars back into wildcards
    let escaped = regex::escape(search);
    let pattern = format!("^(?i){}$", escaped.replace(r"\*", ".*"));
    Regex::new(&pattern).unwrap().is_match(text)
 }
 /// Convert provided string to NFKD form and strip combining characters.
 pub(crate) fn without_combining(s: &str) -> Cow<str> {
    // if the string is already normalized
@ -301,9 +290,91 @@ pub(crate) fn text_to_re(glob: &str) -> String {
    text2.into()
 }
 /// Check if string contains an unescaped wildcard.
 ///
 /// Returns `true` if `txt` contains a `*` or `_` that is preceded by an
 /// even number of backslashes (possibly none), i.e. one that is not
 /// itself escaped.
 pub(crate) fn is_glob(txt: &str) -> bool {
    // even number of \s followed by a wildcard
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r#"(?x)
            (?:^|[^\\])     # not a backslash
            (?:\\\\)*       # even number of backslashes
            [*_]            # wildcard
            "#
        )
        .unwrap();
    }
    RE.is_match(txt)
 }
 /// Convert to a RegEx respecting Anki wildcards.
 ///
 /// Shorthand for `to_custom_re` with `.` as the wildcard, so an unescaped
 /// `_` becomes `.` and an unescaped `*` becomes `.*`.
 pub(crate) fn to_re(txt: &str) -> Cow<str> {
    to_custom_re(txt, ".")
 }
 /// Convert Anki style to RegEx using the provided wildcard.
 pub(crate) fn to_custom_re<'a>(txt: &'a str, wildcard: &str) -> Cow<'a, str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\\?.").unwrap();
    }
    RE.replace_all(&txt, |caps: &Captures| {
        let s = &caps[0];
        match s {
            r"\\" | r"\*" => s.to_string(),
            r"\_" => "_".to_string(),
            "*" => format!("{}*", wildcard),
            "_" => wildcard.to_string(),
            s => regex::escape(s),
        }
    })
 }
 /// Convert to SQL respecting Anki wildcards.
 pub(crate) fn to_sql<'a>(txt: &'a str) -> Cow<'a, str> {
    // escape sequences and unescaped special characters which need conversion
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\\[\\*]|[*%]").unwrap();
    }
    RE.replace_all(&txt, |caps: &Captures| {
        let s = &caps[0];
        match s {
            r"\\" => r"\\",
            r"\*" => "*",
            "*" => "%",
            "%" => r"\%",
            _ => unreachable!(),
        }
    })
 }
 /// Unescape everything.
 pub(crate) fn to_text(txt: &str) -> Cow<str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\\(.)").unwrap();
    }
    RE.replace_all(&txt, "$1")
 }
 /// Escape characters special to SQL: \%_
 pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"[\\%_]").unwrap();
    }
    RE.replace_all(&txt, r"\$0")
 }
 /// Case-insensitively compare `text` with `search`, which may be a glob.
 ///
 /// If `search` contains an unescaped wildcard, it is converted with `to_re`
 /// and must match the whole of `text`. Otherwise it is unescaped with
 /// `to_text` and compared using `uni_eq`.
 pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
    if !is_glob(search) {
        return uni_eq(text, &to_text(search));
    }
    let pattern = format!("^(?i){}$", to_re(search));
    Regex::new(&pattern).unwrap().is_match(text)
 }
 #[cfg(test)]
 mod test {
    use super::matches_wildcard;
    use crate::text::without_combining;
    use crate::text::{
        extract_av_tags, strip_av_tags, strip_html, strip_html_preserving_media_filenames, AVTag,
@ -351,15 +422,6 @@ mod test {
        );
    }
    // matches_wildcard folds case, and lets * stand for the rest of the
    // text or for a run in the middle of it
    #[test]
    fn wildcard() {
        assert_eq!(matches_wildcard("foo", "bar"), false);
        assert_eq!(matches_wildcard("foo", "Foo"), true);
        assert_eq!(matches_wildcard("foo", "F*"), true);
        assert_eq!(matches_wildcard("foo", "F*oo"), true);
        assert_eq!(matches_wildcard("foo", "b*"), false);
    }
    #[test]
    fn combining() {
        assert!(matches!(without_combining("test"), Cow::Borrowed(_)));