support globbing chars inside word boundary search

This commit is contained in:
Damien Elmes 2020-04-25 09:43:08 +10:00
parent fd844a0d5a
commit 8dfd362fed
2 changed files with 23 additions and 13 deletions

View File

@ -80,6 +80,7 @@ pub(super) enum SearchNode<'a> {
WholeCollection, WholeCollection,
Regex(Cow<'a, str>), Regex(Cow<'a, str>),
NoCombining(Cow<'a, str>), NoCombining(Cow<'a, str>),
WordBoundary(Cow<'a, str>),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -277,7 +278,7 @@ fn search_node_for_text_with_argument<'a>(
"prop" => parse_prop(val.as_ref())?, "prop" => parse_prop(val.as_ref())?,
"re" => SearchNode::Regex(val), "re" => SearchNode::Regex(val),
"nc" => SearchNode::NoCombining(val), "nc" => SearchNode::NoCombining(val),
"w" => parse_word(val.as_ref()), "w" => SearchNode::WordBoundary(val),
// anything else is a field search // anything else is a field search
_ => parse_single_field(key.as_ref(), val.as_ref()), _ => parse_single_field(key.as_ref(), val.as_ref()),
}) })
@ -410,13 +411,6 @@ fn parse_single_field(key: &str, mut val: &str) -> SearchNode<'static> {
} }
} }
/// Build a regex search node for a `w:` (whole word) search term.
///
/// A leading `*` on `val` drops the word boundary at the front of the
/// pattern, and a trailing `*` drops the one at the end. All leading and
/// trailing `*` characters are stripped, and what remains is escaped so it
/// is matched literally.
fn parse_word(val: &str) -> SearchNode<'static> {
    // Everything between the outer asterisks is matched as literal text.
    let literal = regex::escape(val.trim_matches('*'));

    let mut pattern = String::new();
    if !val.starts_with('*') {
        pattern.push_str(r"\b");
    }
    pattern.push_str(&literal);
    if !val.ends_with('*') {
        pattern.push_str(r"\b");
    }

    SearchNode::Regex(pattern.into())
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -531,11 +525,6 @@ mod test {
})] })]
); );
assert_eq!(parse("w:foo")?, vec![Search(Regex(r"\bfoo\b".into()))]);
assert_eq!(parse("w:*foo")?, vec![Search(Regex(r"foo\b".into()))]);
assert_eq!(parse("w:foo*")?, vec![Search(Regex(r"\bfoo".into()))]);
assert_eq!(parse("w:*fo.*o*")?, vec![Search(Regex(r"fo\.\*o".into()))]);
Ok(()) Ok(())
} }
} }

View File

@ -84,6 +84,7 @@ impl SqlWriter<'_> {
SearchNode::WholeCollection => write!(self.sql, "true").unwrap(), SearchNode::WholeCollection => write!(self.sql, "true").unwrap(),
SearchNode::Regex(re) => self.write_regex(re.as_ref()), SearchNode::Regex(re) => self.write_regex(re.as_ref()),
SearchNode::NoCombining(text) => self.write_no_combining(text.as_ref()), SearchNode::NoCombining(text) => self.write_no_combining(text.as_ref()),
SearchNode::WordBoundary(text) => self.write_word_boundary(text.as_ref()),
}; };
Ok(()) Ok(())
} }
@ -379,6 +380,11 @@ impl SqlWriter<'_> {
self.sql.push_str("n.flds regexp ?"); self.sql.push_str("n.flds regexp ?");
self.args.push(format!(r"(?i){}", word)); self.args.push(format!(r"(?i){}", word));
} }
fn write_word_boundary(&mut self, word: &str) {
let re = glob_to_re(word).unwrap_or_else(|| word.to_string());
self.write_regex(&format!(r"\b{}\b", re))
}
} }
// Write a list of IDs as '(x,y,...)' into the provided string. // Write a list of IDs as '(x,y,...)' into the provided string.
@ -639,6 +645,21 @@ mod test {
("(n.flds regexp ?)".into(), vec![r"(?i)\bone".into()]) ("(n.flds regexp ?)".into(), vec![r"(?i)\bone".into()])
); );
// word boundary
assert_eq!(
s(ctx, r"w:foo"),
("(n.flds regexp ?)".into(), vec![r"(?i)\bfoo\b".into()])
);
assert_eq!(
s(ctx, r"w:*foo"),
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*foo\b".into()])
);
assert_eq!(
s(ctx, r"w:*fo_o*"),
("(n.flds regexp ?)".into(), vec![r"(?i)\b.*fo.o.*\b".into()])
);
Ok(()) Ok(())
} }
} }