Revert changes to normalisation handling

Handle norm calls individually in write_search_node_to_sql again.
This commit is contained in:
RumovZ 2020-11-18 23:46:27 +01:00
parent 6e51bad7db
commit 785540bddc
2 changed files with 43 additions and 71 deletions

View File

@ -10,23 +10,12 @@ use crate::{
notes::field_checksum,
notetype::NoteTypeID,
text::{
escape_sql, is_glob, normalize_to_nfc, strip_html_preserving_media_filenames, to_custom_re,
to_re, to_sql, to_text, without_combining,
escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames,
to_custom_re, to_re, to_sql, to_text, without_combining,
},
timestamp::TimestampSecs,
};
use regex::Regex;
use std::{borrow::Cow, fmt::Write};
use unicase::eq as uni_eq;
use ConversionMode as CM;
enum ConversionMode<'a> {
OnlyNorm,
Regex,
CustomRe(&'a str),
Sql,
Text,
}
pub(crate) struct SqlWriter<'a> {
col: &'a mut Collection,
@ -128,20 +117,26 @@ impl SqlWriter<'_> {
use normalize_to_nfc as norm;
match node {
// note fields related
SearchNode::UnqualifiedText(text) => self.write_unqualified(text),
SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
SearchNode::SingleField { field, text, is_re } => {
self.write_single_field(field, text, *is_re)?
self.write_single_field(&norm(field), &self.norm_note(text), *is_re)?
}
SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text),
SearchNode::Regex(re) => self.write_regex(re),
SearchNode::NoCombining(text) => self.write_no_combining(text),
SearchNode::WordBoundary(text) => self.write_word_boundary(text),
SearchNode::Duplicates { note_type_id, text } => {
self.write_dupes(*note_type_id, &self.norm_note(text))
}
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
// other
SearchNode::AddedInDays(days) => self.write_added(*days)?,
SearchNode::EditedInDays(days) => self.write_edited(*days)?,
SearchNode::CardTemplate(template) => self.write_template(template)?,
// fixme: always norm?
SearchNode::CardTemplate(template) => match template {
TemplateKind::Ordinal(_) => self.write_template(template)?,
TemplateKind::Name(name) => {
self.write_template(&TemplateKind::Name(norm(name).into()))?
}
},
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
SearchNode::NoteTypeID(ntid) => {
write!(self.sql, "n.mid = {}", ntid).unwrap();
@ -149,10 +144,10 @@ impl SqlWriter<'_> {
SearchNode::DeckID(did) => {
write!(self.sql, "c.did = {}", did).unwrap();
}
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?,
SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
SearchNode::Tag(tag) => self.write_tag(tag)?,
SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
SearchNode::State(state) => self.write_state(state)?,
SearchNode::Flag(flag) => {
write!(self.sql, "(c.flags & 7) == {}", flag).unwrap();
@ -171,7 +166,7 @@ impl SqlWriter<'_> {
fn write_unqualified(&mut self, text: &str) {
// implicitly wrap in %
let text = format!("%{}%", &self.convert(CM::Sql, text));
let text = format!("%{}%", &to_sql(text));
self.args.push(text);
write!(
self.sql,
@ -182,7 +177,7 @@ impl SqlWriter<'_> {
}
fn write_no_combining(&mut self, text: &str) {
let text = format!("%{}%", without_combining(&self.convert(CM::Sql, text)));
let text = format!("%{}%", without_combining(&to_sql(text)));
self.args.push(text);
write!(
self.sql,
@ -205,13 +200,9 @@ impl SqlWriter<'_> {
s => {
if is_glob(s) {
write!(self.sql, "n.tags regexp ?").unwrap();
let re = &self.convert(CM::CustomRe(r"\S"), s);
let re = &to_custom_re(s, r"\S");
self.args.push(format!("(?i).* {} .*", re));
} else if let Some(tag) = self
.col
.storage
.preferred_tag_case(&self.convert(CM::Text, s))?
{
} else if let Some(tag) = self.col.storage.preferred_tag_case(&to_text(s))? {
write!(self.sql, "n.tags like ? escape '\\'").unwrap();
self.args.push(format!("% {} %", escape_sql(&tag)));
} else {
@ -329,7 +320,7 @@ impl SqlWriter<'_> {
.as_str(),
)
} else {
human_deck_name_to_native(&self.convert(CM::Regex, deck))
human_deck_name_to_native(&to_re(deck))
};
// convert to a regex that includes child decks
@ -352,7 +343,7 @@ impl SqlWriter<'_> {
}
TemplateKind::Name(name) => {
if is_glob(name) {
let re = format!("(?i){}", self.convert(CM::Regex, name));
let re = format!("(?i){}", to_re(name));
self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
);
@ -361,7 +352,7 @@ impl SqlWriter<'_> {
self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
);
self.args.push(self.convert(CM::Text, name).into());
self.args.push(to_text(name).into());
}
}
};
@ -370,14 +361,14 @@ impl SqlWriter<'_> {
fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
if is_glob(nt_name) {
let re = format!("(?i){}", self.convert(CM::Regex, nt_name));
let re = format!("(?i){}", to_re(nt_name));
self.sql
.push_str("n.mid in (select id from notetypes where name regexp ?)");
self.args.push(re);
} else {
self.sql
.push_str("n.mid in (select id from notetypes where name = ?)");
self.args.push(self.convert(CM::Text, nt_name).into());
self.args.push(to_text(nt_name).into());
}
Ok(())
}
@ -388,7 +379,7 @@ impl SqlWriter<'_> {
let mut field_map = vec![];
for nt in note_types.values() {
for field in &nt.fields {
if self.matches_glob(&field.name, field_name) {
if matches_glob(&field.name, field_name) {
field_map.push((nt.id, field.ord));
}
}
@ -407,12 +398,11 @@ impl SqlWriter<'_> {
if is_re {
cmp = "regexp";
cmp_trailer = "";
self.args
.push(format!("(?i){}", self.convert(CM::OnlyNorm, val)));
self.args.push(format!("(?i){}", val));
} else {
cmp = "like";
cmp_trailer = "escape '\\'";
self.args.push(self.convert(CM::Sql, val).into())
self.args.push(to_sql(val).into())
}
let arg_idx = self.args.len();
@ -435,7 +425,6 @@ impl SqlWriter<'_> {
}
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
let text = &self.convert(CM::OnlyNorm, text);
let text_nohtml = strip_html_preserving_media_filenames(text);
let csum = field_checksum(text_nohtml.as_ref());
write!(
@ -463,39 +452,11 @@ impl SqlWriter<'_> {
fn write_regex(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?");
self.args
.push(format!(r"(?i){}", self.convert(CM::OnlyNorm, word)));
self.args.push(format!(r"(?i){}", word));
}
fn write_word_boundary(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?");
self.args
.push(format!(r"(?i)\b{}\b", self.convert(CM::Regex, word)));
}
/// Norm text and call the according conversion function.
fn convert<'a>(&self, mode: ConversionMode, txt: &'a str) -> Cow<'a, str> {
let txt = match mode {
CM::OnlyNorm => txt.into(),
CM::Regex => to_re(txt),
CM::CustomRe(wildcard) => to_custom_re(txt, wildcard),
CM::Sql => to_sql(txt),
CM::Text => to_text(txt),
};
match txt {
Cow::Borrowed(s) => self.norm_note(s),
Cow::Owned(s) => self.norm_note(&s).to_string().into(),
}
}
/// Compare text with a possible glob, folding case.
fn matches_glob(&self, text: &str, search: &str) -> bool {
if is_glob(search) {
let search = format!("^(?i){}$", self.convert(CM::Regex, search));
Regex::new(&search).unwrap().is_match(text)
} else {
uni_eq(text, &self.convert(CM::Text, search))
}
self.write_regex(&format!(r"\b{}\b", to_re(word)));
}
}

View File

@ -5,6 +5,7 @@ use lazy_static::lazy_static;
use regex::{Captures, Regex};
use std::borrow::Cow;
use std::ptr;
use unicase::eq as uni_eq;
use unicode_normalization::{
char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization,
};
@ -362,6 +363,16 @@ pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
RE.replace_all(&txt, r"\$0")
}
/// Compare text with a possible glob, folding case.
pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
if is_glob(search) {
let search = format!("^(?i){}$", to_re(search));
Regex::new(&search).unwrap().is_match(text)
} else {
uni_eq(text, &to_text(search))
}
}
#[cfg(test)]
mod test {
use crate::text::without_combining;