Revert changes to normalisation handling

Handle norm calls individually in write_search_node_to_sql again.
This commit is contained in:
RumovZ 2020-11-18 23:46:27 +01:00
parent 6e51bad7db
commit 785540bddc
2 changed files with 43 additions and 71 deletions

View File

@ -10,23 +10,12 @@ use crate::{
notes::field_checksum, notes::field_checksum,
notetype::NoteTypeID, notetype::NoteTypeID,
text::{ text::{
escape_sql, is_glob, normalize_to_nfc, strip_html_preserving_media_filenames, to_custom_re, escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames,
to_re, to_sql, to_text, without_combining, to_custom_re, to_re, to_sql, to_text, without_combining,
}, },
timestamp::TimestampSecs, timestamp::TimestampSecs,
}; };
use regex::Regex;
use std::{borrow::Cow, fmt::Write}; use std::{borrow::Cow, fmt::Write};
use unicase::eq as uni_eq;
use ConversionMode as CM;
/// Selects which text conversion `SqlWriter::convert` applies to a search
/// term before passing the result through `norm_note`.
enum ConversionMode<'a> {
/// No conversion; the text is handed to normalisation unchanged.
OnlyNorm,
/// Convert with `to_re`.
Regex,
/// Convert with `to_custom_re`, forwarding the carried string as its
/// second (wildcard) argument, e.g. `r"\S"` for tag searches.
CustomRe(&'a str),
/// Convert with `to_sql`.
Sql,
/// Convert with `to_text`.
Text,
}
pub(crate) struct SqlWriter<'a> { pub(crate) struct SqlWriter<'a> {
col: &'a mut Collection, col: &'a mut Collection,
@ -128,20 +117,26 @@ impl SqlWriter<'_> {
use normalize_to_nfc as norm; use normalize_to_nfc as norm;
match node { match node {
// note fields related // note fields related
SearchNode::UnqualifiedText(text) => self.write_unqualified(text), SearchNode::UnqualifiedText(text) => self.write_unqualified(&self.norm_note(text)),
SearchNode::SingleField { field, text, is_re } => { SearchNode::SingleField { field, text, is_re } => {
self.write_single_field(field, text, *is_re)? self.write_single_field(&norm(field), &self.norm_note(text), *is_re)?
} }
SearchNode::Duplicates { note_type_id, text } => self.write_dupes(*note_type_id, text), SearchNode::Duplicates { note_type_id, text } => {
SearchNode::Regex(re) => self.write_regex(re), self.write_dupes(*note_type_id, &self.norm_note(text))
SearchNode::NoCombining(text) => self.write_no_combining(text), }
SearchNode::WordBoundary(text) => self.write_word_boundary(text), SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
SearchNode::WordBoundary(text) => self.write_word_boundary(&self.norm_note(text)),
// other // other
SearchNode::AddedInDays(days) => self.write_added(*days)?, SearchNode::AddedInDays(days) => self.write_added(*days)?,
SearchNode::EditedInDays(days) => self.write_edited(*days)?, SearchNode::EditedInDays(days) => self.write_edited(*days)?,
SearchNode::CardTemplate(template) => self.write_template(template)?, SearchNode::CardTemplate(template) => match template {
// fixme: always norm? TemplateKind::Ordinal(_) => self.write_template(template)?,
TemplateKind::Name(name) => {
self.write_template(&TemplateKind::Name(norm(name).into()))?
}
},
SearchNode::Deck(deck) => self.write_deck(&norm(deck))?, SearchNode::Deck(deck) => self.write_deck(&norm(deck))?,
SearchNode::NoteTypeID(ntid) => { SearchNode::NoteTypeID(ntid) => {
write!(self.sql, "n.mid = {}", ntid).unwrap(); write!(self.sql, "n.mid = {}", ntid).unwrap();
@ -149,10 +144,10 @@ impl SqlWriter<'_> {
SearchNode::DeckID(did) => { SearchNode::DeckID(did) => {
write!(self.sql, "c.did = {}", did).unwrap(); write!(self.sql, "c.did = {}", did).unwrap();
} }
SearchNode::NoteType(notetype) => self.write_note_type(notetype)?, SearchNode::NoteType(notetype) => self.write_note_type(&norm(notetype))?,
SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?, SearchNode::Rated { days, ease } => self.write_rated(*days, *ease)?,
SearchNode::Tag(tag) => self.write_tag(tag)?, SearchNode::Tag(tag) => self.write_tag(&norm(tag))?,
SearchNode::State(state) => self.write_state(state)?, SearchNode::State(state) => self.write_state(state)?,
SearchNode::Flag(flag) => { SearchNode::Flag(flag) => {
write!(self.sql, "(c.flags & 7) == {}", flag).unwrap(); write!(self.sql, "(c.flags & 7) == {}", flag).unwrap();
@ -171,7 +166,7 @@ impl SqlWriter<'_> {
fn write_unqualified(&mut self, text: &str) { fn write_unqualified(&mut self, text: &str) {
// implicitly wrap in % // implicitly wrap in %
let text = format!("%{}%", &self.convert(CM::Sql, text)); let text = format!("%{}%", &to_sql(text));
self.args.push(text); self.args.push(text);
write!( write!(
self.sql, self.sql,
@ -182,7 +177,7 @@ impl SqlWriter<'_> {
} }
fn write_no_combining(&mut self, text: &str) { fn write_no_combining(&mut self, text: &str) {
let text = format!("%{}%", without_combining(&self.convert(CM::Sql, text))); let text = format!("%{}%", without_combining(&to_sql(text)));
self.args.push(text); self.args.push(text);
write!( write!(
self.sql, self.sql,
@ -205,13 +200,9 @@ impl SqlWriter<'_> {
s => { s => {
if is_glob(s) { if is_glob(s) {
write!(self.sql, "n.tags regexp ?").unwrap(); write!(self.sql, "n.tags regexp ?").unwrap();
let re = &self.convert(CM::CustomRe(r"\S"), s); let re = &to_custom_re(s, r"\S");
self.args.push(format!("(?i).* {} .*", re)); self.args.push(format!("(?i).* {} .*", re));
} else if let Some(tag) = self } else if let Some(tag) = self.col.storage.preferred_tag_case(&to_text(s))? {
.col
.storage
.preferred_tag_case(&self.convert(CM::Text, s))?
{
write!(self.sql, "n.tags like ? escape '\\'").unwrap(); write!(self.sql, "n.tags like ? escape '\\'").unwrap();
self.args.push(format!("% {} %", escape_sql(&tag))); self.args.push(format!("% {} %", escape_sql(&tag)));
} else { } else {
@ -329,7 +320,7 @@ impl SqlWriter<'_> {
.as_str(), .as_str(),
) )
} else { } else {
human_deck_name_to_native(&self.convert(CM::Regex, deck)) human_deck_name_to_native(&to_re(deck))
}; };
// convert to a regex that includes child decks // convert to a regex that includes child decks
@ -352,7 +343,7 @@ impl SqlWriter<'_> {
} }
TemplateKind::Name(name) => { TemplateKind::Name(name) => {
if is_glob(name) { if is_glob(name) {
let re = format!("(?i){}", self.convert(CM::Regex, name)); let re = format!("(?i){}", to_re(name));
self.sql.push_str( self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)", "(n.mid,c.ord) in (select ntid,ord from templates where name regexp ?)",
); );
@ -361,7 +352,7 @@ impl SqlWriter<'_> {
self.sql.push_str( self.sql.push_str(
"(n.mid,c.ord) in (select ntid,ord from templates where name = ?)", "(n.mid,c.ord) in (select ntid,ord from templates where name = ?)",
); );
self.args.push(self.convert(CM::Text, name).into()); self.args.push(to_text(name).into());
} }
} }
}; };
@ -370,14 +361,14 @@ impl SqlWriter<'_> {
fn write_note_type(&mut self, nt_name: &str) -> Result<()> { fn write_note_type(&mut self, nt_name: &str) -> Result<()> {
if is_glob(nt_name) { if is_glob(nt_name) {
let re = format!("(?i){}", self.convert(CM::Regex, nt_name)); let re = format!("(?i){}", to_re(nt_name));
self.sql self.sql
.push_str("n.mid in (select id from notetypes where name regexp ?)"); .push_str("n.mid in (select id from notetypes where name regexp ?)");
self.args.push(re); self.args.push(re);
} else { } else {
self.sql self.sql
.push_str("n.mid in (select id from notetypes where name = ?)"); .push_str("n.mid in (select id from notetypes where name = ?)");
self.args.push(self.convert(CM::Text, nt_name).into()); self.args.push(to_text(nt_name).into());
} }
Ok(()) Ok(())
} }
@ -388,7 +379,7 @@ impl SqlWriter<'_> {
let mut field_map = vec![]; let mut field_map = vec![];
for nt in note_types.values() { for nt in note_types.values() {
for field in &nt.fields { for field in &nt.fields {
if self.matches_glob(&field.name, field_name) { if matches_glob(&field.name, field_name) {
field_map.push((nt.id, field.ord)); field_map.push((nt.id, field.ord));
} }
} }
@ -407,12 +398,11 @@ impl SqlWriter<'_> {
if is_re { if is_re {
cmp = "regexp"; cmp = "regexp";
cmp_trailer = ""; cmp_trailer = "";
self.args self.args.push(format!("(?i){}", val));
.push(format!("(?i){}", self.convert(CM::OnlyNorm, val)));
} else { } else {
cmp = "like"; cmp = "like";
cmp_trailer = "escape '\\'"; cmp_trailer = "escape '\\'";
self.args.push(self.convert(CM::Sql, val).into()) self.args.push(to_sql(val).into())
} }
let arg_idx = self.args.len(); let arg_idx = self.args.len();
@ -435,7 +425,6 @@ impl SqlWriter<'_> {
} }
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) { fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) {
let text = &self.convert(CM::OnlyNorm, text);
let text_nohtml = strip_html_preserving_media_filenames(text); let text_nohtml = strip_html_preserving_media_filenames(text);
let csum = field_checksum(text_nohtml.as_ref()); let csum = field_checksum(text_nohtml.as_ref());
write!( write!(
@ -463,39 +452,11 @@ impl SqlWriter<'_> {
fn write_regex(&mut self, word: &str) { fn write_regex(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?"); self.sql.push_str("n.flds regexp ?");
self.args self.args.push(format!(r"(?i){}", word));
.push(format!(r"(?i){}", self.convert(CM::OnlyNorm, word)));
} }
fn write_word_boundary(&mut self, word: &str) { fn write_word_boundary(&mut self, word: &str) {
self.sql.push_str("n.flds regexp ?"); self.write_regex(&format!(r"\b{}\b", to_re(word)));
self.args
.push(format!(r"(?i)\b{}\b", self.convert(CM::Regex, word)));
}
/// Run `txt` through the conversion function chosen by `mode`, then pass the
/// converted text through `self.norm_note`.
///
/// `CM::OnlyNorm` skips the conversion step. When the conversion produced an
/// owned string, the `norm_note` result is copied into a fresh owned string
/// so the returned value does not borrow from the local temporary.
fn convert<'a>(&self, mode: ConversionMode, txt: &'a str) -> Cow<'a, str> {
    let converted: Cow<'a, str> = match mode {
        CM::Text => to_text(txt),
        CM::Sql => to_sql(txt),
        CM::CustomRe(wildcard) => to_custom_re(txt, wildcard),
        CM::Regex => to_re(txt),
        CM::OnlyNorm => Cow::Borrowed(txt),
    };
    match converted {
        // The owned temporary dies at the end of this arm, so detach the result.
        Cow::Owned(owned) => Cow::Owned(self.norm_note(&owned).into_owned()),
        Cow::Borrowed(borrowed) => self.norm_note(borrowed),
    }
}
/// Compare text with a possible glob, folding case.
fn matches_glob(&self, text: &str, search: &str) -> bool {
if is_glob(search) {
let search = format!("^(?i){}$", self.convert(CM::Regex, search));
Regex::new(&search).unwrap().is_match(text)
} else {
uni_eq(text, &self.convert(CM::Text, search))
}
} }
} }

View File

@ -5,6 +5,7 @@ use lazy_static::lazy_static;
use regex::{Captures, Regex}; use regex::{Captures, Regex};
use std::borrow::Cow; use std::borrow::Cow;
use std::ptr; use std::ptr;
use unicase::eq as uni_eq;
use unicode_normalization::{ use unicode_normalization::{
char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization, char::is_combining_mark, is_nfc, is_nfkd_quick, IsNormalized, UnicodeNormalization,
}; };
@ -362,6 +363,16 @@ pub(crate) fn escape_sql(txt: &str) -> Cow<str> {
RE.replace_all(&txt, r"\$0") RE.replace_all(&txt, r"\$0")
} }
/// Case-insensitively test `text` against `search`.
///
/// If `is_glob(search)` is false, `search` is converted with `to_text` and
/// compared to `text` with `uni_eq`. Otherwise `search` is converted with
/// `to_re`, wrapped as `^(?i)…$`, and matched against `text` as a regex.
///
/// Panics if the generated pattern is rejected by `Regex::new`.
pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
    if !is_glob(search) {
        return uni_eq(text, &to_text(search));
    }
    let pattern = format!("^(?i){}$", to_re(search));
    let compiled = Regex::new(&pattern).unwrap();
    compiled.is_match(text)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::text::without_combining; use crate::text::without_combining;