more search bikeshedding

While implementing the overdue search, I realised it would be nice to
be able to construct a search string with OR and NOT searches without
having to construct each part individually with build_search_string().

Changes:

- Extends SearchTerm to support a text search, which will be parsed
by the backend. This allows us to do things like wrap text in a group
or NOT node.
- Because SearchTerm->Node conversion can now fail with a parsing error,
it's switched over to TryFrom
- Switch concatenate_searches and replace_search_term to use SearchTerms,
so that they too don't require separate string building steps.
- Remove the unused normalize_search()
- Remove negate_search, as this is now an operation on a Node, and
users can wrap their search in SearchTerm(negated=...)
- Remove the match_any and negate args from build_search_string

Having done all this work, I've just realised that perhaps the original
JSON idea was more feasible than I first thought - if we wrote it out
to a string and re-parsed it, we would be able to leverage the existing
checks that occur at parsing stage.
This commit is contained in:
Damien Elmes 2021-02-11 17:11:17 +10:00
parent 242b4ea505
commit 59ccfe5918
8 changed files with 237 additions and 163 deletions

View File

@ -11,7 +11,7 @@ import sys
import time
import traceback
import weakref
from typing import Any, List, Optional, Sequence, Tuple, Union
from typing import Any, List, Literal, Optional, Sequence, Tuple, Union
import anki._backend.backend_pb2 as _pb
import anki.find
@ -526,37 +526,71 @@ class Collection:
# Search Strings
##########################################################################
# pylint: disable=no-member
def group_search_terms(self, *terms: Union[str, SearchTerm]) -> SearchTerm:
"""Join provided search terms and strings into a single SearchTerm.
If multiple terms provided, they will be ANDed together into a group.
If a single term is provided, it is returned as-is.
"""
assert terms
# convert raw text to SearchTerms
search_terms = [
term if isinstance(term, SearchTerm) else SearchTerm(unparsed_search=term)
for term in terms
]
# if there's more than one, wrap it in an implicit AND
if len(search_terms) > 1:
return SearchTerm(group=SearchTerm.Group(terms=search_terms))
else:
return search_terms[0]
def build_search_string(
self,
*terms: Union[str, SearchTerm],
negate: bool = False,
match_any: bool = False,
) -> str:
"""Helper function for the backend's search string operations.
"""Join provided search terms together, and return a normalized search string.
Pass terms as strings to normalize.
Pass fields of backend.proto/FilterToSearchIn as valid SearchTerms.
Pass multiple terms to concatenate (defaults to 'and', 'or' when 'match_any=True').
Pass 'negate=True' to negate the end result.
May raise InvalidInput.
Terms are joined by an implicit AND. You can make an explicit AND or OR
by wrapping in a group:
terms = [... one or more SearchTerms()]
group = SearchTerm.Group(op=SearchTerm.Group.OR, terms=terms)
term = SearchTerm(group=group)
To negate, wrap in a negated search term:
term = SearchTerm(negated=term)
Invalid search terms will throw an exception.
"""
term = self.group_search_terms(*terms)
return self._backend.filter_to_search(term)
# pylint: disable=no-member
def join_searches(
self,
existing_term: SearchTerm,
additional_term: SearchTerm,
operator: Literal["AND", "OR"],
) -> str:
"""
AND or OR `additional_term` to `existing_term`, without wrapping `existing_term` in brackets.
If you're building a search query yourself, prefer using SearchTerm(group=SearchTerm.Group(...))
"""
searches = []
for term in terms:
if isinstance(term, SearchTerm):
term = self._backend.filter_to_search(term)
searches.append(term)
if match_any:
sep = _pb.ConcatenateSearchesIn.OR
else:
if operator == "AND":
sep = _pb.ConcatenateSearchesIn.AND
search_string = self._backend.concatenate_searches(sep=sep, searches=searches)
if negate:
search_string = self._backend.negate_search(search_string)
else:
sep = _pb.ConcatenateSearchesIn.OR
search_string = self._backend.concatenate_searches(
sep=sep, existing_search=existing_term, additional_search=additional_term
)
return search_string
def replace_search_term(self, search: str, replacement: str) -> str:
def replace_search_term(self, search: SearchTerm, replacement: SearchTerm) -> str:
return self._backend.replace_search_term(search=search, replacement=replacement)
# Config

View File

@ -51,8 +51,6 @@ fn want_release_gil(method: u32) -> bool {
| BackendMethod::LatestProgress
| BackendMethod::SetWantsAbort
| BackendMethod::I18nResources
| BackendMethod::NormalizeSearch
| BackendMethod::NegateSearch
| BackendMethod::ConcatenateSearches
| BackendMethod::ReplaceSearchTerm
| BackendMethod::FilterToSearch

View File

@ -392,19 +392,27 @@ class SidebarTreeView(QTreeView):
self.setExpanded(idx, True)
def update_search(self, *terms: Union[str, SearchTerm]) -> None:
"""Modify the current search string based on modified keys, then refresh."""
try:
search = self.col.build_search_string(*terms)
"""Modify the current search string based on modifier keys, then refresh."""
mods = self.mw.app.keyboardModifiers()
previous = SearchTerm(unparsed_search=self.browser.current_search())
current = self.mw.col.group_search_terms(*terms)
# if Alt pressed, invert
if mods & Qt.AltModifier:
search = self.col.build_search_string(search, negate=True)
current = self.browser.current_search()
current = SearchTerm(negated=current)
try:
if mods & Qt.ControlModifier and mods & Qt.ShiftModifier:
search = self.col.replace_search_term(current, search)
# If Ctrl+Shift, replace search nodes of the same type.
search = self.col.replace_search_term(previous, current)
elif mods & Qt.ControlModifier:
search = self.col.build_search_string(current, search)
# If Ctrl, AND with previous
search = self.col.join_searches(previous, current, "AND")
elif mods & Qt.ShiftModifier:
search = self.col.build_search_string(current, search, match_any=True)
# If Shift, OR with previous
search = self.col.join_searches(previous, current, "OR")
else:
search = self.col.build_search_string(current)
except InvalidInput as e:
show_invalid_search_error(e)
else:
@ -590,7 +598,7 @@ class SidebarTreeView(QTreeView):
return top
def _filter_func(self, *terms: Union[str, SearchTerm]) -> Callable:
return lambda: self.update_search(self.col.build_search_string(*terms))
return lambda: self.update_search(*terms)
# Tree: Saved Searches
###########################

View File

@ -91,10 +91,8 @@ service BackendService {
// searching
rpc FilterToSearch(SearchTerm) returns (String);
rpc NormalizeSearch(String) returns (String);
rpc SearchCards(SearchCardsIn) returns (SearchCardsOut);
rpc SearchNotes(SearchNotesIn) returns (SearchNotesOut);
rpc NegateSearch(String) returns (String);
rpc ConcatenateSearches(ConcatenateSearchesIn) returns (String);
rpc ReplaceSearchTerm(ReplaceSearchTermIn) returns (String);
rpc FindAndReplace(FindAndReplaceIn) returns (UInt32);
@ -820,7 +818,7 @@ message SearchTerm {
oneof filter {
Group group = 1;
SearchTerm negated = 2;
string note = 3;
string unparsed_search = 3;
uint32 template = 4;
int64 nid = 5;
Dupe dupe = 6;
@ -835,6 +833,7 @@ message SearchTerm {
string deck = 15;
int32 due_on_day = 16;
string tag = 17;
string note = 18;
}
}
@ -844,12 +843,13 @@ message ConcatenateSearchesIn {
OR = 1;
}
Separator sep = 1;
repeated string searches = 2;
SearchTerm existing_search = 2;
SearchTerm additional_search = 3;
}
message ReplaceSearchTermIn {
string search = 1;
string replacement = 2;
SearchTerm search = 1;
SearchTerm replacement = 2;
}
message CloseCollectionIn {

View File

@ -38,9 +38,8 @@ use crate::{
timespan::{answer_button_time, time_span},
},
search::{
concatenate_searches, negate_search, normalize_search, replace_search_term, write_nodes,
BoolSeparator, Node, PropertyKind, RatingKind, SearchNode, SortMode, StateKind,
TemplateKind,
concatenate_searches, parse_search, replace_search_term, write_nodes, BoolSeparator, Node,
PropertyKind, RatingKind, SearchNode, SortMode, StateKind, TemplateKind,
},
stats::studied_today,
sync::{
@ -62,8 +61,8 @@ use pb::{sync_status_out, BackendService};
use prost::Message;
use serde_json::Value as JsonValue;
use slog::warn;
use std::collections::HashSet;
use std::convert::TryFrom;
use std::{collections::HashSet, convert::TryInto};
use std::{
result,
sync::{Arc, Mutex},
@ -296,12 +295,14 @@ impl From<pb::DeckConfigId> for DeckConfID {
}
}
impl From<pb::SearchTerm> for Node {
fn from(msg: pb::SearchTerm) -> Self {
impl TryFrom<pb::SearchTerm> for Node {
type Error = AnkiError;
fn try_from(msg: pb::SearchTerm) -> std::result::Result<Self, Self::Error> {
use pb::search_term::group::Operator;
use pb::search_term::Filter;
use pb::search_term::Flag;
if let Some(filter) = msg.filter {
Ok(if let Some(filter) = msg.filter {
match filter {
Filter::Tag(s) => Node::Search(SearchNode::Tag(escape_anki_wildcards(&s))),
Filter::Deck(s) => Node::Search(SearchNode::Deck(if s == "*" {
@ -351,24 +352,40 @@ impl From<pb::SearchTerm> for Node {
Flag::Green => Node::Search(SearchNode::Flag(3)),
Flag::Blue => Node::Search(SearchNode::Flag(4)),
},
Filter::Negated(term) => Node::Not(Box::new((*term).into())),
Filter::Group(group) => {
Filter::Negated(term) => Node::try_from(*term)?.negated(),
Filter::Group(mut group) => {
match group.terms.len() {
0 => return Err(AnkiError::invalid_input("empty group")),
// a group of 1 doesn't need to be a group
1 => group.terms.pop().unwrap().try_into()?,
// 2+ nodes
_ => {
let operator = match group.op() {
Operator::And => Node::And,
Operator::Or => Node::Or,
};
let joined = group
let parsed: Vec<_> = group
.terms
.into_iter()
.map(Into::into)
.intersperse(operator)
.collect();
.map(TryFrom::try_from)
.collect::<Result<_>>()?;
let joined = parsed.into_iter().intersperse(operator).collect();
Node::Group(joined)
}
}
}
Filter::UnparsedSearch(text) => {
let mut nodes = parse_search(&text)?;
if nodes.len() == 1 {
nodes.pop().unwrap()
} else {
Node::Group(nodes)
}
}
}
} else {
Node::Search(SearchNode::WholeCollection)
}
})
}
}
@ -532,11 +549,7 @@ impl BackendService for Backend {
//-----------------------------------------------
fn filter_to_search(&self, input: pb::SearchTerm) -> Result<pb::String> {
Ok(write_nodes(&[input.into()]).into())
}
fn normalize_search(&self, input: pb::String) -> Result<pb::String> {
Ok(normalize_search(&input.val)?.into())
Ok(write_nodes(&[input.try_into()?]).into())
}
fn search_cards(&self, input: pb::SearchCardsIn) -> Result<pb::SearchCardsOut> {
@ -558,16 +571,31 @@ impl BackendService for Backend {
})
}
fn negate_search(&self, input: pb::String) -> Result<pb::String> {
Ok(negate_search(&input.val)?.into())
}
fn concatenate_searches(&self, input: pb::ConcatenateSearchesIn) -> Result<pb::String> {
Ok(concatenate_searches(input.sep().into(), &input.searches)?.into())
let sep = input.sep().into();
let existing_nodes = {
let node = input.existing_search.unwrap_or_default().try_into()?;
if let Node::Group(nodes) = node {
nodes
} else {
vec![node]
}
};
let additional_node = input.additional_search.unwrap_or_default().try_into()?;
Ok(concatenate_searches(sep, existing_nodes, additional_node).into())
}
fn replace_search_term(&self, input: pb::ReplaceSearchTermIn) -> Result<pb::String> {
Ok(replace_search_term(&input.search, &input.replacement)?.into())
let existing = {
let node = input.search.unwrap_or_default().try_into()?;
if let Node::Group(nodes) = node {
nodes
} else {
vec![node]
}
};
let replacement = input.replacement.unwrap_or_default().try_into()?;
Ok(replace_search_term(existing, replacement).into())
}
fn find_and_replace(&self, input: pb::FindAndReplaceIn) -> BackendResult<pb::UInt32> {

View File

@ -8,8 +8,7 @@ mod sqlwriter;
mod writer;
pub use cards::SortMode;
pub use parser::{Node, PropertyKind, RatingKind, SearchNode, StateKind, TemplateKind};
pub use writer::{
concatenate_searches, negate_search, normalize_search, replace_search_term, write_nodes,
BoolSeparator,
pub use parser::{
parse as parse_search, Node, PropertyKind, RatingKind, SearchNode, StateKind, TemplateKind,
};
pub use writer::{concatenate_searches, replace_search_term, write_nodes, BoolSeparator};

View File

@ -38,6 +38,16 @@ pub enum Node {
Search(SearchNode),
}
impl Node {
    /// Return the logical negation of this node.
    ///
    /// Wraps the node in `Node::Not`; if the node is already a negation,
    /// the inner node is unwrapped instead, so double negation round-trips
    /// back to the original node rather than nesting `Not(Not(..))`.
    pub fn negated(self) -> Node {
        match self {
            Node::Not(inner) => *inner,
            other => Node::Not(Box::new(other)),
        }
    }
}
#[derive(Debug, PartialEq, Clone)]
pub enum SearchNode {
// text without a colon
@ -115,7 +125,7 @@ pub enum RatingKind {
}
/// Parse the input string into a list of nodes.
pub(super) fn parse(input: &str) -> Result<Vec<Node>> {
pub fn parse(input: &str) -> Result<Vec<Node>> {
let input = input.trim();
if input.is_empty() {
return Ok(vec![Node::Search(SearchNode::WholeCollection)]);
@ -980,4 +990,14 @@ mod test {
Ok(())
}
#[test]
fn negating() {
    // A plain search node negates into a Not wrapper, and negating
    // twice is the identity (Not is unwrapped, not nested).
    let plain = Node::Search(SearchNode::UnqualifiedText("foo".to_string()));
    let wrapped = Node::Not(Box::new(Node::Search(SearchNode::UnqualifiedText(
        "foo".to_string(),
    ))));
    assert_eq!(plain.clone().negated(), wrapped);
    assert_eq!(plain.clone().negated().negated(), plain);
}
}

View File

@ -3,11 +3,9 @@
use crate::{
decks::DeckID as DeckIDType,
err::Result,
notetype::NoteTypeID as NoteTypeIDType,
search::parser::{parse, Node, PropertyKind, RatingKind, SearchNode, StateKind, TemplateKind},
search::parser::{Node, PropertyKind, RatingKind, SearchNode, StateKind, TemplateKind},
};
use itertools::Itertools;
use std::mem;
#[derive(Debug, PartialEq)]
@ -16,57 +14,31 @@ pub enum BoolSeparator {
Or,
}
/// Take an Anki-style search string and convert it into an equivalent
/// search string with normalized syntax.
pub fn normalize_search(input: &str) -> Result<String> {
Ok(write_nodes(&parse(input)?))
}
/// Take an Anki-style search string and return the negated counterpart.
/// Empty searches (whole collection) remain unchanged.
pub fn negate_search(input: &str) -> Result<String> {
let mut nodes = parse(input)?;
use Node::*;
Ok(if nodes.len() == 1 {
let node = nodes.remove(0);
match node {
Not(n) => write_node(&n),
Search(SearchNode::WholeCollection) => "".to_string(),
Group(_) | Search(_) => write_node(&Not(Box::new(node))),
_ => unreachable!(),
}
} else {
write_node(&Not(Box::new(Group(nodes))))
})
}
/// Take arbitrary Anki-style search strings and return their concatenation where they
/// are separated by the provided boolean operator.
/// Empty searches (whole collection) are left out.
pub fn concatenate_searches(sep: BoolSeparator, searches: &[String]) -> Result<String> {
let bool_node = vec![match sep {
/// Take an existing search, and AND/OR it with the provided additional search.
/// This is required because when the user has "a AND b" in an existing search and
/// wants to add "c", we want "a AND b AND c", not "(a AND b) AND c", which is what we'd
/// get if we tried to join the existing search string with a new SearchTerm on the
/// client side.
pub fn concatenate_searches(
sep: BoolSeparator,
mut existing: Vec<Node>,
additional: Node,
) -> String {
if !existing.is_empty() {
existing.push(match sep {
BoolSeparator::And => Node::And,
BoolSeparator::Or => Node::Or,
}];
Ok(write_nodes(
searches
.iter()
.map(|s| parse(s))
.collect::<Result<Vec<Vec<Node>>>>()?
.iter()
.filter(|v| v[0] != Node::Search(SearchNode::WholeCollection))
.intersperse(&&bool_node)
.flat_map(|v| v.iter()),
))
});
}
existing.push(additional);
write_nodes(&existing)
}
/// Take two Anki-style search strings. If the second one evaluates to a single search
/// node, replace with it all search terms of the same kind in the first search.
/// Then return the possibly modified first search.
pub fn replace_search_term(search: &str, replacement: &str) -> Result<String> {
let mut nodes = parse(search)?;
let new = parse(replacement)?;
if let [Node::Search(search_node)] = &new[..] {
/// Given an existing parsed search, if the provided `replacement` is a single search node such
/// as a deck:xxx search, replace any instances of that search in `existing` with the new value.
/// Then return the possibly modified first search as a string.
pub fn replace_search_term(mut existing: Vec<Node>, replacement: Node) -> String {
if let Node::Search(search_node) = replacement {
fn update_node_vec(old_nodes: &mut [Node], new_node: &SearchNode) {
fn update_node(old_node: &mut Node, new_node: &SearchNode) {
match old_node {
@ -82,16 +54,13 @@ pub fn replace_search_term(search: &str, replacement: &str) -> Result<String> {
}
old_nodes.iter_mut().for_each(|n| update_node(n, new_node));
}
update_node_vec(&mut nodes, search_node);
update_node_vec(&mut existing, &search_node);
}
Ok(write_nodes(&nodes))
write_nodes(&existing)
}
pub fn write_nodes<'a, I>(nodes: I) -> String
where
I: IntoIterator<Item = &'a Node>,
{
nodes.into_iter().map(|node| write_node(node)).collect()
pub fn write_nodes(nodes: &[Node]) -> String {
nodes.iter().map(|node| write_node(node)).collect()
}
fn write_node(node: &Node) -> String {
@ -125,7 +94,7 @@ fn write_search_node(node: &SearchNode) -> String {
NoteIDs(s) => format!("\"nid:{}\"", s),
CardIDs(s) => format!("\"cid:{}\"", s),
Property { operator, kind } => write_property(operator, kind),
WholeCollection => "".to_string(),
WholeCollection => "\"deck:*\"".to_string(),
Regex(s) => quote(&format!("re:{}", s)),
NoCombining(s) => quote(&format!("nc:{}", s)),
WordBoundary(s) => quote(&format!("w:{}", s)),
@ -206,6 +175,14 @@ fn write_property(operator: &str, kind: &PropertyKind) -> String {
#[cfg(test)]
mod test {
use super::*;
use crate::err::Result;
use crate::search::parse_search as parse;
/// Parse an Anki-style search string and write it back out, yielding an
/// equivalent search string with normalized syntax.
fn normalize_search(input: &str) -> Result<String> {
    let nodes = parse(input)?;
    Ok(write_nodes(&nodes))
}
#[test]
fn normalizing() -> Result<()> {
@ -224,36 +201,40 @@ mod test {
Ok(())
}
#[test]
fn negating() -> Result<()> {
assert_eq!(r#"-("foo" AND "bar")"#, negate_search("foo bar").unwrap());
assert_eq!(r#""foo""#, negate_search("-foo").unwrap());
assert_eq!(r#"("foo")"#, negate_search("-(foo)").unwrap());
assert_eq!("", negate_search("").unwrap());
Ok(())
}
#[test]
fn concatenating() -> Result<()> {
assert_eq!(
concatenate_searches(
BoolSeparator::And,
vec![Node::Search(SearchNode::UnqualifiedText("foo".to_string()))],
Node::Search(SearchNode::UnqualifiedText("bar".to_string()))
),
r#""foo" AND "bar""#,
concatenate_searches(BoolSeparator::And, &["foo".to_string(), "bar".to_string()])
.unwrap()
);
assert_eq!(
r#""foo" OR "bar""#,
concatenate_searches(
BoolSeparator::Or,
&["foo".to_string(), "".to_string(), "bar".to_string()]
)
.unwrap()
vec![Node::Search(SearchNode::UnqualifiedText("foo".to_string()))],
Node::Search(SearchNode::UnqualifiedText("bar".to_string()))
),
r#""foo" OR "bar""#,
);
assert_eq!(
"",
concatenate_searches(BoolSeparator::Or, &["".to_string()]).unwrap()
concatenate_searches(
BoolSeparator::Or,
vec![Node::Search(SearchNode::WholeCollection)],
Node::Search(SearchNode::UnqualifiedText("bar".to_string()))
),
r#""deck:*" OR "bar""#,
);
assert_eq!(
concatenate_searches(
BoolSeparator::Or,
vec![],
Node::Search(SearchNode::UnqualifiedText("bar".to_string()))
),
r#""bar""#,
);
assert_eq!("", concatenate_searches(BoolSeparator::Or, &[]).unwrap());
Ok(())
}
@ -261,24 +242,30 @@ mod test {
#[test]
fn replacing() -> Result<()> {
assert_eq!(
replace_search_term(parse("deck:baz bar")?, parse("deck:foo")?.pop().unwrap()),
r#""deck:foo" AND "bar""#,
replace_search_term("deck:baz bar", "deck:foo").unwrap()
);
assert_eq!(
replace_search_term(
parse("tag:foo Or tag:bar")?,
parse("tag:baz")?.pop().unwrap()
),
r#""tag:baz" OR "tag:baz""#,
replace_search_term("tag:foo Or tag:bar", "tag:baz").unwrap()
);
assert_eq!(
replace_search_term(
parse("foo or (-foo tag:baz)")?,
parse("bar")?.pop().unwrap()
),
r#""bar" OR (-"bar" AND "tag:baz")"#,
replace_search_term("foo or (-foo tag:baz)", "bar").unwrap()
);
assert_eq!(
r#""is:due""#,
replace_search_term("is:due", "-is:new").unwrap()
replace_search_term(parse("is:due")?, parse("-is:new")?.pop().unwrap()),
r#""is:due""#
);
assert_eq!(
r#""added:1""#,
replace_search_term("added:1", "is:due").unwrap()
replace_search_term(parse("added:1")?, parse("is:due")?.pop().unwrap()),
r#""added:1""#
);
Ok(())