anki/rslib/src/findreplace.rs

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

use std::borrow::Cow;

use regex::Regex;

use crate::collection::Collection;
use crate::error::Result;
use crate::notes::NoteId;
use crate::notes::TransformNoteOutput;
use crate::prelude::*;
use crate::text::normalize_to_nfc;

/// Everything needed for one find-and-replace pass over a set of notes.
pub struct FindReplaceContext {
    /// IDs of the notes the replacement is run on.
    nids: Vec<NoteId>,
    /// Compiled pattern whose matches get replaced.
    search: Regex,
    /// Replacement text handed to `Regex::replace_all` for each field.
    replacement: String,
    /// Name of the only field to process; `None` means every field of each
    /// note is processed.
    field_name: Option<String>,
}

/// Which field(s) of a note the replacement applies to, resolved per notetype.
enum FieldForNotetype {
    /// No field name was requested, so all fields are processed.
    Any,
    /// The requested field exists on the notetype at this ordinal.
    Index(usize),
    /// The requested field does not exist on the notetype, so notes of that
    /// type are left untouched.
    None,
}

impl FindReplaceContext {
    /// Creates a context that replaces matches of `search_re` with `repl` in
    /// the notes listed in `nids`.
    ///
    /// `field_name` restricts the replacement to the field with that name;
    /// pass `None` to process every field.
    ///
    /// # Errors
    ///
    /// Returns an error if `search_re` cannot be compiled as a regular
    /// expression.
    pub fn new(
        nids: Vec<NoteId>,
        search_re: &str,
        repl: impl Into<String>,
        field_name: Option<String>,
    ) -> Result<Self> {
        // Compile first so an invalid pattern bails out before anything else.
        let search = Regex::new(search_re)?;
        let replacement = repl.into();
        Ok(Self {
            nids,
            search,
            replacement,
            field_name,
        })
    }

    /// Replaces every match of the search pattern in `text`.
    ///
    /// The result is `Cow::Borrowed` when nothing matched, which callers use
    /// to detect whether the text actually changed.
    fn replace_text<'a>(&self, text: &'a str) -> Cow<'a, str> {
        let replacement: &str = &self.replacement;
        self.search.replace_all(text, replacement)
    }
}

impl Collection {
    /// Runs a regex find-and-replace over the given notes inside a single
    /// `Op::FindAndReplace` transaction.
    ///
    /// If the `NormalizeNoteText` config flag is set, `search_re` is passed
    /// through `normalize_to_nfc` before it is compiled. When `field_name` is
    /// `Some`, only the field with that name is processed and notes whose
    /// notetype lacks it are left untouched; when it is `None`, every field
    /// is processed.
    ///
    /// # Errors
    ///
    /// Fails if the pattern does not compile or if transforming the notes
    /// fails.
    pub fn find_and_replace(
        &mut self,
        nids: Vec<NoteId>,
        search_re: &str,
        repl: &str,
        field_name: Option<String>,
    ) -> Result<OpOutput<usize>> {
        self.transact(Op::FindAndReplace, |col| {
            let pattern = if col.get_config_bool(BoolKey::NormalizeNoteText) {
                normalize_to_nfc(search_re)
            } else {
                search_re.into()
            };
            let context = FindReplaceContext::new(nids, &pattern, repl, field_name)?;
            col.find_and_replace_inner(context)
        })
    }

    /// Applies `ctx` to each of its notes via `transform_notes` and returns
    /// the count that call reports.
    fn find_and_replace_inner(&mut self, ctx: FindReplaceContext) -> Result<usize> {
        // Rewrites `field` in place and reports whether its text changed.
        fn replace_in_field(ctx: &FindReplaceContext, field: &mut String) -> bool {
            let replaced = match ctx.replace_text(field) {
                Cow::Owned(text) => text,
                // A borrowed result means nothing matched.
                Cow::Borrowed(_) => return false,
            };
            *field = replaced;
            true
        }

        let mut cached_ntid = None;
        let mut target = FieldForNotetype::None;
        self.transform_notes(&ctx.nids, |note, nt| {
            // Resolve the target field again only when the notetype differs
            // from the one seen on the previous note.
            if cached_ntid != Some(nt.id) {
                target = match &ctx.field_name {
                    Some(name) => nt
                        .get_field_ord(name)
                        .map_or(FieldForNotetype::None, FieldForNotetype::Index),
                    None => FieldForNotetype::Any,
                };
                cached_ntid = Some(nt.id);
            }

            let mut changed = false;
            match target {
                FieldForNotetype::None => {}
                FieldForNotetype::Index(ord) => {
                    if let Some(field) = note.fields_mut().get_mut(ord) {
                        changed = replace_in_field(&ctx, field);
                    }
                }
                FieldForNotetype::Any => {
                    for field in note.fields_mut() {
                        // `|=` keeps processing all fields after the first hit.
                        changed |= replace_in_field(&ctx, field);
                    }
                }
            }

            Ok(TransformNoteOutput {
                changed,
                generate_cards: true,
                mark_modified: true,
                update_tags: false,
            })
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::decks::DeckId;

    /// Exercises find-and-replace both across all fields and restricted to a
    /// single named field.
    #[test]
    fn findreplace() -> Result<()> {
        let mut col = Collection::new();

        // A "Basic" note with the search text in both of its fields.
        let nt = col.get_notetype_by_name("Basic")?.unwrap();
        let mut note = nt.new_note();
        note.set_field(0, "one aaa")?;
        note.set_field(1, "two aaa")?;
        col.add_note(&mut note, DeckId(1))?;

        // A "Cloze" note with the search text in its first field only.
        let nt = col.get_notetype_by_name("Cloze")?.unwrap();
        let mut note2 = nt.new_note();
        note2.set_field(0, "three aaa")?;
        col.add_note(&mut note2, DeckId(1))?;

        // No field name given: the case-insensitive pattern matches in both
        // notes.
        let nids = col.search_notes_unordered("")?;
        let out = col.find_and_replace(nids.clone(), "(?i)AAA", "BBB", None)?;
        assert_eq!(out.output, 2);

        let note = col.storage.get_note(note.id)?.unwrap();
        // no field was specified, so every field of the note should be updated
        assert_eq!(&note.fields()[..], &["one BBB", "two BBB"]);

        let note2 = col.storage.get_note(note2.id)?.unwrap();
        assert_eq!(&note2.fields()[..], &["three BBB", ""]);

        // Field names of both notetypes combined; "Front" exists only on
        // the "Basic" note.
        assert_eq!(
            col.storage.field_names_for_notes(&nids)?,
            vec![
                "Back".to_string(),
                "Back Extra".into(),
                "Front".into(),
                "Text".into()
            ]
        );
        let out = col.find_and_replace(nids, "BBB", "ccc", Some("Front".into()))?;
        // 1, because notes without the specified field should be skipped
        assert_eq!(out.output, 1);

        let note = col.storage.get_note(note.id)?.unwrap();
        // the update should be limited to the specified field when it was available
        assert_eq!(&note.fields()[..], &["one ccc", "two BBB"]);

        Ok(())
    }
}
move find&replace to backend 2020-05-05 12:50:17 +02:00			`// Copyright: Ankitects Pty Ltd and contributors`
			`// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html`

tidy up Rust imports rustfmt can do this automatically, but only when run with a nightly toolchain, so it needs to be manually done for now - see rslib/rustfmt.toml 2021-04-18 10:29:20 +02:00			`use std::borrow::Cow;`

			`use regex::Regex;`

Switch Rust import style (#2330) * Prepare to switch Rust import style * Run nightly format Closes #2320 * Clean up a few imports * Enable comment wrapping * Wrap comments 2023-01-18 12:39:55 +01:00			`use crate::collection::Collection;`
			`use crate::error::Result;`
			`use crate::notes::NoteId;`
			`use crate::notes::TransformNoteOutput;`
			`use crate::prelude::*;`
			`use crate::text::normalize_to_nfc;`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`pub struct FindReplaceContext {`
use mixed case for abbreviations in Rust code So, this is fun. Apparently "DeckId" is considered preferable to the "DeckID" we were using until now, and the latest clippy will start warning about it. We could of course disable the warning, but probably better to bite the bullet and switch to the naming that's generally considered best. 2021-03-27 10:53:33 +01:00			`nids: Vec<NoteId>,`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`search: Regex,`
			`replacement: String,`
			`field_name: Option<String>,`
			`}`

Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`enum FieldForNotetype {`
			`Any,`
			`Index(usize),`
			`None,`
			`}`

move find&replace to backend 2020-05-05 12:50:17 +02:00			`impl FindReplaceContext {`
			`pub fn new(`
use mixed case for abbreviations in Rust code So, this is fun. Apparently "DeckId" is considered preferable to the "DeckID" we were using until now, and the latest clippy will start warning about it. We could of course disable the warning, but probably better to bite the bullet and switch to the naming that's generally considered best. 2021-03-27 10:53:33 +01:00			`nids: Vec<NoteId>,`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`search_re: &str,`
			`repl: impl Into<String>,`
			`field_name: Option<String>,`
			`) -> Result<Self> {`
			`Ok(FindReplaceContext {`
			`nids,`
Remove redundant error mapping (#1593) Helpful regex error in Find&Replace was obscured by generic "Invalid input" error. 2022-01-16 04:46:27 +01:00			`search: Regex::new(search_re)?,`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`replacement: repl.into(),`
			`field_name,`
			`})`
			`}`

			`fn replace_text<'a>(&self, text: &'a str) -> Cow<'a, str> {`
			`self.search.replace_all(text, self.replacement.as_str())`
			`}`
			`}`

			`impl Collection {`
support disabling unicode normalization in notes 2020-05-06 12:06:42 +02:00			`pub fn find_and_replace(`
			`&mut self,`
use mixed case for abbreviations in Rust code So, this is fun. Apparently "DeckId" is considered preferable to the "DeckID" we were using until now, and the latest clippy will start warning about it. We could of course disable the warning, but probably better to bite the bullet and switch to the naming that's generally considered best. 2021-03-27 10:53:33 +01:00			`nids: Vec<NoteId>,`
support disabling unicode normalization in notes 2020-05-06 12:06:42 +02:00			`search_re: &str,`
			`repl: &str,`
			`field_name: Option<String>,`
undoable ops now return changes directly; add new *_ops.py files - Introduced a new transact() method that wraps the return value in a separate struct that describes the changes that were made. - Changes are now gathered from the undo log, so we don't need to guess at what was changed - eg if update_note() is called with identical note contents, no changes are returned. Card changes will only be set if cards were actually generated by the update_note() call, and tag will only be set if a new tag was added. - mw.perform_op() has been updated to expect the op to return the changes, or a structure with the changes in it, and it will use them to fire the change hook, instead of fetching the changes from undo_status(), so there is no risk of race conditions. - the various calls to mw.perform_op() have been split into separate files like card_ops.py. Aside from making the code cleaner, this works around a rather annoying issue with mypy. Because we run it with no_strict_optional, mypy is happy to accept an operation that returns None, despite the type signature saying it requires changes to be returned. Turning no_strict_optional on for the whole codebase is not practical at the moment, but we can enable it for individual files. Still todo: - The cursor keeps moving back to the start of a field when typing - we need to ignore the refresh hook when we are the initiator. - The busy cursor icon should probably be delayed a few hundreds ms. - Still need to think about a nicer way of handling saveNow() - op_made_changes(), op_affects_study_queue() might be better embedded as properties in the object instead 2021-03-16 05:26:42 +01:00			`) -> Result<OpOutput<usize>> {`
			`self.transact(Op::FindAndReplace, \|col\| {`
expose undoable config changes to frontend; refresh sidebar The browser header handling still needs updating 2021-05-21 09:50:41 +02:00			`let norm = col.get_config_bool(BoolKey::NormalizeNoteText);`
support disabling unicode normalization in notes 2020-05-06 12:06:42 +02:00			`let search = if norm {`
			`normalize_to_nfc(search_re)`
			`} else {`
			`search_re.into()`
			`};`
			`let ctx = FindReplaceContext::new(nids, &search, repl, field_name)?;`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`col.find_and_replace_inner(ctx)`
support disabling unicode normalization in notes 2020-05-06 12:06:42 +02:00			`})`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`}`

bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`fn find_and_replace_inner(&mut self, ctx: FindReplaceContext) -> Result<usize> {`
			`let mut last_ntid = None;`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`let mut field_for_notetype = FieldForNotetype::None;`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`self.transform_notes(&ctx.nids, \|note, nt\| {`
			`if last_ntid != Some(nt.id) {`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`field_for_notetype = match ctx.field_name.as_ref() {`
			`None => FieldForNotetype::Any,`
			`Some(name) => match nt.get_field_ord(name) {`
			`None => FieldForNotetype::None,`
			`Some(ord) => FieldForNotetype::Index(ord),`
			`},`
			`};`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`last_ntid = Some(nt.id);`
			`}`

			`let mut changed = false;`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`match field_for_notetype {`
			`FieldForNotetype::Any => {`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`for txt in note.fields_mut() {`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`if let Cow::Owned(otxt) = ctx.replace_text(txt) {`
			`changed = true;`
			`*txt = otxt;`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`}`
			`}`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`}`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`FieldForNotetype::Index(ord) => {`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`if let Some(txt) = note.fields_mut().get_mut(ord) {`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`if let Cow::Owned(otxt) = ctx.replace_text(txt) {`
			`changed = true;`
			`*txt = otxt;`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`}`
			`}`
			`}`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`FieldForNotetype::None => (),`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`}`

bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`Ok(TransformNoteOutput {`
			`changed,`
			`generate_cards: true,`
			`mark_modified: true,`
update backend to support undoing of notetype changes 2021-04-29 15:28:42 +02:00			`update_tags: false,`
bulk tag add/remove/update; canonify on note save also remove the tag list updated hook - we'll need a better solution in the future than having the library code call back into the GUI code 2020-05-07 09:54:23 +02:00			`})`
			`})`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`}`
			`}`

			`#[cfg(test)]`
			`mod test {`
			`use super::*;`
Switch Rust import style (#2330) * Prepare to switch Rust import style * Run nightly format Closes #2320 * Clean up a few imports * Enable comment wrapping * Wrap comments 2023-01-18 12:39:55 +01:00			`use crate::decks::DeckId;`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`#[test]`
			`fn findreplace() -> Result<()> {`
Fix invalid ids on db check (#2445) * Move open_test_collection into Collection test impl * Fix invalid ids when checking database * Report fixed invalid ids * Improve message when trying to export invalid ids Also move ImportError due to namespace conflicts with snafu macro. * Take a human name in DeckAdder::new * Mention timestamps in the db check message (dae) Will help to correlate the fix with the message shown when importing/ exporting. 2023-03-19 01:58:35 +01:00			`let mut col = Collection::new();`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`let nt = col.get_notetype_by_name("Basic")?.unwrap();`
			`let mut note = nt.new_note();`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`note.set_field(0, "one aaa")?;`
			`note.set_field(1, "two aaa")?;`
use mixed case for abbreviations in Rust code So, this is fun. Apparently "DeckId" is considered preferable to the "DeckID" we were using until now, and the latest clippy will start warning about it. We could of course disable the warning, but probably better to bite the bullet and switch to the naming that's generally considered best. 2021-03-27 10:53:33 +01:00			`col.add_note(&mut note, DeckId(1))?;`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`let nt = col.get_notetype_by_name("Cloze")?.unwrap();`
			`let mut note2 = nt.new_note();`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`note2.set_field(0, "three aaa")?;`
use mixed case for abbreviations in Rust code So, this is fun. Apparently "DeckId" is considered preferable to the "DeckID" we were using until now, and the latest clippy will start warning about it. We could of course disable the warning, but probably better to bite the bullet and switch to the naming that's generally considered best. 2021-03-27 10:53:33 +01:00			`col.add_note(&mut note2, DeckId(1))?;`
move find&replace to backend 2020-05-05 12:50:17 +02:00
make it more ergonomic to search directly via nodes in Rust 2021-04-30 03:37:55 +02:00			`let nids = col.search_notes_unordered("")?;`
undoable ops now return changes directly; add new *_ops.py files - Introduced a new transact() method that wraps the return value in a separate struct that describes the changes that were made. - Changes are now gathered from the undo log, so we don't need to guess at what was changed - eg if update_note() is called with identical note contents, no changes are returned. Card changes will only be set if cards were actually generated by the update_note() call, and tag will only be set if a new tag was added. - mw.perform_op() has been updated to expect the op to return the changes, or a structure with the changes in it, and it will use them to fire the change hook, instead of fetching the changes from undo_status(), so there is no risk of race conditions. - the various calls to mw.perform_op() have been split into separate files like card_ops.py. Aside from making the code cleaner, this works around a rather annoying issue with mypy. Because we run it with no_strict_optional, mypy is happy to accept an operation that returns None, despite the type signature saying it requires changes to be returned. Turning no_strict_optional on for the whole codebase is not practical at the moment, but we can enable it for individual files. Still todo: - The cursor keeps moving back to the start of a field when typing - we need to ignore the refresh hook when we are the initiator. - The busy cursor icon should probably be delayed a few hundreds ms. - Still need to think about a nicer way of handling saveNow() - op_made_changes(), op_affects_study_queue() might be better embedded as properties in the object instead 2021-03-16 05:26:42 +01:00			`let out = col.find_and_replace(nids.clone(), "(?i)AAA", "BBB", None)?;`
			`assert_eq!(out.output, 2);`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`let note = col.storage.get_note(note.id)?.unwrap();`
			`// but the update should be limited to the specified field when it was available`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`assert_eq!(&note.fields()[..], &["one BBB", "two BBB"]);`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`let note2 = col.storage.get_note(note2.id)?.unwrap();`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`assert_eq!(&note2.fields()[..], &["three BBB", ""]);`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`assert_eq!(`
			`col.storage.field_names_for_notes(&nids)?,`
add the Extra field back to cloze deletions as "Back Extra" Originally removed because some users were adding cloze deletions to it, but removing it just replaced that problem with a new problem where users add the field but don't add it to their card templates. 2020-05-07 00:31:49 +02:00			`vec![`
			`"Back".to_string(),`
			`"Back Extra".into(),`
			`"Front".into(),`
			`"Text".into()`
			`]`
move find&replace to backend 2020-05-05 12:50:17 +02:00			`);`
undoable ops now return changes directly; add new *_ops.py files - Introduced a new transact() method that wraps the return value in a separate struct that describes the changes that were made. - Changes are now gathered from the undo log, so we don't need to guess at what was changed - eg if update_note() is called with identical note contents, no changes are returned. Card changes will only be set if cards were actually generated by the update_note() call, and tag will only be set if a new tag was added. - mw.perform_op() has been updated to expect the op to return the changes, or a structure with the changes in it, and it will use them to fire the change hook, instead of fetching the changes from undo_status(), so there is no risk of race conditions. - the various calls to mw.perform_op() have been split into separate files like card_ops.py. Aside from making the code cleaner, this works around a rather annoying issue with mypy. Because we run it with no_strict_optional, mypy is happy to accept an operation that returns None, despite the type signature saying it requires changes to be returned. Turning no_strict_optional on for the whole codebase is not practical at the moment, but we can enable it for individual files. Still todo: - The cursor keeps moving back to the start of a field when typing - we need to ignore the refresh hook when we are the initiator. - The busy cursor icon should probably be delayed a few hundreds ms. - Still need to think about a nicer way of handling saveNow() - op_made_changes(), op_affects_study_queue() might be better embedded as properties in the object instead 2021-03-16 05:26:42 +01:00			`let out = col.find_and_replace(nids, "BBB", "ccc", Some("Front".into()))?;`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`// 1, because notes without the specified field should be skipped`
			`assert_eq!(out.output, 1);`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`let note = col.storage.get_note(note.id)?.unwrap();`
Fix find & replace for notes without 'field_name' Distinguish between no 'field_name' passed and 'field_name' not on note. 2021-07-13 16:21:13 +02:00			`// the update should be limited to the specified field when it was available`
initial work on undoing reviews+burying siblings - fetch sfld and csum when fetching notes, to make it cheaper to write them back out unmodified - make `fields` private, and access it via accessors, so we can still catch when fields have been mutated without calling prepare_for_update() - fix python importing code passing a string in as the checksum 2021-03-02 10:02:00 +01:00			`assert_eq!(&note.fields()[..], &["one ccc", "two BBB"]);`
move find&replace to backend 2020-05-05 12:50:17 +02:00
			`Ok(())`
			`}`
			`}`