anki/rslib/src/dbcheck.rs

641 lines
20 KiB
Rust
Raw Normal View History

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{collections::HashSet, sync::Arc};
use itertools::Itertools;
use slog::debug;
use crate::{
collection::Collection,
config::SchedulerVersion,
2021-04-01 09:34:03 +02:00
error::{AnkiError, DbError, DbErrorKind, Result},
2021-03-27 01:39:53 +01:00
i18n::I18n,
notetype::{
all_stock_notetypes, AlreadyGeneratedCardInfo, CardGenContext, Notetype, NotetypeId,
NotetypeKind,
},
2020-11-06 01:21:51 +01:00
prelude::*,
timestamp::{TimestampMillis, TimestampSecs},
};
#[derive(Debug, Default, PartialEq, Eq)]
pub struct CheckDatabaseOutput {
card_properties_invalid: usize,
card_position_too_high: usize,
cards_missing_note: usize,
decks_missing: usize,
revlog_properties_invalid: usize,
templates_missing: usize,
card_ords_duplicated: usize,
field_count_mismatch: usize,
notetypes_recovered: usize,
2020-11-06 01:21:51 +01:00
invalid_utf8: usize,
}
2020-06-08 12:28:11 +02:00
#[derive(Debug, Clone, Copy)]
pub(crate) enum DatabaseCheckProgress {
Integrity,
Optimize,
Cards,
Notes { current: u32, total: u32 },
History,
}
impl CheckDatabaseOutput {
pub fn to_i18n_strings(&self, tr: &I18n) -> Vec<String> {
let mut probs = Vec::new();
if self.notetypes_recovered > 0 {
probs.push(tr.database_check_notetypes_recovered());
}
if self.card_position_too_high > 0 {
probs.push(tr.database_check_new_card_high_due(self.card_position_too_high));
}
if self.card_properties_invalid > 0 {
probs.push(tr.database_check_card_properties(self.card_properties_invalid));
}
if self.cards_missing_note > 0 {
probs.push(tr.database_check_card_missing_note(self.cards_missing_note));
}
if self.decks_missing > 0 {
probs.push(tr.database_check_missing_decks(self.decks_missing));
}
if self.field_count_mismatch > 0 {
probs.push(tr.database_check_field_count(self.field_count_mismatch));
}
if self.card_ords_duplicated > 0 {
probs.push(tr.database_check_duplicate_card_ords(self.card_ords_duplicated));
}
if self.templates_missing > 0 {
probs.push(tr.database_check_missing_templates(self.templates_missing));
}
if self.revlog_properties_invalid > 0 {
probs.push(tr.database_check_revlog_properties(self.revlog_properties_invalid));
}
2020-11-06 01:21:51 +01:00
if self.invalid_utf8 > 0 {
probs.push(tr.database_check_notes_with_invalid_utf8(self.invalid_utf8));
2020-11-06 01:21:51 +01:00
}
2021-03-27 01:39:53 +01:00
probs.into_iter().map(Into::into).collect()
}
}
impl Collection {
/// Check the database, returning a list of problems that were fixed.
2020-06-08 12:28:11 +02:00
pub(crate) fn check_database<F>(&mut self, mut progress_fn: F) -> Result<CheckDatabaseOutput>
where
F: FnMut(DatabaseCheckProgress, bool),
{
progress_fn(DatabaseCheckProgress::Integrity, false);
debug!(self.log, "quick check");
if self.storage.quick_check_corrupt() {
debug!(self.log, "quick check failed");
2021-04-01 09:34:03 +02:00
return Err(AnkiError::db_error(
self.tr.database_check_corrupt(),
DbErrorKind::Corrupt,
));
}
2020-06-08 12:28:11 +02:00
progress_fn(DatabaseCheckProgress::Optimize, false);
debug!(self.log, "optimize");
self.storage.optimize()?;
undoable ops now return changes directly; add new *_ops.py files - Introduced a new transact() method that wraps the return value in a separate struct that describes the changes that were made. - Changes are now gathered from the undo log, so we don't need to guess at what was changed - eg if update_note() is called with identical note contents, no changes are returned. Card changes will only be set if cards were actually generated by the update_note() call, and tag will only be set if a new tag was added. - mw.perform_op() has been updated to expect the op to return the changes, or a structure with the changes in it, and it will use them to fire the change hook, instead of fetching the changes from undo_status(), so there is no risk of race conditions. - the various calls to mw.perform_op() have been split into separate files like card_ops.py. Aside from making the code cleaner, this works around a rather annoying issue with mypy. Because we run it with no_strict_optional, mypy is happy to accept an operation that returns None, despite the type signature saying it requires changes to be returned. Turning no_strict_optional on for the whole codebase is not practical at the moment, but we can enable it for individual files. Still todo: - The cursor keeps moving back to the start of a field when typing - we need to ignore the refresh hook when we are the initiator. - The busy cursor icon should probably be delayed a few hundreds ms. - Still need to think about a nicer way of handling saveNow() - op_made_changes(), op_affects_study_queue() might be better embedded as properties in the object instead
2021-03-16 05:26:42 +01:00
self.transact_no_undo(|col| col.check_database_inner(progress_fn))
}
2020-06-08 12:28:11 +02:00
fn check_database_inner<F>(&mut self, mut progress_fn: F) -> Result<CheckDatabaseOutput>
where
F: FnMut(DatabaseCheckProgress, bool),
{
let mut out = CheckDatabaseOutput::default();
// cards first, as we need to be able to read them to process notes
2020-06-08 12:28:11 +02:00
progress_fn(DatabaseCheckProgress::Cards, false);
debug!(self.log, "check cards");
self.check_card_properties(&mut out)?;
self.check_orphaned_cards(&mut out)?;
debug!(self.log, "check decks");
self.check_missing_deck_ids(&mut out)?;
self.check_filtered_cards(&mut out)?;
debug!(self.log, "check notetypes");
2020-06-08 12:28:11 +02:00
self.check_notetypes(&mut out, &mut progress_fn)?;
progress_fn(DatabaseCheckProgress::History, false);
debug!(self.log, "check review log");
self.check_revlog(&mut out)?;
debug!(self.log, "missing decks");
self.check_missing_deck_names(&mut out)?;
self.update_next_new_position()?;
debug!(self.log, "db check finished: {:#?}", out);
Ok(out)
}
fn check_card_properties(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let timing = self.timing_today()?;
let (new_cnt, other_cnt) = self.storage.fix_card_properties(
timing.days_elapsed,
TimestampSecs::now(),
self.usn()?,
self.scheduler_version() == SchedulerVersion::V1,
)?;
out.card_position_too_high = new_cnt;
out.card_properties_invalid += other_cnt;
Ok(())
}
fn check_orphaned_cards(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let cnt = self.storage.delete_orphaned_cards()?;
if cnt > 0 {
self.set_schema_modified()?;
out.cards_missing_note = cnt;
}
Ok(())
}
fn check_missing_deck_ids(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let usn = self.usn()?;
for did in self.storage.missing_decks()? {
self.recover_missing_deck(did, usn)?;
out.decks_missing += 1;
}
Ok(())
}
fn check_filtered_cards(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let decks = self.storage.get_decks_map()?;
let mut wrong = 0;
for (cid, did) in self.storage.all_filtered_cards_by_deck()? {
// we expect calling code to ensure all decks already exist
if let Some(deck) = decks.get(&did) {
if !deck.is_filtered() {
let mut card = self.storage.get_card(cid)?.unwrap();
card.original_deck_id.0 = 0;
card.original_due = 0;
self.storage.update_card(&card)?;
wrong += 1;
}
}
}
if wrong > 0 {
self.set_schema_modified()?;
out.card_properties_invalid += wrong;
}
Ok(())
}
2020-06-08 12:28:11 +02:00
fn check_notetypes<F>(
&mut self,
out: &mut CheckDatabaseOutput,
mut progress_fn: F,
) -> Result<()>
where
F: FnMut(DatabaseCheckProgress, bool),
{
let nids_by_notetype = self.storage.all_note_ids_by_notetype()?;
let norm = self.get_config_bool(BoolKey::NormalizeNoteText);
let usn = self.usn()?;
let stamp_millis = TimestampMillis::now();
let stamp_secs = TimestampSecs::now();
2021-02-02 09:12:50 +01:00
let expanded_tags = self.storage.expanded_tags()?;
self.storage.clear_all_tags()?;
2020-06-08 12:28:11 +02:00
let total_notes = self.storage.total_notes()?;
let mut checked_notes = 0;
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
debug!(self.log, "check notetype: {}", ntid);
let mut group = group.peekable();
let nt = match self.get_notetype(ntid)? {
None => {
let first_note = self.storage.get_note(group.peek().unwrap().1)?.unwrap();
out.notetypes_recovered += 1;
self.recover_notetype(stamp_millis, first_note.fields().len(), ntid)?
}
Some(nt) => nt,
};
let mut genctx = None;
for (_, nid) in group {
2020-06-08 12:28:11 +02:00
progress_fn(
DatabaseCheckProgress::Notes {
current: checked_notes,
total: total_notes,
},
true,
);
checked_notes += 1;
2020-11-06 01:21:51 +01:00
let mut note = self.get_note_fixing_invalid_utf8(nid, out)?;
let original = note.clone();
let cards = self.storage.existing_cards_for_note(nid)?;
out.card_ords_duplicated += self.remove_duplicate_card_ordinals(&cards)?;
out.templates_missing += self.remove_cards_without_template(&nt, &cards)?;
// fix fields
if note.fields().len() != nt.fields.len() {
note.fix_field_count(&nt);
note.tags.push("db-check".into());
out.field_count_mismatch += 1;
}
if note.mtime > stamp_secs {
note.mtime = stamp_secs;
}
// note type ID may have changed if we created a recovery notetype
note.notetype_id = nt.id;
// write note, updating tags and generating missing cards
let ctx = genctx.get_or_insert_with(|| {
Plaintext import/export (#1850) * Add crate csv * Add start of csv importing on backend * Add Menomosyne serializer * Add csv and json importing on backend * Add plaintext importing on frontend * Add csv metadata extraction on backend * Add csv importing with GUI * Fix missing dfa file in build Added compile_data_attr, then re-ran cargo/update.py. * Don't use doubly buffered reader in csv * Escape HTML entities if CSV is not HTML Also use name 'is_html' consistently. * Use decimal number as foreign ease (like '2.5') * ForeignCard.ivl → ForeignCard.interval * Only allow fixed set of CSV delimiters * Map timestamp of ForeignCard to native due time * Don't trim CSV records * Document use of empty strings for defaults * Avoid creating CardGenContexts for every note This requires CardGenContext to be generic, so it works both with an owned and borrowed notetype. * Show all accepted file types in import file picker * Add import_json_file() * factor → ease_factor * delimter_from_value → delimiter_from_value * Map columns to fields, not the other way around * Fallback to current config for csv metadata * Add start of new import csv screen * Temporary fix for compilation issue on Linux/Mac * Disable jest bazel action for import-csv Jest fails with an error code if no tests are available, but this would not be noticable on Windows as Jest is not run there. * Fix field mapping issue * Revert "Temporary fix for compilation issue on Linux/Mac" This reverts commit 21f8a261408cdae49ec031aa21a1b659c4f66d82. * Add HtmlSwitch and move Switch to components * Fix spacing and make selectors consistent * Fix shortcut tooltip * Place import button at the top with path * Fix meta column indices * Remove NotetypeForString * Fix queue and type of foreign cards * Support different dupe resolution strategies * Allow dupe resolution selection when importing CSV * Test import of unnormalized text Close #1863. * Fix logging of foreign notes * Implement CSV exports * Use db_scalar() in notes_table_len() * Rework CSV metadata - Notetypes and decks are either defined by a global id or by a column. - If a notetype id is provided, its field map must also be specified. - If a notetype column is provided, fields are now mapped by index instead of name at import time. So the first non-meta column is used for the first field of every note, regardless of notetype. This makes importing easier and should improve compatiblity with files without a notetype column. - Ensure first field can be mapped to a column. - Meta columns must be defined as `#[meta name]:[column index]` instead of in the `#columns` tag. - Column labels contain the raw names defined by the file and must be prettified by the frontend. * Adjust frontend to new backend column mapping * Add force flags for is_html and delimiter * Detect if CSV is HTML by field content * Update dupe resolution labels * Simplify selectors * Fix coalescence of oneofs in TS * Disable meta columns from selection Plus a lot of refactoring. * Make import button stick to the bottom * Write delimiter and html flag into csv * Refetch field map after notetype change * Fix log labels for csv import * Log notes whose deck/notetype was missing * Fix hiding of empty log queues * Implement adding tags to all notes of a csv * Fix dupe resolution not being set in log * Implement adding tags to updated notes of a csv * Check first note field is not empty * Temporary fix for build on Linux/Mac * Fix inverted html check (dae) * Remove unused ftl string * Delimiter → Separator * Remove commented-out line * Don't accept .json files * Tweak tag ftl strings * Remove redundant blur call * Strip sound and add spaces in csv export * Export HTML by default * Fix unset deck in Mnemosyne import Also accept both numbers and strings for notetypes and decks in JSON. * Make DupeResolution::Update the default * Fix missing dot in extension * Make column indices 1-based * Remove StickContainer from TagEditor Fixes line breaking, border and z index on ImportCsvPage. * Assign different key combos to tag editors * Log all updated duplicates Add a log field for the true number of found notes. * Show identical notes as skipped * Split tag-editor into separate ts module (dae) * Add progress for CSV export * Add progress for text import * Tidy-ups after tag-editor split (dae) - import-csv no longer depends on editor - remove some commented lines
2022-06-01 12:26:16 +02:00
CardGenContext::new(
nt.as_ref(),
self.get_last_deck_added_to_for_notetype(nt.id),
usn,
)
});
self.update_note_inner_generating_cards(
2021-06-21 04:32:11 +02:00
ctx, &mut note, &original, false, norm, true,
)?;
}
}
// the note rebuilding process took care of adding tags back, so we just need
// to ensure to restore the collapse state
2021-02-02 09:12:50 +01:00
self.storage.restore_expanded_tags(&expanded_tags)?;
// if the collection is empty and the user has deleted all note types, ensure at least
// one note type exists
if self.storage.get_all_notetype_names()?.is_empty() {
let mut nt = all_stock_notetypes(&self.tr).remove(0);
self.add_notetype_inner(&mut nt, usn, true)?;
}
if out.card_ords_duplicated > 0
|| out.field_count_mismatch > 0
|| out.templates_missing > 0
|| out.notetypes_recovered > 0
{
self.set_schema_modified()?;
}
Ok(())
}
2020-11-06 01:21:51 +01:00
fn get_note_fixing_invalid_utf8(
&self,
nid: NoteId,
2020-11-06 01:21:51 +01:00
out: &mut CheckDatabaseOutput,
) -> Result<Note> {
match self.storage.get_note(nid) {
Ok(note) => Ok(note.unwrap()),
Err(err) => match err {
2021-04-01 09:34:03 +02:00
AnkiError::DbError(DbError {
kind: DbErrorKind::Utf8,
2020-11-06 01:21:51 +01:00
..
2021-04-01 09:34:03 +02:00
}) => {
2020-11-06 01:21:51 +01:00
// fix note then fetch again
self.storage.fix_invalid_utf8_in_note(nid)?;
out.invalid_utf8 += 1;
Ok(self.storage.get_note(nid)?.unwrap())
}
// other errors are unhandled
2020-11-24 11:13:05 +01:00
_ => Err(err),
2020-11-06 01:21:51 +01:00
},
}
}
fn remove_duplicate_card_ordinals(
&mut self,
cards: &[AlreadyGeneratedCardInfo],
) -> Result<usize> {
let mut ords = HashSet::new();
let mut removed = 0;
for card in cards {
if !ords.insert(card.ord) {
self.storage.remove_card(card.id)?;
removed += 1;
}
}
Ok(removed)
}
fn remove_cards_without_template(
&mut self,
nt: &Notetype,
cards: &[AlreadyGeneratedCardInfo],
) -> Result<usize> {
if nt.config.kind() == NotetypeKind::Cloze {
return Ok(0);
}
let mut removed = 0;
for card in cards {
if card.ord as usize >= nt.templates.len() {
self.storage.remove_card(card.id)?;
removed += 1;
}
}
Ok(removed)
}
fn recover_notetype(
&mut self,
stamp: TimestampMillis,
field_count: usize,
previous_id: NotetypeId,
) -> Result<Arc<Notetype>> {
debug!(self.log, "create recovery notetype");
let extra_cards_required = self
.storage
.highest_card_ordinal_for_notetype(previous_id)?;
let mut basic = all_stock_notetypes(&self.tr).remove(0);
let mut field = 3;
while basic.fields.len() < field_count {
basic.add_field(format!("{}", field));
field += 1;
}
basic.name = format!("db-check-{}-{}", stamp, field_count);
let qfmt = basic.templates[0].config.q_format.clone();
let afmt = basic.templates[0].config.a_format.clone();
for n in 0..extra_cards_required {
basic.add_template(&format!("Card {}", n + 2), &qfmt, &afmt);
}
self.add_notetype(&mut basic, true)?;
Ok(Arc::new(basic))
}
fn check_revlog(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let cnt = self.storage.fix_revlog_properties()?;
if cnt > 0 {
self.set_schema_modified()?;
out.revlog_properties_invalid = cnt;
}
Ok(())
}
fn check_missing_deck_names(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let names = self.storage.get_all_deck_names()?;
out.decks_missing += self.add_missing_deck_names(&names)?;
Ok(())
}
2021-03-09 01:58:55 +01:00
fn update_next_new_position(&mut self) -> Result<()> {
let pos = self.storage.max_new_card_position().unwrap_or(0);
self.set_next_card_position(pos)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{collection::open_test_collection, decks::DeckId, search::SortMode};
2020-06-08 12:28:11 +02:00
fn progress_fn(_progress: DatabaseCheckProgress, _throttle: bool) {}
#[test]
fn cards() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// card properties
col.storage
.db
.execute_batch("update cards set ivl=1.5,due=2000000,odue=1.5")?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
card_properties_invalid: 2,
card_position_too_high: 1,
..Default::default()
}
);
// should be idempotent
2020-06-08 12:28:11 +02:00
assert_eq!(col.check_database(progress_fn)?, Default::default());
// missing deck
col.storage.db.execute_batch("update cards set did=123")?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
decks_missing: 1,
..Default::default()
}
);
assert_eq!(
2021-04-18 01:33:39 +02:00
col.storage
.get_deck(DeckId(123))?
.unwrap()
.name
.as_native_str(),
"recovered123"
);
// missing note
col.storage.remove_note(note.id)?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
cards_missing_note: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
0
);
Ok(())
}
#[test]
fn revlog() -> Result<()> {
let mut col = open_test_collection();
col.storage.db.execute_batch(
"
insert into revlog (id,cid,usn,ease,ivl,lastIvl,factor,time,type)
values (0,0,0,0,1.5,1.5,0,0,0)",
)?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
revlog_properties_invalid: 1,
..Default::default()
}
);
2021-06-21 04:32:11 +02:00
assert!(col
.storage
.db_scalar::<bool>("select ivl = lastIvl = 1 from revlog")?);
Ok(())
}
#[test]
fn note_card_link() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// duplicate ordinals
let cid = col.search_cards("", SortMode::NoOrder)?[0];
let mut card = col.storage.get_card(cid)?.unwrap();
card.id.0 += 1;
col.storage.add_card(&mut card)?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
card_ords_duplicated: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
1
);
// missing templates
let cid = col.search_cards("", SortMode::NoOrder)?[0];
let mut card = col.storage.get_card(cid)?.unwrap();
card.id.0 += 1;
card.template_idx = 10;
col.storage.add_card(&mut card)?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
templates_missing: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
1
);
Ok(())
}
#[test]
fn note_fields() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// excess fields get joined into the last one
col.storage
.db
.execute_batch("update notes set flds = 'a\x1fb\x1fc\x1fd'")?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
field_count_mismatch: 1,
..Default::default()
}
);
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.fields()[..], &["a", "b; c; d"]);
// missing fields get filled with blanks
col.storage
.db
.execute_batch("update notes set flds = 'a'")?;
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
field_count_mismatch: 1,
..Default::default()
}
);
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.fields()[..], &["a", ""]);
Ok(())
}
#[test]
fn deck_names() -> Result<()> {
let mut col = open_test_collection();
let deck = col.get_or_create_normal_deck("foo::bar::baz")?;
// includes default
assert_eq!(col.storage.get_all_deck_names()?.len(), 4);
col.storage
.db
.prepare("delete from decks where id != ? and id != 1")?
2021-06-25 08:22:21 +02:00
.execute([deck.id])?;
assert_eq!(col.storage.get_all_deck_names()?.len(), 2);
2020-06-08 12:28:11 +02:00
let out = col.check_database(progress_fn)?;
assert_eq!(
out,
CheckDatabaseOutput {
decks_missing: 1, // only counts the immediate parent that was missing
..Default::default()
}
);
assert_eq!(
&col.storage
.get_all_deck_names()?
.iter()
.map(|(_, name)| name)
.collect::<Vec<_>>(),
&["Default", "foo", "foo::bar", "foo::bar::baz"]
);
Ok(())
}
#[test]
fn tags() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
note.tags.push("one".into());
note.tags.push("two".into());
col.add_note(&mut note, DeckId(1))?;
col.set_tag_collapsed("one", false)?;
col.check_database(progress_fn)?;
2021-06-21 04:32:11 +02:00
assert!(col.storage.get_tag("one")?.unwrap().expanded);
assert!(!col.storage.get_tag("two")?.unwrap().expanded);
Ok(())
}
}