Handle notes with invalid UTF-8

This commit is contained in:
Damien Elmes 2020-11-06 10:21:51 +10:00
parent 96f6b94dba
commit c157ccb3f5
5 changed files with 66 additions and 3 deletions

View File

@ -40,6 +40,11 @@ database-check-revlog-properties =
[one] Fixed { $count } review entry with invalid properties.
*[other] Fixed { $count } review entries with invalid properties.
}
# Reported by the database check; { $count } is the number of notes whose fields were repaired.
database-check-notes-with-invalid-utf8 =
{ $count ->
[one] Fixed { $count } note with invalid utf8 characters.
*[other] Fixed { $count } notes with invalid utf8 characters.
}
# "db-check" is always in English
database-check-notetypes-recovered = One or more notetypes were missing. The notes that used them have been given new notetypes starting with "db-check", but field names and card design have been lost, so you may be better off restoring from an automatic backup.

View File

@ -9,6 +9,7 @@ use crate::{
all_stock_notetypes, AlreadyGeneratedCardInfo, CardGenContext, NoteType, NoteTypeID,
NoteTypeKind,
},
prelude::*,
timestamp::{TimestampMillis, TimestampSecs},
};
use itertools::Itertools;
@ -29,6 +30,7 @@ pub struct CheckDatabaseOutput {
card_ords_duplicated: usize,
field_count_mismatch: usize,
notetypes_recovered: usize,
invalid_utf8: usize,
}
#[derive(Debug, Clone, Copy)]
@ -99,6 +101,12 @@ impl CheckDatabaseOutput {
tr_args!["count"=>self.revlog_properties_invalid],
));
}
if self.invalid_utf8 > 0 {
probs.push(i18n.trn(
TR::DatabaseCheckNotesWithInvalidUtf8,
tr_args!["count"=>self.invalid_utf8],
));
}
probs
}
@ -263,7 +271,7 @@ impl Collection {
);
checked_notes += 1;
let mut note = self.storage.get_note(nid)?.unwrap();
let mut note = self.get_note_fixing_invalid_utf8(nid, out)?;
let cards = self.storage.existing_cards_for_note(nid)?;
@ -304,6 +312,29 @@ impl Collection {
Ok(())
}
/// Fetch the note with the given ID, repairing its stored fields first if
/// they could not be decoded as UTF-8.
///
/// When the initial fetch fails with a `DBErrorKind::Utf8` error, the note's
/// fields are rewritten via `fix_invalid_utf8_in_note()`, `out.invalid_utf8`
/// is incremented, and the note is fetched a second time.
///
/// # Errors
///
/// - Any error other than `DBErrorKind::Utf8` from the initial fetch is
///   returned unchanged.
/// - Errors from the repair or from the second fetch are propagated.
/// - If storage reports no note for `nid`, a `DBErrorKind::MissingEntity`
///   error is returned instead of panicking.
fn get_note_fixing_invalid_utf8(
    &self,
    nid: NoteID,
    out: &mut CheckDatabaseOutput,
) -> Result<Note> {
    let maybe_note = match self.storage.get_note(nid) {
        Ok(note) => note,
        Err(AnkiError::DBError {
            kind: DBErrorKind::Utf8,
            ..
        }) => {
            // fix note then fetch again
            self.storage.fix_invalid_utf8_in_note(nid)?;
            out.invalid_utf8 += 1;
            self.storage.get_note(nid)?
        }
        // other errors are unhandled
        Err(err) => return Err(err),
    };

    // A missing row is surfaced as an error so the caller can abort the check
    // cleanly rather than panicking.
    maybe_note.ok_or_else(|| AnkiError::DBError {
        info: "note missing while checking for invalid utf8".to_string(),
        kind: DBErrorKind::MissingEntity,
    })
}
fn remove_duplicate_card_ordinals(
&mut self,
cards: &[AlreadyGeneratedCardInfo],

View File

@ -4,7 +4,7 @@
use crate::i18n::{tr_args, tr_strs, I18n, TR};
pub use failure::{Error, Fail};
use reqwest::StatusCode;
use std::io;
use std::{io, str::Utf8Error};
pub type Result<T> = std::result::Result<T, AnkiError>;
@ -175,6 +175,14 @@ impl From<rusqlite::Error> for AnkiError {
impl From<rusqlite::types::FromSqlError> for AnkiError {
fn from(err: rusqlite::types::FromSqlError) -> Self {
if let rusqlite::types::FromSqlError::Other(ref err) = err {
if let Some(_err) = err.downcast_ref::<Utf8Error>() {
return AnkiError::DBError {
info: "".to_string(),
kind: DBErrorKind::Utf8,
};
}
}
AnkiError::DBError {
info: format!("{:?}", err),
kind: DBErrorKind::Other,
@ -316,5 +324,6 @@ pub enum DBErrorKind {
MissingEntity,
Corrupt,
Locked,
Utf8,
Other,
}

View File

@ -8,7 +8,7 @@ pub use crate::{
decks::DeckID,
err::{AnkiError, Result},
i18n::{tr_args, tr_strs, TR},
notes::NoteID,
notes::{Note, NoteID},
notetype::NoteTypeID,
revlog::RevlogID,
timestamp::{TimestampMillis, TimestampSecs},

View File

@ -117,6 +117,24 @@ impl super::SqliteStorage {
Ok(())
}
/// Rewrite the `flds` column of the given note so that it holds valid UTF-8.
///
/// The stored value is read back as raw bytes, converted with
/// `String::from_utf8_lossy` (which substitutes U+FFFD for each invalid
/// sequence), and written back to the same row.
///
/// # Errors
///
/// Any error from the select or the update is converted into the crate's
/// error type and returned.
pub(crate) fn fix_invalid_utf8_in_note(&self, nid: NoteID) -> Result<()> {
    // Cast to a blob so the column can be fetched as plain bytes.
    let raw_fields: Vec<u8> = self.db.query_row(
        "select cast(flds as blob) from notes where id=?",
        &[nid],
        |row| row.get(0),
    )?;

    let repaired = String::from_utf8_lossy(&raw_fields);
    self.db.execute(
        "update notes set flds = ? where id = ?",
        params![repaired, nid],
    )?;

    Ok(())
}
/// Returns the first field of other notes with the same checksum.
/// The field of the provided note ID is not returned.
pub(crate) fn note_fields_by_checksum(