Keep content of unmapped fields when importing (#2023)

* Keep content of unmapped fields when importing

* Test new behaviour

* Fix typo in `canonify_tags_without_resgistering`

* Log updated note instead of original one

* Revert merging imported tags

But keep old note tags if no new ones are provided.
This commit is contained in:
RumovZ 2022-08-24 08:04:32 +02:00 committed by GitHub
parent 45f2502e5b
commit 79fbb6c8d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 274 additions and 156 deletions

View File

@ -56,7 +56,7 @@ impl CsvMetadata {
.ok_or_else(|| AnkiError::invalid_input("notetype oneof not set")) .ok_or_else(|| AnkiError::invalid_input("notetype oneof not set"))
} }
fn field_source_columns(&self) -> Result<Vec<Option<usize>>> { fn field_source_columns(&self) -> Result<FieldSourceColumns> {
Ok(match self.notetype()? { Ok(match self.notetype()? {
CsvNotetype::GlobalNotetype(global) => global CsvNotetype::GlobalNotetype(global) => global
.field_columns .field_columns
@ -115,8 +115,7 @@ struct ColumnContext {
guid_column: Option<usize>, guid_column: Option<usize>,
deck_column: Option<usize>, deck_column: Option<usize>,
notetype_column: Option<usize>, notetype_column: Option<usize>,
/// Source column indices for the fields of a notetype, identified by its /// Source column indices for the fields of a notetype
/// name or id as string. The empty string corresponds to the default notetype.
field_source_columns: FieldSourceColumns, field_source_columns: FieldSourceColumns,
/// How fields are converted to strings. Used for escaping HTML if appropriate. /// How fields are converted to strings. Used for escaping HTML if appropriate.
stringify: fn(&str) -> String, stringify: fn(&str) -> String,
@ -168,22 +167,20 @@ impl ColumnContext {
} }
} }
fn gather_tags(&self, record: &csv::StringRecord) -> Vec<String> { fn gather_tags(&self, record: &csv::StringRecord) -> Option<Vec<String>> {
self.tags_column self.tags_column.and_then(|i| record.get(i - 1)).map(|s| {
.and_then(|i| record.get(i - 1)) s.split_whitespace()
.unwrap_or_default() .filter(|s| !s.is_empty())
.split_whitespace() .map(ToString::to_string)
.filter(|s| !s.is_empty()) .collect()
.map(ToString::to_string) })
.collect()
} }
fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec<String> { fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec<Option<String>> {
let stringify = self.stringify; let stringify = self.stringify;
self.field_source_columns self.field_source_columns
.iter() .iter()
.map(|opt| opt.and_then(|idx| record.get(idx - 1)).unwrap_or_default()) .map(|opt| opt.and_then(|idx| record.get(idx - 1)).map(stringify))
.map(stringify)
.collect() .collect()
} }
} }
@ -253,7 +250,19 @@ mod test {
($metadata:expr, $csv:expr, $expected:expr) => { ($metadata:expr, $csv:expr, $expected:expr) => {
let notes = import!(&$metadata, $csv); let notes = import!(&$metadata, $csv);
let fields: Vec<_> = notes.into_iter().map(|note| note.fields).collect(); let fields: Vec<_> = notes.into_iter().map(|note| note.fields).collect();
assert_eq!(fields, $expected); assert_eq!(fields.len(), $expected.len());
for (note_fields, note_expected) in fields.iter().zip($expected.iter()) {
assert_field_eq!(note_fields, note_expected);
}
};
}
macro_rules! assert_field_eq {
($fields:expr, $expected:expr) => {
assert_eq!($fields.len(), $expected.len());
for (field, expected) in $fields.iter().zip($expected.iter()) {
assert_eq!(&field.as_ref().map(String::as_str), expected);
}
}; };
} }
@ -283,20 +292,28 @@ mod test {
#[test] #[test]
fn should_allow_missing_columns() { fn should_allow_missing_columns() {
let metadata = CsvMetadata::defaults_for_testing(); let metadata = CsvMetadata::defaults_for_testing();
assert_imported_fields!(metadata, "foo\n", &[&["foo", ""]]); assert_imported_fields!(metadata, "foo\n", [[Some("foo"), None]]);
} }
#[test] #[test]
fn should_respect_custom_delimiter() { fn should_respect_custom_delimiter() {
let mut metadata = CsvMetadata::defaults_for_testing(); let mut metadata = CsvMetadata::defaults_for_testing();
metadata.set_delimiter(Delimiter::Pipe); metadata.set_delimiter(Delimiter::Pipe);
assert_imported_fields!(metadata, "fr,ont|ba,ck\n", &[&["fr,ont", "ba,ck"]]); assert_imported_fields!(
metadata,
"fr,ont|ba,ck\n",
[[Some("fr,ont"), Some("ba,ck")]]
);
} }
#[test] #[test]
fn should_ignore_first_line_starting_with_tags() { fn should_ignore_first_line_starting_with_tags() {
let metadata = CsvMetadata::defaults_for_testing(); let metadata = CsvMetadata::defaults_for_testing();
assert_imported_fields!(metadata, "tags:foo\nfront,back\n", &[&["front", "back"]]); assert_imported_fields!(
metadata,
"tags:foo\nfront,back\n",
[[Some("front"), Some("back")]]
);
} }
#[test] #[test]
@ -308,21 +325,29 @@ mod test {
id: 1, id: 1,
field_columns: vec![3, 1], field_columns: vec![3, 1],
})); }));
assert_imported_fields!(metadata, "front,foo,back\n", &[&["back", "front"]]); assert_imported_fields!(
metadata,
"front,foo,back\n",
[[Some("back"), Some("front")]]
);
} }
#[test] #[test]
fn should_ignore_lines_starting_with_number_sign() { fn should_ignore_lines_starting_with_number_sign() {
let metadata = CsvMetadata::defaults_for_testing(); let metadata = CsvMetadata::defaults_for_testing();
assert_imported_fields!(metadata, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]); assert_imported_fields!(
metadata,
"#foo\nfront,back\n#bar\n",
[[Some("front"), Some("back")]]
);
} }
#[test] #[test]
fn should_escape_html_entities_if_csv_is_html() { fn should_escape_html_entities_if_csv_is_html() {
let mut metadata = CsvMetadata::defaults_for_testing(); let mut metadata = CsvMetadata::defaults_for_testing();
assert_imported_fields!(metadata, "<hr>\n", &[&["&lt;hr&gt;", ""]]); assert_imported_fields!(metadata, "<hr>\n", [[Some("&lt;hr&gt;"), None]]);
metadata.is_html = true; metadata.is_html = true;
assert_imported_fields!(metadata, "<hr>\n", &[&["<hr>", ""]]); assert_imported_fields!(metadata, "<hr>\n", [[Some("<hr>"), None]]);
} }
#[test] #[test]
@ -330,7 +355,7 @@ mod test {
let mut metadata = CsvMetadata::defaults_for_testing(); let mut metadata = CsvMetadata::defaults_for_testing();
metadata.tags_column = 3; metadata.tags_column = 3;
let notes = import!(metadata, "front,back,foo bar\n"); let notes = import!(metadata, "front,back,foo bar\n");
assert_eq!(notes[0].tags, &["foo", "bar"]); assert_eq!(notes[0].tags.as_ref().unwrap(), &["foo", "bar"]);
} }
#[test] #[test]
@ -347,9 +372,9 @@ mod test {
metadata.notetype.replace(CsvNotetype::NotetypeColumn(1)); metadata.notetype.replace(CsvNotetype::NotetypeColumn(1));
metadata.column_labels.push("".to_string()); metadata.column_labels.push("".to_string());
let notes = import!(metadata, "Basic,front,back\nCloze,foo,bar\n"); let notes = import!(metadata, "Basic,front,back\nCloze,foo,bar\n");
assert_eq!(notes[0].fields, &["front", "back"]); assert_field_eq!(notes[0].fields, [Some("front"), Some("back")]);
assert_eq!(notes[0].notetype, NameOrId::Name(String::from("Basic"))); assert_eq!(notes[0].notetype, NameOrId::Name(String::from("Basic")));
assert_eq!(notes[1].fields, &["foo", "bar"]); assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze"))); assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
} }
} }

View File

@ -4,7 +4,6 @@
use std::{ use std::{
borrow::Cow, borrow::Cow,
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
mem,
sync::Arc, sync::Arc,
}; };
@ -16,8 +15,9 @@ use crate::{
text::{ text::{
DupeResolution, ForeignCard, ForeignData, ForeignNote, ForeignNotetype, ForeignTemplate, DupeResolution, ForeignCard, ForeignData, ForeignNote, ForeignNotetype, ForeignTemplate,
}, },
ImportProgress, IncrementableProgress, LogNote, NoteLog, ImportProgress, IncrementableProgress, NoteLog,
}, },
notes::{field_checksum, normalize_field},
notetype::{CardGenContext, CardTemplate, NoteField, NotetypeConfig}, notetype::{CardGenContext, CardTemplate, NoteField, NotetypeConfig},
prelude::*, prelude::*,
text::strip_html_preserving_media_filenames, text::strip_html_preserving_media_filenames,
@ -78,13 +78,13 @@ struct DeckIdsByNameOrId {
default: Option<DeckId>, default: Option<DeckId>,
} }
struct NoteContext { struct NoteContext<'a> {
/// Prepared and with canonified tags. note: ForeignNote,
note: Note,
dupes: Vec<Duplicate>, dupes: Vec<Duplicate>,
cards: Vec<Card>,
notetype: Arc<Notetype>, notetype: Arc<Notetype>,
deck_id: DeckId, deck_id: DeckId,
global_tags: &'a [String],
updated_tags: &'a [String],
} }
struct Duplicate { struct Duplicate {
@ -94,8 +94,8 @@ struct Duplicate {
} }
impl Duplicate { impl Duplicate {
fn new(dupe: Note, original: &Note, first_field_match: bool) -> Self { fn new(dupe: Note, original: &ForeignNote, first_field_match: bool) -> Self {
let identical = dupe.equal_fields_and_tags(original); let identical = original.equal_fields_and_tags(&dupe);
Self { Self {
note: dupe, note: dupe,
identical, identical,
@ -190,14 +190,20 @@ impl<'a> Context<'a> {
let mut log = NoteLog::new(self.dupe_resolution, notes.len() as u32); let mut log = NoteLog::new(self.dupe_resolution, notes.len() as u32);
for foreign in notes { for foreign in notes {
incrementor.increment()?; incrementor.increment()?;
if foreign.first_field_is_empty() { if foreign.first_field_is_the_empty_string() {
log.empty_first_field.push(foreign.into_log_note()); log.empty_first_field.push(foreign.into_log_note());
continue; continue;
} }
if let Some(notetype) = self.notetype_for_note(&foreign)? { if let Some(notetype) = self.notetype_for_note(&foreign)? {
if let Some(deck_id) = self.deck_ids.get(&foreign.deck) { if let Some(deck_id) = self.deck_ids.get(&foreign.deck) {
let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?; let ctx = self.build_note_context(
self.import_note(ctx, updated_tags, &mut log)?; foreign,
notetype,
deck_id,
global_tags,
updated_tags,
)?;
self.import_note(ctx, &mut log)?;
} else { } else {
log.missing_deck.push(foreign.into_log_note()); log.missing_deck.push(foreign.into_log_note());
} }
@ -208,41 +214,45 @@ impl<'a> Context<'a> {
Ok(log) Ok(log)
} }
fn build_note_context( fn build_note_context<'tags>(
&mut self, &mut self,
foreign: ForeignNote, mut note: ForeignNote,
notetype: Arc<Notetype>, notetype: Arc<Notetype>,
deck_id: DeckId, deck_id: DeckId,
global_tags: &[String], global_tags: &'tags [String],
) -> Result<NoteContext> { updated_tags: &'tags [String],
let (mut note, cards) = foreign.into_native(&notetype, deck_id, self.today, global_tags); ) -> Result<NoteContext<'tags>> {
note.prepare_for_update(&notetype, self.normalize_notes)?; self.prepare_foreign_note(&mut note)?;
self.col.canonify_note_tags(&mut note, self.usn)?;
let dupes = self.find_duplicates(&notetype, &note)?; let dupes = self.find_duplicates(&notetype, &note)?;
Ok(NoteContext { Ok(NoteContext {
note, note,
dupes, dupes,
cards,
notetype, notetype,
deck_id, deck_id,
global_tags,
updated_tags,
}) })
} }
fn find_duplicates(&self, notetype: &Notetype, note: &Note) -> Result<Vec<Duplicate>> { fn prepare_foreign_note(&mut self, note: &mut ForeignNote) -> Result<()> {
let checksum = note note.normalize_fields(self.normalize_notes);
.checksum self.col.canonify_foreign_tags(note, self.usn)
.ok_or_else(|| AnkiError::invalid_input("note unprepared"))?; }
fn find_duplicates(&self, notetype: &Notetype, note: &ForeignNote) -> Result<Vec<Duplicate>> {
if let Some(nid) = self.existing_guids.get(&note.guid) { if let Some(nid) = self.existing_guids.get(&note.guid) {
self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe]) self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe])
} else if let Some(nids) = self.existing_checksums.get(&(notetype.id, checksum)) { } else if let Some(nids) = note
.checksum()
.and_then(|csum| self.existing_checksums.get(&(notetype.id, csum)))
{
self.get_first_field_dupes(note, nids) self.get_first_field_dupes(note, nids)
} else { } else {
Ok(Vec::new()) Ok(Vec::new())
} }
} }
fn get_guid_dupe(&self, nid: NoteId, original: &Note) -> Result<Duplicate> { fn get_guid_dupe(&self, nid: NoteId, original: &ForeignNote) -> Result<Duplicate> {
self.col self.col
.storage .storage
.get_note(nid)? .get_note(nid)?
@ -250,7 +260,7 @@ impl<'a> Context<'a> {
.map(|dupe| Duplicate::new(dupe, original, false)) .map(|dupe| Duplicate::new(dupe, original, false))
} }
fn get_first_field_dupes(&self, note: &Note, nids: &[NoteId]) -> Result<Vec<Duplicate>> { fn get_first_field_dupes(&self, note: &ForeignNote, nids: &[NoteId]) -> Result<Vec<Duplicate>> {
Ok(self Ok(self
.col .col
.get_full_duplicates(note, nids)? .get_full_duplicates(note, nids)?
@ -259,26 +269,36 @@ impl<'a> Context<'a> {
.collect()) .collect())
} }
fn import_note( fn import_note(&mut self, ctx: NoteContext, log: &mut NoteLog) -> Result<()> {
&mut self,
ctx: NoteContext,
updated_tags: &[String],
log: &mut NoteLog,
) -> Result<()> {
match self.dupe_resolution { match self.dupe_resolution {
_ if ctx.dupes.is_empty() => self.add_note(ctx, &mut log.new)?, _ if ctx.dupes.is_empty() => self.add_note(ctx, log, false)?,
DupeResolution::Add => self.add_note(ctx, &mut log.first_field_match)?, DupeResolution::Add => self.add_note(ctx, log, true)?,
DupeResolution::Update => self.update_with_note(ctx, updated_tags, log)?, DupeResolution::Update => self.update_with_note(ctx, log)?,
DupeResolution::Ignore => log.first_field_match.push(ctx.note.into_log_note()), DupeResolution::Ignore => log.first_field_match.push(ctx.note.into_log_note()),
} }
Ok(()) Ok(())
} }
fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec<LogNote>) -> Result<()> { fn add_note(&mut self, ctx: NoteContext, log: &mut NoteLog, dupe: bool) -> Result<()> {
ctx.note.usn = self.usn; if !ctx.note.first_field_is_unempty() {
self.col.add_note_only_undoable(&mut ctx.note)?; log.empty_first_field.push(ctx.note.into_log_note());
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?; return Ok(());
log_queue.push(ctx.note.into_log_note()); }
let mut note = Note::new(&ctx.notetype);
let mut cards = ctx
.note
.into_native(&mut note, ctx.deck_id, self.today, ctx.global_tags);
self.prepare_note(&mut note, &ctx.notetype)?;
self.col.add_note_only_undoable(&mut note)?;
self.add_cards(&mut cards, &note, ctx.deck_id, ctx.notetype)?;
if dupe {
log.first_field_match.push(note.into_log_note());
} else {
log.new.push(note.into_log_note());
}
Ok(()) Ok(())
} }
@ -293,63 +313,46 @@ impl<'a> Context<'a> {
self.generate_missing_cards(notetype, deck_id, note) self.generate_missing_cards(notetype, deck_id, note)
} }
fn update_with_note( fn update_with_note(&mut self, ctx: NoteContext, log: &mut NoteLog) -> Result<()> {
&mut self, for dupe in ctx.dupes {
mut ctx: NoteContext, if dupe.note.notetype_id != ctx.notetype.id {
updated_tags: &[String], log.conflicting.push(dupe.note.into_log_note());
log: &mut NoteLog, continue;
) -> Result<()> { }
self.prepare_note_for_update(&mut ctx.note, updated_tags)?;
for dupe in mem::take(&mut ctx.dupes) { let mut note = dupe.note.clone();
self.maybe_update_dupe(dupe, &mut ctx, log)?; let mut cards = ctx.note.clone().into_native(
&mut note,
ctx.deck_id,
self.today,
ctx.global_tags.iter().chain(ctx.updated_tags.iter()),
);
if !dupe.identical {
self.prepare_note(&mut note, &ctx.notetype)?;
self.col.update_note_undoable(&note, &dupe.note)?;
}
self.add_cards(&mut cards, &note, ctx.deck_id, ctx.notetype.clone())?;
if dupe.identical {
log.duplicate.push(dupe.note.into_log_note());
} else if dupe.first_field_match {
log.first_field_match.push(note.into_log_note());
} else {
log.updated.push(note.into_log_note());
}
} }
Ok(()) Ok(())
} }
fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> { fn prepare_note(&mut self, note: &mut Note, notetype: &Notetype) -> Result<()> {
if !updated_tags.is_empty() { note.prepare_for_update(notetype, self.normalize_notes)?;
note.tags.extend(updated_tags.iter().cloned()); self.col.canonify_note_tags(note, self.usn)?;
self.col.canonify_note_tags(note, self.usn)?;
}
note.set_modified(self.usn); note.set_modified(self.usn);
Ok(()) Ok(())
} }
fn maybe_update_dupe(
&mut self,
dupe: Duplicate,
ctx: &mut NoteContext,
log: &mut NoteLog,
) -> Result<()> {
if dupe.note.notetype_id != ctx.notetype.id {
log.conflicting.push(dupe.note.into_log_note());
return Ok(());
}
if dupe.identical {
log.duplicate.push(dupe.note.into_log_note());
} else {
self.update_dupe(dupe, ctx, log)?;
}
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone())
}
fn update_dupe(
&mut self,
dupe: Duplicate,
ctx: &mut NoteContext,
log: &mut NoteLog,
) -> Result<()> {
ctx.note.id = dupe.note.id;
ctx.note.guid = dupe.note.guid.clone();
self.col.update_note_undoable(&ctx.note, &dupe.note)?;
if dupe.first_field_match {
log.first_field_match.push(dupe.note.into_log_note());
} else {
log.updated.push(dupe.note.into_log_note());
}
Ok(())
}
fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> { fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> {
for card in cards { for card in cards {
card.note_id = note_id; card.note_id = note_id;
@ -397,8 +400,18 @@ impl Collection {
} }
} }
fn get_full_duplicates(&self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> { fn canonify_foreign_tags(&mut self, note: &mut ForeignNote, usn: Usn) -> Result<()> {
let first_field = note.first_field_stripped(); if let Some(tags) = note.tags.take() {
note.tags
.replace(self.canonify_tags_without_registering(tags, usn)?);
}
Ok(())
}
fn get_full_duplicates(&self, note: &ForeignNote, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
let first_field = note
.first_field_stripped()
.ok_or_else(|| AnkiError::invalid_input("no first field"))?;
dupe_ids dupe_ids
.iter() .iter()
.filter_map(|&dupe_id| self.storage.get_note(dupe_id).transpose()) .filter_map(|&dupe_id| self.storage.get_note(dupe_id).transpose())
@ -411,35 +424,72 @@ impl Collection {
} }
impl ForeignNote { impl ForeignNote {
fn into_native( /// Updates a native note with the foreign data and returns its new cards.
fn into_native<'tags>(
self, self,
notetype: &Notetype, note: &mut Note,
deck_id: DeckId, deck_id: DeckId,
today: u32, today: u32,
extra_tags: &[String], extra_tags: impl IntoIterator<Item = &'tags String>,
) -> (Note, Vec<Card>) { ) -> Vec<Card> {
// TODO: Handle new and learning cards // TODO: Handle new and learning cards
let mut note = Note::new(notetype);
if !self.guid.is_empty() { if !self.guid.is_empty() {
note.guid = self.guid; note.guid = self.guid;
} }
note.tags = self.tags; if let Some(tags) = self.tags {
note.tags.extend(extra_tags.iter().cloned()); note.tags = tags;
}
note.tags.extend(extra_tags.into_iter().cloned());
note.fields_mut() note.fields_mut()
.iter_mut() .iter_mut()
.zip(self.fields.into_iter()) .zip(self.fields.into_iter())
.for_each(|(field, value)| *field = value); .for_each(|(field, new)| {
let cards = self if let Some(s) = new {
.cards *field = s;
}
});
self.cards
.into_iter() .into_iter()
.enumerate() .enumerate()
.map(|(idx, c)| c.into_native(NoteId(0), idx as u16, deck_id, today)) .map(|(idx, c)| c.into_native(NoteId(0), idx as u16, deck_id, today))
.collect(); .collect()
(note, cards)
} }
fn first_field_is_empty(&self) -> bool { fn first_field_is_the_empty_string(&self) -> bool {
self.fields.get(0).map(String::is_empty).unwrap_or(true) matches!(self.fields.get(0), Some(Some(s)) if s.is_empty())
}
/// Returns `true` only if a first field exists, has a value set, and that
/// value is not the empty string.
fn first_field_is_unempty(&self) -> bool {
    self.fields
        .first()
        .and_then(Option::as_ref)
        .map_or(false, |content| !content.is_empty())
}
/// Runs `normalize_field` over every field that has a value; unset (`None`)
/// fields are skipped.
fn normalize_fields(&mut self, normalize_text: bool) {
    self.fields
        .iter_mut()
        .filter_map(Option::as_mut)
        .for_each(|content| normalize_field(content, normalize_text));
}
/// Expects normalized form.
///
/// Returns `true` if everything this foreign note provides matches `other`.
/// Unset tags and unset fields count as matching. Fields are compared
/// pairwise, so surplus fields on either side are not looked at.
fn equal_fields_and_tags(&self, other: &Note) -> bool {
    let tags_match = match &self.tags {
        Some(tags) => *tags == other.tags,
        None => true,
    };
    tags_match
        && self
            .fields
            .iter()
            .zip(other.fields())
            .all(|(new, old)| match new {
                Some(content) => content == old,
                None => true,
            })
}
/// Returns the first field passed through
/// `strip_html_preserving_media_filenames`, or `None` if there is no first
/// field or it has no value.
fn first_field_stripped(&self) -> Option<Cow<str>> {
    let first = self.fields.first()?.as_ref()?;
    Some(strip_html_preserving_media_filenames(first.as_str()))
}
/// If the first field is set, returns its checksum. Field is expected to be normalized.
fn checksum(&self) -> Option<u32> {
self.first_field_stripped()
.map(|field| field_checksum(&field))
} }
} }
@ -493,12 +543,6 @@ impl ForeignTemplate {
} }
} }
impl Note {
fn equal_fields_and_tags(&self, other: &Self) -> bool {
self.fields() == other.fields() && self.tags == other.tags
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -515,7 +559,7 @@ mod test {
fn add_note(&mut self, fields: &[&str]) { fn add_note(&mut self, fields: &[&str]) {
self.notes.push(ForeignNote { self.notes.push(ForeignNote {
fields: fields.iter().map(ToString::to_string).collect(), fields: fields.iter().map(ToString::to_string).map(Some).collect(),
..Default::default() ..Default::default()
}); });
} }
@ -543,7 +587,7 @@ mod test {
data.clone().import(&mut col, |_, _| true).unwrap(); data.clone().import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.notes_table_len(), 1); assert_eq!(col.storage.notes_table_len(), 1);
data.notes[0].fields[1] = "new".to_string(); data.notes[0].fields[1].replace("new".to_string());
data.import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap();
let notes = col.storage.get_all_notes(); let notes = col.storage.get_all_notes();
assert_eq!(notes.len(), 1); assert_eq!(notes.len(), 1);
@ -560,11 +604,30 @@ mod test {
data.clone().import(&mut col, |_, _| true).unwrap(); data.clone().import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.notes_table_len(), 1); assert_eq!(col.storage.notes_table_len(), 1);
data.notes[0].fields[1] = "new".to_string(); data.notes[0].fields[1].replace("new".to_string());
data.import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.get_all_notes()[0].fields()[1], "new"); assert_eq!(col.storage.get_all_notes()[0].fields()[1], "new");
} }
/// Re-importing a note with an unset or missing field must leave the existing
/// content of that field untouched.
#[test]
fn should_keep_old_field_content_if_no_new_one_is_supplied() {
    let mut col = open_test_collection();
    let mut data = ForeignData::with_defaults();
    data.add_note(&["same", "unchanged"]);
    data.add_note(&["same", "unchanged"]);
    data.dupe_resolution = DupeResolution::Update;

    data.clone().import(&mut col, |_, _| true).unwrap();
    assert_eq!(col.storage.notes_table_len(), 2);

    // First note: second field explicitly unset. Second note: second field
    // missing altogether.
    data.notes[0].fields[1] = None;
    data.notes[1].fields.pop();
    data.import(&mut col, |_, _| true).unwrap();
    let notes = col.storage.get_all_notes();
    // Check both stored notes, not the first one twice.
    assert_eq!(notes[0].fields(), &["same", "unchanged"]);
    assert_eq!(notes[1].fields(), &["same", "unchanged"]);
}
#[test] #[test]
fn should_recognize_normalized_duplicate_only_if_normalization_is_enabled() { fn should_recognize_normalized_duplicate_only_if_normalization_is_enabled() {
let mut col = open_test_collection(); let mut col = open_test_collection();
@ -589,7 +652,7 @@ mod test {
let mut col = open_test_collection(); let mut col = open_test_collection();
let mut data = ForeignData::with_defaults(); let mut data = ForeignData::with_defaults();
data.add_note(&["foo"]); data.add_note(&["foo"]);
data.notes[0].tags = vec![String::from("bar")]; data.notes[0].tags.replace(vec![String::from("bar")]);
data.global_tags = vec![String::from("baz")]; data.global_tags = vec![String::from("baz")];
data.import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap();
@ -601,7 +664,7 @@ mod test {
let mut col = open_test_collection(); let mut col = open_test_collection();
let mut data = ForeignData::with_defaults(); let mut data = ForeignData::with_defaults();
data.add_note(&["foo"]); data.add_note(&["foo"]);
data.notes[0].tags = vec![String::from("bar")]; data.notes[0].tags.replace(vec![String::from("bar")]);
data.global_tags = vec![String::from("baz")]; data.global_tags = vec![String::from("baz")];
data.import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap();

View File

@ -26,8 +26,8 @@ pub struct ForeignData {
#[serde(default)] #[serde(default)]
pub struct ForeignNote { pub struct ForeignNote {
guid: String, guid: String,
fields: Vec<String>, fields: Vec<Option<String>>,
tags: Vec<String>, tags: Option<Vec<String>>,
notetype: NameOrId, notetype: NameOrId,
deck: NameOrId, deck: NameOrId,
cards: Vec<ForeignCard>, cards: Vec<ForeignCard>,
@ -82,7 +82,11 @@ impl ForeignNote {
pub(crate) fn into_log_note(self) -> LogNote { pub(crate) fn into_log_note(self) -> LogNote {
LogNote { LogNote {
id: None, id: None,
fields: self.fields, fields: self
.fields
.into_iter()
.map(Option::unwrap_or_default)
.collect(),
} }
} }
} }

View File

@ -186,16 +186,8 @@ impl Note {
))); )));
} }
for field in &mut self.fields { for field in self.fields_mut() {
if field.contains(invalid_char_for_field) { normalize_field(field, normalize_text);
*field = field.replace(invalid_char_for_field, "");
}
}
if normalize_text {
for field in &mut self.fields {
ensure_string_in_nfc(field);
}
} }
let field1_nohtml = strip_html_preserving_media_filenames(&self.fields()[0]); let field1_nohtml = strip_html_preserving_media_filenames(&self.fields()[0]);
@ -265,6 +257,16 @@ impl Note {
} }
} }
/// Cleans a single field in place: drops everything matched by
/// `invalid_char_for_field` and, if `normalize_text` is set, hands the field
/// to `ensure_string_in_nfc`.
pub(crate) fn normalize_field(field: &mut String, normalize_text: bool) {
    let has_invalid_chars = field.contains(invalid_char_for_field);
    if has_invalid_chars {
        // Only build a new string when there is something to remove.
        let cleaned = field.replace(invalid_char_for_field, "");
        *field = cleaned;
    }

    if normalize_text {
        ensure_string_in_nfc(field);
    }
}
impl From<Note> for pb::Note { impl From<Note> for pb::Note {
fn from(n: Note) -> Self { fn from(n: Note) -> Self {
pb::Note { pb::Note {

View File

@ -17,6 +17,26 @@ impl Collection {
&mut self, &mut self,
tags: Vec<String>, tags: Vec<String>,
usn: Usn, usn: Usn,
) -> Result<(Vec<String>, bool)> {
self.canonify_tags_inner(tags, usn, true)
}
/// Runs `canonify_tags_inner` with registration turned off and returns only
/// the resulting tags, dropping the flag that accompanies them.
pub(crate) fn canonify_tags_without_registering(
    &mut self,
    tags: Vec<String>,
    usn: Usn,
) -> Result<Vec<String>> {
    let (canonified, _added) = self.canonify_tags_inner(tags, usn, false)?;
    Ok(canonified)
}
/// Shared implementation of [canonify_tags()]. If `register` is false, new tags
/// are not saved and, as a consequence, new parents are not canonified.
fn canonify_tags_inner(
&mut self,
tags: Vec<String>,
usn: Usn,
register: bool,
) -> Result<(Vec<String>, bool)> { ) -> Result<(Vec<String>, bool)> {
let mut seen = HashSet::new(); let mut seen = HashSet::new();
let mut added = false; let mut added = false;
@ -24,7 +44,11 @@ impl Collection {
let tags: Vec<_> = tags.iter().flat_map(|t| split_tags(t)).collect(); let tags: Vec<_> = tags.iter().flat_map(|t| split_tags(t)).collect();
for tag in tags { for tag in tags {
let mut tag = Tag::new(tag.to_string(), usn); let mut tag = Tag::new(tag.to_string(), usn);
added |= self.register_tag(&mut tag)?; if register {
added |= self.register_tag(&mut tag)?;
} else {
self.prepare_tag_for_registering(&mut tag)?;
}
seen.insert(UniCase::new(tag.name)); seen.insert(UniCase::new(tag.name));
} }