Support UTF-8-BOM when importing CSV (#2360)

The csv crate already supports it, but the meta line parsing didn't.
This commit is contained in:
RumovZ 2023-02-05 02:53:21 +01:00 committed by GitHub
parent b97d1ac074
commit f3ef242bc5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 1 deletions

View File

@ -18,6 +18,7 @@ use crate::import_export::ImportProgress;
use crate::import_export::NoteLog;
use crate::io::open_file;
use crate::prelude::*;
use crate::text::strip_utf8_bom;
impl Collection {
pub fn import_csv(
@ -222,7 +223,7 @@ fn remove_tags_line_from_reader(reader: &mut (impl Read + Seek)) -> Result<()> {
let mut buf_reader = BufReader::new(reader);
let mut first_line = String::new();
buf_reader.read_line(&mut first_line)?;
let offset = if first_line.starts_with("tags:") {
let offset = if strip_utf8_bom(&first_line).starts_with("tags:") {
first_line.as_bytes().len()
} else {
0
@ -380,4 +381,13 @@ mod test {
assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
}
// Every input below starts with U+FEFF (the byte order mark) to check that
// the import result is the same as it would be without that leading character.
#[test]
fn should_ignore_bom() {
let metadata = CsvMetadata::defaults_for_testing();
// A plain data row: the first field is expected to be exactly "foo", i.e.
// without the U+FEFF prefix.
assert_imported_fields!(metadata, "\u{feff}foo,bar\n", [[Some("foo"), Some("bar")]]);
// A `#` line, a `#html:true` line and a `tags:` line placed after the BOM
// are each expected to produce an empty import result.
assert!(import!(metadata, "\u{feff}#foo\n").is_empty());
assert!(import!(metadata, "\u{feff}#html:true\n").is_empty());
assert!(import!(metadata, "\u{feff}tags:foo\n").is_empty());
}
}

View File

@ -28,6 +28,7 @@ pub use crate::pb::import_export::CsvMetadata;
use crate::prelude::*;
use crate::text::html_to_text_line;
use crate::text::is_html;
use crate::text::strip_utf8_bom;
/// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5;
@ -96,6 +97,7 @@ impl Collection {
/// True if the line is a meta line, i.e. a comment, or starting with
/// 'tags:'.
fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool {
let line = strip_utf8_bom(line);
if let Some(tags) = line.strip_prefix("tags:") {
metadata.global_tags = collect_tags(tags);
true
@ -739,4 +741,14 @@ mod test {
// html is stripped
assert_eq!(meta.preview[1].vals, ["baz", ""]);
}
// Checks that a first line preceded by U+FEFF (the byte order mark) still
// has its effect on the resulting metadata.
#[test]
fn should_parse_first_first_line_despite_bom() {
let mut col = open_test_collection();
// `#separator:tab` after the BOM is expected to yield the Tab delimiter.
assert_eq!(
metadata!(col, "\u{feff}#separator:tab\n").delimiter(),
Delimiter::Tab
);
// `tags:foo` after the BOM is expected to yield "foo" as the only global tag.
assert_eq!(metadata!(col, "\u{feff}tags:foo\n").global_tags, ["foo"]);
}
}

View File

@ -62,6 +62,10 @@ impl<'a, B: ?Sized + 'a + ToOwned> CowMapping<'a, B> for Cow<'a, B> {
}
}
/// Returns `s` without a single leading U+FEFF (byte order mark) character.
///
/// Only one occurrence at the very start is removed; if `s` does not begin
/// with U+FEFF, it is returned unchanged.
pub(crate) fn strip_utf8_bom(s: &str) -> &str {
    match s.strip_prefix('\u{feff}') {
        Some(without_bom) => without_bom,
        None => s,
    }
}
#[derive(Debug, PartialEq)]
pub enum AvTag {
SoundOrVideo(String),