Support UTF-8-BOM when importing CSV (#2360)
The csv crate already supports it, but the meta line parsing didn't.
This commit is contained in:
parent
b97d1ac074
commit
f3ef242bc5
@ -18,6 +18,7 @@ use crate::import_export::ImportProgress;
|
||||
use crate::import_export::NoteLog;
|
||||
use crate::io::open_file;
|
||||
use crate::prelude::*;
|
||||
use crate::text::strip_utf8_bom;
|
||||
|
||||
impl Collection {
|
||||
pub fn import_csv(
|
||||
@ -222,7 +223,7 @@ fn remove_tags_line_from_reader(reader: &mut (impl Read + Seek)) -> Result<()> {
|
||||
let mut buf_reader = BufReader::new(reader);
|
||||
let mut first_line = String::new();
|
||||
buf_reader.read_line(&mut first_line)?;
|
||||
let offset = if first_line.starts_with("tags:") {
|
||||
let offset = if strip_utf8_bom(&first_line).starts_with("tags:") {
|
||||
first_line.as_bytes().len()
|
||||
} else {
|
||||
0
|
||||
@ -380,4 +381,13 @@ mod test {
|
||||
assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
|
||||
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
|
||||
}
|
||||
|
||||
// Checks that a leading U+FEFF (byte order mark) at the start of the input
// does not change what is imported:
// - a BOM-prefixed data row "foo,bar" still yields the fields "foo" and "bar";
// - a BOM-prefixed "#foo" line, "#html:true" line and "tags:foo" line each
//   produce an empty import result.
// NOTE(review): `assert_imported_fields!` and `import!` are macros defined
// elsewhere in this test module; their exact semantics are not visible here.
#[test]
|
||||
fn should_ignore_bom() {
|
||||
let metadata = CsvMetadata::defaults_for_testing();
|
||||
assert_imported_fields!(metadata, "\u{feff}foo,bar\n", [[Some("foo"), Some("bar")]]);
|
||||
assert!(import!(metadata, "\u{feff}#foo\n").is_empty());
|
||||
assert!(import!(metadata, "\u{feff}#html:true\n").is_empty());
|
||||
assert!(import!(metadata, "\u{feff}tags:foo\n").is_empty());
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ pub use crate::pb::import_export::CsvMetadata;
|
||||
use crate::prelude::*;
|
||||
use crate::text::html_to_text_line;
|
||||
use crate::text::is_html;
|
||||
use crate::text::strip_utf8_bom;
|
||||
|
||||
/// The maximum number of preview rows.
|
||||
const PREVIEW_LENGTH: usize = 5;
|
||||
@ -96,6 +97,7 @@ impl Collection {
|
||||
/// True if the line is a meta line, i.e. a comment, or starting with
|
||||
/// 'tags:'.
|
||||
fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool {
|
||||
let line = strip_utf8_bom(line);
|
||||
if let Some(tags) = line.strip_prefix("tags:") {
|
||||
metadata.global_tags = collect_tags(tags);
|
||||
true
|
||||
@ -739,4 +741,14 @@ mod test {
|
||||
// html is stripped
|
||||
assert_eq!(meta.preview[1].vals, ["baz", ""]);
|
||||
}
|
||||
|
||||
// Checks that metadata found on the first line is still picked up when the
// input starts with U+FEFF (byte order mark):
// - "#separator:tab" results in `Delimiter::Tab`;
// - "tags:foo" results in `global_tags` equal to ["foo"].
// NOTE(review): the doubled "first_first" in the test name looks like a typo;
// confirm before renaming. `metadata!` is a macro defined elsewhere in this
// test module.
#[test]
|
||||
fn should_parse_first_first_line_despite_bom() {
|
||||
let mut col = open_test_collection();
|
||||
assert_eq!(
|
||||
metadata!(col, "\u{feff}#separator:tab\n").delimiter(),
|
||||
Delimiter::Tab
|
||||
);
|
||||
assert_eq!(metadata!(col, "\u{feff}tags:foo\n").global_tags, ["foo"]);
|
||||
}
|
||||
}
|
||||
|
@ -62,6 +62,10 @@ impl<'a, B: ?Sized + 'a + ToOwned> CowMapping<'a, B> for Cow<'a, B> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `s` with a single leading byte order mark (U+FEFF) removed.
///
/// If `s` does not start with U+FEFF it is returned unchanged. At most one
/// leading mark is stripped, and marks elsewhere in the string are left
/// alone. The result is a sub-slice of `s`, so nothing is allocated.
pub(crate) fn strip_utf8_bom(s: &str) -> &str {
    s.strip_prefix('\u{feff}').unwrap_or(s)
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum AvTag {
|
||||
SoundOrVideo(String),
|
||||
|
Loading…
Reference in New Issue
Block a user