don't add non-normalized files to media DB

This commit is contained in:
Damien Elmes 2020-02-08 18:59:55 +10:00
parent 22793c8cd6
commit 4fa4a5077c
2 changed files with 60 additions and 15 deletions

View File

@ -4,7 +4,8 @@
use crate::err::{AnkiError, Result};
use crate::media::database::{MediaDatabaseContext, MediaEntry};
use crate::media::files::{
mtime_as_i64, sha1_of_file, MEDIA_SYNC_FILESIZE_LIMIT, NONSYNCABLE_FILENAME,
filename_if_normalized, mtime_as_i64, sha1_of_file, MEDIA_SYNC_FILESIZE_LIMIT,
NONSYNCABLE_FILENAME,
};
use std::collections::HashMap;
use std::path::Path;
@ -78,10 +79,6 @@ where
/// Scan through the media folder, finding changes.
/// Returns (added/changed files, removed files).
///
/// Checks for invalid filenames and unicode normalization are deferred
/// until syncing time, as we can't trust the entries previous Anki versions
/// wrote are correct.
fn media_folder_changes(
&mut self,
mut mtimes: HashMap<String, i64>,
@ -99,13 +96,22 @@ where
// if the filename is not valid unicode, skip it
let fname_os = dentry.file_name();
let fname = match fname_os.to_str() {
let disk_fname = match fname_os.to_str() {
Some(s) => s,
None => continue,
};
// make sure the filename is normalized
let fname = match filename_if_normalized(&disk_fname) {
Some(fname) => fname,
None => {
// not normalized; skip it
continue;
}
};
// ignore blacklisted files
if NONSYNCABLE_FILENAME.is_match(fname) {
if NONSYNCABLE_FILENAME.is_match(fname.as_ref()) {
continue;
}
@ -116,7 +122,7 @@ where
}
// remove from mtimes for later deletion tracking
let previous_mtime = mtimes.remove(fname);
let previous_mtime = mtimes.remove(fname.as_ref());
// skip files that have not been modified
let mtime = metadata

View File

@ -71,19 +71,58 @@ pub(crate) fn normalize_filename(fname: &str) -> Cow<str> {
output = output.chars().nfc().collect::<String>().into();
}
if output.chars().any(disallowed_char) {
output = output.replace(disallowed_char, "").into()
normalize_nfc_filename(output)
}
/// See normalize_filename(). This function expects NFC-normalized input.
///
/// Runs the remaining clean-up steps in order: drop characters matched by
/// `disallowed_char`, apply the `WINDOWS_DEVICE_NAME` replacement, then pass
/// the name through `truncate_filename` with `MAX_FILENAME_LENGTH`.
///
/// `fname` is only reassigned when a step actually produced a new string, so
/// a borrowed input that needs no changes comes back still borrowed. Callers
/// can therefore treat a `Cow::Borrowed` result as "nothing was altered".
fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
// only allocate a filtered copy when a disallowed character is present
if fname.chars().any(disallowed_char) {
fname = fname.replace(disallowed_char, "").into()
}
// `replace_all` yields `Cow::Owned` only when it rewrote something; the
// replacement puts an underscore between capture groups 1 and 2
if let Cow::Owned(o) = WINDOWS_DEVICE_NAME.replace_all(output.as_ref(), "${1}_${2}") {
output = o.into();
if let Cow::Owned(o) = WINDOWS_DEVICE_NAME.replace_all(fname.as_ref(), "${1}_${2}") {
fname = o.into();
}
// presumably `truncate_filename` returns `Cow::Owned` only when the name had
// to be shortened -- its definition is not visible here
if let Cow::Owned(o) = truncate_filename(output.as_ref(), MAX_FILENAME_LENGTH) {
output = o.into();
if let Cow::Owned(o) = truncate_filename(fname.as_ref(), MAX_FILENAME_LENGTH) {
fname = o.into();
}
output
fname
}
/// Return `fname` in NFC form, provided it is already a valid media filename.
///
/// Yields `None` whenever normalization would have to alter the name
/// (NFD encoding, invalid characters, etc).
///
/// Apple devices are treated specially: a filename may be stored on disk in
/// NFD encoding there, yet still be accessible as NFC. On those targets an
/// NFD name that is otherwise valid is returned converted to NFC instead of
/// being rejected.
pub(super) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
    if !cfg!(target_vendor = "apple") {
        // elsewhere the name must pass the full normalization untouched
        return match normalize_filename(fname) {
            Cow::Borrowed(_) => Some(Cow::Borrowed(fname)),
            Cow::Owned(_) => None,
        };
    }

    if is_nfc(fname) {
        // already NFC, so only the remaining validity checks apply
        return match normalize_nfc_filename(Cow::Borrowed(fname)) {
            Cow::Borrowed(_) => Some(Cow::Borrowed(fname)),
            Cow::Owned(_) => None,
        };
    }

    // NFD on disk: compose to NFC first, then make sure nothing else
    // would need to change
    let composed: String = fname.chars().nfc().collect();
    let unchanged = match normalize_nfc_filename(Cow::Borrowed(composed.as_str())) {
        Cow::Borrowed(_) => true,
        Cow::Owned(_) => false,
    };
    if unchanged {
        Some(Cow::Owned(composed))
    } else {
        None
    }
}
/// Write desired_name into folder, renaming if existing file has different content.