From 4fa4a5077c76bd9c5ecb1a3d4fd90793e305d897 Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Sat, 8 Feb 2020 18:59:55 +1000
Subject: [PATCH] don't add non-normalized files to media DB

---
 rslib/src/media/changetracker.rs | 22 ++++++++-----
 rslib/src/media/files.rs         | 53 +++++++++++++++++++++++++++-----
 2 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/rslib/src/media/changetracker.rs b/rslib/src/media/changetracker.rs
index 8949ca67d..fde118384 100644
--- a/rslib/src/media/changetracker.rs
+++ b/rslib/src/media/changetracker.rs
@@ -4,7 +4,8 @@
 use crate::err::{AnkiError, Result};
 use crate::media::database::{MediaDatabaseContext, MediaEntry};
 use crate::media::files::{
-    mtime_as_i64, sha1_of_file, MEDIA_SYNC_FILESIZE_LIMIT, NONSYNCABLE_FILENAME,
+    filename_if_normalized, mtime_as_i64, sha1_of_file, MEDIA_SYNC_FILESIZE_LIMIT,
+    NONSYNCABLE_FILENAME,
 };
 use std::collections::HashMap;
 use std::path::Path;
@@ -78,10 +79,6 @@ where
 
     /// Scan through the media folder, finding changes.
     /// Returns (added/changed files, removed files).
-    ///
-    /// Checks for invalid filenames and unicode normalization are deferred
-    /// until syncing time, as we can't trust the entries previous Anki versions
-    /// wrote are correct.
     fn media_folder_changes(
         &mut self,
         mut mtimes: HashMap<String, i64>,
@@ -99,13 +96,22 @@ where
 
             // if the filename is not valid unicode, skip it
             let fname_os = dentry.file_name();
-            let fname = match fname_os.to_str() {
+            let disk_fname = match fname_os.to_str() {
                 Some(s) => s,
                 None => continue,
             };
 
+            // make sure the filename is normalized
+            let fname = match filename_if_normalized(&disk_fname) {
+                Some(fname) => fname,
+                None => {
+                    // not normalized; skip it
+                    continue;
+                }
+            };
+
             // ignore blacklisted files
-            if NONSYNCABLE_FILENAME.is_match(fname) {
+            if NONSYNCABLE_FILENAME.is_match(fname.as_ref()) {
                 continue;
             }
 
@@ -116,7 +122,7 @@ where
             }
 
             // remove from mtimes for later deletion tracking
-            let previous_mtime = mtimes.remove(fname);
+            let previous_mtime = mtimes.remove(fname.as_ref());
 
             // skip files that have not been modified
             let mtime = metadata
diff --git a/rslib/src/media/files.rs b/rslib/src/media/files.rs
index b6bd4d192..74cdc5fe4 100644
--- a/rslib/src/media/files.rs
+++ b/rslib/src/media/files.rs
@@ -71,19 +71,58 @@ pub(crate) fn normalize_filename(fname: &str) -> Cow<str> {
         output = output.chars().nfc().collect::<String>().into();
     }
 
-    if output.chars().any(disallowed_char) {
-        output = output.replace(disallowed_char, "").into()
+    normalize_nfc_filename(output)
+}
+
+/// See normalize_filename(). This function expects NFC-normalized input.
+fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
+    if fname.chars().any(disallowed_char) {
+        fname = fname.replace(disallowed_char, "").into()
     }
 
-    if let Cow::Owned(o) = WINDOWS_DEVICE_NAME.replace_all(output.as_ref(), "${1}_${2}") {
-        output = o.into();
+    if let Cow::Owned(o) = WINDOWS_DEVICE_NAME.replace_all(fname.as_ref(), "${1}_${2}") {
+        fname = o.into();
     }
 
-    if let Cow::Owned(o) = truncate_filename(output.as_ref(), MAX_FILENAME_LENGTH) {
-        output = o.into();
+    if let Cow::Owned(o) = truncate_filename(fname.as_ref(), MAX_FILENAME_LENGTH) {
+        fname = o.into();
     }
 
-    output
+    fname
+}
+
+/// Return the filename in NFC form if the filename is valid.
+///
+/// Returns None if the filename is not normalized
+/// (NFD, invalid chars, etc)
+///
+/// On Apple devices, the filename may be stored on disk in NFD encoding,
+/// but can be accessed as NFC. On these devices, if the filename
+/// is otherwise valid, the filename is returned as NFC.
+#[allow(clippy::collapsible_if)]
+pub(super) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
+    if cfg!(target_vendor = "apple") {
+        if !is_nfc(fname) {
+            let as_nfc = fname.chars().nfc().collect::<String>();
+            if let Cow::Borrowed(_) = normalize_nfc_filename(as_nfc.as_str().into()) {
+                Some(as_nfc.into())
+            } else {
+                None
+            }
+        } else {
+            if let Cow::Borrowed(_) = normalize_nfc_filename(fname.into()) {
+                Some(fname.into())
+            } else {
+                None
+            }
+        }
+    } else {
+        if let Cow::Borrowed(_) = normalize_filename(fname) {
+            Some(fname.into())
+        } else {
+            None
+        }
+    }
 }
 
 /// Write desired_name into folder, renaming if existing file has different content.