Expand normalization checks on import/export

The old Python code was only checking for NFC encoding, but we should
check for other issues like special filenames on windows (eg con.mp3)

- On export, the user is told to use Check Media if their media has
invalid filenames.
- On import, legacy packages will be transparently normalized. Since we're
doing the checks on export as well, any invalid names in a v3 package
are an error.
This commit is contained in:
Damien Elmes 2022-03-17 17:31:19 +10:00
parent 57a4495d92
commit 4687620f5e
8 changed files with 67 additions and 15 deletions

View File

@ -9,6 +9,7 @@ errors-100-tags-max =
is no need to select child tags if you have selected a parent tag.
errors-multiple-notetypes-selected = Please select notes from only one notetype.
errors-please-check-database = Please use the Check Database action, then try again.
errors-please-check-media = Please use the Check Media action, then try again.
errors-collection-too-new = This collection requires a newer version of Anki to open.
## Card Rendering

View File

@ -36,6 +36,7 @@ impl AnkiError {
AnkiError::CustomStudyError(_) => Kind::CustomStudyError,
AnkiError::ImportError(_) => Kind::ImportError,
AnkiError::FileIoError(_) => Kind::IoError,
AnkiError::MediaCheckRequired => Kind::InvalidInput,
};
pb::BackendError {

View File

@ -42,6 +42,7 @@ pub enum AnkiError {
UndoEmpty,
MultipleNotetypesSelected,
DatabaseCheckRequired,
MediaCheckRequired,
CustomStudyError(CustomStudyError),
ImportError(ImportError),
}
@ -97,6 +98,7 @@ impl AnkiError {
AnkiError::InvalidRegex(err) => format!("<pre>{}</pre>", err),
AnkiError::MultipleNotetypesSelected => tr.errors_multiple_notetypes_selected().into(),
AnkiError::DatabaseCheckRequired => tr.errors_please_check_database().into(),
AnkiError::MediaCheckRequired => tr.errors_please_check_media().into(),
AnkiError::CustomStudyError(err) => err.localized_description(tr),
AnkiError::ImportError(err) => err.localized_description(tr),
AnkiError::IoError(_)

View File

@ -18,7 +18,9 @@ use zstd::{
use super::super::{MediaEntries, MediaEntry, Meta, Version};
use crate::{
collection::CollectionBuilder, media::files::sha1_of_data, prelude::*, text::normalize_to_nfc,
collection::CollectionBuilder,
media::files::{filename_if_normalized, sha1_of_data},
prelude::*,
};
/// Enable multithreaded compression if over this size. For smaller files,
@ -279,16 +281,18 @@ fn make_media_entry(data: &[u8], name: String) -> MediaEntry {
}
fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
entry
.file_name()
.to_str()
.map(|name| normalize_to_nfc(name).into())
.ok_or_else(|| {
let filename = entry.file_name();
let filename = filename.to_str().ok_or_else(|| {
AnkiError::IoError(format!(
"non-unicode file name: {}",
entry.file_name().to_string_lossy()
))
})
})?;
if let Some(filename) = filename_if_normalized(filename) {
Ok(filename.into_owned())
} else {
Err(AnkiError::MediaCheckRequired)
}
}
/// Writes media files while compressing according to the targeted version.

View File

@ -2,6 +2,7 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
borrow::Cow,
collections::HashMap,
fs::{self, File},
io::{self, Read, Write},
@ -21,8 +22,8 @@ use crate::{
package::{MediaEntries, MediaEntry, Meta},
ImportProgress,
},
media::files::normalize_filename,
prelude::*,
text::normalize_to_nfc,
};
impl Meta {
@ -119,8 +120,8 @@ fn restore_media(
if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
check_filename_safe(&entry.name)?;
let nfc_name = normalize_to_nfc(&entry.name);
let file_path = media_folder.join(nfc_name.as_ref());
let normalized = maybe_normalizing(&entry.name, meta.strict_media_checks())?;
let file_path = media_folder.join(normalized.as_ref());
let size_in_colpkg = if meta.media_list_is_hashmap() {
zip_file.size()
} else {
@ -151,6 +152,18 @@ fn restore_media(
Ok(())
}
/// - If strict is true, return an error if not normalized.
/// - If false, return the normalized version.
fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
let normalized = normalize_filename(name);
if strict && matches!(normalized, Cow::Owned(_)) {
// exporting code should have checked this
Err(AnkiError::ImportError(ImportError::Corrupt))
} else {
Ok(normalized)
}
}
/// Return an error if name contains any path separators.
fn check_filename_safe(name: &str) -> Result<()> {
let mut components = Path::new(name).components();
@ -238,4 +251,10 @@ mod test {
assert!(check_filename_safe("\\foo").is_err());
}
}
#[test]
fn normalization() {
assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_");
assert!(&maybe_normalizing("con", true).is_err());
}
}

View File

@ -68,3 +68,24 @@ fn roundtrip() -> Result<()> {
Ok(())
}
/// Files with an invalid encoding should prevent export, except
/// on Apple platforms where the encoding is transparently changed.
#[test]
#[cfg(not(target_vendor = "apple"))]
fn normalization_check_on_export() -> Result<()> {
let _dir = tempdir()?;
let dir = _dir.path();
let col = collection_with_media(dir, "normalize")?;
let colpkg_name = dir.join("normalize.colpkg");
// manually write a file in the wrong encoding.
std::fs::write(col.media_folder.join("ぱぱ.jpg"), "nfd encoding")?;
assert_eq!(
col.export_colpkg(&colpkg_name, true, false, |_| ())
.unwrap_err(),
AnkiError::MediaCheckRequired
);
Ok(())
}

View File

@ -39,6 +39,10 @@ impl Meta {
self.is_legacy()
}
pub(super) fn strict_media_checks(&self) -> bool {
!self.is_legacy()
}
fn is_legacy(&self) -> bool {
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
}

View File

@ -133,7 +133,7 @@ pub(crate) fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
/// but can be accessed as NFC. On these devices, if the filename
/// is otherwise valid, the filename is returned as NFC.
#[allow(clippy::collapsible_else_if)]
pub(super) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
pub(crate) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
if cfg!(target_vendor = "apple") {
if !is_nfc(fname) {
let as_nfc = fname.chars().nfc().collect::<String>();