Expand normalization checks on import/export
The old Python code only checked for NFC encoding, but we should also check for other issues, such as filenames that are reserved on Windows (e.g. con.mp3).
- On export, the user is told to use Check Media if their media has invalid filenames.
- On import, legacy packages will be transparently normalized. Since we're doing the checks on export as well, any invalid names in a v3 package are an error.
This commit is contained in:
parent
57a4495d92
commit
4687620f5e
@ -9,6 +9,7 @@ errors-100-tags-max =
|
||||
is no need to select child tags if you have selected a parent tag.
|
||||
errors-multiple-notetypes-selected = Please select notes from only one notetype.
|
||||
errors-please-check-database = Please use the Check Database action, then try again.
|
||||
errors-please-check-media = Please use the Check Media action, then try again.
|
||||
errors-collection-too-new = This collection requires a newer version of Anki to open.
|
||||
|
||||
## Card Rendering
|
||||
|
@ -36,6 +36,7 @@ impl AnkiError {
|
||||
AnkiError::CustomStudyError(_) => Kind::CustomStudyError,
|
||||
AnkiError::ImportError(_) => Kind::ImportError,
|
||||
AnkiError::FileIoError(_) => Kind::IoError,
|
||||
AnkiError::MediaCheckRequired => Kind::InvalidInput,
|
||||
};
|
||||
|
||||
pb::BackendError {
|
||||
|
@ -42,6 +42,7 @@ pub enum AnkiError {
|
||||
UndoEmpty,
|
||||
MultipleNotetypesSelected,
|
||||
DatabaseCheckRequired,
|
||||
MediaCheckRequired,
|
||||
CustomStudyError(CustomStudyError),
|
||||
ImportError(ImportError),
|
||||
}
|
||||
@ -97,6 +98,7 @@ impl AnkiError {
|
||||
AnkiError::InvalidRegex(err) => format!("<pre>{}</pre>", err),
|
||||
AnkiError::MultipleNotetypesSelected => tr.errors_multiple_notetypes_selected().into(),
|
||||
AnkiError::DatabaseCheckRequired => tr.errors_please_check_database().into(),
|
||||
AnkiError::MediaCheckRequired => tr.errors_please_check_media().into(),
|
||||
AnkiError::CustomStudyError(err) => err.localized_description(tr),
|
||||
AnkiError::ImportError(err) => err.localized_description(tr),
|
||||
AnkiError::IoError(_)
|
||||
|
@ -18,7 +18,9 @@ use zstd::{
|
||||
|
||||
use super::super::{MediaEntries, MediaEntry, Meta, Version};
|
||||
use crate::{
|
||||
collection::CollectionBuilder, media::files::sha1_of_data, prelude::*, text::normalize_to_nfc,
|
||||
collection::CollectionBuilder,
|
||||
media::files::{filename_if_normalized, sha1_of_data},
|
||||
prelude::*,
|
||||
};
|
||||
|
||||
/// Enable multithreaded compression if over this size. For smaller files,
|
||||
@ -279,16 +281,18 @@ fn make_media_entry(data: &[u8], name: String) -> MediaEntry {
|
||||
}
|
||||
|
||||
fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.map(|name| normalize_to_nfc(name).into())
|
||||
.ok_or_else(|| {
|
||||
AnkiError::IoError(format!(
|
||||
"non-unicode file name: {}",
|
||||
entry.file_name().to_string_lossy()
|
||||
))
|
||||
})
|
||||
let filename = entry.file_name();
|
||||
let filename = filename.to_str().ok_or_else(|| {
|
||||
AnkiError::IoError(format!(
|
||||
"non-unicode file name: {}",
|
||||
entry.file_name().to_string_lossy()
|
||||
))
|
||||
})?;
|
||||
if let Some(filename) = filename_if_normalized(filename) {
|
||||
Ok(filename.into_owned())
|
||||
} else {
|
||||
Err(AnkiError::MediaCheckRequired)
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes media files while compressing according to the targeted version.
|
||||
|
@ -2,6 +2,7 @@
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
fs::{self, File},
|
||||
io::{self, Read, Write},
|
||||
@ -21,8 +22,8 @@ use crate::{
|
||||
package::{MediaEntries, MediaEntry, Meta},
|
||||
ImportProgress,
|
||||
},
|
||||
media::files::normalize_filename,
|
||||
prelude::*,
|
||||
text::normalize_to_nfc,
|
||||
};
|
||||
|
||||
impl Meta {
|
||||
@ -119,8 +120,8 @@ fn restore_media(
|
||||
|
||||
if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
|
||||
check_filename_safe(&entry.name)?;
|
||||
let nfc_name = normalize_to_nfc(&entry.name);
|
||||
let file_path = media_folder.join(nfc_name.as_ref());
|
||||
let normalized = maybe_normalizing(&entry.name, meta.strict_media_checks())?;
|
||||
let file_path = media_folder.join(normalized.as_ref());
|
||||
let size_in_colpkg = if meta.media_list_is_hashmap() {
|
||||
zip_file.size()
|
||||
} else {
|
||||
@ -151,6 +152,18 @@ fn restore_media(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// - If strict is true, return an error if not normalized.
|
||||
/// - If false, return the normalized version.
|
||||
fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
|
||||
let normalized = normalize_filename(name);
|
||||
if strict && matches!(normalized, Cow::Owned(_)) {
|
||||
// exporting code should have checked this
|
||||
Err(AnkiError::ImportError(ImportError::Corrupt))
|
||||
} else {
|
||||
Ok(normalized)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an error if name contains any path separators.
|
||||
fn check_filename_safe(name: &str) -> Result<()> {
|
||||
let mut components = Path::new(name).components();
|
||||
@ -238,4 +251,10 @@ mod test {
|
||||
assert!(check_filename_safe("\\foo").is_err());
|
||||
}
|
||||
}
|
||||
|
||||
/// `"con"` is rewritten to `"con_"` when normalizing leniently, and is
/// rejected outright when strict checks are requested.
#[test]
fn normalization() {
    assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_");
    // assert on the boolean itself rather than on a reference to it
    assert!(maybe_normalizing("con", true).is_err());
}
|
||||
}
|
||||
|
@ -68,3 +68,24 @@ fn roundtrip() -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Files with an invalid encoding should prevent export, except
/// on Apple platforms where the encoding is transparently changed.
#[test]
#[cfg(not(target_vendor = "apple"))]
fn normalization_check_on_export() -> Result<()> {
    let temp_dir = tempdir()?;
    let dir = temp_dir.path();

    let col = collection_with_media(dir, "normalize")?;
    let colpkg_name = dir.join("normalize.colpkg");
    // Manually write a file in the wrong encoding. The name is spelled with
    // explicit escapes (U+306F followed by the combining mark U+309A, twice)
    // so that the decomposed form survives editors and tools that would
    // silently recompose a raw literal to NFC.
    std::fs::write(
        col.media_folder
            .join("\u{306f}\u{309a}\u{306f}\u{309a}.jpg"),
        "nfd encoding",
    )?;
    assert_eq!(
        col.export_colpkg(&colpkg_name, true, false, |_| ())
            .unwrap_err(),
        AnkiError::MediaCheckRequired
    );

    Ok(())
}
|
||||
|
@ -39,6 +39,10 @@ impl Meta {
|
||||
self.is_legacy()
|
||||
}
|
||||
|
||||
pub(super) fn strict_media_checks(&self) -> bool {
|
||||
!self.is_legacy()
|
||||
}
|
||||
|
||||
fn is_legacy(&self) -> bool {
|
||||
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ pub(crate) fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
|
||||
/// but can be accessed as NFC. On these devices, if the filename
|
||||
/// is otherwise valid, the filename is returned as NFC.
|
||||
#[allow(clippy::collapsible_else_if)]
|
||||
pub(super) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
|
||||
pub(crate) fn filename_if_normalized(fname: &str) -> Option<Cow<str>> {
|
||||
if cfg!(target_vendor = "apple") {
|
||||
if !is_nfc(fname) {
|
||||
let as_nfc = fname.chars().nfc().collect::<String>();
|
||||
|
Loading…
Reference in New Issue
Block a user