Refactor MediaIter
Removes the dependency on a local path and allows the unicode filename checks to be skipped where appropriate.
parent ce35ba123b
commit d5772ac43a
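
For context, a minimal usage sketch based on the new API introduced below (not part of the commit itself): media for export can now come from any reader rather than a folder on disk, and entries constructed directly with MediaIter::new skip the per-file unicode (NFC) filename check that from_folder still performs. The module paths match the diff; the filename and byte contents are invented.

// Sketch only: assumes it lives inside the anki crate, using the types added below.
use std::io::Cursor;

use crate::import_export::package::{MediaIter, MediaIterEntry};

fn in_memory_media() -> MediaIter {
    // No local path is involved, and no NFC check runs here; the caller
    // asserts that `nfc_filename` is already normalized.
    let entry = MediaIterEntry {
        nfc_filename: "example.png".to_string(), // invented name
        data: Box::new(Cursor::new(vec![0u8; 4])), // invented bytes
    };
    MediaIter::new(std::iter::once(Ok(entry)))
}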
@@ -8,7 +8,7 @@ use std::path::PathBuf;
 use crate::collection::CollectionBuilder;
 use crate::import_export::gather::ExchangeData;
 use crate::import_export::package::colpkg::export::export_collection;
-use crate::import_export::package::colpkg::export::MediaIter;
+use crate::import_export::package::media::MediaIter;
 use crate::import_export::package::Meta;
 use crate::import_export::ExportProgress;
 use crate::import_export::IncrementableProgress;
@@ -10,8 +10,8 @@ use zip::ZipArchive;
 use super::Context;
 use crate::error::FileIoSnafu;
 use crate::error::FileOp;
-use crate::import_export::package::colpkg::export::MediaCopier;
 use crate::import_export::package::media::extract_media_entries;
+use crate::import_export::package::media::MediaCopier;
 use crate::import_export::package::media::SafeMediaEntry;
 use crate::import_export::ImportProgress;
 use crate::import_export::IncrementableProgress;
@@ -1,9 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs::File;
use std::io;
use std::io::Read;
@@ -12,8 +10,6 @@ use std::path::Path;
use std::path::PathBuf;

use prost::Message;
use sha1::Digest;
use sha1::Sha1;
use tempfile::NamedTempFile;
use zip::write::FileOptions;
use zip::CompressionMethod;
@@ -27,14 +23,14 @@ use super::super::MediaEntry;
use super::super::Meta;
use super::super::Version;
use crate::collection::CollectionBuilder;
use crate::import_export::package::media::MediaCopier;
use crate::import_export::package::media::MediaIter;
use crate::import_export::ExportProgress;
use crate::import_export::IncrementableProgress;
use crate::io::atomic_rename;
use crate::io::new_tempfile;
use crate::io::new_tempfile_in_parent_of;
use crate::io::open_file;
use crate::io::read_dir_files;
use crate::media::files::filename_if_normalized;
use crate::prelude::*;
use crate::storage::SchemaVersion;

@@ -82,36 +78,6 @@ impl Collection {
     }
 }

-pub struct MediaIter(Box<dyn Iterator<Item = io::Result<PathBuf>>>);
-
-impl MediaIter {
-    /// Iterator over all files in the given path, without traversing
-    /// subfolders.
-    pub fn from_folder(path: &Path) -> Result<Self> {
-        Ok(Self(Box::new(
-            read_dir_files(path)?.map(|res| res.map(|entry| entry.path())),
-        )))
-    }
-
-    /// Iterator over all given files in the given folder.
-    /// Missing files are silently ignored.
-    pub fn from_file_list(
-        list: impl IntoIterator<Item = String> + 'static,
-        folder: PathBuf,
-    ) -> Self {
-        Self(Box::new(
-            list.into_iter()
-                .map(move |file| folder.join(file))
-                .filter(|path| path.exists())
-                .map(Ok),
-        ))
-    }
-
-    pub fn empty() -> Self {
-        Self(Box::new(std::iter::empty()))
-    }
-}
-
 fn export_collection_file(
     out_path: impl AsRef<Path>,
     col_path: impl AsRef<Path>,
@@ -298,88 +264,24 @@ fn write_media_files(
     let mut incrementor = progress.incrementor(ExportProgress::Media);
     for (index, res) in media.0.enumerate() {
         incrementor.increment()?;
-        let path = res?;
+        let mut entry = res?;

         zip.start_file(index.to_string(), file_options_stored())?;

-        let mut file = open_file(&path)?;
-        let file_name = path.file_name().or_invalid("not a file path")?;
-        let name = normalized_unicode_file_name(file_name)?;
-
-        let (size, sha1) = copier.copy(&mut file, zip)?;
-        media_entries.push(MediaEntry::new(name, size, sha1));
+        let (size, sha1) = copier.copy(&mut entry.data, zip)?;
+        media_entries.push(MediaEntry::new(entry.nfc_filename, size, sha1));
     }

     Ok(())
 }

-fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
-    let filename = filename.to_str().or_invalid("non-unicode filename")?;
-    filename_if_normalized(filename)
-        .map(Cow::into_owned)
-        .ok_or(AnkiError::MediaCheckRequired)
-}
-
-/// Copies and hashes while optionally encoding.
-/// If compressing, the encoder is reused to optimize for repeated calls.
-pub(crate) struct MediaCopier {
-    encoding: bool,
-    encoder: Option<RawEncoder<'static>>,
-    buf: [u8; 64 * 1024],
-}
-
-impl MediaCopier {
-    pub(crate) fn new(encoding: bool) -> Self {
-        Self {
-            encoding,
-            encoder: None,
-            buf: [0; 64 * 1024],
-        }
-    }
-
-    fn encoder(&mut self) -> Option<RawEncoder<'static>> {
-        self.encoding.then(|| {
-            self.encoder
-                .take()
-                .unwrap_or_else(|| RawEncoder::with_dictionary(0, &[]).unwrap())
-        })
-    }
-
-    /// Returns size and sha1 hash of the copied data.
-    pub(crate) fn copy(
-        &mut self,
-        reader: &mut impl Read,
-        writer: &mut impl Write,
-    ) -> Result<(usize, Sha1Hash)> {
-        let mut size = 0;
-        let mut hasher = Sha1::new();
-        self.buf = [0; 64 * 1024];
-        let mut wrapped_writer = MaybeEncodedWriter::new(writer, self.encoder());
-
-        loop {
-            let count = match reader.read(&mut self.buf) {
-                Ok(0) => break,
-                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
-                result => result?,
-            };
-            size += count;
-            hasher.update(&self.buf[..count]);
-            wrapped_writer.write(&self.buf[..count])?;
-        }
-
-        self.encoder = wrapped_writer.finish()?;
-
-        Ok((size, hasher.finalize().into()))
-    }
-}
-
-enum MaybeEncodedWriter<'a, W: Write> {
+pub(crate) enum MaybeEncodedWriter<'a, W: Write> {
     Stored(&'a mut W),
     Encoded(zio::Writer<&'a mut W, RawEncoder<'static>>),
 }

 impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
-    fn new(writer: &'a mut W, encoder: Option<RawEncoder<'static>>) -> Self {
+    pub fn new(writer: &'a mut W, encoder: Option<RawEncoder<'static>>) -> Self {
         if let Some(encoder) = encoder {
             Self::Encoded(zio::Writer::new(writer, encoder))
         } else {
@@ -387,7 +289,7 @@ impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
         }
     }

-    fn write(&mut self, buf: &[u8]) -> Result<()> {
+    pub fn write(&mut self, buf: &[u8]) -> Result<()> {
         match self {
             Self::Stored(writer) => writer.write_all(buf)?,
             Self::Encoded(writer) => writer.write_all(buf)?,
@@ -395,7 +297,7 @@ impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
         Ok(())
     }

-    fn finish(self) -> Result<Option<RawEncoder<'static>>> {
+    pub fn finish(self) -> Result<Option<RawEncoder<'static>>> {
         Ok(match self {
             Self::Stored(_) => None,
             Self::Encoded(mut writer) => {
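
As a side note, a small sketch (not from the commit) of the MediaCopier that the colpkg exporter and importer now share via package::media: it copies any reader into any writer, optionally zstd-encoding, and returns the byte count and SHA-1 of the source data. The input bytes are invented.

// Sketch only: assumes crate-internal code, since MediaCopier is pub(crate).
use crate::import_export::package::media::MediaCopier;
use crate::prelude::*;

fn hash_and_copy() -> Result<()> {
    let mut copier = MediaCopier::new(false); // false = store without zstd encoding
    let mut input: &[u8] = b"invented media bytes"; // any std::io::Read source
    let mut output = Vec::new(); // any std::io::Write sink
    let (size, sha1) = copier.copy(&mut input, &mut output)?;
    assert_eq!(size, output.len()); // stored mode copies byte for byte
    let _ = sha1; // 20-byte SHA-1 digest of the data read
    Ok(())
}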
@@ -3,25 +3,36 @@

use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::OsString;
use std::fs;
use std::fs::File;
use std::io;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;

use prost::Message;
use sha1::Digest;
use sha1::Sha1;
use zip::read::ZipFile;
use zip::ZipArchive;
use zstd::stream::copy_decode;
use zstd::stream::raw::Encoder as RawEncoder;

use super::colpkg::export::MediaCopier;
use super::MediaEntries;
use super::MediaEntry;
use super::Meta;
use crate::error::FileIoError;
use crate::error::FileOp;
use crate::error::ImportError;
use crate::error::InvalidInputError;
use crate::import_export::package::colpkg::export::MaybeEncodedWriter;
use crate::io::atomic_rename;
use crate::io::filename_is_safe;
use crate::io::new_tempfile_in;
use crate::io::read_dir_files;
use crate::media::files::filename_if_normalized;
use crate::media::files::normalize_filename;
use crate::prelude::*;
@@ -171,6 +182,163 @@ impl MediaEntries {
     }
 }

+pub struct MediaIterEntry {
+    pub nfc_filename: String,
+    pub data: Box<dyn Read>,
+}
+
+#[derive(Debug)]
+pub enum MediaIterError {
+    InvalidFilename {
+        filename: OsString,
+    },
+    IoError {
+        filename: String,
+        source: io::Error,
+    },
+    Other {
+        source: Box<dyn std::error::Error + Send + Sync>,
+    },
+}
+
+impl TryFrom<&Path> for MediaIterEntry {
+    type Error = MediaIterError;
+
+    fn try_from(value: &Path) -> std::result::Result<Self, Self::Error> {
+        let nfc_filename: String = value
+            .file_name()
+            .and_then(|s| s.to_str())
+            .and_then(filename_if_normalized)
+            .ok_or_else(|| MediaIterError::InvalidFilename {
+                filename: value.as_os_str().to_owned(),
+            })?
+            .into();
+        let file = File::open(value).map_err(|err| MediaIterError::IoError {
+            filename: nfc_filename.clone(),
+            source: err,
+        })?;
+        Ok(MediaIterEntry {
+            nfc_filename,
+            data: Box::new(file) as _,
+        })
+    }
+}
+
+impl From<MediaIterError> for AnkiError {
+    fn from(err: MediaIterError) -> Self {
+        match err {
+            MediaIterError::InvalidFilename { .. } => AnkiError::MediaCheckRequired,
+            MediaIterError::IoError { filename, source } => FileIoError {
+                path: filename.into(),
+                op: FileOp::Read,
+                source,
+            }
+            .into(),
+            MediaIterError::Other { source } => InvalidInputError {
+                message: "".to_string(),
+                source: Some(source),
+                backtrace: None,
+            }
+            .into(),
+        }
+    }
+}
+
+pub struct MediaIter(pub Box<dyn Iterator<Item = Result<MediaIterEntry, MediaIterError>>>);
+
+impl MediaIter {
+    pub fn new<I>(iter: I) -> Self
+    where
+        I: Iterator<Item = Result<MediaIterEntry, MediaIterError>> + 'static,
+    {
+        Self(Box::new(iter))
+    }
+
+    /// Iterator over all files in the given path, without traversing
+    /// subfolders.
+    pub fn from_folder(path: &Path) -> Result<Self> {
+        let path2 = path.to_owned();
+        Ok(Self::new(read_dir_files(path)?.map(move |res| match res {
+            Ok(entry) => MediaIterEntry::try_from(entry.path().as_path()),
+            Err(err) => Err(MediaIterError::IoError {
+                filename: path2.to_string_lossy().into(),
+                source: err,
+            }),
+        })))
+    }
+
+    /// Iterator over all given files in the given folder.
+    /// Missing files are silently ignored.
+    pub fn from_file_list(
+        list: impl IntoIterator<Item = String> + 'static,
+        folder: PathBuf,
+    ) -> Self {
+        Self::new(
+            list.into_iter()
+                .map(move |file| folder.join(file))
+                .filter(|path| path.exists())
+                .map(|path| MediaIterEntry::try_from(path.as_path())),
+        )
+    }
+
+    pub fn empty() -> Self {
+        Self::new([].into_iter())
+    }
+}
+
+/// Copies and hashes while optionally encoding.
+/// If compressing, the encoder is reused to optimize for repeated calls.
+pub(crate) struct MediaCopier {
+    encoding: bool,
+    encoder: Option<RawEncoder<'static>>,
+    buf: [u8; 64 * 1024],
+}
+
+impl MediaCopier {
+    pub(crate) fn new(encoding: bool) -> Self {
+        Self {
+            encoding,
+            encoder: None,
+            buf: [0; 64 * 1024],
+        }
+    }
+
+    fn encoder(&mut self) -> Option<RawEncoder<'static>> {
+        self.encoding.then(|| {
+            self.encoder
+                .take()
+                .unwrap_or_else(|| RawEncoder::with_dictionary(0, &[]).unwrap())
+        })
+    }
+
+    /// Returns size and sha1 hash of the copied data.
+    pub(crate) fn copy(
+        &mut self,
+        reader: &mut impl Read,
+        writer: &mut impl Write,
+    ) -> Result<(usize, Sha1Hash)> {
+        let mut size = 0;
+        let mut hasher = Sha1::new();
+        self.buf = [0; 64 * 1024];
+        let mut wrapped_writer = MaybeEncodedWriter::new(writer, self.encoder());
+
+        loop {
+            let count = match reader.read(&mut self.buf) {
+                Ok(0) => break,
+                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                result => result?,
+            };
+            size += count;
+            hasher.update(&self.buf[..count]);
+            wrapped_writer.write(&self.buf[..count])?;
+        }
+
+        self.encoder = wrapped_writer.finish()?;
+
+        Ok((size, hasher.finalize().into()))
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
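
To illustrate how the retained unicode check surfaces (again, not part of the commit): from_folder and from_file_list build entries through MediaIterEntry::try_from, which rejects filenames that are not NFC-normalized, and that error still maps to AnkiError::MediaCheckRequired; hand-built entries bypass the check. The filename below is invented.

// Sketch only: assumes crate-internal code next to the new MediaIterError.
use std::ffi::OsString;

use crate::error::AnkiError;
use crate::import_export::package::MediaIterError;

fn unicode_check_maps_to_media_check() {
    // "cafe" followed by a combining acute accent is not NFC-normalized,
    // so try_from on such a path would return InvalidFilename.
    let err = MediaIterError::InvalidFilename {
        filename: OsString::from("cafe\u{301}.png"),
    };
    // The From impl added in this diff preserves the old behaviour.
    let anki_err: AnkiError = err.into();
    assert!(matches!(anki_err, AnkiError::MediaCheckRequired));
}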
@@ -9,6 +9,9 @@ mod meta;
 pub(crate) use apkg::NoteMeta;
 pub(crate) use colpkg::export::export_colpkg_from_data;
 pub use colpkg::import::import_colpkg;
+pub use media::MediaIter;
+pub use media::MediaIterEntry;
+pub use media::MediaIterError;
 pub(self) use meta::Meta;
 pub(self) use meta::Version;
