Refactor MediaIter
Removes the dependency on a local path, and allows the Unicode checks to be skipped if appropriate.
This commit is contained in:
parent
ce35ba123b
commit
d5772ac43a
@ -8,7 +8,7 @@ use std::path::PathBuf;
|
|||||||
use crate::collection::CollectionBuilder;
|
use crate::collection::CollectionBuilder;
|
||||||
use crate::import_export::gather::ExchangeData;
|
use crate::import_export::gather::ExchangeData;
|
||||||
use crate::import_export::package::colpkg::export::export_collection;
|
use crate::import_export::package::colpkg::export::export_collection;
|
||||||
use crate::import_export::package::colpkg::export::MediaIter;
|
use crate::import_export::package::media::MediaIter;
|
||||||
use crate::import_export::package::Meta;
|
use crate::import_export::package::Meta;
|
||||||
use crate::import_export::ExportProgress;
|
use crate::import_export::ExportProgress;
|
||||||
use crate::import_export::IncrementableProgress;
|
use crate::import_export::IncrementableProgress;
|
||||||
|
@ -10,8 +10,8 @@ use zip::ZipArchive;
|
|||||||
use super::Context;
|
use super::Context;
|
||||||
use crate::error::FileIoSnafu;
|
use crate::error::FileIoSnafu;
|
||||||
use crate::error::FileOp;
|
use crate::error::FileOp;
|
||||||
use crate::import_export::package::colpkg::export::MediaCopier;
|
|
||||||
use crate::import_export::package::media::extract_media_entries;
|
use crate::import_export::package::media::extract_media_entries;
|
||||||
|
use crate::import_export::package::media::MediaCopier;
|
||||||
use crate::import_export::package::media::SafeMediaEntry;
|
use crate::import_export::package::media::SafeMediaEntry;
|
||||||
use crate::import_export::ImportProgress;
|
use crate::import_export::ImportProgress;
|
||||||
use crate::import_export::IncrementableProgress;
|
use crate::import_export::IncrementableProgress;
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
@ -12,8 +10,6 @@ use std::path::Path;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use prost::Message;
|
use prost::Message;
|
||||||
use sha1::Digest;
|
|
||||||
use sha1::Sha1;
|
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
use zip::write::FileOptions;
|
use zip::write::FileOptions;
|
||||||
use zip::CompressionMethod;
|
use zip::CompressionMethod;
|
||||||
@ -27,14 +23,14 @@ use super::super::MediaEntry;
|
|||||||
use super::super::Meta;
|
use super::super::Meta;
|
||||||
use super::super::Version;
|
use super::super::Version;
|
||||||
use crate::collection::CollectionBuilder;
|
use crate::collection::CollectionBuilder;
|
||||||
|
use crate::import_export::package::media::MediaCopier;
|
||||||
|
use crate::import_export::package::media::MediaIter;
|
||||||
use crate::import_export::ExportProgress;
|
use crate::import_export::ExportProgress;
|
||||||
use crate::import_export::IncrementableProgress;
|
use crate::import_export::IncrementableProgress;
|
||||||
use crate::io::atomic_rename;
|
use crate::io::atomic_rename;
|
||||||
use crate::io::new_tempfile;
|
use crate::io::new_tempfile;
|
||||||
use crate::io::new_tempfile_in_parent_of;
|
use crate::io::new_tempfile_in_parent_of;
|
||||||
use crate::io::open_file;
|
use crate::io::open_file;
|
||||||
use crate::io::read_dir_files;
|
|
||||||
use crate::media::files::filename_if_normalized;
|
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::storage::SchemaVersion;
|
use crate::storage::SchemaVersion;
|
||||||
|
|
||||||
@ -82,36 +78,6 @@ impl Collection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MediaIter(Box<dyn Iterator<Item = io::Result<PathBuf>>>);
|
|
||||||
|
|
||||||
impl MediaIter {
|
|
||||||
/// Iterator over all files in the given path, without traversing
|
|
||||||
/// subfolders.
|
|
||||||
pub fn from_folder(path: &Path) -> Result<Self> {
|
|
||||||
Ok(Self(Box::new(
|
|
||||||
read_dir_files(path)?.map(|res| res.map(|entry| entry.path())),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Iterator over all given files in the given folder.
|
|
||||||
/// Missing files are silently ignored.
|
|
||||||
pub fn from_file_list(
|
|
||||||
list: impl IntoIterator<Item = String> + 'static,
|
|
||||||
folder: PathBuf,
|
|
||||||
) -> Self {
|
|
||||||
Self(Box::new(
|
|
||||||
list.into_iter()
|
|
||||||
.map(move |file| folder.join(file))
|
|
||||||
.filter(|path| path.exists())
|
|
||||||
.map(Ok),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn empty() -> Self {
|
|
||||||
Self(Box::new(std::iter::empty()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn export_collection_file(
|
fn export_collection_file(
|
||||||
out_path: impl AsRef<Path>,
|
out_path: impl AsRef<Path>,
|
||||||
col_path: impl AsRef<Path>,
|
col_path: impl AsRef<Path>,
|
||||||
@ -298,88 +264,24 @@ fn write_media_files(
|
|||||||
let mut incrementor = progress.incrementor(ExportProgress::Media);
|
let mut incrementor = progress.incrementor(ExportProgress::Media);
|
||||||
for (index, res) in media.0.enumerate() {
|
for (index, res) in media.0.enumerate() {
|
||||||
incrementor.increment()?;
|
incrementor.increment()?;
|
||||||
let path = res?;
|
let mut entry = res?;
|
||||||
|
|
||||||
zip.start_file(index.to_string(), file_options_stored())?;
|
zip.start_file(index.to_string(), file_options_stored())?;
|
||||||
|
|
||||||
let mut file = open_file(&path)?;
|
let (size, sha1) = copier.copy(&mut entry.data, zip)?;
|
||||||
let file_name = path.file_name().or_invalid("not a file path")?;
|
media_entries.push(MediaEntry::new(entry.nfc_filename, size, sha1));
|
||||||
let name = normalized_unicode_file_name(file_name)?;
|
|
||||||
|
|
||||||
let (size, sha1) = copier.copy(&mut file, zip)?;
|
|
||||||
media_entries.push(MediaEntry::new(name, size, sha1));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
|
pub(crate) enum MaybeEncodedWriter<'a, W: Write> {
|
||||||
let filename = filename.to_str().or_invalid("non-unicode filename")?;
|
|
||||||
filename_if_normalized(filename)
|
|
||||||
.map(Cow::into_owned)
|
|
||||||
.ok_or(AnkiError::MediaCheckRequired)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Copies and hashes while optionally encoding.
|
|
||||||
/// If compressing, the encoder is reused to optimize for repeated calls.
|
|
||||||
pub(crate) struct MediaCopier {
|
|
||||||
encoding: bool,
|
|
||||||
encoder: Option<RawEncoder<'static>>,
|
|
||||||
buf: [u8; 64 * 1024],
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MediaCopier {
|
|
||||||
pub(crate) fn new(encoding: bool) -> Self {
|
|
||||||
Self {
|
|
||||||
encoding,
|
|
||||||
encoder: None,
|
|
||||||
buf: [0; 64 * 1024],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn encoder(&mut self) -> Option<RawEncoder<'static>> {
|
|
||||||
self.encoding.then(|| {
|
|
||||||
self.encoder
|
|
||||||
.take()
|
|
||||||
.unwrap_or_else(|| RawEncoder::with_dictionary(0, &[]).unwrap())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns size and sha1 hash of the copied data.
|
|
||||||
pub(crate) fn copy(
|
|
||||||
&mut self,
|
|
||||||
reader: &mut impl Read,
|
|
||||||
writer: &mut impl Write,
|
|
||||||
) -> Result<(usize, Sha1Hash)> {
|
|
||||||
let mut size = 0;
|
|
||||||
let mut hasher = Sha1::new();
|
|
||||||
self.buf = [0; 64 * 1024];
|
|
||||||
let mut wrapped_writer = MaybeEncodedWriter::new(writer, self.encoder());
|
|
||||||
|
|
||||||
loop {
|
|
||||||
let count = match reader.read(&mut self.buf) {
|
|
||||||
Ok(0) => break,
|
|
||||||
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
|
||||||
result => result?,
|
|
||||||
};
|
|
||||||
size += count;
|
|
||||||
hasher.update(&self.buf[..count]);
|
|
||||||
wrapped_writer.write(&self.buf[..count])?;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.encoder = wrapped_writer.finish()?;
|
|
||||||
|
|
||||||
Ok((size, hasher.finalize().into()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum MaybeEncodedWriter<'a, W: Write> {
|
|
||||||
Stored(&'a mut W),
|
Stored(&'a mut W),
|
||||||
Encoded(zio::Writer<&'a mut W, RawEncoder<'static>>),
|
Encoded(zio::Writer<&'a mut W, RawEncoder<'static>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
|
impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
|
||||||
fn new(writer: &'a mut W, encoder: Option<RawEncoder<'static>>) -> Self {
|
pub fn new(writer: &'a mut W, encoder: Option<RawEncoder<'static>>) -> Self {
|
||||||
if let Some(encoder) = encoder {
|
if let Some(encoder) = encoder {
|
||||||
Self::Encoded(zio::Writer::new(writer, encoder))
|
Self::Encoded(zio::Writer::new(writer, encoder))
|
||||||
} else {
|
} else {
|
||||||
@ -387,7 +289,7 @@ impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write(&mut self, buf: &[u8]) -> Result<()> {
|
pub fn write(&mut self, buf: &[u8]) -> Result<()> {
|
||||||
match self {
|
match self {
|
||||||
Self::Stored(writer) => writer.write_all(buf)?,
|
Self::Stored(writer) => writer.write_all(buf)?,
|
||||||
Self::Encoded(writer) => writer.write_all(buf)?,
|
Self::Encoded(writer) => writer.write_all(buf)?,
|
||||||
@ -395,7 +297,7 @@ impl<'a, W: Write> MaybeEncodedWriter<'a, W> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish(self) -> Result<Option<RawEncoder<'static>>> {
|
pub fn finish(self) -> Result<Option<RawEncoder<'static>>> {
|
||||||
Ok(match self {
|
Ok(match self {
|
||||||
Self::Stored(_) => None,
|
Self::Stored(_) => None,
|
||||||
Self::Encoded(mut writer) => {
|
Self::Encoded(mut writer) => {
|
||||||
|
@ -3,25 +3,36 @@
|
|||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::ffi::OsString;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::io::Read;
|
||||||
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use prost::Message;
|
use prost::Message;
|
||||||
|
use sha1::Digest;
|
||||||
|
use sha1::Sha1;
|
||||||
use zip::read::ZipFile;
|
use zip::read::ZipFile;
|
||||||
use zip::ZipArchive;
|
use zip::ZipArchive;
|
||||||
use zstd::stream::copy_decode;
|
use zstd::stream::copy_decode;
|
||||||
|
use zstd::stream::raw::Encoder as RawEncoder;
|
||||||
|
|
||||||
use super::colpkg::export::MediaCopier;
|
|
||||||
use super::MediaEntries;
|
use super::MediaEntries;
|
||||||
use super::MediaEntry;
|
use super::MediaEntry;
|
||||||
use super::Meta;
|
use super::Meta;
|
||||||
|
use crate::error::FileIoError;
|
||||||
|
use crate::error::FileOp;
|
||||||
use crate::error::ImportError;
|
use crate::error::ImportError;
|
||||||
|
use crate::error::InvalidInputError;
|
||||||
|
use crate::import_export::package::colpkg::export::MaybeEncodedWriter;
|
||||||
use crate::io::atomic_rename;
|
use crate::io::atomic_rename;
|
||||||
use crate::io::filename_is_safe;
|
use crate::io::filename_is_safe;
|
||||||
use crate::io::new_tempfile_in;
|
use crate::io::new_tempfile_in;
|
||||||
|
use crate::io::read_dir_files;
|
||||||
|
use crate::media::files::filename_if_normalized;
|
||||||
use crate::media::files::normalize_filename;
|
use crate::media::files::normalize_filename;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
|
||||||
@ -171,6 +182,163 @@ impl MediaEntries {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct MediaIterEntry {
|
||||||
|
pub nfc_filename: String,
|
||||||
|
pub data: Box<dyn Read>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum MediaIterError {
|
||||||
|
InvalidFilename {
|
||||||
|
filename: OsString,
|
||||||
|
},
|
||||||
|
IoError {
|
||||||
|
filename: String,
|
||||||
|
source: io::Error,
|
||||||
|
},
|
||||||
|
Other {
|
||||||
|
source: Box<dyn std::error::Error + Send + Sync>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&Path> for MediaIterEntry {
|
||||||
|
type Error = MediaIterError;
|
||||||
|
|
||||||
|
fn try_from(value: &Path) -> std::result::Result<Self, Self::Error> {
|
||||||
|
let nfc_filename: String = value
|
||||||
|
.file_name()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.and_then(filename_if_normalized)
|
||||||
|
.ok_or_else(|| MediaIterError::InvalidFilename {
|
||||||
|
filename: value.as_os_str().to_owned(),
|
||||||
|
})?
|
||||||
|
.into();
|
||||||
|
let file = File::open(value).map_err(|err| MediaIterError::IoError {
|
||||||
|
filename: nfc_filename.clone(),
|
||||||
|
source: err,
|
||||||
|
})?;
|
||||||
|
Ok(MediaIterEntry {
|
||||||
|
nfc_filename,
|
||||||
|
data: Box::new(file) as _,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<MediaIterError> for AnkiError {
|
||||||
|
fn from(err: MediaIterError) -> Self {
|
||||||
|
match err {
|
||||||
|
MediaIterError::InvalidFilename { .. } => AnkiError::MediaCheckRequired,
|
||||||
|
MediaIterError::IoError { filename, source } => FileIoError {
|
||||||
|
path: filename.into(),
|
||||||
|
op: FileOp::Read,
|
||||||
|
source,
|
||||||
|
}
|
||||||
|
.into(),
|
||||||
|
MediaIterError::Other { source } => InvalidInputError {
|
||||||
|
message: "".to_string(),
|
||||||
|
source: Some(source),
|
||||||
|
backtrace: None,
|
||||||
|
}
|
||||||
|
.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct MediaIter(pub Box<dyn Iterator<Item = Result<MediaIterEntry, MediaIterError>>>);
|
||||||
|
|
||||||
|
impl MediaIter {
|
||||||
|
pub fn new<I>(iter: I) -> Self
|
||||||
|
where
|
||||||
|
I: Iterator<Item = Result<MediaIterEntry, MediaIterError>> + 'static,
|
||||||
|
{
|
||||||
|
Self(Box::new(iter))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over all files in the given path, without traversing
|
||||||
|
/// subfolders.
|
||||||
|
pub fn from_folder(path: &Path) -> Result<Self> {
|
||||||
|
let path2 = path.to_owned();
|
||||||
|
Ok(Self::new(read_dir_files(path)?.map(move |res| match res {
|
||||||
|
Ok(entry) => MediaIterEntry::try_from(entry.path().as_path()),
|
||||||
|
Err(err) => Err(MediaIterError::IoError {
|
||||||
|
filename: path2.to_string_lossy().into(),
|
||||||
|
source: err,
|
||||||
|
}),
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over all given files in the given folder.
|
||||||
|
/// Missing files are silently ignored.
|
||||||
|
pub fn from_file_list(
|
||||||
|
list: impl IntoIterator<Item = String> + 'static,
|
||||||
|
folder: PathBuf,
|
||||||
|
) -> Self {
|
||||||
|
Self::new(
|
||||||
|
list.into_iter()
|
||||||
|
.map(move |file| folder.join(file))
|
||||||
|
.filter(|path| path.exists())
|
||||||
|
.map(|path| MediaIterEntry::try_from(path.as_path())),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn empty() -> Self {
|
||||||
|
Self::new([].into_iter())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies and hashes while optionally encoding.
|
||||||
|
/// If compressing, the encoder is reused to optimize for repeated calls.
|
||||||
|
pub(crate) struct MediaCopier {
|
||||||
|
encoding: bool,
|
||||||
|
encoder: Option<RawEncoder<'static>>,
|
||||||
|
buf: [u8; 64 * 1024],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MediaCopier {
|
||||||
|
pub(crate) fn new(encoding: bool) -> Self {
|
||||||
|
Self {
|
||||||
|
encoding,
|
||||||
|
encoder: None,
|
||||||
|
buf: [0; 64 * 1024],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encoder(&mut self) -> Option<RawEncoder<'static>> {
|
||||||
|
self.encoding.then(|| {
|
||||||
|
self.encoder
|
||||||
|
.take()
|
||||||
|
.unwrap_or_else(|| RawEncoder::with_dictionary(0, &[]).unwrap())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns size and sha1 hash of the copied data.
|
||||||
|
pub(crate) fn copy(
|
||||||
|
&mut self,
|
||||||
|
reader: &mut impl Read,
|
||||||
|
writer: &mut impl Write,
|
||||||
|
) -> Result<(usize, Sha1Hash)> {
|
||||||
|
let mut size = 0;
|
||||||
|
let mut hasher = Sha1::new();
|
||||||
|
self.buf = [0; 64 * 1024];
|
||||||
|
let mut wrapped_writer = MaybeEncodedWriter::new(writer, self.encoder());
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let count = match reader.read(&mut self.buf) {
|
||||||
|
Ok(0) => break,
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
||||||
|
result => result?,
|
||||||
|
};
|
||||||
|
size += count;
|
||||||
|
hasher.update(&self.buf[..count]);
|
||||||
|
wrapped_writer.write(&self.buf[..count])?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.encoder = wrapped_writer.finish()?;
|
||||||
|
|
||||||
|
Ok((size, hasher.finalize().into()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -9,6 +9,9 @@ mod meta;
|
|||||||
pub(crate) use apkg::NoteMeta;
|
pub(crate) use apkg::NoteMeta;
|
||||||
pub(crate) use colpkg::export::export_colpkg_from_data;
|
pub(crate) use colpkg::export::export_colpkg_from_data;
|
||||||
pub use colpkg::import::import_colpkg;
|
pub use colpkg::import::import_colpkg;
|
||||||
|
pub use media::MediaIter;
|
||||||
|
pub use media::MediaIterEntry;
|
||||||
|
pub use media::MediaIterError;
|
||||||
pub(self) use meta::Meta;
|
pub(self) use meta::Meta;
|
||||||
pub(self) use meta::Version;
|
pub(self) use meta::Version;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user