define standard encoding for unicode (#893)
- always store media references in fields in NFC form
- always encode filenames on disk in NFC form on machines other than macs
- use relevant encoding when placing files in the media folder during syncs and apkg imports as well
- rename 'unused media' back to 'check media'
- check media can now automatically change media references and filename encodings to the correct format
This commit is contained in:
parent
4d42282b7b
commit
0d1d8c5bf9
@ -3,6 +3,7 @@
|
||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
import os
|
||||
import unicodedata
|
||||
from anki import Collection
|
||||
from anki.utils import intTime, splitFields, joinFields, incGuid
|
||||
from anki.importing.base import Importer
|
||||
@ -349,7 +350,8 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
|
||||
return self._mediaData(fname, self.dst.media.dir())
|
||||
|
||||
def _writeDstMedia(self, fname, data):
|
||||
path = os.path.join(self.dst.media.dir(), fname)
|
||||
path = os.path.join(self.dst.media.dir(),
|
||||
unicodedata.normalize("NFC", fname))
|
||||
try:
|
||||
open(path, "wb").write(data)
|
||||
except (OSError, IOError):
|
||||
|
@ -3,6 +3,7 @@
|
||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
import zipfile, os
|
||||
import unicodedata
|
||||
from anki.utils import tmpfile, json
|
||||
from anki.importing.anki2 import Anki2Importer
|
||||
|
||||
@ -26,7 +27,8 @@ class AnkiPackageImporter(Anki2Importer):
|
||||
for file, c in self.nameToNum.items():
|
||||
if not file.startswith("_") and not file.startswith("latex-"):
|
||||
continue
|
||||
path = os.path.join(self.col.media.dir(), file)
|
||||
path = os.path.join(self.col.media.dir(),
|
||||
unicodedata.normalize("NFC", file))
|
||||
if not os.path.exists(path):
|
||||
open(path, "wb").write(z.read(c))
|
||||
|
||||
|
@ -83,6 +83,7 @@ class MediaManager(object):
|
||||
|
||||
# Adding media
|
||||
##########################################################################
|
||||
# opath must be in unicode
|
||||
|
||||
def addFile(self, opath):
    """Read the file at `opath` and hand its contents to writeData().

    opath must be a unicode path. The return value is whatever
    writeData(opath, data) returns.
    """
    # read inside a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector
    with open(opath, "rb") as f:
        data = f.read()
    return self.writeData(opath, data)
|
||||
@ -90,6 +91,9 @@ class MediaManager(object):
|
||||
def writeData(self, opath, data):
|
||||
# if fname is a full path, use only the basename
|
||||
fname = os.path.basename(opath)
|
||||
# make sure we write it in NFC form (on mac will autoconvert to NFD),
|
||||
# and return an NFC-encoded reference
|
||||
fname = unicodedata.normalize("NFC", fname)
|
||||
# remove any dangerous characters
|
||||
base = self.stripIllegal(fname)
|
||||
(root, ext) = os.path.splitext(base)
|
||||
@ -186,15 +190,19 @@ class MediaManager(object):
|
||||
def check(self, local=None):
|
||||
"Return (missingFiles, unusedFiles)."
|
||||
mdir = self.dir()
|
||||
# generate card q/a and look through all references
|
||||
normrefs = {}
|
||||
def norm(s):
|
||||
if isinstance(s, unicode) and isMac:
|
||||
return unicodedata.normalize('NFD', s)
|
||||
return s
|
||||
for f in self.allMedia():
|
||||
normrefs[norm(f)] = True
|
||||
# loop through directory and find unused & missing media
|
||||
# gather all media references in NFC form
|
||||
allRefs = set()
|
||||
for nid, mid, flds in self.col.db.execute("select id, mid, flds from notes"):
|
||||
noteRefs = self.filesInStr(mid, flds)
|
||||
# check the refs are in NFC
|
||||
for f in noteRefs:
|
||||
# if they're not, we'll need to fix them first
|
||||
if f != unicodedata.normalize("NFC", f):
|
||||
self._normalizeNoteRefs(nid)
|
||||
noteRefs = self.filesInStr(mid, flds)
|
||||
break
|
||||
allRefs.update(noteRefs)
|
||||
# loop through media folder
|
||||
unused = []
|
||||
if local is None:
|
||||
files = os.listdir(mdir)
|
||||
@ -202,28 +210,38 @@ class MediaManager(object):
|
||||
files = local
|
||||
for file in files:
|
||||
if not local:
|
||||
path = os.path.join(mdir, file)
|
||||
if not os.path.isfile(path):
|
||||
if not os.path.isfile(file):
|
||||
# ignore directories
|
||||
continue
|
||||
if file.startswith("_"):
|
||||
# leading _ says to ignore file
|
||||
continue
|
||||
nfile = norm(file)
|
||||
if nfile not in normrefs:
|
||||
if file.startswith("_"):
|
||||
# leading _ says to ignore file
|
||||
continue
|
||||
nfcFile = unicodedata.normalize("NFC", file)
|
||||
# we enforce NFC fs encoding on non-macs; on macs we'll have gotten
|
||||
# NFD so we use the above variable for comparing references
|
||||
if not isMac:
|
||||
if file != nfcFile:
|
||||
# delete if we already have the NFC form, otherwise rename
|
||||
if os.path.exists(nfcFile):
|
||||
os.unlink(file)
|
||||
else:
|
||||
os.rename(file, nfcFile)
|
||||
file = nfcFile
|
||||
# compare
|
||||
if nfcFile not in allRefs:
|
||||
unused.append(file)
|
||||
else:
|
||||
del normrefs[nfile]
|
||||
nohave = [x for x in normrefs.keys() if not x.startswith("_")]
|
||||
allRefs.discard(nfcFile)
|
||||
nohave = [x for x in allRefs if not x.startswith("_")]
|
||||
return (nohave, unused)
|
||||
|
||||
def allMedia(self):
    "Return a set of all referenced filenames."
    referenced = set()
    rows = self.col.db.execute("select mid, flds from notes")
    for mid, flds in rows:
        # collect every filename filesInStr() reports for this note
        referenced.update(self.filesInStr(mid, flds))
    return referenced
|
||||
def _normalizeNoteRefs(self, nid):
|
||||
note = self.col.getNote(nid)
|
||||
for c, fld in enumerate(note.fields):
|
||||
nfc = unicodedata.normalize("NFC", fld)
|
||||
if nfc != fld:
|
||||
note.fields[c] = nfc
|
||||
note.flush()
|
||||
|
||||
# Copying on import
|
||||
##########################################################################
|
||||
@ -276,6 +294,11 @@ class MediaManager(object):
|
||||
data = z.read(i)
|
||||
csum = checksum(data)
|
||||
name = meta[i.filename]
|
||||
# normalize name for platform
|
||||
if isMac:
|
||||
name = unicodedata.normalize("NFD", name)
|
||||
else:
|
||||
name = unicodedata.normalize("NFC", name)
|
||||
# save file
|
||||
open(name, "wb").write(data)
|
||||
# update db
|
||||
@ -327,6 +350,8 @@ class MediaManager(object):
|
||||
z.writestr("_finished", "")
|
||||
break
|
||||
fname = fname[0]
|
||||
# we add it as a one-element array simply to make
|
||||
# the later forgetAdded() call easier
|
||||
fnames.append([fname])
|
||||
z.write(fname, str(cnt))
|
||||
files[str(cnt)] = fname
|
||||
|
@ -151,7 +151,7 @@
|
||||
</action>
|
||||
<action name="actionCheckMediaDatabase">
|
||||
<property name="text">
|
||||
<string>&Unused Media...</string>
|
||||
<string>Check &Media...</string>
|
||||
</property>
|
||||
<property name="statusTip">
|
||||
<string>Check the files in the media directory</string>
|
||||
|
@ -18,6 +18,6 @@ def getUpgradeDeckPath(name="anki12.anki"):
|
||||
src = os.path.join(testDir, "support", name)
|
||||
(fd, dst) = tempfile.mkstemp(suffix=".anki2")
|
||||
shutil.copy(src, dst)
|
||||
return dst
|
||||
return unicode(dst, "utf8")
|
||||
|
||||
testDir = os.path.dirname(__file__)
|
||||
|
@ -7,7 +7,7 @@ from shared import getEmptyDeck, testDir
|
||||
def test_add():
|
||||
d = getEmptyDeck()
|
||||
dir = tempfile.mkdtemp(prefix="anki")
|
||||
path = os.path.join(dir, "foo.jpg")
|
||||
path = os.path.join(dir, u"foo.jpg")
|
||||
open(path, "w").write("hello")
|
||||
# new file, should preserve name
|
||||
assert d.media.addFile(path) == "foo.jpg"
|
||||
@ -72,7 +72,7 @@ def test_changes():
|
||||
assert not list(d.media.removed())
|
||||
# add a file
|
||||
dir = tempfile.mkdtemp(prefix="anki")
|
||||
path = os.path.join(dir, "foo.jpg")
|
||||
path = os.path.join(dir, u"foo.jpg")
|
||||
open(path, "w").write("hello")
|
||||
time.sleep(1)
|
||||
path = d.media.addFile(path)
|
||||
|
Loading…
Reference in New Issue
Block a user