define standard encoding for unicode (#893)

- always store media references in fields in NFC form
- always encode filenames on disk in NFC form on machines other than macs
- use the platform-appropriate encoding (NFD on macs, NFC elsewhere) when
  placing files in the media folder during syncs, and NFC for apkg imports
- rename 'unused media' back to 'check media'
- 'check media' can now automatically convert media references and on-disk
  filename encodings to the correct normalization form
This commit is contained in:
Damien Elmes 2013-09-20 18:06:41 +09:00
parent 4d42282b7b
commit 0d1d8c5bf9
6 changed files with 60 additions and 31 deletions

View File

@ -3,6 +3,7 @@
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import os
import unicodedata
from anki import Collection
from anki.utils import intTime, splitFields, joinFields, incGuid
from anki.importing.base import Importer
@ -349,7 +350,8 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
return self._mediaData(fname, self.dst.media.dir())
def _writeDstMedia(self, fname, data):
path = os.path.join(self.dst.media.dir(), fname)
path = os.path.join(self.dst.media.dir(),
unicodedata.normalize("NFC", fname))
try:
open(path, "wb").write(data)
except (OSError, IOError):

View File

@ -3,6 +3,7 @@
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import zipfile, os
import unicodedata
from anki.utils import tmpfile, json
from anki.importing.anki2 import Anki2Importer
@ -26,7 +27,8 @@ class AnkiPackageImporter(Anki2Importer):
for file, c in self.nameToNum.items():
if not file.startswith("_") and not file.startswith("latex-"):
continue
path = os.path.join(self.col.media.dir(), file)
path = os.path.join(self.col.media.dir(),
unicodedata.normalize("NFC", file))
if not os.path.exists(path):
open(path, "wb").write(z.read(c))

View File

@ -83,6 +83,7 @@ class MediaManager(object):
# Adding media
##########################################################################
# opath must be in unicode
def addFile(self, opath):
    """Read the file at `opath` and hand its contents to writeData().

    `opath` must be a unicode path. Returns whatever writeData() returns.
    """
    # read inside a context manager so the handle is closed immediately
    # rather than whenever the garbage collector gets around to it
    with open(opath, "rb") as f:
        data = f.read()
    return self.writeData(opath, data)
@ -90,6 +91,9 @@ class MediaManager(object):
def writeData(self, opath, data):
# if fname is a full path, use only the basename
fname = os.path.basename(opath)
# make sure we write it in NFC form (on mac will autoconvert to NFD),
# and return an NFC-encoded reference
fname = unicodedata.normalize("NFC", fname)
# remove any dangerous characters
base = self.stripIllegal(fname)
(root, ext) = os.path.splitext(base)
@ -186,15 +190,19 @@ class MediaManager(object):
def check(self, local=None):
"Return (missingFiles, unusedFiles)."
mdir = self.dir()
# generate card q/a and look through all references
normrefs = {}
def norm(s):
if isinstance(s, unicode) and isMac:
return unicodedata.normalize('NFD', s)
return s
for f in self.allMedia():
normrefs[norm(f)] = True
# loop through directory and find unused & missing media
# gather all media references in NFC form
allRefs = set()
for nid, mid, flds in self.col.db.execute("select id, mid, flds from notes"):
noteRefs = self.filesInStr(mid, flds)
# check the refs are in NFC
for f in noteRefs:
# if they're not, we'll need to fix them first
if f != unicodedata.normalize("NFC", f):
self._normalizeNoteRefs(nid)
noteRefs = self.filesInStr(mid, flds)
break
allRefs.update(noteRefs)
# loop through media folder
unused = []
if local is None:
files = os.listdir(mdir)
@ -202,28 +210,38 @@ class MediaManager(object):
files = local
for file in files:
if not local:
path = os.path.join(mdir, file)
if not os.path.isfile(path):
if not os.path.isfile(file):
# ignore directories
continue
if file.startswith("_"):
# leading _ says to ignore file
continue
nfile = norm(file)
if nfile not in normrefs:
if file.startswith("_"):
# leading _ says to ignore file
continue
nfcFile = unicodedata.normalize("NFC", file)
# we enforce NFC fs encoding on non-macs; on macs we'll have gotten
# NFD so we use the above variable for comparing references
if not isMac:
if file != nfcFile:
# delete if we already have the NFC form, otherwise rename
if os.path.exists(nfcFile):
os.unlink(file)
else:
os.rename(file, nfcFile)
file = nfcFile
# compare
if nfcFile not in allRefs:
unused.append(file)
else:
del normrefs[nfile]
nohave = [x for x in normrefs.keys() if not x.startswith("_")]
allRefs.discard(nfcFile)
nohave = [x for x in allRefs if not x.startswith("_")]
return (nohave, unused)
def allMedia(self):
    """Return a set of all referenced filenames.

    Every note's (mid, flds) row is passed through filesInStr() and the
    filenames it yields are collected into a single set.
    """
    files = set()
    rows = self.col.db.execute("select mid, flds from notes")
    for mid, flds in rows:
        # set.update() adds each filename yielded for this note
        files.update(self.filesInStr(mid, flds))
    return files
def _normalizeNoteRefs(self, nid):
    """Rewrite the fields of note `nid` in NFC form and flush the note.

    Each field is compared against its NFC-normalized form; fields that
    differ are replaced in place. The note is flushed once afterwards.
    """
    note = self.col.getNote(nid)
    for idx, original in enumerate(note.fields):
        normalized = unicodedata.normalize("NFC", original)
        if normalized != original:
            note.fields[idx] = normalized
    # a single flush after all fields have been processed
    note.flush()
# Copying on import
##########################################################################
@ -276,6 +294,11 @@ class MediaManager(object):
data = z.read(i)
csum = checksum(data)
name = meta[i.filename]
# normalize name for platform
if isMac:
name = unicodedata.normalize("NFD", name)
else:
name = unicodedata.normalize("NFC", name)
# save file
open(name, "wb").write(data)
# update db
@ -327,6 +350,8 @@ class MediaManager(object):
z.writestr("_finished", "")
break
fname = fname[0]
# we add it as a one-element array simply to make
# the later forgetAdded() call easier
fnames.append([fname])
z.write(fname, str(cnt))
files[str(cnt)] = fname

View File

@ -151,7 +151,7 @@
</action>
<action name="actionCheckMediaDatabase">
<property name="text">
<string>&amp;Unused Media...</string>
<string>Check &amp;Media...</string>
</property>
<property name="statusTip">
<string>Check the files in the media directory</string>

View File

@ -18,6 +18,6 @@ def getUpgradeDeckPath(name="anki12.anki"):
src = os.path.join(testDir, "support", name)
(fd, dst) = tempfile.mkstemp(suffix=".anki2")
shutil.copy(src, dst)
return dst
return unicode(dst, "utf8")
testDir = os.path.dirname(__file__)

View File

@ -7,7 +7,7 @@ from shared import getEmptyDeck, testDir
def test_add():
d = getEmptyDeck()
dir = tempfile.mkdtemp(prefix="anki")
path = os.path.join(dir, "foo.jpg")
path = os.path.join(dir, u"foo.jpg")
open(path, "w").write("hello")
# new file, should preserve name
assert d.media.addFile(path) == "foo.jpg"
@ -72,7 +72,7 @@ def test_changes():
assert not list(d.media.removed())
# add a file
dir = tempfile.mkdtemp(prefix="anki")
path = os.path.join(dir, "foo.jpg")
path = os.path.join(dir, u"foo.jpg")
open(path, "w").write("hello")
time.sleep(1)
path = d.media.addFile(path)