# anki/anki/upgrade.py
# 2013-05-22 09:46:09 +09:00
#
# 740 lines
# 28 KiB
# Python
#
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/copyleft/agpl.html
import datetime
import os
import re
import shutil
import time

from anki.utils import intTime, tmpfile, ids2str, splitFields, base91, json
from anki.db import DB
from anki.collection import _Collection
from anki.consts import *
from anki.storage import _addSchema, _getColVars, _addColVars, \
    _updateIndices
#
# Upgrading is the first step in migrating to 2.0.
# Caller should have called check() on path before calling upgrade().
#
class Upgrader(object):
    """Converts an old-format deck file into a 2.0 collection.

    Usage is two-step: check(path) copies the deck to a temporary file,
    verifies it and repairs what it can; upgrade() then converts that
    temporary copy in place and returns the resulting collection.
    """

    def __init__(self):
        # path of the temporary working copy; set by check(), and reset to
        # None when the deck is invalid or once upgrade() has consumed it
        self.tmppath = None

    # Integrity checking & initial setup
    ######################################################################

    def check(self, path):
        "Returns 'ok', 'invalid', or log of fixes applied."
        # copy into a temp file before we open
        self.tmppath = tmpfile(suffix=".anki2")
        shutil.copy(path, self.tmppath)
        # run initial check
        with DB(self.tmppath) as db:
            res = self._check(db)
        # needs fixing?
        if res not in ("ok", "invalid"):
            res = self._fix(self.tmppath)
        # don't allow .upgrade() if invalid
        if res == "invalid":
            os.unlink(self.tmppath)
            self.tmppath = None
        return res

    def _check(self, db):
        """Inspect the old deck without modifying its data.

        Returns "invalid" if sqlite reports corruption, "ok" if every check
        passes, and None if the deck is too old or has a problem that _fix()
        should attempt to repair.
        """
        # corrupt?
        try:
            if db.scalar("pragma integrity_check") != "ok":
                return "invalid"
        except Exception:
            # a db that can't even run the check is treated as corrupt
            return "invalid"
        # old version?
        if db.scalar("select version from decks") < 65:
            return None
        # ensure we have indices for checks below
        db.executescript("""
create index if not exists ix_cards_factId on cards (factId);
create index if not exists ix_fields_factId on fields (factId);
analyze;""")
        # each query lists the rows that violate one invariant; any hit
        # means the deck needs fixing
        problemQueries = (
            # fields missing a field model?
            """
select id from fields where fieldModelId not in (
select distinct id from fieldModels)""",
            # facts missing a field?
            """
select distinct facts.id from facts, fieldModels where
facts.modelId = fieldModels.modelId and fieldModels.id not in
(select fieldModelId from fields where factId = facts.id)""",
            # cards missing a fact?
            """
select id from cards where factId not in (select id from facts)""",
            # cards missing a card model?
            """
select id from cards where cardModelId not in
(select id from cardModels)""",
            # cards with a card model from the wrong model?
            """
select id from cards where cardModelId not in (select cm.id from
cardModels cm, facts f where cm.modelId = f.modelId and
f.id = cards.factId)""",
            # facts missing a card?
            """
select facts.id from facts
where facts.id not in (select distinct factId from cards)""",
            # dangling fields?
            """
select id from fields where factId not in (select id from facts)""",
            # incorrect types
            """
select id from cards where relativeDelay != (case
when successive then 1 when reps then 0 else 2 end)""",
            """
select id from cards where type != (case
when type >= 0 then relativeDelay else relativeDelay - 3 end)""",
        )
        for sql in problemQueries:
            if db.list(sql):
                return None
        return "ok"

    def _fix(self, path):
        """Repair the deck at path using the old codebase's integrity fixer.

        Returns "invalid" if the deck can't be opened or is damaged, otherwise
        the text returned by fixIntegrity().
        """
        from oldanki import DeckStorage
        try:
            deck = DeckStorage.Deck(path, backup=False)
        except Exception:
            # if we can't open the file, it's invalid
            return "invalid"
        # run a db check
        res = deck.fixIntegrity()
        if "Database file is damaged" in res:
            # we can't recover from a corrupt db. still release the file so
            # the caller can delete the temporary copy; closing a damaged
            # deck may itself fail, which must not change the verdict
            try:
                deck.close()
            except Exception:
                pass
            return "invalid"
        # other errors are non-fatal
        deck.close()
        return res

    # Upgrading
    ######################################################################

    def upgrade(self):
        """Convert the checked temporary copy and return the new collection.

        check() must have been called first and must not have returned
        'invalid'; otherwise the assertion below fails.
        """
        assert self.tmppath
        self.db = DB(self.tmppath)
        self._upgradeSchema()
        self.col = _Collection(self.db)
        self._upgradeRest()
        self.tmppath = None
        return self.col

    # Schema upgrade
    ######################################################################

    def _upgradeSchema(self):
        "Alter tables prior to ORM initialization."
        db = self.db
        # speed up the upgrade
        db.execute("pragma temp_store = memory")
        db.execute("pragma cache_size = 10000")
        db.execute("pragma synchronous = off")
        # these weren't always correctly set
        db.execute("pragma page_size = 4096")
        db.execute("pragma legacy_file_format = 0")
        self._fixOrdinals(db)
        # notes, then cards (which need the new note ids), then the review
        # log (which needs the new card ids)
        factidmap = self._migrateFacts(db)
        cardidmap = self._migrateCards(db, factidmap)
        self._migrateRevlog(db, cardidmap)
        # deck
        self._migrateDeckTbl()
        # tags
        self._migrateTags(db)
        # the rest
        db.execute("drop table media")
        db.execute("drop table sources")
        self._migrateModels()
        _updateIndices(db)

    def _uniqueStamp(self, stamp, highest):
        "Return stamp if it is above highest, else the next value after highest."
        if stamp <= highest:
            return highest + 1
        return stamp

    def _fixOrdinals(self, db):
        "Renumber card/field model ordinals from 0 and copy them to cards & fields."
        for mid in db.list("select id from models"):
            # ensure the ordinals are correct for each cardModel
            for c, cmid in enumerate(db.list(
                "select id from cardModels where modelId = ? order by ordinal",
                mid)):
                db.execute("update cardModels set ordinal = ? where id = ?",
                           c, cmid)
            # and fieldModel
            for c, fmid in enumerate(db.list(
                "select id from fieldModels where modelId = ? order by ordinal",
                mid)):
                db.execute("update fieldModels set ordinal = ? where id = ?",
                           c, fmid)
        # then fix ordinals numbers on cards & fields
        db.execute("""update cards set ordinal = (select ordinal from
cardModels where cardModels.id = cardModelId)""")
        db.execute("""update fields set ordinal = (select ordinal from
fieldModels where id = fieldModelId)""")

    def _migrateFacts(self, db):
        "Merge facts and fields into notes; returns {old fact id: new note id}."
        # tags should have a leading and trailing space if not empty, and not
        # use commas. replacing a comma with a space can leave two spaces in
        # a row ("a, b"), so doubled spaces are collapsed afterwards
        db.execute("""
update facts set tags = (case
when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), "  ", " ") || " "
end)
""")
        # pull facts into memory, so we can merge them with fields efficiently
        facts = db.all("""
select id, id, modelId, cast(created*1000 as int), cast(modified as int),
0, tags from facts order by created""")
        # build field hash
        fields = {}
        for (fid, fieldOrd, val) in db.execute(
            "select factId, ordinal, value from fields order by factId, ordinal"):
            fields.setdefault(fid, []).append(
                (fieldOrd, self._mungeField(val)))
        # build insert data and transform ids, and minimize qt's
        # bold/italics/underline cruft.
        from anki.utils import minimizeHTML
        data = []
        factidmap = {}
        highest = 0
        for row in facts:
            row = list(row)
            oldid = row[0]
            # the creation time in ms becomes the new id; bump it when it
            # would collide with an id that has already been handed out
            highest = self._uniqueStamp(row[3], highest)
            factidmap[oldid] = highest
            row[0] = highest
            del row[3]
            # convert old 64bit id into a string, discarding sign bit
            row[1] = base91(abs(row[1]))
            # field values in ordinal order, joined with the 0x1f separator
            row.append(minimizeHTML("\x1f".join(
                [x[1] for x in sorted(fields[oldid])])))
            data.append(row)
        # and put the facts into the new table
        db.execute("drop table facts")
        _addSchema(db, False)
        db.executemany(
            "insert into notes values (?,?,?,?,?,?,?,'','',0,'')", data)
        db.execute("drop table fields")
        return factidmap

    def _migrateCards(self, db, factidmap):
        "Rewrite the cards table in the new layout; returns {old id: new id}."
        # we need to pull this into memory, to rewrite the creation time if
        # it's not unique and update the fact id
        rows = []
        cardidmap = {}
        highest = 0
        for row in db.execute("""
select id, cast(created*1000 as int), factId, ordinal,
cast(modified as int), 0,
(case relativeDelay
when 0 then 1
when 1 then 2
when 2 then 0 end),
(case type
when 0 then 1
when 1 then 2
when 2 then 0
else type end),
cast(due as int), cast(interval as int),
cast(factor*1000 as int), reps, noCount from cards
order by created"""):
            row = list(row)
            # find an unused time
            highest = self._uniqueStamp(row[1], highest)
            row[1] = highest
            # rewrite fact id
            row[2] = factidmap[row[2]]
            # note id change and save all but old id
            cardidmap[row[0]] = row[1]
            rows.append(row[1:])
        # drop old table and rewrite
        db.execute("drop table cards")
        _addSchema(db, False)
        db.executemany("""
insert into cards values (?,?,1,?,?,?,?,?,?,?,?,?,?,0,0,0,0,"")""",
                       rows)
        return cardidmap

    def _migrateRevlog(self, db, cardidmap):
        "Copy reviewHistory into revlog using the new card ids."
        # fetch the data so we can rewrite ids quickly
        r = []
        for row in db.execute("""
select
cast(time*1000 as int), cardId, 0, ease,
cast(nextInterval as int), cast(lastInterval as int),
cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
yesCount from reviewHistory"""):
            row = list(row)
            # new card ids
            if row[1] not in cardidmap:
                # id doesn't exist
                continue
            row[1] = cardidmap[row[1]]
            # no ease 0 anymore
            row[3] = row[3] or 1
            # determine type, overwriting yesCount
            oldInt = row[5]
            yesCnt = row[8]
            # yesCnt included the current answer
            if row[3] > 1:
                yesCnt -= 1
            if oldInt < 1:
                # new or failed: type=relrn (2) if it had been answered
                # correctly before, otherwise type=lrn (0)
                row[8] = 2 if yesCnt else 0
            else:
                # type=rev
                row[8] = 1
            r.append(row)
        db.executemany(
            "insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", r)
        db.execute("drop table reviewHistory")

    def _migrateTags(self, db):
        "Store the old tag list as JSON on the col row and drop the tag tables."
        tags = {}
        for t in db.list("select tag from tags"):
            tags[t] = intTime()
        db.execute("update col set tags = ?", json.dumps(tags))
        db.execute("drop table tags")
        db.execute("drop table cardTags")

    def _migrateDeckTbl(self):
        "Replace the decks/deckVars tables with a row in col plus its config."
        db = self.db
        db.execute("delete from col")
        db.execute("""
insert or replace into col select id, cast(created as int), :t,
:t, 99, 0, 0, cast(lastSync as int),
"", "", "", "", "" from decks""", t=intTime())
        # prepare a deck to store the old deck options
        g, gc, conf = _getColVars(db)
        # delete old selective study settings, which we can't auto-upgrade easily
        keys = ("newActive", "newInactive", "revActive", "revInactive")
        for k in keys:
            db.execute("delete from deckVars where key=:k", k=k)
        # copy other settings, ignoring deck order as there's a new default
        gc['new']['perDay'] = db.scalar("select newCardsPerDay from decks")
        gc['new']['order'] = min(1, db.scalar("select newCardOrder from decks"))
        # these are collection level, and can't be imported on a per-deck basis
        # conf['newSpread'] = db.scalar("select newCardSpacing from decks")
        # conf['timeLim'] = db.scalar("select sessionTimeLimit from decks")
        # add any deck vars and save
        for (k, v) in db.execute("select * from deckVars").fetchall():
            if k in ("hexCache", "cssCache"):
                # ignore
                pass
            elif k == "leechFails":
                gc['lapse']['leechFails'] = int(v)
            else:
                conf[k] = v
        # don't use a learning mode for upgrading users
        #gc['new']['delays'] = [10]
        _addColVars(db, g, gc, conf)
        # clean up
        db.execute("drop table decks")
        db.execute("drop table deckVars")

    def _migrateModels(self):
        "Convert the models tables into the JSON stored in col.models."
        import anki.models
        db = self.db
        mods = {}
        for row in db.all(
            "select id, name from models"):
            # use only first 31 bits if not old anki id
            t = abs(row[0])
            if t > 4294967296:
                t >>= 32
            assert t > 0
            m = anki.models.defaultModel.copy()
            m['id'] = t
            m['name'] = row[1]
            m['mod'] = intTime()
            m['tags'] = []
            m['flds'] = self._fieldsForModel(row[0])
            m['tmpls'] = self._templatesForModel(row[0], m['flds'])
            mods[m['id']] = m
            # point the notes at the shortened model id
            db.execute("update notes set mid = ? where mid = ?", t, row[0])
        # save and clean up
        db.execute("update col set models = ?", json.dumps(mods))
        db.execute("drop table fieldModels")
        db.execute("drop table cardModels")
        db.execute("drop table models")

    def _fieldsForModel(self, mid):
        "Return new-style field dicts for old model mid, in ordinal order."
        import anki.models
        db = self.db
        dconf = anki.models.defaultField
        flds = []
        # note: qsize & qcol are used in upgrade then discarded
        for c, row in enumerate(db.all("""
select name, features, quizFontFamily, quizFontSize, quizFontColour,
editFontSize from fieldModels where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['rtl'],
             conf['font'],
             conf['qsize'],
             conf['qcol'],
             conf['size']) = row
            conf['ord'] = c
            # ensure data is good
            conf['rtl'] = bool(conf['rtl'])
            conf['font'] = conf['font'] or "Arial"
            # the editFontSize read above is replaced by a fixed size
            conf['size'] = 12
            # will be removed later in upgrade
            conf['qcol'] = conf['qcol'] or "#000"
            conf['qsize'] = conf['qsize'] or 20
            flds.append(conf)
        return flds

    def _templatesForModel(self, mid, flds):
        """Return new-style template dicts for old model mid, in ordinal order.

        flds is accepted for the caller's convenience but not used here.
        """
        import anki.models
        db = self.db
        dconf = anki.models.defaultTemplate
        tmpls = []
        for c, row in enumerate(db.all("""
select name, active, qformat, aformat, questionInAnswer,
questionAlign, lastFontColour, typeAnswer from cardModels
where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['actv'],
             conf['qfmt'],
             conf['afmt'],
             # the following are used in upgrade then discarded
             hideq,
             conf['align'],
             conf['bg'],
             typeAns) = row
            conf['ord'] = c
            for side in ("qfmt", "afmt"):
                # ensure the new style field format
                conf[side] = re.sub(r"%\((.+?)\)s", r"{{\1}}", conf[side])
                # some special names have changed
                conf[side] = re.sub(
                    "(?i){{tags}}", "{{Tags}}", conf[side])
                conf[side] = re.sub(
                    "(?i){{cardModel}}", "{{Card}}", conf[side])
                conf[side] = re.sub(
                    "(?i){{modelTags}}", "{{Type}}", conf[side])
                # type answer is now embedded in the format: always on the
                # question, and on the answer too when hideq is set
                if typeAns:
                    if side == "qfmt" or hideq:
                        conf[side] += '<br>{{type:%s}}' % typeAns
            # q fields now in a
            if not hideq:
                conf['afmt'] = (
                    "{{FrontSide}}\n\n<hr id=answer>\n\n" + conf['afmt'])
            tmpls.append(conf)
        return tmpls

    # Field munging
    ######################################################################

    def _mungeField(self, val):
        "Return val with each run of two or more spaces turned into &nbsp;s."
        # we no longer wrap fields in white-space: pre-wrap, so we need to
        # convert previous whitespace into non-breaking spaces. single
        # spaces are left alone: they render the same either way, and
        # rewriting them would stop text wrapping and break the space
        # between a tag name and its attributes in any embedded HTML
        def repl(match):
            return match.group(1).replace(" ", "&nbsp;")
        return re.sub("(  +)", repl, val)

    # Template upgrading
    ######################################################################
    # - {{field}} no longer inserts an implicit span, so we make the span
    #   explicit on upgrade.
    # - likewise with alignment and background color

    def _upgradeTemplates(self):
        "Make per-field styling and per-card css explicit in every model."
        d = self.col
        for m in d.models.all():
            # cache field styles
            styles = {}
            for f in m['flds']:
                attrs = []
                if f['font'].lower() != 'arial':
                    attrs.append("font-family: %s" % f['font'])
                if f['qsize'] != 20:
                    attrs.append("font-size: %spx" % f['qsize'])
                if f['qcol'] not in ("black", "#000"):
                    attrs.append("color: %s" % f['qcol'])
                if f['rtl']:
                    attrs.append("direction: rtl; unicode-bidi: embed")
                if attrs:
                    styles[f['name']] = '<span style="%s">{{%s}}</span>' % (
                        "; ".join(attrs), f['name'])
                # obsolete
                del f['qcol']
                del f['qsize']

            def repl(match):
                field = match.group(2)
                if field in styles:
                    return match.group(1) + styles[field]
                # special or non-existant field; leave alone
                return match.group(0)

            # then for each template
            for t in m['tmpls']:
                for k in 'qfmt', 'afmt':
                    # replace old field references
                    t[k] = re.sub("(^|[^{]){{([^{}]+)?}}", repl, t[k])
                    # then strip extra {}s from other fields
                    t[k] = t[k].replace("{{{", "{{").replace("}}}", "}}")
                    # remove superfluous formatting from 1.0 -> 1.2 upgrade
                    t[k] = re.sub("font-size: ?20px;?", "", t[k])
                    t[k] = re.sub("(?i)font-family: ?arial;?", "", t[k])
                    t[k] = re.sub("color: ?#000(000)?;?", "", t[k])
                    t[k] = re.sub("white-space: ?pre-wrap;?", "", t[k])
                    # new furigana handling
                    if "japanese" in m['name'].lower():
                        if k == 'qfmt':
                            t[k] = t[k].replace(
                                "{{Reading}}", "{{kana:Reading}}")
                        else:
                            t[k] = t[k].replace(
                                "{{Reading}}", "{{furigana:Reading}}")
                # adjust css
                css = ""
                if t['bg'] != "white" and t['bg'].lower() != "#ffffff":
                    css = "background-color: %s;" % t['bg']
                if t['align']:
                    # align 1 -> left, 2 -> right; 0 adds no rule
                    css += "text-align: %s" % ("left", "right")[t['align']-1]
                if css:
                    css = '\n.card%d { %s }' % (t['ord']+1, css)
                    m['css'] += css
                # remove obsolete
                del t['bg']
                del t['align']
            # save model
            d.models.save(m)

    # Media references
    ######################################################################
    # In 2.0 we drop support for media and latex references in the template,
    # since they require generating card templates to see what media a note
    # uses, and are confusing for shared deck users. To ease the upgrade
    # process, we automatically convert the references to new fields.

    def _rewriteMediaRefs(self):
        "Move media/latex references out of templates into new note fields."
        col = self.col

        def rewriteRef(m, t, key, match, state):
            # m: model, t: template, key: 'qfmt' or 'afmt', match: one
            # reference found in t[key], state: per-model bookkeeping
            ref = match.group(0)
            fname = match.group("fname")
            if ref in state['mflds']:
                # we've converted this field before
                new = state['mflds'][ref]
            else:
                # get field name and any prefix/suffix
                m2 = re.match(
                    r"([^{]*)\{\{\{?(?:text:)?([^}]+)\}\}\}?(.*)",
                    fname)
                # not a field reference?
                if not m2:
                    return
                pre, ofld, suf = m2.groups()
                # get index of field name
                try:
                    idx = col.models.fieldMap(m)[ofld][0]
                except KeyError:
                    # invalid field or tag reference; don't rewrite
                    return
                # find a free field name
                while 1:
                    state['fields'] += 1
                    fld = "Media %d" % state['fields']
                    if fld not in col.models.fieldMap(m):
                        break
                # add the new field
                f = col.models.newField(fld)
                f['qsize'] = 20
                f['qcol'] = '#000'
                col.models.addField(m, f)
                # loop through notes and write reference into new field
                data = []
                for nid, flds in col.db.execute(
                    "select id, flds from notes where id in "+
                    ids2str(col.models.nids(m))):
                    sflds = splitFields(flds)
                    filled = ref.replace(fname, pre+sflds[idx]+suf)
                    data.append((flds+filled, nid))
                # update notes
                col.db.executemany("update notes set flds=? where id=?",
                                   data)
                # note field for future; keyed by the whole reference so the
                # lookup above finds it when the same reference recurs
                state['mflds'][ref] = fld
                new = fld
            # rewrite reference in template
            t[key] = t[key].replace(ref, "{{{%s}}}" % new)

        regexps = col.media.regexps + [
            r"(\[latex\](?P<fname>.+?)\[/latex\])",
            r"(\[\$\](?P<fname>.+?)\[/\$\])",
            r"(\[\$\$\](?P<fname>.+?)\[/\$\$\])"]
        # process each model
        for m in col.models.all():
            state = dict(mflds={}, fields=0)
            for t in m['tmpls']:
                for r in regexps:
                    for key in ('qfmt', 'afmt'):
                        for match in re.finditer(r, t[key]):
                            rewriteRef(m, t, key, match, state)
            if state['fields']:
                col.models.save(m)

    # Inactive templates
    ######################################################################
    # Templates can't be declared as inactive anymore. Remove any that are
    # marked inactive and have no dependent cards.

    def _removeInactive(self):
        "Drop the 'actv' flag, removing inactive templates that have no cards."
        d = self.col
        for m in d.models.all():
            remove = []
            for t in m['tmpls']:
                if not t['actv']:
                    if not d.db.scalar("""
select 1 from cards where nid in (select id from notes where mid = ?)
and ord = ? limit 1""", m['id'], t['ord']):
                        remove.append(t)
                del t['actv']
            for r in remove:
                try:
                    d.models.remTemplate(m, r)
                except AssertionError:
                    # if the model was unused this could result in all
                    # templates being removed; ignore error
                    pass
            d.models.save(m)

    # Conditional templates
    ######################################################################
    # For models that don't use a given template in all cards, we'll need to
    # add a new field to notes to indicate if the card should be generated or not

    def _addFlagFields(self):
        "Add a flag field for every template that only some notes use."
        for m in self.col.models.all():
            nids = self.col.models.nids(m)
            changed = False
            for tmpl in m['tmpls']:
                if self._addFlagFieldsForTemplate(m, nids, tmpl):
                    changed = True
            if changed:
                # save model
                self.col.models.save(m, templates=True)

    def _addFlagFieldsForTemplate(self, m, nids, tmpl):
        """Make tmpl conditional on a new flag field if not all notes use it.

        Returns True if the model was changed, None otherwise.
        """
        cids = self.col.db.list(
            "select id from cards where nid in %s and ord = ?" %
            ids2str(nids), tmpl['ord'])
        if len(cids) == len(nids):
            # not selectively used
            return None
        # add a flag field, named after the template; underscores are added
        # until the name is not already taken by a field
        name = tmpl['name']
        have = [f['name'] for f in m['flds']]
        while name in have:
            name += "_"
        f = self.col.models.newField(name)
        self.col.models.addField(m, f)
        # find the notes that have that card
        haveNids = self.col.db.list(
            "select nid from cards where id in "+ids2str(cids))
        # add "y" to the appended field for those notes
        self.col.db.execute(
            "update notes set flds = flds || 'y' where id in "+ids2str(
                haveNids))
        # wrap the template in a conditional
        tmpl['qfmt'] = "{{#%s}}\n%s\n{{/%s}}" % (
            f['name'], tmpl['qfmt'], f['name'])
        return True

    # Post-schema upgrade
    ######################################################################

    def _upgradeRest(self):
        "Handle the rest of the upgrade to 2.0."
        col = self.col
        # make sure we have a current model id
        col.models.setCurrent(list(col.models.models.values())[0])
        # remove unused templates that were marked inactive
        self._removeInactive()
        # rewrite media references in card template
        self._rewriteMediaRefs()
        # template handling has changed
        self._upgradeTemplates()
        # add fields for selectively used templates
        self._addFlagFields()
        # fix creation time: take 4am on the day the clock showed four hours
        # ago, then step back one day more than the whole days elapsed since
        # the old creation stamp
        col.sched._updateCutoff()
        start = datetime.datetime.today()
        start -= datetime.timedelta(hours=4)
        start = datetime.datetime(start.year, start.month, start.day)
        start += datetime.timedelta(hours=4)
        start -= datetime.timedelta(days=1+int((time.time()-col.crt)/86400))
        col.crt = int(time.mktime(start.timetuple()))
        col.sched._updateCutoff()
        # update uniq cache
        col.updateFieldCache(col.db.list("select id from notes"))
        # remove old views
        for v in ("failedCards", "revCardsOld", "revCardsNew",
                  "revCardsDue", "revCardsRandom", "acqCardsRandom",
                  "acqCardsOld", "acqCardsNew"):
            col.db.execute("drop view if exists %s" % v)
        # remove stats, as it's all in the revlog now
        col.db.execute("drop table if exists stats")
        # suspended cards don't use ranges anymore
        col.db.execute("update cards set queue=-1 where queue between -3 and -1")
        col.db.execute("update cards set queue=-2 where queue between 3 and 5")
        col.db.execute("update cards set queue=type where queue between 6 and 8")
        # remove old deleted tables
        for t in ("cards", "notes", "models", "media"):
            col.db.execute("drop table if exists %sDeleted" % t)
        # and failed cards
        # NOTE(review): the *1001 presumably packs the lapse step count into
        # both halves of the scheduler's 'left' value - confirm
        left = len(col.decks.confForDid(1)['lapse']['delays'])*1001
        col.db.execute("""
update cards set left=?,type=1,queue=1,ivl=1 where type=1 and ivl <= 1
and queue>=0""", left)
        col.db.execute("""
update cards set odue=?,left=?,type=2 where type=1 and ivl > 1 and queue>=0""",
                       col.sched.today+1, left)
        # and due cards: timestamps become day numbers relative to today
        col.db.execute("""
update cards set due = cast(
(case when due < :stamp then 0 else 1 end) +
((due-:stamp)/86400) as int)+:today where type = 2
""", stamp=col.sched.dayCutoff, today=col.sched.today)
        # lapses were counted differently in 1.0, so we should have a higher
        # default lapse threshold
        for dconf in col.decks.allConf():
            dconf['lapse']['leechFails'] = 16
            col.decks.save(dconf)
        # possibly re-randomize
        conf = col.decks.allConf()[0]
        if not conf['new']['order']:
            col.sched.randomizeCards(1)
        else:
            col.sched.orderCards(1)
        # optimize and finish
        col.db.commit()
        col.db.execute("vacuum")
        col.db.execute("analyze")
        col.db.execute("update col set ver = ?", SCHEMA_VERSION)
        col.save()