# -*- coding: utf-8 -*-
# Copyright: Damien Elmes
# License: GNU AGPL, version 3 or later; http://www.gnu.org/copyleft/agpl.html

import os
import time, re, datetime, shutil
from anki.utils import intTime, tmpfile, ids2str, splitFields, base91, json
from anki.db import DB
from anki.collection import _Collection
from anki.consts import *
from anki.storage import _addSchema, _getColVars, _addColVars, \
    _updateIndices

#
# Upgrading is the first step in migrating to 2.0.
# Caller should have called check() on path before calling upgrade().
#

class Upgrader(object):
    "Checks a 1.x deck file and converts a temporary copy of it to 2.0."

    def __init__(self):
        # path of the temporary copy made by check(); None until check()
        # succeeds, and reset to None once upgrade() has consumed it
        self.tmppath = None

    # Integrity checking & initial setup
    ######################################################################

    def check(self, path):
        "Returns 'ok', 'invalid', or log of fixes applied."
        # copy into a temp file before we open
        self.tmppath = tmpfile(suffix=".anki2")
        shutil.copy(path, self.tmppath)
        # run initial check
        with DB(self.tmppath) as db:
            res = self._check(db)
        # needs fixing? (_check() returns None when a problem was found)
        if res not in ("ok", "invalid"):
            res = self._fix(self.tmppath)
        # don't allow .upgrade() if invalid
        if res == "invalid":
            os.unlink(self.tmppath)
            self.tmppath = None
        return res

    def _check(self, db):
        "Return 'ok', 'invalid', or None if the deck needs fixing first."
        # corrupt?
        try:
            if db.scalar("pragma integrity_check") != "ok":
                return "invalid"
        except Exception:
            return "invalid"
        # old version?
        if db.scalar("select version from decks") < 65:
            return
        # ensure we have indices for checks below
        db.executescript("""
create index if not exists ix_cards_factId on cards (factId);
create index if not exists ix_fields_factId on fields (factId);
analyze;""")
        # fields missing a field model?
        if db.list("""
select id from fields where fieldModelId not in (
select distinct id from fieldModels)"""):
            return
        # facts missing a field?
        if db.list("""
select distinct facts.id from facts, fieldModels where
facts.modelId = fieldModels.modelId and fieldModels.id not in
(select fieldModelId from fields where factId = facts.id)"""):
            return
        # cards missing a fact?
        if db.list("""
select id from cards where factId not in (select id from facts)"""):
            return
        # cards missing a card model?
        if db.list("""
select id from cards where cardModelId not in
(select id from cardModels)"""):
            return
        # cards with a card model from the wrong model?
        if db.list("""
select id from cards where cardModelId not in (select cm.id from
cardModels cm, facts f where cm.modelId = f.modelId and
f.id = cards.factId)"""):
            return
        # facts missing a card?
        if db.list("""
select facts.id from facts
where facts.id not in (select distinct factId from cards)"""):
            return
        # dangling fields?
        if db.list("""
select id from fields where factId not in (select id from facts)"""):
            return
        # incorrect types
        if db.list("""
select id from cards where relativeDelay != (case
when successive then 1 when reps then 0 else 2 end)"""):
            return
        if db.list("""
select id from cards where type != (case
when type >= 0 then relativeDelay else relativeDelay - 3 end)"""):
            return
        return "ok"

    def _fix(self, path):
        "Run the old library's integrity fixer; return its log or 'invalid'."
        from oldanki import DeckStorage
        try:
            deck = DeckStorage.Deck(path, backup=False)
        except Exception:
            # if we can't open the file, it's invalid
            return "invalid"
        # run a db check
        try:
            res = deck.fixIntegrity()
        finally:
            # always release the deck: check() unlinks the file when we
            # report "invalid", so no handle should still be open on it
            deck.close()
        if "Database file is damaged" in res:
            # we can't recover from a corrupt db
            return "invalid"
        # other errors are non-fatal
        return res

    # Upgrading
    ######################################################################

    def upgrade(self):
        "Upgrade the copy made by check() and return the new collection."
        assert self.tmppath
        self.db = DB(self.tmppath)
        self._upgradeSchema()
        self.col = _Collection(self.db)
        self._upgradeRest()
        self.tmppath = None
        return self.col

    # Schema upgrade
    ######################################################################

    def _upgradeSchema(self):
        "Alter tables prior to ORM initialization."
        db = self.db
        # speed up the upgrade
        db.execute("pragma temp_store = memory")
        db.execute("pragma cache_size = 10000")
        db.execute("pragma synchronous = off")
        # these weren't always correctly set
        db.execute("pragma page_size = 4096")
        db.execute("pragma legacy_file_format = 0")

        self._fixOrdinals()
        # each step needs the id mapping produced by the previous one
        factidmap = self._migrateNotes()
        cardidmap = self._migrateCards(factidmap)
        self._migrateRevlog(cardidmap)

        # deck
        ###########
        self._migrateDeckTbl()

        # tags
        ###########
        now = intTime()
        tags = dict((t, now) for t in db.list("select tag from tags"))
        db.execute("update col set tags = ?", json.dumps(tags))
        db.execute("drop table tags")
        db.execute("drop table cardTags")

        # the rest
        ###########
        db.execute("drop table media")
        db.execute("drop table sources")
        self._migrateModels()
        _updateIndices(db)

    def _fixOrdinals(self):
        "Renumber card/field model ordinals from 0 and copy them to rows."
        db = self.db
        for mid in db.list("select id from models"):
            # ensure the ordinals are correct for each cardModel
            for c, cmid in enumerate(db.list(
                "select id from cardModels where modelId = ? order by ordinal",
                mid)):
                db.execute("update cardModels set ordinal = ? where id = ?",
                           c, cmid)
            # and fieldModel
            for c, fmid in enumerate(db.list(
                "select id from fieldModels where modelId = ? order by ordinal",
                mid)):
                db.execute("update fieldModels set ordinal = ? where id = ?",
                           c, fmid)
        # then fix ordinals numbers on cards & fields
        db.execute("""update cards set ordinal = (select ordinal from
cardModels where cardModels.id = cardModelId)""")
        db.execute("""update fields set ordinal = (select ordinal from
fieldModels where id = fieldModelId)""")

    def _migrateNotes(self):
        "Turn facts+fields into notes; return a dict of old id -> new id."
        db = self.db
        # tags should have a leading and trailing space if not empty, and not
        # use commas
        db.execute("""
update facts set tags = (case
when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), "  ", " ") || " "
end)
""")
        # pull facts into memory, so we can merge them with fields efficiently
        facts = db.all("""
select id, id, modelId, cast(created*1000 as int), cast(modified as int),
0, tags from facts order by created""")
        # build field hash
        fields = {}
        for (fid, ordinal, val) in db.execute(
            "select factId, ordinal, value from fields order by factId, ordinal"):
            fields.setdefault(fid, []).append((ordinal, self._mungeField(val)))
        # build insert data and transform ids, and minimize qt's
        # bold/italics/underline cruft.
        from anki.utils import minimizeHTML
        data = []
        factidmap = {}
        highest = 0
        for row in facts:
            oldid = row[0]
            row = list(row)
            # the creation time becomes the new id, so it must be unique
            if row[3] <= highest:
                highest += 1
                row[3] = highest
            else:
                highest = row[3]
            factidmap[oldid] = row[3]
            row[0] = row[3]
            del row[3]
            # convert old 64bit id into a string, discarding sign bit
            row[1] = base91(abs(row[1]))
            row.append(minimizeHTML("\x1f".join(
                [x[1] for x in sorted(fields[oldid])])))
            data.append(row)
        # and put the facts into the new table
        db.execute("drop table facts")
        _addSchema(db, False)
        db.executemany("insert into notes values (?,?,?,?,?,?,?,'','',0,'')",
                       data)
        db.execute("drop table fields")
        return factidmap

    def _migrateCards(self, factidmap):
        "Rewrite the cards table; return a dict of old id -> new id."
        db = self.db
        # we need to pull this into memory, to rewrite the creation time if
        # it's not unique and update the fact id
        rows = []
        cardidmap = {}
        highest = 0
        for row in db.execute("""
select id, cast(created*1000 as int), factId, ordinal,
cast(modified as int), 0,
(case relativeDelay
when 0 then 1
when 1 then 2
when 2 then 0 end),
(case type
when 0 then 1
when 1 then 2
when 2 then 0
else type end),
cast(due as int), cast(interval as int),
cast(factor*1000 as int), reps, noCount from cards
order by created"""):
            # find an unused time
            row = list(row)
            if row[1] <= highest:
                highest += 1
                row[1] = highest
            else:
                highest = row[1]
            # rewrite fact id
            row[2] = factidmap[row[2]]
            # note id change and save all but old id
            cardidmap[row[0]] = row[1]
            rows.append(row[1:])
        # drop old table and rewrite
        db.execute("drop table cards")
        _addSchema(db, False)
        db.executemany("""
insert into cards values (?,?,1,?,?,?,?,?,?,?,?,?,?,0,0,0,0,"")""",
                       rows)
        return cardidmap

    def _migrateRevlog(self, cardidmap):
        "Copy reviewHistory into revlog using the new card ids."
        db = self.db
        # fetch the data so we can rewrite ids quickly
        r = []
        for row in db.execute("""
select
cast(time*1000 as int), cardId, 0, ease,
cast(nextInterval as int), cast(lastInterval as int),
cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
yesCount from reviewHistory"""):
            row = list(row)
            # new card ids
            try:
                row[1] = cardidmap[row[1]]
            except KeyError:
                # id doesn't exist
                continue
            # no ease 0 anymore
            row[3] = row[3] or 1
            # determine type, overwriting yesCount
            oldInt = row[5]
            yesCnt = row[8]
            # yesCnt included the current answer
            if row[3] > 1:
                yesCnt -= 1
            if oldInt < 1:
                # new or failed
                if yesCnt:
                    # type=relrn
                    row[8] = 2
                else:
                    # type=lrn
                    row[8] = 0
            else:
                # type=rev
                row[8] = 1
            r.append(row)
        db.executemany(
            "insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", r)
        db.execute("drop table reviewHistory")

    def _migrateDeckTbl(self):
        "Move the decks/deckVars tables into the col table and its config."
        db = self.db
        db.execute("delete from col")
        db.execute("""
insert or replace into col select id, cast(created as int), :t,
:t, 99, 0, 0, cast(lastSync as int),
"", "", "", "", "" from decks""", t=intTime())
        # prepare a deck to store the old deck options
        g, gc, conf = _getColVars(db)
        # delete old selective study settings, which we can't auto-upgrade easily
        keys = ("newActive", "newInactive", "revActive", "revInactive")
        for k in keys:
            db.execute("delete from deckVars where key=:k", k=k)
        # copy other settings, ignoring deck order as there's a new default
        gc['new']['perDay'] = db.scalar("select newCardsPerDay from decks")
        gc['new']['order'] = min(1, db.scalar("select newCardOrder from decks"))
        # these are collection level, and can't be imported on a per-deck basis
        # conf['newSpread'] = db.scalar("select newCardSpacing from decks")
        # conf['timeLim'] = db.scalar("select sessionTimeLimit from decks")
        # add any deck vars and save
        for (k, v) in db.execute("select * from deckVars").fetchall():
            if k in ("hexCache", "cssCache"):
                # ignore
                pass
            elif k == "leechFails":
                gc['lapse']['leechFails'] = int(v)
            else:
                conf[k] = v
        # don't use a learning mode for upgrading users
        #gc['new']['delays'] = [10]
        _addColVars(db, g, gc, conf)
        # clean up
        db.execute("drop table decks")
        db.execute("drop table deckVars")

    def _migrateModels(self):
        "Convert the model tables into the JSON stored in col.models."
        import anki.models
        db = self.db
        mods = {}
        for row in db.all(
            "select id, name from models"):
            # use only first 31 bits if not old anki id
            t = abs(row[0])
            if t > 4294967296:
                t >>= 32
            assert t > 0
            m = anki.models.defaultModel.copy()
            m['id'] = t
            m['name'] = row[1]
            m['mod'] = intTime()
            m['tags'] = []
            m['flds'] = self._fieldsForModel(row[0])
            m['tmpls'] = self._templatesForModel(row[0], m['flds'])
            mods[m['id']] = m
            db.execute("update notes set mid = ? where mid = ?", t, row[0])
        # save and clean up
        db.execute("update col set models = ?", json.dumps(mods))
        db.execute("drop table fieldModels")
        db.execute("drop table cardModels")
        db.execute("drop table models")

    def _fieldsForModel(self, mid):
        "Return a list of field dicts built from fieldModels rows of mid."
        import anki.models
        db = self.db
        dconf = anki.models.defaultField
        flds = []
        # note: qsize & qcol are used in upgrade then discarded
        for c, row in enumerate(db.all("""
select name, features, quizFontFamily, quizFontSize, quizFontColour,
editFontSize from fieldModels where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['rtl'],
             conf['font'],
             conf['qsize'],
             conf['qcol'],
             conf['size']) = row
            conf['ord'] = c
            # ensure data is good
            conf['rtl'] = not not conf['rtl']
            conf['font'] = conf['font'] or "Arial"
            # the old edit font size is not carried over
            conf['size'] = 12
            # will be removed later in upgrade
            conf['qcol'] = conf['qcol'] or "#000"
            conf['qsize'] = conf['qsize'] or 20
            flds.append(conf)
        return flds

    def _templatesForModel(self, mid, flds):
        "Return a list of template dicts built from cardModels rows of mid."
        # flds is accepted for the caller's benefit but not read here
        import anki.models
        db = self.db
        dconf = anki.models.defaultTemplate
        tmpls = []
        for c, row in enumerate(db.all("""
select name, active, qformat, aformat, questionInAnswer,
questionAlign, lastFontColour, typeAnswer from cardModels
where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['actv'],
             conf['qfmt'],
             conf['afmt'],
             # the following are used in upgrade then discarded
             hideq,
             conf['align'],
             conf['bg'],
             typeAns) = row
            conf['ord'] = c
            for fmt in ("qfmt", "afmt"):
                # ensure the new style field format
                conf[fmt] = re.sub(r"%\((.+?)\)s", r"{{\1}}", conf[fmt])
                # some special names have changed
                conf[fmt] = re.sub(
                    "(?i){{tags}}", "{{Tags}}", conf[fmt])
                conf[fmt] = re.sub(
                    "(?i){{cardModel}}", "{{Card}}", conf[fmt])
                conf[fmt] = re.sub(
                    "(?i){{modelTags}}", "{{Type}}", conf[fmt])
                # type answer is now embedded in the format
                if typeAns:
                    if fmt == "qfmt" or hideq:
                        conf[fmt] += '<br>{{type:%s}}' % typeAns
            # unless hideq is set, the answer format is prefixed with the
            # question side and a divider
            if not hideq:
                conf['afmt'] = (
                    "{{FrontSide}}\n\n<hr id=answer>\n\n" + conf['afmt'])
            tmpls.append(conf)
        return tmpls

    # Field munging
    ######################################################################

    def _mungeField(self, val):
        "Return val with runs of spaces turned into non-breaking spaces."
        # we no longer wrap fields in white-space: pre-wrap, so we need to
        # convert previous whitespace into non-breaking spaces
        def repl(match):
            return match.group(1).replace(" ", "&nbsp;")
        # only runs of two or more spaces are affected
        return re.sub("(  +)", repl, val)

    # Template upgrading
    ######################################################################
    # - {{field}} no longer inserts an implicit span, so we make the span
    #   explicit on upgrade.
    # - likewise with alignment and background color

    def _upgradeTemplates(self):
        "Inline old per-field styling into templates and model css."
        d = self.col
        for m in d.models.all():
            # cache field styles
            styles = {}
            for f in m['flds']:
                attrs = []
                if f['font'].lower() != 'arial':
                    attrs.append("font-family: %s" % f['font'])
                if f['qsize'] != 20:
                    attrs.append("font-size: %spx" % f['qsize'])
                if f['qcol'] not in ("black", "#000"):
                    attrs.append("color: %s" % f['qcol'])
                if f['rtl']:
                    attrs.append("direction: rtl; unicode-bidi: embed")
                if attrs:
                    styles[f['name']] = '<span style="%s">{{%s}}</span>' % (
                        "; ".join(attrs), f['name'])
                # obsolete
                del f['qcol']
                del f['qsize']

            def repl(match):
                field = match.group(2)
                if field in styles:
                    return match.group(1) + styles[field]
                # special or non-existant field; leave alone
                return match.group(0)

            # then for each template
            for t in m['tmpls']:
                for k in 'qfmt', 'afmt':
                    # replace old field references
                    t[k] = re.sub("(^|[^{]){{([^{}]+)?}}", repl, t[k])
                    # then strip extra {}s from other fields
                    t[k] = t[k].replace("{{{", "{{").replace("}}}", "}}")
                    # remove superfluous formatting from 1.0 -> 1.2 upgrade
                    t[k] = re.sub("font-size: ?20px;?", "", t[k])
                    t[k] = re.sub("(?i)font-family: ?arial;?", "", t[k])
                    t[k] = re.sub("color: ?#000(000)?;?", "", t[k])
                    t[k] = re.sub("white-space: ?pre-wrap;?", "", t[k])
                    # new furigana handling
                    if "japanese" in m['name'].lower():
                        if k == 'qfmt':
                            t[k] = t[k].replace(
                                "{{Reading}}", "{{kana:Reading}}")
                        else:
                            t[k] = t[k].replace(
                                "{{Reading}}", "{{furigana:Reading}}")
                # adjust css
                css = ""
                if t['bg'] != "white" and t['bg'].lower() != "#ffffff":
                    css = "background-color: %s;" % t['bg']
                if t['align']:
                    # align 1 -> left, 2 -> right; 0 adds nothing
                    css += "text-align: %s" % ("left", "right")[t['align']-1]
                if css:
                    css = '\n.card%d { %s }' % (t['ord']+1, css)
                    m['css'] += css
                # remove obsolete
                del t['bg']
                del t['align']
            # save model
            d.models.save(m)

    # Media references
    ######################################################################
    # In 2.0 we drop support for media and latex references in the template,
    # since they require generating card templates to see what media a note
    # uses, and are confusing for shared deck users. To ease the upgrade
    # process, we automatically convert the references to new fields.

    def _rewriteMediaRefs(self):
        "Move media/latex references out of templates into new note fields."
        col = self.col

        def rewriteRef(key, match):
            # m, t and state are bound by the loops at the end of the method
            whole = match.group(0)
            fname = match.group("fname")
            if whole in state['mflds']:
                # we've converted this field before
                new = state['mflds'][whole]
            else:
                # get field name and any prefix/suffix
                m2 = re.match(
                    r"([^{]*)\{\{\{?(?:text:)?([^}]+)\}\}\}?(.*)",
                    fname)
                # not a field reference?
                if not m2:
                    return
                pre, ofld, suf = m2.groups()
                # get index of field name
                try:
                    idx = col.models.fieldMap(m)[ofld][0]
                except Exception:
                    # invalid field or tag reference; don't rewrite
                    return
                # find a free field name
                while 1:
                    state['fields'] += 1
                    fld = "Media %d" % state['fields']
                    if fld not in col.models.fieldMap(m).keys():
                        break
                # add the new field
                f = col.models.newField(fld)
                f['qsize'] = 20
                f['qcol'] = '#000'
                col.models.addField(m, f)
                # loop through notes and write reference into new field
                data = []
                for nid, flds in self.col.db.execute(
                    "select id, flds from notes where id in "+
                    ids2str(col.models.nids(m))):
                    sflds = splitFields(flds)
                    ref = whole.replace(fname, pre+sflds[idx]+suf)
                    data.append((flds+ref, nid))
                # update notes
                col.db.executemany("update notes set flds=? where id=?", data)
                # note field for future, under the same key we look up with
                state['mflds'][whole] = fld
                new = fld
            # rewrite reference in template
            t[key] = t[key].replace(whole, "{{{%s}}}" % new)

        regexps = col.media.regexps + [
            r"(\[latex\](?P<fname>.+?)\[/latex\])",
            r"(\[\$\](?P<fname>.+?)\[/\$\])",
            r"(\[\$\$\](?P<fname>.+?)\[/\$\$\])"]
        # process each model
        for m in col.models.all():
            state = dict(mflds={}, fields=0)
            for t in m['tmpls']:
                for r in regexps:
                    # finditer scans the string as it was when called, so
                    # reassigning t[...] inside rewriteRef is safe
                    for match in re.finditer(r, t['qfmt']):
                        rewriteRef('qfmt', match)
                    for match in re.finditer(r, t['afmt']):
                        rewriteRef('afmt', match)
            if state['fields']:
                col.models.save(m)

    # Inactive templates
    ######################################################################
    # Templates can't be declared as inactive anymore. Remove any that are
    # marked inactive and have no dependent cards.

    def _removeInactive(self):
        "Drop inactive templates without cards and the obsolete actv key."
        d = self.col
        for m in d.models.all():
            remove = []
            for t in m['tmpls']:
                if not t['actv']:
                    if not d.db.scalar("""
select 1 from cards where nid in (select id from notes where mid = ?)
and ord = ? limit 1""", m['id'], t['ord']):
                        remove.append(t)
                del t['actv']
            for r in remove:
                try:
                    d.models.remTemplate(m, r)
                except AssertionError:
                    # if the model was unused this could result in all
                    # templates being removed; ignore error
                    pass
            d.models.save(m)

    # Conditional templates
    ######################################################################
    # For models that don't use a given template in all cards, we'll need to
    # add a new field to notes to indicate if the card should be generated or not

    def _addFlagFields(self):
        "Add a flag field for every template not used by all notes."
        for m in self.col.models.all():
            nids = self.col.models.nids(m)
            changed = False
            for tmpl in m['tmpls']:
                if self._addFlagFieldsForTemplate(m, nids, tmpl):
                    changed = True
            if changed:
                # save model
                self.col.models.save(m, templates=True)

    def _addFlagFieldsForTemplate(self, m, nids, tmpl):
        "Return True if a flag field was added for tmpl, else None."
        cids = self.col.db.list(
            "select id from cards where nid in %s and ord = ?" %
            ids2str(nids), tmpl['ord'])
        if len(cids) == len(nids):
            # not selectively used
            return
        # add a flag field, named after the template but kept unique
        name = tmpl['name']
        have = [f['name'] for f in m['flds']]
        while name in have:
            name += "_"
        f = self.col.models.newField(name)
        self.col.models.addField(m, f)
        # find the notes that have that card
        haveNids = self.col.db.list(
            "select nid from cards where id in "+ids2str(cids))
        # add "y" to the appended field for those notes
        self.col.db.execute(
            "update notes set flds = flds || 'y' where id in "+ids2str(
                haveNids))
        # wrap the template in a conditional
        tmpl['qfmt'] = "{{#%s}}\n%s\n{{/%s}}" % (
            f['name'], tmpl['qfmt'], f['name'])
        return True

    # Post-schema upgrade
    ######################################################################

    def _upgradeRest(self):
        "Handle the rest of the upgrade to 2.0."
        col = self.col
        # make sure we have a current model id
        col.models.setCurrent(list(col.models.models.values())[0])
        # remove unused templates that were marked inactive
        self._removeInactive()
        # rewrite media references in card template
        self._rewriteMediaRefs()
        # template handling has changed
        self._upgradeTemplates()
        # add fields for selectively used templates
        self._addFlagFields()
        # fix creation time
        col.sched._updateCutoff()
        d = datetime.datetime.today()
        d -= datetime.timedelta(hours=4)
        d = datetime.datetime(d.year, d.month, d.day)
        d += datetime.timedelta(hours=4)
        d -= datetime.timedelta(days=1+int((time.time()-col.crt)/86400))
        col.crt = int(time.mktime(d.timetuple()))
        col.sched._updateCutoff()
        # update uniq cache
        col.updateFieldCache(col.db.list("select id from notes"))
        # remove old views
        for v in ("failedCards", "revCardsOld", "revCardsNew",
                  "revCardsDue", "revCardsRandom", "acqCardsRandom",
                  "acqCardsOld", "acqCardsNew"):
            col.db.execute("drop view if exists %s" % v)
        # remove stats, as it's all in the revlog now
        col.db.execute("drop table if exists stats")
        # suspended cards don't use ranges anymore
        col.db.execute("update cards set queue=-1 where queue between -3 and -1")
        col.db.execute("update cards set queue=-2 where queue between 3 and 5")
        col.db.execute("update cards set queue=type where queue between 6 and 8")
        # remove old deleted tables
        for t in ("cards", "notes", "models", "media"):
            col.db.execute("drop table if exists %sDeleted" % t)
        # and failed cards
        left = len(col.decks.confForDid(1)['lapse']['delays'])*1001
        col.db.execute("""
update cards set left=?,type=1,queue=1,ivl=1 where type=1 and ivl <= 1
and queue>=0""", left)
        col.db.execute("""
update cards set odue=?,left=?,type=2 where type=1 and ivl > 1 and queue>=0""",
                       col.sched.today+1, left)
        # and due cards
        col.db.execute("""
update cards set due = cast(
(case when due < :stamp then 0 else 1 end) +
((due-:stamp)/86400) as int)+:today where type = 2
""", stamp=col.sched.dayCutoff, today=col.sched.today)
        # lapses were counted differently in 1.0, so we should have a higher
        # default lapse threshold
        for dconf in col.decks.allConf():
            dconf['lapse']['leechFails'] = 16
            col.decks.save(dconf)
        # possibly re-randomize
        conf = col.decks.allConf()[0]
        if not conf['new']['order']:
            col.sched.randomizeCards(1)
        else:
            col.sched.orderCards(1)
        # optimize and finish
        col.db.commit()
        col.db.execute("vacuum")
        col.db.execute("analyze")
        col.db.execute("update col set ver = ?", SCHEMA_VERSION)
        col.save()