# -*- coding: utf-8 -*- # Copyright: Damien Elmes # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html """\ Statistical tracking and reports ================================= """ __docformat__ = 'restructuredtext' # we track statistics over the life of the deck, and per-day STATS_LIFE = 0 STATS_DAY = 1 import unicodedata, time, sys, os, datetime import oldanki, oldanki.utils from datetime import date from oldanki.db import * from oldanki.lang import _, ngettext from oldanki.utils import canonifyTags, ids2str from oldanki.hooks import runFilter # Tracking stats on the DB ########################################################################## statsTable = Table( 'stats', metadata, Column('id', Integer, primary_key=True), Column('type', Integer, nullable=False), Column('day', Date, nullable=False), Column('reps', Integer, nullable=False, default=0), Column('averageTime', Float, nullable=False, default=0), Column('reviewTime', Float, nullable=False, default=0), # next two columns no longer used Column('distractedTime', Float, nullable=False, default=0), Column('distractedReps', Integer, nullable=False, default=0), Column('newEase0', Integer, nullable=False, default=0), Column('newEase1', Integer, nullable=False, default=0), Column('newEase2', Integer, nullable=False, default=0), Column('newEase3', Integer, nullable=False, default=0), Column('newEase4', Integer, nullable=False, default=0), Column('youngEase0', Integer, nullable=False, default=0), Column('youngEase1', Integer, nullable=False, default=0), Column('youngEase2', Integer, nullable=False, default=0), Column('youngEase3', Integer, nullable=False, default=0), Column('youngEase4', Integer, nullable=False, default=0), Column('matureEase0', Integer, nullable=False, default=0), Column('matureEase1', Integer, nullable=False, default=0), Column('matureEase2', Integer, nullable=False, default=0), Column('matureEase3', Integer, nullable=False, default=0), Column('matureEase4', Integer, nullable=False, default=0)) class Stats(object): def __init__(self): self.day = None self.reps = 0 self.averageTime = 0 self.reviewTime = 0 self.distractedTime = 0 self.distractedReps = 0 self.newEase0 = 0 self.newEase1 = 0 self.newEase2 = 0 self.newEase3 = 0 self.newEase4 = 0 self.youngEase0 = 0 self.youngEase1 = 0 self.youngEase2 = 0 self.youngEase3 = 0 self.youngEase4 = 0 self.matureEase0 = 0 self.matureEase1 = 0 self.matureEase2 = 0 self.matureEase3 = 0 self.matureEase4 = 0 def fromDB(self, s, id): r = s.first("select * from stats where id = :id", id=id) (self.id, self.type, self.day, self.reps, self.averageTime, self.reviewTime, self.distractedTime, self.distractedReps, self.newEase0, self.newEase1, self.newEase2, self.newEase3, self.newEase4, self.youngEase0, self.youngEase1, self.youngEase2, self.youngEase3, self.youngEase4, self.matureEase0, self.matureEase1, self.matureEase2, self.matureEase3, self.matureEase4) = r self.day = datetime.date(*[int(i) for i in self.day.split("-")]) def create(self, s, type, day): self.type = type self.day = day s.execute("""insert into stats (type, day, reps, averageTime, reviewTime, distractedTime, distractedReps, newEase0, newEase1, newEase2, newEase3, newEase4, youngEase0, youngEase1, youngEase2, youngEase3, youngEase4, matureEase0, matureEase1, matureEase2, matureEase3, matureEase4) values (:type, :day, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)""", self.__dict__) self.id = s.scalar( "select id from stats where type = :type and day = :day", type=type, day=day) def toDB(self, s): assert self.id s.execute("""update stats set type=:type, day=:day, reps=:reps, averageTime=:averageTime, reviewTime=:reviewTime, newEase0=:newEase0, newEase1=:newEase1, newEase2=:newEase2, newEase3=:newEase3, newEase4=:newEase4, youngEase0=:youngEase0, youngEase1=:youngEase1, youngEase2=:youngEase2, youngEase3=:youngEase3, youngEase4=:youngEase4, matureEase0=:matureEase0, matureEase1=:matureEase1, matureEase2=:matureEase2, matureEase3=:matureEase3, matureEase4=:matureEase4 where id = :id""", self.__dict__) mapper(Stats, statsTable) def genToday(deck): return datetime.datetime.utcfromtimestamp( time.time() - deck.utcOffset).date() def updateAllStats(s, gs, ds, card, ease, oldState): "Update global and daily statistics." updateStats(s, gs, card, ease, oldState) updateStats(s, ds, card, ease, oldState) def updateStats(s, stats, card, ease, oldState): stats.reps += 1 delay = card.totalTime() if delay >= 60: stats.reviewTime += 60 else: stats.reviewTime += delay stats.averageTime = ( stats.reviewTime / float(stats.reps)) # update eases attr = oldState + "Ease%d" % ease setattr(stats, attr, getattr(stats, attr) + 1) stats.toDB(s) def globalStats(deck): s = deck.s type = STATS_LIFE today = genToday(deck) id = s.scalar("select id from stats where type = :type", type=type) stats = Stats() if id: stats.fromDB(s, id) return stats else: stats.create(s, type, today) stats.type = type return stats def dailyStats(deck): s = deck.s type = STATS_DAY today = genToday(deck) id = s.scalar("select id from stats where type = :type and day = :day", type=type, day=today) stats = Stats() if id: stats.fromDB(s, id) return stats else: stats.create(s, type, today) return stats def summarizeStats(stats, pre=""): "Generate percentages and total counts for STATS. Optionally prefix." cardTypes = ("new", "young", "mature") h = {} # total counts ############### for type in cardTypes: # total yes/no for type, eg. gNewYes h[pre + type.capitalize() + "No"] = (getattr(stats, type + "Ease0") + getattr(stats, type + "Ease1")) h[pre + type.capitalize() + "Yes"] = (getattr(stats, type + "Ease2") + getattr(stats, type + "Ease3") + getattr(stats, type + "Ease4")) # total for type, eg. gNewTotal h[pre + type.capitalize() + "Total"] = ( h[pre + type.capitalize() + "No"] + h[pre + type.capitalize() + "Yes"]) # total yes/no, eg. gYesTotal for answer in ("yes", "no"): num = 0 for type in cardTypes: num += h[pre + type.capitalize() + answer.capitalize()] h[pre + answer.capitalize() + "Total"] = num # total over all, eg. gTotal num = 0 for type in cardTypes: num += h[pre + type.capitalize() + "Total"] h[pre + "Total"] = num # percentages ############## for type in cardTypes: # total yes/no % by type, eg. gNewYes% for answer in ("yes", "no"): setPercentage(h, pre + type.capitalize() + answer.capitalize(), pre + type.capitalize()) for answer in ("yes", "no"): # total yes/no, eg. gYesTotal% setPercentage(h, pre + answer.capitalize() + "Total", pre) h[pre + 'AverageTime'] = stats.averageTime h[pre + 'ReviewTime'] = stats.reviewTime return h def setPercentage(h, a, b): try: h[a + "%"] = (h[a] / float(h[b + "Total"])) * 100 except ZeroDivisionError: h[a + "%"] = 0 def getStats(s, gs, ds): "Return a handy dictionary exposing a number of internal stats." h = {} h.update(summarizeStats(gs, "g")) h.update(summarizeStats(ds, "d")) return h # Card stats ########################################################################## class CardStats(object): def __init__(self, deck, card): self.deck = deck self.card = card def report(self): c = self.card fmt = oldanki.utils.fmtTimeSpan fmtFloat = oldanki.utils.fmtFloat self.txt = "" self.addLine(_("Added"), self.strTime(c.created)) if c.firstAnswered: self.addLine(_("First Review"), self.strTime(c.firstAnswered)) self.addLine(_("Changed"), self.strTime(c.modified)) if c.reps: next = time.time() - c.combinedDue if next > 0: next = _("%s ago") % fmt(next) else: next = _("in %s") % fmt(abs(next)) self.addLine(_("Due"), next) self.addLine(_("Interval"), fmt(c.interval * 86400)) self.addLine(_("Ease"), fmtFloat(c.factor, point=2)) if c.lastDue: last = _("%s ago") % fmt(time.time() - c.lastDue) self.addLine(_("Last Due"), last) if c.interval != c.lastInterval: # don't show the last interval if it hasn't been updated yet self.addLine(_("Last Interval"), fmt(c.lastInterval * 86400)) self.addLine(_("Last Ease"), fmtFloat(c.lastFactor, point=2)) if c.reps: self.addLine(_("Reviews"), "%d/%d (s=%d)" % ( c.yesCount, c.reps, c.successive)) avg = fmt(c.averageTime, point=2) self.addLine(_("Average Time"),avg) total = fmt(c.reviewTime, point=2) self.addLine(_("Total Time"), total) self.addLine(_("Model Tags"), c.fact.model.tags) self.addLine(_("Card Template") + " "*5, c.cardModel.name) self.txt += "
" return self.txt def addLine(self, k, v): self.txt += "%s%s" % (k, v) def strTime(self, tm): s = oldanki.utils.fmtTimeSpan(time.time() - tm) return _("%s ago") % s # Deck stats (specific to the 'sched' scheduler) ########################################################################## class DeckStats(object): def __init__(self, deck): self.deck = deck def report(self): "Return an HTML string with a report." fmtPerc = oldanki.utils.fmtPercentage fmtFloat = oldanki.utils.fmtFloat if self.deck.isEmpty(): return _("Please add some cards first.") + "

" d = self.deck html="

" + _("Deck Statistics") + "

" html += _("Deck created: %s ago
") % self.createdTimeStr() total = d.cardCount new = d.newCountAll() young = d.youngCardCount() old = d.matureCardCount() newP = new / float(total) * 100 youngP = young / float(total) * 100 oldP = old / float(total) * 100 stats = d.getStats() (stats["new"], stats["newP"]) = (new, newP) (stats["old"], stats["oldP"]) = (old, oldP) (stats["young"], stats["youngP"]) = (young, youngP) html += _("Total number of cards:") + " %d
" % total html += _("Total number of facts:") + " %d

" % d.factCount html += "" + _("Card Maturity") + "
" html += _("Mature cards: ") + " %(old)d (%(oldP)s)
" % { 'old': stats['old'], 'oldP' : fmtPerc(stats['oldP'])} html += _("Young cards: ") + " %(young)d (%(youngP)s)
" % { 'young': stats['young'], 'youngP' : fmtPerc(stats['youngP'])} html += _("Unseen cards:") + " %(new)d (%(newP)s)
" % { 'new': stats['new'], 'newP' : fmtPerc(stats['newP'])} avgInt = self.getAverageInterval() if avgInt: html += _("Average interval: ") + ("%s ") % fmtFloat(avgInt) + _("days") html += "
" html += "
" html += "" + _("Correct Answers") + "
" html += _("Mature cards: ") + " " + fmtPerc(stats['gMatureYes%']) + ( " " + _("(%(partOf)d of %(totalSum)d)") % { 'partOf' : stats['gMatureYes'], 'totalSum' : stats['gMatureTotal'] } + "
") html += _("Young cards: ") + " " + fmtPerc(stats['gYoungYes%']) + ( " " + _("(%(partOf)d of %(totalSum)d)") % { 'partOf' : stats['gYoungYes'], 'totalSum' : stats['gYoungTotal'] } + "
") html += _("First-seen cards:") + " " + fmtPerc(stats['gNewYes%']) + ( " " + _("(%(partOf)d of %(totalSum)d)") % { 'partOf' : stats['gNewYes'], 'totalSum' : stats['gNewTotal'] } + "

") # average pending time existing = d.cardCount - d.newCountToday def tr(a, b): return "%s%s" % (a, b) def repsPerDay(reps,days): retval = ("%d " % reps) + ngettext("rep", "reps", reps) retval += ("/%d " % days) + ngettext("day", "days", days) return retval if existing and avgInt: html += "" + _("Recent Work") + "" if sys.platform.startswith("darwin"): html += "" else: html += "
" html += tr(_("In last week"), repsPerDay( self.getRepsDone(-7, 0), self.getDaysReviewed(-7, 0))) html += tr(_("In last month"), repsPerDay( self.getRepsDone(-30, 0), self.getDaysReviewed(-30, 0))) html += tr(_("In last 3 months"), repsPerDay( self.getRepsDone(-92, 0), self.getDaysReviewed(-92, 0))) html += tr(_("In last 6 months"), repsPerDay( self.getRepsDone(-182, 0), self.getDaysReviewed(-182, 0))) html += tr(_("In last year"), repsPerDay( self.getRepsDone(-365, 0), self.getDaysReviewed(-365, 0))) html += tr(_("Deck life"), repsPerDay( self.getRepsDone(-13000, 0), self.getDaysReviewed(-13000, 0))) html += "
" html += "

" + _("Average Daily Reviews") + "" if sys.platform.startswith("darwin"): html += "" else: html += "
" html += tr(_("Deck life"), ("%s ") % ( fmtFloat(self.getSumInverseRoundInterval())) + _("cards/day")) html += tr(_("In next week"), ("%s ") % ( fmtFloat(self.getWorkloadPeriod(7))) + _("cards/day")) html += tr(_("In next month"), ("%s ") % ( fmtFloat(self.getWorkloadPeriod(30))) + _("cards/day")) html += tr(_("In last week"), ("%s ") % ( fmtFloat(self.getPastWorkloadPeriod(7))) + _("cards/day")) html += tr(_("In last month"), ("%s ") % ( fmtFloat(self.getPastWorkloadPeriod(30))) + _("cards/day")) html += tr(_("In last 3 months"), ("%s ") % ( fmtFloat(self.getPastWorkloadPeriod(92))) + _("cards/day")) html += tr(_("In last 6 months"), ("%s ") % ( fmtFloat(self.getPastWorkloadPeriod(182))) + _("cards/day")) html += tr(_("In last year"), ("%s ") % ( fmtFloat(self.getPastWorkloadPeriod(365))) + _("cards/day")) html += "
" html += "

" + _("Average Added") + "" if sys.platform.startswith("darwin"): html += "" else: html += "
" html += tr(_("Deck life"), _("%(a)s/day, %(b)s/mon") % { 'a': fmtFloat(self.newAverage()), 'b': fmtFloat(self.newAverage()*30)}) np = self.getNewPeriod(7) html += tr(_("In last week"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(7))})) np = self.getNewPeriod(30) html += tr(_("In last month"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(30))})) np = self.getNewPeriod(92) html += tr(_("In last 3 months"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(92))})) np = self.getNewPeriod(182) html += tr(_("In last 6 months"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(182))})) np = self.getNewPeriod(365) html += tr(_("In last year"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(365))})) html += "
" html += "

" + _("Average New Seen") + "" if sys.platform.startswith("darwin"): html += "" else: html += "
" np = self.getFirstPeriod(7) html += tr(_("In last week"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(7))})) np = self.getFirstPeriod(30) html += tr(_("In last month"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(30))})) np = self.getFirstPeriod(92) html += tr(_("In last 3 months"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(92))})) np = self.getFirstPeriod(182) html += tr(_("In last 6 months"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(182))})) np = self.getFirstPeriod(365) html += tr(_("In last year"), _("%(a)d (%(b)s/day)") % ( {'a': np, 'b': fmtFloat(np / float(365))})) html += "
" html += "

" + _("Card Ease") + "
" html += _("Lowest factor: %.2f") % d.s.scalar( "select min(factor) from cards") + "
" html += _("Average factor: %.2f") % d.s.scalar( "select avg(factor) from cards") + "
" html += _("Highest factor: %.2f") % d.s.scalar( "select max(factor) from cards") + "
" html = runFilter("deckStats", html) return html def getDaysReviewed(self, start, finish): now = datetime.datetime.today() x = now + datetime.timedelta(start) y = now + datetime.timedelta(finish) return self.deck.s.scalar( "select count() from stats where " "day >= :x and day <= :y and reps > 0", x=x, y=y) def getRepsDone(self, start, finish): now = datetime.datetime.today() x = time.mktime((now + datetime.timedelta(start)).timetuple()) y = time.mktime((now + datetime.timedelta(finish)).timetuple()) return self.deck.s.scalar( "select count() from reviewHistory where time >= :x and time <= :y", x=x, y=y) def getAverageInterval(self): return self.deck.s.scalar( "select sum(interval) / count(interval) from cards " "where cards.reps > 0") or 0 def intervalReport(self, intervals, labels, total): boxes = self.splitIntoIntervals(intervals) keys = boxes.keys() keys.sort() html = "" for key in keys: html += ("%s" + "%d%s") % ( labels[key], boxes[key], fmtPerc(boxes[key] / float(total) * 100)) return html def splitIntoIntervals(self, intervals): boxes = {} n = 0 for i in range(len(intervals) - 1): (min, max) = (intervals[i], intervals[i+1]) for c in self.deck: if c.interval > min and c.interval <= max: boxes[n] = boxes.get(n, 0) + 1 n += 1 return boxes def newAverage(self): "Average number of new cards added each day." return self.deck.cardCount / max(1, self.ageInDays()) def createdTimeStr(self): return oldanki.utils.fmtTimeSpan(time.time() - self.deck.created) def ageInDays(self): return (time.time() - self.deck.created) / 86400.0 def getSumInverseRoundInterval(self): return self.deck.s.scalar( "select sum(1/round(max(interval, 1)+0.5)) from cards " "where cards.reps > 0 " "and priority > 0") or 0 def getWorkloadPeriod(self, period): cutoff = time.time() + 86400 * period return (self.deck.s.scalar(""" select count(id) from cards where combinedDue < :cutoff and priority > 0 and relativeDelay in (0,1)""", cutoff=cutoff) or 0) / float(period) def getPastWorkloadPeriod(self, period): cutoff = time.time() - 86400 * period return (self.deck.s.scalar(""" select count(*) from reviewHistory where time > :cutoff""", cutoff=cutoff) or 0) / float(period) def getNewPeriod(self, period): cutoff = time.time() - 86400 * period return (self.deck.s.scalar(""" select count(id) from cards where created > :cutoff""", cutoff=cutoff) or 0) def getFirstPeriod(self, period): cutoff = time.time() - 86400 * period return (self.deck.s.scalar(""" select count(*) from reviewHistory where reps = 1 and time > :cutoff""", cutoff=cutoff) or 0)