In [None]:
%matplotlib inline

from matplotlib import pyplot
import matplotlib
import pandas
from pony import orm
from IPython import display
import jinja2
import db
import seaborn
import os
import numpy as np
import warnings
import pathlib

In [None]:
# Switch into the data directory and open the course database stored there.
# NOTE: the chdir is relative, so re-running this cell in the same kernel
# would try to enter data/data.
os.chdir('data')
database_file = pathlib.Path.cwd().joinpath('course.sqlite')
db.use_db(str(database_file))

In [None]:
# The exam to report on is selected through the EXAM_ID environment variable.
exam_id = os.environ['EXAM_ID']

full_scores = db.full_exam_data(exam_id)
# Full exam data has multilevel columns (includes detailed feedback).
# Keep only the columns whose second level is 'total' and flatten the labels
# down to the first level.
is_total_column = full_scores.columns.get_level_values(1) == 'total'
# Explicit copy so that relabelling the columns never touches ``full_scores``.
problem_scores = full_scores.iloc[:, is_total_column].copy()
problem_scores.columns = problem_scores.columns.get_level_values(0)
# Exclude columns in which every score equals zero from the statistics.
problem_scores = problem_scores.loc[:, ~(problem_scores == 0).all()]

with orm.db_session:
    # The heading must be closed with </h1>; the previous '<h1>' opened a second
    # heading instead of closing the first one.
    display.display_html(display.HTML('<h1>Statistics for {}</h1>'.format(db.Exam[exam_id].name)))

# Silence all warnings raised from here on.
warnings.simplefilter('ignore')

## At a glance

In [None]:
seaborn.set()
seaborn.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

cm = matplotlib.cm.magma
# Discretise the colour scale into 20 equal-width bins on [0, 1].
bounds = np.linspace(0, 1, 21)
norm = matplotlib.colors.BoundaryNorm(bounds, cm.N)

# Per problem: cumulative share of the (non-missing) scores that are at or below
# each distinct score value, ordered by ascending score.
vals = [problem_scores[i].value_counts(normalize=True).sort_index().cumsum() for i in problem_scores]
# One row per (problem, distinct score), transposed into four parallel arrays:
#   data[0] = y position (-problem index), data[1] = bar width (share with that score),
#   data[2] = bar left edge (cumulative share below it),
#   data[3] = score divided by the highest score that occurs for the problem.
# ``Series.values`` is used because ``Series.data`` is deprecated and no longer
# available in current pandas releases.
data = np.array([(-i, upper - lower, lower, num / val.index[-1])
                 for i, val in enumerate(vals)
                 for num, upper, lower in zip(val.index, val.values, [0] + list(val.values[:-1]))]).T
fig = pyplot.figure(figsize=(12, 9))
ax = fig.add_subplot(1, 1, 1)
# Bars get taller and change colour with the relative score.
ax.barh(data[0], data[1], height=data[3] / 2 + .1, left=data[2],
        color=cm(norm(data[3])), align='edge')
ax.set_yticks(np.arange(0, -len(problem_scores.columns), -1))
ax.set_yticklabels(problem_scores.columns)
ax.set_xlabel('percentile')
ax.set_xlim(-0.025, 1.025)
sm = matplotlib.cm.ScalarMappable(cmap=cm, norm=norm)
# Public replacement for assigning the private ``_A`` attribute.
sm.set_array([])
# Name the Axes explicitly: the mappable is not attached to any Axes, so recent
# matplotlib versions cannot infer where to place the colorbar.
colorbar = fig.colorbar(sm, ax=ax)
colorbar.set_ticks(np.linspace(0, 1, 11))
colorbar.set_label('score percentage')

## Correlations between different problems.

In [None]:
# Pairwise correlation between problem scores; missing scores count as 0.
score_correlations = problem_scores.fillna(0).corr()
score_correlations.round(2)

## Lengthy description of all possible feedback options and how often they were received

In [None]:
# Both templates autoescape their variables so that names, feedback texts and
# descriptions containing characters such as '<' or '&' are shown literally
# instead of being interpreted as HTML.
feedback_template = jinja2.Template("""<ul>
{% for fo in feedback_options %}
<li> {{ fo.text }}: {{ fo.description }} </li>
{% endfor %}
</ul>
""", autoescape=True)
heading_template = jinja2.Template('<h3>{{ name }}</h3>', autoescape=True)

# HTML fragments, one per reported problem; joined once at the end.
sections = []

# Run all ORM access inside an explicit session, like the heading cell does.
with orm.db_session:
    for problem in db.Exam[exam_id].problems.order_by(lambda p: p.name):
        # Skip problems whose feedback options carry no positive score.
        if not orm.max(problem.feedback_options.score, default=0):
            continue

        # Look up each option's solution count once and keep only options
        # that were actually assigned to at least one solution.
        used_options = []
        for fo in problem.feedback_options:
            amount = fo.solutions.count()
            if amount:
                used_options.append((fo, amount))

        df = pandas.DataFrame({fo.text: (amount, fo.score) for fo, amount in used_options},
                              index=['amount', 'score']).T.fillna(0).astype(int)
        df.index.name = "Feedback"

        sections.append(
            heading_template.render(name=problem.name)
            + '<h4>Feedback frequencies and scores</h4>'
            + df._repr_html_()
            + '<hr><h4>Descriptions</h4>'
            + feedback_template.render(feedback_options=[fo for fo, _ in used_options])
        )

stats = display.HTML(''.join(sections))
stats