In [2]:
import pandas as pd
import numpy as np
import sqlite3

from bkcharts import Bar, cat
from bokeh.io import output_notebook, save
from bokeh.models import HoverTool
from bokeh.palettes import viridis
from bokeh.plotting import show
from scipy import stats

# import CleanData as cd
# import BokehPlots as bp
# import CoinagePlots as cp

In [3]:
title = 'Roman_Imperial_Coinage'
fname = '../Data/' + title + '.sqlite'
conn = sqlite3.connect(fname)

# Functions

## Get Data

In [9]:
def countsDF(source, conn):
    '''
    Get counts of the coins by emperor in SOURCE 
    '''
    query = '''
    SELECT
      emperor,
      materials as denomination,
      count(materials) AS counts
    FROM
      {} JOIN emperors ON
        startDate >= emperors.start
        AND startDate <= emperors.end
    WHERE description LIKE '%radiate%'
    GROUP BY emperor, materials
    ORDER BY start, end, counts DESC;
    '''.format(source)
    df = pd.read_sql(query, conn)
    return df

In [10]:
def radiateCountsDF(source, conn):
    '''
    Get dataframe of the counts for the occurence of 'radiate' crowns by emperor and denomination
    '''
    query = '''
    SELECT
      emperor,
      denomination,
      count(denomination) AS counts
    FROM
      {} JOIN emperors ON
        startDate >= emperors.start
        AND startDate <= emperors.end
    WHERE description LIKE '%radiate%'
    GROUP BY emperor, denomination
    ORDER BY start, end, counts DESC;
    '''.format(source)
    df = pd.read_sql(query, conn)
    return df

In [11]:
def radiateRatiosDF(source, conn):
    '''
    Get dataframe of the ratio for the occurence of 'radiate' crowns by emperor 
    and denomination over total coins from emperor
    '''
    query = '''
    WITH emperorCoins AS (
      SELECT
        emperor,
        description,
        denomination,
        start,
        end
      FROM
        {} JOIN emperors ON
          startDate >= emperors.start
          AND startDate <= emperors.end
    ), emperorTotals AS (
      SELECT
        emperor,
        count(description)  AS emperorCounts
      FROM emperorCoins
      GROUP BY emperor
    )
    SELECT
      emperor,
      count(description) * 1.0 / emperorCounts   AS keywordRatio
    FROM
      emperorCoins JOIN emperorTotals USING (emperor)
    WHERE description LIKE '%radiate%'
    GROUP BY emperor
    ORDER BY start, end;
    '''.format(source)
    df = pd.read_sql(query, conn)
    return df

## Generate Plots

In [12]:
def countsPlot(source, conn):
    '''
    Create counts of the coins by emperor in SOURCE plot
    '''
    df = countsDF(source, conn)
    counts = Bar(df, label=cat(columns="emperor", sort=False), palette=viridis(df.denomination.unique().size), 
                    values='counts', stack="denomination", responsive=True, active_scroll='wheel_zoom', 
                     title="Count of coins by Emperor in {} Plot".format(source))

    hover_counts = HoverTool(tooltips=[
                                ("emperor", "@emperor"),
                                ("denomination", "@denomination"),
                                ("count", "@counts")
                            ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Emperors"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/{}_counts_plot.html".format(source))
    
    return counts

In [13]:
def radiateCountsPlot(source, conn):
    '''
    Create plot of the count of coins that have 'radiate' crowns in them by emperor
    '''
    df = radiateCountsDF(source, conn)
    counts = Bar(df, label=cat(columns="emperor", sort=False), palette=viridis(df.denomination.unique().size), 
                    values='counts', stack="denomination", responsive=True, active_scroll='wheel_zoom', 
                     title="Count of 'Radiate Crowns' per Emperor in " + source + " Plot")

    hover_counts = HoverTool(tooltips=[
                                ("emperor", "@emperor"),
                                ("denomination", "@denomination"),
                                ("count", "@counts")
                            ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Emperors"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/" + source + "_radiate_counts_plot.html")
    
    return counts

In [14]:
def radiateRatiosPlot(source, conn):
    '''
    Plot the ratio for the occurence of 'radiate' crowns by emperor 
    over total number of coins by emperor
    '''
    df = radiateRatiosDF(source, conn)
    ratios = Bar(df, label=cat(columns="emperor", sort=False), values='keywordRatio', 
                 responsive=True, active_scroll='wheel_zoom', legend=False,
                 title="Ratio of 'Radiate Crowns' by Emperor in " + source + " Plot")

    hover_counts = HoverTool(tooltips=[
                                ("emperor", "@emperor"),
                                ("Percentage", "@percentages")
                            ])

    ratios.add_tools(hover_counts)
    ratios.xaxis.axis_label = "Emperors"
    ratios.yaxis.axis_label = "Percentages"

    save(ratios, "../Plots/" + source + "_ratios_plot.html")
    
    return ratios

# Create Counts and Ratios Plots

In [15]:
sources = ['britishMuseum', 'americanNumismaticSociety', 'OCRE', 'allData']
for source in sources:
    counts = countsPlot(source, conn)
    # show(counts)
    radiateCounts = radiateCountsPlot(source, conn)
    # show(radiateCounts)
    ratios = radiateRatiosPlot(source, conn)
    # show(ratios)



DatabaseError: Execution failed on sql '
    SELECT
      emperor,
      materials as denomination,
      count(materials) AS counts
    FROM
      americanNumismaticSociety JOIN emperors ON
        startDate >= emperors.start
        AND startDate <= emperors.end
    WHERE description LIKE '%radiate%'
    GROUP BY emperor, materials
    ORDER BY start, end, counts DESC;
    ': no such column: materials

In [42]:
denominations = []
sources = ['britishMuseum', 'americanNumismaticSociety', 'OCRE']
for source in sources:
    denominations.append(list(countsDF(source, conn)['denomination'].unique()))
denominations = [x for denoms in denominations for x in denoms]

In [43]:
set(denominations)

{'?',
 'ae',
 'ae halfunit',
 'ae large',
 'ae medium',
 'ae or ae',
 'ae small',
 'ae unit',
 'antoninianus',
 'argenteus',
 'as',
 'as  asdupondius',
 'as  assarion semis  hemiassarion',
 'as  dupondius',
 'as assarion',
 'as cut half',
 'as irregular',
 'as struck on sestertius size flan',
 'as subferratus',
 'aureus',
 'aureus bracteate',
 'aureus double',
 'aureus festaureus of  solidi',
 'aureus or denarius',
 'aureus quarter',
 'barbarous radiate',
 'br',
 'cast',
 'chalkous',
 'cistophorus',
 'contorniate',
 'denarius',
 'denarius carausian laureate silver',
 'denarius irregular',
 'denarius probably laureate carausian silver',
 'dichalkon',
 'didrachm',
 'diobol',
 'double',
 'double aureus',
 'double maiorina',
 'double sestertius',
 'drachm',
 'drachma',
 'dupondius',
 'dupondius double',
 'dupondius or as',
 'dupondius or as dupondius more likely',
 'dupondius or as medallic',
 'dupondius or as possibly',
 'dupondius or as probably',
 'dupondius or as probably small medalli

# Statistical Significance of Data

In [10]:
bm_ratio = radiateRatiosDF('britishMuseum', conn)
ans_ratio = radiateRatiosDF('americanNumismaticSociety', conn)
merged = bm_ratio.merge(ans_ratio, on='emperor')

In [11]:
merged.tail()

Unnamed: 0,emperor,keywordRatio_x,keywordRatio_y
49,Severus II,0.00813,0.008333
50,Maxentius,0.016327,0.011268
51,Constantine the Great,0.020356,0.037863
52,Licinius I,0.033259,0.067402
53,Maximinus II,0.032258,0.058824


In [12]:
stats.linregress(merged['keywordRatio_x'], merged['keywordRatio_y'])

LinregressResult(slope=0.90108812630976232, intercept=0.076982647557691658, rvalue=0.49722822964661706, pvalue=0.00013080012302330908, stderr=0.21804155158865832)

In [13]:
merged = merged.merge(hoxne_ratio, on='emperor')

NameError: name 'hoxne_ratio' is not defined

In [None]:
merged.tail()