In [1]:
from collections import Counter
from itertools import chain

import pandas as pd
import numpy as np
import sqlite3
import re

from bkcharts import Bar, cat
from bokeh.io import output_notebook, save
from bokeh.models import HoverTool
from bokeh.palettes import viridis, PRGn
from bokeh.plotting import show
from scipy import stats

# import CleanData as cd
# import BokehPlots as bp
# import CoinagePlots as cp

In [2]:
title = 'Roman_Imperial_Coinage'
fname = '../Data/' + title + '.sqlite'
conn = sqlite3.connect(fname)

# Functions

## Get Coin Counts Data

In [3]:
def countsDF(source, column, conn):
    '''
    Get counts of the coins by emperor in SOURCE 
    '''
    query = '''
    SELECT
      authority,
      {},
      count({}) AS counts
    FROM {}
    GROUP BY authority, material
    ORDER BY startDate, endDate, counts DESC;
    '''.format(column, column, source, column)
    df = pd.read_sql(query, conn)
    return df

In [4]:
def radiateCountsDF(source, column, conn):
    '''
    Get dataframe of the counts for the occurence of 'radiate' crowns by emperor and denomination
    '''
    query = '''
    SELECT
      authority,
      {},
      count({}) AS counts
    FROM {}
    WHERE description LIKE '%radiate%'
    GROUP BY authority, material
    ORDER BY startDate, endDate, counts DESC;
    '''.format(column, column, source, column)
    df = pd.read_sql(query, conn)
    return df

In [5]:
def radiateRatiosDF(source, conn):
    '''
    Get dataframe of the ratio for the occurence of 'radiate' crowns by emperor 
    and denomination over total coins from emperor
    '''
    query = '''
    WITH emperorCoins AS (
      SELECT
        authority,
        description,
        denomination,
        startDate,
        endDate
      FROM {}
    ), emperorTotals AS (
      SELECT
        authority,
        count(description)  AS emperorCounts
      FROM emperorCoins
      GROUP BY authority
    )
    SELECT
      authority,
      count(description) * 1.0 / emperorCounts   AS keywordRatio
    FROM
      emperorCoins JOIN emperorTotals USING (authority)
    WHERE description LIKE '%radiate%'
    GROUP BY authority
    ORDER BY startDate, endDate;
    '''.format(source)
    df = pd.read_sql(query, conn)
    return df

## Generate Coin Counts Plots

In [6]:
def countsPlot(source, column, conn):
    '''
    Create counts of the coins by emperor in SOURCE plot
    '''
    df = countsDF(source, column, conn)
    counts = Bar(df, label=cat(columns="authority", sort=False), palette=viridis(df[column].unique().size), 
                    values='counts', stack=column, responsive=True, active_scroll='wheel_zoom', 
                     title="Count of coins by Authority in {} Plot".format(source))

    hover_counts = HoverTool(tooltips=[
                                ("authority", "@authority"),
                                ("denomination", "@denomination"),
                                ("count", "@counts")
                            ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Authority"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/{}_{}_counts_plot.html".format(source, column))
    
    return counts

In [7]:
def radiateCountsPlot(source, column, conn):
    '''
    Create plot of the count of coins that have 'radiate' crowns in them by emperor
    '''
    df = radiateCountsDF(source, column, conn)
    counts = Bar(df, label=cat(columns="authority", sort=False), palette=viridis(df[column].unique().size), 
                    values='counts', stack=column, responsive=True, active_scroll='wheel_zoom', 
                     title="Count of 'Radiate Crowns' per Authority in " + source + " Plot")

    hover_counts = HoverTool(tooltips=[
                                ("authority", "@authority"),
                                ("denomination", "@denomination"),
                                ("count", "@counts")
                            ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Authority"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/" + source + "_" + column + "_radiate_counts_plot.html")
    
    return counts

In [8]:
def radiateRatiosPlot(source, conn):
    '''
    Plot the ratio for the occurence of 'radiate' crowns by emperor 
    over total number of coins by emperor
    '''
    df = radiateRatiosDF(source, conn)
    ratios = Bar(df, label=cat(columns="authority", sort=False), values='keywordRatio', 
                 responsive=True, active_scroll='wheel_zoom', legend=False,
                 title="Ratio of 'Radiate Crowns' by Authority in " + source + " Plot")

    hover_counts = HoverTool(tooltips=[
                                ("authority", "@authority"),
                                ("Percentage", "@percentages")
                            ])

    ratios.add_tools(hover_counts)
    ratios.xaxis.axis_label = "Authority"
    ratios.yaxis.axis_label = "Percentages"

    save(ratios, "../Plots/" + source + "_" + column + "_ratios_plot.html")
    
    return ratios

## Create Word Counts Plots

In [9]:
def wordCountsPlot(source, conn, wanted_obverse=[], wanted_reverse=[]):
    '''
    Get the count of words that appear on "radiate" coins from SOURCE.
    '''
    query = '''
    SELECT
      lower(obverseType) as obverseType,
      lower(reverseType) as reverseType
    FROM {}
    WHERE description LIKE "%radiate%"
    '''.format(source)
    df = pd.read_sql(query, conn)
    
    obverseList = [re.sub('[^a-zA-Z\s]+', '', a).split() for a in df['obverseType']]
    reverseList = [re.sub('[^a-zA-Z\s]+', '', a).split() for a in df['reverseType']]
    
    obverseCounter = dict(Counter(chain.from_iterable(obverseList)))
    reverseCounter = dict(Counter(chain.from_iterable(reverseList)))
    
    obverseTemp = pd.DataFrame(list(obverseCounter.items()), columns=['Word', 'Count'])
    if wanted_obverse:
        obverseTemp = obverseTemp[obverseTemp['Word'].isin(wanted_obverse)]
    else:
        obverseTemp = obverseTemp[obverseTemp['Count'] > 100]
    obverseTemp['Side'] = 'Obverse'
    obverseTemp = obverseTemp.fillna(0)
    obverseTemp = obverseTemp.sort_values(by='Count', ascending=False)
    
    reverseTemp = pd.DataFrame(list(reverseCounter.items()), columns=['Word', 'Count'])
    if wanted_reverse:
        reverseTemp = reverseTemp[reverseTemp['Word'].isin(wanted_reverse)]
    else:
        reverseTemp = reverseTemp[(reverseTemp['Count'] > 50) & (reverseTemp['Count'] < 100)]
    reverseTemp['Side'] = 'Reverse'
    reverseTemp = reverseTemp.fillna(0)
    reverseTemp = reverseTemp.sort_values(by='Count', ascending=False)
    
    hover_obverseCounts = HoverTool(tooltips=[
                                ("Word", "@Word"),
                                ("Count", "@Count")
                            ])
    hover_reverseCounts = HoverTool(tooltips=[
                                ("Word", "@Word"),
                                ("Count", "@Count")
                            ])
    
    obverseCounts = Bar(obverseTemp, label=cat(columns=["Word"], sort=False), values='Count', 
                         responsive=True, active_scroll='wheel_zoom', legend='top_right',
                         title="Other Words on Obverse of 'radiate' Coins from {}".format(source))
    obverseCounts.add_tools(hover_obverseCounts)
    obverseCounts.yaxis.axis_label = "Count"
    
    reverseCounts = Bar(reverseTemp, label=cat(columns=["Word"], sort=False), values='Count', 
                         responsive=True, active_scroll='wheel_zoom', legend='top_right',
                         title="Other Words on Reverse of 'radiate' Coins from {}".format(source))
    reverseCounts.add_tools(hover_reverseCounts)
    reverseCounts.yaxis.axis_label = "Count"
    
    #show(obverseCounts)
    #show(reverseCounts)
    
    save(obverseCounts, "../Plots/" + source + "_" + column + "_obverseCount_plot.html")
    save(reverseCounts, "../Plots/" + source + "_" + column + "_reverseCount_plot.html")
    
#wordCountsPlot('allData', conn, wanted_reverse=wanted_reverse)

# Create Counts and Ratios Plots

In [10]:
# Table names
sources = ['britishMuseum', 'americanNumismaticSociety', 'OCRE', 'allData']

# Columns to plot individually
columns = ["material"]

# Words to show on word count plots
wanted_obverse = ['scepter',
                 'cuirassed',
                 'draped',
                 'laureate']
wanted_reverse = ['victory',
                 'jupiter',
                 'sol',
                 'pax',
                 'cornucopia',
                 'emperor',
                 'felicitas',
                 'roma',
                 'hercules',
                 'captive',
                 'eagel',
                 'trophy',
                 'mars',
                 'prince',
                 'providentia',
                 'rudder',
                 'whip',
                 'club']

In [11]:
# Loop over sources to get all permutations of sources and plots
for source in sources:
    for column in columns:
        counts = countsPlot(source, column, conn)
        # show(counts)
        radiateCounts = radiateCountsPlot(source, column, conn)
        # show(radiateCounts)
        ratios = radiateRatiosPlot(source, conn)
        # show(ratios)
        wordCounts = wordCountsPlot(source, conn, wanted_obverse=wanted_obverse, wanted_reverse=wanted_reverse)



# Statistical Significance of Data

In [12]:
bm_ratio = radiateRatiosDF('britishMuseum', conn)
ans_ratio = radiateRatiosDF('americanNumismaticSociety', conn)
merged = bm_ratio.merge(ans_ratio, on='emperor')

KeyError: 'emperor'

In [None]:
merged.tail()

In [None]:
stats.linregress(merged['keywordRatio_x'], merged['keywordRatio_y'])