In [1]:
from collections import Counter
from itertools import chain

import pandas as pd
import numpy as np
import sqlite3
import re

from bkcharts import Bar, cat
from bokeh.io import output_notebook, save
from bokeh.models import HoverTool
from bokeh.palettes import viridis, PRGn
from bokeh.plotting import show
from scipy import stats

# import CleanData as cd
# import BokehPlots as bp
# import CoinagePlots as cp

In [2]:
# Dataset name; the SQLite file under ../Data/ carries the same name.
title = 'Roman_Imperial_Coinage'
fname = ''.join(('../Data/', title, '.sqlite'))
# Connection object that the cells below pass to every query/plot function.
conn = sqlite3.connect(fname)

# Functions

## Get Coin Counts Data

In [3]:
def countsDF(source, column, conn):
    '''
    Get counts of the coins by emperor in SOURCE, broken down by COLUMN.

    Parameters
    ----------
    source : str
        Table name in database
    column : str
        Column the per-authority counts are broken down by (e.g. 'material')
    conn : sqlite3 connection
        Connection to sqlite3 database

    Returns
    -------
    pandas.DataFrame
        One row per (authority, COLUMN) pair with the columns
        'authority', COLUMN and 'counts'.
    '''
    # SOURCE and COLUMN are identifiers, which SQL placeholders cannot bind,
    # so they are formatted into the statement: only pass trusted names.
    # The grouping uses COLUMN (it used to be hard-coded to `material`, which
    # collapsed distinct values of any other column into a single row).
    query = '''
    SELECT
      authority,
      {column},
      count({column}) AS counts
    FROM {source}
    GROUP BY authority, {column}
    ORDER BY startDate, endDate, counts DESC;
    '''.format(source=source, column=column)
    return pd.read_sql(query, conn)

In [4]:
def radiateCountsDF(source, column, conn):
    '''
    Get dataframe of the counts for the occurrence of 'radiate' crowns by
    emperor, broken down by COLUMN.

    Parameters
    ----------
    source : str
        Table name in database
    column : str
        Column the per-authority counts are broken down by (e.g. 'material')
    conn : sqlite3 connection
        Connection to sqlite3 database

    Returns
    -------
    pandas.DataFrame
        One row per (authority, COLUMN) pair among the coins whose
        description contains 'radiate', with the columns 'authority',
        COLUMN and 'counts'.
    '''
    # SOURCE and COLUMN are identifiers, which SQL placeholders cannot bind,
    # so they are formatted into the statement: only pass trusted names.
    # The grouping uses COLUMN (it used to be hard-coded to `material`).
    query = '''
    SELECT
      authority,
      {column},
      count({column}) AS counts
    FROM {source}
    WHERE description LIKE '%radiate%'
    GROUP BY authority, {column}
    ORDER BY startDate, endDate, counts DESC;
    '''.format(source=source, column=column)
    return pd.read_sql(query, conn)

In [5]:
def radiateRatiosDF(source, conn):
    '''
    Build a dataframe with, for each authority in SOURCE, the share of its
    coins whose description mentions 'radiate'.

    The result has the columns 'authority' and 'keywordRatio' (radiate coins
    divided by all coins of that authority). Authorities without any
    'radiate' coin are filtered out by the WHERE clause and do not appear.
    '''
    ratio_sql = '''
    WITH emperorCoins AS (
      SELECT
        authority,
        description,
        denomination,
        startDate,
        endDate
      FROM {}
    ), emperorTotals AS (
      SELECT
        authority,
        count(description)  AS emperorCounts
      FROM emperorCoins
      GROUP BY authority
    )
    SELECT
      authority,
      count(description) * 1.0 / emperorCounts   AS keywordRatio
    FROM
      emperorCoins JOIN emperorTotals USING (authority)
    WHERE description LIKE '%radiate%'
    GROUP BY authority
    ORDER BY startDate, endDate;
    '''
    # The table name is substituted only when the statement is executed.
    return pd.read_sql(ratio_sql.format(source), conn)

## Generate Coin Counts Plots

In [6]:
def countsPlot(source, column, conn):
    '''
    Create and save a stacked bar plot of the coin counts by emperor in SOURCE.

    Parameters
    ----------
    source : str
        Table name in database
    column : str
        Column the bars are stacked by (one palette colour per distinct value)
    conn : sqlite3 connection
        Connection to sqlite3 database

    Returns
    -------
    The bkcharts chart, which is also saved to
    ../Plots/<source>_<column>_counts_plot.html
    '''
    df = countsDF(source, column, conn)
    counts = Bar(df, label=cat(columns="authority", sort=False), palette=viridis(df[column].unique().size),
                 values='counts', stack=column, responsive=True, active_scroll='wheel_zoom',
                 title="Count of coins by Authority in {} Plot".format(source))

    # The second tooltip row follows COLUMN: the frame only carries the column
    # that was queried, so a fixed "@denomination" field is absent whenever the
    # plot is stacked by something else (e.g. 'material').
    hover_counts = HoverTool(tooltips=[
        ("authority", "@authority"),
        (column, "@" + column),
        ("count", "@counts")
    ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Authority"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/{}_{}_counts_plot.html".format(source, column))

    return counts

In [7]:
def radiateCountsPlot(source, column, conn):
    '''
    Create and save a stacked bar plot of the count of coins that have
    'radiate' crowns in them by emperor.

    Parameters
    ----------
    source : str
        Table name in database
    column : str
        Column the bars are stacked by (one palette colour per distinct value)
    conn : sqlite3 connection
        Connection to sqlite3 database

    Returns
    -------
    The bkcharts chart, which is also saved to
    ../Plots/<source>_<column>_radiate_counts_plot.html
    '''
    df = radiateCountsDF(source, column, conn)
    counts = Bar(df, label=cat(columns="authority", sort=False), palette=viridis(df[column].unique().size),
                 values='counts', stack=column, responsive=True, active_scroll='wheel_zoom',
                 title="Count of 'Radiate Crowns' per Authority in " + source + " Plot")

    # The second tooltip row follows COLUMN: the frame only carries the column
    # that was queried, so a fixed "@denomination" field is absent whenever the
    # plot is stacked by something else (e.g. 'material').
    hover_counts = HoverTool(tooltips=[
        ("authority", "@authority"),
        (column, "@" + column),
        ("count", "@counts")
    ])

    counts.add_tools(hover_counts)
    counts.xaxis.axis_label = "Authority"
    counts.yaxis.axis_label = "Counts"

    save(counts, "../Plots/" + source + "_" + column + "_radiate_counts_plot.html")

    return counts

In [8]:
def radiateRatiosPlot(source, conn, column=None):
    '''
    Plot the ratio for the occurrence of 'radiate' crowns by emperor
    over total number of coins by emperor, and save the plot.

    Parameters
    ----------
    source : str
        Table name in database
    conn : sqlite3 connection
        Connection to sqlite3 database
    column : str, optional
        Only used to tag the output file name. The ratios themselves do not
        depend on any column, so it may be left out.

    Returns
    -------
    The bkcharts chart, which is also saved to
    ../Plots/<source>_ratios_plot.html, or to
    ../Plots/<source>_<column>_ratios_plot.html when COLUMN is given.
    '''
    df = radiateRatiosDF(source, conn)
    ratios = Bar(df, label=cat(columns="authority", sort=False), values='keywordRatio',
                 responsive=True, active_scroll='wheel_zoom', legend=False,
                 title="Ratio of 'Radiate Crowns' by Authority in " + source + " Plot")

    # The frame has no 'percentages' column; its value column is 'keywordRatio'.
    # NOTE(review): this mirrors how the count plots reference "@counts" --
    # confirm the field resolves in the rendered tooltip.
    hover_counts = HoverTool(tooltips=[
        ("authority", "@authority"),
        ("Percentage", "@keywordRatio")
    ])

    ratios.add_tools(hover_counts)
    ratios.xaxis.axis_label = "Authority"
    ratios.yaxis.axis_label = "Percentages"

    # The file name used to read a `column` variable that is not a parameter of
    # this function, so it only worked while a notebook global of that name
    # happened to exist. It is now an explicit, optional argument.
    if column is None:
        out_path = "../Plots/" + source + "_ratios_plot.html"
    else:
        out_path = "../Plots/" + source + "_" + column + "_ratios_plot.html"
    save(ratios, out_path)

    return ratios

## Create Word Counts Plots

In [14]:
def _radiateWordCounts(texts, wanted, default_mask, col, colVal):
    '''
    Count the words in the Series TEXTS and return a Word/Count dataframe
    sorted by descending count.

    WANTED, when non-empty, restricts the rows to those words; otherwise
    DEFAULT_MASK (a function of the 'Count' column) selects the rows. When COL
    is not 'Word', a COL column holding COLVAL is added.
    '''
    # NULL type descriptions arrive as missing values; re.sub would raise on
    # them, so they are skipped.
    cleaned = (re.sub(r'[^a-zA-Z\s]+', '', text).split() for text in texts.dropna())
    tally = Counter(chain.from_iterable(cleaned))

    frame = pd.DataFrame(list(tally.items()), columns=['Word', 'Count'])
    if wanted:
        frame = frame[frame['Word'].isin(wanted)]
    else:
        frame = frame[default_mask(frame['Count'])]
    if col != 'Word':
        # assign() returns a new frame instead of writing into a filtered slice.
        frame = frame.assign(**{col: colVal})
    return frame.fillna(0).sort_values(by='Count', ascending=False)


def _radiateWordChart(frame, side, source, col):
    '''
    Build the bar chart of word counts for one SIDE ('Obverse' or 'Reverse').
    '''
    if col == 'Word':
        chart = Bar(frame, label=cat(columns=["Word"], sort=False), values='Count',
                    responsive=True, active_scroll='wheel_zoom', legend='top_right',
                    title="Other Words on {} of 'radiate' Coins from {}".format(side, source))
    else:
        chart = Bar(frame, label=cat(columns=[col], sort=False), values='Count',
                    responsive=True, active_scroll='wheel_zoom', legend='top_right', stack='Word',
                    title="Other Words on {} of 'radiate' Coins from {} by {}".format(side, source, col))
    chart.add_tools(HoverTool(tooltips=[
        ("Word", "@Word"),
        ("Count", "@Count")
    ]))
    chart.yaxis.axis_label = "Count"
    return chart


def wordCountsPlot(source, conn, col="Word", wanted_obverse=None, wanted_reverse=None):
    '''
    Get the count of words that appear on "radiate" coins from SOURCE and save
    one bar plot for the obverse and one for the reverse.

    Parameters
    ----------
    source : str
        Table name in database
    conn : sqlite3 connection
        Connection to sqlite3 database
    col : str
        Column to plot by; the default 'Word' plots the words themselves,
        any other value groups the bars by that column and stacks by word
    wanted_obverse : str list
        List of strings of words wanted on the obverse plot; when empty or
        None, words counted more than 100 times are kept
    wanted_reverse : str list
        List of strings of words wanted on the reverse plot; when empty or
        None, words counted more than 50 and fewer than 100 times are kept

    Returns
    -------
    None. The plots are saved to ../Plots/<source>_<col>_obverseCount_plot.html
    and ../Plots/<source>_<col>_reverseCount_plot.html.
    '''
    select_cols = ['lower(obverseType) as obverseType',
                   'lower(reverseType) as reverseType']
    if col != 'Word':
        select_cols.append(col)
    # Single quotes mark the LIKE pattern as a string literal; double quotes
    # are identifier quotes in SQL.
    query = '''
    SELECT
      {}
    FROM {}
    WHERE description LIKE '%radiate%'
    '''.format(',\n      '.join(select_cols), source)
    df = pd.read_sql(query, conn)

    colVals = ['Word'] if col == 'Word' else df[col].unique()

    # Collect the per-value frames and concatenate once at the end.
    obverseFrames = []
    reverseFrames = []
    for colVal in colVals:
        tempDF = df if col == 'Word' else df[df[col] == colVal]
        obverseFrames.append(_radiateWordCounts(
            tempDF['obverseType'], wanted_obverse,
            lambda count: count > 100, col, colVal))
        reverseFrames.append(_radiateWordCounts(
            tempDF['reverseType'], wanted_reverse,
            lambda count: (count > 50) & (count < 100), col, colVal))

    # pd.concat rejects an empty list, which happens when no row matched.
    obverse = pd.concat(obverseFrames) if obverseFrames else pd.DataFrame()
    reverse = pd.concat(reverseFrames) if reverseFrames else pd.DataFrame()

    obverseCounts = _radiateWordChart(obverse, 'Obverse', source, col)
    reverseCounts = _radiateWordChart(reverse, 'Reverse', source, col)

    save(obverseCounts, "../Plots/" + source + "_" + col + "_obverseCount_plot.html")
    save(reverseCounts, "../Plots/" + source + "_" + col + "_reverseCount_plot.html")
    
#wordCountsPlot('britishMuseum', conn, wanted_obverse=wanted_obverse)

In [11]:
def wordCountsOverTimePlot(source, conn, obverse, reverse):
    '''
    Get the counts of OBVERSE on obverse and REVERSE on reverse from SOURCE,
    and save a bar plot of those counts by authority.

    Parameters
    ----------
    source : str
        Table name in database
    conn : sqlite3 connection
        Connection to sqlite3 database
    obverse : str
        Text that must occur in the obverse description
    reverse : str
        Text that must occur in the reverse description

    Returns
    -------
    None. The plot is saved to
    ../Plots/<source>_<obverse>_<reverse>_Count_plot.html.
    '''
    # The table name is an identifier and has to be formatted in, but the two
    # search words are values: binding them as parameters keeps quotes inside
    # a word from breaking (or rewriting) the statement.
    query = '''
    SELECT
      lower(obverseType) as obverseType,
      lower(reverseType) as reverseType,
      authority,
      startDate,
      endDate
    FROM {}
    WHERE
      obverseType LIKE ? AND reverseType LIKE ?
    '''.format(source)
    df = pd.read_sql(query, conn, params=['%' + obverse + '%', '%' + reverse + '%'])
    df = df.sort_values(by=['startDate', 'endDate'], ascending=False)

    # The frame has no 'Word'/'Count' columns, so "@Word"/"@Count" could never
    # resolve. NOTE(review): "@height" is assumed to be the field under which
    # bkcharts exposes the bar value -- confirm against the rendered plot.
    hover = HoverTool(tooltips=[
        ("Authority", "@authority"),
        ("Count", "@height")
    ])

    # agg='count' makes each bar the number of matching coins per authority.
    reverseCounts = Bar(df, label=cat(columns=['authority'], sort=False), values='reverseType',
                        responsive=True, active_scroll='wheel_zoom', legend='top_right', agg='count',
                        title="'{}' on Obverse and '{}' on Reverse Coins from {} by Authority".format(obverse, reverse, source))
    reverseCounts.add_tools(hover)
    reverseCounts.yaxis.axis_label = "Count"

    save(reverseCounts, "../Plots/" + source + "_" + obverse + "_" + reverse + "_Count_plot.html")

#wordCountsOverTimePlot('britishMuseum', conn, 'radiate', 'jupiter')

# Create Counts and Ratios Plots

In [17]:
# Table names
sources = ['britishMuseum', 'americanNumismaticSociety', 'OCRE', 'allData']

# Columns to plot individually
columns = ["material"]

# Words to show on word count plots.
# The plots match these against lower-cased words with an exact `isin` test,
# so every entry must be lower case and spelled exactly.
wanted_obverse = ['scepter',
                 'cuirassed',
                 'draped',
                 'laureate']
wanted_reverse = ['victory',
                 'jupiter',
                 'sol',
                 'pax',
                 'cornucopia',
                 'emperor',
                 'felicitas',
                 'roma',
                 'hercules',
                 'captive',
                 'eagle',
                 'trophy',
                 'mars',
                 'prince',
                 'providentia',
                 'rudder',
                 'whip',
                 'club']

In [16]:
# Render and save every plot for each (source, column) combination
for source, column in ((src, col) for src in sources for col in columns):
    counts = countsPlot(source, column, conn)
    radiateCounts = radiateCountsPlot(source, column, conn)
    ratios = radiateRatiosPlot(source, conn)
    wordCounts = wordCountsPlot(source, conn,
                                wanted_obverse=wanted_obverse,
                                wanted_reverse=wanted_reverse)

          Word  Count
165     draped   1294
118  cuirassed   1276
85    laureate    154


In [18]:
obverses = ['radiate']
reverses = ['jupiter', 'sol', 'victory']

# One saved plot per source for every obverse/reverse keyword pairing
for source in sources:
    for obverse, reverse in ((obv, rev) for obv in obverses for rev in reverses):
        reverseCounts = wordCountsOverTimePlot(source, conn, obverse, reverse)

                                          obverseType  \
37  radiate bust of licinius, right, draped and cu...   
41  bust of licinius i, radiate, draped and cuiras...   
26  bust of maximinus ii, radiate, draped & cuiras...   
38  bust of constantius i, radiate, draped & cuira...   
29  bust of diocletian, radiate, draped and cuiras...   
28  bust of diocletian, radiate, draped and cuiras...   
27  bust of galerius, radiate, draped & cuirassed, r.   
23  bust of maximian, radiate, cuirassed,facing right   
36  radiate draped and cuirassed bust (viewed from...   
33  radiate, draped and cuirassed bust of diocleti...   
20  radiate, draped and cuirassed bust of carinus,...   
21     bust of numerian, radiate, draped,facing right   
22       bust of probus, radiate, draped,facing right   
32     radiate and cuirassed bust of aurelian, right.   
40    bust of aurelian, radiate and cuirassed, right.   
35  radiate and cuirassed bust of claudius ii, right.   
19  radiate bust of postumus, d

# Statistical Significance of Data

In [None]:
# 'radiate' ratios per authority for the two collections being compared
bm_ratio = radiateRatiosDF('britishMuseum', conn)
ans_ratio = radiateRatiosDF('americanNumismaticSociety', conn)
# Joined on authority with the default merge; the two ratio columns come out
# as keywordRatio_x (British Museum) and keywordRatio_y (ANS).
merged = pd.merge(bm_ratio, ans_ratio, on='authority')

In [None]:
# Peek at the last rows of the merged ratios
merged.tail(n=5)

In [None]:
# Linear regression of the ANS ratios (y) on the British Museum ratios (x)
stats.linregress(x=merged['keywordRatio_x'], y=merged['keywordRatio_y'])