In [1]:
import pandas as pd
import numpy as np
import doctest
import CleanData as cd
import BokehMaker as magicPlots
import sqlite3

# import sys
# stdout = sys.stdout
# reload(sys)
# sys.setdefaultencoding('utf-8')
# sys.stdout = stdout

## TODO:
* Take data from the entire Roman Imperial era
    * Create SQLite db to store data
* 'Radiate crown' in the Augustan era

In [2]:
# Base name (no extension) of the SQLite file that stores the coin records.
title = 'AugustusCoins_44BC-14AD'
# Connect to ../Data/<title>.sqlite and keep one cursor for the queries below.
cnx = sqlite3.connect(''.join(['../Data/', title, '.sqlite']))
cursor = cnx.cursor()

# Read in Data

In [11]:
# Mints excluded from the British Museum data; the empty string removes rows
# whose mint field is blank.
bad_locations = ['Alexandria', 'Cos', 'Apamea', 'Philippi', 'Thrace',
                 'Achulla', 'Arabia', 'Orthosia', 'Sebaste', 'Prymnessus',
                 'Acmonea', 'Antiochia', 'Eucarpea', 'Chalkis', 'Apollonia',
                 'Midaeum', 'Magnetes', 'Heraclea Salbace', '']

# One '?' bind marker per excluded mint, so sqlite3 binds the values instead
# of having them interpolated into the SQL text.
placeholder = '?'
placeholders = ', '.join(placeholder for _ in bad_locations)

# The denomination filter compares against the literal string '?'
# (presumably the marker for an unknown denomination -- confirm against the
# table).  It is written with single quotes because that is the SQL syntax for
# a string literal; the double-quoted form only works through SQLite's legacy
# fallback for double-quoted identifiers, which some builds disable.
query = """SELECT denomination, mint FROM britishMuseum
            WHERE mint NOT IN (%s) AND
            denomination <> '?'""" % placeholders
cursor.execute(query, bad_locations)

# Column order matches the SELECT list above.
cols = ['denomination', 'mint']
rows = cursor.fetchall()
mints_bm = pd.DataFrame(rows, columns=cols)

In [13]:
# Preview the first rows of the British Museum frame built from the query above.
mints_bm.head()

Unnamed: 0,denomination,mint
0,denarius,Lugdunum
1,as,Calagurris
2,as,Calagurris
3,as,Turiaso
4,as,Turiaso


In [15]:
# Mint values excluded from the American Numismatic Society data: blank,
# uncertain, or ambiguous ("A|B") attributions.
bad_locations = ['', 'uncertain', 'Uncertain value', 'Bilbilis|Italica']

# One '?' bind marker per excluded mint, so sqlite3 binds the values instead
# of having them interpolated into the SQL text.
placeholder = '?'
placeholders = ', '.join(placeholder for _ in bad_locations)

# Rows with a blank denomination are dropped.  The empty string is written with
# single quotes because that is the SQL syntax for a string literal; the
# double-quoted form only works through SQLite's legacy fallback for
# double-quoted identifiers, which some builds disable.
query = """SELECT denomination, mint FROM americanNumismaticSociety
            WHERE mint NOT IN (%s) AND
            denomination <> ''""" % placeholders
cursor.execute(query, bad_locations)

# Column order matches the SELECT list above.
cols = ['denomination', 'mint']
rows = cursor.fetchall()
mints_ans = pd.DataFrame(rows, columns=cols)

In [None]:
# Preview only the first rows (same as the British Museum preview) instead of
# rendering the whole frame.
mints_ans.head()

# Plot Data
* Plot all data together in plots
* Split up data and plot separately

### All data
* Make stacked bar plot
* Make map plot

In [None]:
from bokeh.io import output_notebook, save
from bokeh.plotting import show
from bokeh.models import Range1d, HoverTool
from bokeh.palettes import linear_palette, viridis, grey

In [None]:
#output_notebook()

In [None]:
# `cleaned` is a second name for the British Museum frame (no copy is made).
cleaned = mints_bm

# Build the stacked bar chart from the 'mint' and 'denomination' columns.
# Presumably one bar per mint, stacked by denomination -- the exact layout is
# decided inside magicPlots.makeStackedBar.
location_bar_plot = magicPlots.makeStackedBar(cleaned, 'mint', 'denomination', sort_bars=True,
                               bars_ascending=False, sort_stacks=True, stacks_agg='sum', stacks_ascending=False,
                              colors=viridis, title='Number of coins produced from each location')

location_bar_plot.yaxis.axis_label='Location Counts'
# NOTE(review): the y axis is pinned to 0-500; confirm no mint has more than
# 500 coins, otherwise its bar will not fit in the visible range.
location_bar_plot.y_range = Range1d(0, 500, bounds=(0, 500))
location_bar_plot.legend.location = 'top_right'
# NOTE(review): the tooltip fields '@Denomination', '@height' and '@Sum' must
# match column names of the data source created inside makeStackedBar; the
# frame passed in uses lowercase 'denomination' -- verify the hover text renders.
location_bar_plot.add_tools(HoverTool(tooltips=[('Denomination', '@Denomination'), 
                                                ('Denomination Count', '@height'),
                                                ('Location Count', '@Sum')]))

# Write the chart to a standalone HTML file, then display it.
save(location_bar_plot, filename='../Plots/location_bar.html')
show(location_bar_plot)

In [None]:
from bokeh.io import save, show
import pygeoj
from pyproj import Proj, transform

In [None]:
#output_notebook()

In [None]:
# NOTE(review): mints_bm as built from the SQL query in this notebook only has
# the columns 'denomination' and 'mint'; 'Production place' is used here and
# below -- confirm the helper copes with that or switch to 'mint'.
location_counts = cd.prepareDataframeForMapping(mints_bm, col_name='Production place')

# Map of the mints.  pt_size grows logarithmically with its argument
# (presumably the per-mint 'Count' -- confirm in magicPlots.makeMap).
# NOTE(review): x_ranges / y_ranges look like projected map coordinates in
# metres -- confirm the projection used by makeMap.
location_map_plot = magicPlots.makeMap(location_counts, 'Production place', 'Count', x_ranges=(-2.0e6, 5e6), 
                                        y_ranges=(3.5e6, 7e6), mintsFile='../GeoJSON/mints.geojson', 
                                       path='../GeoJSON/', ext='html', pt_size=lambda x: 5 * np.log(3 * x),
                                      colors_ascending=False)

# Write the map to a standalone HTML file, then display it.
save(location_map_plot, filename='../Plots/location_map.html')
show(location_map_plot)

### Individual time periods and subjects

In [None]:
from bokeh.layouts import gridplot

In [None]:
def coinsFromDates(df, date_range, col_name='Date'):
    '''
    Keep only the coins whose date(s) fall inside ``date_range``.

    Parameters
    ----------
    df : Pandas dataframe
        Dataframe containing coins and dates
    date_range : tuple
        Tuple of length two containing the date range as (begin, end);
        both ends are inclusive
    col_name : str
        Column name of dates. Every cell must be a sequence: one element
        for a single year, two elements for a (start, end) span

    Return
    ------
    Returns a dataframe containing only the rows that have the correct dates.
    A single year is kept when begin <= year <= end; a two-element span is
    kept when it lies entirely inside the range. Cells of any other length
    are dropped.
    '''
    begin, end = date_range[0], date_range[1]

    def intWithinTupleRange(tup):
        if len(tup) == 1:
            return begin <= tup[0] <= end
        if len(tup) == 2:
            # The span must start and finish inside the requested range.
            return begin <= tup[0] and tup[1] <= end
        return False

    # Build the mask from the date column only and force a bool dtype, so an
    # empty frame still produces a valid (empty) boolean mask.
    mask = df[col_name].map(intWithinTupleRange).astype(bool)
    return df[mask]


def containKeyword(df, keys, col_names):
    '''
    Select the rows in which at least one keyword occurs in its column.

    Parameters
    ----------
    df : Pandas dataframe
        Dataframe containing coins and the column(s) to look for keywords in
    keys : list
        list of strings to look for in each row of the given column
    col_names : list
        list of columns of where to search for the keyword in the 'keys'
        list, respectively (keys[i] is searched for in col_names[i])

    Return
    ------
    Returns a dataframe containing only rows that have a keyword in the given
    column. Matching is case-insensitive. A row matching several keywords
    appears once, and rows keep their original order.

    Raises
    ------
    ValueError
        If 'keys' and 'col_names' differ in length, if 'keys' is empty, or
        if one of 'col_names' is not a column of 'df'.
    '''
    def containIn(obj, key):
        cleaned_key = key.lower()
        if isinstance(obj, str):
            return cleaned_key in obj.lower()
        if isinstance(obj, (tuple, list)):
            # Non-string items (e.g. missing values) can never match.
            return any(isinstance(item, str) and cleaned_key in item.lower()
                       for item in obj)
        # Anything else (None, NaN, numbers) does not contain the keyword.
        return False

    if len(keys) != len(col_names):
        raise ValueError('length of keys does not equal length of columns')
    if len(keys) == 0:
        raise ValueError('Missing keys')
    missing = [col for col in col_names if col not in df.columns]
    if missing:
        raise ValueError('Missing columns: %s' % ', '.join(str(col) for col in missing))

    # OR together one boolean mask per (keyword, column) pair. Plain numpy
    # arrays are used so that the combination is positional and also works
    # for an empty frame.
    matched = np.zeros(len(df), dtype=bool)
    for key, col in zip(keys, col_names):
        hits = df[col].map(lambda cell, key=key: containIn(cell, key))
        matched |= hits.astype(bool).values

    return df[matched]


def makeTitle(dates, subject):
    '''
    Build a plot title from a date range and a list of subjects.

    Parameters
    ----------
    dates : tuple
        tuple of length two with date range; negative years are BC,
        all other years are AD
    subject : list
        list of strings of the subjects; the first letter of each one is
        capitalised and the subject is wrapped in single quotes

    Return
    ------
    Returns a string of an appropriate title

    Doctest
    -------
    >>> makeTitle([-44, -31], ['star'])
    "'Star' in coinage from 44BC to 31BC"
    >>> makeTitle((-10, 13), ['Apollo', 'lyre'])
    "'Apollo' and 'Lyre' in coinage from 10BC to 13AD"
    '''
    # sub[:1] (rather than sub[0]) keeps an empty subject from raising.
    quoted = ["'" + sub[:1].upper() + sub[1:] + "'" for sub in subject]

    if len(quoted) <= 2:
        # "A" or "A and B": no comma for a pair.
        subjects_text = ' and '.join(quoted)
    else:
        # "A, B, and C"
        subjects_text = ', '.join(quoted[:-1]) + ', and ' + quoted[-1]

    def formatYear(year):
        return str(abs(year)) + ('BC' if year < 0 else 'AD')

    str_dates = [formatYear(date) for date in dates]
    return subjects_text + ' in coinage from ' + str_dates[0] + ' to ' + str_dates[1]

# Quick visual check of makeTitle; the expected value is the one in its doctest.
makeTitle([-44, -31], ['star'])

In [None]:
# NOTE(review): the frame built from the SQL query earlier in this notebook
# only has the columns 'denomination' and 'mint'. This cell needs
# 'Description', 'Date', 'Associated names', 'Production place' and
# 'Denomination' -- confirm mints_bm carries them before running.
mints_with_desc = mints_bm[mints_bm['Description'] != '']

# (start, end) years; negative years are BC.
date_ranges = [(-44, -31), (-30, -27), (-27, -19), (-18, -16), (-15, -11), (-10, 13)]

# Each subject is (keywords, columns): keywords[i] is searched in columns[i].
subjects = [(['star'], ['Description']),
            (['statue of Augustus'], ['Description']),
            (['capricorn', 'globe', 'rudder'], ['Description'] * 3),
            (['Julius Caesar'], ['Associated names']),
            (['Apollo', 'lyre'], ['Description', 'Description'])]

good_dfs = {}    # plot title -> matching coins
grid_plots = []  # one [bar plot, map plot] row per non-empty combination

for dates in date_ranges:
    # The date filter does not depend on the subject, so it runs once per
    # date range instead of once per (date range, subject) pair.
    contain_dates = coinsFromDates(mints_with_desc, dates)

    for subject in subjects:
        plot_title = makeTitle(dates, subject[0])
        good_df = containKeyword(contain_dates, subject[0], subject[1])

        # Only plot combinations that actually have coins.
        if not good_df.empty:
            good_dfs[plot_title] = good_df

            bar_plot = magicPlots.makeStackedBar(good_df, 'Production place', 'Denomination', sort_bars=True,
                                                 bars_ascending=False, sort_stacks=True, stacks_agg='sum',
                                                 stacks_ascending=False, colors=viridis,
                                                 title=plot_title+' Stacked Bar Graph', plot_size=(480, 480))
            bar_plot.yaxis.axis_label = 'Location Counts'
            bar_plot.add_tools(HoverTool(tooltips=[('Denomination', '@Denomination'),
                                                   ('Denomination Count', '@height'),
                                                   ('Location Count', '@Sum')]))

            map_counts = cd.prepareDataframeForMapping(good_df)
            map_plot = magicPlots.makeMap(map_counts, 'Production place', 'Count', x_ranges=(-2.0e6, 5e6),
                                          y_ranges=(3.5e6, 7e6), path='../GeoJSON/', ext='html',
                                          pt_size=lambda x: 10 * x, mintsFile='../GeoJSON/mints.geojson',
                                          title=plot_title+' Map', colors_ascending=False)
            grid_plots.append([bar_plot, map_plot])

grid = gridplot(grid_plots)
#save(grid, filename='../Plots/Many plots')
show(grid)

In [None]:
# This cell searches every coin with a description, without a date filter.
# The title therefore states the span of the whole data set (44BC-14AD, as in
# the database name) instead of reusing whatever `dates` value an earlier
# loop happened to leave behind.
all_dates = (-44, 14)

# Each subject is (keywords, columns): keywords[i] is searched in columns[i].
subjects = [(['radiate'], ['Description'])]
grid_plots = []  # one [bar plot, map plot] row per non-empty subject
good_dfs = {}    # plot title -> matching coins

for subject in subjects:
    plot_title = makeTitle(all_dates, subject[0])
    good_df = containKeyword(mints_with_desc, subject[0], subject[1])

    # Only plot subjects that actually have coins.
    if not good_df.empty:
        good_dfs[plot_title] = good_df

        bar_plot = magicPlots.makeStackedBar(good_df, 'Production place', 'Denomination', sort_bars=True,
                                             bars_ascending=False, sort_stacks=True, stacks_agg='sum',
                                             stacks_ascending=False, colors=viridis,
                                             title=plot_title+' Stacked Bar Graph', plot_size=(480, 480))
        bar_plot.yaxis.axis_label = 'Location Counts'
        bar_plot.add_tools(HoverTool(tooltips=[('Denomination', '@Denomination'),
                                               ('Denomination Count', '@height'),
                                               ('Location Count', '@Sum')]))

        map_counts = cd.prepareDataframeForMapping(good_df)
        map_plot = magicPlots.makeMap(map_counts, 'Production place', 'Count', x_ranges=(-2.0e6, 5e6),
                                      y_ranges=(3.5e6, 7e6), mintsFile='../GeoJSON/mints.geojson',
                                      path='../GeoJSON/', ext='html', title=plot_title+' Map',
                                      colors_ascending=False, pt_size=lambda x: 5 * np.log(3 * x))
        grid_plots.append([bar_plot, map_plot])

grid = gridplot(grid_plots)
#save(grid, filename='../Plots/Many plots')
show(grid)