In [1]:
import pandas as pd
import numpy as np
import doctest
import CleanBMData as cleanBM
import BokehMaker as magicPlots

import sys
stdout = sys.stdout
reload(sys)
sys.setdefaultencoding('utf-8')
sys.stdout = stdout

## TODO:
* Make function to create:
    * ~~Stacked bar graph~~
    * ~~Map~~
* Take data from [American Numismatic Society](http://numismatics.org/search/apis)
* Look at following types of coins for location, denomination, material, and subject:
    * seperate 44-31, 30-27, 27-19, 18-16, 15-11 (ALL BCE) and 10BC-13AD
        * ~~Star of Julius (Sidus Iulium)~~
        * ~~Capricorn and globe and rudder~~
        * Secular games
        * ~~Julius Caesar~~
        * ~~Apollo and lyre~~
        * ~~Statue of Augustus~~
* ~~Manually select colors for stacked bar graph to avoid repeated colors next to each other~~
* Predictive value of subjects, material, inscription
    * Predict what will have laurels

# Read in Data
Read in the data from the British Museum data scraper script with proper column names

In [2]:
bm_df = pd.read_csv('../Data/AugustusCoins_44BC-14AD.csv',
                 converters={"Authority": cleanBM.stringToList(), 'Associated names': cleanBM.stringToList(), 
                             'Subjects': cleanBM.stringToList(), 'Inscriptions': cleanBM.stringToListofDicts('|', ';', ':')})
bm_df = pd.concat([bm_df, cleanBM.cleanInscriptions(bm_df)], axis=1)
bm_df = bm_df.drop('Unnamed: 0', 1)
bm_df = bm_df.replace(np.nan, '', regex=True)
bm_df.iloc[435].Description

'Copper alloy coin.(obverse) Head of Augustus, laureate, right. (reverse) Bull, right. '

In [3]:
def makeDescription(material, manufacture, obverse, reverse):
    '''
    '''
    result = manufacture + ' ' + material + '.'
    if obverse:
        result += ' (obverse) ' + obverse + '.'
    if reverse:
        result += ' (reverse) ' + reverse + '.'
    return result

In [4]:
ans_df = pd.read_csv('../Data/ANS_query.csv',
                    converters={"Authority": cleanBM.stringToList('|'), 'Year': cleanBM.stringToList('|'),
                               'Issuer': cleanBM.stringToList('|'), 'Portrait': cleanBM.stringToList('|'),
                               'Reference': cleanBM.stringToList('|')})
ans_df = ans_df.replace(np.nan, '', regex=True)
ans_df['Description'] = ans_df.apply(lambda row: makeDescription(row['Material'], row['Manufacture'], row['Obverse Type'], row['Reverse Type']), axis=1)
ans_df.tail()

Unnamed: 0,URI,Title,RecordId,Authority,Coin Type URI,Date on Object,Degree,Deity,Denomination,Department,...,Object Type,Portrait,Reference,Region,Reverse Legend,Reverse Type,Weight,Year,Date Record Modified,Description
995,http://numismatics.org/search/id/1944.100.65544,"Bronze Coin, Antioch, 27 BC - AD 14. 1944.100....",1944.100.65544,[Augustus],,,,,,Greek,...,Coin,[],[],Syria,,SC within wreath,13.89,"[-26, 14]",2015-06-18T19:23:45Z,Bronze. (obverse) Augustus hd. laur. r.. (rev...
996,http://numismatics.org/search/id/1944.100.65545,"Bronze Coin, Antioch, 44 BC - AD 14. 1944.100....",1944.100.65545,[Augustus],,,,,,Greek,...,Coin,[],[],Syria,,SC within wreath,14.33,"[-43, 14]",2015-06-18T19:23:45Z,Bronze. (obverse) Augustus hd. laur. r.. (rev...
997,http://numismatics.org/search/id/1944.100.65546,"Bronze Coin, Antioch, 44 BC - AD 14. 1944.100....",1944.100.65546,[Augustus],,,,,,Greek,...,Coin,[],[],Syria,,SC within wreath,14.33,"[-43, 14]",2015-06-18T19:23:45Z,Bronze. (obverse) Augustus hd. laur. r.. (rev...
998,http://numismatics.org/search/id/1944.100.65547,"Bronze Coin, Antioch, 44 BC - AD 14. 1944.100....",1944.100.65547,[Augustus],,,,,,Greek,...,Coin,[],[],Syria,,SC within wreath,8.39,"[-43, 14]",2015-06-18T19:23:45Z,Bronze. (obverse) Augustus hd. laur. r.. (rev...
999,http://numismatics.org/search/id/1944.100.65548,"Bronze Coin, Antioch, 44 BC - AD 14. 1944.100....",1944.100.65548,[Augustus],,,,,,Greek,...,Coin,[],[],Syria,,SC within wreath,9.01,"[-43, 14]",2015-06-18T19:23:45Z,Bronze. (obverse) Augustus hd. laur. r.. (rev...


# Clean Data
* Convert columns to proper data types
* Remove coins that do not have enough data
* Remove duplicates

In [5]:
mask = ((bm_df['Object type'] == 'coin ') & (bm_df['Date'].str.find('stC') == -1))
filtered = bm_df[mask]

lists = ['Authority', 'Subjects', 'Associated names']
strings = ['Museum number', 'Denomination', 'Description', 'State', 'Culture/period', 'Materials', 
            'Curator\'s comments', 'Bibliography', 'Object type', 'Obverse legend', 'Reverse legend',
           "Production place"]
floats = ['Weight (g)']
dates = ['Date']
redundant_notes = ['Production place', 'Denomination']
do_nothing = ['url', 'Inscriptions']
duplicate_cols = ['Authority', 'Date', 'Production place', 'Description', 'Subjects', "Curator's comments",
                  'Obverse legend', 'Reverse legend']

cleaned_bm = cleanBM.cleanDF(filtered, lists, strings, floats, dates, redundant_notes, do_nothing, duplicate_cols)
cleaned_bm.tail()

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Obverse legend,Production place,Reverse legend,State,Subjects,Weight (g),url
1527,"(Mark Antony, Octavia, Augustus (Octavian))","(M Oppius Capito, Mark Antony)","RPC1 1463 (type) RR2 154, p.518",Roman Republican,,"(-38, -37)",tressis,Copper alloy coin.(obverse) Busts of M. Antoni...,[{u'Inscription Content': u'[M·ANT·IMP·TERT·CO...,copper alloy,18600328.250,coin,[M·ANT·IMP·TERT·COS·DESIG·ITER·ET·TER·III·VIR·...,Achaea,Г [M·OPPIVS·CAPITO·PRO·PR·PRAEF·CLASS·F·C],Roman Republic,"(politician/statesman, emperor/empress, boat/s...",21.51,http://www.britishmuseum.org/research/collecti...
1528,"(Mark Antony, Augustus (Octavian))","(Mark Antony, M Barbatius Pollio)","Ghey, Leins & Crawford 2010 517.2.12 RRC 517/2...",Roman Republican,"Mint uncertain, moving with Mark Antony.Not th...","(-41,)",denarius,Silver coin.(obverse) Head of M. Antonius righ...,[{u'Inscription Content': u'M·ANT·IMP·AVG·III·...,silver,"1932,0706.24.3",coin,M·ANT·IMP·AVG·III·VIR·R·P·C·M·BARBAT·Q·P,,CAESAR·IMP·PONT·III·VIR·R·P·C,Roman Republic,"(politician/statesman, emperor/empress)",3.39,http://www.britishmuseum.org/research/collecti...
1529,"(Marcus Aemilius Lepidus, Augustus (Octavian))","(Marcus Aemilius Lepidus,)","Ghey, Leins & Crawford 2010 495.2.6 RRC 495/2a...",Roman Republican,Die appears to read IMA (ligatured) instead of...,"(-42,)",denarius,Silver coin.(obverse) Head of M. Lepidus right...,[{u'Inscription Content': u'LEPIDVS·PONT·MAX·I...,silver,20114027.3,coin,LEPIDVS·PONT·MAX·III·V·R·P·C,Italy,CAESAR·IMA·III·VIR·R·P·C,Roman Republic,"(politician/statesman, emperor/empress)",3.56,http://www.britishmuseum.org/research/collecti...
1530,"(Augustus (Octavian), Eros/Cupid, Aphrodite/Ve...","(P Clodius,)","RR1 4277, p.583 Ghey, Leins & Crawford 2010 49...",Roman Republican,,"(-42,)",aureus,Gold coin; pierced for suspension.(obverse) He...,[{u'Inscription Content': u'C·CAESAR·III·VIR·R...,gold,18520903.11,coin,C·CAESAR·III·VIR·R·P·C,Rome,P·CLODIVS·M·F·IIII·VIR·A·P·F,Roman Republic,"(cherub/cupid, politician/statesman, classical...",8.09,http://www.britishmuseum.org/research/collecti...
1531,"(Augustus (Octavian), Tyche/Fortuna)","(Ti Sempronius Gracchus,)","RR1 4313, p.593 Ghey, Leins & Crawford 2010 52...",Roman Republican,,"(-40,)",aureus,"Gold coin.(obverse) Head of Octavian right, be...","[{u'Inscription Content': u'IIII·VIR·Q·D', u'I...",gold,18440425.473,coin,DIVI IVLI·F,Rome,IIII·VIR·Q·D TI·SEMPRON·GRACCVS,Roman Republic,"(politician/statesman, allegory/personificatio...",7.96,http://www.britishmuseum.org/research/collecti...


In [6]:
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
strings = ['Deity', 'Denomination', 'Mint', 'Description', 'Obverse Legend', 'Reverse Legend']
floats = ['Weight']
dates = []
redundant_notes = []
do_nothing = ['URI']
duplicate_cols = ['Issuer', 'Year', 'Mint', 'Deity', 'Description', 'Obverse Legend', 'Reverse Legend']

cleaned_ans = cleanBM.cleanDF(ans_df, lists, strings, floats, dates, redundant_notes, do_nothing, 
                              duplicate_cols, production_place='Mint')
cleaned_ans.to_csv('test.csv', encoding='utf-8')
cleaned_ans.tail()

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Mint,Obverse Legend,Portrait,Reference,Reverse Legend,URI,Weight,Year
487,"(Augustus,)",,?,Bronze. (obverse) Augustus hd. r.. (reverse) t...,"(,)",Cyzicus,,"(,)","(RPC.2244,)",,http://numismatics.org/search/id/0000.999.18176,3.98,"(-26, 14)"
488,"(Augustus,)",,?,Bronze. (obverse) Augustus hd. laur. r.. (reve...,"(,)",Antioch,,"(,)","(,)",,http://numismatics.org/search/id/0000.999.26811,16.32,"(-43, 14)"
489,"(Augustus,)",Zeus,?,. (obverse) Head of Augustus r.. (reverse) Zeu...,"(,)",Eumeneia,,"(,)","(RPC 3147,)",...LEON ...APHTO...,http://numismatics.org/search/id/1998.18.76,4.31,"(-26, 14)"
490,"(Augustus,)",,?,Bronze. (obverse) Augustus head r.. (reverse) ...,"(,)",Cnossus,,"(,)","(,)",,http://numismatics.org/search/id/1984.83.20,5.44,"(-26, 14)"
491,"(Augustus,)",,?,Bronze. (obverse) Augustus hd. laur. r.. (reve...,"(,)",Antioch,,"(,)","(,)",,http://numismatics.org/search/id/1944.100.65544,13.89,"(-26, 14)"


In [103]:
mints_bm = cleaned_bm[(cleaned_bm['Denomination'] != '') & (cleaned_bm['Production place'] != '') &
                      (cleaned_bm['Production place']!='Alexandria') & (cleaned_bm['Production place']!='Cos') &
                     (cleaned_bm['Production place']!='Apamea') & (cleaned_bm['Production place']!='Philippi') & 
                      (cleaned_bm['Production place']!='Thrace') & (cleaned_bm['Production place']!='Achulla') & 
                      (cleaned_bm['Production place']!='Arabia') & (cleaned_bm['Production place']!='Orthosia') & 
                      (cleaned_bm['Production place']!='Sebaste') & (cleaned_bm['Production place']!='Prymnessus') & 
                      (cleaned_bm['Production place']!='Acmonea') & (cleaned_bm['Production place']!='Antiochia') & 
                      (cleaned_bm['Production place']!='Eucarpea') & (cleaned_bm['Production place']!='Chalkis') & 
                      (cleaned_bm['Production place']!='Apollonia') & (cleaned_bm['Production place']!='Midaeum') & 
                      (cleaned_bm['Production place']!='Magnetes') & (cleaned_bm['Production place']!='Heraclea Salbace')]
mints_bm[mints_bm['Production place']=='Alexandria']

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Obverse legend,Production place,Reverse legend,State,Subjects,Weight (g),url


In [8]:
mints_ans = cleaned_ans[(cleaned_ans['Mint'] != '') & (cleaned_ans['Mint'] != 'uncertain') & 
                        (cleaned_ans['Mint'] != 'Uncertain value') & (cleaned_ans['Denomination'] != '') & 
                       (cleaned_ans['Mint'] != 'Bilbilis|Italica')]
mints_ans[mints_ans.Mint == 'Alexandria']

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Mint,Obverse Legend,Portrait,Reference,Reverse Legend,URI,Weight,Year


# Plot Data
* Plot all data together in plots
* Split up data and plot seperately

### All data
* Make stacked bar plot
* Make map plot

In [9]:
from bokeh.io import output_notebook, save
from bokeh.plotting import show
from bokeh.models import Range1d, HoverTool
from bokeh.palettes import linear_palette, viridis, grey

In [10]:
#output_notebook()

In [43]:
cleaned = mints_bm

location_bar_plot = magicPlots.makeStackedBar(cleaned, 'Production place', 'Denomination', sort_bars=True,
                               bars_ascending=False, sort_stacks=True, stacks_agg='sum', stacks_ascending=False,
                              colors=viridis, title='Number of coins produced from each location')

location_bar_plot.yaxis.axis_label='Location Counts'
location_bar_plot.y_range = Range1d(0, 500, bounds=(0, 500))
location_bar_plot.legend.location = 'top_right'
location_bar_plot.add_tools(HoverTool(tooltips=[('Denomination', '@Denomination'), 
                                                ('Denomination Count', '@height'),
                                                ('Location Count', '@Sum')]))

#save(location_bar_plot, filename='../Plots/location_bar.html')
show(location_bar_plot)

In [12]:
from bokeh.io import save, show
import pygeoj
from pyproj import Proj, transform

In [13]:
#output_notebook()

In [114]:
location_counts = cleanBM.prepareDataframeForMapping(mints_bm, col_name='Production place')

location_map_plot = magicPlots.makeMap(location_counts, 'Production place', 'Count', x_ranges=(-2.0e6, 5e6), 
                                        y_ranges=(3.5e6, 7e6), mintsFile='../GeoJSON/mints.geojson', 
                                       path='../GeoJSON/', ext='html', pt_size=lambda x: 5 * np.log(3 * x))

#save(location_map_plot, filename='../Plots/location_map.html')
show(location_map_plot)

### Individual time periods and subjects

In [None]:
from bokeh.layouts import gridplot

In [None]:
def coinsFromDates(df, date_range, col_name='Date'):
    '''
    Parameters
    ----------
    df : Pandas dataframe
        Dataframe containing coincs and dates
    date_range : tuple
        Tuple of length two containing date range
    col_names : str
        Column name of dates
        
    Return
    ------
    Returns a dataframe containing only the rows that have the correct dates
    '''
    begin = date_range[0]
    end = date_range[1]
    def intWithinTupleRange(tup):
        in_range = False
        if len(tup) == 1:
            if tup[0] >= begin and tup[0]<= end:
                in_range = True
        elif len(tup) == 2:
            if tup[0] >= begin and tup[1]<= end:
                in_range = False
        return in_range
    return df[df.apply(lambda x: intWithinTupleRange(x[col_name]), axis=1)]


def containKeyword(df, keys, col_names):
    '''
    Parameters
    ----------
    df : Pandas dataframe
        Dataframe containing coincs and column to look for keyword in
    keys : list
        list of strings to look for in each row of given column
    col_name : list
        list of columns of where to search for keyword in 'keys' list, respectively
    
    Return
    ------
    Returns a dataframe containing only rows that have the keyword in the given column
    '''
    def containIn(obj, key):
        contained = False
        cleaned_key = key.lower()
        
        if type(obj) == str:
            if cleaned_key in obj.lower():
                return True
        elif type(obj) == tuple or type(obj) == list:
            for item in obj:
                if cleaned_key in item.lower():
                    contained = True
                    break
                    
        return contained
    
    if len(keys) != len(col_names):
        raise ValueError('length of keys does not equal length of columns')
        
    try:
        result = df[df.apply(lambda x: containIn(x[col_names[0]], keys[0]), axis=1)]
        df = df.drop(df[df.apply(lambda x: containIn(x[col_names[0]], keys[0]), axis=1)], axis=1)
    except:
        raise ValueError('Missing keys')
        
    for i, key in enumerate(keys):
        if key == keys[0]:
            pass
        else:
            result.append(df[df.apply(lambda x: containIn(x[col_names[i]], key), axis=1)])
            df = df.drop(df[df.apply(lambda x: containIn(x[col_names[i]], key), axis=1)], axis=1)
            
    return result


def makeTitle(dates, subject):
    '''
    Parameters
    ----------
    dates : tuple
        tuple of length two with date range
    subject : list
        list of strings of the subjects
        
    Return
    ------
    Returns a string of an appropriate title
    
    Doctest
    -------
    >>> makeTitle([-44, -31], ['star'])
    "'Star' in coinage from 44BC to 31BC"
    '''
    result = ''
    str_dates = []
    for i, sub in enumerate(subject):
        result += "'" + sub[0].upper() + sub[1:] + "'"
        if len(subject) > 1:
            if i != len(subject)-1:
                result += ', '
            if i == len(subject) - 2:
                result += 'and '
                
    result += ' in coinage from '
    
    for date in dates:
        if date < 0:
            str_dates.append(str(abs(date)) + 'BC')
        else:
            str_dates.append(str(date) + 'BC')
    
    result += str_dates[0] + ' to ' + str_dates[1]
    return result

makeTitle([-44, -31], ['star'])

In [None]:
date_ranges = [(-44, -31), (-30, -27), (-27, -19), (-18, -16), (-15, -11), (-10, 13)]
subjects = [(['star'], ['Description']), (['statue of Augustus'], ['Description']), (['capricorn', 'globe', 'rudder'], ['Description' for _ in range(3)]), (['Julius Caesar'], ['Associated names']), (['Apollo', 'lyre'], ['Description', 'Description'])]
good_dfs = {}
bar_plots = []
map_plots = []

for dates in date_ranges:
    for subject in subjects:
        plot_title = makeTitle(dates, subject[0])
        contain_dates = coinsFromDates(cleaned, dates)
        good_df = containKeyword(contain_dates, subject[0], subject[1])
        
        if not good_df.empty:
            good_dfs[plot_title] = good_df

            bar_plot = magicPlots.makeStackedBar(good_df, 'Production place', 'Denomination', sort_bars=True,
                                   bars_ascending=False, sort_stacks=True, stacks_agg='sum', stacks_ascending=False,
                                  colors=viridis, title=plot_title+' Stacked Bar Graph', plot_size=(1000, 480))
            bar_plot.yaxis.axis_label='Location Counts'
            bar_plot.add_tools(HoverTool(tooltips=[('Denomination', '@Denomination'), 
                                                            ('Denomination Count', '@height'),
                                                            ('Location Count', '@Sum')]))
            bar_plots.append(bar_plot)
            
            map_counts = cleanBM.prepareDataframeForMapping(good_df)
            map_plots.append(magicPlots.makeMap(map_counts, 'Production place', 'Count', x_ranges=(-2.0e6, 5e6), 
                                y_ranges=(3.5e6, 7e6), path='../GeoJSON/', ext='html', pt_size=lambda x: 10 * x,
                                 title=plot_title+' Map'))

grid = gridplot([bar_plots, map_plots])
#save(grid, filename='../Plots/Many plots')
show(grid)