In [1]:
import pandas as pd
import numpy as np
import doctest
import CleanBMData as cleanBM

import sys
# Python 2 only: force the interpreter's default string encoding to UTF-8.
# reload(sys) is needed because site.py deletes sys.setdefaultencoding at
# startup; reloading also resets sys.stdout, so the notebook's stdout is
# saved first and restored afterwards to keep cell output visible.
# On Python 3 the default encoding is already UTF-8 and reload() is not a
# builtin, so the whole hack is skipped there.
stdout = sys.stdout
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')
    sys.stdout = stdout

## TODO:
* Make function to create:
    * ~~Stacked bar graph~~
    * Map
* Take data from American Numismatic Society
* Look at following types of coins for location, denomination, material, and subject:
    * Separate into 44-31, 30-27, 27-19, 18-16, 15-11 (all BCE) and 10 BCE-13 CE
        * Star of Julius (Sidus Iulium) 
        * Capricorn and/or globe and/or rudder
        * Secular games
        * Julius Caesar
        * Apollo and/or lyre
        * Statue of Augustus
* Manually select colors for stacked bar graph to avoid repeated colors next to each other
* Predictive value of subjects, material, inscription
    * Predict what will have laurels

# Read in Data

In [2]:
# Per-column converters applied by read_csv while parsing. Each cleanBM
# factory call returns the callable that read_csv invokes on the raw cell
# string; the Inscriptions converter is built with the '|', ';' and ':'
# delimiters.
converters = {
    'Authority': cleanBM.stringToList(),
    'Associated names': cleanBM.stringToList(),
    'Subjects': cleanBM.stringToList(),
    'Inscriptions': cleanBM.stringToListofDicts('|', ';', ':'),
}

# Load the export, drop the stale saved index column and blank out missing
# values. `axis` is passed by keyword: the positional form is deprecated and
# no longer accepted by pandas >= 2.0.
df = (pd.read_csv('AugustusCoins_44BC-14AD.csv', converters=converters)
        .drop('Unnamed: 0', axis=1)
        .replace(np.nan, '', regex=True))
df.head()

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Production place,State,Subjects,Weight (g),url
0,[Portrait of: Augustus (Octavian)],[Ruler: Augustus (Octavian)],RPC1 1801,,,27BC-14,,Bronze coin.(obverse) Head of Augustus r. (rev...,"[{u'Inscription Position': u'reverse', u'Inscr...",bronze,19971206.1,coin,"Minted in: Odessus (Europe,Balkans,Bulgaria,Va...",Roman Empire,"[symbol, emperor/empress]",7.45,http://www.britishmuseum.org/research/collecti...
1,[Portrait of: Augustus (Octavian)],[Ruler: Augustus (Octavian)],RPC1 5476,Roman Provincial,,27BC-14,,Bronze coin.(obverse) Head of Augustus r. (rev...,"[{u'Inscription Script': u'Latin', u'Inscripti...",bronze,20010335.3,coin,,Roman Empire,"[symbol, emperor/empress]",2.27,http://www.britishmuseum.org/research/collecti...
2,[Named in inscription & portrayed: Julius Caes...,[Ruler: Augustus (Octavian)],RPC1 2007,Roman Provincial,,31BC-14,,"Alloy coin.(obverse) Diademed head of Caesar, ...","[{u'Inscription Position': u'reverse', u'Inscr...",alloy,G.1200,coin,"Minted in: Apamea (Asia,Turkey,Marmara Region,...",Roman Empire,[emperor/empress],8.36,http://www.britishmuseum.org/research/collecti...
3,[],[Ruler: Augustus (Octavian) (?)],,Greek,,27BC-14,,Alloy coin.,[{}],alloy,19051111.1,coin,"Minted in: Cremna (?) (Asia,Turkey,Mediterrane...",,[],1.49,http://www.britishmuseum.org/research/collecti...
4,[Representation of: Augustus (Octavian)],[],Walker & Higgs 2001 308 Gem 3396,Roman Republican,The portrait probably dates from the later 30s...,44BC-40BC,,Seal of glass paste imitating sard: with a bus...,[{}],glass,19230401.928,seal,,,[],,http://www.britishmuseum.org/research/collecti...


In [3]:
# Sanity check of the Inscriptions converter: build it with the same three
# delimiter arguments used in read_csv above and call it on a small sample
# string to inspect the parsed result.
cleanBM.stringToListofDicts('|', ';', ':')('1:one;2:two|1:I;2:II')

[{'1': 'one', '2': 'two'}, {'1': 'I', '2': 'II'}]

# Clean Data

In [4]:
# Keep only rows usable for the analysis: coins whose Date does not contain
# 'stC', with a denomination, a production place, a bibliography entry and a
# weight that is not 0.
# NOTE(review): the 'coin ' literal has a trailing space; presumably this
# matches the raw export before cleanString runs -- confirm against the CSV.
mask = ((df['Object type'] == 'coin ') & (df['Date'].str.find('stC') == -1) &
        (df['Denomination'] != '') & (df['Production place'] != '') &
        (df['Bibliography'] != '') & (df['Weight (g)'] != 0))
filtered = df[mask]
cleaning = pd.DataFrame()

# Columns grouped by the cleanBM helper that is applied to them.
lists = ['Authority', 'Subjects', 'Associated names']
strings = ['Museum number', 'Denomination', 'Description', 'State', 'Culture/period', 'Materials', 
            'Curator\'s comments', 'Bibliography', 'Object type']
floats = ['Weight (g)']
dates = ['Date']
redundant_notes = ['Production place', 'Denomination']
do_nothing = ['url', 'Inscriptions']

cleaning['Production place'] = filtered['Production place'].apply(cleanBM.cleanProductionPlace)
for lst in lists:
    cleaning[lst] = filtered[lst].apply(cleanBM.cleanList)
for string in strings:
    cleaning[string] = filtered[string].apply(cleanBM.cleanString)
for flot in floats:
    # NaN results from float_conversion are replaced by the sentinel -1.
    cleaning[flot] = filtered[flot].apply(cleanBM.float_conversion).replace(np.nan, -1)
for date in dates:
    cleaning[date] = filtered[date].apply(cleanBM.dateRange)
# Second pass over columns that were already cleaned above, so this loop must
# run after 'Production place' and 'Denomination' exist in `cleaning`.
for col in redundant_notes:
    cleaning[col] = cleaning[col].apply(cleanBM.removeNotes)
for col in do_nothing:
    cleaning[col] = filtered[col]

# Alphabetical column order. reindex(columns=...) replaces reindex_axis,
# which was deprecated in pandas 0.21 and removed in 1.0.
cleaning = cleaning.reindex(columns=sorted(cleaning.columns))

duplicate_cols = ['Authority', 'Date', 'Production place', 'Description', 'Subjects', "Curator's comments"]
removed_dup = (cleaning.drop_duplicates(subset=duplicate_cols)
                        .reset_index(drop=True))
cleaned = removed_dup[(removed_dup['Production place'] != 'Gaul')] #too vague
# Explicit copy so the manual correction below writes to an independent frame
# rather than to a slice of `removed_dup`.
cleaned = cleaned[(cleaned['Denomination'] != 'unit')].copy()
# Manual correction of one record. .loc replaces DataFrame.set_value
# (deprecated in pandas 0.21, removed in 1.0).
# NOTE(review): 387 is a positional label produced by reset_index above, so it
# silently points at a different coin if the input data or filters change.
cleaned.loc[387, 'Production place'] = 'Lugdunum'
cleaned.tail()

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Production place,State,Subjects,Weight (g),url
693,"(Augustus (Octavian), Nike/Victoria/Victory)","(Augustus (Octavian),)","RIC1 263, p.60 RR2 4342, p.12 RE1 616, p.101",Roman Imperial,Octavian's IMP CAESAR coinage was a celebratio...,"(-32, -29)",denarius,"Silver coin.(obverse) Victory, draped, standin...","[{u'Inscription Content': u'CAESAR DIVI F', u'...",silver,R.6163,coin,Italy,Roman Empire,"(charioteer/chariot, allegory/personification,...",3.78,http://www.britishmuseum.org/research/collecti...
694,"(Mark Antony, Octavia, Augustus (Octavian))","(M Oppius Capito, Mark Antony)","RPC1 1463 (type) RR2 154, p.518",Roman Republican,,"(-38, -37)",tressis,Copper alloy coin.(obverse) Busts of M. Antoni...,[{u'Inscription Content': u'[M·ANT·IMP·TERT·CO...,copper alloy,18600328.250,coin,Achaea,Roman Republic,"(politician/statesman, emperor/empress, boat/s...",21.51,http://www.britishmuseum.org/research/collecti...
695,"(Marcus Aemilius Lepidus, Augustus (Octavian))","(Marcus Aemilius Lepidus,)","Ghey, Leins & Crawford 2010 495.2.6 RRC 495/2a...",Roman Republican,Die appears to read IMA (ligatured) instead of...,"(-42,)",denarius,Silver coin.(obverse) Head of M. Lepidus right...,[{u'Inscription Content': u'LEPIDVS·PONT·MAX·I...,silver,20114027.3,coin,Italy,Roman Republic,"(politician/statesman, emperor/empress)",3.56,http://www.britishmuseum.org/research/collecti...
696,"(Augustus (Octavian), Eros/Cupid, Aphrodite/Ve...","(P Clodius,)","RR1 4277, p.583 Ghey, Leins & Crawford 2010 49...",Roman Republican,,"(-42,)",aureus,Gold coin; pierced for suspension.(obverse) He...,[{u'Inscription Content': u'C·CAESAR·III·VIR·R...,gold,18520903.11,coin,Rome,Roman Republic,"(cherub/cupid, politician/statesman, classical...",8.09,http://www.britishmuseum.org/research/collecti...
697,"(Augustus (Octavian), Tyche/Fortuna)","(Ti Sempronius Gracchus,)","RR1 4313, p.593 Ghey, Leins & Crawford 2010 52...",Roman Republican,,"(-40,)",aureus,"Gold coin.(obverse) Head of Octavian right, be...","[{u'Inscription Content': u'IIII·VIR·Q·D', u'I...",gold,18440425.473,coin,Rome,Roman Republic,"(politician/statesman, allegory/personificatio...",7.96,http://www.britishmuseum.org/research/collecti...


# Plot Data

In [5]:
from bokeh.io import output_notebook, save
from bokeh.plotting import show
from bokeh.models import Range1d, HoverTool
from bokeh.palettes import linear_palette, viridis

In [6]:
# Configure Bokeh to render show() output inline in the notebook.
output_notebook()

In [7]:
import BokehMaker as magicPlots


# Stacked bar chart: one bar per production place, stacked by denomination.
bar_options = dict(
    sort_bars=True,
    bars_ascending=False,
    sort_stacks=True,
    stacks_agg='sum',
    stacks_ascending=False,
    colors=viridis,
    title='Number of coins produced from each location',
)
location_bar_plot = magicPlots.makeStackedBar(
    cleaned, 'Production place', 'Denomination', **bar_options)

# Axis label, fixed 0-200 y range (also used as the pan/zoom bounds) and
# legend placement.
location_bar_plot.yaxis.axis_label = 'Location Counts'
location_bar_plot.y_range = Range1d(0, 200, bounds=(0, 200))
location_bar_plot.legend.location = 'top_right'

# Hover tooltips read their values from the plot's data source columns.
bar_tooltips = [
    ('Denomination', '@Denomination'),
    ('Denomination Count', '@height'),
    ('Location Count', '@Sum'),
]
location_bar_plot.add_tools(HoverTool(tooltips=bar_tooltips))

# Write a standalone HTML copy, then display inline.
save(location_bar_plot, filename='location_bar.html')
show(location_bar_plot)

In [None]:
from bokeh.io import save, show
from bokeh.models import GeoJSONDataSource, Circle, Legend, HoverTool, ColumnDataSource
from bokeh.plotting import figure
from bokeh.tile_providers import STAMEN_TERRAIN
from bokeh.models.glyphs import Patches
from collections import OrderedDict
import json
import pygeoj
from pyproj import Proj, transform
from io import StringIO

In [None]:
# Bokeh notebook output is also enabled in an earlier cell; this call repeats
# it ahead of the map cells.
output_notebook()

In [None]:
# Build one row per production place with its coin count, display colour and
# Web-Mercator geometry (either a single point or a polygon outline).
locations = cleaned['Production place'].unique()

# Number of coins per production place, computed directly from `cleaned` so
# the cell runs on a fresh kernel (the original read an undefined
# `location_bar` frame).
# NOTE(review): assumes the old `Sum` column was the per-location row count
# of `cleaned` -- confirm against BokehMaker.makeStackedBar.
location_counts = cleaned['Production place'].value_counts()
# Distinct counts, largest first; the position of a count in this array is
# its colour rank.
counts = np.sort(location_counts.unique())[::-1]

# File extension of each location's GeoJSON file; anything not listed here is
# stored with an '.html' extension.
txt_locations = ['Asia', 'Spain', 'England', 'Crete', 'Jerusalem', 'Masicytes']
json_locations = ['Italy', 'Syria']
colors = ["#1f0a11","#1b2110","#0e2420","#36201c","#363022","#1b3a3f","#2c3a27","#5b4138",
            "#424f42","#666554","#7a5e54","#4e6e74","#71867a","#957b7b","#a49186","#96a998",
            "#90b3bc","#cdb6ab","#c3ded7","#eae5d2"]
map_columns = ['Production_place', 'Count', 'Color', 'is_point',
               'point_xs', 'point_ys', 'patch_xs', 'patch_ys', 'Size']

# GeoJSON coordinates are lon/lat (EPSG:4326); the tile layer uses
# Web Mercator (EPSG:3857).
from_proj = Proj(init="epsg:4326")
to_proj = Proj(init="epsg:3857")

# Rows are collected as dicts and turned into a DataFrame once at the end.
# Writing cells through chained indexing (frame.iloc[row][col] = value) can
# assign into a temporary copy and silently lose the value.
records = []
for loc in locations:
    point_xs = 0
    point_ys = 0
    patch_xs = []
    patch_ys = []
    is_point = False

    # Make file name of location GeoJSON file
    if loc in txt_locations:
        fname = str(loc) + '.txt'
    elif loc in json_locations:
        fname = str(loc) + '.json'
    else:
        fname = str(loc) + '.html'

    # Get coordinates of the first feature in the file. The geometry kind is
    # inferred from the nesting of the coordinate list.
    data = pygeoj.load(filepath='GeoJSON/'+fname)
    coors = data[0].geometry.coordinates
    if len(coors) == 2 and not isinstance(coors[0], (list, tuple)):
        # Flat [lon, lat] pair. The isinstance check keeps a geometry made of
        # exactly two polygons from being mistaken for a point.
        is_point = True
        point_xs, point_ys = transform(from_proj, to_proj, coors[0], coors[1])
    elif len(coors) == 1:
        # Single ring: list of [lon, lat] vertices.
        for vertex in coors[0]:
            x, y = transform(from_proj, to_proj, vertex[0], vertex[1])
            patch_xs.append(x)
            patch_ys.append(y)
    else:
        # Several polygons: use the first ring of each and concatenate the
        # vertices into one outline.
        for polygon in coors:
            for vertex in polygon[0]:
                x, y = transform(from_proj, to_proj, vertex[0], vertex[1])
                patch_xs.append(x)
                patch_ys.append(y)

    # Get count of coins produced from location
    count = int(location_counts[loc])

    # Size of point
    size = 5 * np.log(3 * count)

    # The most frequent location gets the last palette entry. The index is
    # clamped at 0 so that more distinct counts than palette entries cannot
    # wrap around to the other end of the list.
    rank = int(np.where(counts == count)[0][0])
    color = colors[max(len(colors) - 1 - rank, 0)]

    records.append({
        'Production_place': loc,
        'Count': count,
        'Color': color,
        'is_point': is_point,
        'point_xs': point_xs,
        'point_ys': point_ys,
        'patch_xs': patch_xs,
        'patch_ys': patch_ys,
        'Size': size,
    })

location_map = pd.DataFrame(records, columns=map_columns)
location_map.head()

In [None]:
# Split the locations by geometry kind: polygons are drawn as patches,
# single coordinates as circles.
is_point_flag = location_map['is_point']
df_patches = location_map[(is_point_flag == False)]
df_points = location_map[is_point_flag == True]

# Wrap each subset in a Bokeh data source so glyphs can refer to columns
# by name.
source_patches = ColumnDataSource(df_patches)
source_points = ColumnDataSource(data=df_points)

# Glyph definitions; both take their colour from the "Color" column.
patches = Patches(
    xs="patch_xs",
    ys="patch_ys",
    fill_color="Color",
    fill_alpha=0.8,
    line_color="Color",
    line_width=0.5,
)
points = Circle(
    x="point_xs",
    y="point_ys",
    size='Size',
    fill_color="Color",
    fill_alpha=.9,
)

# Base figure over the terrain tile layer; the ranges are in Web-Mercator
# metres.
location_map_plot = figure(
    plot_width=1000,
    plot_height=480,
    active_scroll='wheel_zoom',
    x_range=(-1.0e6, 4.5e6),
    y_range=(5e6, 6e6),
)
location_map_plot.add_tile(STAMEN_TERRAIN)

# Patches first, then points, so the circles are added after the polygons.
for source, glyph in ((source_patches, patches), (source_points, points)):
    location_map_plot.add_glyph(source, glyph)

# Attach a hover tool, then set its tooltips through a type selection.
location_map_plot.add_tools(HoverTool())
hover = location_map_plot.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ('Production place', '@Production_place'),
    ("Count", "@Count")
    ])

# Write a standalone HTML copy, then display inline.
save(location_map_plot, filename='location_map.html')
show(location_map_plot)