In [13]:
from sqlalchemy import types
from StringIO import StringIO
import sqlite3
import CleanData as cd
import numpy as np
import pandas as pd

# Clean Data

In [14]:
# Dataset name; it is spliced into the CSV input paths and the SQLite output path.
title = "Roman_Imperial_Coinage"

### British Museum

In [15]:
# Load the British Museum CSV, letting the CleanData converters parse the
# list-like columns while the file is being read.
bm_converters = {
    'Authority': cd.stringToList(),
    'Associated names': cd.stringToList(),
    'Subjects': cd.stringToList(),
    'Inscriptions': cd.stringToListofDicts('|', ';', ':'),
}
bm_df = pd.read_csv('../Data/BM_' + title + '.csv', converters=bm_converters)

# Post-processing, in order: append the columns returned by cd.cleanInscriptions,
# drop the 'Unnamed: 0' column, replace NaN with empty strings, and rename
# 'Culture/period' to 'Culture'.
bm_df = (
    pd.concat([bm_df, cd.cleanInscriptions(bm_df)], axis=1)
    .drop('Unnamed: 0', axis=1)
    .replace(np.nan, '', regex=True)
    .rename(columns={'Culture/period': 'Culture'})
)

In [16]:
# Keep only rows whose object type is exactly 'coin ' (trailing space included)
# and whose 'Date' text does not contain the substring 'stC'.
mask = (bm_df['Date'].str.find('stC') == -1) & (bm_df['Object type'] == 'coin ')
filtered = bm_df[mask]

# Column groups handed positionally to cd.cleanDF. The group names mirror the
# cleanDF arguments; see CleanData for how each group is treated.
lists = [
    "Authority",
    "Subjects",
    "Associated names",
    "Obverse legend",
    "Reverse legend",
]
strings = [
    "Museum number",
    "Denomination",
    "Description",
    "State",
    "Culture",
    "Materials",
    "Curator's comments",
    "Bibliography",
    "Object type",
    "Production place",
]
floats = ["Weight (g)"]
dates = ["Date"]
redundant_notes = ["Production place", "Denomination"]
do_nothing = ["url", "Inscriptions"]
duplicate_cols = [
    "Authority",
    "Date",
    "Production place",
    "Description",
    "Subjects",
    "Curator's comments",
    "Obverse legend",
    "Reverse legend",
]

# Clean the filtered frame, discard the raw 'Inscriptions' column and tag every
# row with the institution it came from.
cleaned_bm = cd.cleanDF(
    filtered, lists, strings, floats, dates, redundant_notes, do_nothing, duplicate_cols
).drop('Inscriptions', axis=1)
cleaned_bm['Source'] = 'British Museum'

In [17]:
# Convert everything to strings and integers to put into SQL db

# Flatten every list-valued column into a single string.
lists = ['Authority', 'Subjects', 'Associated names', 'Obverse legend', 'Reverse legend', 'Date']
for col in lists:
    cleaned_bm[col] = cleaned_bm[col].apply(cd.listToString())

# Decode every text column from UTF-8 bytes (the weight column is numeric and
# therefore deliberately absent from this list).
columns = [u'Associated names', u'Authority', u'Bibliography', u'Culture',
       u'Curator\'s comments', u'Date', u'Denomination', u'Description',
       u'Materials', u'Museum number', u'Object type', u'Obverse legend',
       u'Production place', u'Reverse legend', u'State', u'Subjects',
       u'url', u'Source']

for col in columns:
    cleaned_bm[col] = cleaned_bm[col].apply(lambda x: x.decode('utf-8'))

# Positional rename to the SQL column names; this relies on the column order
# produced by the cleaning step and raises if the column count differs.
cleaned_bm.columns = ['associatedNames', 'authority', 'bibliography', 'culture',
                       'curatorComment', 'date', 'denomination', 'description',
                       'materials', 'museumNumber', 'objectType', 'obverseLegend',
                       'mint', 'reverseLegend', 'state', 'subjects',
                       'weight', 'url', 'source']

# Derive the date bounds from the flattened 'date' string (e.g. "360, 363").
# The pieces are stripped because the flattened string puts a space after the
# comma, which would otherwise leave endDate as ' 363' instead of '363'.
# startDate is the first piece; endDate is the second piece when there is one,
# otherwise it falls back to the first.
cleaned_bm['startDate'] = cleaned_bm['date'].apply(lambda text: text.split(",")[0].strip())
cleaned_bm['endDate'] = cleaned_bm['date'].apply(lambda text: text.split(",")[:2][-1].strip())

In [18]:
# Show the last rows of the prepared British Museum frame as a visual sanity check.
cleaned_bm.tail()

Unnamed: 0,associatedNames,authority,bibliography,culture,curatorComment,date,denomination,description,materials,museumNumber,...,obverseLegend,mint,reverseLegend,state,subjects,weight,url,source,startDate,endDate
863,Julian (the Apostate),Julian (the Apostate),HXN 1 161.2,Roman Imperial,,"360, 363",?,Silver coin.(obverse) Head of Julian r. (rever...,silver,19940401161.2,...,[...]C L IVLIA [...],Lyon,VOTIS V MVLTIS X,Roman Empire,"symbol, emperor/empress",1.07,http://www.britishmuseum.org/research/collecti...,British Museum,360,363
864,Julian (the Apostate),Julian (the Apostate),HXN 1 160.4,Roman Imperial,,"360, 363",?,Silver coin.(obverse) Head of Julian r. (rever...,silver,19940401160.4,...,[...]C L IVLIANVS P[...],Lyon,VOTIS V MVLTIS X,Roman Empire,"symbol, emperor/empress",1.2,http://www.britishmuseum.org/research/collecti...,British Museum,360,363
865,Julian (the Apostate),Julian (the Apostate),HXN 1 159.2,Roman Imperial,,"360, 363",?,Silver coin.(obverse) Head of Julian r. (rever...,silver,19940401159.2,...,D N C L IVL[...],Trier,VOTIS V MVLTIS X,Roman Empire,"symbol, emperor/empress",1.11,http://www.britishmuseum.org/research/collecti...,British Museum,360,363
866,Julian (the Apostate),Julian (the Apostate),HXN 1 158.4,Roman Imperial,,"360, 363",?,Silver coin.(obverse) Head of Julian r. (rever...,silver,19940401158.4,...,[...]C L IVLIANVS AVG,Trier,VOTIS V MVLTIS X,Roman Empire,"symbol, emperor/empress",1.32,http://www.britishmuseum.org/research/collecti...,British Museum,360,363
867,Julian (the Apostate),Julian (the Apostate),HXN 1 163.5,Roman Imperial,,"360, 363",?,Silver coin.(obverse) Head of Julian r. (rever...,silver,19940401163.5,...,ILLEGIBLE,Trier,VOTIS V MVLTIS X,Roman Empire,"symbol, emperor/empress",0.96,http://www.britishmuseum.org/research/collecti...,British Museum,360,363


### American Numismatic Society

In [49]:
# Load the American Numismatic Society CSV. The columns listed here hold
# '|'-separated values and are parsed by a cd.stringToList('|') converter each;
# NaN is then replaced with empty strings.
ans_list_columns = ['Authority', 'Year', 'Issuer', 'Portrait', 'Reference']
ans_df = pd.read_csv(
    '../Data/ANS_' + title + '.csv',
    converters=dict((name, cd.stringToList('|')) for name in ans_list_columns)
).replace(np.nan, '', regex=True)

# Build a free-text description per row from material, manufacture and both types.
ans_df['Description'] = ans_df.apply(
    lambda row: cd.makeDescription(row['Material'], row['Manufacture'],
                                   row['Obverse Type'], row['Reverse Type']),
    axis=1)

# Build a per-row key from material, denomination, portrait, mint and year;
# it is used later as the duplicate-detection column.
ans_df['DupCheck'] = ans_df.apply(
    lambda row: cd.makeDupCheckCol(row['Material'], row['Denomination'],
                                   row['Portrait'], row['Mint'], row['Year']),
    axis=1)

ans_df.tail()

Unnamed: 0.1,Unnamed: 0,URI,Title,RecordId,Authority,Coin Type URI,Date on Object,Degree,Deity,Denomination,...,Region,Reverse Legend,Reverse Type,Weight,Year,Thumbnail_obv,Thumbnail_rev,Date Record Modified,Description,DupCheck
62310,62310,http://numismatics.org/collection/2017.11.7,"Billon tetradrachm, Alexandreia, AD 118 - AD 1...",2017.11.7,[Hadrian],,,,Tyche,tetradrachm,...,Egypt,L Γ,Tyche standing l. holding rudder and cornucopia.,12.73,"[118, 119]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-05T17:40:30Z,Billon. (obverse) Laureate bust r. fold of cl...,"Billon tetradrachm [''] Alexandreia ['118', '1..."
62311,62311,http://numismatics.org/collection/2017.11.8,"Billon Tetradrachm, AD 122 - AD 123. 2017.11.8",2017.11.8,[Hadrian],,,,Athena|Nike,Tetradrachm,...,,L ϛ,Athena standing l. holding Nikeand shield,12.71,"[122, 123]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-05T17:40:30Z,"Billon. (obverse) Laureate bust r., fold of c...","Billon Tetradrachm [''] ['122', '123']"
62312,62312,http://numismatics.org/collection/2017.11.9,"Bronze Coin, Alexandreia, AD 164 - AD 165. 201...",2017.11.9,"[Faustina II under Marcus Aurelius,]",,,,,,...,Egypt,L E,"Eagle with wings folded standing left, head r.",7.8,"[164, 165]",,,2017-12-05T17:40:30Z,Bronze. (obverse) Bust draped r.. (reverse) E...,"Bronze [''] Alexandreia ['164', '165']"
62313,62313,http://numismatics.org/collection/2015.8.48,"Bronze AE3, Constantinople, AD 378 - AD 383. 2...",2015.8.48,[Theodosius I],,,,,AE3,...,,,,2.502,"[378, 383]",,,2017-12-19T12:57:47Z,Bronze.,"Bronze AE3 [''] Constantinople ['378', '383']"
62314,62314,http://numismatics.org/collection/2017.34.1,"Gold Aureus of Antoninus Pius, Rome, AD 145 - ...",2017.34.1,[Antoninus Pius],http://numismatics.org/ocre/id/ric.3.ant.503Aa,,,,Aureus,...,Italy,CONCORDIA,Dove standing right,7.11,"[145, 161]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-19T12:57:47Z,"Struck Gold. (obverse) Bust of Faustina II, dr...",Gold Aureus ['Faustina the Younger'] Rome ['14...


In [50]:
# Column groups handed positionally to cd.cleanDF for the ANS frame. The group
# names mirror the cleanDF arguments; see CleanData for how each is treated.
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
strings = ['Deity', 'Denomination', 'Mint', 'Description', 'Obverse Legend', 'Reverse Legend']
floats = ['Weight']
dates = []
redundant_notes = []
do_nothing = ['URI', 'DupCheck']
duplicate_cols = 'DupCheck'

cleaned_ans = cd.cleanDF(ans_df, lists, strings, floats, dates, redundant_notes, do_nothing, 
                              duplicate_cols, production_place='Mint')
# Tag every row with the institution it came from.
cleaned_ans['Source'] = 'American Numismatic Society'
# DupCheck was passed to cleanDF as the duplicate column; it is not stored in the database.
cleaned_ans.drop(['DupCheck'], axis=1, inplace=True)

# Display the resulting column order, which the positional steps in later cells rely on.
# (The former mid-cell `cleaned_ans.tail()` was removed: only the last expression
# of a cell is rendered, so its result was silently discarded.)
cleaned_ans.columns

Index([u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
       u'Mint', u'Obverse Legend', u'Portrait', u'Reference',
       u'Reverse Legend', u'URI', u'Weight', u'Year', u'Source'],
      dtype='object')

In [51]:
# Convert everything to strings and integers to put into SQL db

# Flatten every list-valued column into a single string.
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
for col in lists:
    cleaned_ans[col] = cleaned_ans[col].apply(cd.listToString())

# Derive the date bounds from the flattened 'Year' string (e.g. "258, 259").
# The pieces are stripped because the flattened string puts a space after the
# comma, which would otherwise leave EndDate as ' 259' instead of '259'.
# StartDate is the first piece; EndDate is the second piece when there is one,
# otherwise it falls back to the first.
cleaned_ans['StartDate'] = cleaned_ans['Year'].apply(lambda text: text.split(",")[0].strip())
cleaned_ans['EndDate'] = cleaned_ans['Year'].apply(lambda text: text.split(",")[:2][-1].strip())

columns = [u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
       u'Mint', u'Obverse Legend', u'Portrait', u'Reference',
       u'Reverse Legend', u'URI', u'Weight', u'Year', u'Source', 'StartDate', 'EndDate']

# Positional (re)labelling of the columns; raises if the column count differs
# from the 16 names expected by the INSERT statement.
cleaned_ans.columns = columns

# Decode text values from UTF-8 bytes. Floats (the Weight column) are passed
# through untouched; isinstance also covers float subclasses such as numpy
# float64, which have no .decode method.
for col in columns:
    cleaned_ans[col] = cleaned_ans[col].apply(lambda x: x if isinstance(x, float) else x.decode('utf-8'))

In [52]:
# Show the last rows of the prepared ANS frame as a visual sanity check.
cleaned_ans.tail()

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Mint,Obverse Legend,Portrait,Reference,Reverse Legend,URI,Weight,Year,Source,StartDate,EndDate
7826,Saloninus as Caesar,,tetradrachm,"Billon. (obverse) Bare-headed, draped, cuirass...",,Alexandreia,ΠΟΛΙΚΚΟΡCA OVAΛΕΡΙΑΝΟC K CEB,,"BMC 2296, Cologne 3002, Dattari 5376",L ϛ,http://numismatics.org/collection/2017.11.52,9.77,"258, 259",American Numismatic Society,258,259
7827,Claudius II,Tyche,tetradrachm,"Billon. (obverse) Bust laureate, draped, cuira...",,Alexandreia,ΑVTK KΛΑVΔΙΟC CEB,,"Cologne 3048 (same rev. die, Dattari-Savio pl....",L Γ,http://numismatics.org/collection/2017.11.53,9.69,"269, 270",American Numismatic Society,269,270
7828,Hadrian,Zeus,?,Billon. (obverse) Laureate bust r. fold of clo...,,,AYT KAIC TPAINOC (sic) ΑΔΡΙΑΝΟC,,RPC 5015 (2 spec.),L B,http://numismatics.org/collection/2017.11.6,13.74,"117, 118",American Numismatic Society,117,118
7829,Hadrian,Athena|Nike,Tetradrachm,"Billon. (obverse) Laureate bust r., fold of cl...",,,AYT KAI TRAI - AΔΡΙΑ CεΒ,,"Dattari-Savio, pl. 64, 7382, RPC.- cf. RPC 531...",L ϛ,http://numismatics.org/collection/2017.11.8,12.71,"122, 123",American Numismatic Society,122,123
7830,"Faustina II under Marcus Aurelius,",,?,Bronze. (obverse) Bust draped r.. (reverse) Ea...,,Alexandreia,ΦΑΥCTINA CεΒΑCΤΗ,,"Dattari-Savio pl. 195, 3639 (this coin)., Emme...",L E,http://numismatics.org/collection/2017.11.9,7.8,"164, 165",American Numismatic Society,164,165


# Store Data

In [19]:
# Open the SQLite database file named after `title` in ../Data/.
cnx = sqlite3.connect('../Data/'+title+'.sqlite')
# Cursor shared by the CREATE TABLE and INSERT statements that follow.
cur = cnx.cursor()

In [20]:
# Create the British Museum table. Column order matches the positional INSERT
# that fills it. `weight` is declared REAL (not INT) because the values are
# fractional gram weights such as 1.07.
cur.execute('''CREATE TABLE britishMuseum(associatedNames TEXT, authority TEXT, bibliography TEXT, culture TEXT,
                    curatorComment TEXT, dates TEXT, denomination TEXT, 
                    description TEXT, materials TEXT, museumNumber TEXT, objectType TEXT, obverseLegend TEXT,
                    mint TEXT, reverseLegend TEXT, state TEXT, subjects TEXT, weight REAL,
                    url TEXT, source TEXT, startDate INT, endDate INT)''')

<sqlite3.Cursor at 0x11bec0ce0>

In [21]:
# Bulk-insert the British Museum rows. Values are bound by position, so the
# DataFrame column order must match the column list in the statement.
bm_insert_sql = """INSERT INTO britishMuseum (associatedNames, authority, bibliography, culture,
                        curatorComment, dates, denomination, description,
                        materials, museumNumber, objectType, obverseLegend,
                        mint, reverseLegend, state, subjects, weight,
                        url, source, startDate, endDate) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""
bm_rows = list(cleaned_bm.to_records(index=False))
cur.executemany(bm_insert_sql, bm_rows)

<sqlite3.Cursor at 0x11bec0ce0>

In [16]:
# Create the American Numismatic Society table. Column order matches the
# positional INSERT that fills it. `weight` is declared REAL (not INT) because
# the values are fractional gram weights such as 9.77.
cur.execute('''CREATE TABLE americanNumismaticSociety(authority TEXT, deity TEXT, denomination TEXT, description TEXT, 
                       issuer TEXT, mint TEXT, obverseLegend TEXT, portrait TEXT, reference TEXT, reverseLegend TEXT, 
                       url TEXT, weight REAL, dates TEXT, source TEXT, startDate INT, endDate INT)''')

<sqlite3.Cursor at 0x117645dc0>

In [53]:
# Bulk-insert the ANS rows. Values are bound by position, so the DataFrame
# column order must match the column list in the statement.
ans_insert_sql = """INSERT INTO americanNumismaticSociety (authority, deity, denomination, description, 
                       issuer, mint, obverseLegend, portrait, reference, reverseLegend, url, weight, dates, 
                       source, startDate, endDate) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""
ans_rows = list(cleaned_ans.to_records(index=False))
cur.executemany(ans_insert_sql, ans_rows)

<sqlite3.Cursor at 0x117645dc0>

In [22]:
# Commit the open transaction so the inserted rows are written to the database file.
cnx.commit()