In [3]:
from sqlalchemy import types
from StringIO import StringIO
import sqlite3
import CleanData as cd
import numpy as np
import pandas as pd

# Clean Data

### British Museum

In [4]:
title = 'Roman_Imperial_Coinage'

In [5]:
# Read in data and convert to appropriate types
bm_df = pd.read_csv('../Data/BM_'+title+'.csv',
                 converters={"Authority": cd.stringToList(), 'Associated names': cd.stringToList(), 
                             'Subjects': cd.stringToList(), 'Inscriptions': cd.stringToListofDicts('|', ';', ':')})
bm_df = pd.concat([bm_df, cd.cleanInscriptions(bm_df)], axis=1)
bm_df = bm_df.drop('Unnamed: 0', 1)
bm_df = bm_df.replace(np.nan, '', regex=True)
bm_df = bm_df.rename(columns = {'Culture/period':'Culture'})

In [6]:
# Clean data to only retain relevant information
mask = ((bm_df['Object type'] == 'coin ') & (bm_df['Date'].str.find('stC') == -1))
filtered = bm_df[mask]

lists = ['Authority', 'Subjects', 'Associated names', 'Obverse legend', 'Reverse legend']
strings = ['Museum number', 'Denomination', 'Description', 'State', 'Culture', 'Materials', 
            'Curator\'s comments', 'Bibliography', 'Object type', "Production place"]
floats = ['Weight (g)']
dates = ['Date']
redundant_notes = ['Production place', 'Denomination']
do_nothing = ['url', 'Inscriptions']
duplicate_cols = ['Authority', 'Date', 'Production place', 'Description', 'Subjects', "Curator's comments",
                  'Obverse legend', 'Reverse legend']

cleaned_bm = cd.cleanDF(filtered, lists, strings, floats, dates, redundant_notes, do_nothing, duplicate_cols)
cleaned_bm = cleaned_bm.drop('Inscriptions', 1)
cleaned_bm['Source'] = 'British Museum'
#cleaned_bm.head()

In [7]:
# Convert everything to strings and integers to put into SQL db
lists = ['Authority', 'Subjects', 'Associated names', 'Obverse legend', 'Reverse legend', 'Date']
for col in lists:
    cleaned_bm[col] = cleaned_bm[col].apply(cd.listToString())

columns = [u'Associated names', u'Authority', u'Bibliography', u'Culture',
       u'Curator\'s comments', u'Date', u'Denomination', u'Description',
       u'Materials', u'Museum number', u'Object type', u'Obverse legend',
       u'Production place', u'Reverse legend', u'State', u'Subjects',
       u'url', u'Source']
for col in columns:
    cleaned_bm[col] = cleaned_bm[col].apply(lambda x: x.decode('utf-8'))
    
cleaned_bm.columns = ['associatedNames', 'authority', 'bibliography', 'culture',
                       'curatorComment', 'date', 'denomination', 'description',
                       'materials', 'museumNumber', 'objectType', 'obverseLegend',
                       'mint', 'reverseLegend', 'state', 'subjects',
                       'weight', 'url', 'source']

In [8]:
cleaned_bm.tail()

Unnamed: 0,associatedNames,authority,bibliography,culture,curatorComment,date,denomination,description,materials,museumNumber,objectType,obverseLegend,mint,reverseLegend,state,subjects,weight,url,source
22033,"Antinous, Demeter/Ceres",Hadrian,"BMC Greek (Mysia) 13, p.4 PCG 86.3, p. (pl. 47...",Roman Provincial,The young courtier Antinous died suddenly in A...,"130, 138",?,"Alloy coin.(obverse) Bare head of Antinous, ri...",alloy,18440425.1609,coin,AHTINOOC IAKXOC,"Adramyteum,",ECIOC AN HKE A PAMVTHNOIC,Roman Empire,classical deity,26.69,http://www.britishmuseum.org/research/collecti...,British Museum
22034,Constantine the Great,Constantine the Great,"RIC6 65, p.407 (type)",Roman Imperial,,"312, 313",solidus,"Gold coin.(obverse) Bust of Constantine I, dra...",gold,18960608.98,coin,IMP CONSTANTINVS P F AVG,"Ostia,",PRINCIPI IVVENTVTIS,Roman Empire,emperor/empress,8.49,http://www.britishmuseum.org/research/collecti...,British Museum
22035,Hadrian,Hadrian,"RE3 1209, p. 417 Strack (Hadrian) 557 (var = f...",Roman Imperial,,"117, 138",sestertius,"on the left, three citizens in front and one b...",copper alloy,18690507.7,coin,IMP CAESAR TRAIANVS HADRIANVS AVG P M TR P COS...,"Rome,",RELIQVA VETERA HS N[OVIES] MILL ABOLITA,Roman Empire,"politics, emperor/empress",24.07,http://www.britishmuseum.org/research/collecti...,British Museum
22036,Hadrian,Hadrian,"RE3 1207, p. 417 Strack (Hadrian) 557 RIC2 592...",Roman Imperial,,"117, 138",sestertius,"on the left, three citizens (two in front, one...",copper alloy,18720709.561,coin,IMP CAESAR TRAIANVS HADRIANVS AVG P M TR P COS...,"Rome,",RELIQVA VETERA HS NOVIES MILL ABOLITA,Roman Empire,"politics, emperor/empress",20.64,http://www.britishmuseum.org/research/collecti...,British Museum
22037,Constantine the Great,Constantine the Great,RIC7 p275.381,Roman Imperial,maiorina (big [coins]) and centenionalis (a ‘h...,334,nummus,"Copper alloy coin.(obverse) Diademed, draped a...",copper alloy,B.1746,coin,CONSTANTINVS MAX AVG,"Arles,",GLORIA EXERCITVS,Roman Empire,"soldier, emperor/empress",2.28,http://www.britishmuseum.org/research/collecti...,British Museum


### American Numismatic Society

In [7]:
ans_df = pd.read_csv('../Data/ANS_'+title+'.csv',
                    converters={"Authority": cd.stringToList('|'), 'Year': cd.stringToList('|'),
                               'Issuer': cd.stringToList('|'), 'Portrait': cd.stringToList('|'),
                               'Reference': cd.stringToList('|')})
ans_df = ans_df.replace(np.nan, '', regex=True)
ans_df['Description'] = ans_df.apply(lambda row: cd.makeDescription(row['Material'], row['Manufacture'], row['Obverse Type'], row['Reverse Type']), axis=1)
#ans_df.tail()

In [8]:
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
strings = ['Deity', 'Denomination', 'Mint', 'Description', 'Obverse Legend', 'Reverse Legend']
floats = ['Weight']
dates = []
redundant_notes = []
do_nothing = ['URI']
duplicate_cols = ['Issuer', 'Year', 'Mint', 'Deity', 'Description', 'Obverse Legend', 'Reverse Legend']

cleaned_ans = cd.cleanDF(ans_df, lists, strings, floats, dates, redundant_notes, do_nothing, 
                              duplicate_cols, production_place='Mint')
cleaned_ans['Source'] = 'British Museum'
cleaned_ans.tail()
cleaned_ans.columns

Index([u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
       u'Mint', u'Obverse Legend', u'Portrait', u'Reference',
       u'Reverse Legend', u'URI', u'Weight', u'Year', u'Source'],
      dtype='object')

In [9]:
# Convert everything to strings and integers to put into SQL db
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
for col in lists:
    cleaned_ans[col] = cleaned_ans[col].apply(cd.listToString())

columns = [u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
       u'Mint', u'Obverse Legend', u'Portrait', u'Reference',
       u'Reverse Legend', u'URI', u'Year', u'Source']
for col in columns:
    cleaned_ans[col] = cleaned_ans[col].apply(lambda x: x.decode('utf-8'))

cleaned_ans.head()

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Mint,Obverse Legend,Portrait,Reference,Reverse Legend,URI,Weight,Year,Source
0,Augustus,,As,Struck Bronze. (obverse) Augustus head right. ...,,Ephesus,CAESAR,Augustus,RIC I (second edition) Augustus 486,AVGVSTVS,http://numismatics.org/search/id/1944.100.39200,11.1,-25,British Museum
1,Augustus,,As,Struck Bronze.,,Ephesus,CAESAR,Augustus,"BMC.731, RIC I (second edition) Augustus 486",AVGVSTVS,http://numismatics.org/search/id/1952.142.117,10.92,-25,British Museum
2,Augustus,,Aureus,Struck Gold.,,Colonia Patricia,,Augustus,"BMC.345v, RIC I (second edition) Augustus 50A",,http://numismatics.org/search/id/1957.172.1489,3.66,"-20, -19",British Museum
3,Augustus,,Aureus,"Struck Gold. (obverse) Head of Augustus, laure...",,Lugdunum,AVGVSTVS DIVI F,Augustus,"BMC.492, RIC I (second edition) Augustus 200",IMP XIIII,http://numismatics.org/search/id/0000.999.16779,7.88,-9,British Museum
4,Augustus,,Denarius,Bronze. (obverse) Bare head r.. (reverse) Fema...,Augustus,,AVGVSTVS DIVI F,Augustus,,IMP X ACT,http://numismatics.org/search/id/1944.27.9,0.0,"-26, 14",British Museum


# Store Data

In [9]:
cnx = sqlite3.connect('../Data/'+title+'.sqlite')
cur = cnx.cursor()

In [10]:
cur.execute('''CREATE TABLE britishMuseum(associatedNames TEXT, authority TEXT, bibliography TEXT, culture TEXT,
                    curatorComment TEXT, dates TEXT, denomination TEXT, description TEXT,
                    materials TEXT, museumNumber TEXT, objectType TEXT, obverseLegend TEXT,
                    mint TEXT, reverseLegend TEXT, state TEXT, subjects TEXT, weight INT,
                    url TEXT, source TEXT)''')

<sqlite3.Cursor at 0x123894b20>

In [11]:
cur.executemany("""INSERT INTO britishMuseum (associatedNames, authority, bibliography, culture,
                        curatorComment, dates, denomination, description,
                        materials, museumNumber, objectType, obverseLegend,
                        mint, reverseLegend, state, subjects, weight,
                        url, source) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", 
                list(cleaned_bm.to_records(index=False)))

<sqlite3.Cursor at 0x123894b20>

In [12]:
cur.execute('''CREATE TABLE americanNumismaticSociety(authority TEXT, deity TEXT, denomination TEXT, description TEXT, 
                       issuer TEXT, mint TEXT, obverseLegend TEXT, portrait TEXT, reference TEXT,
                       reverseLegend TEXT, url TEXT, weight INT, dates TEXT, source TEXT)''')

<sqlite3.Cursor at 0x123894b20>

In [13]:
cur.executemany("""INSERT INTO americanNumismaticSociety (authority, deity, denomination, description, 
                       issuer, mint, obverseLegend, portrait, reference,
                       reverseLegend, url, weight, dates, source) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", 
                list(cleaned_ans.to_records(index=False)))

NameError: name 'cleaned_ans' is not defined

In [None]:
cnx.commit()