In [1]:
# from sqlalchemy import types
import numpy as np
import pandas as pd
# import sqlite3

import re, string

import CleanData as cd
from emperors import emperors

In [2]:
emperors = [emperor for emperors_list in emperors for emperor in emperors_list]
'L Valerius Catullus' in "".join(emperors)

False

# Clean Data

In [3]:
title = 'Roman_Imperial_Coinage'

In [10]:
def cleanMaterials(materialCol):
    '''
    Takes in a string definiing the material column then cleans the material descriptions. 
    
    Currently, it cleans by:
        * Making all the letters to lowercase
        * Simplifies the material types
            - bronze are either copper, bronze, billion, or orichalum
            - silvers is silver
            - golds is gold
            - alloys are lead and alloy
            - replicas are zinc or white metal
            - other is anything else
    '''
    
    def cleanMat(row):
        # Get material and convert to lowercase
        material = row[materialCol]
        material = material.lower()
        
        bronzes = ['copper', 'bronze', 'billion', 'orichalum']
        silvers = ['silver']
        golds = ['gold']
        alloys = ['lead', 'alloy']
        replicas = ['zinc', 'white metal']
        
        if any(mat in material for mat in replicas):
            material = 'replicas'
        elif any(mat in material for mat in bronzes):
            material = 'bronze'
        elif any(mat in material for mat in silvers):
            material = 'silver'
        elif any(mat in material for mat in golds):
            material = 'gold'
        elif any(mat in material for mat in alloys):
            material = 'lead/alloy'
        else:
            material = 'other'
        
        return material
    
    return cleanMat


def splitDescription(descriptionCol, side):
    '''
    Takes in a string defining the description column then splits the description 
    to obverse and reverse specified by SIDE
    '''
    
    def splitDesc(row):
        desc = row[descriptionCol]
        result = re.split(r'(\(obverse\)|\(reverse\))', desc)
        if side == 'obverse' and len(result) >= 2:
            return result[2].strip()
        elif side == 'reverse' and len(result) >= 4:
            return result[4].strip()
        else:
            return ''
    
    return splitDesc


def cleanAuthority(authority, issuer):
    '''
    Function to be applied to a row of a dataframe. Cleans the authority column after merging with issuer
    
    Parameters
    ----------
    authority : str
        Name of the authority column
    issuer : str
        Name of the issuer column. If blank, then no issuer.
    
    Returns
    -------
    String containing issuer and authority. Lists the names in alphabetical order.
    
    DocTest
    -------
    cleanAuthority('authority', 'issuer')({'authority': 'augustus, caesar (civil war)', 'issuer': "roma"})
    >>> 'augustus, caesar '
    '''
    
    def cleanAuth(row):
        names = list()
        if len(row[authority]) == 0:
            return ""
        for name in row[authority].split(","):
            name = re.sub(r'\([^)]*\)', '', name)
            name = name.strip()
            if name:
                if name == 'Constantine the Great':
                    names.append('Constantine I')
                else:
                    names.append(name)
            
        if issuer:
            for name in row[issuer].split(","):
                name = name.strip()
                #if name not in names:
                    # names.append(name)

        if 'Divus Augustus' in names:
            names.append("Tiberius")
        if any([unknown_name in names for unknown_name in ['uncertain', 'Anonymous', '']]):
            return ''
        
        names.sort()
        
        return ", ".join(names)
    
    return cleanAuth

#cleanAuthority('authority', 'issuer')({'authority': 'augustus, caesar (civil war)', 'issuer': "roma"})

emperors = [emperor for emperors_list in emperors for emperor in emperors_list]
def getEmperor(authority, delimiter=',', emperors=emperors):
    '''
    Function to get the emperor name out of the authority.
    
    Parameter
    ---------
    authority : string
        Column name containing the authority
    emperors : list
        List of strings containing the emperors
        
    Returns
    -------
    String of the emperor name under which the coin was minted
    '''
    emperors_str = "".join(emperors)
    
    def getEmp(row):
        emperors = list()
        if len(row[authority]) == 0:
            return ""
        authorities = row[authority].split(",")
        for person in authorities:
            person = person.strip()
            if person in emperors_str:
                emperors.append(person)
            elif person == 'Constantine the Great':
                emperors.append('Constantine I')
        emperors.sort()
        return ", ".join(emperors)
        
    return getEmp

getEmperor('authority')({'authority': "Constantine the Great, Maximian"})

'Constantine I, Maximian'

### British Museum

In [5]:
# Read in data and convert to appropriate types
bm_df = pd.read_csv('../Data/BM_'+title+'.csv',
                 converters={"Authority": cd.stringToList(), 'Associated names': cd.stringToList(), 
                             'Subjects': cd.stringToList(), 'Inscriptions': cd.stringToListofDicts('|', ';', ':')})
bm_df = pd.concat([bm_df, cd.cleanInscriptions(bm_df)], axis=1)
bm_df = bm_df.drop('Unnamed: 0', 1)
bm_df = bm_df.replace(np.nan, '', regex=True)
bm_df['Denomination'] = bm_df.apply(lambda row: cd.cleanDenomination(row['Denomination']), axis=1)
bm_df = bm_df.rename(columns = {'Culture/period':'Culture'})
bm_df = bm_df[~bm_df["Curator's comments"].str.contains('forgery')]
bm_df['Materials'] = bm_df.apply(cleanMaterials("Materials"), axis=1)
bm_df['Obverse type'] = bm_df.apply(splitDescription('Description', 'obverse'), axis=1)
bm_df['Reverse type'] = bm_df.apply(splitDescription('Description', 'reverse'), axis=1)

In [6]:
bm_df.tail()

Unnamed: 0,Associated names,Authority,Bibliography,Culture,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,...,Object type,Production place,State,Subjects,Weight (g),url,Obverse legend,Reverse legend,Obverse type,Reverse type
84837,"[Representation of: Meleager, Representation o...",[],Jones 1990a 141a Dalton 1915 189,,"Text from Dalton 1915, Catalogue of Engraved G...",16thC,,Cameo; onyx; Meleager and Atalanta; Meleager s...,"[{'Inscription Type': 'signature', 'Inscriptio...",gold,...,cameo,,,"[mythical figure/creature, myth/legend]",,http://www.britishmuseum.org/research/collecti...,[S TRATOU],[],,
84838,"[Representation of: Zeus/Jupiter, Named in ins...",[],Jones 1990a 151c Dalton 1915 54,,"Text from Dalton 1915, Catalogue of Engraved G...",18thC,,"Cameo; onyx; head of Jupiter to right, with wr...","[{'Inscription Type': 'inscription', 'Inscript...",other,...,cameo,,,"[symbol, myth/legend, leaf, classical deity]",,http://www.britishmuseum.org/research/collecti...,[DIOSKOURIDOU],[],,
84839,"[Associated with: Alexander I, Tsar of Russia,...",[],BM Satires 10451 De Vinck 1909-1967 8068,,(Description and comment from M.Dorothy George...,1805,,"A female monster, 'l'Angleterre', rides astrid...",[{'Inscription Content': 'Lettered with title ...,other,...,satirical print print,"[Published in: Paris, (Europe,France,Ile-de-Fr...",,[satire],,http://www.britishmuseum.org/research/collecti...,[],[],,
84840,"[Emblem of: Order of the Teutonic Knights, Nam...",[],Read 1902 180 Tait 1986 36,,Text from Tait 1986:- Origin: German; probably...,1612 (circa),,"Pendant oval medallion of Maximilian, Archduke...","[{'Inscription Type': 'inscription', 'Inscript...",gold,...,pendant medallion,"[Made in: Vienna (city), (Europe,Austria,Vienn...",,"[heraldry, barrack/camp scene]",33.48,http://www.britishmuseum.org/research/collecti...,[MAXIMIL . D . G . ARC . AVS ],[MILITEMVS],,
84841,"[Associated with: Dennis Collins, Associated w...",[],BM Satires 17238 BM Satires 17239 BM Satires 1...,,"Notes to No. 17238: Phillpotts (see No. 17005,...",1832,,Lithographic caricature magazine of four pages...,[{'Inscription Content': 'Lettered on first pa...,other,...,satirical print print newspaper/periodical,"[Published in: London, (Europe,British Isles,E...",,[satire],,http://www.britishmuseum.org/research/collecti...,[],[],,


In [7]:
# Clean data to only retain relevant information
mask = ((bm_df['Object type'] == 'coin ') & (bm_df['Date'].str.find('stC') == -1))
filtered = bm_df[mask]

lists = ['Authority', 'Subjects', 'Associated names', 'Obverse legend', 'Reverse legend']
strings = ['Museum number', 'Denomination', 'Description', 'State', 'Culture', 'Materials', 
            'Curator\'s comments', 'Bibliography', 'Object type', "Production place", 'Obverse type', 'Reverse type']
floats = ['Weight (g)']
dates = ['Date']
redundant_notes = ['Production place', 'Denomination']
do_nothing = ['url', 'Inscriptions']
duplicate_cols = ['url']

cleaned_bm = cd.cleanDF(filtered, lists, strings, floats, dates, redundant_notes, do_nothing, duplicate_cols)
cleaned_bm = cleaned_bm.drop('Inscriptions', 1)
cleaned_bm['Source'] = 'British Museum'
#cleaned_bm.head()

  result = result.reindex_axis(sorted(result.columns), axis=1)


In [8]:
cleaned_bm.tail()

Unnamed: 0,Associated names,Authority,Bibliography,Culture,Curator's comments,Date,Denomination,Description,Materials,Museum number,...,Obverse legend,Obverse type,Production place,Reverse legend,Reverse type,State,Subjects,Weight (g),url,Source
82887,"(Constantine the Great,)","(Constantine the Great,)","RIC6 65, p.407 (type)",Roman Imperial,,"(312, 313)",solidus,"Gold coin.(obverse) Bust of Constantine I, dra...",gold,18960608.98,...,"(IMP CONSTANTINVS P F AVG,)","Bust of Constantine I, draped, cuirassed, head...",Ostia,"(PRINCIPI IVVENTVTIS,)","Constantine I, in military dress with cloak ov...",Roman Empire,"(emperor/empress,)",8.49,http://www.britishmuseum.org/research/collecti...,British Museum
82888,"(Hadrian,)","(Hadrian,)","RE3 1209, p. 417 Strack (Hadrian) 557 (var = f...",Roman Imperial,,"(117, 138)",sestertius,"on the left, three citizens in front and one b...",bronze,18690507.7,...,(IMP CAESAR TRAIANVS HADRIANVS AVG P M TR P CO...,Laureate bust of Hadrian with drapery on his l...,Rome,"(RELIQVA VETERA HS N[OVIES] MILL ABOLITA,)","on the left, three citizens in front and one b...",Roman Empire,"(politics, emperor/empress)",24.07,http://www.britishmuseum.org/research/collecti...,British Museum
82889,"(Hadrian,)","(Hadrian,)","RE3 1207, p. 417 Strack (Hadrian) 557 RIC2 592...",Roman Imperial,,"(117, 138)",sestertius,"on the left, three citizens (two in front, one...",bronze,18720709.561,...,(IMP CAESAR TRAIANVS HADRIANVS AVG P M TR P CO...,Laureate bust of Hadrian with drapery on his l...,Rome,"(RELIQVA VETERA HS NOVIES MILL ABOLITA,)","on the left, three citizens (two in front, one...",Roman Empire,"(politics, emperor/empress)",20.64,http://www.britishmuseum.org/research/collecti...,British Museum
82890,"(Constantine the Great,)","(Constantine the Great,)",RIC7 p275.381,Roman Imperial,maiorina (big [coins]) and centenionalis (a ‘h...,"(334,)",nummus,"Copper alloy coin.(obverse) Diademed, draped a...",bronze,B.1746,...,"(CONSTANTINVS MAX AVG,)","Diademed, draped and cuirassed bust, right.",Arles,"(GLORIA EXERCITVS,)",Two soldiers with two standards between them.,Roman Empire,"(soldier, emperor/empress)",2.28,http://www.britishmuseum.org/research/collecti...,British Museum
82891,"(Nero, Zeus/Jupiter)","(Nero,)","RIC1 52, p.153 RE1 67, p.209",Roman Imperial,The reverse celebrates Nero's deliverance from...,"(64, 65)",aureus,"Gold coin.(obverse) Head of Nero, bearded and ...",gold,"BNK,R.13",...,"(NERO CAESAR AVGVSTVS,)","Head of Nero, bearded and laureate, right.",Rome,"(IVPPITER CVSTOS,)","Jupiter, bare to waist, seated left on throne,...",Roman Empire,"(emperor/empress, classical deity)",7.39,http://www.britishmuseum.org/research/collecti...,British Museum


In [9]:
# Convert everything to strings and integers to put into SQL db
lists = ['Authority', 'Subjects', 'Associated names', 'Obverse legend', 'Reverse legend', 'Date']
for col in lists:
    cleaned_bm[col] = cleaned_bm[col].apply(cd.listToString())

columns = [u'Associated names', u'Authority', u'Bibliography', u'Culture',
       u'Curator\'s comments', u'Date', u'Denomination', u'Description',
       u'Materials', u'Museum number', u'Object type', u'Obverse legend', 'Obverse type',
       u'Production place', u'Reverse legend', 'Reverse type', u'State', u'Subjects',
       u'url', u'Source']

#for col in columns:
#    cleaned_bm[col] = cleaned_bm[col].apply(lambda x: x.decode('utf-8'))

cleaned_bm.columns = ['associatedNames', 'authority', 'bibliography', 'culture',
                       'curatorComment', 'date', 'denomination', 'description',
                       'materials', 'museumNumber', 'objectType', 'obverseLegend', 'obverseType',
                       'mint', 'reverseLegend', 'reverseType', 'state', 'subjects',
                       'weight', 'url', 'source']

cleaned_bm['authority'] = cleaned_bm.apply(cleanAuthority("authority", ''), axis=1)
cleaned_bm = cleaned_bm[cleaned_bm['authority'] != '']
cleaned_bm['emperor'] = cleaned_bm.apply(getEmperor("authority"), axis=1)
cleaned_bm = cleaned_bm[cleaned_bm['emperor'] != '']
cleaned_bm['subjects'] = cleaned_bm['associatedNames']
cleaned_bm['emperorDisplayed'] = cleaned_bm.apply(getEmperor("subjects"), axis=1)
cleaned_bm.loc[cleaned_bm['date'] == '220222', 'date'] = '220, 222'
cleaned_bm.loc[cleaned_bm['date'] == '412541, 341', 'date'] = '41'
cleaned_bm.loc[cleaned_bm['date'] == '26810', 'date'] = '268'
regex = re.compile('[%s]' % re.escape(string.punctuation))
cleaned_bm['cleanDesc'] = cleaned_bm.apply(lambda row: regex.sub(' ', row['description']), axis=1)
cleaned_bm['startDate'] = cleaned_bm.apply(lambda row: row['date'].split(",")[0], axis=1)
cleaned_bm['endDate'] = cleaned_bm.apply(lambda row: row['date'].split(",")[1] 
                                                 if len(row['date'].split(",")) > 1 
                                                 else row['date'].split(",")[0], 
                                         axis=1)

In [10]:
cleaned_bm.tail()

Unnamed: 0,associatedNames,authority,bibliography,culture,curatorComment,date,denomination,description,materials,museumNumber,...,state,subjects,weight,url,source,emperor,emperorDisplayed,cleanDesc,startDate,endDate
82887,Constantine the Great,Constantine I,"RIC6 65, p.407 (type)",Roman Imperial,,"312, 313",solidus,"Gold coin.(obverse) Bust of Constantine I, dra...",gold,18960608.98,...,Roman Empire,Constantine the Great,8.49,http://www.britishmuseum.org/research/collecti...,British Museum,Constantine I,Constantine I,Gold coin obverse Bust of Constantine I dra...,312,313
82888,Hadrian,Hadrian,"RE3 1209, p. 417 Strack (Hadrian) 557 (var = f...",Roman Imperial,,"117, 138",sestertius,"on the left, three citizens in front and one b...",bronze,18690507.7,...,Roman Empire,Hadrian,24.07,http://www.britishmuseum.org/research/collecti...,British Museum,Hadrian,Hadrian,on the left three citizens in front and one b...,117,138
82889,Hadrian,Hadrian,"RE3 1207, p. 417 Strack (Hadrian) 557 RIC2 592...",Roman Imperial,,"117, 138",sestertius,"on the left, three citizens (two in front, one...",bronze,18720709.561,...,Roman Empire,Hadrian,20.64,http://www.britishmuseum.org/research/collecti...,British Museum,Hadrian,Hadrian,on the left three citizens two in front one...,117,138
82890,Constantine the Great,Constantine I,RIC7 p275.381,Roman Imperial,maiorina (big [coins]) and centenionalis (a ‘h...,334,nummus,"Copper alloy coin.(obverse) Diademed, draped a...",bronze,B.1746,...,Roman Empire,Constantine the Great,2.28,http://www.britishmuseum.org/research/collecti...,British Museum,Constantine I,Constantine I,Copper alloy coin obverse Diademed draped a...,334,334
82891,"Nero, Zeus/Jupiter",Nero,"RIC1 52, p.153 RE1 67, p.209",Roman Imperial,The reverse celebrates Nero's deliverance from...,"64, 65",aureus,"Gold coin.(obverse) Head of Nero, bearded and ...",gold,"BNK,R.13",...,Roman Empire,"Nero, Zeus/Jupiter",7.39,http://www.britishmuseum.org/research/collecti...,British Museum,Nero,Nero,Gold coin obverse Head of Nero bearded and ...,64,65


### American Numismatic Society

In [11]:
ans_df = pd.read_csv('../Data/ANS_'+title+'.csv',
                    converters={"Authority": cd.stringToList('|'), 'Year': cd.stringToList('|'),
                               'Issuer': cd.stringToList('|'), 'Portrait': cd.stringToList('|'),
                               'Reference': cd.stringToList('|')})
ans_df = ans_df.replace(np.nan, '', regex=True)
ans_df['Description'] = ans_df.apply(lambda row: cd.makeDescription(
                                                                    row['Material'], 
                                                                    row['Manufacture'], 
                                                                    row['Obverse Type'], 
                                                                    row['Reverse Type']
                                                                   ), axis=1)
ans_df['DupCheck'] = ans_df.apply(lambda row: cd.makeDupCheckCol(
                                                                 row['Material'], 
                                                                 row['Denomination'], 
                                                                 row['Portrait'], 
                                                                 row['Mint'],
                                                                 row['Year']
                                                                ), axis=1)
# Clean denomination
ans_df['Denomination'] = ans_df.apply(lambda row: cd.cleanDenomination(row['Denomination']), axis=1)
ans_df = ans_df[~ans_df["Material"].str.contains("Steel")]
ans_df['Material'] = ans_df.apply(cleanMaterials("Material"), axis=1)
ans_df.tail()

Unnamed: 0.1,Unnamed: 0,URI,Title,RecordId,Authority,Coin Type URI,Date on Object,Degree,Deity,Denomination,...,Region,Reverse Legend,Reverse Type,Weight,Year,Thumbnail_obv,Thumbnail_rev,Date Record Modified,Description,DupCheck
62310,62310,http://numismatics.org/collection/2017.11.7,"Billon tetradrachm, Alexandreia, AD 118 - AD 1...",2017.11.7,[Hadrian],,,,Tyche,tetradrachm,...,Egypt,L Γ,Tyche standing l. holding rudder and cornucopia.,12.73,"[118, 119]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-05T17:40:30Z,Billon. (obverse) Laureate bust r. fold of cl...,"Billon tetradrachm [''] Alexandreia ['118', '1..."
62311,62311,http://numismatics.org/collection/2017.11.8,"Billon Tetradrachm, AD 122 - AD 123. 2017.11.8",2017.11.8,[Hadrian],,,,Athena|Nike,tetradrachm,...,,L ϛ,Athena standing l. holding Nikeand shield,12.71,"[122, 123]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-05T17:40:30Z,"Billon. (obverse) Laureate bust r., fold of c...","Billon Tetradrachm [''] ['122', '123']"
62312,62312,http://numismatics.org/collection/2017.11.9,"Bronze Coin, Alexandreia, AD 164 - AD 165. 201...",2017.11.9,"[Faustina II under Marcus Aurelius,]",,,,,,...,Egypt,L E,"Eagle with wings folded standing left, head r.",7.8,"[164, 165]",,,2017-12-05T17:40:30Z,Bronze. (obverse) Bust draped r.. (reverse) E...,"Bronze [''] Alexandreia ['164', '165']"
62313,62313,http://numismatics.org/collection/2015.8.48,"Bronze AE3, Constantinople, AD 378 - AD 383. 2...",2015.8.48,[Theodosius I],,,,,ae,...,,,,2.502,"[378, 383]",,,2017-12-19T12:57:47Z,Bronze.,"Bronze AE3 [''] Constantinople ['378', '383']"
62314,62314,http://numismatics.org/collection/2017.34.1,"Gold Aureus of Antoninus Pius, Rome, AD 145 - ...",2017.34.1,[Antoninus Pius],http://numismatics.org/ocre/id/ric.3.ant.503Aa,,,,aureus,...,Italy,CONCORDIA,Dove standing right,7.11,"[145, 161]",http://numismatics.org/collectionimages/200020...,http://numismatics.org/collectionimages/200020...,2017-12-19T12:57:47Z,"Struck Gold. (obverse) Bust of Faustina II, dr...",Gold Aureus ['Faustina the Younger'] Rome ['14...


In [12]:
ans_df['Reference'].values[0]

['BMC.113', 'RIC I (second edition) Nero 354', 'WCN.171']

In [13]:
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
strings = ['Deity', 'Denomination', 'Mint', 'Description', 'Obverse Legend',
           'Reverse Legend', 'Material', 'Obverse Type', 'Reverse Type']
floats = []
dates = []
redundant_notes = []
do_nothing = ['URI', 'DupCheck']
duplicate_cols = 'URI'

cleaned_ans = cd.cleanDF(ans_df, lists, strings, floats, dates, redundant_notes, do_nothing, 
                              duplicate_cols, production_place='Mint')
cleaned_ans['Source'] = 'American Numismatic Society'
cleaned_ans.drop(['DupCheck'], axis=1, inplace=True)
cleaned_ans.tail()
cleaned_ans.columns

  result = result.reindex_axis(sorted(result.columns), axis=1)


Index(['Authority', 'Deity', 'Denomination', 'Description', 'Issuer',
       'Material', 'Mint', 'Obverse Legend', 'Obverse Type', 'Portrait',
       'Reference', 'Reverse Legend', 'Reverse Type', 'URI', 'Year', 'Source'],
      dtype='object')

In [14]:
# Convert everything to strings and integers to put into SQL db
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
for col in lists:
    cleaned_ans[col] = cleaned_ans[col].apply(cd.listToString())
    
cleaned_ans['Authority'] = cleaned_ans.apply(cleanAuthority("Authority", 'Issuer'), axis=1)
cleaned_ans = cleaned_ans[cleaned_ans['Authority'] != '']
cleaned_ans['emperor'] = cleaned_ans.apply(getEmperor("Authority"), axis=1)
cleaned_ans = cleaned_ans[cleaned_ans['emperor'] != '']
cleaned_ans['emperorDisplayed'] = cleaned_ans.apply(getEmperor("Portrait"), axis=1)
regex = re.compile('[%s]' % re.escape(string.punctuation))
cleaned_ans['cleanDesc'] = cleaned_ans.apply(lambda row: regex.sub('', row['Description']), axis=1)
cleaned_ans['StartDate'] = cleaned_ans.apply(lambda row: row['Year'].split(",")[0], axis=1)
cleaned_ans['EndDate'] = cleaned_ans.apply(lambda row: row['Year'].split(",")[1] 
                                                 if len(row['Year'].split(",")) > 1 
                                                 else row['Year'].split(",")[0], 
                                         axis=1)

# Reorder columns
columns = [u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
       'Material', u'Mint', u'Obverse Legend', 'Obverse Type', u'Portrait', u'Reference',
       u'Reverse Legend', 'Reverse Type', u'URI', u'Year', u'Source', u'emperor', 'emperorDisplayed',
           'cleanDesc', 'StartDate', 'EndDate']

cleaned_ans.columns = columns

#for col in columns:
#    cleaned_ans[col] = cleaned_ans[col].apply(lambda x: x.decode('utf-8') if type(x) is not float else x)

In [15]:
cleaned_ans.tail()

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Material,Mint,Obverse Legend,Obverse Type,Portrait,...,Reverse Legend,Reverse Type,URI,Year,Source,emperor,emperorDisplayed,cleanDesc,StartDate,EndDate
62301,Hadrian,Zeus,?,Billon. (obverse) Laureate bust r. fold of clo...,,other,,AYT KAIC TPAINOC (sic) ΑΔΡΙΑΝΟC,Laureate bust r. fold of cloak on front shoulder,,...,L B,"Bust of Zeus right wearingtaenia, fold of drap...",http://numismatics.org/collection/2017.11.6,"117, 118",American Numismatic Society,Hadrian,,Billon obverse Laureate bust r fold of cloak o...,117,118
62302,Hadrian,Tyche,tetradrachm,Billon. (obverse) Laureate bust r. fold of clo...,,other,Alexandreia,AYT KAIC TRAIANOC ΑΔΡΙΑΝΟC CεΒ,Laureate bust r. fold of cloak on front should...,,...,L Γ,Tyche standing l. holding rudder and cornucopia.,http://numismatics.org/collection/2017.11.7,"118, 119",American Numismatic Society,Hadrian,,Billon obverse Laureate bust r fold of cloak o...,118,119
62303,Hadrian,Athena|Nike,tetradrachm,"Billon. (obverse) Laureate bust r., fold of cl...",,other,,AYT KAI TRAI - AΔΡΙΑ CεΒ,"Laureate bust r., fold of cloak on front shoul...",,...,L ϛ,Athena standing l. holding Nikeand shield,http://numismatics.org/collection/2017.11.8,"122, 123",American Numismatic Society,Hadrian,,Billon obverse Laureate bust r fold of cloak o...,122,123
62305,Theodosius I,,ae,Bronze.,,bronze,Constantinople,,,,...,,,http://numismatics.org/collection/2015.8.48,"378, 383",American Numismatic Society,Theodosius I,,Bronze,378,383
62306,Antoninus Pius,,aureus,"Struck Gold. (obverse) Bust of Faustina II, dr...",,gold,Rome,FAVSTINA – AVG PII AVG FIL,"Bust of Faustina II, draped, right, head bare,...",Faustina the Younger,...,CONCORDIA,Dove standing right,http://numismatics.org/collection/2017.34.1,"145, 161",American Numismatic Society,Antoninus Pius,,Struck Gold obverse Bust of Faustina II draped...,145,161


# OCRE

In [27]:
ocre_df = pd.read_csv('../Data/OCRE.csv',
                    converters={"Authority": cd.stringToList('|'), 'Year': cd.stringToList('|'),
                               'Issuer': cd.stringToList('|'), 'Portrait': cd.stringToList('|'),
                               'Reference': cd.stringToList('|')})
ocre_df = ocre_df.replace(np.nan, '', regex=True)
ocre_df['Description'] = ocre_df.apply(lambda row: cd.makeDescription(
                                                                    row['Material'], 
                                                                    row['Manufacture'], 
                                                                    row['Obverse Type'], 
                                                                    row['Reverse Type']
                                                                   ), axis=1)

# Clean denomination
ocre_df['Denomination'] = ocre_df.apply(lambda row: cd.cleanDenomination(row['Denomination']), axis=1)
ocre_df['Material'] = ocre_df.apply(cleanMaterials("Material"), axis=1)

# Remove confusing columns
ocre_df = ocre_df.drop(['Unnamed: 0', 'Unnamed: 0.1', 'URI'], axis=1)
ocre_df.tail()

Unnamed: 0,Title,RecordId,Authority,Degree,Deity,Denomination,Dynasty,Engraver,Era,Issuer,...,Reverse Type,Year,Date Record Modified,OCRE URL,URL,collection,collectionID,findspot,hoard,Description
112576,RIC V Gallienus (joint reign) 18,ric.5.gall(1).18,"[Valerian, Gallienus]",,,antoninianus,,,,[],...,Trophy between two captives,"[258, 259]",2018-08-08T16:20:43Z,http://numismatics.org/ocre/id/ric.5.gall(1).18,http://hdl.handle.net/428894.vzg/41cfec12-cd6f...,Münzkabinett der Universität Göttingen,158574,,,"Struck Silver. (obverse) Bust of Gallienus, ra..."
112577,RIC V Gallienus (joint reign) 18,ric.5.gall(1).18,"[Valerian, Gallienus]",,,antoninianus,,,,[],...,Trophy between two captives,"[258, 259]",2018-08-08T16:20:43Z,http://numismatics.org/ocre/id/ric.5.gall(1).18,http://hdl.handle.net/428894.vzg/a6f91f3e-bd7f...,Münzkabinett der Universität Göttingen,162043,,,"Struck Silver. (obverse) Bust of Gallienus, ra..."
112578,RIC V Gallienus (joint reign) 18,ric.5.gall(1).18,"[Valerian, Gallienus]",,,antoninianus,,,,[],...,Trophy between two captives,"[258, 259]",2018-08-08T16:20:43Z,http://numismatics.org/ocre/id/ric.5.gall(1).18,http://hdl.handle.net/428894.vzg/87ff79b8-529c...,State Coin Collection of Munich,184699,,,"Struck Silver. (obverse) Bust of Gallienus, ra..."
112579,RIC V Gallienus (joint reign) 16,ric.5.gall(1).16,"[Valerian, Gallienus]",,,antoninianus,,,,[],...,Eagle on globe between two ensigns,[259],2018-08-08T16:20:43Z,http://numismatics.org/ocre/id/ric.5.gall(1).16,http://www.ikmk.at/object.php?id=ID215526,Münzkabinett Wien,AT-KHMW-MK/IKMK-ID215526,,,"Struck Silver. (obverse) Bust of Gallienus, ra..."
112580,RIC V Gallienus (joint reign) 163,ric.5.gall(1).163,"[Valerian, Gallienus]",,Providentia,antoninianus,,,,[],...,"Providentia, draped, standing left, leaning on...","[257, 258]",2018-08-08T16:21:11Z,http://numismatics.org/ocre/id/ric.5.gall(1).163,http://numismatics.org/collection/1984.67.3553,American Numismatic Society,1984.67.3553,,,"Struck Silver. (obverse) Head of Gallienus, ra..."


In [28]:
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
strings = ['Deity', 'Denomination', 'Mint', 'Description', 'Obverse Legend',
           'Reverse Legend', 'Material', 'Obverse Type', 'Reverse Type', 'hoard', 
           'Title', 'collectionID', 'RecordId', 'collection']
floats = []
dates = []
redundant_notes = []
do_nothing = ['OCRE URL', 'URL']
duplicate_cols = 'URL'

cleaned_ocre = cd.cleanDF(ocre_df, lists, strings, floats, dates, redundant_notes, do_nothing, 
                              duplicate_cols, production_place='Mint')
cleaned_ocre.columns

Index(['Authority', 'Deity', 'Denomination', 'Description', 'Issuer',
       'Material', 'Mint', 'OCRE URL', 'Obverse Legend', 'Obverse Type',
       'Portrait', 'RecordId', 'Reference', 'Reverse Legend', 'Reverse Type',
       'Title', 'URL', 'Year', 'collection', 'collectionID', 'hoard'],
      dtype='object')

In [29]:
# Convert everything to strings and integers to put into SQL db
lists = ['Year', 'Authority', 'Issuer', 'Portrait', 'Reference']
for col in lists:
    cleaned_ocre[col] = cleaned_ocre[col].apply(cd.listToString())
    
# Get start and end date from date/year column
cleaned_ocre['Authority'] = cleaned_ocre.apply(cleanAuthority("Authority", 'Issuer'), axis=1)
# cleaned_ocre = cleaned_ocre[cleaned_ocre['Authority'] != '']
cleaned_ocre['emperor'] = cleaned_ocre.apply(getEmperor("Authority"), axis=1)
# cleaned_ocre = cleaned_ocre[cleaned_ocre['emperor'] != '']
# Portrait is from both obverse and reverse
cleaned_ocre['emperorDisplayed'] = cleaned_ocre.apply(getEmperor("Portrait"), axis=1)
regex = re.compile('[%s]' % re.escape(string.punctuation))
cleaned_ocre['cleanDesc'] = cleaned_ocre.apply(lambda row: regex.sub('', row['Description']), axis=1)
cleaned_ocre['StartDate'] = cleaned_ocre.apply(lambda row: row['Year'].split(",")[0], axis=1)
cleaned_ocre['EndDate'] = cleaned_ocre.apply(lambda row: row['Year'].split(",")[1] 
                                                 if len(row['Year'].split(",")) > 1 
                                                 else row['Year'].split(",")[0], 
                                         axis=1)

# Reorder columns
# columns = [u'Authority', u'Deity', u'Denomination', u'Description', u'Issuer',
#        u'Material', u'Mint', u'OCRE URL', u'Obverse Legend', u'Obverse Type',
#        u'Portrait', u'RecordId', u'Reference', u'Reverse Legend',
#        u'Reverse Type', u'Title', u'URL', u'Year', 'Collection', u'collectionID', u'Hoard',
#        u'Emperor', 'emperorDisplayed', u'CleanDesc', u'StartDate', u'EndDate']

# cleaned_ocre = cleaned_ocre[columns]

cleaned_ocre['Source'] = 'OCRE'

#for col in columns:
#    cleaned_ocre[col] = cleaned_ocre[col].apply(lambda x: x.decode('utf-8') if type(x) is not float else x)

In [30]:
cleaned_ocre.tail()

Unnamed: 0,Authority,Deity,Denomination,Description,Issuer,Material,Mint,OCRE URL,Obverse Legend,Obverse Type,...,Year,collection,collectionID,hoard,emperor,emperorDisplayed,cleanDesc,StartDate,EndDate,Source
112359,"Gallienus, Valerian",,antoninianus,"Struck Silver. (obverse) Bust of Gallienus, ra...",,silver,Lugdunum,http://numismatics.org/ocre/id/ric.5.gall(1).18,GALLIENVS P F AVG,"Bust of Gallienus, radiate, cuirassed, left, h...",...,"258, 259",Münzkabinett der Universität Göttingen,158574,,"Gallienus, Valerian",Gallienus,Struck Silver obverse Bust of Gallienus radiat...,258,259,OCRE
112360,"Gallienus, Valerian",,antoninianus,"Struck Silver. (obverse) Bust of Gallienus, ra...",,silver,Lugdunum,http://numismatics.org/ocre/id/ric.5.gall(1).18,GALLIENVS P F AVG,"Bust of Gallienus, radiate, cuirassed, left, h...",...,"258, 259",Münzkabinett der Universität Göttingen,162043,,"Gallienus, Valerian",Gallienus,Struck Silver obverse Bust of Gallienus radiat...,258,259,OCRE
112361,"Gallienus, Valerian",,antoninianus,"Struck Silver. (obverse) Bust of Gallienus, ra...",,silver,Lugdunum,http://numismatics.org/ocre/id/ric.5.gall(1).18,GALLIENVS P F AVG,"Bust of Gallienus, radiate, cuirassed, left, h...",...,"258, 259",State Coin Collection of Munich,184699,,"Gallienus, Valerian",Gallienus,Struck Silver obverse Bust of Gallienus radiat...,258,259,OCRE
112362,"Gallienus, Valerian",,antoninianus,"Struck Silver. (obverse) Bust of Gallienus, ra...",,silver,Lugdunum,http://numismatics.org/ocre/id/ric.5.gall(1).16,GALLIENVS AVG GERM V,"Bust of Gallienus, radiate, cuirassed, to wais...",...,259,Münzkabinett Wien,AT-KHMW-MK/IKMK-ID215526,,"Gallienus, Valerian",Gallienus,Struck Silver obverse Bust of Gallienus radiat...,259,259,OCRE
112363,"Gallienus, Valerian",Providentia,antoninianus,"Struck Silver. (obverse) Head of Gallienus, ra...",,silver,Rome,http://numismatics.org/ocre/id/ric.5.gall(1).163,IMP GALLIENVS P F AVG GERM,"Head of Gallienus, radiate, right",...,"257, 258",American Numismatic Society,1984.67.3553,,"Gallienus, Valerian",Gallienus,Struck Silver obverse Head of Gallienus radiat...,257,258,OCRE


In [31]:
cleaned_ocre.to_csv("../Data/OCRE_clean.csv")

# Store Data

In [20]:
cnx = sqlite3.connect('../Data/'+title+'.sqlite')
cur = cnx.cursor()

In [21]:
cur.execute('DROP TABLE IF EXISTS britishMuseum')
cur.execute('''CREATE TABLE britishMuseum(associatedNames TEXT, authority TEXT, bibliography TEXT, culture TEXT, 
                    curatorComment TEXT, dates TEXT, denomination TEXT, description TEXT, material TEXT, 
                    museumNumber TEXT, objectType TEXT, obverseLegend TEXT, obverseType TEXT, mint TEXT, 
                    reverseLegend TEXT, reverseType TEXT, state TEXT, subjects TEXT, weight INT, url TEXT, 
                    source TEXT, emperor TEXT, emperorDisplayed TEXT, cleanDesc TEXT, startDate INT, endDate INT)''')

<sqlite3.Cursor at 0x140ff46c0>

In [22]:
cur.executemany("""INSERT INTO britishMuseum (associatedNames, authority, bibliography, culture,
                        curatorComment, dates, denomination, description,
                        material, museumNumber, objectType, obverseLegend, obverseType,
                        mint, reverseLegend, reverseType, state, subjects, weight, url, source, 
                        emperor, emperorDisplayed, cleanDesc, startDate, endDate) 
                        VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", 
                list(cleaned_bm.to_records(index=False)))

<sqlite3.Cursor at 0x140ff46c0>

In [23]:
cur.execute('DROP TABLE IF EXISTS americanNumismaticSociety')
cur.execute('''CREATE TABLE americanNumismaticSociety(authority TEXT, deity TEXT, denomination TEXT, 
                description TEXT, issuer TEXT, material TEXT, mint TEXT, obverseLegend TEXT, obverseType TEXT, 
                portrait TEXT, reference TEXT, reverseLegend TEXT, reverseType TEXT, url TEXT, dates TEXT, 
                source TEXT, emperor TEXT, emperorDisplayed TEXT, cleanDesc TEXT, startDate INT, endDate INT)''')

<sqlite3.Cursor at 0x140ff46c0>

In [24]:
cur.executemany("""INSERT INTO americanNumismaticSociety (authority, deity, denomination, description, 
                       issuer, material, mint, obverseLegend, obverseType, portrait, reference, reverseLegend, 
                       reverseType, url, dates, source, emperor, emperorDisplayed, cleanDesc, startDate, endDate) 
                    VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", 
                list(cleaned_ans.to_records(index=False)))

<sqlite3.Cursor at 0x140ff46c0>

In [25]:
cur.execute('DROP TABLE IF EXISTS OCRE')
cur.execute('''CREATE TABLE OCRE(recordID TEXT, title TEXT, authority TEXT, deity TEXT, denomination TEXT, 
                        description TEXT, issuer TEXT, material TEXT, mint TEXT, obverseLegend TEXT, 
                        obverseType TEXT, portrait TEXT, reference TEXT, reverseLegend TEXT, 
                        reverseType TEXT, ocreUrl TEXT, url TEXT, dates TEXT, collection TEXT, 
                        collectionID TEXT, hoard TEXT, emperor TEXT, emperorDisplayed TEXT, cleanDesc TEXT, 
                        startDate INT, endDate INT, source, TEXT)''')

<sqlite3.Cursor at 0x140ff46c0>

In [26]:
cur.executemany("""INSERT INTO OCRE (authority, deity, denomination, description, 
                       issuer, material, mint, ocreUrl, obverseLegend, obverseType, portrait, 
                       recordID, reference, reverseLegend, 
                       reverseType, title, url, dates, collection, collectionID, hoard, emperor, emperorDisplayed,
                       cleanDesc, startDate, endDate, source) 
                    VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", 
                list(cleaned_ocre.to_records(index=False)))

<sqlite3.Cursor at 0x140ff46c0>

In [27]:
cur.execute('DROP TABLE IF EXISTS allData')
cur.execute('''
CREATE TABLE allData (
  authority text,
  emperor text,
  emperorDisplayed text,
  material text,
  denomination text,
  subject text,
  mint text,
  date text,
  description text,
  cleanDesc text,
  obverseType text,
  reverseType text,
  startDate int,
  endDate int,
  source text,
  url text
);
''')

<sqlite3.Cursor at 0x140ff46c0>

In [28]:
cur.execute('''
INSERT INTO allData (authority, material, emperor, emperorDisplayed, denomination, subject, mint, date, description, cleanDesc, obverseType, reverseType, startDate, endDate, source, url)
SELECT
  authority,
  material,
  emperor,
  emperorDisplayed,
  denomination,
  subjects,
  mint,
  dates,
  description,
  cleanDesc,
  obverseType,
  reverseType,
  startDate,
  endDate,
  source,
  url
FROM britishMuseum;
''')

cur.execute('''
INSERT INTO allData (authority, material, emperor, emperorDisplayed, denomination, subject, mint, date, description, cleanDesc, obverseType, reverseType, startDate, endDate, source, url)
SELECT
  authority,
  material,
  emperor,
  emperorDisplayed,
  denomination,
  portrait,
  mint,
  dates,
  description,
  cleanDesc,
  obverseType,
  reverseType,
  startDate,
  endDate,
  source,
  url
FROM americanNumismaticSociety;''')

cur.execute('''
INSERT INTO allData (authority, material, emperor, emperorDisplayed, denomination, subject, mint, date, description, cleanDesc, obverseType, reverseType, startDate, endDate, source, url)
SELECT
  authority,
  material,
  emperor,
  emperorDisplayed,
  denomination,
  portrait,
  mint,
  dates,
  description,
  cleanDesc,
  obverseType,
  reverseType,
  startDate,
  endDate,
  source,
  ocreUrl
FROM OCRE;''')


<sqlite3.Cursor at 0x140ff46c0>

In [29]:
cnx.commit()