In [1]:
import pandas as pd
import numpy as np
import doctest
import re

In [52]:
df = pd.read_csv('AugustusCoins_44BC-14AD.csv')
df = df.drop('Unnamed: 0', 1)
df = df.replace(np.nan, '', regex=True)
df.head(5)

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Date,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Production place,State,Subjects,Weight (g),url
0,Portrait of: Augustus (Octavian),Ruler: Augustus (Octavian),RPC1 1801,,,27BC-14,,Bronze coin.(obverse) Head of Augustus r.,[],bronze,19971206.1,coin,Minted in: Odessus,Roman Empire,symbol,7.45,http://www.britishmuseum.org/research/collecti...
1,Portrait of: Augustus (Octavian),Ruler: Augustus (Octavian),RPC1 5476,Roman Provincial,,27BC-14,,Bronze coin.(obverse) Head of Augustus r.,[],bronze,20010335.3,coin,,Roman Empire,symbol,2.27,http://www.britishmuseum.org/research/collecti...
2,Named in inscription & portrayed: Julius Caesar,Ruler: Augustus (Octavian),RPC1 2007,Roman Provincial,,31BC-14,,"Alloy coin.(obverse) Diademed head of Caesar, r.","[{u'Inscription Position': u'reverse', u'Inscr...",alloy,G.1200,coin,Minted in: Apamea,Roman Empire,emperor/empress,8.36,http://www.britishmuseum.org/research/collecti...
3,,Ruler: Augustus (Octavian) (?),,Greek,,27BC-14,,Alloy coin.,,alloy,19051111.1,coin,Minted in: Cremna (?),,,1.49,http://www.britishmuseum.org/research/collecti...
4,Representation of: Augustus (Octavian),,Walker & Higgs 2001 308,Roman Republican,The portrait probably dates from the later 30s...,44BC-40BC,,Seal of glass paste imitating sard: with a bus...,,glass,19230401.928,seal,,,,,http://www.britishmuseum.org/research/collecti...


## Clean Data

In [53]:
def cleanString(string):
    '''
    Parameters
    ----------
    string: str
        String that needs to be cleaned
    
    Returns
    -------
    Removes the descriptor and returns the (assumed) description
    
    Doctests
    --------
    >>> cleanString('Named in inscription & portrayed: Julius Caesar (probably)')
    'Julius Caesar'
    >>> cleanString('Ruler: Augustus (Octavian) (?)')
    'Augustus (Octavian)'
    >>> cleanString('dupondius (?)')
    'dupondius'
    '''
    result = string.replace('(?)', "").replace('(probably)', "")
    if ':' in result:
        result = [s for s in result.split(':')][1]
    return result.strip()

doctest.testmod()
#cleanString('Ruler: Augustus (Octavian) (?)')
#cleanString('dupondius (?)')

TestResults(failed=0, attempted=6)

In [54]:
def dateRange(date):
    '''
    Parameter
    ---------
    date: str
        Date range given as a string
    
    Returns
    -------
    Returns list of the date range
    
    Doctests
    --------
    >>> dateRange('27BC-14 (?)')
    [-27, 14]
    >>> dateRange('44BC (cira) -40BC')
    [-44, -40]
    >>> dateRange('4-14')
    [4, 14]
    
    TODO
    ----
    * Handle specific notes of dates
    '''
    dates = date.split('-')
    result = []
    
    for year in dates:
        certain = True
        bc = 'BC' in year
        try:
            year = int(re.sub('[^\d]', '', year))
        except:
            print(year)
            break
        if bc:
            year = 0 - year
        result.append(year)
        
    return result
        
doctest.testmod()
#dateRange('27BC-14 (?)')

TestResults(failed=0, attempted=6)

In [55]:
def float_conversion(x):
    '''
    Parameter
    ---------
    x: str
        Input value
    
    Return
    ------
    Returns the float or 0 if empty string
    '''
    try:
        x = float(x)
    except:
        x = 0
    return x

In [82]:
mask = (df['Object type'] == 'coin') & (df['Date'].str.find('stC') == -1)
cleaned = df[mask]
weights = cleaned.pop('Weight (g)').replace(np.nan, -1)
dates = cleaned.pop('Date')

cleaned = cleaned.applymap(cleanString)
cleaned['Weight (g)'] = weights.map(float_conversion)
cleaned['Date'] = dates.map(dateRange)
cleaned.head()

Unnamed: 0,Associated names,Authority,Bibliography,Culture/period,Curator's comments,Denomination,Description,Inscriptions,Materials,Museum number,Object type,Production place,State,Subjects,url,Weight (g),Date
0,Augustus (Octavian),Augustus (Octavian),RPC1 1801,,,,Bronze coin.(obverse) Head of Augustus r.,[],bronze,19971206.1,coin,Odessus,Roman Empire,symbol,//www.britishmuseum.org/research/collection_on...,7.45,"[-27, 14]"
1,Augustus (Octavian),Augustus (Octavian),RPC1 5476,Roman Provincial,,,Bronze coin.(obverse) Head of Augustus r.,[],bronze,20010335.3,coin,,Roman Empire,symbol,//www.britishmuseum.org/research/collection_on...,2.27,"[-27, 14]"
2,Julius Caesar,Augustus (Octavian),RPC1 2007,Roman Provincial,,,"Alloy coin.(obverse) Diademed head of Caesar, r.","u'reverse', u'Inscription Type'",alloy,G.1200,coin,Apamea,Roman Empire,emperor/empress,//www.britishmuseum.org/research/collection_on...,8.36,"[-31, 14]"
3,,Augustus (Octavian),,Greek,,,Alloy coin.,,alloy,19051111.1,coin,Cremna,,,//www.britishmuseum.org/research/collection_on...,1.49,"[-27, 14]"
6,,Augustus (Octavian),,Greek,,,Alloy coin.,,alloy,19360220.13,coin,Tyndaris,,,//www.britishmuseum.org/research/collection_on...,7.41,"[-27, 14]"


In [87]:
cleaned[['Bibliography', 'url']].to_csv('test.csv')