In [None]:
import pandas as pd

# Get table from Wikipedia. pd.read_html returns one DataFrame per table it
# finds on the page; index 0 selects the first of them.
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States")[0]
# Only a few columns are of interest to us. Take an explicit copy so the
# cleaning cells below modify an independent frame, not a selection of the
# raw table.
df = df.iloc[:, [0, 2, 3, 5, 6, 7]].copy()
# Name all six kept columns by position. A later cell reads df.Election, so
# that column is named here as well instead of depending on the exact header
# text the page happens to use.
df.columns = ['No', 'Name', 'Term', 'Party', 'Election', 'Vice President']
df = df.set_index('No')
df

In [None]:
# Pull the birth and death years out of the name column, then trim the name.
name_text = df['Name']
# Birth: the first run of four digits in the text.
birth_year = name_text.str.extract(r'(?P<Birth>[0-9]{4})', expand=False)
# Death: the four digits that directly follow an en dash.
death_year = name_text.str.extract(r'–(?P<Death>[0-9]{4})', expand=False)
df.insert(1, 'Birth', birth_year)
df.insert(2, 'Death', death_year)
# Drop everything from the whitespace before the first "(" to the end.
df['Name'] = name_text.str.replace(r'\s\(.*', "", regex=True)
df

In [None]:
# Break each name at its final run of whitespace: the part after it becomes
# the last name, everything before it the first name(s).
name_parts = df['Name'].str.rsplit(n=1, expand=True)
df[['First Name(s)', 'Last Name']] = name_parts
# The combined column is no longer needed once it has been split.
del df['Name']
df

In [None]:
# Clean up and split term dates
# Strip bracketed footnote markers one pair at a time. The negated character
# class stops each match at the first "]", so when a cell holds two markers
# the text between them (e.g. the end date) is kept; a greedy `.*` would
# delete it.
term = df['Term'].str.replace(r'\[[^\]]*\]', "", regex=True)
# Split on the spaced en dash. n=1 caps the result at two parts so it always
# fits the two target columns.
df[['Term Start', 'Term End']] = term.str.split(' – ', n=1, expand=True)
# Blank out the placeholder used for a term that has not ended. The dict form
# is used because, on older pandas releases, a scalar to_replace combined with
# value=None falls back to pad-filling from the previous row instead of
# writing None.
df['Term End'] = df['Term End'].replace({'Incumbent': None})
df = df.drop(columns='Term')
df

In [None]:
# Clean up party names
# Each footnote pattern uses a negated character class so a match never runs
# past its own closing "]"; with a greedy `.*`, a party name sitting between
# two markers would be deleted along with them.
df['Party'] = (
    df['Party']
    # A footnote marker followed by whitespace separates two parties.
    .str.replace(r'\[[^\]]*\]\s', '\n', regex=True)
    # Any remaining footnote marker is simply removed.
    .str.replace(r'\[[^\]]*\]', '', regex=True)
    # Re-join hyphenated names that were broken as "- ".
    .str.replace(r'- ', '-', regex=True)
    # One list entry per party.
    .str.split('\n')
)
df

In [None]:
# Turn each election entry into a list by splitting it on whitespace.
election_text = df['Election']
df['Election'] = election_text.str.split()
df

In [None]:
# Clean up vice president
# Remove bracketed footnote markers. The negated character class confines
# each match to a single [...] pair; a greedy `.*` would span from the first
# "[" to the last "]" and delete any names listed between two markers.
df['Vice President'] = df['Vice President'].str.replace(r'\[[^\]]*\]', '', regex=True)
df

In [None]:
# Reorder columns and save
# Select by name rather than by position: the order is readable here, and a
# missing or renamed column raises a KeyError instead of silently shifting
# the layout of the output file.
df = df[[
    'First Name(s)', 'Last Name',
    'Birth', 'Death',
    'Party',
    'Term Start', 'Term End',
    'Election', 'Vice President',
]]
# The index (set to 'No' earlier in the notebook) is written as the first CSV column.
df.to_csv('presidents.csv')