### Electoral Parties PDF Table Scraping 

**The tables contain data about the number of political parties present in a variety of countries over time.**

**By: Zach Palmer**

In [1]:
# import packages
import pandas as pd
import numpy as np
import camelot as cl

import matplotlib.pyplot as plt
%matplotlib inline

In [50]:
# Scrape the tables from pages 5-49 of the election-indices PDF.
# The 'stream' flavor is used, and parsing is restricted to a single
# table area given as an "x1,y1,x2,y2" string.
electoral_tables = cl.read_pdf(
    'pdfs/ElectionIndices.pdf',
    flavor='stream',
    pages='5-49',
    table_areas=['0,770,420,0'],
)

In [None]:
# Stack every scraped table into one big dataframe with a single concat call.
# Concatenating inside a loop re-copies the growing frame on every pass, and
# starting from electoral_tables[0].df would alias the first table's own frame.
merged_df = pd.concat(
    [table.df for table in electoral_tables],
    ignore_index=True,
)

# update the column headers (raises if the merged frame does not have
# exactly five columns)
merged_df.columns = ['Year', 'LSq', 'Eff Nv', 'Eff Ns', 'N Seats']

# remove the extraneous rows whose 'Year' cell is the "See Notes." marker
merged_df = merged_df.query('Year != "See Notes."')

# fix the string formatting in many of the country names by stripping
# newline characters from string cells
merged_df = merged_df.replace(r'\n', '', regex=True)

In [61]:
# Persist the cleaned dataframe as a CSV file, leaving out the row index.
merged_df.to_csv(path_or_buf='electoral_party_data.csv', index=False)