# Notebook of scripts to import and parse data

The raw CSV has the following columns, in this order:

`['year', 'constituency', 'constituency_type', 'candidates', 'party', 'vote_count', 'vote_percentage']`

## Importing data from CSV

In [1]:
import pandas as pd
import os
import json

# Make sure the output directory written to by this notebook exists.
# exist_ok=True replaces the separate "exists?" check, so there is no gap
# between checking for the directory and creating it.
os.makedirs("output", exist_ok=True)

data = pd.read_csv("./RAW DATA/parliamentary-general-election-results-by-candidate.csv")

# Keep only the 1955 rows; 'year' and 'constituency_type' are dropped because
# they are not used in the remaining columns of this notebook's output.
df = data.loc[data["year"] == 1955].drop(["year", "constituency_type"], axis=1)

# Convert the whole column in one call (unparseable values become NaN), then
# scale to a percentage with two decimals. Python's round() is kept on purpose
# so the values match the csv-based cell, which rounds the same way.
df["vote_percentage"] = (
    pd.to_numeric(df["vote_percentage"], errors='coerce')
    .apply(lambda x: round(x * 100, 2))
)

df.head()

Unnamed: 0,constituency,candidates,party,vote_count,vote_percentage
0,Bukit Panjang,Goh Tong Liang,PP,3097,72.21
1,Bukit Panjang,Lim Wee Toh,SLF,1192,27.79
2,Bukit Timah,S. F. Ho,PP,722,11.62
3,Bukit Timah,Lim Ching Siong,PAP,3259,52.45
4,Bukit Timah,A. N. Mitra,SLF,924,14.88


## winners1955.json
```
{
    "Havelock": {
        "candidates": "Lim Yew Hock",
        "party": "SLF",
        "vote_percentage": "0.8648"
    },
    "Tanjong Pagar": {
        "candidates": "Lee Kuan Yew",
        "party": "PAP",
        "vote_percentage": "0.7833"
    },
```

In [2]:
def get_winners(group):
    """Return the row(s) of ``group`` that hold the highest ``vote_percentage``.

    ``group`` is a DataFrame with a ``vote_percentage`` column. Every row whose
    value equals the column maximum is returned, so a tie yields several rows.
    The original index labels of the selected rows are preserved.
    """
    top_share = group['vote_percentage'].max()
    is_top = group['vote_percentage'] == top_share
    return group.loc[is_top]

# Pick the winning row(s) of every constituency by calling get_winners on each
# group explicitly. Iterating the groupby hands each group over with its
# 'constituency' column intact (groups come in sorted key order), whereas
# groupby(...).apply() operating on the grouping column is deprecated since
# pandas 2.2; once that column is excluded, set_index() below loses its key.
winners = (
    pd.concat([get_winners(group) for _, group in df.groupby('constituency')])
    .reset_index(drop=True)
    # Largest winning margin first.
    .sort_values(by="vote_percentage", ascending=False)
    .set_index(keys="constituency")
)

# orient="index" writes one JSON object per constituency, keyed by its name.
winners.to_json(r"output/winners1955.json", orient="index")
winners.head()

Unnamed: 0_level_0,candidates,party,vote_count,vote_percentage
constituency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Havelock,Lim Yew Hock,SLF,5744,86.48
Tanjong Pagar,Lee Kuan Yew,PAP,6029,78.33
Bukit Panjang,Goh Tong Liang,PP,3097,72.21
Queenstown,Lee Choon Eng,SLF,2792,67.28
Sembawang,Ahmad Ibrahim,Independent,4281,63.24


I tried using pandas because I hoped it would provide an easier way to create subsets of the data. It didn't, so the next section builds the per-constituency lists with the plain `csv` module instead. That approach is less efficient, but the data set is small.


## constituencies1955.json

In [8]:
import csv

# Column order of the raw CSV; used to label the positional fields of each row.
keys = ['year', 'constituency', 'constituency_type', 'candidates', 'party', 'vote_count', 'vote_percentage']

# constituency name -> list of candidate result records for the 1955 election.
# (Deliberately not called `dict`: that would shadow the builtin for the rest
# of the kernel session.)
candidates_by_constituency = {}

# newline='' lets the csv module do its own line-ending handling, as its
# documentation recommends for files passed to csv.reader.
with open('./RAW DATA/parliamentary-general-election-results-by-candidate.csv', newline='') as csv_file:
    for row in csv.reader(csv_file, delimiter=','):
        record = {column: value for column, value in zip(keys, row)}
        # Skips the header row, blank lines and every other election year.
        if record.get("year") != "1955":
            continue
        candidates_by_constituency.setdefault(record["constituency"], []).append({
            "candidates": record["candidates"],
            "party": record["party"],
            # Left as the raw CSV string, exactly as it is read from the file.
            "vote_count": record["vote_count"],
            # Scale the stored value by 100 and keep two decimals.
            "vote_percentage": round(float(record["vote_percentage"]) * 100, 2),
        })

# Within each constituency, list candidates from highest to lowest share.
# sorted() is stable, so candidates with equal shares stay in file order.
sortedDict = {
    constituency: sorted(candidates, key=lambda c: c["vote_percentage"], reverse=True)
    for constituency, candidates in candidates_by_constituency.items()
}

# The with-block guarantees the file is flushed and closed even if
# serialisation fails part-way through.
with open(r"output/constituencies1955.json", "w") as json_file:
    json.dump(sortedDict, json_file)