In [185]:
import pandas as pd
import re

In [215]:
def get_unique_values(df):
    print(df.nunique())
    print("\n")
    unique_values = {}
    for column in df.columns:
        unique_values[column] = df[column].unique().tolist()
    for column, values in unique_values.items():
        print(f"Unique values in '{column}':")
        print(values)
        print("\n")

def get_NA_values(df):
    missing = df.isnull().sum()
    print(missing)

def get_empty_values(df):
    empty = (df == "").sum()
    print(empty)

def convert_to_short_form(party_name):
    short_form = ''.join(word[0].upper() for word in party_name.split())
    return short_form

# details_of_assembly_segment_2019.csv

In [187]:
df = pd.read_csv("details_of_assembly_segment_2019.csv")
df.head()

Unnamed: 0,state_name,parlimentary_constituency_name,assembly_constituency_name,nota_votes,candidate_name,party_name,votes_secured
0,Andhra Pradesh,Aruku,Palakonda (ST),3736,KISHORE CHANDRA DEO,TDP,54056.0
1,Andhra Pradesh,Aruku,Palakonda (ST),3736,Dr. KOSURI KASI VISWANADHA VEERA VENKATA SATYA...,BJP,1753.0
2,Andhra Pradesh,Aruku,Palakonda (ST),3736,GODDETI. MADHAVI,YSRCP,69588.0
3,Andhra Pradesh,Aruku,Palakonda (ST),3736,SHRUTI DEVI VYRICHERLA,INC,1327.0
4,Andhra Pradesh,Aruku,Palakonda (ST),3736,GANGULAIAH VAMPURU.,JnP,2987.0


In [188]:
df.rename(columns={'parlimentary_constituency_name':'constituency_name', 
                   'votes_secured':'secured_votes'}, 
                   inplace=True)

In [189]:
# remove SC and ST
df['constituency_name'] = df['constituency_name'].str.replace(r'\s*\(SC\)\s*|\s*\(ST\)\s*|\s*-\s*\d+$', '', regex=True).str.strip()

In [190]:
# remove SC and ST
df['assembly_constituency_name'] = df['assembly_constituency_name'].str.replace(r'\s*\(SC\)\s*|\s*\(ST\)\s*|\s*-\s*\d+$', '', regex=True).str.strip()

In [191]:
# convert party names to short form and upper
df['party_name'] = df['party_name'].str.upper()

In [192]:
get_NA_values(df)

state_name                     0
constituency_name              0
assembly_constituency_name     0
nota_votes                     0
candidate_name                 0
party_name                     0
secured_votes                 15
dtype: int64


In [193]:
df = df.fillna(0)

In [194]:
get_empty_values(df)

state_name                    0
constituency_name             0
assembly_constituency_name    0
nota_votes                    0
candidate_name                0
party_name                    0
secured_votes                 0
dtype: int64


In [195]:
get_unique_values(df)

state_name                       36
constituency_name               540
assembly_constituency_name     4034
nota_votes                     2431
candidate_name                 7893
party_name                      673
secured_votes                 13684
dtype: int64


Unique values in 'state_name':
['Andhra Pradesh', 'Arunachal Pradesh', 'Assam', 'Bihar', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Tripura', 'Uttar Pradesh', 'West Bengal', 'Chhattisgarh', 'Jharkhand', 'Uttarakhand', 'Telangana', 'Andaman & Nicobar Islands', 'Chandigarh', 'Dadra & Nagar Haveli', 'Daman & Diu', 'NCT OF Delhi', 'Lakshadweep', 'Puducherry']


Unique values in 'constituency_name':
['Aruku', 'Srikakulam', 'Vizianagaram', 'Visakhapatnam', 'Anakapalli', 'Kakinada', 'Amalapuram', 'Rajahmundry', 'Narsapuram', 'Eluru', 'Machilipatnam

In [196]:
df.head()

Unnamed: 0,state_name,constituency_name,assembly_constituency_name,nota_votes,candidate_name,party_name,secured_votes
0,Andhra Pradesh,Aruku,Palakonda,3736,KISHORE CHANDRA DEO,TDP,54056.0
1,Andhra Pradesh,Aruku,Palakonda,3736,Dr. KOSURI KASI VISWANADHA VEERA VENKATA SATYA...,BJP,1753.0
2,Andhra Pradesh,Aruku,Palakonda,3736,GODDETI. MADHAVI,YSRCP,69588.0
3,Andhra Pradesh,Aruku,Palakonda,3736,SHRUTI DEVI VYRICHERLA,INC,1327.0
4,Andhra Pradesh,Aruku,Palakonda,3736,GANGULAIAH VAMPURU.,JNP,2987.0


In [197]:
df.to_csv('final/final_details_of_assembly_segment_2019.csv')

# eci_data_2024.csv

In [198]:
df = pd.read_csv("eci_data_2024.csv", encoding='latin-1')
df.head()

Unnamed: 0,S.N,Candidate,Party,EVM Votes,Postal Votes,Total Votes,% of Votes,State,Constituency
0,1,BISHNU PADA RAY,Bharatiya Janata Party,102182,254,102436,50.58,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
1,2,KULDEEP RAI SHARMA,Indian National Congress,77829,211,78040,38.54,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
2,3,MANOJ PAUL,Andaman Nicobar Democratic Congress,8236,18,8254,4.08,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
3,4,D AYYAPPAN,Communist Party of India (Marxist),6009,8,6017,2.97,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
4,5,V.K. ABDUL AZIZ,Independent,2195,8,2203,1.09,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1


In [199]:
df.rename(columns={'Candidate':'candidate_name', 
                   'Party':'party_name',
                   'Total Votes':'secured_votes',
                   'State':'state_name',
                   'Constituency':'constituency_name'}, 
                   inplace=True)

In [200]:
# extract constituency number
df['constituency_number'] = df['constituency_name'].str.extract(r'-(\s*\d+)$')[0].str.strip()

In [201]:
# remove SC and ST
df['constituency_name'] = df['constituency_name'].str.replace(r'\s*\(SC\)\s*|\s*\(ST\)\s*|\s*-\s*\d+$', '', regex=True).str.strip()

In [203]:
# convert party name to short form and upper case
df['party_name'] = df['party_name'].apply(convert_to_short_form)
df['party_name'] = df['party_name'].str.upper()

In [204]:
get_NA_values(df)

S.N                    0
candidate_name         0
party_name             0
EVM Votes              0
Postal Votes           0
secured_votes          0
% of Votes             0
state_name             0
constituency_name      0
constituency_number    0
dtype: int64


In [205]:
get_empty_values(df)

S.N                    0
candidate_name         0
party_name             0
EVM Votes              0
Postal Votes           0
secured_votes          0
% of Votes             0
state_name             0
constituency_name      0
constituency_number    0
dtype: int64


In [206]:
get_unique_values(df)

S.N                      55
candidate_name         8099
party_name               23
EVM Votes              6081
Postal Votes           1447
secured_votes          6100
% of Votes             1557
state_name               36
constituency_name       541
constituency_number      80
dtype: int64


Unique values in 'S.N':
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55]


Unique values in 'candidate_name':
['BISHNU PADA RAY', 'KULDEEP RAI SHARMA', 'MANOJ PAUL', 'D AYYAPPAN', 'V.K. ABDUL AZIZ', 'K J B SELVARAJ', 'DR ARUN KUMAR MALLIK', 'RINKU MALA MONDAL', 'K VENKAT RAM BABU', 'USHA KUMARI', 'SALAMAT MONDAL', 'ANAND RAMNATH ARLEKAR', 'NOTA', 'G M HARISH (BALAYOGI)', 'RAPAKA VARAPRASADA RAO', 'GOUTHAM JANGA', 'DORABABU YALLA', 'KATRU NAGA BABU', 'VADDI LALITH KUMAR', 'MAKEY DAVY PRASAD', 'GUDE VENKATESWARULU', 'RAMESH JILLELLA', 'YA

In [207]:
df.head()

Unnamed: 0,S.N,candidate_name,party_name,EVM Votes,Postal Votes,secured_votes,% of Votes,state_name,constituency_name,constituency_number
0,1,BISHNU PADA RAY,B,102182,254,102436,50.58,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
1,2,KULDEEP RAI SHARMA,I,77829,211,78040,38.54,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
2,3,MANOJ PAUL,A,8236,18,8254,4.08,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
3,4,D AYYAPPAN,C,6009,8,6017,2.97,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
4,5,V.K. ABDUL AZIZ,I,2195,8,2203,1.09,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1


In [208]:
df.to_csv('final/final_eci_data_2024.csv')

# maha_results_2019.csv

In [249]:
df = pd.read_csv("maha_results_2019.csv", encoding='latin-1')
df.head()

Unnamed: 0,ï»¿ STATE/UT NAME,AC NO.,AC NAME,CANDIDATE NAME,SEX,AGE,CATEGORY,PARTY,SYMBOL,GENERAL,POSTAL,TOTAL,% VOTES POLLED,TOTAL ELECTORS
0,Maharashtra,1,Akkalkuwa,1 ADV. K. C. PADAVI,MALE,61.0,ST,INC,Hand,82509,261.0,82770,41.255458,278888.0
1,Maharashtra,1,Akkalkuwa,2 AAMSHYA FULJI PADAVI,MALE,51.0,ST,SHS,Bow and Arrow,80532,142.0,80674,40.210738,278888.0
2,Maharashtra,1,Akkalkuwa,3 NAGESH DILVARSING PADVI,MALE,44.0,ST,IND,Gas Stove,21583,81.0,21664,10.798094,278888.0
3,Maharashtra,1,Akkalkuwa,4 NOTA,,,,NOTA,NOTA,4856,1.0,4857,2.420898,278888.0
4,Maharashtra,1,Akkalkuwa,5 ADV. KAILAS PRATAPSING VASAVE,MALE,28.0,ST,AAAP,Broom,4034,21.0,4055,2.021154,278888.0


In [250]:
df.rename(columns={'ï»¿ STATE/UT NAME ':'state_name', 
                   ' AC NO. ':'assembly_constituency_number',
                   ' AC NAME ':'assembly_ constituency_name',
                   ' CANDIDATE NAME ':'candidate_name',
                   ' PARTY ':'party_name',
                   ' TOTAL ELECTORS ':'total_votes_in_state'}, 
                   inplace=True)

In [255]:
# remove number in candidate name
df['candidate_name'] = df['candidate_name'].str.replace(r'^\d+\s+', '', regex=True)

In [256]:
get_NA_values(df)

state_name                      0
assembly_constituency_number    0
assembly_ constituency_name     0
candidate_name                  0
 SEX                            0
 AGE                            0
 CATEGORY                       0
party_name                      0
 SYMBOL                         0
 GENERAL                        0
 POSTAL                         0
 TOTAL                          0
 % VOTES POLLED                 0
total_votes_in_state            0
dtype: int64


In [257]:
df.fillna('', inplace=True)

In [258]:
get_empty_values(df)

state_name                         0
assembly_constituency_number       0
assembly_ constituency_name      288
candidate_name                   288
 SEX                             576
 AGE                             576
 CATEGORY                        576
party_name                       288
 SYMBOL                          288
 GENERAL                           0
 POSTAL                         1041
 TOTAL                             0
 % VOTES POLLED                    0
total_votes_in_state             288
dtype: int64


In [259]:
get_unique_values(df)

state_name                         2
assembly_constituency_number     289
assembly_ constituency_name      289
candidate_name                  3233
 SEX                               4
 AGE                              59
 CATEGORY                          4
party_name                       127
 SYMBOL                          144
 GENERAL                        2726
 POSTAL                          694
 TOTAL                          2721
 % VOTES POLLED                 3791
total_votes_in_state             289
dtype: int64


Unique values in 'state_name':
['Maharashtra', ' TURNOUT ']


Unique values in 'assembly_constituency_number':
['1', ' TOTAL : ', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59

In [260]:
df.head()

Unnamed: 0,state_name,assembly_constituency_number,assembly_ constituency_name,candidate_name,SEX,AGE,CATEGORY,party_name,SYMBOL,GENERAL,POSTAL,TOTAL,% VOTES POLLED,total_votes_in_state
0,Maharashtra,1,Akkalkuwa,ADV. K. C. PADAVI,MALE,61.0,ST,INC,Hand,82509,261.0,82770,41.255458,278888.0
1,Maharashtra,1,Akkalkuwa,AAMSHYA FULJI PADAVI,MALE,51.0,ST,SHS,Bow and Arrow,80532,142.0,80674,40.210738,278888.0
2,Maharashtra,1,Akkalkuwa,NAGESH DILVARSING PADVI,MALE,44.0,ST,IND,Gas Stove,21583,81.0,21664,10.798094,278888.0
3,Maharashtra,1,Akkalkuwa,NOTA,,,,NOTA,NOTA,4856,1.0,4857,2.420898,278888.0
4,Maharashtra,1,Akkalkuwa,ADV. KAILAS PRATAPSING VASAVE,MALE,28.0,ST,AAAP,Broom,4034,21.0,4055,2.021154,278888.0


In [261]:
df.to_csv('final/maha_results_2019.csv')

# Final CSV files

In [262]:
df = pd.read_csv('final/final_details_of_assembly_segment_2019.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,state_name,constituency_name,assembly_constituency_name,nota_votes,candidate_name,party_name,secured_votes
0,0,Andhra Pradesh,Aruku,Palakonda,3736,KISHORE CHANDRA DEO,TDP,54056.0
1,1,Andhra Pradesh,Aruku,Palakonda,3736,Dr. KOSURI KASI VISWANADHA VEERA VENKATA SATYA...,BJP,1753.0
2,2,Andhra Pradesh,Aruku,Palakonda,3736,GODDETI. MADHAVI,YSRCP,69588.0
3,3,Andhra Pradesh,Aruku,Palakonda,3736,SHRUTI DEVI VYRICHERLA,INC,1327.0
4,4,Andhra Pradesh,Aruku,Palakonda,3736,GANGULAIAH VAMPURU.,JNP,2987.0


In [263]:
df = pd.read_csv('final/final_eci_data_2024.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,S.N,candidate_name,party_name,EVM Votes,Postal Votes,secured_votes,% of Votes,state_name,constituency_name,constituency_number
0,0,1,BISHNU PADA RAY,B,102182,254,102436,50.58,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
1,1,2,KULDEEP RAI SHARMA,I,77829,211,78040,38.54,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
2,2,3,MANOJ PAUL,A,8236,18,8254,4.08,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
3,3,4,D AYYAPPAN,C,6009,8,6017,2.97,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1
4,4,5,V.K. ABDUL AZIZ,I,2195,8,2203,1.09,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1


In [264]:
df = pd.read_csv('final/maha_results_2019.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,state_name,assembly_constituency_number,assembly_ constituency_name,candidate_name,SEX,AGE,CATEGORY,party_name,SYMBOL,GENERAL,POSTAL,TOTAL,% VOTES POLLED,total_votes_in_state
0,0,Maharashtra,1,Akkalkuwa,ADV. K. C. PADAVI,MALE,61.0,ST,INC,Hand,82509,261.0,82770,41.255458,278888.0
1,1,Maharashtra,1,Akkalkuwa,AAMSHYA FULJI PADAVI,MALE,51.0,ST,SHS,Bow and Arrow,80532,142.0,80674,40.210738,278888.0
2,2,Maharashtra,1,Akkalkuwa,NAGESH DILVARSING PADVI,MALE,44.0,ST,IND,Gas Stove,21583,81.0,21664,10.798094,278888.0
3,3,Maharashtra,1,Akkalkuwa,NOTA,,,,NOTA,NOTA,4856,1.0,4857,2.420898,278888.0
4,4,Maharashtra,1,Akkalkuwa,ADV. KAILAS PRATAPSING VASAVE,MALE,28.0,ST,AAAP,Broom,4034,21.0,4055,2.021154,278888.0
