# Analysis of contestants who got a perfect score on the CCC

In [46]:
import pandas as pd

# The first column of the CSV is the row index written out by an earlier save.
# Read it back as the index so it does not appear as a spurious "Unnamed: 0"
# data column next to the real fields.
table = pd.read_csv('table.csv', index_col=0)
table

Unnamed: 0,Last Name,First Name,School,Location
0,AGARWAL,INIKA,French Int'l School,"Happy Valley, Hong Kong SAR"
1,AHSAN,UMAYEER,Bloor C.I.,"Toronto, ON"
2,AIOANEI,ANTONIA,A.Y. Jackson S.S.,"North York, ON"
3,ANTONIADIS,STYLIANOS NIKOLAOS,Georgios Ioakeimidis,"Athens, Greece"
4,ANYANWU,LIGHT-ALVIN,Graceland Int'l School,"Port Harcourt, Nigeria"
...,...,...,...,...
180,ZHOU,ZHIYUAN,Earl of March S.S.,"Kanata, ON"
181,ZHOU,DONG XIAO,,"Richmond, BC"
182,ZHOU,RICHARD,Diocesan Boys School,"Kowloon, Hong Kong SAR"
183,ZHOU,YIHANG,Nanjing Foreign Language School I,"Nanjing, China"


### Most common locations

In [47]:
# Tally how often each "City, Region" string occurs; value_counts returns the
# tallies sorted from most to least frequent.
location_column = table['Location']
locations = location_column.value_counts()
locations

Location
Toronto, ON                   14
Hong Kong, Hong Kong SAR      13
North York, ON                10
Markham, ON                   10
Kowloon, Hong Kong SAR         9
                              ..
Timmins, ON                    1
Daegu, Korea Republic of       1
Breslau, ON                    1
Whitchurch-Stouffville, ON     1
West Vancouver, BC             1
Name: count, Length: 66, dtype: int64

### Most common countries/provinces/states

In [48]:
# Roll the per-city tallies up to the text after the last comma of each
# location string (a country name, or a province/state abbreviation).
totals_by_region = {}
for place, tally in locations.items():
    region = place.rsplit(',', 1)[-1].strip()
    totals_by_region[region] = totals_by_region.get(region, 0) + tally
countries = pd.Series(totals_by_region).sort_values(ascending=False)
countries

ON                   79
China                28
Hong Kong SAR        23
BC                   13
NJ                    6
Korea Republic of     6
AB                    5
Singapore             4
MB                    3
WA                    2
SK                    2
CA                    2
Australia             2
PA                    2
Turkey                1
NY                    1
QC                    1
Greece                1
Nigeria               1
MA                    1
Indonesia             1
NB                    1
dtype: int64

### Filtering to count only countries

In [49]:
# Postal abbreviations for every Canadian province and territory.
# Used to fold province-level tallies into a single "Canada" entry.
canada_abbr = [
    'ON',
    'BC',
    'MB',
    'NB',
    'NL',  # Newfoundland and Labrador (current code)
    'NF',  # legacy Newfoundland code, kept so older data still maps to Canada
    'NS',
    'NT',
    'QC',
    'SK',
    'AB',
    'PE',
    'NU',
    'YT',  # Yukon
]
# The remaining two-letter codes in the data are not identified by the source;
# the working assumption is that they are US states.
# The 50 state codes, ordered alphabetically by state name (five per row).
us_state_abbreviations = [
    "AL", "AK", "AZ", "AR", "CA",  # Alabama, Alaska, Arizona, Arkansas, California
    "CO", "CT", "DE", "FL", "GA",  # Colorado, Connecticut, Delaware, Florida, Georgia
    "HI", "ID", "IL", "IN", "IA",  # Hawaii, Idaho, Illinois, Indiana, Iowa
    "KS", "KY", "LA", "ME", "MD",  # Kansas, Kentucky, Louisiana, Maine, Maryland
    "MA", "MI", "MN", "MS", "MO",  # Massachusetts, Michigan, Minnesota, Mississippi, Missouri
    "MT", "NE", "NV", "NH", "NJ",  # Montana, Nebraska, Nevada, New Hampshire, New Jersey
    "NM", "NY", "NC", "ND", "OH",  # New Mexico, New York, North Carolina, North Dakota, Ohio
    "OK", "OR", "PA", "RI", "SC",  # Oklahoma, Oregon, Pennsylvania, Rhode Island, South Carolina
    "SD", "TN", "TX", "UT", "VT",  # South Dakota, Tennessee, Texas, Utah, Vermont
    "VA", "WA", "WV", "WI", "WY",  # Virginia, Washington, West Virginia, Wisconsin, Wyoming
]

In [54]:
# Collapse province/state rows into their parent country; any label that is
# not a known Canadian or US abbreviation is kept as-is.
totals_by_country = {}
for region, tally in countries.items():
    if region in canada_abbr:
        label = 'Canada'
    elif region in us_state_abbreviations:
        label = 'United States'
    else:
        label = region
    totals_by_country[label] = totals_by_country.get(label, 0) + tally
only_countries = pd.Series(totals_by_country).sort_values(ascending=False)
only_countries

Canada               104
China                 28
Hong Kong SAR         23
United States         14
Korea Republic of      6
Singapore              4
Australia              2
Turkey                 1
Greece                 1
Nigeria                1
Indonesia              1
dtype: int64

### Most common schools

In [51]:
# Number of perfect scorers per school, most frequent first.
# Rows with no school recorded are left out by value_counts' default.
schools = table['School']
school_counts = schools.value_counts()
school_counts

School
Nanjing Foreign Language School I    8
Diocesan Boys School                 8
University of Toronto Schools        8
Wah Yan College Kowloon              7
William Lyon Mackenzie C.I.          6
                                    ..
Daegu International School           1
E.S. Theriault                       1
Centennial Collegiate                1
Fredericton Christian Academy        1
Canada Royal Arts High School        1
Name: count, Length: 108, dtype: int64

### Most common last names

In [52]:
# Number of perfect scorers sharing each last name, most frequent first.
last_names = table['Last Name']
last_name_counts = last_names.value_counts()
last_name_counts

Last Name
ZHANG    13
WANG      9
LI        8
ZHOU      6
LIU       5
         ..
HUI       1
HU        1
HORNE     1
HAN       1
LOMOV     1
Name: count, Length: 115, dtype: int64

### Most common first names

In [53]:
# Number of perfect scorers sharing each first name, most frequent first.
first_names = table['First Name']
first_name_counts = first_names.value_counts()
first_name_counts

First Name
ANDREW    3
KEVIN     3
ERIC      3
RYAN      3
EDWARD    2
         ..
AIDAN     1
HENRY     1
ZEYAN     1
SIYUAN    1
RUI       1
Name: count, Length: 168, dtype: int64