# Data Extraction

## Import modules

In [1]:
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By

from bs4 import BeautifulSoup
import re
import requests
import pandas as pd

## Define functions

In [2]:
def extract_table(year):
    response = requests.get(url + year + '/all')
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table')
    df = pd.read_html(str(table))[0]    
    return df

In [3]:
def transform_data_new(df):
    
    data = {}
    data['school_name'] = []
    data['total_vac'] = []
    
    # Vacancy row
    data['p1_vac'] = []
    data['p2a_vac'] = []
    data['p2b_vac'] = []
    data['p2c_vac'] = []
    data['p2cs_vac'] = []
    data['p3_vac'] = []

    # Applied row
    data['p1_appl'] = []
    data['p2a_appl'] = []
    data['p2b_appl'] = []
    data['p2c_appl'] = []
    data['p2cs_appl'] = []
    data['p3_appl'] = []

    # Taken row
    data['p1_taken'] = []
    data['p2a_taken'] = []
    data['p2b_taken'] = []
    data['p2c_taken'] = []
    data['p2cs_taken'] = []
    data['p3_taken'] = []

    for idx, row in df.iterrows():
        if idx % 4 == 0:
            data['school_name'].append(row['School'])
        elif idx % 4 == 1:
            # print(row)
            data['total_vac'].append(re.findall(r'\d+', row['School'])[0])
            data['p1_vac'].append(row['Phase 1'])
            data['p2a_vac'].append(row['2A'])
            data['p2b_vac'].append(row['2B'])
            data['p2c_vac'].append(row['2C'])
            data['p2cs_vac'].append(row['2C(S)'])
            data['p3_vac'].append(row['3'])
        elif idx % 4 == 2:
            data['p1_appl'].append(row['Phase 1'])
            data['p2a_appl'].append(row['2A'])
            data['p2b_appl'].append(row['2B'])
            data['p2c_appl'].append(row['2C'])
            data['p2cs_appl'].append(row['2C(S)'])
            data['p3_appl'].append(row['3'])
        elif idx % 4 == 3:
            data['p1_taken'].append(row['Phase 1'])
            data['p2a_taken'].append(row['2A'])
            data['p2b_taken'].append(row['2B'])
            data['p2c_taken'].append(row['2C'])
            data['p2cs_taken'].append(row['2C(S)'])
            data['p3_taken'].append(row['3'])

    return pd.DataFrame(data)

In [4]:
def transform_data_old(df):
    
    data = {}
    data['school_name'] = []
    data['total_vac'] = []
    
    # Vacancy row
    data['p1_vac'] = []
    data['p2a1_vac'] = []
    data['p2a2_vac'] = []
    data['p2b_vac'] = []
    data['p2c_vac'] = []
    data['p2cs_vac'] = []
    data['p3_vac'] = []

    # Applied row
    data['p1_appl'] = []
    data['p2a1_appl'] = []
    data['p2a2_appl'] = []
    data['p2b_appl'] = []
    data['p2c_appl'] = []
    data['p2cs_appl'] = []
    data['p3_appl'] = []

    # Taken row
    data['p1_taken'] = []
    data['p2a1_taken'] = []
    data['p2a2_taken'] = []
    data['p2b_taken'] = []
    data['p2c_taken'] = []
    data['p2cs_taken'] = []
    data['p3_taken'] = []

    for idx, row in df.iterrows():
        if idx % 4 == 0:
            data['school_name'].append(row['School'])
        elif idx % 4 == 1:
            data['total_vac'].append(re.findall(r'\d+', row['School'])[0])
            data['p1_vac'].append(row['Phase 1'])
            data['p2a1_vac'].append(row['2A(1)'])
            data['p2a2_vac'].append(row['2A(2)'])
            data['p2b_vac'].append(row['2B'])
            data['p2c_vac'].append(row['2C'])
            data['p2cs_vac'].append(row['2C(S)'])
            data['p3_vac'].append(row['3'])
        elif idx % 4 == 2:
            data['p1_appl'].append(row['Phase 1'])
            data['p2a1_appl'].append(row['2A(1)'])
            data['p2a2_appl'].append(row['2A(2)'])
            data['p2b_appl'].append(row['2B'])
            data['p2c_appl'].append(row['2C'])
            data['p2cs_appl'].append(row['2C(S)'])
            data['p3_appl'].append(row['3'])
        elif idx % 4 == 3:
            data['p1_taken'].append(row['Phase 1'])
            data['p2a1_taken'].append(row['2A(1)'])
            data['p2a2_taken'].append(row['2A(2)'])
            data['p2b_taken'].append(row['2B'])
            data['p2c_taken'].append(row['2C'])
            data['p2cs_taken'].append(row['2C(S)'])
            data['p3_taken'].append(row['3'])

    return pd.DataFrame(data)

## Data extraction via web-scraping

In [5]:
url = 'https://sgschooling.com/year/'
driver = webdriver.Chrome()
driver.get(url)

all_years = driver.find_elements(By.XPATH, "/html/body/div[3]/div/section[2]/div/ul")
all_years = sum([year.text.split("\n") for year in all_years], [])
all_years = [year.split()[0] for year in all_years]
print(all_years)

['2022', '2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009']


In [144]:
df_new_list = []
df_old_list = []

for year in all_years:
    df = extract_table(year)
    if int(year) >= 2022:
        df_new = transform_data_new(df)
        df_new['year'] = year
        df_new_list.append(df_new)        
    else:
        df_old = transform_data_old(df)
        df_old['year'] = year     
        df_old_list.append(df_old)
        
df_2022_onwards = pd.concat(df_new_list)
df_before_2022 = pd.concat(df_old_list)

## Data cleaning

We are only interested in data from 2019 onwards, when MOE started providing data on the number of applications/registrations and vacancies taken up in each phase.

In [145]:
df_2019_to_2021 = df_before_2022[df_before_2022['year'].isin(['2021', '2020', '2019'])]

For some of the columns, there are additional strings, such as 'SC<1' or 'PR>1', appended behind the numerical value. We will remove these strings using regex.

In [146]:
taken_cols = [col for col in df_2019_to_2021.columns.tolist() if col.endswith('_taken')]
for col in taken_cols:
    df_2019_to_2021 = df_2019_to_2021.copy()
    df_2019_to_2021[col] = df_2019_to_2021[col].replace(' (SC|PR)[<>]?[0-9-#]*', '', regex=True)

In [147]:
# Merging columns for Phase 2A(1) and Phase 2A(2) into Phase 2...
df_2019_to_2021 = df_2019_to_2021.copy()
df_2019_to_2021['p2a_vac'] = df_2019_to_2021['p2a1_vac'] 
df_2019_to_2021['p2a_appl'] = df_2019_to_2021['p2a1_appl'] + df_2019_to_2021['p2a2_appl']
df_2019_to_2021['p2a_taken'] = df_2019_to_2021['p2a1_taken'] + df_2019_to_2021['p2a1_taken']

# And remove Phase 2A(1) and Phase 2A(2) columns
df_2019_to_2021.drop(columns=['p2a1_vac', 'p2a2_vac', 'p2a1_appl', 'p2a2_appl', 'p2a1_taken', 'p2a2_taken'], axis=1, inplace=True)

2022 data also contains additional strings, such as 'SC<1' or 'PR>1', appended behind the numerical value. Likewise, we will remove these strings using regex.

In [148]:
taken_cols = [col for col in df_2022_onwards.columns.tolist() if col.endswith('_taken')]
for col in taken_cols:
    df_2022_onwards[col] = df_2022_onwards[col].replace(' (SC|PR)[<>]?[0-9-#]*', '', regex=True)

Then, we combine 2019-2021 data and 2022 onwards data together into a single DataFrame.

In [149]:
df = pd.concat([df_2019_to_2021, df_2022_onwards])

Let's rearrange the columns in a way that makes sense.

In [150]:
col_seq = ['school_name', 'year', 'total_vac',
         'p1_vac', 'p1_appl', 'p1_taken',
         'p2a_vac', 'p2a_appl', 'p2a_taken',
         'p2b_vac', 'p2b_appl', 'p2b_taken',
         'p2c_vac', 'p2c_appl', 'p2c_taken',
         'p2cs_vac', 'p2cs_appl', 'p2cs_taken',
         'p3_vac', 'p3_appl', 'p3_taken']

df = df[col_seq]

Let's convert each column into the correct data types, starting with the `year` column.

In [151]:
df['year'] = pd.to_datetime(df['year'])

Next, we convert all numerical columns to integer data types.

In [152]:
for col in df.columns.tolist()[2:-2]:
    df[col] = df[col].astype(int)

In [153]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 733 entries, 0 to 180
Data columns (total 21 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   school_name  733 non-null    object        
 1   year         733 non-null    datetime64[ns]
 2   total_vac    733 non-null    int64         
 3   p1_vac       733 non-null    int64         
 4   p1_appl      733 non-null    int64         
 5   p1_taken     733 non-null    int64         
 6   p2a_vac      733 non-null    int64         
 7   p2a_appl     733 non-null    int64         
 8   p2a_taken    733 non-null    int64         
 9   p2b_vac      733 non-null    int64         
 10  p2b_appl     733 non-null    int64         
 11  p2b_taken    733 non-null    int64         
 12  p2c_vac      733 non-null    int64         
 13  p2c_appl     733 non-null    int64         
 14  p2c_taken    733 non-null    int64         
 15  p2cs_vac     733 non-null    int64         
 16  p2cs_app

Then, we sort by year, starting with the most recent year.

In [154]:
df.sort_values(by=['year', 'school_name'], ascending=[False, True], inplace=True)
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,school_name,year,total_vac,p1_vac,p1_appl,p1_taken,p2a_vac,p2a_appl,p2a_taken,p2b_vac,...,p2b_taken,p2c_vac,p2c_appl,p2c_taken,p2cs_vac,p2cs_appl,p2cs_taken,p3_vac,p3_appl,p3_taken
0,Admiralty,2022-01-01,210,150,107,107,44,37,37,22,...,22,45,104,45,0,0,0,0,-,-
1,Ahmad Ibrahim,2022-01-01,190,130,56,56,74,5,5,43,...,0,129,31,31,98,20,20,78,-,-
2,Ai Tong,2022-01-01,300,240,130,130,110,146,110,20,...,20,42,86,42,0,0,0,0,-,-
3,Alexandra,2022-01-01,200,140,75,75,65,9,9,39,...,4,113,146,113,0,0,0,0,-,-
4,Anchor Green,2022-01-01,240,180,75,75,105,46,46,40,...,0,120,24,24,96,108,96,0,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
728,Yuhua,2019-01-01,180,140,40,40,100,10,0,65,...,0,130,34,34,96,28,28,68,-,-
729,Yumin,2019-01-01,150,110,42,42,68,17,0,46,...,0,93,30,30,63,44,44,17,-,-
730,Zhangde,2019-01-01,210,170,75,75,96,12,0,62,...,1,124,124,124,0,0,0,0,-,-
731,Zhenghua,2019-01-01,210,170,85,85,85,23,0,51,...,2,100,76,76,24,19,19,5,-,-


## Merging with other useful datasets

In [155]:
# Load CSV file containing general info of schools
info_df = pd.read_csv("Data/general-information-of-schools.csv")

# Select required columns only
req_cols = ['school_name', 'address', 'postal_code', 'dgp_code', 'zone_code', 'type_code', 'nature_code', 'session_code', 'mainlevel_code', 'sap_ind', 'autonomous_ind', 'gifted_ind', 'ip_ind']
info_df = info_df[req_cols] 

# Select Primary Schools only
info_df = info_df[(info_df['mainlevel_code'] == 'PRIMARY') | (info_df['school_name'].isin(["CHIJ ST. NICHOLAS GIRLS' SCHOOL", "MARIS STELLA HIGH SCHOOL", "CATHOLIC HIGH SCHOOL"]))]
info_df.reset_index(drop=True, inplace=True)

# Change various columns to title casing
for col in ['school_name', 'address', 'dgp_code', 'zone_code', 'type_code', 'nature_code', 'session_code', 'mainlevel_code']:
    info_df = info_df.copy()
    info_df[col] = info_df[col].str.title()
    
# Some simple data cleaning
info_df['school_name'] = info_df['school_name'].str.replace("Chij", "CHIJ")
info_df['school_name'] = info_df['school_name'].str.replace("'S", "'s")

# Convert `postal_code` column to string datatype
info_df['postal_code'] = info_df['postal_code'].astype('str')

# Making sure that all postal codes have 6 characters
info_df['postal_code'] = info_df['postal_code'].str.zfill(6)
    
# Manual standardisation of selected school names
info_df['school_name'] = info_df['school_name'].replace({"Catholic High School": "Catholic High School (Primary)", "Singapore Chinese Girls' Primary School": "Singapore Chinese Girls’ School (Primary)"})
    
# Sanity check
info_df

Unnamed: 0,school_name,address,postal_code,dgp_code,zone_code,type_code,nature_code,session_code,mainlevel_code,sap_ind,autonomous_ind,gifted_ind,ip_ind
0,Admiralty Primary School,11 Woodlands Circle,738907,Woodlands,North,Government School,Co-Ed School,Full Day,Primary,No,No,No,No
1,Ahmad Ibrahim Primary School,10 Yishun Street 11,768643,Yishun,North,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
2,Ai Tong School,100 Bright Hill Drive,579646,Bishan,South,Government-Aided Sch,Co-Ed School,Single Session,Primary,Yes,No,No,No
3,Alexandra Primary School,2A Prince Charles Crescent,159016,Bukit Merah,South,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
4,Anchor Green Primary School,31 Anchorvale Drive,544969,Seng Kang,North,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,Yuhua Primary School,158 Jurong East Street 24,609558,Jurong East,West,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
182,Yumin Primary School,3 Tampines Street 21,529393,Tampines,East,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
183,Zhangde Primary School,51 Jalan Membina,169485,Bukit Merah,South,Government School,Co-Ed School,Single Session,Primary,No,No,No,No
184,Zhenghua Primary School,9 Fajar Road,679002,Bukit Panjang,West,Government School,Co-Ed School,Single Session,Primary,No,No,No,No


## Standardising names of schools

It makes sense to standardise the names of each school by renaming them to their respective full names.

In [156]:
contains_pri_sch = [name for name in info_df.school_name.unique().tolist() if "Primary School" in name]
contains_pri_sch  = [' '.join(name.split()[:-2]) for name in contains_pri_sch]
does_not_contain_pri_sch = [name for name in info_df.school_name.unique().tolist() if "Primary School" not in name]

In [157]:
df_school_names = df.school_name.unique().tolist()

school_name_mapping_dict = {} # Mapping for school which ends with "Primary School"
manual_mapping = []

for school in df_school_names:
    
    # Check if school should be appended with "Primary School"
    if school in contains_pri_sch:
        school_name_mapping_dict[school] = school + " Primary School"
        
    # Otherwise, check if school is amongst the schools not ending with "Primary School" -> `does_not_contain_pri_sch` list
    else:
        manual_mapping.append(school)
    
assert (len(school_name_mapping_dict) + len(manual_mapping)) == len(df_school_names)

# Mapping for school which does not end with "Primary School"
manual_full_name = ['Ai Tong School', 'Anglo-Chinese School (Junior)', 'Anglo-Chinese School (Primary)', 'CHIJ (Katong) Primary', 'CHIJ (Kellock)',
                    'CHIJ Primary (Toa Payoh)', 'CHIJ Our Lady Queen Of Peace', 'CHIJ Our Lady Of Good Counsel', 'CHIJ Our Lady Of The Nativity', 
                    "CHIJ St. Nicholas Girls' School", 'Catholic High School (Primary)', 'Chongfu School', 'De La Salle School', 'Fairfield Methodist School (Primary)', 
                    'Geylang Methodist School (Primary)', "Haig Girls' School", "Holy Innocents' Primary School", 'Hong Wen School', 'Kheng Cheng School', 
                    'Kong Hwa School', 'Maha Bodhi School', "Maris Stella High School", 'Marymount Convent School', 'Mee Toh School', "Methodist Girls' School (Primary)", 
                    'Montfort Junior School', "Paya Lebar Methodist Girls' School (Primary)", 'Pei Chun Public School', 'Poi Ching School', 
                    "Raffles Girls' Primary School", 'Red Swastika School', 'Rosyth School', "Singapore Chinese Girls’ School (Primary)", "St. Andrew's Junior School", 
                    "St. Anthony's Primary School", "St. Anthony's Canossian Primary School", "St. Gabriel's Primary School", "St. Hilda's Primary School",  
                    "St. Joseph's Institution Junior", "St. Margaret's Primary School", "St. Stephen's School", 'Tao Nan School']

manual_mapping_dict = dict(zip(manual_mapping, manual_full_name))

# Full mapping of schools from their short names to their full names
full_school_mapping = {**school_name_mapping_dict, **manual_mapping_dict}
full_school_mapping = dict(sorted(full_school_mapping.items(), key=lambda item: item[1])) # Sort by alphabetical order

assert len(full_school_mapping) == df.school_name.nunique()

# Finally, we standardise the school names in `df` DataFrame
df['school_name'] = df['school_name'].replace(full_school_mapping)

# Sanity check
df.head()

Unnamed: 0,school_name,year,total_vac,p1_vac,p1_appl,p1_taken,p2a_vac,p2a_appl,p2a_taken,p2b_vac,...,p2b_taken,p2c_vac,p2c_appl,p2c_taken,p2cs_vac,p2cs_appl,p2cs_taken,p3_vac,p3_appl,p3_taken
0,Admiralty Primary School,2022-01-01,210,150,107,107,44,37,37,22,...,22,45,104,45,0,0,0,0,-,-
1,Ahmad Ibrahim Primary School,2022-01-01,190,130,56,56,74,5,5,43,...,0,129,31,31,98,20,20,78,-,-
2,Ai Tong School,2022-01-01,300,240,130,130,110,146,110,20,...,20,42,86,42,0,0,0,0,-,-
3,Alexandra Primary School,2022-01-01,200,140,75,75,65,9,9,39,...,4,113,146,113,0,0,0,0,-,-
4,Anchor Green Primary School,2022-01-01,240,180,75,75,105,46,46,40,...,0,120,24,24,96,108,96,0,-,-


## Get lat-long from postal code

In [158]:
import pgeocode
nomi = pgeocode.Nominatim('sg')

In [159]:
df_latlong = nomi.query_postal_code(info_df['postal_code'].tolist())[['postal_code', 'latitude', 'longitude']]
df_latlong

Unnamed: 0,postal_code,latitude,longitude
0,738907,1.4426,103.8001
1,768643,1.4333,103.8328
2,579646,1.3606,103.8330
3,159016,1.2913,103.8242
4,544969,1.3904,103.8872
...,...,...,...
181,609558,1.3429,103.7410
182,529393,1.3513,103.9505
183,169485,1.2842,103.8260
184,679002,1.3795,103.7693


In [160]:
info_df = info_df.merge(df_latlong, on='postal_code', how='left')
info_df

Unnamed: 0,school_name,address,postal_code,dgp_code,zone_code,type_code,nature_code,session_code,mainlevel_code,sap_ind,autonomous_ind,gifted_ind,ip_ind,latitude,longitude
0,Admiralty Primary School,11 Woodlands Circle,738907,Woodlands,North,Government School,Co-Ed School,Full Day,Primary,No,No,No,No,1.4426,103.8001
1,Ahmad Ibrahim Primary School,10 Yishun Street 11,768643,Yishun,North,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.4333,103.8328
2,Ai Tong School,100 Bright Hill Drive,579646,Bishan,South,Government-Aided Sch,Co-Ed School,Single Session,Primary,Yes,No,No,No,1.3606,103.8330
3,Alexandra Primary School,2A Prince Charles Crescent,159016,Bukit Merah,South,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.2913,103.8242
4,Anchor Green Primary School,31 Anchorvale Drive,544969,Seng Kang,North,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.3904,103.8872
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,Yuhua Primary School,158 Jurong East Street 24,609558,Jurong East,West,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.3429,103.7410
182,Yumin Primary School,3 Tampines Street 21,529393,Tampines,East,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.3513,103.9505
183,Zhangde Primary School,51 Jalan Membina,169485,Bukit Merah,South,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.2842,103.8260
184,Zhenghua Primary School,9 Fajar Road,679002,Bukit Panjang,West,Government School,Co-Ed School,Single Session,Primary,No,No,No,No,1.3795,103.7693


## Save datasets as CSV files

In [161]:
# help(pd.DataFrame.to_csv)

In [162]:
# Save `df` 
df.to_csv("Data/registration_data.csv", index=False)

# Save `info_df`
info_df.to_csv("Data/general_info.csv", index=False)

In [2]:
import pandas as pd

In [3]:
# pd.read_csv("Data/school-distinctive-programmes.csv")
pd.read_csv("Data/co-curricular-activities-ccas.csv").head(20)

Unnamed: 0,school_name,school_section,cca_grouping_desc,cca_generic_name,cca_customized_name
0,ADMIRALTY PRIMARY SCHOOL,PRIMARY,PHYSICAL SPORTS,MODULAR CCA (SPORTS),SPORTS CLUB
1,ADMIRALTY PRIMARY SCHOOL,PRIMARY,VISUAL AND PERFORMING ARTS,ART AND CRAFTS,VISUAL ARTS CLUB
2,ADMIRALTY PRIMARY SCHOOL,PRIMARY,CLUBS AND SOCIETIES,"ENGLISH LANGUAGE, DRAMA AND DEBATING",ENGLISH LANGUAGE AND DRAMA
3,ADMIRALTY PRIMARY SCHOOL,PRIMARY,CLUBS AND SOCIETIES,INFOCOMM TECHNOLOGY (COMPUTING),INFOCOMM TECHNOLOGY
4,ADMIRALTY PRIMARY SCHOOL,PRIMARY,CLUBS AND SOCIETIES,DESIGN AND INNOVATION,na
5,ADMIRALTY PRIMARY SCHOOL,PRIMARY,CLUBS AND SOCIETIES,ENVIRONMENTAL SCIENCE,na
6,ADMIRALTY PRIMARY SCHOOL,PRIMARY,PHYSICAL SPORTS,FOOTBALL,FOOTBALL
7,ADMIRALTY PRIMARY SCHOOL,PRIMARY,PHYSICAL SPORTS,TRACK AND FIELD,na
8,ADMIRALTY PRIMARY SCHOOL,PRIMARY,PHYSICAL SPORTS,WUSHU,na
9,ADMIRALTY PRIMARY SCHOOL,PRIMARY,PHYSICAL SPORTS,ROPE SKIPPING,na


### Next steps ##

1. Engineer relevant columns to create additional features.
2. Recreate the 'Cumulative Take Up Rate' table?
3. Combine with other useful primary school datasets!

In [30]:
info_df['postal_code']

0      738907
1      768643
2      579646
3      159016
4      544969
        ...  
178    609558
179    529393
180    169485
181    679002
182    556095
Name: postal_code, Length: 183, dtype: int64

In [31]:
info_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183 entries, 0 to 182
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   school_name     183 non-null    object
 1   address         183 non-null    object
 2   postal_code     183 non-null    int64 
 3   dgp_code        183 non-null    object
 4   zone_code       183 non-null    object
 5   type_code       183 non-null    object
 6   nature_code     183 non-null    object
 7   session_code    183 non-null    object
 8   mainlevel_code  183 non-null    object
 9   sap_ind         183 non-null    object
 10  autonomous_ind  183 non-null    object
 11  gifted_ind      183 non-null    object
 12  ip_ind          183 non-null    object
dtypes: int64(1), object(12)
memory usage: 18.7+ KB


In [None]:
>>> import pgeocode

>>> nomi = pgeocode.Nominatim('fr')
>>> nomi.query_postal_code("75013")
postal_code               75013
country_code                 FR
place_name             Paris 13
state_name        Île-de-France
state_code                   11
county_name               Paris
county_code                  75
community_name            Paris
community_code              751
latitude                48.8322
longitude                2.3561
accuracy                      5

>>> nomi.query_postal_code(["75013", "69006"])
      postal_code place_name            state_name  latitude  longitude
0       75013   Paris 13         Île-de-France   48.8322     2.3561
1       69006    Lyon 06  Auvergne-Rhône-Alpes   45.7679     4.8506

Unnamed: 0,postal_code,country_code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy
0,738907,SG,Woodlands Circle,,,,,,,1.4426,103.8001,
1,768643,SG,Yishun Street 11,,,,,,,1.4333,103.8328,
2,579646,SG,Bright Hill Drive,,,,,,,1.3606,103.8330,
3,159016,SG,Prince Charles Crescent,,,,,,,1.2913,103.8242,
4,544969,SG,Anchorvale Drive,,,,,,,1.3904,103.8872,
...,...,...,...,...,...,...,...,...,...,...,...,...
178,609558,SG,Jurong East Street 24,,,,,,,1.3429,103.7410,
179,529393,SG,Tampines Street 21,,,,,,,1.3513,103.9505,
180,169485,SG,Jalan Membina,,,,,,,1.2842,103.8260,
181,679002,SG,Fajar Road,,,,,,,1.3795,103.7693,


In [43]:
nomi.query_postal_code('738907')

postal_code                 738907
country_code                    SG
place_name        Woodlands Circle
state_name                     NaN
state_code                     NaN
county_name                    NaN
county_code                    NaN
community_name                 NaN
community_code                 NaN
latitude                    1.4426
longitude                 103.8001
accuracy                       NaN
Name: 0, dtype: object

To ensure continued open access to all schools in later phases, we are reserving 60 places in each school for Phases 2B and 2C (20 places for Phase 2B and 40 places for Phase 2C).

At the end of Phase 2A, one-third of remaining places will be allocated to Phase 2B, and two-thirds to Phase 2C.

If there are vacancies left at the end of Phase 2B, the remaining places will be carried forward to Phase 2C.

Changes to Registration Phases

From the 2022 Primary One Registration Exercise, phases 2A 1 and 2A 2 will be merged into phase 2A. The number of places reserved under Phase 2C of the Primary 1 registration exercise will double from 20 to 40. Phase 2B will continue to have 20 reserved places

The 2020 registration exercise was done online. Parents should ensure that their SingPass account is valid and the 2-Step Verification done before the Primary 1 registration exercise starts. From this year MOE will be introducing a cap on the intake of children of permanent residents.

From the 2018 Primary One Registration Exercise, children attending MOE kindergartens situated within primary school compounds will be eligible to register under Phase 2A2 of those schools.

From the 2015 Primary One Registration Exercise, a child who gains priority admission into a school through his/her distance category is required to reside at the address used for registration for at least 30 months from the commencement of the P1 registration exercise.

Starting from the 2014 Primary One (P1) Registration Exercise, the Ministry of Education (MOE) will reserve 40 places in every primary school for registrants in Phase 2B and 2C (20 places for each phase) to ensure continued open access to all primary schools.

From 2012 onwards, the Ministry of Education (MOE) will implement measures to further differentiate between Singapore Citizens (SCs) and Permanent Residents (PRs) at the Primary One (P1) Registration Exercise. When balloting is necessary in a specific phase, SCs will be given absolute priority over PRs. SCs and PRs will continue to be eligible for the same phases, and all applicants will be admitted if the total number of applicants in any phase does not exceed the number of vacancies. However, if the number of applications exceeds the number of vacancies in a specific phase, SCs will be admitted first ahead of PRs, before home-school distance is considered.