# Data Visual 2 – Enrolment by Institutions - Gender Ratio

## Step 1 - Import Libraries

In [39]:
import pandas as pd
import mysql.connector
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category20
from bokeh.transform import cumsum
from math import pi

## Step 2 - Connect to SQL Database and Retrieve Data Table

In [40]:
# Database connection configuration
# NOTE(review): credentials are hard-coded in the notebook; consider loading
# them from environment variables or a config file kept out of version control.
db_config = {
    'user': 'weaver',
    'password': 'web101SG',
    'host': 'localhost',
    'database': 'dbsingaporepoly',
}

# Query to load the processed data from the MySQL table
query = "SELECT * FROM enrolmentbyinstitutions_processed"

# Establish a connection to the database
conn = mysql.connector.connect(**db_config)
try:
    # Load the data into a pandas DataFrame
    enrolment_data = pd.read_sql(query, conn)
finally:
    # Always release the connection, even if the query or the load fails
    conn.close()

  enrolment_data = pd.read_sql(query, conn)


## Step 3 - Set Up Variables

In [41]:
# Institution column names, in the order their pie charts are generated
institutions = [
    'nus',
    'ntu',
    'smu',
    'sit',
    'sutd',
    'suss',
    'nie',
    'singapore_polytechnic',
    'ngee_ann_polytechnic',
    'temasek_polytechnic',
    'nanyang_polytechnic',
    'republic_polytechnic',
    'lasalle_diploma',
    'lasalle_degree',
    'nafa_diploma',
    'nafa_degree',
    'ite',
]

# Distinct values of the 'year' column; one pie chart is attempted per year
years = enrolment_data.loc[:, 'year'].unique()

## Step 4 - Set Up Grid for Generation of Pie Charts

In [35]:
output_notebook()  # Renders inline in Jupyter; drop this call when running as a plain script.

# Collects one pie-chart figure per (institution, year) pair that has data
plots = []

for institution in institutions:
    # Pass 1: gather (year, male total, female total) for every year with usable data
    yearly_totals = []
    for year in years:
        rows_for_year = enrolment_data[enrolment_data['year'] == year]
        is_male = rows_for_year['sex'] == 'M'
        is_female = rows_for_year['sex'] == 'F'

        # Non-numeric cells become NaN via errors='coerce'; sum() then skips them
        male_total = pd.to_numeric(rows_for_year.loc[is_male, institution], errors='coerce').sum()
        female_total = pd.to_numeric(rows_for_year.loc[is_female, institution], errors='coerce').sum()

        # Skip years where a total is missing or where both totals are non-positive
        if pd.isna(male_total) or pd.isna(female_total):
            continue
        if not (male_total > 0 or female_total > 0):
            continue

        yearly_totals.append((year, male_total, female_total))

    # Pass 2: build one pie chart for each year collected above
    for year, male_total, female_total in yearly_totals:
        slices = pd.DataFrame({
            'Gender': ['Male', 'Female'],
            'Enrollment': [male_total, female_total],
        })

        # Each slice's angle is its share of the total enrolment, in radians
        share = slices['Enrollment'] / slices['Enrollment'].sum()
        slices['angle'] = share * 2 * pi
        slices['color'] = ['#3498db', '#e74c3c']  # Blue for males, red for females

        p = figure(
            height=350,
            title=f"{institution.upper()} - {year}",
            toolbar_location=None,
            tools="hover",
            tooltips="@Gender: @Enrollment",
            x_range=(-0.5, 1.0),
        )

        # cumsum turns the per-slice angles into consecutive start/end angles
        p.wedge(
            x=0,
            y=1,
            radius=0.4,
            start_angle=cumsum('angle', include_zero=True),
            end_angle=cumsum('angle'),
            line_color="white",
            fill_color='color',
            legend_field='Gender',
            source=ColumnDataSource(slices),
        )

        # Hide the axes and grid lines around the pie chart
        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

        plots.append(p)

# Lay the charts out two per row and display the grid
num_cols = 2
grid = gridplot(plots, ncols=num_cols)
show(grid)