# Resolving Edutech Company Issues - Dashboard

## Preparation

##### *Import Libraries*

In [1]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.multiclass import OneVsRestClassifier

##### *Import Datasets*

In [2]:
# Read the raw student records; the source file uses ';' as its field separator.
df = pd.read_csv('data/data.csv', sep=';')

## Data Preparation/Preprocessing

In [3]:
# Raw column names retained for the rest of the notebook. The order of this
# list is also the column order of the filtered frame.
columns_to_keep = [
    # Application and prior schooling
    'Marital_status', 'Application_mode', 'Application_order', 'Course',
    'Daytime_evening_attendance', 'Previous_qualification',
    'Previous_qualification_grade', 'Nacionality',
    # Parents
    'Mothers_qualification', 'Fathers_qualification',
    'Mothers_occupation', 'Fathers_occupation',
    # Admission, personal and financial indicators
    'Admission_grade', 'Displaced', 'Educational_special_needs', 'Debtor',
    'Tuition_fees_up_to_date', 'Gender', 'Scholarship_holder',
    'Age_at_enrollment', 'International',
    # First-semester curricular units
    'Curricular_units_1st_sem_credited', 'Curricular_units_1st_sem_enrolled',
    'Curricular_units_1st_sem_evaluations', 'Curricular_units_1st_sem_approved',
    # Student status column
    'Status',
]

# Keep all rows but only the listed columns (a missing name raises KeyError).
df = df.loc[:, columns_to_keep]

In [4]:
# Raw column name -> display name used from here on. Names that are already
# presentable map to themselves so the table lists every kept column.
columns_rename = {
    "Marital_status": "Marital Status",
    "Application_mode": "Application Mode",
    "Application_order": "Application Order",
    "Course": "Course",
    "Daytime_evening_attendance": "Daytime/Evening Attendance",
    "Previous_qualification": "Previous Qualification",
    "Previous_qualification_grade": "Previous Qualification Grade",
    "Nacionality": "Nationality",
    "Mothers_qualification": "Mother's Qualification",
    "Fathers_qualification": "Father's Qualification",
    "Mothers_occupation": "Mother's Occupation",
    "Fathers_occupation": "Father's Occupation",
    "Admission_grade": "Admission Grade",
    "Displaced": "Displaced",
    "Educational_special_needs": "Educational Special Needs",
    "Debtor": "Debtor",
    "Tuition_fees_up_to_date": "Tuition Fees Up to Date",
    "Gender": "Gender",
    "Scholarship_holder": "Scholarship Holder",
    "Age_at_enrollment": "Age at Enrollment",
    "International": "International Student",
    "Curricular_units_1st_sem_credited": "Curricular Units 1st Sem Credited",
    "Curricular_units_1st_sem_enrolled": "Curricular Units 1st Sem Enrolled",
    "Curricular_units_1st_sem_evaluations": "Curricular Units 1st Sem Evaluations",
    "Curricular_units_1st_sem_approved": "Curricular Units 1st Sem Approved",
    "Status": "Student Status",
}

# Relabel the column axis; rename() returns a new frame, which replaces df.
df = df.rename(mapper=columns_rename, axis="columns")

In [5]:
# Lookup tables that decode the 0/1 indicator columns into readable labels,
# keyed by the (already renamed) column name.
mapping_columns = {
    'Displaced': {1: 'Yes', 0: 'No'},
    'Educational Special Needs': {1: 'Yes', 0: 'No'},
    'Debtor': {1: 'Yes', 0: 'No'},
    'Tuition Fees Up to Date': {1: 'Yes', 0: 'No'},
    'Gender': {1: 'Male', 0: 'Female'},
    'Scholarship Holder': {1: 'Yes', 0: 'No'},
    'International Student': {1: 'Yes', 0: 'No'},
}

# Decode each column. Series.map(dict) turns every value that is not a key of
# the dict into NaN, so a plain map would blank these columns if this cell were
# run a second time (the values would then be 'Yes'/'No', not 1/0). Values that
# already are one of the target labels are therefore carried over unchanged,
# which makes the cell safe to re-run. Raw codes missing from a mapping still
# become NaN, exactly as with a plain map.
for column, mapping in mapping_columns.items():
    already_decoded = df[column].isin(list(mapping.values()))
    df[column] = df[column].map(mapping).where(~already_decoded, df[column])

In [6]:
# Code -> label lookup tables for the integer-coded categorical columns.
# Outer keys are the renamed column names; each inner dict maps a raw numeric
# code to the text that replaces it. The tables are applied with Series.map in
# the loop below, so a code that is missing from its table ends up as NaN.
# This rebinds `mapping_columns`, replacing the binary mappings defined in the
# previous cell.
# NOTE(review): some labels look like typos carried over from the data
# dictionary (e.g. 'Equinculture', 'Directors, Directors and ...'); they are
# runtime values, so confirm with downstream consumers before changing them.
mapping_columns = {
    'Marital Status': {
        1: 'Single', 2: 'Married', 3: 'Widower',
        4: 'Divorced', 5: 'Facto Union', 6: 'Legally Separated'
    },
    'Application Mode': {
        1: '1st phase - general contingent', 2: 'Ordinance No. 612/93',
        5: '1st phase - special contingent (Azores Island)', 7: 'Holders of other higher courses',
        10: 'Ordinance No. 854-B/99', 15: 'International student (bachelor)',
        16: '1st phase - special contingent (Madeira Island)', 17: '2nd phase - general contingent',
        18: '3rd phase - general contingent', 26: 'Ordinance No. 533-A/99, item b2) (Different Plan)',
        27: 'Ordinance No. 533-A/99, item b3 (Other Institution)', 39: 'Over 23 years old',
        42: 'Transfer', 43: 'Change of course', 44: 'Technological specialization diploma holders',
        51: 'Change of institution/course', 53: 'Short cycle diploma holders',
        57: 'Change of institution/course (International)'
    },
    'Course': {
        33: 'Biofuel Production Technologies', 171: 'Animation and Multimedia Design',
        8014: 'Social Service (evening attendance)', 9003: 'Agronomy',
        9070: 'Communication Design', 9085: 'Veterinary Nursing',
        9119: 'Informatics Engineering', 9130: 'Equinculture', 9147: 'Management',
        9238: 'Social Service', 9254: 'Tourism', 9500: 'Nursing',
        9556: 'Oral Hygiene', 9670: 'Advertising and Marketing Management',
        9773: 'Journalism and Communication', 9853: 'Basic Education',
        9991: 'Management (evening attendance)'
    },
    'Daytime/Evening Attendance': {
        1: 'Daytime', 0: 'Evening'
    },
    'Previous Qualification': {
        1: 'Secondary education', 2: 'Higher education - bachelor\'s degree',
        3: 'Higher education - degree', 4: 'Higher education - master\'s',
        5: 'Higher education - doctorate', 6: 'Frequency of higher education',
        9: '12th year of schooling - not completed', 10: '11th year of schooling - not completed',
        12: 'Other - 11th year of schooling', 14: '10th year of schooling',
        15: '10th year of schooling - not completed', 19: 'Basic education 3rd cycle (9th/10th/11th year) or equiv.',
        38: 'Basic education 2nd cycle (6th/7th/8th year) or equiv.', 39: 'Technological specialization course',
        40: 'Higher education - degree (1st cycle)', 42: 'Professional higher technical course',
        43: 'Higher education - master (2nd cycle)'
    },
    'Nationality': {
        1: 'Portuguese', 2: 'German', 6: 'Spanish', 11: 'Italian', 13: 'Dutch', 14: 'English',
        17: 'Lithuanian', 21: 'Angolan', 22: 'Cape Verdean', 24: 'Guinean', 25: 'Mozambican',
        26: 'Santomean', 32: 'Turkish', 41: 'Brazilian', 62: 'Romanian', 100: 'Moldova (Republic of)',
        101: 'Mexican', 103: 'Ukrainian', 105: 'Russian', 108: 'Cuban', 109: 'Colombian'
    },
    'Mother\'s Qualification': {
        1: 'Secondary Education - 12th Year of Schooling or Eq.', 2: 'Higher Education - Bachelor\'s Degree',
        3: 'Higher Education - Degree', 4: 'Higher Education - Master\'s', 5: 'Higher Education - Doctorate',
        6: 'Frequency of Higher Education', 9: '12th Year of Schooling - Not Completed', 10: '11th Year of Schooling - Not Completed',
        11: '7th Year (Old)', 12: 'Other - 11th Year of Schooling', 14: '10th Year of Schooling',
        18: 'General commerce course', 19: 'Basic Education 3rd Cycle (9th/10th/11th Year) or Equiv.',
        22: 'Technical-professional course', 26: '7th year of schooling', 27: '2nd cycle of the general high school course',
        29: '9th Year of Schooling - Not Completed', 30: '8th year of schooling', 34: 'Unknown',
        35: 'Can\'t read or write', 36: 'Can read without having a 4th year of schooling',
        37: 'Basic education 1st cycle (4th/5th year) or equiv.', 38: 'Basic Education 2nd Cycle (6th/7th/8th Year) or Equiv.',
        39: 'Technological specialization course', 40: 'Higher education - degree (1st cycle)',
        41: 'Specialized higher studies course', 42: 'Professional higher technical course',
        43: 'Higher Education - Master (2nd cycle)', 44: 'Higher Education - Doctorate (3rd cycle)'
    },
    'Father\'s Qualification': {
        1: 'Secondary Education - 12th Year of Schooling or Eq.', 2: 'Higher Education - Bachelor\'s Degree',
        3: 'Higher Education - Degree', 4: 'Higher Education - Master\'s', 5: 'Higher Education - Doctorate',
        6: 'Frequency of Higher Education', 9: '12th Year of Schooling - Not Completed', 10: '11th Year of Schooling - Not Completed',
        11: '7th Year (Old)', 12: 'Other - 11th Year of Schooling', 13: '2nd year complementary high school course',
        14: '10th Year of Schooling', 18: 'General commerce course', 19: 'Basic Education 3rd Cycle (9th/10th/11th Year) or Equiv.',
        20: 'Complementary High School Course', 22: 'Technical-professional course', 25: 'Complementary High School Course - not concluded',
        26: '7th year of schooling', 27: '2nd cycle of the general high school course', 29: '9th Year of Schooling - Not Completed',
        30: '8th year of schooling', 31: 'General Course of Administration and Commerce', 33: 'Supplementary Accounting and Administration',
        34: 'Unknown', 35: 'Can\'t read or write', 36: 'Can read without having a 4th year of schooling',
        37: 'Basic education 1st cycle (4th/5th year) or equiv.', 38: 'Basic Education 2nd Cycle (6th/7th/8th Year) or Equiv.',
        39: 'Technological specialization course', 40: 'Higher education - degree (1st cycle)',
        41: 'Specialized higher studies course', 42: 'Professional higher technical course',
        43: 'Higher Education - Master (2nd cycle)', 44: 'Higher Education - Doctorate (3rd cycle)'
    },
    'Mother\'s Occupation': {
        0: 'Student', 1: 'Representatives of the Legislative Power and Executive Bodies, Directors, Directors and Executive Managers',
        2: 'Specialists in Intellectual and Scientific Activities', 3: 'Intermediate Level Technicians and Professions',
        4: 'Administrative staff', 5: 'Personal Services, Security and Safety Workers and Sellers',
        6: 'Farmers and Skilled Workers in Agriculture, Fisheries and Forestry', 7: 'Skilled Workers in Industry, Construction and Craftsmen',
        8: 'Installation and Machine Operators and Assembly Workers', 9: 'Unskilled Workers', 10: 'Armed Forces Professions',
        90: 'Other Situation', 99: '(blank)', 122: 'Health professionals', 123: 'Teachers', 125: 'Specialists in ICT',
        131: 'Intermediate level science and engineering technicians and professions', 132: 'Technicians and professionals, of intermediate level of health',
        134: 'Intermediate level technicians from legal, social, sports, cultural and similar services',
        141: 'Office workers, secretaries in general and data processing operators',
        143: 'Data, accounting, statistical, financial services and registry-related operators',
        144: 'Other administrative support staff', 151: 'Personal service workers', 152: 'Sellers',
        153: 'Personal care workers and the like', 171: 'Skilled construction workers and the like, except electricians',
        173: 'Skilled workers in printing, precision instrument manufacturing, jewelers, artisans and the like',
        175: 'Workers in food processing, woodworking, clothing and other industries and crafts',
        191: 'Cleaning workers', 192: 'Unskilled workers in agriculture, animal production, fisheries and forestry',
        193: 'Unskilled workers in extractive industry, construction, manufacturing and transport',
        194: 'Meal preparation assistants'
    },
    'Father\'s Occupation': {
        0: 'Student', 1: 'Representatives of the Legislative Power and Executive Bodies, Directors, Directors and Executive Managers',
        2: 'Specialists in Intellectual and Scientific Activities', 3: 'Intermediate Level Technicians and Professions',
        4: 'Administrative staff', 5: 'Personal Services, Security and Safety Workers and Sellers',
        6: 'Farmers and Skilled Workers in Agriculture, Fisheries and Forestry', 7: 'Skilled Workers in Industry, Construction and Craftsmen',
        8: 'Installation and Machine Operators and Assembly Workers', 9: 'Unskilled Workers', 10: 'Armed Forces Professions',
        90: 'Other Situation', 99: '(blank)', 101: 'Armed Forces Officers', 102: 'Armed Forces Sergeants', 103: 'Other Armed Forces personnel',
        112: 'Directors of administrative and commercial services', 114: 'Hotel, catering, trade and other services directors',
        121: 'Specialists in the physical sciences, mathematics, engineering and related techniques', 122: 'Health professionals',
        123: 'Teachers', 124: 'Specialists in finance, accounting, administrative organization, public and commercial relations',
        131: 'Intermediate level science and engineering technicians and professions', 132: 'Technicians and professionals, of intermediate level of health',
        134: 'Intermediate level technicians from legal, social, sports, cultural and similar services',
        135: 'Information and communication technology technicians', 141: 'Office workers, secretaries in general and data processing operators',
        143: 'Data, accounting, statistical, financial services and registry-related operators', 144: 'Other administrative support staff',
        151: 'Personal service workers', 152: 'Sellers', 153: 'Personal care workers and the like',
        154: 'Protection and security services personnel', 161: 'Market-oriented farmers and skilled agricultural and animal production workers',
        163: 'Farmers, livestock keepers, fishermen, hunters and gatherers, subsistence', 171: 'Skilled construction workers and the like, except electricians',
        172: 'Skilled workers in metallurgy, metalworking and similar', 174: 'Skilled workers in electricity and electronics',
        175: 'Workers in food processing, woodworking, clothing and other industries and crafts', 181: 'Fixed plant and machine operators',
        182: 'Assembly workers', 183: 'Vehicle drivers and mobile equipment operators', 192: 'Unskilled workers in agriculture, animal production, fisheries and forestry',
        193: 'Unskilled workers in extractive industry, construction, manufacturing and transport', 194: 'Meal preparation assistants',
        195: 'Street vendors (except food) and street service providers'
    }
}

# Decode each coded column with its lookup table. Series.map(dict) turns every
# value that is not a key of the dict into NaN, so a plain map would blank
# these columns if the cell were run a second time (the values would then be
# labels, not codes). Values that already are one of the target labels are
# therefore carried over unchanged, which makes the cell safe to re-run. Raw
# codes missing from a table still become NaN, exactly as with a plain map.
for column, mapping in mapping_columns.items():
    already_decoded = df[column].isin(list(mapping.values()))
    df[column] = df[column].map(mapping).where(~already_decoded, df[column])

In [7]:
# Sanity check after decoding: print each column's non-null count and dtype.
# A drop in a non-null count here would point at codes missing from a mapping.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4424 entries, 0 to 4423
Data columns (total 26 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Marital Status                        4424 non-null   object 
 1   Application Mode                      4424 non-null   object 
 2   Application Order                     4424 non-null   int64  
 3   Course                                4424 non-null   object 
 4   Daytime/Evening Attendance            4424 non-null   object 
 5   Previous Qualification                4424 non-null   object 
 6   Previous Qualification Grade          4424 non-null   float64
 7   Nationality                           4424 non-null   object 
 8   Mother's Qualification                4424 non-null   object 
 9   Father's Qualification                4424 non-null   object 
 10  Mother's Occupation                   4424 non-null   object 
 11  Father's Occupati

In [10]:
# Shift every index label up by one so the exported IDs are 1-based rather than
# 0-based. NOTE: the shift is applied to df itself, so running this cell again
# moves the labels up by one more each time.
shifted_index = df.index + 1
df.index = shifted_index

# Write the cleaned frame to disk; the index becomes the first CSV column,
# under the header 'ID'.
cleaned_csv_path = 'data/student_data_cleaned.csv'
df.to_csv(cleaned_csv_path, index_label='ID')