# 📘 Mongolian University Entrance Examination Results Analysis & Dashboard

## 🧾 Project Overview

This project analyzes student performance data from the Mongolian University Entrance Examinations across multiple subjects and provinces. The goal is to consolidate subject-specific exam datasets and build a comprehensive dashboard to uncover key insights on student outcomes, regional trends, percentile thresholds, and subject participation.

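The analysis follows a standard data science workflow: the per-subject datasets are first preprocessed and combined into a single dataset, and per-subject percentile thresholds are then computed from the combined data.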

---

In [2]:
# Import necessary libraries
import pandas as pd
import os

## Data Preprocessing

In [3]:
# Directory that holds the per-subject CSV files
folder_path = "Dataset"

# Lookup: numeric area code (the "Areacode" column) -> region name
region_map = {
    1: "Arkhangai Province",
    2: "Bayan-Ölgii Province",
    3: "Bayankhongor Province",
    4: "Bulgan Province",
    5: "Govi-Altai Province",
    6: "Govisümber Province",
    7: "Darkhan-Uul Province",
    8: "Dornogovi Province",
    9: "Dornod Province",
    10: "Dundgovi Province",
    11: "Zavkhan Province",
    12: "Orkhon Province",
    13: "Övörkhangai Province",
    14: "Ömnögovi Province",
    15: "Sükhbaatar Province",
    16: "Selenge Province",
    17: "Töv Province",
    18: "Uvs Province",
    19: "Khovd Province",
    20: "Khentii Province",
    21: "Khövsgöl Province",
    22: "Baganuur District",
    23: "Ulaanbaatar City",
}

# Lookup: region name (a value of region_map) -> code stored in "ISO_Code"
iso_map = {
    "Arkhangai Province": "MN-073",
    "Bayan-Ölgii Province": "MN-071",
    "Bayankhongor Province": "MN-069",
    "Bulgan Province": "MN-067",
    "Govi-Altai Province": "MN-065",
    "Govisümber Province": "MN-064",
    "Darkhan-Uul Province": "MN-037",
    "Dornogovi Province": "MN-063",
    "Dornod Province": "MN-061",
    "Dundgovi Province": "MN-059",
    "Zavkhan Province": "MN-057",
    "Orkhon Province": "MN-035",
    "Övörkhangai Province": "MN-055",
    "Ömnögovi Province": "MN-053",
    "Sükhbaatar Province": "MN-051",
    "Selenge Province": "MN-049",
    "Töv Province": "MN-047",
    "Uvs Province": "MN-046",
    "Khovd Province": "MN-043",
    "Khentii Province": "MN-039",
    "Khövsgöl Province": "MN-041",
    "Baganuur District": "MN-020",
    "Ulaanbaatar City": "MN-1",
}

# Build one combined dataset from every per-subject CSV in the folder.
# The file name (extension removed, capitalized) becomes the "Subject" value
# of every row loaded from that file.

# Initialize list to collect dataframes
all_data = []

# Folder containing datasets
folder_path = "Dataset"

# os.listdir() returns entries in arbitrary order; sorting keeps the row
# order of the combined file reproducible from run to run.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".csv"):
        continue

    # splitext removes only the trailing extension, whereas
    # str.replace(".csv", "") would also delete ".csv" inside the name.
    subject = os.path.splitext(filename)[0].capitalize()

    # Load and process
    df = pd.read_csv(os.path.join(folder_path, filename))
    df["Subject"] = subject
    # Area codes absent from region_map (and regions absent from iso_map)
    # become NaN instead of raising.
    df["Region"] = df["Areacode"].map(region_map)
    df["ISO_Code"] = df["Region"].map(iso_map)

    all_data.append(df)

# pd.concat([]) fails with a generic "No objects to concatenate"; raise the
# same exception type with a message that names the folder instead.
if not all_data:
    raise ValueError(f"No .csv files found in {folder_path!r}")

# Combine all into one dataset
combined_df = pd.concat(all_data, ignore_index=True)

# Drop unnecessary columns
combined_df = combined_df.drop(columns="Placement")

# Save combined dataset
output_path = "combined_subjects_dataset.csv"
combined_df.to_csv(output_path, index=False)

# Bare expression so the notebook displays the written file name
output_path


'combined_subjects_dataset.csv'

In [15]:
import pandas as pd

# Compute, for every subject, the raw and converted score at a fixed set of
# percentiles and save the result as one row per (subject, percentile) pair.

# Load your combined dataset
# NOTE(review): the preprocessing cell writes "combined_subjects_dataset.csv",
# but this cell reads "Combined Dataset.csv" — confirm which file is intended.
df = pd.read_csv("Combined Dataset.csv")  # Or use .xlsx if needed

# Define the target percentiles (fractions passed straight to quantile())
percentiles = [0.9, 0.75, 0.6, 0.5, 0.4, 0.25]

# Prepare an empty list to hold results
results = []

# Group by Subject and compute percentiles
for subject, group in df.groupby("Subject"):
    for p in percentiles:
        results.append({
            "Subject": subject,
            # ".0%" rounds to the nearest whole percent. int(p * 100)
            # truncates, so a fraction such as 0.29
            # (0.29 * 100 == 28.999999999999996) would be labelled "28%".
            "Percentile": f"{p:.0%}",
            "Firstscore": round(group["Firstscore"].quantile(p), 2),
            "Convertedscore": round(group["Convertedscore"].quantile(p), 2),
        })

# Create a DataFrame
percentile_df = pd.DataFrame(results)

# Save the result to CSV or Excel
percentile_df.to_csv("Subject_Percentile_Scores.csv", index=False)
# Or: percentile_df.to_excel("Subject_Percentile_Scores.xlsx", index=False)
