In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the student records and reduce them to a binary Pass/Fail problem.
df = pd.read_csv("data.csv")

# Fold "Distinction" into "Pass", then keep only rows whose outcome is Pass or Fail.
df["final_result"] = df["final_result"].replace({"Distinction": "Pass"})
# .copy() makes the filtered frame independent of the one it was sliced from, so
# the column assignment below cannot trigger pandas' SettingWithCopyWarning.
df = df[df["final_result"].isin(["Pass", "Fail"])].copy()

# Grouping the education levels together, as "No Formal quals" and
# "Post Graduate Qualification" have very low counts; each is merged into the
# closest education qualification.
# NOTE: the merged lower band is spelled "Lower Than A level" (lowercase "l"),
# which differs in case from the source category "Lower Than A Level"; any
# later comparison against this column must use the lowercase spelling.
df["highest_education"] = df["highest_education"].replace(
    {
        "Post Graduate Qualification": "HE Qualification",
        "Lower Than A Level": "Lower Than A level",
        "No Formal quals": "Lower Than A level",
    }
)

# Drop every row that still contains a missing value in any column.
df = df.dropna()

In [2]:
# Keep only the rows that belong to module 'AAA'.
df = df.loc[df['code_module'].eq('AAA')]

In [3]:
# Display the column labels currently present in df (last expression of the cell).
df.columns

Index(['id_student', 'code_module', 'code_presentation', 'total_score*weight',
       'attempted_weight', 'adjusted_mark', 'mark', 'gender', 'region',
       'highest_education', 'imd_band', 'age_band', 'num_of_prev_attempts',
       'studied_credits', 'disability', 'final_result', 'module_presentation'],
      dtype='object')

In [4]:
# Highest adjusted mark among failing rows and lowest adjusted mark among
# passing rows; together they bracket the pass/fail boundary in the data.
max_failling_marks = df.loc[df['final_result'] == 'Fail', 'adjusted_mark'].max()
min_passing_marks = df.loc[df['final_result'] == 'Pass', 'adjusted_mark'].min()

In [5]:
# Report the marks observed on either side of the pass/fail split.
print(
    f"Max failing marks: {max_failling_marks}",
    f"Min passing marks: {min_passing_marks}",
    sep="\n",
)
# For the sake of this example, the passing mark is assumed to be 40.

Max failing marks: 39.21779141104295
Min passing marks: 40.456852791877736


In [6]:
# Pass/fail threshold on the adjusted mark assumed for this analysis.
passing_mark = 40

In [7]:
# 1-D NumPy array of adjusted marks, one entry per remaining row of df.
marks = df['adjusted_mark'].to_numpy()

In [8]:
from sklearn.mixture import GaussianMixture
# Three two-component mixtures that differ only in how their parameters are
# initialised; seed and covariance structure are identical across them.
gmm1, gmm2, gmm3 = (
    GaussianMixture(
        n_components=2,
        random_state=0,
        init_params=init_method,
        covariance_type='tied',
    )
    for init_method in ('random', 'kmeans', 'k-means++')
)

In [9]:
# Fit every mixture on the same data: marks reshaped into a single-column
# 2-D array (one row per mark).
marks_column = marks.reshape(-1, 1)
gmm1.fit(marks_column)
gmm2.fit(marks_column)
gmm3.fit(marks_column)

In [10]:
# Component index assigned by each fitted mixture to every mark.
gmm1_labels, gmm2_labels, gmm3_labels = (
    fitted.predict(marks.reshape(-1, 1)) for fitted in (gmm1, gmm2, gmm3)
)

In [11]:
# Sweep the marks 0, 1, ..., 100 and print the component gmm1 assigns to each.
# All grid points are classified in one predict call, then printed one by one.
mark_grid = np.linspace(0, 100, 101)
for x, cluster in zip(mark_grid, gmm1.predict(mark_grid.reshape(-1, 1))):
    print(f"GMM1 x: {x}, Cluster: {cluster}")

GMM1 x: 0.0, Cluster: 1
GMM1 x: 1.0, Cluster: 1
GMM1 x: 2.0, Cluster: 1
GMM1 x: 3.0, Cluster: 1
GMM1 x: 4.0, Cluster: 1
GMM1 x: 5.0, Cluster: 1
GMM1 x: 6.0, Cluster: 1
GMM1 x: 7.0, Cluster: 1
GMM1 x: 8.0, Cluster: 1
GMM1 x: 9.0, Cluster: 1
GMM1 x: 10.0, Cluster: 1
GMM1 x: 11.0, Cluster: 1
GMM1 x: 12.0, Cluster: 1
GMM1 x: 13.0, Cluster: 1
GMM1 x: 14.0, Cluster: 1
GMM1 x: 15.0, Cluster: 1
GMM1 x: 16.0, Cluster: 1
GMM1 x: 17.0, Cluster: 1
GMM1 x: 18.0, Cluster: 1
GMM1 x: 19.0, Cluster: 1
GMM1 x: 20.0, Cluster: 1
GMM1 x: 21.0, Cluster: 1
GMM1 x: 22.0, Cluster: 1
GMM1 x: 23.0, Cluster: 1
GMM1 x: 24.0, Cluster: 1
GMM1 x: 25.0, Cluster: 1
GMM1 x: 26.0, Cluster: 1
GMM1 x: 27.0, Cluster: 1
GMM1 x: 28.0, Cluster: 1
GMM1 x: 29.0, Cluster: 1
GMM1 x: 30.0, Cluster: 1
GMM1 x: 31.0, Cluster: 1
GMM1 x: 32.0, Cluster: 1
GMM1 x: 33.0, Cluster: 1
GMM1 x: 34.0, Cluster: 1
GMM1 x: 35.0, Cluster: 1
GMM1 x: 36.0, Cluster: 1
GMM1 x: 37.0, Cluster: 1
GMM1 x: 38.0, Cluster: 1
GMM1 x: 39.0, Cluster: 1
GMM1 x: 40

In [12]:
# Sweep the marks 0, 1, ..., 100 and print the component gmm2 assigns to each.
# All grid points are classified in one predict call, then printed one by one.
mark_grid = np.linspace(0, 100, 101)
for x, cluster in zip(mark_grid, gmm2.predict(mark_grid.reshape(-1, 1))):
    print(f"GMM2 x: {x}, Cluster: {cluster}")

GMM2 x: 0.0, Cluster: 0
GMM2 x: 1.0, Cluster: 0
GMM2 x: 2.0, Cluster: 0
GMM2 x: 3.0, Cluster: 0
GMM2 x: 4.0, Cluster: 0
GMM2 x: 5.0, Cluster: 0
GMM2 x: 6.0, Cluster: 0
GMM2 x: 7.0, Cluster: 0
GMM2 x: 8.0, Cluster: 0
GMM2 x: 9.0, Cluster: 0
GMM2 x: 10.0, Cluster: 0
GMM2 x: 11.0, Cluster: 0
GMM2 x: 12.0, Cluster: 0
GMM2 x: 13.0, Cluster: 0
GMM2 x: 14.0, Cluster: 0
GMM2 x: 15.0, Cluster: 0
GMM2 x: 16.0, Cluster: 0
GMM2 x: 17.0, Cluster: 0
GMM2 x: 18.0, Cluster: 0
GMM2 x: 19.0, Cluster: 0
GMM2 x: 20.0, Cluster: 0
GMM2 x: 21.0, Cluster: 0
GMM2 x: 22.0, Cluster: 0
GMM2 x: 23.0, Cluster: 0
GMM2 x: 24.0, Cluster: 0
GMM2 x: 25.0, Cluster: 0
GMM2 x: 26.0, Cluster: 0
GMM2 x: 27.0, Cluster: 0
GMM2 x: 28.0, Cluster: 1
GMM2 x: 29.0, Cluster: 1
GMM2 x: 30.0, Cluster: 1
GMM2 x: 31.0, Cluster: 1
GMM2 x: 32.0, Cluster: 1
GMM2 x: 33.0, Cluster: 1
GMM2 x: 34.0, Cluster: 1
GMM2 x: 35.0, Cluster: 1
GMM2 x: 36.0, Cluster: 1
GMM2 x: 37.0, Cluster: 1
GMM2 x: 38.0, Cluster: 1
GMM2 x: 39.0, Cluster: 1
GMM2 x: 40

In [14]:
# Sweep the marks 0, 1, ..., 100 and print the component gmm3 assigns to each.
# All grid points are classified in one predict call, then printed one by one.
mark_grid = np.linspace(0, 100, 101)
for x, cluster in zip(mark_grid, gmm3.predict(mark_grid.reshape(-1, 1))):
    print(f"GMM3 x: {x}, Cluster: {cluster}")

GMM3 x: 0.0, Cluster: 0
GMM3 x: 1.0, Cluster: 0
GMM3 x: 2.0, Cluster: 0
GMM3 x: 3.0, Cluster: 0
GMM3 x: 4.0, Cluster: 0
GMM3 x: 5.0, Cluster: 0
GMM3 x: 6.0, Cluster: 0
GMM3 x: 7.0, Cluster: 0
GMM3 x: 8.0, Cluster: 0
GMM3 x: 9.0, Cluster: 0
GMM3 x: 10.0, Cluster: 0
GMM3 x: 11.0, Cluster: 0
GMM3 x: 12.0, Cluster: 0
GMM3 x: 13.0, Cluster: 0
GMM3 x: 14.0, Cluster: 0
GMM3 x: 15.0, Cluster: 0
GMM3 x: 16.0, Cluster: 0
GMM3 x: 17.0, Cluster: 0
GMM3 x: 18.0, Cluster: 0
GMM3 x: 19.0, Cluster: 0
GMM3 x: 20.0, Cluster: 0
GMM3 x: 21.0, Cluster: 0
GMM3 x: 22.0, Cluster: 0
GMM3 x: 23.0, Cluster: 0
GMM3 x: 24.0, Cluster: 0
GMM3 x: 25.0, Cluster: 0
GMM3 x: 26.0, Cluster: 0
GMM3 x: 27.0, Cluster: 0
GMM3 x: 28.0, Cluster: 0
GMM3 x: 29.0, Cluster: 0
GMM3 x: 30.0, Cluster: 0
GMM3 x: 31.0, Cluster: 0
GMM3 x: 32.0, Cluster: 0
GMM3 x: 33.0, Cluster: 0
GMM3 x: 34.0, Cluster: 0
GMM3 x: 35.0, Cluster: 0
GMM3 x: 36.0, Cluster: 0
GMM3 x: 37.0, Cluster: 0
GMM3 x: 38.0, Cluster: 0
GMM3 x: 39.0, Cluster: 0
GMM3 x: 40