In [2]:
!pip install "numpy<2" && pip install git+https://github.com/NicolasHug/Surprise.git

Collecting git+https://github.com/NicolasHug/Surprise.git
  Cloning https://github.com/NicolasHug/Surprise.git to /tmp/pip-req-build-jamg8kk8
  Running command git clone --filter=blob:none --quiet https://github.com/NicolasHug/Surprise.git /tmp/pip-req-build-jamg8kk8
  Resolved https://github.com/NicolasHug/Surprise.git to commit 2381fb11d0c4bf917cc4b9126f205d0013649966
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... done
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl size=2611000 sha256=f91ffed4de7e9cd552900ac70939fbb200462710e320e94cbae798dda6afde49
  Stored in directory: /tmp/pip-ephem-wheel-cache-rhxr0egf/wheels/81/ef/f8/c6e173e03432de2eeaa047537fada2fa9b7cf37559598634f0
Successfully buil

In [3]:
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [None]:
# Read the personalized-learning dataset into a DataFrame.
dataset_csv = 'adaptiveLearning/data/personalized_learning_dataset.csv'
df = pd.read_csv(dataset_csv)

In [5]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0,Student_ID,Age,Gender,Education_Level,Course_Name,Time_Spent_on_Videos,Quiz_Attempts,Quiz_Scores,Forum_Participation,Assignment_Completion_Rate,Engagement_Level,Final_Exam_Score,Learning_Style,Feedback_Score,Dropout_Likelihood
0,S00001,15,Female,High School,Machine Learning,171,4,67,2,89,Medium,51,Visual,1,No
1,S00002,49,Male,Undergraduate,Python Basics,156,4,64,0,94,Medium,92,Reading/Writing,5,No
2,S00003,20,Female,Undergraduate,Python Basics,217,2,55,2,67,Medium,45,Reading/Writing,1,No
3,S00004,37,Female,Undergraduate,Data Science,489,1,65,43,60,High,59,Visual,4,No
4,S00005,34,Female,Postgraduate,Python Basics,496,3,59,34,88,Medium,93,Visual,3,No


In [11]:
# List the distinct engagement labels present in the data.
df["Engagement_Level"].unique()

array(['Medium', 'High', 'Low'], dtype=object)

In [12]:
def _encode_labels(column, codes):
    """Map the string labels in ``column`` to the integers given by ``codes``.

    A column that already holds only encoded values is returned untouched, so
    re-running this cell does not turn every entry into NaN (``Series.map``
    yields NaN for any value that is not a key of ``codes``).

    Raises:
        ValueError: if the column contains a value that is neither a known
            label nor an already-encoded integer (missing values included).
    """
    if column.isin(list(codes.values())).all():
        return column  # already encoded by an earlier run of this cell
    encoded = column.map(codes)
    unmapped = column[encoded.isna()].unique()
    if len(unmapped):
        raise ValueError(f"{column.name}: unexpected labels {list(unmapped)}")
    return encoded.astype(int)


# Binary target: 1 = likely to drop out, 0 = not.
df['Dropout_Likelihood'] = _encode_labels(df['Dropout_Likelihood'], {"Yes": 1, "No": 0})
# Ordinal encoding: higher number = more engaged.
df['Engagement_Level'] = _encode_labels(df['Engagement_Level'], {"Low": 0, "Medium": 1, "High": 2})

In [13]:
# Check that Engagement_Level and Dropout_Likelihood now hold numeric codes.
df.head(n=5)

Unnamed: 0,Student_ID,Age,Gender,Education_Level,Course_Name,Time_Spent_on_Videos,Quiz_Attempts,Quiz_Scores,Forum_Participation,Assignment_Completion_Rate,Engagement_Level,Final_Exam_Score,Learning_Style,Feedback_Score,Dropout_Likelihood
0,S00001,15,Female,High School,Machine Learning,171,4,67,2,89,1,51,Visual,1,0
1,S00002,49,Male,Undergraduate,Python Basics,156,4,64,0,94,1,92,Reading/Writing,5,0
2,S00003,20,Female,Undergraduate,Python Basics,217,2,55,2,67,1,45,Reading/Writing,1,0
3,S00004,37,Female,Undergraduate,Data Science,489,1,65,43,60,2,59,Visual,4,0
4,S00005,34,Female,Postgraduate,Python Basics,496,3,59,34,88,1,93,Visual,3,0


In [14]:
# Feedback score weighted by the engagement code; the (1 - Dropout_Likelihood)
# factor zeroes the rating wherever Dropout_Likelihood is 1.
df["Adaptive_Rating"] = (
    df["Feedback_Score"]
    .mul(df["Engagement_Level"])
    .mul(1 - df["Dropout_Likelihood"])
)

In [15]:
# Inspect the newly added Adaptive_Rating column.
df.head(n=5)

Unnamed: 0,Student_ID,Age,Gender,Education_Level,Course_Name,Time_Spent_on_Videos,Quiz_Attempts,Quiz_Scores,Forum_Participation,Assignment_Completion_Rate,Engagement_Level,Final_Exam_Score,Learning_Style,Feedback_Score,Dropout_Likelihood,Adaptive_Rating
0,S00001,15,Female,High School,Machine Learning,171,4,67,2,89,1,51,Visual,1,0,1
1,S00002,49,Male,Undergraduate,Python Basics,156,4,64,0,94,1,92,Reading/Writing,5,0,5
2,S00003,20,Female,Undergraduate,Python Basics,217,2,55,2,67,1,45,Reading/Writing,1,0,1
3,S00004,37,Female,Undergraduate,Data Science,489,1,65,43,60,2,59,Visual,4,0,8
4,S00005,34,Female,Postgraduate,Python Basics,496,3,59,34,88,1,93,Visual,3,0,3


In [16]:
# Smallest and largest values currently in the Adaptive_Rating column.
min_r, max_r = df["Adaptive_Rating"].min(), df["Adaptive_Rating"].max()

In [17]:
# Rescale Adaptive_Rating linearly onto [1, 5] (min-max normalisation).
# The bounds are taken from the column's current values inside this cell, so the
# cell does not rely on stale `min_r` / `max_r`, and re-running it leaves
# already-scaled ratings unchanged (up to float rounding) instead of squashing them.
min_r, max_r = df["Adaptive_Rating"].min(), df["Adaptive_Rating"].max()
if not max_r > min_r:
    # A constant, empty or all-NaN column would divide by zero and yield NaN/inf ratings.
    raise ValueError(f"Cannot rescale Adaptive_Rating: min={min_r}, max={max_r}")
df["Adaptive_Rating"] = 1 + 4 * (df["Adaptive_Rating"] - min_r) / (max_r - min_r)

In [18]:
# Wrap the (student, course, rating) triples in a Surprise dataset whose
# declared rating scale is 1-5.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    df.loc[:, ["Student_ID", "Course_Name", "Adaptive_Rating"]],
    reader,
)
# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(data, random_state=42, test_size=0.2)

In [19]:
# SVD model with every hyperparameter spelled out and a fixed seed.
model = SVD(
    random_state=42,
    n_factors=50,
    n_epochs=20,
    lr_all=0.005,
    reg_all=0.02,
)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ac3191db7a0>

In [20]:
# Score the held-out ratings and report both error metrics on one line.
predictions = model.test(testset)
rmse, mae = (
    metric(predictions, verbose=False)
    for metric in (accuracy.rmse, accuracy.mae)
)
print(f"RMSE: {rmse:.4f}, MAE: {mae:.4f}")

RMSE: 1.1161, MAE: 0.9150


In [21]:
def recommend_courses(model, trainset, student_id, df, top_n=5):
    """Rank the courses a student has not taken by predicted rating.

    Args:
        model: Object exposing ``predict(student_id, course)`` whose result has
            an ``est`` attribute (here, the fitted Surprise model).
        trainset: Unused; kept so existing callers keep working.
        student_id: Identifier looked up in ``df["Student_ID"]``.
        df: DataFrame with ``Student_ID`` and ``Course_Name`` columns.
        top_n: Maximum number of recommendations; ``None`` returns all of them.

    Returns:
        A list of ``(course, predicted_rating)`` tuples, highest rating first.
        Ties keep the order in which the courses first appear in ``df``.

    Raises:
        ValueError: if ``top_n`` is negative (a negative slice bound would
            silently drop entries from the end instead of limiting the list).
    """
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative or None, got {top_n}")

    # Rows with a missing course name are not real candidates, so drop them
    # before asking the model for a prediction.
    course_names = df["Course_Name"].dropna()
    taken_courses = set(course_names[df["Student_ID"] == student_id])

    scored = [
        (course, model.predict(student_id, course).est)
        for course in course_names.unique()
        if course not in taken_courses
    ]

    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:top_n]

In [22]:
# Draw one student reproducibly (fixed seed) to demonstrate the recommender.
example_student = df["Student_ID"].sample(n=1, random_state=42).iat[0]
print(f"\nGenerating recommendations for student: {example_student}\n")

# Ask for up to five courses the student has not taken yet.
recommendations = recommend_courses(
    model, trainset, example_student, df, top_n=5
)

for course, score in recommendations:
    print(f"Recommended: {course} — Predicted rating: {score:.2f}")


Generating recommendations for student: S06253

Recommended: Web Development — Predicted rating: 2.46
Recommended: Cybersecurity — Predicted rating: 2.34
Recommended: Data Science — Predicted rating: 2.00
Recommended: Python Basics — Predicted rating: 1.87


In [23]:
# Most frequent learning style recorded for the example student.
student_style = df.loc[df["Student_ID"] == example_student, "Learning_Style"].mode()[0]
# NOTE(review): this is every course taken by at least one student with that
# style, so the filter below only removes courses no such student has taken.
preferred_courses = df.loc[df["Learning_Style"] == student_style, "Course_Name"].unique()

print(f"\nStudent's learning style: {student_style}")
print("Courses matching preferred style and predicted high ratings:")
for course, score in recommendations:
    if course not in preferred_courses:
        continue
    print(f"  - {course} (Predicted={score:.2f}) ✅")


Student's learning style: Reading/Writing
Courses matching preferred style and predicted high ratings:
  - Web Development (Predicted=2.46) ✅
  - Cybersecurity (Predicted=2.34) ✅
  - Data Science (Predicted=2.00) ✅
  - Python Basics (Predicted=1.87) ✅


In [None]:
import pickle
from pathlib import Path

# Persist the fitted model and the training set it was fitted on.
# The target folder is created first: open(..., "wb") does not create missing
# parent directories and would otherwise raise FileNotFoundError.
# Only unpickle these files from a trusted location, since unpickling can run arbitrary code.
model_dir = Path("adaptiveLearning/model")
model_dir.mkdir(parents=True, exist_ok=True)

with open(model_dir / "trained_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open(model_dir / "train_data.pkl", "wb") as f:
    pickle.dump(trainset, f)