# Simple Course Recommendation System

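This notebook builds a basic item-based course recommendation model for the LMS: it loads enrollments and courses from the LMS database, scores course-to-course similarity with cosine similarity over student interactions, and saves the resulting model to a pickle file.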

In [1]:
import os
import pickle

import numpy as np
import pandas as pd
import pymysql
from sklearn.metrics.pairwise import cosine_similarity

# Seed NumPy's global random generator so repeated runs are reproducible; the
# generic fallback reason in get_recommendation_reason is drawn from it via
# np.random.choice.
np.random.seed(42)

## 1. Connect to the LMS Database

In [2]:
# Database connection parameters.
# Values are read from environment variables so real credentials never have to
# be committed with the notebook; each default reproduces the previous
# local-development setting, so an unconfigured environment behaves as before.
db_params = {
    'host': os.environ.get('LMS_DB_HOST', 'localhost'),
    'user': os.environ.get('LMS_DB_USER', 'root'),
    'password': os.environ.get('LMS_DB_PASSWORD', ''),
    'database': os.environ.get('LMS_DB_NAME', 'lms_db'),
    'charset': 'utf8mb4',
    # DictCursor yields each row as a dict keyed by column name, which is what
    # lets execute_query hand the rows straight to pd.DataFrame.
    'cursorclass': pymysql.cursors.DictCursor
}

def execute_query(query, params=None):
    """Run a SQL query against the LMS database and return the rows as a DataFrame.

    A new connection is opened from ``db_params`` for every call and is always
    closed afterwards, even if the query raises.

    Args:
        query: SQL text to execute. Use driver placeholders (``%s`` or
            ``%(name)s``) for values instead of formatting them into the string.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver. ``None`` (the default) runs ``query``
            exactly as given.

    Returns:
        pd.DataFrame built from the fetched rows. When the query returns no
        rows but the cursor reports result columns, an empty DataFrame that
        still carries those column names is returned.
    """
    connection = pymysql.connect(**db_params)
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, params)
            rows = cursor.fetchall()
            # With zero rows pd.DataFrame cannot infer the columns from the
            # row dicts, so take the names from the cursor metadata instead.
            if not rows and cursor.description:
                return pd.DataFrame(columns=[column[0] for column in cursor.description])
            return pd.DataFrame(rows)
    finally:
        connection.close()

## 2. Extract Student Enrollment Data

In [3]:
# Extract enrollments data: every row of the enrollments table, reduced to the
# student, the course, and the completion_status that is later mapped to a
# numeric interaction strength.
enrollments_query = """
SELECT 
    e.student_id, 
    e.course_id, 
    e.completion_status
FROM enrollments e
"""

enrollments_df = execute_query(enrollments_query)
# Show the first rows as the cell output for a quick sanity check.
enrollments_df.head()

Unnamed: 0,student_id,course_id,completion_status
0,9,1,in_progress
1,9,2,not_started
2,9,5,not_started
3,10,3,completed
4,10,4,in_progress


In [4]:
# Extract course details. The LEFT JOIN keeps courses that have no matching
# department row; for those, department_name comes back as NULL.
courses_query = """
SELECT 
    c.id, 
    c.title, 
    c.description,
    c.instructor_id,
    c.department_id,
    d.name as department_name
FROM courses c
LEFT JOIN departments d ON c.department_id = d.id
"""

courses_df = execute_query(courses_query)
# Show the first rows as the cell output for a quick sanity check.
courses_df.head()

Unnamed: 0,id,title,description,instructor_id,department_id,department_name
0,1,Introduction to Programming,Learn the fundamentals of programming with Jav...,3,1,Computer Science
1,2,Web Development Basics,"Introduction to HTML, CSS, and modern web deve...",3,6,Web Development
2,3,Database Design,Learn how to design and implement relational d...,4,1,Computer Science
3,4,Advanced JavaScript,Deep dive into JavaScript frameworks and moder...,4,6,Web Development
4,5,Mobile App Development,Introduction to building mobile applications w...,5,7,Mobile Development


In [5]:
# Fall back to small hard-coded fixtures whenever a query returned no rows, so
# the remaining cells can still be exercised without a populated database.
if not len(enrollments_df):
    print("No enrollment data found. Using sample data for testing.")

    # Sample enrollments as (student_id, course_id, completion_status)
    enrollments_df = pd.DataFrame(
        [
            (1, 1, 'completed'),
            (1, 2, 'in_progress'),
            (1, 4, 'not_started'),
            (2, 1, 'completed'),
            (2, 3, 'completed'),
            (3, 2, 'in_progress'),
            (3, 3, 'completed'),
            (4, 1, 'in_progress'),
            (4, 4, 'not_started'),
            (5, 2, 'completed'),
            (5, 5, 'in_progress'),
        ],
        columns=['student_id', 'course_id', 'completion_status'],
    )

if not len(courses_df):
    print("No course data found. Using sample data for testing.")

    # Sample courses as (id, title, department_id, description)
    courses_df = pd.DataFrame(
        [
            (1, 'Introduction to Programming', 1, 'Learn programming basics'),
            (2, 'Web Development Basics', 1, 'HTML, CSS and JavaScript fundamentals'),
            (3, 'Database Design', 1, 'Relational database principles'),
            (4, 'Advanced JavaScript', 1, 'Deep dive into JavaScript frameworks'),
            (5, 'Mobile App Development', 2, 'Build mobile apps with React Native'),
            (6, 'Artificial Intelligence Fundamentals', 3, 'Introduction to AI concepts'),
            (7, 'DevOps Practices', 1, 'CI/CD and deployment strategies'),
        ],
        columns=['id', 'title', 'department_id', 'description'],
    )

## 3. Create Student-Course Interaction Matrix

In [6]:
# Numeric weight per completion status: the further along the student is, the
# stronger the signal (completed > in_progress > not_started).
status_map = dict(completed=1.0, in_progress=0.5, not_started=0.2)

# Attach the numeric weight to every enrollment row; a status that is not a key
# of status_map maps to NaN.
enrollments_df['interaction_strength'] = enrollments_df['completion_status'].map(status_map)

# Students as rows, courses as columns, interaction strength as the cell value;
# student/course pairs without an enrollment are filled with 0.
user_course_matrix = pd.pivot_table(
    enrollments_df,
    values='interaction_strength',
    index='student_id',
    columns='course_id',
    fill_value=0,
)

# Show the matrix as the cell output
print("Student-Course Interaction Matrix:")
user_course_matrix

Student-Course Interaction Matrix:


course_id,1,2,3,4,5
student_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9,0.5,0.2,0.0,0.0,0.2
10,0.0,0.0,1.0,0.5,0.0
11,1.0,0.5,0.5,0.0,0.0


## 4. Calculate Course Similarity Matrix

In [7]:
# Item-item similarity: transpose so each course becomes a row vector of
# per-student interaction strengths, then compare the courses pairwise.
course_similarity = cosine_similarity(user_course_matrix.transpose())

# Label both axes with the course ids so scores can be looked up by id
similarity_labels = user_course_matrix.columns
course_similarity_df = pd.DataFrame(
    data=course_similarity,
    index=similarity_labels,
    columns=similarity_labels,
)

print("Course Similarity Matrix:")
course_similarity_df

Course Similarity Matrix:


course_id,1,2,3,4,5
course_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,0.996546,0.4,0.0,0.447214
2,0.996546,1.0,0.415227,0.0,0.371391
3,0.4,0.415227,1.0,0.894427,0.0
4,0.0,0.0,0.894427,1.0,0.0
5,0.447214,0.371391,0.0,0.0,1.0


## 5. Build Course Recommendation Function

In [8]:
def get_course_recommendations(student_id, n=3):
    """Get the top N course recommendations for a student.

    Every course the student has no interaction with is scored as the sum,
    over the student's enrolled courses, of
    ``similarity(enrolled, candidate) * interaction_strength(enrolled)``.
    Candidates are limited to the columns of ``user_course_matrix``; a
    candidate with a score of 0 can still be returned when fewer than ``n``
    candidates score higher.

    Args:
        student_id: Row label of the student in ``user_course_matrix``.
        n: Maximum number of recommendations to return. ``None`` returns all
            candidates; zero or a negative value returns an empty list.

    Returns:
        A list of dicts with keys ``course_id``, ``score``, ``title``,
        ``description`` and ``reason``, ordered by descending score (ties keep
        the column order of ``user_course_matrix``). If ``student_id`` is not
        in the matrix, a message string is returned instead of a list; callers
        should check the type before iterating.
    """
    # Check if the student exists in our matrix
    if student_id not in user_course_matrix.index:
        return f"Student {student_id} not found in training data"

    # A negative n would otherwise slice from the end ([:n]) and silently
    # return all but the last |n| candidates.
    if n is not None and n <= 0:
        return []

    # Courses the student has already interacted with (strength > 0)
    student_courses = user_course_matrix.loc[student_id]
    enrolled_courses = student_courses[student_courses > 0].index.tolist()
    enrolled_lookup = set(enrolled_courses)  # constant-time membership test

    # Calculate recommendation scores for all courses not yet enrolled in
    recommendation_scores = {}
    for course_id in user_course_matrix.columns:
        if course_id in enrolled_lookup:
            continue

        # Weighted score: similar courses the student engaged with more
        # strongly contribute more.
        score = 0
        for enrolled_course in enrolled_courses:
            similarity = course_similarity_df.loc[enrolled_course, course_id]
            interaction = student_courses.loc[enrolled_course]
            score += similarity * interaction

        recommendation_scores[course_id] = score

    # Sort courses by recommendation score (stable, so ties keep column order)
    sorted_recommendations = sorted(
        recommendation_scores.items(), key=lambda item: item[1], reverse=True
    )

    # Return top N recommendations with course details
    result = []
    for course_id, score in sorted_recommendations[:n]:
        # Filter courses_df once per recommendation and reuse the result
        matching_courses = courses_df[courses_df['id'] == course_id]
        if len(matching_courses) > 0:
            course_info = matching_courses.iloc[0].to_dict()
        else:
            course_info = {
                'title': f'Course {course_id}',
                'description': 'No description available',
            }
        result.append({
            'course_id': course_id,
            'score': score,
            'title': course_info.get('title', f'Course {course_id}'),
            'description': course_info.get('description', 'No description available'),
            'reason': get_recommendation_reason(student_id, course_id, enrolled_courses)
        })

    return result

def get_recommendation_reason(student_id, recommended_course_id, enrolled_courses, min_similarity=0.3):
    """Generate a human-readable reason for recommending a course.

    Looks up the enrolled course most similar to the recommended one in
    ``course_similarity_df``. If that similarity is strictly greater than
    ``min_similarity`` and the course is present in ``courses_df``, the reason
    names that course's title. Otherwise a generic reason is drawn with
    ``np.random.choice``, which consumes NumPy's global random state.

    Args:
        student_id: Accepted for interface compatibility; not used.
        recommended_course_id: Id of the course being recommended.
        enrolled_courses: Ids of the courses the student already interacts with.
        min_similarity: Similarity the best-matching enrolled course must
            exceed for it to be named in the reason. Defaults to 0.3.

    Returns:
        The reason as a builtin ``str``.
    """
    # Find the most similar enrolled course (the first one wins on ties)
    most_similar_course_id = None
    highest_similarity = -1

    for enrolled_course in enrolled_courses:
        similarity = course_similarity_df.loc[enrolled_course, recommended_course_id]
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_similar_course_id = enrolled_course

    if most_similar_course_id is not None and highest_similarity > min_similarity:
        similar_course = courses_df[courses_df['id'] == most_similar_course_id]
        if len(similar_course) > 0:
            return f"Similar to '{similar_course.iloc[0]['title']}' which you've taken"

    # Generic reasons if we can't find a specific one
    reasons = [
        "Based on your learning history",
        "Popular among students with similar interests",
        "Complements your current skillset"
    ]
    # np.random.choice returns a NumPy string scalar; convert it so callers
    # always receive a plain Python str.
    return str(np.random.choice(reasons))

# Smoke-test the recommender on the first student in the interaction matrix
student_id = user_course_matrix.index[0]
recommendations = get_course_recommendations(student_id)

print(f"\nTop recommendations for student {student_id}:")
for rec in recommendations:
    # One four-line block per recommendation, closed by a '---' separator
    print(
        f"Course {rec['course_id']} ({rec['title']}): Score {rec['score']:.2f}",
        f"Reason: {rec['reason']}",
        f"Description: {rec['description']}",
        "---",
        sep="\n",
    )


Top recommendations for student 9:
Course 3 (Database Design): Score 0.28
Reason: Similar to 'Web Development Basics' which you've taken
Description: Learn how to design and implement relational databases. Topics include ER diagrams, normalization, SQL, and database optimization.
---
Course 4 (Advanced JavaScript): Score 0.00
Reason: Complements your current skillset
Description: Deep dive into JavaScript frameworks and modern practices. Learn about closures, async programming, and front-end frameworks.
---


## 6. Package Model for Production

In [9]:
# Bundle the components needed to produce recommendations: the interaction
# matrix, the course-to-course similarity matrix, and the course catalogue.
recommendation_model = dict(
    user_course_matrix=user_course_matrix,
    course_similarity_matrix=course_similarity_df,
    courses=courses_df,
)

# Serialize the bundle with pickle into the current working directory
with open('recommendation_model.pkl', mode='wb') as model_file:
    pickle.dump(recommendation_model, model_file)

print("Model saved to recommendation_model.pkl")

Model saved to recommendation_model.pkl
