In [3]:
import json
import pandas as pd
import numpy as np
import warnings
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Filter out specific warnings
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Read the JSON file
with open('courses.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Extract the data from the 'value' key
course_data = data['value']

# Convert course data to DataFrame
df_courses = pd.DataFrame(course_data)

# Select important attributes for embeddings
df_courses['embedding_text'] = df_courses.apply(
    lambda row: f"{row['NAME']} {row['PROFESSOR']} {row['DAYS']} {row['STARTTIME']} {row['ENDTIME']} {row['ROOM']} {row['INSTRUCTION_MODE']}",
    axis=1
)

# Generate embeddings for the selected attributes
model = SentenceTransformer('bert-base-nli-mean-tokens')
embeddings = model.encode(df_courses['embedding_text'].tolist())
embeddings = np.array(embeddings, dtype=np.float64)

# Function to recommend courses based on cosine similarity
def recommend_cosine(course, embeddings, courses_df, top_n=3):
    course_embedding = model.encode([course])[0]
    similarities = cosine_similarity([course_embedding], embeddings)[0]
    indices = np.argsort(similarities)[::-1][1:top_n+1]
    return courses_df.iloc[indices]

# Function to recommend courses using clustering (K-Means)
def recommend_kmeans(course, embeddings, courses_df, top_n=3, num_clusters=5):
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)
    cluster_labels = kmeans.predict(embeddings)
    course_embedding = model.encode([course])[0]
    course_embedding = np.array(course_embedding, dtype=np.float64)  # Ensure dtype consistency
    cluster = kmeans.predict([course_embedding])[0]
    cluster_indices = np.where(cluster_labels == cluster)[0]
    similarities = cosine_similarity([course_embedding], embeddings[cluster_indices])[0]
    sorted_cluster_indices = cluster_indices[np.argsort(similarities)[::-1][1:top_n+1]]
    return courses_df.iloc[sorted_cluster_indices]

# Function to recommend courses using neural network (Autoencoder)
def recommend_autoencoder(course, embeddings, courses_df, top_n=3):
    scaler = StandardScaler()
    embeddings_scaled = scaler.fit_transform(embeddings)
    course_embedding = scaler.transform(model.encode([course]))

    autoencoder = MLPRegressor(hidden_layer_sizes=(64, 32, 64), max_iter=5000, random_state=42)
    autoencoder.fit(embeddings_scaled, embeddings_scaled)

    course_embedding_pred = autoencoder.predict(course_embedding)
    similarities = cosine_similarity(course_embedding_pred, embeddings_scaled)[0]
    indices = np.argsort(similarities)[::-1][1:top_n+1]
    return courses_df.iloc[indices]

# Define the test course to replace
test_course = "C S 329E Data Analytics TR 9:30 a.m.-11:00 a.m. Instructor XYZ"

# Print original course
print("Original course:", test_course)

# Generate recommendations using different methods
print("\nRecommendations using Cosine Similarity:")
recommendations_cosine = recommend_cosine(test_course, embeddings, df_courses)
print(recommendations_cosine[['NAME', 'DAYS', 'STARTTIME', 'ENDTIME']])

print("\nRecommendations using K-Means Clustering:")
recommendations_kmeans = recommend_kmeans(test_course, embeddings, df_courses)
print(recommendations_kmeans[['NAME', 'DAYS', 'STARTTIME', 'ENDTIME']])

print("\nRecommendations using Autoencoder:")
recommendations_autoencoder = recommend_autoencoder(test_course, embeddings, df_courses)
print(recommendations_autoencoder[['NAME', 'DAYS', 'STARTTIME', 'ENDTIME']])


Original course: C S 329E Data Analytics TR 9:30 a.m.-11:00 a.m. Instructor XYZ

Recommendations using Cosine Similarity:
                                    NAME DAYS STARTTIME ENDTIME
87   C S 329E ELEMENTS OF DATA ANALYTICS  TTH      8:30   10:00
95   C S 330E ELMNTS SOFTWARE ENGINEER I   MW      9:30   11:00
5   C S 309 AI LITERACY: ESSENT OF AI-WB  TTH      9:30   11:00

Recommendations using K-Means Clustering:
                                    NAME DAYS STARTTIME ENDTIME
87   C S 329E ELEMENTS OF DATA ANALYTICS  TTH      8:30   10:00
95   C S 330E ELMNTS SOFTWARE ENGINEER I   MW      9:30   11:00
5   C S 309 AI LITERACY: ESSENT OF AI-WB  TTH      9:30   11:00

Recommendations using Autoencoder:
                                   NAME DAYS STARTTIME ENDTIME
95  C S 330E ELMNTS SOFTWARE ENGINEER I   MW      9:30   11:00
96  C S 330E ELMNTS SOFTWARE ENGINEER I   MW     11:00   12:30
87  C S 329E ELEMENTS OF DATA ANALYTICS  TTH      8:30   10:00


In [4]:
import numpy as np
import json
import warnings
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Filter out specific warnings
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Sample course dataset
courses = [
    "Introduction to Computer Science",
    "Advanced Data Structures",
    "Machine Learning",
    "Database Systems",
    "Algorithms",
    "Artificial Intelligence",
    "Software Engineering",
    "Computer Networks",
    "Operating Systems",
    "Computer Graphics",
    "Data Mining"
]

# Load pre-trained model for embeddings
model = SentenceTransformer('bert-base-nli-mean-tokens')

# Generate embeddings
embeddings = model.encode(courses)

# Ensure embeddings are of type float64 (double)
embeddings = np.array(embeddings, dtype=np.float64)

# Save embeddings and course names
with open('course_embeddings.json', 'w') as f:
    json.dump({
        'courses': courses,
        'embeddings': embeddings.tolist()
    }, f)

# Function to recommend courses based on cosine similarity
def recommend_cosine(course, embeddings, courses, top_n=3):
    course_embedding = model.encode([course])[0]
    similarities = cosine_similarity([course_embedding], embeddings)[0]
    indices = np.argsort(similarities)[::-1][1:top_n+1]
    return [courses[i] for i in indices]

# Function to recommend courses using clustering (K-Means)
def recommend_kmeans(course, embeddings, courses, top_n=3, num_clusters=5):
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)
    cluster_labels = kmeans.predict(embeddings)
    course_embedding = model.encode([course])[0]
    course_embedding = np.array(course_embedding, dtype=np.float64)  # Ensure dtype consistency
    cluster = kmeans.predict([course_embedding])[0]
    cluster_indices = np.where(cluster_labels == cluster)[0]
    similarities = cosine_similarity([course_embedding], embeddings[cluster_indices])[0]
    sorted_cluster_indices = cluster_indices[np.argsort(similarities)[::-1][1:top_n+1]]
    return [courses[i] for i in sorted_cluster_indices]

# Function to recommend courses using neural network (Autoencoder)
def recommend_autoencoder(course, embeddings, courses, top_n=3):
    scaler = StandardScaler()
    embeddings_scaled = scaler.fit_transform(embeddings)
    course_embedding = scaler.transform(model.encode([course]))

    autoencoder = MLPRegressor(hidden_layer_sizes=(64, 32, 64), max_iter=5000, random_state=42)
    autoencoder.fit(embeddings_scaled, embeddings_scaled)

    course_embedding_pred = autoencoder.predict(course_embedding)
    similarities = cosine_similarity(course_embedding_pred, embeddings_scaled)[0]
    indices = np.argsort(similarities)[::-1][1:top_n+1]
    return [courses[i] for i in indices]

# Test the different methods
test_course = "Advanced Data Structures"

print("Original course:", test_course)
print("\nRecommendations using Cosine Similarity:")
print(recommend_cosine(test_course, embeddings, courses))

print("\nRecommendations using K-Means Clustering:")
print(recommend_kmeans(test_course, embeddings, courses))

print("\nRecommendations using Autoencoder:")
print(recommend_autoencoder(test_course, embeddings, courses))


Original course: Advanced Data Structures

Recommendations using Cosine Similarity:
['Algorithms', 'Operating Systems', 'Data Mining']

Recommendations using K-Means Clustering:
['Algorithms', 'Operating Systems']

Recommendations using Autoencoder:
['Algorithms', 'Artificial Intelligence', 'Operating Systems']
