In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.model_selection import train_test_split

# Load the course catalogue from the CSV file next to the notebook
data = pd.read_csv("./coursea_data.csv")
# Hold out 20% of the rows; the fixed seed keeps the split reproducible
train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)
# Work on an explicit copy: adding a column to the frame returned by
# train_test_split can otherwise trigger pandas' SettingWithCopyWarning.
train_df = train_df.copy()

# Combine relevant text-based attributes into a single column for TF-IDF.
# Missing cells become empty strings and every value is cast to str, so the
# join cannot fail on NaN or on a non-string value.
text_attributes = ['course_organization', 'course_Certificate_type', 'course_difficulty']
train_df['course_text'] = (
    train_df[text_attributes]
    .fillna('')
    .astype(str)
    .apply(' '.join, axis=1)
)

# Fit the TF-IDF vocabulary on the training text only (English stop words removed)
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(train_df['course_text'])

# Pairwise similarity between all training courses. TfidfVectorizer
# L2-normalises rows by default, so this dot product is the cosine similarity.
course_similarity_matrix_train = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get course recommendations
def get_course_recommendations(course_attributes, n=5):
    """Recommend up to ``n`` training-set course titles for the given attributes.

    The attribute values are joined into one query string, vectorised with the
    already-fitted ``tfidf_vectorizer`` and scored against every row of
    ``tfidf_matrix``; the best-scoring rows of ``train_df`` are returned.

    Args:
        course_attributes: Mapping whose values are the attribute texts to
            match (the keys are not used in the query).
        n: Maximum number of titles to return.

    Returns:
        A list of ``course_title`` values ordered from most to least similar.
        The list is empty when ``n`` is not positive or when no training
        course shares any term with the query.
    """
    if n <= 0:
        return []

    # str() guards against non-string attribute values in the mapping.
    course_text = ' '.join(str(value) for value in course_attributes.values())
    course_tfidf = tfidf_vectorizer.transform([course_text])
    cosine_similarities = linear_kernel(course_tfidf, tfidf_matrix).flatten()

    # The query is free-form text, not a row of the training matrix, so the
    # top hit is a genuine recommendation and must not be skipped.
    ranked_indices = cosine_similarities.argsort()[::-1][:n]

    # A score of zero means no shared vocabulary; such rows are not matches,
    # and dropping them lets callers detect the "no recommendations" case.
    matched_indices = [index for index in ranked_indices if cosine_similarities[index] > 0]

    # argsort yields positions, so index positionally with iloc.
    return train_df.iloc[matched_indices]['course_title'].tolist()

# Example query: the attribute values a user might supply
course_attributes = dict(
    course_organization="IBM",
    course_Certificate_type="COURSE",
    course_difficulty="Beginner",
)

# Run the query and report either the titles found or a fallback message
recommended_courses = get_course_recommendations(course_attributes)
print(
    f"Recommended courses: {recommended_courses}"
    if recommended_courses
    else "No recommendations available for the provided attributes."
)

ModuleNotFoundError: No module named 'sklearn'

In [None]:
import joblib

# ... (previous code)

# Pairwise similarity of every training-set course against every other
course_similarity_matrix_train = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

# Persist the similarity matrix to disk with joblib
joblib.dump(value=course_similarity_matrix_train, filename='course_recommendation')


: 