In [3]:
!pip install fuzzywuzzy



Defaulting to user installation because normal site-packages is not writeable
Collecting fuzzywuzzy
  Using cached fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [5]:
import pandas as pd

# Load your datasets
df_courses = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\Coursera_courses.csv")      # course names, institutions, urls
df_stats = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\coursea_data.csv")            # ratings, difficulty, etc.
df_reviews = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\reviews.csv")               # text reviews + sentiment labels


In [7]:
import re

def extract_slug(text):
    # Extract course slug from URL or use name directly
    if "coursera.org" in text:
        match = re.search(r"/(?:learn|specializations)/([a-zA-Z0-9\-]+)", text)
        return match.group(1).replace('-', ' ') if match else text.lower()
    else:
        return text.lower()


In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# Load review dataset
df_reviews = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\reviews.csv")  # contains Review, Label

# Features and labels
X = df_reviews['Review']
y = df_reviews['Label']

# TF-IDF vectorizer (fixed max_features for consistency)
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
X_vec = tfidf.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(max_iter=300)
model.fit(X_train, y_train)

# Evaluate
print("Classification Report:\n", classification_report(y_test, model.predict(X_test)))

# Save model and vectorizer
joblib.dump(model, "sentiment_model.pkl")
joblib.dump(tfidf, "vectorizer.pkl")
print("✅ Model and vectorizer saved!")


Classification Report:
               precision    recall  f1-score   support

           1       0.60      0.35      0.45       493
           2       0.38      0.08      0.14       484
           3       0.28      0.12      0.17       933
           4       0.46      0.22      0.30      3613
           5       0.81      0.97      0.89     15881

    accuracy                           0.77     21404
   macro avg       0.51      0.35      0.39     21404
weighted avg       0.72      0.77      0.73     21404

✅ Model and vectorizer saved!


In [35]:
import pandas as pd
import joblib
import difflib

# Load CSVs
df_courses = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\Coursera_courses.csv")      # Contains name, institution, course_id
df_stats = pd.read_csv(r"C:\Users\SACHIN\OneDrive\Desktop\Projects(ZENITH)\coursea_data.csv")            # Contains course_title, course_rating, difficulty
# df_reviews is not needed here, it was used during training only

# Load trained model and vectorizer
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Helper to extract course slug from Coursera URL or input name
def extract_slug(text):
    if "coursera.org" in text:
        parts = text.strip('/').split('/')
        if "learn" in parts:
            idx = parts.index("learn")
            if idx + 1 < len(parts):
                return parts[idx + 1].replace("-", " ")
    return text.lower()

# Predict sentiment for sample reviews
def predict_sentiment(text_series):
    X = vectorizer.transform(text_series)
    return model.predict(X)

# Main analysis function
def analyze_course(user_input):
    slug = extract_slug(user_input)

    all_names = df_courses['name'].dropna().str.lower().tolist()
    all_ids = df_courses['course_id'].dropna().str.lower().tolist()
    candidates = all_names + all_ids

    best_match = difflib.get_close_matches(slug, candidates, n=1, cutoff=0.4)
    if not best_match:
        print("❌ No matching course found.")
        return

    match = best_match[0]
    matched = df_courses[df_courses['name'].str.lower() == match]
    if matched.empty:
        matched = df_courses[df_courses['course_id'].str.lower() == match]
    if matched.empty:
        print("❌ Still no exact match found.")
        return

    course = matched.iloc[0]
    course_name = course['name']
    course_provider = course['institution']

    print(f"\n✅ Course: {course_name}")
    print(f"🏫 Offered by: {course_provider}")

    # Stats (rating, difficulty, enrolled)
    stats_match = df_stats[df_stats['course_title'].str.lower().str.contains(course_name.lower(), na=False)]
    if not stats_match.empty:
        stats = stats_match.iloc[0]
        print(f"⭐ Rating: {stats['course_rating']} / 5")
        print(f"📊 Difficulty: {stats['course_difficulty']}")
        print(f"🎓 Enrolled: {stats['course_students_enrolled']} students")
    else:
        print("ℹ️ No rating/difficulty info found.")

    # Simulated reviews
    reviews = pd.Series([
        "Great and informative course!",
        "Too basic, not what I expected.",
        "Very detailed and engaging.",
        "The content felt outdated.",
        "Awesome projects and explanations!"
    ])
    
    sentiments = predict_sentiment(reviews)
    pos = sum(sentiments)
    neg = len(sentiments) - pos

    print("\n📝 Simulated Sentiment Review Summary:")
    print(f"👍 Positive Reviews: {pos}")
    print(f"👎 Negative Reviews: {neg}")

    print(f"\n📘 Description: '{course_name}' is a course offered by {course_provider} on Coursera.")

# Run in CLI
user_input = input("Enter course name or Coursera URL: ")
analyze_course(user_input)


Enter course name or Coursera URL:  AI For Everyone



✅ Course: AI For Everyone
🏫 Offered by: DeepLearning.AI
⭐ Rating: 4.8 / 5
📊 Difficulty: Beginner
🎓 Enrolled: 350k students

📝 Simulated Sentiment Review Summary:
👍 Positive Reviews: 21
👎 Negative Reviews: -16

📘 Description: 'AI For Everyone' is a course offered by DeepLearning.AI on Coursera.


In [37]:
import os
os.getcwd()


'C:\\Users\\SACHIN\\Documents\\PROJECT INTERNSHIP\\Course review analyser'