In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

data_folder = '../ui/data'

# User/course table; judging by the preview, each row holds a user id and a
# comma-separated string of course ids.
# Kept under its own name rather than `df`, which a later cell binds to the
# course catalogue (all_courses.csv).
user_courses = pd.read_csv(f'{data_folder}/clean_user_with_courses.csv')

user_courses.head()

Unnamed: 0,user,item
0,2,"LB0105ENv1, ST0101EN, PY0101EN, ML0120ENv2, DA..."
1,4,"PY0101EN, BD0153EN, BD0101EN, DS0105EN, BD0135..."
2,5,"LB0107ENv1, ST0201EN, DB0151EN, DS0110EN, DS02..."
3,8,"BD0111EN, RP0105EN, BD0212EN"
4,9,"BD0131EN, BD0115EN, ML0115EN, BD0121EN, CB0103..."


In [2]:
# Course catalogue. `df` keeps this binding for the title lookup further down.
df = pd.read_csv(f'{data_folder}/all_courses.csv')

# Distinct course ids present in the catalogue.
unique_courses = pd.unique(df["course_id"])

len(unique_courses)

307

In [3]:
# Transaction matrix with one column per course, cast to booleans before
# being handed to apriori.
df_model = (
    pd.read_csv(f'{data_folder}/latest_transaction.csv')
    .astype(bool)
)
df_model.head()

Unnamed: 0,ML0201EN,ML0122EN,GPXX0ZG0EN,RP0105EN,GPXX0Z2PEN,CNSC02EN,DX0106EN,GPXX0FTCEN,RAVSCTEST1,GPXX06RFEN,...,excourse84,excourse85,excourse86,excourse87,excourse88,excourse89,excourse90,excourse91,excourse92,excourse93
0,False,True,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,True,False,True,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [4]:
# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT = 0.02  # minimum support passed to apriori
# NOTE(review): a lift below 1 indicates the antecedent makes the consequent
# *less* likely than chance, so a 0.6 cut-off keeps negatively associated
# rules. Confirm this is intended rather than a threshold of 1 or more.
MIN_LIFT = .6

frequent_items = apriori(df_model, min_support=MIN_SUPPORT, use_colnames=True)
rules = association_rules(frequent_items, metric='lift', min_threshold=MIN_LIFT)

# Strongest rules first: highest confidence, ties broken by highest lift.
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])

In [5]:
# Preview the first few mined itemsets together with their support values.
frequent_items.head()

Unnamed: 0,support,itemsets
0,0.033444,(RP0105EN)
1,0.054277,(CNSC02EN)
2,0.030174,(BD0145EN)
3,0.058697,(BD0212EN)
4,0.038424,(SC0103EN)


In [9]:
# Persist the sorted rules alongside the other data files.
# NOTE(review): no index=False is passed, so the DataFrame index is written as
# an extra leading column, and the itemset columns (presumably frozensets) are
# stored as their text representation. Confirm the reader of this file
# expects both.
rules.to_csv(f'{data_folder}/apriori_association_rules.csv')

In [11]:
#rules.to_csv('../data/association_rules.csv', index=False)


def recommend_courses(input_courses, rules_df=None):
    """Recommend courses implied by association rules for courses already taken.

    A rule contributes when every course in ``input_courses`` appears in its
    antecedents. The consequents of all contributing rules are pooled and the
    input courses themselves are removed from the result.

    Parameters
    ----------
    input_courses : str or iterable of str
        Course ids already taken. A bare string is treated as one course id
        rather than being split into characters. An empty iterable is a
        subset of every antecedent, so it matches every rule.
    rules_df : pandas.DataFrame, optional
        Rule table with set-like ``antecedents`` and ``consequents`` columns.
        When omitted, the notebook-level ``rules`` frame is used.

    Returns
    -------
    set or str
        The set of recommended course ids (unordered), or an explanatory
        message when no rule matches or when the matching rules only point at
        courses that are already in the input.
    """
    if isinstance(input_courses, str):
        input_courses = [input_courses]
    # Built once, outside the per-row predicate.
    taken = set(input_courses)

    rule_table = rules if rules_df is None else rules_df

    # astype(bool) keeps the mask a valid boolean indexer even when the rule
    # table has no rows.
    is_match = rule_table['antecedents'].apply(taken.issubset).astype(bool)
    matching_rules = rule_table[is_match]

    if matching_rules.empty:
        return "No recommendations found for the given courses."

    # No ranking step here: the result is an unordered set, so the order in
    # which rules are visited cannot change it.
    recommendations = set().union(*matching_rules['consequents']) - taken

    return recommendations if recommendations else "No new recommendations found."



In [12]:

# Example query. Titles noted for these ids:
# "data analysis with python", "introduction to data science".
input_courses = ['DS0101EN', 'DA0101EN']
recommended_courses = recommend_courses(input_courses)

# Show the query and its result on two lines.
print(
    f"Input Courses: {input_courses}",
    f"Recommended Courses: {recommended_courses}",
    sep="\n",
)

Input Courses: ['DS0101EN', 'DA0101EN']
Recommended Courses: {'CO0101EN', 'BD0111EN', 'RP0101EN', 'BD0211EN', 'PA0101EN', 'DS0103EN', 'PY0101EN', 'BC0101EN', 'DB0101EN', 'CB0103EN', 'ST0101EN', 'ML0151EN', 'DS0105EN', 'DV0101EN', 'ML0120ENv2', 'BD0101EN', 'ML0115EN', 'ML0103EN', 'DS0301EN', 'DV0151EN', 'CC0101EN', 'ML0101ENv3'}


In [15]:
# Look up catalogue details for the courses recommended in the previous cell,
# taking the ids from `recommended_courses` so the lookup always follows the
# current rules.
# recommend_courses returns a message string when it has nothing to
# recommend; in that case no ids are looked up.
search_ids = [] if isinstance(recommended_courses, str) else sorted(recommended_courses)
filtered_courses = df[df['course_id'].isin(search_ids)]

filtered_courses

Unnamed: 0,course_id,title,description
79,PA0101EN,predictive modeling fundamentals i,the predictive modelling fundamentals i course...
81,BC0101EN,blockchain essentials,understand blockchain technology and how it ca...
102,BD0211EN,spark fundamentals i,ignite your interest in spark with an introduc...
106,ST0101EN,statistics 101,take this course and you won t fail statistics...
134,DS0105EN,data science hands on with open source tools,what tools do data scientists use in this cou...
135,DS0103EN,data science methodology,grab you lab coat beakers and pocket calcula...
142,ML0115EN,deep learning 101,deep learning 101
147,BD0101EN,big data 101,how big is big and why does big matter and wha...
150,CO0101EN,docker essentials a developer introduction,learn how to use containers for your applicati...
156,CB0103EN,build your own chatbot,learn how to build chatbots without having to ...
