In [2]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

In [3]:

# Load the CSV into a DataFrame
df = pd.read_csv('../data/prep_dataset.csv')
df.head()


Unnamed: 0,title,text,authors,tags
0,Introduction to music information retrieval wi...,"In this article, I am going to illustrate some...",['Jean-Michel D'],Music
1,How Twitter does Automated Testing,It was 8:00 AM and I entered the Zoom call. Ka...,['Mark Hendersog'],"Web Development, Software Development"
2,Why you should start using short URLs in your ...,Photo by Luke Chesser on Unsplash\r\n\r\nWhen ...,['Joachim Zeelmaekers'],Programming
3,Why music isn’t a top-two category on Patreon ...,"Cherie Hu: Hey, Wyatt, thanks so much for join...",['Cherie Hu'],"Business, Startup, Music, Technology"
4,Love Not Fear: Freedom of Speech,I saw an article about a popular video platfor...,['Imogen Sita'],Love


In [4]:
df['tags'] = df['tags'].apply(lambda x: x.split(','))
print(df['tags'].head())

0                                      [Music]
1     [Web Development,  Software Development]
2                                [Programming]
3    [Business,  Startup,  Music,  Technology]
4                                       [Love]
Name: tags, dtype: object


In [5]:
transactions = df['tags'].tolist()

te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
transaction_df = pd.DataFrame(te_ary, columns=te.columns_)
transaction_df

Unnamed: 0,Art,Artificial Intelligence,Bitcoin,Blockchain,Business,Coronavirus,Covid 19,Creativity,Cryptocurrency,Culture,...,Self Improvement,Social Media,Software Development,Startup,Technology,Travel,UX,Web Development,Work,Writing
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2175,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2176,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2177,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2178,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [7]:
frequent_itemsets = apriori(transaction_df, min_support=0.001, use_colnames=True)
num_itemsets = frequent_itemsets.shape[0]
rules = association_rules(frequent_itemsets, num_itemsets=num_itemsets, metric="support", min_threshold=0.001)

In [8]:
def recommend_tags(user_tags, rules, num_recommendations=5):
    # Filter rules where antecedents overlap with user's tags
    relevant_rules = rules[rules['antecedents'].apply(lambda x: len(x.intersection(user_tags)) > 0)]
    
    # Get unique consequents (recommended tags), sorted by confidence
    recommendations = relevant_rules[['consequents', 'confidence']].sort_values(by='confidence', ascending=False)
    recommended_tags = set()
    for _, row in recommendations.iterrows():
        recommended_tags.update(row['consequents'])
        if len(recommended_tags) >= num_recommendations:
            break
    
    # Return top recommendations
    return list(recommended_tags)[:num_recommendations]

# Example: User selects some tags
user_tags = {'Life', 'Programming'}
recommended_tags = recommend_tags(user_tags, rules)
print("\nUser's Chosen Tags:", user_tags)
print("Recommended Tags:", recommended_tags)


User's Chosen Tags: {'Programming', 'Life'}
Recommended Tags: [' Software Development', ' Python', ' Web Development', ' Data Science', ' JavaScript']


In [10]:

# def recommend_tags(user_tags, df, num_recommendations=10, min_support=0.001):
#     # Step 1: Prepare the dataset
#     # Convert articles' tags to a one-hot encoded dataframe for apriori algorithm
#     transactions = df['tags'].tolist()
#     te = TransactionEncoder()
#     te_ary = te.fit_transform(transactions)
#     transaction_df = pd.DataFrame(te_ary, columns=te.columns_)
    
#     # Step 2: Apply Apriori algorithm to find frequent itemsets
#     frequent_itemsets = apriori(transaction_df, min_support=min_support, use_colnames=True)    
    
#     # Step 3: Generate association rules from frequent itemsets
#     num_itemsets = frequent_itemsets.shape[0]
#     rules = association_rules(frequent_itemsets, num_itemsets=num_itemsets, metric="support", min_threshold=min_support)

#     # Step 4: Filter rules where antecedents overlap with user's tags
#     relevant_rules = rules[rules['antecedents'].apply(lambda x: len(x.intersection(user_tags)) > 0)]
    
#     # Step 5: Get unique consequents (recommended tags), sorted by support
#     recommendations = relevant_rules[['consequents', 'support']].sort_values(by='support', ascending=False)
#     recommended_tags = set()
    
#     for _, row in recommendations.iterrows():
#         recommended_tags.update(row['consequents'])
#         if len(recommended_tags) >= num_recommendations:
#             break
    
#     # Step 6: Include user tags in the recommended tags
#     recommended_tags.update(user_tags)
    
#     # Step 7: Return top recommendations (limited to num_recommendations)
#     return list(recommended_tags)[:num_recommendations]


In [9]:
# # Example: User selects some tags
# user_tags = {'AI', 'Data Science'}
# recommended_tags = recommend_tags(user_tags,df)
# print("\nUser's Chosen Tags:", user_tags)
# print("Recommended Tags:", recommended_tags)