# ***Recommendations: Personalized Attraction Suggestions***

This notebook implements:
 1. Collaborative Filtering (User-Item Matrix)
 2. Content-Based Filtering
 3. Hybrid Recommendation Approach

## **1. Data Preparation**

In [7]:
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings('ignore')

# Load datasets
# The dataset folder is defined once. It defaults to the original local path
# and can be redirected by setting the TOURISM_DATA_DIR environment variable,
# so the notebook can run on another machine without editing this cell.
DATA_DIR = Path(os.environ.get('TOURISM_DATA_DIR', 'D:\\Projects\\Guvi_Project4\\Datasets'))

print("Loading datasets...")
try:
    ratings = pd.read_csv(DATA_DIR / 'transaction_data.csv')
    attractions = pd.read_csv(DATA_DIR / 'attraction_type.csv')
    items = pd.read_csv(DATA_DIR / 'item_data.csv')
except FileNotFoundError as e:
    # Report which file is missing, then stop: nothing below can run without it.
    print(f"Error loading dataset: {e}")
    raise

# Merge attraction data (inner join: items without a matching type row are dropped)
attraction_data = pd.merge(items, attractions, on='AttractionTypeId')

# Verify required columns
print("\nChecking required columns...")
required_cols = {'ratings': ['UserId', 'AttractionId', 'Rating'],
                'attraction_data': ['AttractionId', 'AttractionType', 'AttractionAddress']}

# Explicit name -> DataFrame mapping; avoids resolving variable names with eval().
frames_by_name = {'ratings': ratings, 'attraction_data': attraction_data}

for df_name, cols in required_cols.items():
    missing = set(cols) - set(frames_by_name[df_name].columns)
    if missing:
        raise ValueError(f"Missing columns in {df_name}: {missing}")

# Prepare rating matrix
print("\nPreparing rating matrix...")
# Keep only the three columns used downstream and drop incomplete rows.
ratings = ratings[['UserId', 'AttractionId', 'Rating']].dropna()

# Filter users with at least 3 ratings
user_counts = ratings['UserId'].value_counts()
valid_users = user_counts[user_counts >= 3].index
ratings = ratings[ratings['UserId'].isin(valid_users)]


Loading datasets...

Checking required columns...

Preparing rating matrix...


## **2. Collaborative Filtering**

### **2.1 User-Item Matrix**

In [8]:
# Pivot the ratings into a users x attractions grid; cells with no rating become 0
user_item_matrix = pd.pivot_table(
    ratings,
    values='Rating',
    index='UserId',
    columns='AttractionId',
    fill_value=0,
)

# Sparse (CSR) copy of the grid, used as input for the similarity computations
sparse_matrix = csr_matrix(user_item_matrix.to_numpy())

# Cosine similarity between rows (users) and between columns (attractions)
user_similarity = cosine_similarity(sparse_matrix)
item_similarity = cosine_similarity(sparse_matrix.T)

# Wrap both similarity arrays in DataFrames labelled with the matrix's own ids
user_ids = user_item_matrix.index
attraction_ids = user_item_matrix.columns

user_sim_df = pd.DataFrame(user_similarity, index=user_ids, columns=user_ids)
item_sim_df = pd.DataFrame(item_similarity, index=attraction_ids, columns=attraction_ids)

### **2.2 Recommendation Function**

In [9]:
def collaborative_recommend(user_id, n=5, k=5):
    """
    Get top N attraction recommendations for a user using collaborative filtering.

    Parameters:
    - user_id: Id of the target user; must be a label of ``user_sim_df``.
    - n: Maximum number of distinct attractions to recommend.
    - k: Number of most-similar users whose ratings are pooled. Defaults to 5,
      the value that used to be hard-coded.

    Returns:
    - Rows of ``attraction_data`` for the recommended attractions, ordered from
      the highest to the lowest mean rating given by the k neighbours. An empty
      frame with ``attraction_data``'s columns is returned when the user is
      unknown or the neighbours rated nothing the user has not already rated.
    """
    if user_id not in user_sim_df.index:
        return pd.DataFrame(columns=attraction_data.columns)

    # Remove the target user by label rather than skipping position 0: on a tie
    # (or float rounding) another user can sort ahead of the target, and a
    # positional skip would then discard a real neighbour and keep the target.
    similar_users = (
        user_sim_df[user_id]
        .drop(labels=user_id)
        .sort_values(ascending=False)
        .head(k)
    )

    # Ratings from the neighbours, minus attractions the target user already rated
    user_rated = ratings.loc[ratings['UserId'] == user_id, 'AttractionId']
    candidates = ratings[
        ratings['UserId'].isin(similar_users.index)
        & ~ratings['AttractionId'].isin(user_rated)
    ]

    if candidates.empty:
        return pd.DataFrame(columns=attraction_data.columns)

    # Attraction ids ranked by the neighbours' mean rating, best first
    ranked_ids = (
        candidates.groupby('AttractionId')['Rating']
        .mean()
        .sort_values(ascending=False)
        .head(n)
        .index
    )

    # isin() alone would return rows in catalogue order; reorder them so the
    # result reflects the ranking computed above.
    matches = attraction_data[attraction_data['AttractionId'].isin(ranked_ids)]
    rank = {attraction: position for position, attraction in enumerate(ranked_ids)}
    order = matches['AttractionId'].map(rank).to_numpy().argsort(kind='stable')
    return matches.iloc[order]

# Smoke test: recommend for one randomly drawn user (skipped when no ratings survived filtering)
print("\nTesting collaborative filtering...")
if ratings.empty:
    print("No valid ratings data available")
else:
    sample_user = ratings['UserId'].sample(1).iloc[0]
    print(f"\nRecommendations for user {sample_user}:")
    display(collaborative_recommend(sample_user))


Testing collaborative filtering...

Recommendations for user 66324:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType
2,640,1,63,Sacred Monkey Forest Sanctuary,"Jl. Monkey Forest, Ubud 80571 Indonesia",Nature & Wildlife Areas


## **3. Content-Based Filtering**

### **3.1 Feature Engineering**

In [10]:
# Build one text field per attraction: its type followed by its address
print("\nPreparing content-based features...")
type_text = attraction_data['AttractionType'].fillna('')
address_text = attraction_data['AttractionAddress'].fillna('')
attraction_data['Features'] = type_text + " " + address_text

# Vectorise the text with TF-IDF, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(attraction_data['Features'])

# Pairwise cosine similarity between every pair of attractions
content_sim = cosine_similarity(tfidf_matrix)

# Label rows and columns with the attraction ids so lookups can be done by id
attraction_id_labels = attraction_data['AttractionId']
content_sim_df = pd.DataFrame(
    content_sim,
    index=attraction_id_labels,
    columns=attraction_id_labels,
)



Preparing content-based features...


### **3.2 Recommendation Function**

In [11]:
def content_based_recommend(attraction_id, n=5):
    """
    Get top N similar attractions based on content.

    Parameters:
    - attraction_id: Id of the reference attraction; must be a column label of
      ``content_sim_df``.
    - n: Maximum number of similar attractions to return.

    Returns:
    - Rows of ``attraction_data`` for the most similar attractions, ordered from
      most to least similar and never containing the reference attraction
      itself. An empty frame with ``attraction_data``'s columns is returned
      when the id is unknown.
    """
    if attraction_id not in content_sim_df.columns:
        return pd.DataFrame(columns=attraction_data.columns)

    # Exclude the reference attraction by label. Skipping position 0 after the
    # sort is unreliable: attractions with identical feature text tie with it
    # and may sort ahead, which would return the reference as its own match.
    scores = content_sim_df[attraction_id]
    scores = scores[scores.index != attraction_id]
    top_ids = scores.sort_values(ascending=False).head(max(n, 0)).index

    # isin() alone would return rows in catalogue order; reorder them so the
    # result reflects the similarity ranking.
    matches = attraction_data[attraction_data['AttractionId'].isin(top_ids)]
    rank = {attraction: position for position, attraction in enumerate(top_ids)}
    order = matches['AttractionId'].map(rank).to_numpy().argsort(kind='stable')
    return matches.iloc[order]

# Smoke test: look up neighbours of one randomly drawn attraction
print("\nTesting content-based filtering...")
if attraction_data.empty:
    print("No attraction data available")
else:
    sample_attraction = attraction_data['AttractionId'].sample(1).iloc[0]
    print(f"\nAttractions similar to {sample_attraction}:")
    display(content_based_recommend(sample_attraction))



Testing content-based filtering...

Attractions similar to 824:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType,Features
5,737,1,76,Tanah Lot Temple,"Kecamatan Kediri, Kabupaten Tabanan, Beraban 8...",Religious Sites,"Religious Sites Kecamatan Kediri, Kabupaten Ta..."
6,748,1,72,Tegalalang Rice Terrace,"Jalan Raya Ceking, Tegalalang 80517 Indonesia",Points of Interest & Landmarks,Points of Interest & Landmarks Jalan Raya Ceki...
7,749,1,93,Tegenungan Waterfall,"Jl. Raya Tegenungan, Kemenuh, Ubud 80581 Indon...",Waterfalls,"Waterfalls Jl. Raya Tegenungan, Kemenuh, Ubud ..."
9,841,1,92,Waterbom Bali,"Jl. Kartika Plaza, Kuta 80361 Indonesia",Water Parks,"Water Parks Jl. Kartika Plaza, Kuta 80361 Indo..."
29,1297,3,44,Yogyakarta Palace,Yogyakarta,Historic Sites,Historic Sites Yogyakarta


## **4. Hybrid Recommendation System**

In [12]:
def hybrid_recommend(user_id, n=5):
    """
    Combine collaborative and content-based filtering.

    Collaborative filtering proposes up to ``2 * n`` seed attractions; each seed
    is expanded with its 2 most content-similar attractions. The expanded set
    is de-duplicated, attractions the user has already rated are removed, and
    at most ``n`` rows are returned.

    Parameters:
    - user_id: Id of the target user.
    - n: Maximum number of attractions to return.

    Returns:
    - Rows of ``attraction_data`` sorted by ``AttractionId``. The sort gives a
      deterministic order; it is not a relevance ranking. An empty frame with
      ``attraction_data``'s columns is returned when there is nothing to
      recommend.
    """
    # Get collaborative recommendations
    collab_recs = collaborative_recommend(user_id, n * 2)

    if collab_recs.empty:
        return pd.DataFrame(columns=attraction_data.columns)

    # Collect the content-based neighbours of every seed, then concatenate once
    # instead of growing a DataFrame inside the loop.
    frames = [
        content_based_recommend(seed_id, 2)
        for seed_id in collab_recs['AttractionId']
    ]
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame(columns=attraction_data.columns)

    hybrid_recs = pd.concat(frames).drop_duplicates(subset=['AttractionId'])

    # Content similarity knows nothing about the user, so it can bring back
    # attractions the user already rated; filter them out, as the
    # collaborative step does.
    already_rated = ratings.loc[ratings['UserId'] == user_id, 'AttractionId']
    hybrid_recs = hybrid_recs[~hybrid_recs['AttractionId'].isin(already_rated)]

    return hybrid_recs.sort_values(by='AttractionId').head(n)

# Smoke test: hybrid recommendations for one randomly drawn user
print("\nTesting hybrid recommendation...")
if ratings.empty or attraction_data.empty:
    print("Insufficient data for hybrid recommendations")
else:
    sample_user = ratings['UserId'].sample(1).iloc[0]
    print(f"\nHybrid recommendations for user {sample_user}:")
    display(hybrid_recommend(sample_user))



Testing hybrid recommendation...

Hybrid recommendations for user 59746:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType,Features


## **5. Saving Recommendation Models**

In [13]:
import joblib
import os

# Make sure the output folder exists before anything is written into it
os.makedirs('tourism_models/recommendation', exist_ok=True)

# Persist every component; a failure is reported but does not stop the notebook
print("\nSaving recommendation models...")
try:
    # (object, destination) pairs written with joblib, in this order
    joblib_artifacts = [
        (user_sim_df, 'tourism_models/recommendation/user_similarity.pkl'),
        (item_sim_df, 'tourism_models/recommendation/item_similarity.pkl'),
        (content_sim_df, 'tourism_models/recommendation/content_similarity.pkl'),
        (tfidf, 'tourism_models/recommendation/tfidf_vectorizer.pkl'),
    ]
    for artifact, destination in joblib_artifacts:
        joblib.dump(artifact, destination)

    # The attraction catalogue is written with pandas' own pickling
    attraction_data.to_pickle('tourism_models/recommendation/attraction_data.pkl')
    print("All components saved successfully!")
except Exception as e:
    print(f"Error saving models: {e}")



Saving recommendation models...
All components saved successfully!


## **6. Production Recommendation Function**

In [15]:
def get_recommendations(user_id=None, attraction_id=None, n=5, method='hybrid'):
    """
    Single entry point that routes a request to one of the three recommenders.

    Parameters:
    - user_id: Required by the 'collaborative' and 'hybrid' methods
    - attraction_id: Required by the 'content' method
    - n: Number of recommendations
    - method: 'collaborative', 'content', or 'hybrid'

    Returns the DataFrame produced by the selected recommender. If the method
    is not recognised, the id it needs is missing, or the recommender raises,
    a message is printed and an empty frame with attraction_data's columns is
    returned instead.
    """
    try:
        # User-driven methods
        if user_id is not None:
            if method == 'collaborative':
                return collaborative_recommend(user_id, n)
            if method == 'hybrid':
                return hybrid_recommend(user_id, n)

        # Attraction-driven method
        if attraction_id is not None and method == 'content':
            return content_based_recommend(attraction_id, n)

        # Nothing matched: unknown method or the id it requires was not given
        print("Invalid parameters for recommendation method")
        return pd.DataFrame(columns=attraction_data.columns)
    except Exception as e:
        print(f"Recommendation error: {e}")
        return pd.DataFrame(columns=attraction_data.columns)

# Demonstrate each method once through the unified entry point
print("\nProduction recommendation examples:")
if ratings.empty or attraction_data.empty:
    print("Insufficient data for demonstration")
else:
    # One random user and one random attraction drive all three examples
    sample_user = ratings['UserId'].sample(1).iloc[0]
    sample_attraction = attraction_data['AttractionId'].sample(1).iloc[0]

    print("\nCollaborative:")
    display(get_recommendations(user_id=sample_user, method='collaborative'))

    print("\nContent-Based:")
    display(get_recommendations(attraction_id=sample_attraction, method='content'))

    print("\nHybrid:")
    display(get_recommendations(user_id=sample_user, method='hybrid'))


Production recommendation examples:

Collaborative:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType,Features
0,369,1,13,Kuta Beach - Bali,Kuta,Beaches,Beaches Kuta
5,737,1,76,Tanah Lot Temple,"Kecamatan Kediri, Kabupaten Tabanan, Beraban 8...",Religious Sites,"Religious Sites Kecamatan Kediri, Kabupaten Ta..."



Content-Based:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType,Features
2,640,1,63,Sacred Monkey Forest Sanctuary,"Jl. Monkey Forest, Ubud 80571 Indonesia",Nature & Wildlife Areas,"Nature & Wildlife Areas Jl. Monkey Forest, Ubu..."
6,748,1,72,Tegalalang Rice Terrace,"Jalan Raya Ceking, Tegalalang 80517 Indonesia",Points of Interest & Landmarks,Points of Interest & Landmarks Jalan Raya Ceki...
8,824,1,76,Uluwatu Temple,"Jl. Raya Uluwatu Southern part of Bali, Pecatu...",Religious Sites,Religious Sites Jl. Raya Uluwatu Southern part...
12,897,2,93,Coban Rondo Waterfall,Malang District,Waterfalls,Waterfalls Malang District
28,1280,3,72,Water Castle (Tamansari),"Jl. Taman, 55133 Indonesia",Points of Interest & Landmarks,"Points of Interest & Landmarks Jl. Taman, 5513..."



Hybrid:


Unnamed: 0,AttractionId,AttractionCityId,AttractionTypeId,Attraction,AttractionAddress,AttractionType,Features
3,650,1,13,Sanur Beach,Sanur,Beaches,Beaches Sanur
4,673,1,13,Seminyak Beach,Seminyak,Beaches,Beaches Seminyak
8,824,1,76,Uluwatu Temple,"Jl. Raya Uluwatu Southern part of Bali, Pecatu...",Religious Sites,Religious Sites Jl. Raya Uluwatu Southern part...
29,1297,3,44,Yogyakarta Palace,Yogyakarta,Historic Sites,Historic Sites Yogyakarta
