In [35]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

In [36]:
def recommend_places(preferred_activities, bucket_list_destination):
    # Your existing model logic
    tfidf = TfidfVectorizer(stop_words='english')
    activities_matrix = tfidf.fit_transform(places_df['activities'])

    # Normalize ratings
    scaler = MinMaxScaler()
    places_df[['Normalized_Average_Rating', 'Normalized_Total_Ratings']] = scaler.fit_transform(places_df[['rating', 'user_ratings_total']])

    # Bucket list boost
    places_df['Bucket_List_Boost'] = places_df['name'].apply(lambda x: 10 if x == bucket_list_destination else 0)

    # Vectorize user activities
    user_activities_vector = tfidf.transform([preferred_activities])
    
    # Calculate similarity
    cosine_sim = cosine_similarity(user_activities_vector, activities_matrix)
    
    # Similarity score
    places_df['Similarity_Score'] = cosine_sim[0]
    
    # Calculate the final score
    places_df['Final_Score'] = (
        (places_df['Similarity_Score'] * 0.5) + 
        (places_df['Normalized_Average_Rating'] * 0.1) + 
        (places_df['Bucket_List_Boost'] * 0.3) + 
        (places_df['Normalized_Total_Ratings'] * 0.1)
    )
    
    # Sort and return top 5 places
    recommended_places = places_df.sort_values(by='Final_Score', ascending=False)
    return recommended_places[['name', 'Final_Score']].head(5)


In [37]:
test_data = pd.read_excel('DataSets/Visitors Preference Dataset.xlsx')  # Load your actual dataset
places_df = pd.read_excel('DataSets/PLACES_FINAL.xlsx')

In [38]:
test_data.head()

Unnamed: 0,User ID,Name,Email,Preferred Activities,Bucket list destinations Sri Lanka
0,1,Jennifer Quinn,jennifer.quinn@example.com,"['cycling', 'historical monuments', 'village h...","['Polonnaruwa', 'Hatton', 'Anuradhapura', 'Ell..."
1,2,Emily Perry,emily.perry@example.com,"['butterfly watching', 'hot springs', 'wildlif...","['Madunagala Hot Water Spring', 'Wilpattu Nati..."
2,3,Danielle Mcbride,danielle.mcbride@example.com,"['sea cruises', 'themed parks', 'craft worksho...","['Mirissa Beach', 'Negombo Lagoon', 'Batadomba..."
3,4,Angelica Wilson,angelica.wilson@example.com,"['fishing', 'hot springs', 'sailing']","['Maha Oya Hot Water Springs', 'Colombo Port C..."
4,5,Laurie Powers,laurie.powers@example.com,"['history tours', 'sailing', 'literary tours']","['Negombo Lagoon', 'Colombo Port City', 'Galle..."


In [39]:
def precision_at_k(recommended, relevant, k):
    recommended_at_k = recommended[:k]
    hits = len(set(recommended_at_k) & set(relevant))
    return hits / k

def evaluate_precision_at_k(test_data, model, k=5):
    activity_precisions = []
    destination_precisions = []
    
    for i, row in test_data.iterrows():
        preferred_activities = row['Preferred Activities']
        bucket_list = row['Bucket list destinations Sri Lanka']
        
        # Get the top K recommended places
        recommended_places = model(preferred_activities, bucket_list)['name'].tolist()
        
        # Check for activity matches
        relevant_activities = test_data[test_data['Preferred Activities'] == preferred_activities]['Preferred Activities'].tolist()
        activity_precision = precision_at_k(recommended_places, relevant_activities, k)
        activity_precisions.append(activity_precision)
        
        # Check for destination matches
        relevant_destinations = test_data[test_data['Bucket list destinations Sri Lanka'] == bucket_list]['Bucket list destinations Sri Lanka'].tolist()
        destination_precision = precision_at_k(recommended_places, relevant_destinations, k)
        destination_precisions.append(destination_precision)
    
    # Calculate average precision across all users
    avg_activity_precision = np.mean(activity_precisions)
    avg_destination_precision = np.mean(destination_precisions)
    
    print(f'Average Precision@{k} for Activities: {avg_activity_precision:.2f}')
    print(f'Average Precision@{k} for Destinations: {avg_destination_precision:.2f}')


In [41]:
# Select only the first 100 rows of the test dataset
small_test_data = test_data.head(100)

# Run the evaluation on the smaller dataset
evaluate_precision_at_k(small_test_data, recommend_places, k=5)


Average Precision@5 for Activities: 0.00
Average Precision@5 for Destinations: 0.00


In [31]:
# Load your test dataset with user preferences (Preferred Activities, Bucket list destinations)
# and your model function to get recommended places

evaluate_precision_at_k(test_data, recommend_places, k=5)

Average Precision@5 for Activities: 0.00
Average Precision@5 for Destinations: 0.00


In [33]:
    print(f'Average Precision@{5} for Activities: {avg_activity_precision:.6f}')
    print(f'Average Precision@{5} for Destinations: {avg_destination_precision:.6f}')

NameError: name 'avg_activity_precision' is not defined