In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Sample catalogue of places used to build the recommender.
# This dict must be live code: `pd.DataFrame(data)` below raises NameError
# on a fresh kernel if it is left commented out.
# 'activities' is vectorised in this cell; 'rating' and 'user_ratings_total'
# are the columns the normalisation cell reads.
data = {
    'name': ['Place A', 'Place B', 'Place C'],
    'rating': [4.5, 3.9, 4.8],
    'user_ratings_total': [200, 150, 180],
    'activities': ['Hiking, Swimming', 'Beach', 'Hiking, Sightseeing']
}

places_df = pd.DataFrame(data)

# TF-IDF Vectorizer for Activities.
# `tfidf` and `activities_matrix` are kept as notebook-level names because the
# similarity cell reuses the fitted vocabulary and the per-place vectors.
tfidf = TfidfVectorizer(stop_words='english')
activities_matrix = tfidf.fit_transform(places_df['activities'])

# Convert the sparse matrix to a DataFrame with one column per vocabulary term
activities_df = pd.DataFrame(activities_matrix.toarray(), columns=tfidf.get_feature_names_out())

# Add the TF-IDF vectorized activities back to the main DataFrame.
# Both frames carry the default 0..n-1 index here, so the column-wise concat
# lines rows up one-to-one.
places_df = pd.concat([places_df, activities_df], axis=1)
print(places_df)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the average rating and the number of ratings with min-max scaling so
# that both features share a common scale before they are combined.
source_columns = ['rating', 'user_ratings_total']
scaled_columns = ['Normalized_Average_Rating', 'Normalized_Total_Ratings']

scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(places_df[source_columns])

# Store the scaled values next to the raw ones; the raw columns are left untouched.
places_df[scaled_columns] = scaled_values

print(places_df)


In [None]:
bucket_list_destination = 'Place A'  # Example bucket list destination

# Add a boost to places that match the bucket list destination: 10 for the
# matching row, 0 for every other row.
# The place-name column in this notebook's schema is 'name' (alongside
# 'rating' / 'user_ratings_total'); no 'Place' column is ever created, so
# indexing it raised KeyError.
# The comparison is vectorised instead of a row-wise apply.
places_df['Bucket_List_Boost'] = (
    places_df['name'].eq(bucket_list_destination).astype(int) * 10
)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Example user preference: the activities the user is interested in.
user_activities = 'Hiking, Swimming'

# Project the preference string into the TF-IDF space that was already fitted
# on the places (transform only, so the vocabulary is not re-learned).
user_activities_vector = tfidf.transform([user_activities])

# Compare the single user vector against every place vector.
cosine_sim = cosine_similarity(
    X=user_activities_vector,
    Y=activities_matrix,
)

In [None]:
# Calculate the final score.
#
# Every term is read from a column that actually exists at this point:
#   * 'Similarity_Score' is materialised here from `cosine_sim`. The similarity
#     cell only leaves the raw array in scope; it has one row (the single user
#     vector) and one entry per place, so flattening it gives one score per row.
#   * 'Normalized_Average_Rating' / 'Normalized_Total_Ratings' are the min-max
#     scaled columns; 'Average_Rating' and 'Total_Ratings' were never created.
#   * 'Bucket_List_Boost' is 10 or 0, so its 0.1 weight contributes 1.0 or 0.
places_df['Similarity_Score'] = cosine_sim.ravel()

places_df['Final_Score'] = (
    (places_df['Similarity_Score'] * 0.5) +
    (places_df['Normalized_Average_Rating'] * 0.2) +
    (places_df['Bucket_List_Boost'] * 0.1) +
    (places_df['Normalized_Total_Ratings'] * 0.2)
)

# Sort places based on the final score (best first)
recommended_places = places_df.sort_values(by='Final_Score', ascending=False)

# Display top 5 recommended places; the place-name column is 'name', not 'Place'
print(recommended_places[['name', 'Final_Score']].head(5))
