In [1]:
import plotly.express as px
import json
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import plotly.graph_objects as go
from itertools import chain

In [2]:
import plotly.io as pio

# Register a dark Plotly template (black canvas, light-green text and axes,
# gray grid lines) and make it the default for every figure created below.
pio.templates["custom_dark_green"] = go.layout.Template(
    layout={
        "paper_bgcolor": 'black',
        "plot_bgcolor": 'black',
        "font": {"color": 'lightgreen'},
        "xaxis": {"color": 'lightgreen', "gridcolor": 'gray'},
        "yaxis": {"color": 'lightgreen', "gridcolor": 'gray'},
    }
)

pio.templates.default = "custom_dark_green"

In [4]:
# Load the travel-cities dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../datasets/Worldwide_Travel_Cities_Dataset.csv')

In [5]:
# Preview the first rows to check the column names and the raw value formats
# (note that avg_temp_monthly and ideal_durations hold JSON text).
df.head()

Unnamed: 0,id,city,country,region,short_description,latitude,longitude,avg_temp_monthly,ideal_durations,budget_level,culture,adventure,nature,beaches,nightlife,cuisine,wellness,urban,seclusion
0,c54acf38-3029-496b-8c7a-8343ad82785c,Milan,Italy,europe,"Chic streets lined with fashion boutiques, his...",45.464194,9.189635,"{""1"":{""avg"":3.7,""max"":7.8,""min"":0.4},""2"":{""avg...","[""Short trip"",""One week""]",Luxury,5,2,2,1,4,5,3,5,2
1,0bd12654-ed64-424e-a044-7bc574bcf078,Yasawa Islands,Fiji,oceania,"Crystal-clear waters, secluded beaches, and vi...",-17.290947,177.125786,"{""1"":{""avg"":28,""max"":30.8,""min"":25.8},""2"":{""av...","[""Long trip"",""One week""]",Luxury,2,4,5,5,2,3,4,1,5
2,73036cda-9134-46fc-a2c6-807782d59dfb,Whistler,Canada,north_america,Snow-capped peaks and lush forests create a se...,50.11719,-122.954302,"{""1"":{""avg"":-2.5,""max"":0.4,""min"":-5.5},""2"":{""a...","[""Short trip"",""Weekend"",""One week""]",Luxury,3,5,5,2,3,3,4,2,4
3,3872c9c0-6b6e-49e1-9743-f46bfe591b86,Guanajuato,Mexico,north_america,Winding cobblestone streets and colorful facad...,20.9877,-101.0,"{""1"":{""avg"":15.5,""max"":22.8,""min"":8.7},""2"":{""a...","[""Weekend"",""One week"",""Short trip""]",Mid-range,5,3,3,1,3,4,3,4,2
4,e1ebc1b6-8798-422d-847a-22016faff3fd,Surabaya,Indonesia,asia,Bustling streets filled with the aroma of loca...,-7.245972,112.737827,"{""1"":{""avg"":28.1,""max"":32.5,""min"":25.5},""2"":{""...","[""Short trip"",""Weekend""]",Budget,4,3,3,2,3,4,3,4,2


In [None]:
# Rating columns that are averaged into a single "theme" score per city.
theme_cols = [
    'culture',
    'adventure',
    'nature',
    'beaches',
    'nightlife',
    'cuisine',
    'wellness',
    'urban',
    'seclusion',
]

# Numeric encoding of budget_level: the cheaper the tier, the higher the score.
budget_map = {
    'Budget': 3,
    'Mid-range': 2,
    'Luxury': 1,
}

def climate_comfort(json_str, ideal_temp=23):
    """Score how close a city's monthly average temperatures are to an ideal.

    Parameters
    ----------
    json_str : str
        JSON object keyed by month number ("1" .. "12"); each value is an
        object containing at least an ``"avg"`` temperature.
    ideal_temp : float, default 23
        Temperature regarded as most comfortable, in the same unit as the data.

    Returns
    -------
    float
        Negative mean absolute deviation of the twelve monthly averages from
        ``ideal_temp`` (0 is the best possible score; more negative is worse).
        ``np.nan`` when the input is missing, is not valid JSON, or lacks a
        month or a usable ``"avg"`` value.
    """
    try:
        temps = json.loads(json_str)
        avg_temps = [temps[str(month)]['avg'] for month in range(1, 13)]
        return -np.mean([abs(t - ideal_temp) for t in avg_temps])
    except (TypeError, ValueError, KeyError):
        # TypeError: non-string input (e.g. NaN) or non-numeric/mis-shaped data;
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass);
        # KeyError: a month or its "avg" entry is absent.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return np.nan

In [None]:
# Derive the four per-city component scores as new columns on df.

# Row-wise mean of the theme rating columns.
df['theme_score'] = df[theme_cols].mean(axis=1)

# Budget labels not present in budget_map become NaN here.
df['budget_score'] = df['budget_level'].map(budget_map)

# ideal_durations comes out of the CSV as JSON text (e.g. '["Weekend","One week"]'),
# so it must be decoded before counting: len() on the raw string would count
# characters, not durations. Non-string (missing) cells become NaN.
df['duration_score'] = df['ideal_durations'].apply(
    lambda raw: len(json.loads(raw)) if isinstance(raw, str) else np.nan
)

# Closeness of the monthly average temperatures to the comfort target.
df['climate_score'] = df['avg_temp_monthly'].apply(climate_comfort)

In [None]:
# Columns that feed the composite ranking.
score_cols = [
    'theme_score',
    'budget_score',
    'duration_score',
    'climate_score',
]

# Keep only the cities for which every score is available; copy so that later
# column assignments do not touch df.
df_clean = df.loc[df[score_cols].notna().all(axis=1)].copy()

In [None]:
# Rescale each score column with min-max scaling.
# NOTE: the PCA cell further down rebinds both `scaler` and `X_scaled`.
scaler = MinMaxScaler()
X_scaled = scaler.fit(df_clean[score_cols]).transform(df_clean[score_cols])

In [None]:
# Show only the first rows of the scaled matrix instead of printing the whole array.
X_scaled[:5]

In [None]:
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# --- PCA Setup (requires df_clean and score_cols from the cells above) ---
# Standardise the score columns and fit a one-component PCA on them.
# The fit uses df_clean, not df: df_clean has the rows with missing scores
# removed (PCA rejects NaN input), and it is the frame the weights are applied
# to afterwards, so fitting and scoring stay on the same population.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_clean[score_cols])

pca = PCA(n_components=1)
pca.fit(X_scaled)

# Loadings of the first principal component, one per score column.
# NOTE(review): the overall sign of a principal component is arbitrary, so
# check that positive weights really correspond to "better" before ranking.
weights = pca.components_[0]
feature_weights = dict(zip(score_cols, weights))

# --- Plotly Table ---
feature_names = list(feature_weights.keys())
weight_values = [round(w, 4) for w in feature_weights.values()]

fig = go.Figure(data=[go.Table(
    header=dict(values=["🧮 Feature", "📊 PCA Weight"],
                fill_color='darkslategray',
                font=dict(color='white', size=13),
                align='left'),
    cells=dict(values=[feature_names, weight_values],
               fill_color='#1f77b4',
               align='left',
               font=dict(size=12))
)])

fig.update_layout(title='📊 PCA-Derived Feature Weights', height=400)
fig.show(renderer='iframe')

In [None]:
# The PCA weights were derived from standardised features, so they must be
# applied to standardised features too; multiplying the raw columns would let
# whichever column has the largest numeric range dominate the ranking.
# `scaler` must be the StandardScaler fitted in the PCA cell above.
standardized_scores = pd.DataFrame(
    scaler.transform(df_clean[score_cols]),
    index=df_clean.index,
    columns=score_cols,
)

# DataFrame.dot aligns the columns with the Series index, so each feature is
# paired with its own weight regardless of ordering.
df_clean['composite_score'] = standardized_scores.dot(pd.Series(feature_weights))

In [None]:
# Eight best-ranked cities by composite score, reduced to the descriptive
# columns and re-indexed from 0 for display.
top_cities_df = df_clean.sort_values(by='composite_score', ascending=False).head(8)
top_cities_df_display = (
    top_cities_df[['city', 'country', 'region', 'budget_level']]
    .reset_index(drop=True)
)

# Render the selection as a Plotly table, one table column per frame column.
fig = go.Figure(
    data=[
        go.Table(
            header={
                "values": ["🏙️ City", "🌍 Country", "📌 Region", "💸 Budget Level"],
                "fill_color": "red",
                "align": "left",
                "font": {"color": "white", "size": 14},
                "height": 35,
            },
            cells={
                "values": [
                    top_cities_df_display[col]
                    for col in top_cities_df_display.columns
                ],
                "fill_color": "#1f77b4",
                "align": "left",
                "font": {"size": 13},
                "height": 30,
            },
        )
    ]
)

fig.update_layout(title="🏆 Top 8 Cities Based on PCA Composite Score", title_font_size=20)
fig.show(renderer='iframe')