In [92]:
import pandas as pd
import numpy as np

In [93]:
resort_stats_data = pd.read_csv("moutain_stat.csv")
visitor_data = pd.read_csv("predicted_visitors_2026.csv")
historical_data = pd.read_csv("climate_visitor_snow.csv",index_col = 0)
total_cost = pd.read_csv("total_cost_week_by_week.csv")
print(resort_stats_data.columns)
print(visitor_data.columns)
print(historical_data.columns)
print(total_cost.columns)

Index(['Resort', 'Highest Lifted Point', 'Lowest Lifted Point', 'Skiable Area',
       'Snow Making Area', 'Lifts', 'Terrain Advanced', 'Terrain Intermediate',
       'Terrain Beginner'],
      dtype='object')
Index(['Year', 'Week', 'Resort', 'Predicted_Visitors'], dtype='object')
Index(['Year', 'Week', 'Resort', 'Visitors', 'MaxTemp', 'MinTemp', 'Rainfall',
       'Total Snowfall', 'Snowfall Days', 'Average Base Depth',
       'Max Base Depth', 'Biggest Snowfall'],
      dtype='object')
Index(['Week', 'Mt. Baw Baw', 'Mt. Stirling', 'Mt. Hotham', 'Falls Creek',
       'Mt. Buller', 'Selwyn', 'Thredbo', 'Perisher', 'Charlotte Pass'],
      dtype='object')


In [94]:
visitor_data.rename(columns = {"Predicted_Visitors":"Forecasted_Visitors"}, inplace = True)
filter_data = historical_data[~historical_data["Resort"].isin(["Mt. Stirling"])]
total_cost = total_cost.loc[:, total_cost.columns != "Mt. Stirling"]
cost_long = total_cost.melt(id_vars=['Week'], var_name='Resort', value_name='Cost')
cost_long.rename(columns = {"Cost":"Price_2025"}, inplace = True)
cost_long.head(5)

Unnamed: 0,Week,Resort,Price_2025
0,1,Mt. Baw Baw,3381
1,2,Mt. Baw Baw,3381
2,3,Mt. Baw Baw,3381
3,4,Mt. Baw Baw,3381
4,5,Mt. Baw Baw,3556


In [95]:
resort_stats_data.rename(columns={
    'Highest Lifted Point': 'Highest_Lifted_Point',
    'Lowest Lifted Point': 'Lowest_Lifted_Point',
    'Skiable Area': 'Skiable_Area',
    'Snow Making Area': 'Snow_Making_Area',
    'Terrain Advanced': 'Terrain_Advanced',
    'Terrain Intermediate': 'Terrain_Intermediate',
    'Terrain Beginner': 'Terrain_Beginner'
}, inplace=True)
resort_stats_data.head()

Unnamed: 0,Resort,Highest_Lifted_Point,Lowest_Lifted_Point,Skiable_Area,Snow_Making_Area,Lifts,Terrain_Advanced,Terrain_Intermediate,Terrain_Beginner
0,Perisher,2034,1605,1245,53.4,47,0.18,0.6,0.22
1,Thredbo,2037,1365,480,70.0,14,0.17,0.67,0.16
2,Selwyn,1614,1492,45,36.0,11,0.12,0.48,0.4
3,Charlotte Pass,1955,1760,50,10.0,6,0.2,0.5,0.3
4,Mt. Buller,1805,1375,300,70.0,22,0.35,0.45,0.2


In [118]:
# ==============================================================================
# PART 1: ASSEMBLE THE RECOMMENDATION DATAFRAME (FINAL ENHANCED VERSION)
# ==============================================================================

# --- 1a. Start by merging your core 2026 forecast and price data ---
rec_df = pd.merge(visitor_data, cost_long, on=['Resort', 'Week'])
historical_data['Avg_Temp'] = historical_data["MaxTemp"] - historical_data["MinTemp"]
historical_data['Avg_Temp'] = historical_data['Avg_Temp']/2

# --- 1b. Calculate historical averages for ALL condition metrics ---
# NEW: We are adding MinTemp to our aggregation.
historical_conditions = historical_data.groupby(['Resort', 'Week']).agg(
    Historical_Avg_Base_Depth=('Average Base Depth', 'mean'),
    Historical_Avg_Snowfall_Days=('Snowfall Days', 'mean'),
    Historical_Avg_Rainfall=('Rainfall', 'mean'),
    Historical_Avg_Temp=('Avg_Temp', 'mean') # ADDED THIS LINE
).reset_index()

# Now merge this comprehensive condition data into our recommendation frame
rec_df = pd.merge(rec_df, historical_conditions, on=['Resort', 'Week'])


# --- 1c. Merge with the static resort stats (no change here) ---
rec_df = pd.merge(rec_df, resort_stats_data, on='Resort')

print("\n--- Assembled Recommendation DataFrame with Temperature Data (Top 5 rows) ---")
print(rec_df.head())


# ==============================================================================
# PART 2: BUILD THE RECOMMENDATION MODEL (WITH FINAL ENHANCED SCORING)
# ==============================================================================

# --- 2a. Normalization functions (no change here) ---
def normalize_positive(series):
    min_val, max_val = series.min(), series.max()
    if max_val == min_val: return pd.Series(1.0, index=series.index)
    return (series - min_val) / (max_val - min_val)

def normalize_negative(series):
    min_val, max_val = series.min(), series.max()
    if max_val == min_val: return pd.Series(1.0, index=series.index)
    return 1 - ((series - min_val) / (max_val - min_val))


# --- 2b. Create Normalized Sub-Scores ---
# NEW: Normalize all four of our condition components first
norm_base_depth = normalize_positive(rec_df['Historical_Avg_Base_Depth'])
norm_snowfall_days = normalize_positive(rec_df['Historical_Avg_Snowfall_Days'])
norm_rainfall = normalize_negative(rec_df['Historical_Avg_Rainfall'])       # Lower rain is better
norm_temp = normalize_negative(rec_df['Historical_Avg_Temp'])           # Lower temp is better

# NOW, create the final composite Condition_Score with the new re-balanced weights
rec_df['Condition_Score'] = (0.50 * norm_base_depth) + \
                            (0.25 * norm_snowfall_days) + \
                            (0.15 * norm_temp) + \
                            (0.10 * norm_rainfall)

# The rest of the sub-scores are calculated as before
rec_df['Crowd_Score'] = normalize_negative(rec_df['Forecasted_Visitors'])
rec_df['Price_Score'] = normalize_negative(rec_df['Price_2025'])
rec_df['Family_Terrain_Score'] = normalize_positive(rec_df['Terrain_Beginner'] + rec_df['Terrain_Intermediate'])
rec_df['Expert_Terrain_Score'] = normalize_positive(rec_df['Terrain_Advanced'] + normalize_positive(rec_df['Highest_Lifted_Point']))


--- Assembled Recommendation DataFrame with Temperature Data (Top 5 rows) ---
     Year  Week          Resort  Forecasted_Visitors  Price_2025  \
0  2026.0     1  Charlotte Pass                  358        6545   
1  2026.0     2  Charlotte Pass                  477        6545   
2  2026.0     3  Charlotte Pass                  182        6870   
3  2026.0     4  Charlotte Pass                 1342        6870   
4  2026.0     5  Charlotte Pass                 2264        6870   

   Historical_Avg_Base_Depth  Historical_Avg_Snowfall_Days  \
0                  44.666667                     18.181818   
1                  44.666667                     18.181818   
2                  44.666667                     18.181818   
3                  44.666667                     18.181818   
4                  44.666667                     18.181818   

   Historical_Avg_Rainfall  Historical_Avg_Temp  Highest_Lifted_Point  \
0                50.554545             3.179167                  1

In [115]:
# --- 2c. Define Persona Weights and Apply Scoring ---
persona_weights = {
    'family_fun_seeker': {
        'Family_Terrain_Score': 0.35,
        'Crowd_Score': 0.30,
        'Price_Score': 0.20,
        'Condition_Score': 0.15
    },
    'powder_hound': {
        'Condition_Score': 0.50,
        'Expert_Terrain_Score': 0.30,
        'Crowd_Score': 0.10,
        'Price_Score': 0.10
    }
}

for persona, weights in persona_weights.items():
    # Calculate score by summing weighted sub-scores.
    # We use .get() with a default of 0 to safely handle cases where a persona doesn't
    # explicitly define a weight for a score (e.g., 'Family_Terrain_Score' for 'powder_hound'
    # would not be used if 'Family_Terrain_Score' was not in its weights dict, though it is here)
    rec_df[f'{persona}_Ultimate_Score'] = (
        weights.get('Family_Terrain_Score', 0) * rec_df['Family_Terrain_Score'] +
        weights.get('Expert_Terrain_Score', 0) * rec_df['Expert_Terrain_Score'] +
        weights.get('Condition_Score', 0) * rec_df['Condition_Score'] +
        weights.get('Crowd_Score', 0) * rec_df['Crowd_Score'] +
        weights.get('Price_Score', 0) * rec_df['Price_Score']
    )

In [116]:
# ==============================================================================
# PART 3: GENERATE AND DISPLAY RECOMMENDATIONS
# ==============================================================================

holiday_weeks = [5, 6]
family_options_df = rec_df[rec_df['Week'].isin(holiday_weeks)].copy()

# NOW, sort this smaller, filtered DataFrame to find the best option *within* the holidays.
family_recommendation = family_options_df.sort_values(by='family_fun_seeker_Ultimate_Score', ascending=False).reset_index(drop=True)

print("\n\n--- TOP RECOMMENDATIONS for the 'Family Fun-Seeker' (Constrained to Holiday Weeks 5 & 6) ---")
family_recommendation[[
    'Resort', 'Week', 'family_fun_seeker_Ultimate_Score',
    'Family_Terrain_Score', 'Crowd_Score', 'Price_Score', 'Condition_Score'
]].head()



--- TOP RECOMMENDATIONS for the 'Family Fun-Seeker' (Constrained to Holiday Weeks 5 & 6) ---


Unnamed: 0,Resort,Week,family_fun_seeker_Ultimate_Score,Family_Terrain_Score,Crowd_Score,Price_Score,Condition_Score
0,Selwyn,5,0.857533,0.965517,0.895887,0.736842,0.689781
1,Selwyn,6,0.850824,0.965517,0.869482,0.736842,0.69787
2,Mt. Baw Baw,5,0.818866,1.0,0.842959,0.96271,0.156241
3,Mt. Baw Baw,6,0.81531,1.0,0.848922,0.96271,0.120606
4,Charlotte Pass,5,0.698977,0.689655,0.968164,0.256552,0.772252


In [117]:
# --- 3b. Rank and find the winner for the Powder Hound ---
powder_hound_recommendation = rec_df.sort_values(by='powder_hound_Ultimate_Score', ascending=False).reset_index(drop=True)
powder_hound_recommendation[[
    'Resort', 'Week', 'powder_hound_Ultimate_Score',
    'Condition_Score', 'Expert_Terrain_Score', 'Crowd_Score', 'Price_Score']].head()

Unnamed: 0,Resort,Week,powder_hound_Ultimate_Score,Condition_Score,Expert_Terrain_Score,Crowd_Score,Price_Score
0,Mt. Hotham,2,0.821921,0.731501,1.0,0.936313,0.6254
1,Mt. Hotham,3,0.814227,0.72316,1.0,0.901067,0.6254
2,Mt. Hotham,4,0.811356,0.730688,1.0,0.834723,0.6254
3,Mt. Hotham,14,0.800521,0.682073,1.0,0.969448,0.6254
4,Mt. Hotham,1,0.799419,0.678121,1.0,0.978185,0.6254
