# In [None]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import pickle
from datetime import datetime

def load_and_prepare_data(csv_path='1900_2021_DISASTERS.xlsx - emdat data.csv'):
    """Load disaster records from a CSV file and prepare them for analysis.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the export file
            name this script was originally written against, so existing
            zero-argument calls keep working.

    Returns:
        A DataFrame with the file's contents, in which the 'Start Year'
        column has been parsed with the '%Y' format into datetimes.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist (raised by pandas).
        KeyError: If the file has no 'Start Year' column.
    """
    df = pd.read_csv(csv_path)

    # Later steps use the .dt accessor on this column, so it has to be a
    # datetime column rather than a bare year value.
    df['Start Year'] = pd.to_datetime(df['Start Year'], format='%Y')

    return df

def calculate_disaster_probability(historical_data, current_year=None):
    """Estimate a yearly occurrence rate per country and disaster type.

    For every (country, disaster type) pair two rates are computed and
    blended:

    * long-term rate: number of records divided by the number of years
      between 1900 and ``current_year``;
    * recent rate: number of records whose 'Start Year' falls within the
      last 20 years, divided by 20.

    The stored value is ``0.3 * long_term + 0.7 * recent``. It is a rate
    (events per year), so it is not capped at 1 here.

    Args:
        historical_data: DataFrame with 'Country', 'Disaster Type' and a
            datetime 'Start Year' column.
        current_year: Reference year for both windows. Defaults to the
            current calendar year; pass an explicit value to get
            reproducible results.

    Returns:
        A nested dict ``{country: {disaster_type: rate}}`` with plain
        ``float`` values. Countries without any counted record are omitted.
    """
    if current_year is None:
        current_year = datetime.now().year

    # Both windows are the same for every country and disaster type, so they
    # are computed once instead of inside the loops.
    total_years = current_year - 1900  # Since data starts from 1900
    recent_cutoff = current_year - 20

    probabilities = {}

    for country in historical_data['Country'].unique():
        country_data = historical_data[historical_data['Country'] == country]

        # Count occurrences of each disaster type
        disaster_counts = country_data['Disaster Type'].value_counts()
        if disaster_counts.empty:
            # e.g. a missing country value: nothing matches, nothing to store
            continue

        # The recent slice depends only on the country, so count every
        # disaster type in it with a single pass.
        is_recent = country_data['Start Year'].dt.year >= recent_cutoff
        recent_counts = country_data.loc[is_recent, 'Disaster Type'].value_counts()

        country_probabilities = {}
        for disaster_type, count in disaster_counts.items():
            yearly_probability = count / total_years
            recent_probability = recent_counts.get(disaster_type, 0) / 20

            # Weighted probability; recent history counts more than the
            # long-term average. Stored as a plain float so the pickled
            # result does not depend on numpy scalar types.
            country_probabilities[disaster_type] = float(
                0.3 * yearly_probability + 0.7 * recent_probability
            )

        probabilities[country] = country_probabilities

    return probabilities

def normalize_probability(prob):
    """Convert a fractional probability to a percentage clamped to [0, 100]."""
    percent = prob * 100
    if percent < 0:
        return 0
    if percent > 100:
        return 100
    return percent

def predict_future_disasters(probabilities, threshold=0.3):
    """Project per-country disaster probabilities over the next ten years.

    For every country, disaster type and each of the ten calendar years
    after the current one, the base probability is multiplied by a random
    factor drawn uniformly from [0.9, 1.1) and clamped to [0, 1]. Only
    values strictly above ``threshold`` are kept. Because of the random
    factor, repeated calls return different values unless numpy's global
    random state is seeded by the caller.

    Args:
        probabilities: Nested dict ``{country: {disaster_type: probability}}``.
        threshold: Minimum (exclusive) varied probability for a row to be
            reported.

    Returns:
        A DataFrame with the columns 'Country', 'Year', 'Disaster_Type',
        'Probability' (fraction between 0 and 1) and 'Risk_Level' ('High'
        above 0.6, otherwise 'Medium'). The columns are present even when no
        row passes the threshold, so callers can always index them.
    """
    columns = ['Country', 'Year', 'Disaster_Type', 'Probability', 'Risk_Level']
    current_year = datetime.now().year
    future_years = range(current_year + 1, current_year + 11)
    predictions = []

    for country, disaster_probabilities in probabilities.items():
        for disaster_type, probability in disaster_probabilities.items():
            for year in future_years:
                # Apply slight randomness to probability (±10%)
                varied_prob = probability * np.random.uniform(0.9, 1.1)
                varied_prob = min(max(varied_prob, 0), 1)

                if varied_prob <= threshold:
                    continue

                predictions.append({
                    'Country': country,
                    'Year': year,
                    'Disaster_Type': disaster_type,
                    'Probability': varied_prob,
                    'Risk_Level': 'High' if varied_prob > 0.6 else 'Medium',
                })

    # Passing the schema explicitly keeps the column layout stable when the
    # list is empty; a bare DataFrame([]) would have no columns at all.
    return pd.DataFrame(predictions, columns=columns)

def _show_probability_trends(plot_df, countries):
    """Show a line chart of the mean yearly probability, one trace per country."""
    fig = go.Figure()

    for country in countries:
        country_data = plot_df[plot_df['Country'] == country]
        mean_probs = country_data.groupby('Year')['Probability'].mean()

        # India is drawn with a thicker line than the other countries.
        line_style = dict(width=3) if country == 'India' else dict(width=2)

        fig.add_trace(go.Scatter(
            x=mean_probs.index,
            y=mean_probs.values,
            name=country,
            mode='lines+markers',
            line=line_style,
            hovertemplate="Year: %{x}<br>Probability: %{y:.1f}%<br>Country: " + country
        ))

    # The prediction years depend on when the predictions were generated, so
    # the title reports the span actually present in the data instead of a
    # fixed range.
    first_year = int(plot_df['Year'].min())
    last_year = int(plot_df['Year'].max())

    fig.update_layout(
        title=f'Disaster Probability Trends - Top 10 Countries ({first_year}-{last_year})',
        xaxis_title='Year',
        yaxis_title='Probability (%)',
        yaxis=dict(range=[0, 100]),
        hovermode='x unified',
        showlegend=True,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=1.05),
        margin=dict(r=150)
    )
    fig.show()


def _show_probability_by_type(plot_df):
    """Show a bar chart of mean probability per disaster type with std error bars."""
    disaster_stats = plot_df.groupby('Disaster_Type').agg({'Probability': ['mean', 'std']}).reset_index()
    disaster_stats.columns = ['Disaster_Type', 'Mean_Probability', 'Std_Probability']

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=disaster_stats['Disaster_Type'],
        y=disaster_stats['Mean_Probability'],
        error_y=dict(type='data', array=disaster_stats['Std_Probability'], visible=True),
        hovertemplate="Disaster Type: %{x}<br>Probability: %{y:.1f}%<br><extra></extra>"
    ))

    fig.update_layout(
        title='Average Probability by Disaster Type (Top 10 Countries)',
        xaxis_title='Disaster Type',
        yaxis_title='Average Probability (%)',
        yaxis=dict(range=[0, 100]),
        showlegend=False,
        margin=dict(b=100)
    )

    fig.update_xaxes(tickangle=45)
    fig.show()


def _show_risk_levels(plot_df):
    """Show a bubble chart of every prediction, sized by probability and coloured by risk level."""
    fig = px.scatter(
        plot_df,
        x='Year',
        y='Probability',
        size='Probability',
        color='Risk_Level',
        hover_data=['Country', 'Disaster_Type'],
        title='Disaster Risk Levels Over Time (Top 10 Countries)',
        labels={'Probability': 'Probability (%)', 'Risk_Level': 'Risk Level'}
    )

    fig.update_layout(
        yaxis=dict(range=[0, 100]),
        hovermode='closest'
    )
    fig.show()


def visualize_future_predictions(predictions_df):
    """Generate visualizations for disaster risk trends in the top 10 affected countries.

    Three figures are shown: a per-country trend line chart, a per-disaster-type
    bar chart and a bubble chart of individual predictions. The charted
    countries are 'India' plus the nine other countries with the highest mean
    probability.

    Args:
        predictions_df: DataFrame with 'Country', 'Year', 'Disaster_Type',
            'Probability' and 'Risk_Level' columns.

    Side effects:
        The 'Probability' column of ``predictions_df`` is overwritten IN PLACE
        with percentages clamped to 0-100 via ``normalize_probability``.
        NOTE: calling this function twice on the same frame scales the column
        twice; pass a copy if the original values are still needed.

    Returns:
        None. If there is nothing to plot, a message is printed and no figure
        is shown.
    """
    if predictions_df.empty:
        print("No predictions available to visualize.")
        return

    # Normalize probabilities (in place, see the docstring)
    predictions_df['Probability'] = predictions_df['Probability'].apply(normalize_probability)

    # Identify top 9 countries with highest disaster probability (excluding India)
    other_countries = predictions_df[predictions_df['Country'] != 'India']
    country_avg_prob = other_countries.groupby('Country')['Probability'].mean()
    top_9_countries = country_avg_prob.nlargest(9).index.tolist()

    # Ensure India is included in the top 10
    top_10_countries = ['India'] + top_9_countries

    # Filter dataset to include only the top 10 countries
    plot_df = predictions_df[predictions_df['Country'].isin(top_10_countries)]
    if plot_df.empty:
        # Possible when no row carries a usable country value.
        print("No predictions available to visualize.")
        return

    ### 1. Line Chart - Disaster Probability Trends ###
    _show_probability_trends(plot_df, top_10_countries)

    ### 2. Bar Chart - Disaster Probability by Type ###
    _show_probability_by_type(plot_df)

    ### 3. Bubble Chart - Disaster Risk Levels ###
    _show_risk_levels(plot_df)

def print_detailed_predictions(predictions_df):
    """Print the predictions grouped by year, highest probability first.

    A header is printed for every year present in ``predictions_df``; below
    it, only rows whose 'Probability' is at least 30 are listed. The value is
    printed with a '%' suffix, so the column is presumably expected to hold
    percentages already - confirm against the caller.
    """
    ordered = predictions_df.sort_values(['Year', 'Probability'], ascending=[True, False])

    for year in ordered['Year'].unique():
        print(f"\nPredictions for {year}:")
        print("-" * 80)

        in_year = ordered[ordered['Year'] == year]
        reportable = in_year[in_year['Probability'] >= 30]

        entries = zip(
            reportable['Country'],
            reportable['Disaster_Type'],
            reportable['Probability'],
            reportable['Risk_Level'],
        )
        for country, disaster_type, probability, risk_level in entries:
            print(f"Country: {country}")
            print(f"Disaster Type: {disaster_type}")
            print(f"Probability: {probability:.1f}%")
            print(f"Risk Level: {risk_level}")
            print("-" * 40)

def main():
    """Run the whole pipeline: load, model, persist, predict, plot and report."""
    print("Loading and preparing data...")
    disasters = load_and_prepare_data()

    print("Calculating disaster probabilities...")
    probability_table = calculate_disaster_probability(disasters)

    # Persist the per-country probability table as a pickle file.
    model_path = 'disaster_probabilities.pkl'
    with open(model_path, 'wb') as model_file:
        pickle.dump(probability_table, model_file)
    print("Probabilities model saved to 'disaster_probabilities.pkl'.")

    print("Generating future predictions...")
    forecast = predict_future_disasters(probability_table)

    # Order matters: the visualization step rewrites the 'Probability' column
    # as percentages in place, and the report below filters on ">= 30".
    print("\nGenerating visualization...")
    visualize_future_predictions(forecast)

    print("\nDetailed Predictions:")
    print_detailed_predictions(forecast)


if __name__ == "__main__":
    main()
