In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import os

from io import StringIO
from google.colab import drive
from matplotlib.ticker import MultipleLocator
from matplotlib.lines import Line2D
from scipy.stats import mannwhitneyu, pearsonr, spearmanr, gaussian_kde
from matplotlib.patches import Patch


# OPTIONAL: Load data from local or online source
# If running on Colab and using Google Drive, uncomment below:

# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/your_project_folder')

# Otherwise, place your data in the same folder or set the correct relative path

In [None]:
# Load the three normalized PC1 tables (standard, kernel and wavelet PCA).
# Later cells read the "Year_Bin" and "Realization" columns plus the
# variant-specific PC1 column from each of these frames.
recent_csv_names = (
    "pca_normalized_recent.csv",
    "kpca_normalized_recent.csv",
    "wpca_normalized_recent.csv",
)
pca_recent, kpca_recent, wpca_recent = (pd.read_csv(name) for name in recent_csv_names)

In [None]:
# ENSO reconstruction: whitespace-delimited text, lines starting with '#' are skipped.
enso1_file_path = 'data/enso1_reconstruction.txt'
enso_df = (
    pd.read_csv(enso1_file_path, sep=r'\s+', comment='#')
    # Use the shared key name "Year_Bin" so this frame can be merged with the PCA tables.
    .rename(columns={'age': 'Year_Bin', 'ensoi': 'ENSO'})
    # z-score over the whole record (pandas .std() is the sample standard deviation).
    .assign(ENSO_norm=lambda frame: (frame['ENSO'] - frame['ENSO'].mean()) / frame['ENSO'].std())
)
enso_df.head()

In [None]:
# PDO reconstruction: whitespace-delimited text, lines starting with '#' are skipped.
pdo1_file_path = 'data/pdo1_reconstruction.txt'
pdo_df = (
    pd.read_csv(pdo1_file_path, sep=r'\s+', comment='#')
    .rename(columns={'age': 'Year_Bin', 'pdo': 'PDO'})
    .assign(
        # Integer years so the key matches the other forcing tables.
        Year_Bin=lambda frame: frame['Year_Bin'].astype(int),
        # z-score over the whole record (sample standard deviation).
        PDO_norm=lambda frame: (frame['PDO'] - frame['PDO'].mean()) / frame['PDO'].std(),
    )
)
pdo_df.head()

In [None]:
# IPO reconstruction. The file's own header names are discarded and replaced
# positionally, so the file must contain exactly three columns.
ipo_file_path = 'data/pdo2_reconstruction.txt'
ipo_df = (
    pd.read_csv(ipo_file_path, sep=r'\s+', comment='#')
    .set_axis(['Year_Bin', 'IPO', 'Std_Dev'], axis=1)
    .astype({'Year_Bin': int})
)
# z-score over the whole record (sample standard deviation).
ipo_df = ipo_df.assign(
    IPO_norm=(ipo_df['IPO'] - ipo_df['IPO'].mean()) / ipo_df['IPO'].std()
)
ipo_df.head()

In [None]:
# Atmospheric CO2 record, restricted to years 1-2016 and z-scored over that window.
#
# Built as a single chain so every step returns a fresh frame: this avoids
# `inplace=True` and avoids assigning a new column onto a boolean-filtered
# slice, which triggers SettingWithCopyWarning on pandas < 3.
co2_df = (
    pd.read_csv('data/co2-long-term-concentration.csv')
    .rename(columns={'Annual concentration of atmospheric carbon dioxide': 'CO2', 'Year': 'Year_Bin'})
    .loc[:, ['Year_Bin', 'CO2']]
    .astype({'Year_Bin': int})
    # Inclusive on both ends, i.e. 1 <= Year_Bin <= 2016. The original row index is kept.
    .loc[lambda frame: frame['Year_Bin'].between(1, 2016)]
    # Mean and standard deviation are taken over the filtered rows only.
    .assign(CO2_norm=lambda frame: (frame['CO2'] - frame['CO2'].mean()) / frame['CO2'].std())
)
co2_df.head()

In [None]:
tsi_file_path = 'data/tsi_reconstruction.txt'


def _is_tsi_data_row(line, n_fields=3):
    """Return True when `line` is exactly `n_fields` whitespace-separated numbers.

    Signed values are accepted, so a row whose first field is negative is
    recognised as data. Free-text header lines that merely begin with a digit
    are rejected because not all of their tokens parse as numbers.
    """
    tokens = line.split()
    if len(tokens) != n_fields:
        return False
    try:
        for token in tokens:
            float(token)
    except ValueError:
        return False
    return True


# The file starts with a free-text preamble. latin1 maps every byte to a
# character, so decoding cannot fail on non-ASCII characters in that preamble.
with open(tsi_file_path, 'r', encoding='latin1') as file:
    lines = file.read().splitlines()

# Index of the first numeric row; everything from there on is handed to pandas.
start_idx = next((i for i, line in enumerate(lines) if _is_tsi_data_row(line)), None)
if start_idx is None:
    raise ValueError(f"No numeric data rows found in {tsi_file_path!r}")
data_str = '\n'.join(lines[start_idx:])

tsi_df = pd.read_csv(StringIO(data_str), sep=r'\s+', names=['YearBP', 'dTSI', 'dTSI_sigma'])
# Presumably YearBP counts years before 1950 (negative = after 1950) — TODO confirm against the file header.
tsi_df['Year_AD'] = 1950 - tsi_df['YearBP']
# .copy() so the column assignments below act on an independent frame
# rather than on a filtered slice (avoids SettingWithCopyWarning).
tsi_df = tsi_df[tsi_df['Year_AD'].between(1, 2016)].copy()
# NOTE(review): 1365.57 looks like the reference TSI that dTSI is an anomaly from — confirm value and units.
tsi_df['TSI'] = 1365.57 + tsi_df['dTSI']
# Fractional years are floored to the integer year used as the merge key.
tsi_df['Year_Bin'] = np.floor(tsi_df['Year_AD']).astype(int)
tsi_df = tsi_df[['Year_Bin', 'TSI']].copy()
# z-score over the retained window (sample standard deviation).
tsi_df['TSI_norm'] = (tsi_df['TSI'] - tsi_df['TSI'].mean()) / tsi_df['TSI'].std()
tsi_df.head()

In [None]:
def summarize(df, colname):
    """Collapse an ensemble to per-year summary statistics.

    Rows of `df` are grouped by their "Year_Bin" value and, within each group,
    the median, 5th percentile and 95th percentile of `colname` are computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Year_Bin" column and the column named by `colname`.
    colname : str
        Name of the numeric column to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per distinct Year_Bin (ascending), with columns
        "Year_Bin", "median", "p5" and "p95" and a fresh 0..n-1 index.
    """
    per_year = df.groupby("Year_Bin")[colname]
    stats = pd.DataFrame(
        {
            "median": per_year.median(),
            "p5": per_year.quantile(0.05),
            "p95": per_year.quantile(0.95),
        }
    )
    # The group keys sit in the index (named "Year_Bin"); move them back into a column.
    return stats.reset_index()

# Per-year median and 5th/95th percentiles of PC1 for each PCA variant.
pca_summary, kpca_summary, wpca_summary = (
    summarize(ensemble, pc_column)
    for ensemble, pc_column in (
        (pca_recent, "PC1_norm"),
        (kpca_recent, "KPC1_norm"),
        (wpca_recent, "WPC1_norm"),
    )
)

In [None]:
# Overview figure: per-year median PC1 of the three PCA variants with their
# 5th-95th percentile bands, restricted to 1550-2016.
fig, ax = plt.subplots(figsize=(14, 6))

# One line (median) and one translucent band (p5..p95) per PCA variant.
for summary, label, color in zip([pca_summary, kpca_summary, wpca_summary],
                                 ['Standard PCA', 'Kernel PCA', 'Wavelet PCA'],
                                 ['blue', 'green', 'orange']):
    ax.plot(summary["Year_Bin"], summary["median"], label=label, color=color)
    ax.fill_between(summary["Year_Bin"], summary["p5"], summary["p95"], alpha=0.2, color=color)

# Dotted vertical markers at 1700, 1850 and 1950 CE.
for year in [1700, 1850, 1950]:
    ax.axvline(x=year, color='black', linestyle=':', linewidth=1.5)

# Hand-built legend entries: a single gray swatch stands in for all three
# coloured percentile bands, followed by one proxy line per PCA variant.
handles = [
    Patch(facecolor='gray', edgecolor='gray', alpha=0.15, label='5–95% Percentile Bound'),
    Line2D([], [], color='blue', linewidth=2, label='Median PC1 (Standard)'),
    Line2D([], [], color='green', linewidth=2, label='Median KPC1 (Kernel)'),
    Line2D([], [], color='orange', linewidth=2, label='Median WPC1 (Wavelet)')
]

# Styling
ax.set_ylabel("PC1 (Normalized)", fontsize=12)
ax.set_xlabel("Year", fontsize=12)
ax.set_title("Hydroclimate Variability Reconstruction (1550–2016 CE)", fontsize=14)
ax.set_xlim(1550, 2016)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)

for spine in ax.spines.values():
    spine.set_linewidth(1)
    spine.set_color('black')

# Use the custom handles instead of the labels attached to the plotted artists.
ax.legend(handles=handles, loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
def _from_year(df, first_year=1550):
    """Return the rows of `df` whose "Year_Bin" is at or after `first_year`."""
    return df[df["Year_Bin"] >= first_year]


# Five stacked panels sharing the x axis: the PC1 reconstruction (double height)
# followed by ENSO, PDO/IPO, TSI and CO2.
fig, axs = plt.subplots(5, 1, figsize=(15, 14), sharex=True, gridspec_kw={'height_ratios': [2, 1, 1, 1, 1]})

for ax in axs:
    ax.set_xlim(1550, 2016)
    ax.xaxis.set_major_locator(MultipleLocator(100))
    # With sharex only the bottom panel keeps tick labels; turn them back on everywhere.
    ax.tick_params(labelbottom=True)
    ax.grid(True, axis='x', linestyle='--', color='gray', linewidth=0.8, alpha=0.6)

# Hydroclimate Reconstruction
for summary, label, color in zip([pca_summary, kpca_summary, wpca_summary],
                                 ['Standard PCA', 'Kernel PCA', 'Wavelet PCA'],
                                 ['blue', 'green', 'orange']):
    axs[0].plot(summary["Year_Bin"], summary["median"], label=label, color=color)
    axs[0].fill_between(summary["Year_Bin"], summary["p5"], summary["p95"], alpha=0.2, color=color)

axs[0].set_ylabel("PC1 (Normalized)")
axs[0].set_title("Hydroclimate Variability Reconstruction (Post-1550 CE)")

# Volcanic eruption years
eruption_years = [1580, 1586, 1593, 1660, 1673, 1815, 1822, 1883, 1963, 1991]

# Put the markers just below the current lower y-limit ...
eruption_y = axs[0].get_ylim()[0] - 0.05

# Plot upright red triangles
axs[0].scatter(
    eruption_years,
    [eruption_y] * len(eruption_years),
    marker='^', color='red', s=80, label='Volcanic Eruption'
)

# ... then extend the axis downward so the triangles are not clipped.
axs[0].set_ylim(eruption_y - 0.1, axs[0].get_ylim()[1])
# Single legend call, made after the scatter so the eruption marker is included.
axs[0].legend(loc='upper left')

enso_recent = _from_year(enso_df)
axs[1].plot(enso_recent["Year_Bin"], enso_recent["ENSO_norm"], color='darkcyan')
axs[1].set_ylabel("ENSO (Normalized)")
axs[1].set_title("ENSO Reconstructions")

# PDO and IPO share one panel.
pdo_recent = _from_year(pdo_df)
ipo_recent = _from_year(ipo_df)
axs[2].plot(pdo_recent["Year_Bin"], pdo_recent["PDO_norm"], color='brown', label='PDO Reconstruction')
axs[2].plot(ipo_recent["Year_Bin"], ipo_recent["IPO_norm"], color='olive', label='IPO Reconstruction')
axs[2].set_ylabel("PDO (Normalized)")
axs[2].set_title("PDO Reconstructions")
axs[2].legend()

tsi_recent = _from_year(tsi_df)
axs[3].plot(tsi_recent["Year_Bin"], tsi_recent["TSI_norm"], color='crimson')
axs[3].set_ylabel("TSI (Normalized)")
axs[3].set_title("TSI Reconstruction")

co2_recent = _from_year(co2_df)
axs[4].plot(co2_recent["Year_Bin"], co2_recent["CO2_norm"], color='magenta')
axs[4].set_ylabel("CO2 (Normalized)")
axs[4].set_xlabel("Year (CE)")
axs[4].set_title("CO2 Reconstruction")

plt.tight_layout()
plt.show()

In [None]:
sns.set_style('whitegrid')

# Define periods (both end years are inclusive, so boundary years fall in two periods)
periods = [(1550, 1700), (1700, 1850), (1850, 2016)]
period_labels = ['1550–1700', '1700–1850', '1850-2016']

# (panel label, ensemble frame, name of its PC1 column)
pca_info = [
    ("Standard", pca_recent, "PC1_norm"),
    ("Kernel", kpca_recent, "KPC1_norm"),
    ("Wavelet", wpca_recent, "WPC1_norm")
]

# Each forcing is reduced to two columns: the merge key and its normalized series.
forcing_dfs = {
    'ENSO': enso_df[['Year_Bin', 'ENSO_norm']],
    'IPO': ipo_df[['Year_Bin', 'IPO_norm']],
    'PDO': pdo_df[['Year_Bin', 'PDO_norm']],
    'TSI': tsi_df[['Year_Bin', 'TSI_norm']],
    'CO2': co2_df[['Year_Bin', 'CO2_norm']]
}

forcing_colors = {
    'ENSO': 'blue',
    'IPO': 'green',
    'PDO': 'purple',
    'TSI': 'orange',
    'CO2': 'red'
}


def _abs_pearson_per_realization(realization_frames, forcing_df, pc1_col):
    """Correlate every realization's PC1 series with one forcing series.

    Parameters
    ----------
    realization_frames : list of pandas.DataFrame
        One frame per realization, each holding "Year_Bin" and `pc1_col`.
    forcing_df : pandas.DataFrame
        Two columns: "Year_Bin" followed by the forcing values.
    pc1_col : str
        Name of the PC1 column in the realization frames.

    Returns
    -------
    (list, list)
        Absolute Pearson r for every usable realization, and the subset of
        those values whose p-value is below 0.05.
    """
    forcing_col = forcing_df.columns[1]
    all_corrs, sig_corrs = [], []
    for r_df in realization_frames:
        merged = pd.merge(r_df, forcing_df, on='Year_Bin', how='inner').dropna()
        # Three or more overlapping years are required.
        if len(merged) <= 2:
            continue
        corr, pval = pearsonr(merged[pc1_col], merged[forcing_col])
        # pearsonr yields NaN for constant input; a NaN would poison the KDE and the median.
        if not np.isfinite(corr):
            continue
        abs_corr = abs(corr)
        all_corrs.append(abs_corr)
        if pval < 0.05:
            sig_corrs.append(abs_corr)
    return all_corrs, sig_corrs


# |r| lies in [0, 1]; the same evaluation grid is used for every density curve.
x_vals = np.linspace(0, 1, 500)

# Loop through intervals: one figure per period, one panel per PCA variant.
for (start, end), period_label in zip(periods, period_labels):
    interval = f"{start}-{end}"
    fig, axes = plt.subplots(1, 3, figsize=(20, 4), sharey=True)

    for ax, (pca_label, pca_df, pc1_col) in zip(axes, pca_info):
        interval_df = pca_df[(pca_df["Year_Bin"] >= start) & (pca_df["Year_Bin"] <= end)]

        # Split once per panel instead of once per forcing. sort=False keeps
        # the realizations in order of first appearance.
        realization_frames = [
            group[['Year_Bin', pc1_col]]
            for _, group in interval_df.groupby("Realization", sort=False)
        ]

        for i, (forcing_name, forcing_df) in enumerate(forcing_dfs.items()):
            all_corrs, sig_corrs = _abs_pearson_per_realization(
                realization_frames, forcing_df, pc1_col
            )
            if not all_corrs:
                continue

            color = forcing_colors.get(forcing_name, 'gray')

            # gaussian_kde needs at least two points with non-zero spread;
            # otherwise skip the curve but keep the median line and annotation.
            if len(all_corrs) > 1 and max(all_corrs) > min(all_corrs):
                kde = gaussian_kde(all_corrs, bw_method=0.75)
                y_vals = kde(x_vals)

                # Plot main KDE
                ax.plot(x_vals, y_vals, color=color, linewidth=2)

                # Shade the density between the smallest and largest significant |r|
                if sig_corrs:
                    mask = (x_vals >= min(sig_corrs)) & (x_vals <= max(sig_corrs))
                    ax.fill_between(x_vals[mask], 0, y_vals[mask], color=color, alpha=0.3)

            # Vertical line for median of all correlations
            median_val = np.median(all_corrs)
            ax.axvline(median_val, linestyle='--', linewidth=1.2, color=color, alpha=0.7)

            # Annotate % significant just outside the right edge of the axes
            # (x = 1.02 in axes-fraction coordinates), one row per forcing.
            percent_sig = 100 * len(sig_corrs) / len(all_corrs)
            ax.annotate(
                f"{forcing_name}: {percent_sig:.1f}%",
                xy=(1.02, 0.95 - 0.08 * i),
                xycoords='axes fraction',
                fontsize=12,
                color=color,
                ha='left',
                va='top'
            )

        ax.set_xlim(0, 1)
        ax.set_xticks(np.linspace(0, 1, 6))
        ax.set_title(f"{pca_label} PCA\n({period_label})")
        ax.set_xlabel("|Pearson Correlation|", fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.3)

    axes[0].set_ylabel("Density", fontsize=12)

    # Figure-level legend with one proxy line per forcing, placed below the panels.
    custom_handles = [
        Line2D([0], [0], color=forcing_colors[forcing], lw=2)
        for forcing in forcing_dfs.keys()
    ]
    fig.legend(
        custom_handles,
        list(forcing_dfs.keys()),
        loc='lower center',
        bbox_to_anchor=(0.5, -0.08),
        ncol=5,
        fontsize=12,
        frameon=False
    )
    fig.suptitle(
        f"KDE of |r| with Significant (p<0.05) Regions Highlighted\nInterval: {interval}",
        y=0.96,
        fontsize=16
    )
    plt.tight_layout(rect=[0, 0.01, 1, 0.98])
    plt.show()