In [None]:
!pip install lipd
!pip install cartopy

In [None]:
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import seaborn as sns
import os
import lipd
import geopandas as gpd
from shapely.geometry import Point
from google.colab import drive

# Optional: Set your working directory if needed
# If using Google Colab and Google Drive, uncomment and edit the lines below:
# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/your_project_folder')

## Iso2k

In [None]:
# Read the pickled Iso2k database that sits next to this notebook.
# NOTE: pickle.load can execute code embedded in the file; only open trusted data.
with open('iso2k1_0_0.pkl', 'rb') as iso2k_file:
    iso2k = pickle.load(iso2k_file)

In [None]:
# Flatten the Iso2k time-series list into one long-format table: one row per
# (year, value) observation, with the record-level metadata repeated on each row.
ts_data = iso2k['TS']

all_series_with_variable_group = []

for record in ts_data:
    # A series can only be tabulated when it has both a time axis and values.
    if 'year' not in record or 'paleoData_values' not in record:
        continue

    # Collapse the 'variableGroup' of every interpretation into one comma-separated label.
    if 'paleoData_interpretation' in record:
        variable_groups = [
            interpretation.get('variableGroup', 'Unknown')
            for interpretation in record['paleoData_interpretation']
        ]
        variable_group = ', '.join(variable_groups) if variable_groups else 'Unknown'
    else:
        variable_group = 'Unknown'

    measurement_material_detail = record.get('paleoData_measurementMaterialDetail', 'Unknown')
    paleo_units = record.get('paleoData_units', 'Unknown')
    # Records without a depth axis get an all-NaN column as long as the time axis.
    depth_values = record.get('depth', [np.nan] * len(record['year']))
    measurement_standard = record.get('paleoData_measurementStandard', 'Unknown')

    df = pd.DataFrame({
        'Year': record['year'],
        'Value': record['paleoData_values'],
        'archiveType': record.get('archiveType', 'Unknown'),
        'VariableName': record.get('paleoData_variableName', 'Unknown'),
        'Latitude': record.get('geo_meanLat', np.nan),
        'Longitude': record.get('geo_meanLon', np.nan),
        'PrimaryTimeSeries': record.get('paleoData_iso2kPrimaryTimeseries', 'FALSE'),
        'ID': record.get('paleoData_iso2kUI', 'Unknown'),
        'InferredMaterial': record.get('paleoData_inferredMaterial', 'Unknown'),
        'InferredMaterialGroup': record.get('paleoData_inferredMaterialGroup', 'Unknown'),
        'VariableGroup': variable_group,
        'MeasurementMaterialDetail': measurement_material_detail,
        'Depth': depth_values,
        'Units': paleo_units,
        'MeasurementStandard': measurement_standard
    })
    all_series_with_variable_group.append(df)

combined_df = pd.concat(all_series_with_variable_group)
combined_df.dropna(subset=['Year', 'Value'], inplace=True)
# These two statements were fused on a single line in the original, which is a
# SyntaxError; they must be separate statements.
combined_df['Year'] = pd.to_numeric(combined_df['Year'], errors='coerce')
combined_df.dropna(subset=['Year'], inplace=True)

In [None]:
# Bounding box (degrees) and isotope variables used to subset the Iso2k table.
lat_min, lat_max = -30, 30
lon_min, lon_max = 30, 180
target_variables = ['d18O', 'd2H']

# One boolean mask per criterion; `between` is inclusive on both ends.
in_lat_band = combined_df['Latitude'].between(lat_min, lat_max)
in_lon_band = combined_df['Longitude'].between(lon_min, lon_max)
is_primary = combined_df['PrimaryTimeSeries'].eq('TRUE')
is_target_variable = combined_df['VariableName'].isin(target_variables)
has_positive_year = combined_df['Year'].gt(0)

filtered_df = combined_df[
    in_lat_band & in_lon_band & is_primary & is_target_variable & has_positive_year
].copy()

# Values that cannot be parsed as numbers become NaN and are dropped.
filtered_df['Value'] = pd.to_numeric(filtered_df['Value'], errors='coerce')
filtered_df = filtered_df.dropna(subset=['Value']).sort_values(by='Year')

In [None]:
# Record counts with the 'Coral' archive type left out.
filtered_excluded_df = filtered_df[filtered_df['archiveType'].ne('Coral')]

# Unique record IDs per archive type.
ids_per_archive_excl_coral = filtered_excluded_df.groupby('archiveType')['ID'].nunique()
variable_counts_by_archive = ids_per_archive_excl_coral.reset_index(name='UniqueIDCount')

# Unique record IDs per (archive type, variable); this reuses, and therefore
# replaces, the name assigned just above.
ids_per_archive_and_variable = (
    filtered_excluded_df.groupby(['archiveType', 'VariableName'])['ID'].nunique()
)
variable_counts_by_archive = ids_per_archive_and_variable.reset_index(name='UniqueIDCount')

In [None]:
# Unique record IDs per archive type (all archives).
ids_per_archive = filtered_df.groupby('archiveType')['ID'].nunique()
variable_counts_by_archive = ids_per_archive.reset_index(name='UniqueIDCount')

# Same count, restricted to rows that have a non-missing depth.
rows_with_depth = filtered_df[filtered_df['Depth'].notna()]
depth_counts_by_archive = (
    rows_with_depth.groupby('archiveType')['ID']
    .nunique()
    .reset_index(name='HasDepthCount')
)

# Left join keeps archive types with no depth information; those get a count of 0.
combined_counts_by_archive = variable_counts_by_archive.merge(
    depth_counts_by_archive, on='archiveType', how='left'
)
combined_counts_by_archive['HasDepthCount'] = (
    combined_counts_by_archive['HasDepthCount'].fillna(0).astype(int)
)

In [None]:
def plot_wood_time_series_by_units_with_combined_map(df, lat_min, lat_max, lon_min, lon_max):
    """Plot the 'Wood' archive records: one time-series figure per unit, then a site map.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'archiveType', 'ID', 'Year', 'Value',
        'Units', 'Longitude' and 'Latitude'.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the map, in degrees (PlateCarree).

    Returns
    -------
    None
        Every figure is displayed with ``plt.show()``.
    """
    # Defined locally: the notebook-level `markers` list is only created in a later
    # cell, so relying on it raised NameError on a fresh "Restart & Run All".
    markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']

    wood_df = df[df['archiveType'] == 'Wood'].copy()
    wood_df['Value'] = pd.to_numeric(wood_df['Value'], errors='coerce')
    wood_df.dropna(subset=['Value'], inplace=True)

    # One colour/marker pair per record ID, shared by the time series and the map.
    unique_ids = wood_df['ID'].unique()
    num_ids = len(unique_ids)
    color_palette = sns.color_palette("tab20", num_ids)
    id_to_color = {uid: color_palette[i % len(color_palette)] for i, uid in enumerate(unique_ids)}
    id_to_marker = {uid: markers[i % len(markers)] for i, uid in enumerate(unique_ids)}

    # Records are grouped by unit so that differently scaled series never share an axis.
    for unit, unit_df in wood_df.groupby('Units'):
        plt.figure(figsize=(12, 6))
        for uid in unit_df['ID'].unique():
            ts_data = unit_df[unit_df['ID'] == uid]
            plt.plot(
                ts_data['Year'],
                ts_data['Value'],
                label=f"ID: {uid}",
                color=id_to_color[uid],
                marker=id_to_marker[uid],
                linestyle='-',
                markersize=5,
                markevery=10,
            )
        # Pad the y-range by 20 % of the data span on both sides.
        y_min, y_max = unit_df['Value'].min(), unit_df['Value'].max()
        padding = (y_max - y_min) * 0.2
        plt.ylim(y_min - padding, y_max + padding)

        if unit.lower() == "permil":
            ylabel = "Raw δ¹⁸O (‰)"
        elif unit.lower() == "zscore":
            ylabel = "Normalised δ¹⁸O (z-score)"
        else:
            ylabel = f"δ¹⁸O ({unit})"

        plt.title("Time Series for Wood (δ¹⁸O)", fontsize=14)
        plt.xlabel("Year", fontsize=12)
        plt.ylabel(ylabel, fontsize=12)
        plt.legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1))
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    # Site map for all wood records.
    plt.figure(figsize=(12, 8))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, edgecolor="black")
    ax.add_feature(cfeature.COASTLINE)
    ax.gridlines(draw_labels=True)

    for uid in unique_ids:
        rec_df = wood_df[wood_df['ID'] == uid]
        ax.scatter(
            rec_df['Longitude'],
            rec_df['Latitude'],
            color=id_to_color[uid],
            marker=id_to_marker[uid],
            edgecolor="black",
            s=100,
            transform=ccrs.PlateCarree(),
            label=f"ID: {uid}"
        )

    plt.title("Geographic Distribution of Indo-Pacific Wood Records", fontsize=14)
    plt.legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
    plt.show()

plot_wood_time_series_by_units_with_combined_map(filtered_df, lat_min=-30, lat_max=30, lon_min=30, lon_max=180)

In [None]:
def plot_glacier_ice_timeseries_and_map(df, lat_min, lat_max, lon_min, lon_max):
    """Plot the 'GlacierIce' archive records: one figure per variable, then a site map.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'archiveType', 'ID', 'Year', 'Value',
        'VariableName', 'Longitude' and 'Latitude'.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the map, in degrees (PlateCarree).

    Returns
    -------
    None
        Every figure is displayed with ``plt.show()``.
    """
    # Defined locally: the notebook-level `markers` list is only created in a later
    # cell, so relying on it raised NameError on a fresh "Restart & Run All".
    markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']

    glacier_df = df[df['archiveType'] == 'GlacierIce'].copy()
    glacier_df['Value'] = pd.to_numeric(glacier_df['Value'], errors='coerce')
    glacier_df.dropna(subset=['Value'], inplace=True)

    # One colour/marker pair per record ID, shared by the time series and the map.
    unique_ids = glacier_df['ID'].unique()
    num_ids = len(unique_ids)
    color_palette = sns.color_palette("tab20", num_ids)
    id_to_color = {uid: color_palette[i % len(color_palette)] for i, uid in enumerate(unique_ids)}
    id_to_marker = {uid: markers[i % len(markers)] for i, uid in enumerate(unique_ids)}

    for varname, group in glacier_df.groupby("VariableName"):
        plt.figure(figsize=(12, 5))
        for uid in group['ID'].unique():
            ts_data = group[group['ID'] == uid]
            plt.plot(
                ts_data['Year'],
                ts_data['Value'],
                label=f"ID: {uid}",
                color=id_to_color[uid],
                marker=id_to_marker[uid],
                linestyle='-',
                markersize=5,
                markevery=10,
            )

        # Pad the y-range by 20 % of the data span on both sides.
        y_min, y_max = group['Value'].min(), group['Value'].max()
        padding = (y_max - y_min) * 0.2
        plt.ylim(y_min - padding, y_max + padding)

        # Axis label and title depend on which isotope ratio is being drawn.
        if varname.lower() == "d18o":
            ylabel = "Ice δ¹⁸O (‰)"
            title = "Time Series of Glacier Ice δ¹⁸O"
        elif varname.lower() == "d2h":
            ylabel = "Ice δ²H (‰)"
            title = "Time Series of Glacier Ice δ²H"
        else:
            ylabel = f"{varname} (raw values)"
            title = f"Time Series of {varname} in Glacier Ice"

        plt.title(title, fontsize=14)
        plt.xlabel("Year", fontsize=12)
        plt.ylabel(ylabel, fontsize=12)
        plt.legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1))
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    # Site map for all glacier-ice records.
    plt.figure(figsize=(12, 8))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, edgecolor="black")
    ax.add_feature(cfeature.COASTLINE)
    ax.gridlines(draw_labels=True)

    for uid in unique_ids:
        rec_df = glacier_df[glacier_df['ID'] == uid]
        ax.scatter(
            rec_df['Longitude'],
            rec_df['Latitude'],
            color=id_to_color[uid],
            marker=id_to_marker[uid],
            edgecolor="black",
            s=100,
            transform=ccrs.PlateCarree(),
            label=f"ID: {uid}"
        )

    plt.title("Geographic Distribution of Indo-Pacific Glacier Ice Records", fontsize=14)
    plt.legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
    plt.show()

plot_glacier_ice_timeseries_and_map(filtered_df, lat_min=-30, lat_max=30, lon_min=30, lon_max=180)

In [None]:
# Marker shapes and colours that are cycled through when styling individual records.
markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']
color_palette = sns.color_palette("tab20", 20)

def plot_time_series(df_subset, title, ylabel, marker_map, color_map):
    """Draw one line per record ID found in ``df_subset`` on a single figure.

    Parameters
    ----------
    df_subset : pandas.DataFrame
        Rows to plot; must contain the columns 'ID', 'Year' and 'Value'.
    title, ylabel : str
        Figure title and y-axis label.
    marker_map, color_map : dict
        Map each record ID to its marker symbol / line colour.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(12, 5))

    for record_id in df_subset['ID'].unique():
        rows = df_subset[df_subset['ID'] == record_id]
        line_style = {
            'label': str(record_id),
            'color': color_map[record_id],
            'marker': marker_map[record_id],
            'linestyle': '-',
            'markersize': 5,
            'markevery': 10,
        }
        ax.plot(rows['Year'], rows['Value'], **line_style)

    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.grid(True)
    ax.legend(title="ID", bbox_to_anchor=(1, 1))
    fig.tight_layout()
    plt.show()


# Marine-sediment records, split into three sets by record ID.
marine_df = filtered_df[filtered_df['archiveType'] == 'MarineSediment']
marine_ids = marine_df['ID'].unique()

# IDs ending in 'a' (plus one explicitly listed ID) and IDs ending in 'b' are
# plotted separately; a single hard-coded ID supplies the δ²H series.
measured_ids = [rid for rid in marine_ids if rid.endswith('a')]
measured_ids.append('MS11KDMS01')
inferred_ids = [rid for rid in marine_ids if rid.endswith('b')]
d2H_id = 'MS10JTMA001'

all_ids = measured_ids + inferred_ids + [d2H_id]

measured_df = marine_df[marine_df['ID'].isin(measured_ids)]
inferred_df = marine_df[marine_df['ID'].isin(inferred_ids)]
d2h_df = marine_df[marine_df['ID'] == d2H_id]
map_df = marine_df[marine_df['ID'].isin(all_ids)]

# Fixed style per ID so a record looks the same in every figure and on the map.
id_to_marker = {}
id_to_color = {}
for position, record_id in enumerate(all_ids):
    id_to_marker[record_id] = markers[position % len(markers)]
    id_to_color[record_id] = color_palette[position % len(color_palette)]

for subset_df, subset_title, subset_ylabel in (
    (measured_df, "Measured δ¹⁸O from Planktonic Foraminifera in Marine Sediment", "δ¹⁸O (‰, VPDB scale)"),
    (inferred_df, "Inferred δ¹⁸O of Surface Seawater from Marine Sediment", "δ¹⁸O (‰, VSMOW scale)"),
    (d2h_df, "Measured δ²H from Terrestrial Leaf Wax Preserved in Marine Sediment", "δ²H (‰, VSMOW scale)"),
):
    plot_time_series(subset_df, subset_title, subset_ylabel, id_to_marker, id_to_color)

# Site map of every marine-sediment record plotted above.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([30, 180, -30, 30], crs=ccrs.PlateCarree())
ax.add_feature(cfeature.LAND, edgecolor="black")
ax.add_feature(cfeature.COASTLINE)
ax.gridlines(draw_labels=True)

for record_id in map_df['ID'].unique():
    site_rows = map_df[map_df['ID'] == record_id]
    ax.scatter(
        site_rows['Longitude'],
        site_rows['Latitude'],
        color=id_to_color[record_id],
        marker=id_to_marker[record_id],
        edgecolor='black',
        s=100,
        transform=ccrs.PlateCarree(),
        label=record_id,
    )

ax.set_title("Geographical Distribution of Indo-Pacific Marine Sediment Records", fontsize=14)
ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
plt.show()

In [None]:
markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']

def plot_lake_sediment_time_series_and_map(df, lat_min, lat_max, lon_min, lon_max):
    """Plot the 'LakeSediment' records as split-axis time series plus a site map.

    For each of d18O and d2H a two-panel figure is drawn: the upper panel holds
    the values above an upper cut-off and the lower panel the values below a
    lower cut-off (30 / 10 for d18O, -10 / -80 for d2H). Values between the two
    cut-offs are not drawn. Both panels of a figure span the same y-range width.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'archiveType', 'VariableName', 'ID',
        'Year', 'Value', 'Longitude' and 'Latitude'.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the map, in degrees (PlateCarree).

    Returns
    -------
    None
        Every figure is displayed with ``plt.show()``.
    """
    lake_rows = df[df['archiveType'] == 'LakeSediment']
    d18o_rows = lake_rows[lake_rows['VariableName'] == 'd18O']
    d2h_rows = lake_rows[lake_rows['VariableName'] == 'd2H']

    # One colour/marker pair per record ID, shared by all figures.
    record_ids = lake_rows['ID'].unique()
    palette = sns.color_palette("tab20", len(record_ids))
    id_to_color = {}
    id_to_marker = {}
    for position, record_id in enumerate(record_ids):
        id_to_color[record_id] = palette[position % len(palette)]
        id_to_marker[record_id] = markers[position % len(markers)]

    # Width of the y-range used for both panels: the larger of the distances from
    # each cut-off to the data extreme on its side.
    d18o_span = max(d18o_rows['Value'].max() - 30, 10 - d18o_rows['Value'].min())
    d2h_span = max(d2h_rows['Value'].max() + 10, abs(-80 - d2h_rows['Value'].min()))

    def _draw_split_panels(rows, upper_cut, lower_cut, span, upper_label, lower_label, title):
        """Draw one two-panel figure for ``rows`` using the given cut-offs and labels."""
        fig, (upper_ax, lower_ax) = plt.subplots(nrows=2, figsize=(12, 6), sharex=True)

        for record_id in rows['ID'].unique():
            record_rows = rows[rows['ID'] == record_id]
            line_style = dict(
                label=f"ID: {record_id}",
                color=id_to_color[record_id],
                marker=id_to_marker[record_id],
                markersize=5,
                linestyle='-',
                markevery=10,
            )
            above = record_rows[record_rows['Value'] > upper_cut]
            upper_ax.plot(above['Year'], above['Value'], **line_style)
            below = record_rows[record_rows['Value'] < lower_cut]
            lower_ax.plot(below['Year'], below['Value'], **line_style)

        upper_ax.set_ylim(upper_cut, upper_cut + span)
        lower_ax.set_ylim(lower_cut - span, lower_cut)
        upper_ax.set_ylabel(upper_label, fontsize=12)
        lower_ax.set_ylabel(lower_label, fontsize=12)
        lower_ax.set_xlabel("Year")
        upper_ax.set_title(title, fontsize=14)
        upper_ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1))
        upper_ax.grid(True)
        lower_ax.grid(True)
        plt.tight_layout()
        plt.show()

    _draw_split_panels(
        d18o_rows, 30, 10, d18o_span,
        "δ¹⁸O (‰, Diatom Silica)", "δ¹⁸O (‰, Carbonates)",
        "Time Series for LakeSediment (δ¹⁸O)",
    )
    _draw_split_panels(
        d2h_rows, -10, -80, d2h_span,
        "δ²H (‰, Aquatic)", "δ²H (‰, Leaf wax)",
        "Time Series for LakeSediment (δ²H)",
    )

    # Site map of every lake-sediment record (both variables).
    plt.figure(figsize=(12, 6))
    map_ax = plt.axes(projection=ccrs.PlateCarree())
    map_ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    map_ax.add_feature(cfeature.LAND, edgecolor="black")
    map_ax.add_feature(cfeature.COASTLINE)
    map_ax.gridlines(draw_labels=True)

    for record_id in record_ids:
        site_rows = lake_rows[lake_rows['ID'] == record_id]
        map_ax.scatter(
            site_rows['Longitude'],
            site_rows['Latitude'],
            color=id_to_color[record_id],
            marker=id_to_marker[record_id],
            edgecolor="black",
            s=100,
            transform=ccrs.PlateCarree(),
            label=f"ID: {record_id}",
        )

    map_ax.set_title("Geographical Distribution of Indo-Pacific LakeSediment Records")
    map_ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
    plt.show()

plot_lake_sediment_time_series_and_map(filtered_df, lat_min=-30, lat_max=30, lon_min=30, lon_max=180)

In [None]:
markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']

def plot_speleothem_time_series_and_map(df, lat_min, lat_max, lon_min, lon_max):
    """Plot the 'Speleothem' records: one time-series figure per variable, then a site map.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'archiveType', 'VariableName', 'ID',
        'Year', 'Value', 'Longitude' and 'Latitude'.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the map, in degrees (PlateCarree).

    Returns
    -------
    None
        Every figure is displayed with ``plt.show()``.
    """
    speleothem_rows = df[df['archiveType'] == 'Speleothem']
    record_ids = speleothem_rows['ID'].unique()

    # One colour/marker pair per record ID, shared by the time series and the map.
    palette = sns.color_palette("tab20", speleothem_rows['ID'].nunique())
    id_to_color = {}
    id_to_marker = {}
    for position, record_id in enumerate(record_ids):
        id_to_color[record_id] = palette[position % len(palette)]
        id_to_marker[record_id] = markers[position % len(markers)]

    for _, variable_rows in speleothem_rows.groupby('VariableName'):
        fig, ax = plt.subplots(figsize=(12, 5))

        for record_id in variable_rows['ID'].unique():
            series = variable_rows[variable_rows['ID'] == record_id]
            ax.plot(
                series['Year'],
                series['Value'],
                label=f"ID: {record_id}",
                color=id_to_color[record_id],
                marker=id_to_marker[record_id],
                markersize=5,
                linestyle='-',
                markevery=10,
            )

        # Pad the y-range by 20 % of the data span on both sides.
        lowest = variable_rows['Value'].min()
        highest = variable_rows['Value'].max()
        margin = (highest - lowest) * 0.2
        ax.set_ylim(lowest - margin, highest + margin)

        # Title and label are fixed to δ¹⁸O regardless of the variable in the group.
        ax.set_title("Time Series for Speleothem (δ¹⁸O)", fontsize=14)
        ax.set_xlabel("Year", fontsize=12)
        ax.set_ylabel("δ¹⁸O (‰, Carbonate)", fontsize=12)
        ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1))
        ax.grid(True)
        fig.tight_layout()
        plt.show()

    # Site map of every speleothem record.
    plt.figure(figsize=(12, 8))
    map_ax = plt.axes(projection=ccrs.PlateCarree())
    map_ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    map_ax.add_feature(cfeature.LAND, edgecolor="black")
    map_ax.add_feature(cfeature.COASTLINE)
    map_ax.gridlines(draw_labels=True)

    for record_id in record_ids:
        site_rows = speleothem_rows[speleothem_rows['ID'] == record_id]
        map_ax.scatter(
            site_rows['Longitude'],
            site_rows['Latitude'],
            color=id_to_color[record_id],
            marker=id_to_marker[record_id],
            edgecolor="black",
            s=100,
            transform=ccrs.PlateCarree(),
            label=f"ID: {record_id}",
        )

    map_ax.set_title("Geographical Distribution of Indo-Pacific Speleothem Records")
    map_ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
    plt.show()

plot_speleothem_time_series_and_map(filtered_df, lat_min=-30, lat_max=30, lon_min=30, lon_max=180)

In [None]:
def plot_terrestrial_sediment_records(df, lat_min, lat_max, lon_min, lon_max):
    """Plot the 'TerrestrialSediment' records: one figure per variable, then a site map.

    Prints a message and returns without plotting when ``df`` holds no such records.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'archiveType', 'VariableName', 'ID',
        'Year', 'Value', 'Longitude' and 'Latitude'.
    lat_min, lat_max, lon_min, lon_max : float
        Extent of the map, in degrees (PlateCarree).

    Returns
    -------
    None
        Every figure is displayed with ``plt.show()``.
    """
    sediment_rows = df[df['archiveType'] == 'TerrestrialSediment']
    if sediment_rows.empty:
        print("No terrestrial sediment records found in the dataset.")
        return

    # One colour/marker pair per record ID, shared by the time series and the map.
    marker_cycle = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']
    record_ids = sediment_rows['ID'].unique()
    palette = sns.color_palette("tab20", len(record_ids))
    id_to_color = {}
    id_to_marker = {}
    for position, record_id in enumerate(record_ids):
        id_to_color[record_id] = palette[position % len(palette)]
        id_to_marker[record_id] = marker_cycle[position % len(marker_cycle)]

    for _, variable_rows in sediment_rows.groupby('VariableName'):
        fig, ax = plt.subplots(figsize=(12, 5))

        for record_id in variable_rows['ID'].unique():
            series = variable_rows[variable_rows['ID'] == record_id]
            ax.plot(
                series['Year'],
                series['Value'],
                label=f"ID: {record_id}",
                color=id_to_color[record_id],
                marker=id_to_marker[record_id],
                markersize=5,
                linestyle='-',
                markevery=10,
            )

        # Pad the y-range by 10 % of the data span on both sides.
        lowest = variable_rows['Value'].min()
        highest = variable_rows['Value'].max()
        margin = (highest - lowest) * 0.1
        ax.set_ylim(lowest - margin, highest + margin)

        # Title and label are fixed to δ²H regardless of the variable in the group.
        ax.set_title("Time Series for Terrestrial Sediment (δ²H)", fontsize=14)
        ax.set_xlabel("Year", fontsize=12)
        ax.set_ylabel("δ²H (‰)", fontsize=12)
        ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1))
        ax.grid(True)
        fig.tight_layout()
        plt.show()

    # Site map of every terrestrial-sediment record.
    plt.figure(figsize=(12, 8))
    map_ax = plt.axes(projection=ccrs.PlateCarree())
    map_ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    map_ax.add_feature(cfeature.LAND, edgecolor="black")
    map_ax.add_feature(cfeature.COASTLINE)
    map_ax.gridlines(draw_labels=True)

    for record_id in record_ids:
        site_rows = sediment_rows[sediment_rows['ID'] == record_id]
        map_ax.scatter(
            site_rows['Longitude'],
            site_rows['Latitude'],
            color=id_to_color[record_id],
            marker=id_to_marker[record_id],
            edgecolor='black',
            s=100,
            transform=ccrs.PlateCarree(),
            label=f"ID: {record_id}",
        )

    map_ax.set_title("Geographical Distribution of Terrestrial Sediment Records")
    map_ax.legend(title="ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
    plt.show()

plot_terrestrial_sediment_records(filtered_df, lat_min=-30, lat_max=30, lon_min=30, lon_max=180)

## CoralHydro2k

In [None]:
# Load the CoralHydro2k data.
# NOTE: pickle.load can execute code embedded in the file; only open trusted data.
with open("CoralHydro2k_Python.pkl", "rb") as file:
    proxies_all = pickle.load(file)["D"]

all_TS = lipd.extractTs(proxies_all)

# Keep only the series whose variable name is d18O_sw (seawater δ¹⁸O).
filtered_TS = lipd.filterTs(all_TS, "paleoData_variableName == d18O_sw")


def _within_indo_pacific_box(ts):
    """Return True when the series has both coordinates and they lie in 30–180°E, 30°S–30°N.

    A series with a missing (or None) coordinate is rejected. Defaulting a missing
    latitude to 0 would otherwise place it on the equator and let it through, and a
    None coordinate would raise TypeError in the comparison.
    """
    lon = ts.get("geo_meanLon")
    lat = ts.get("geo_meanLat")
    if lon is None or lat is None:
        return False
    return 30 <= lon <= 180 and -30 <= lat <= 30


# Restrict the series to the Indo-Pacific bounding box.
coral_swd18o_TS = [ts for ts in filtered_TS if _within_indo_pacific_box(ts)]

In [None]:
# Per-record plotting style for the coral δ¹⁸Osw series, keyed by dataSetName.
markers = ['o', 's', '^', 'P', 'D', '*', 'X', 'H', 'v', '>', '<', '8', 'p']
color_palette = sns.color_palette("tab20", len(coral_swd18o_TS))

id_to_color = {record['dataSetName']: color_palette[i % len(color_palette)] for i, record in enumerate(coral_swd18o_TS)}
id_to_marker = {record['dataSetName']: markers[i % len(markers)] for i, record in enumerate(coral_swd18o_TS)}

all_min_values = []
all_max_values = []
all_last_years = []  # latest finite year of every plotted record, used for the x-limit

# Two panels sharing the y-axis: years <= 400 on the left, years >= 1750 on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True, gridspec_kw={'width_ratios': [1, 2]})

for i, record in enumerate(coral_swd18o_TS):
    years = record.get('year', [])
    values = record.get('paleoData_values', [])
    label = record.get('dataSetName', f"Record {i+1}")

    # len() instead of truthiness: `not array` raises for multi-element numpy arrays.
    if years is None or values is None or len(years) == 0 or len(values) == 0:
        continue

    try:
        years = np.array(years, dtype=float)
        values = np.array(values, dtype=float)
    except ValueError:
        print(f"Skipping record {label} due to non-numeric values.")
        continue

    all_min_values.append(np.nanmin(values))
    all_max_values.append(np.nanmax(values))

    finite_years = years[np.isfinite(years)]
    if finite_years.size:
        all_last_years.append(finite_years.max())

    color = id_to_color[record['dataSetName']]
    marker = id_to_marker[record['dataSetName']]

    mask1 = years <= 400
    mask2 = years >= 1750

    if np.any(mask1):
        axes[0].plot(years[mask1], values[mask1], label=label, color=color, marker=marker,
                     markersize=5, linestyle='-', markevery=10)
    if np.any(mask2):
        axes[1].plot(years[mask2], values[mask2], label=label, color=color, marker=marker,
                     markersize=5, linestyle='-', markevery=10)

if not all_min_values:
    raise ValueError("No coral δ¹⁸Osw record with numeric year/value data to plot.")

# Common y-range for both panels, padded by 10 % of the overall data span.
y_min = min(all_min_values)
y_max = max(all_max_values)
y_range = y_max - y_min
padding = 0.1 * y_range

axes[0].set_ylim(y_min - padding, y_max + padding)
axes[1].set_ylim(y_min - padding, y_max + padding)
# NOTE(review): this title says 0-500y but the data mask and x-limit stop at 400 — confirm which is intended.
axes[0].set_title("Time Series for Coral δ¹⁸Osw (0-500y)", fontsize=14)
axes[1].set_title("Time Series for Coral δ¹⁸Osw (from 1750 onwards)", fontsize=14)

for ax in axes:
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("δ¹⁸Osw (‰)", fontsize=12)
    ax.grid(True)

axes[0].set_xlim(0, 400)
# Upper limit is the latest year across ALL plotted records. The original used
# `max(years)`, i.e. only whatever record the loop happened to process last.
if all_last_years:
    axes[1].set_xlim(1750, max(all_last_years))
else:
    axes[1].set_xlim(left=1750)
axes[1].tick_params(labelleft=True)
axes[1].legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1))

plt.tight_layout()
plt.show()

# Site map of the coral records.
plt.figure(figsize=(12, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([30, 180, -30, 30], crs=ccrs.PlateCarree())


ax.add_feature(cfeature.LAND, edgecolor="black")
ax.add_feature(cfeature.COASTLINE)
ax.gridlines(draw_labels=True)

lats = [ts.get("geo_meanLat") for ts in coral_swd18o_TS]
lons = [ts.get("geo_meanLon") for ts in coral_swd18o_TS]

for i, record in enumerate(coral_swd18o_TS):
    label = record.get('dataSetName', f"Record {i+1}")
    color = id_to_color[record['dataSetName']]
    marker = id_to_marker[record['dataSetName']]


    ax.scatter(
        record["geo_meanLon"], record["geo_meanLat"],
        color=color, marker=marker,
        edgecolor="black", s=100,
        transform=ccrs.PlateCarree(),
        label=label
    )

plt.title("Geographical Distribution of Indo-Pacific Coral Records")
plt.legend(title="Record ID", loc='upper left', bbox_to_anchor=(1, 1), ncol=2)
plt.show()

In [None]:
# One long-format frame per coral series that has both values and a time axis;
# the scalar metadata is broadcast over every row of the series.
coral_swd18o_records = [
    pd.DataFrame({
        "Year": record['year'],
        "Value": record['paleoData_values'],
        "Longitude": record.get('geo_meanLon', np.nan),
        "Latitude": record.get('geo_meanLat', np.nan),
        "archiveType": record.get('archiveType', 'Unknown'),
        "ID": record.get('dataSetName', 'Unknown'),
        "VariableName": record.get('paleoData_variableName', 'Unknown'),
    })
    for record in coral_swd18o_TS
    if 'paleoData_values' in record and 'year' in record
]

coral_swd18o_df = pd.concat(coral_swd18o_records, ignore_index=True)

# Coerce to numeric and discard rows that fail, first for the values, then the years.
for numeric_column in ('Value', 'Year'):
    coral_swd18o_df[numeric_column] = pd.to_numeric(coral_swd18o_df[numeric_column], errors='coerce')
    coral_swd18o_df = coral_swd18o_df.dropna(subset=[numeric_column])

coral_swd18o_df = coral_swd18o_df.sort_values(by='Year')

In [None]:
# Columns that both tables must provide before they can be stacked.
common_columns = ['Year', 'Value', 'Longitude', 'Latitude', 'archiveType', 'ID', 'VariableName', 'Depth']

# The coral table was built without a depth column; add an all-NaN one so it
# matches `common_columns`.
coral_swd18o_df['Depth'] = np.nan

# Archive types and individual record IDs that are removed from the Iso2k subset.
excluded_archive_types = ['Coral', 'TerrestrialSediment']
ids_to_discard = [
    'MS11ANMN01a', 'MS09DOIP01a', 'MS11ANMS01a', 'MS07LSSP01a',
    'MS10BLMS01a', 'MS14GFSS01a', 'MS07LSPG01a', 'MS13RSAS01a', 'LS09SASP01'
]

filtered_df = filtered_df[~filtered_df['archiveType'].isin(excluded_archive_types)]
filtered_df = filtered_df[~filtered_df['ID'].isin(ids_to_discard)]

# Drop the single extreme outlier of MS09DOIP01b at year 1198.55.
is_outlier = filtered_df['ID'].eq('MS09DOIP01b') & filtered_df['Year'].eq(1198.55)
filtered_df = filtered_df[~is_outlier]

# Stack the coral table on top of the Iso2k subset.
merged_df = pd.concat(
    [coral_swd18o_df[common_columns], filtered_df[common_columns]],
    ignore_index=True,
)
merged_df['Depth'] = pd.to_numeric(merged_df['Depth'], errors='coerce')

In [None]:
# Order rows by record ID, then by year within each record.
merged_df = merged_df.sort_values(['ID', 'Year'])

def sort_within_group(group):
    """Return ``group`` sorted by 'Depth' if it has any non-missing depth, else by 'Year'."""
    sort_column = 'Depth' if group['Depth'].notna().any() else 'Year'
    return group.sort_values(by=sort_column)

# Sort every record on its own axis (depth when available, otherwise year).
# The groups are iterated explicitly instead of going through GroupBy.apply:
# apply operating on the grouping column is deprecated in recent pandas, and
# iteration always hands over the full group, so the 'ID' column is kept.
_sorted_groups = [sort_within_group(group) for _, group in merged_df.groupby('ID')]
if _sorted_groups:
    merged_df = pd.concat(_sorted_groups).reset_index(drop=True)
else:
    # Nothing to sort; keep the (empty) frame with a clean index.
    merged_df = merged_df.reset_index(drop=True)

def calculate_thickness(group):
    """Return a copy of ``group`` with a 'Thickness' column added.

    Thickness is the difference between consecutive rows of 'Depth' when the
    group has any non-missing depth, otherwise of 'Year'. The first row has no
    predecessor, so it is back-filled from the next available difference.

    The input frame is left untouched: a new frame is returned instead of a
    column being written into the group that was passed in.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one record, already in the desired order, with the columns
        'Depth' and 'Year'.

    Returns
    -------
    pandas.DataFrame
        ``group`` plus the 'Thickness' column.
    """
    axis_column = 'Depth' if group['Depth'].notna().any() else 'Year'
    thickness = group[axis_column].diff().bfill()
    return group.assign(Thickness=thickness)

# Add the per-record 'Thickness' column. The groups are iterated explicitly
# instead of going through GroupBy.apply: apply operating on the grouping column
# is deprecated in recent pandas, and iteration always hands over the full
# group, so the 'ID' column is kept.
_thickness_groups = [calculate_thickness(group) for _, group in merged_df.groupby('ID')]
if _thickness_groups:
    merged_df = pd.concat(_thickness_groups).reset_index(drop=True)
else:
    # No records: still provide the column so the fillna below works.
    merged_df = merged_df.assign(Thickness=np.nan).reset_index(drop=True)
# Records with a single row have no difference to back-fill from; default them to 1.
merged_df['Thickness'] = merged_df['Thickness'].fillna(1)