In [4]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import xarray as xr
from bs4 import BeautifulSoup
from matplotlib import cm
from scipy.interpolate import griddata
from scipy.stats import norm, lognorm
from sklearn.preprocessing import StandardScaler


# Root of the project checkout. Every path below is derived from it, so the
# notebook can run on another machine by setting the PINN_DUST_ROOT environment
# variable; when it is unset the original location is used unchanged.
PROJECT_ROOT = os.environ.get(
    "PINN_DUST_ROOT",
    "C:/Users/vitas/Desktop/LE PINN/pinn.global.dust/pinn.global.dust",
).rstrip("/\\") + "/"

# Directory constants are plain strings ending in "/" because they are joined
# to file names with "+".
FIGURE_PATH = PROJECT_ROOT + "Code/figures/"
DATA_PATH = PROJECT_ROOT + "Data/"
DATA_LOAD_PATH = DATA_PATH + "original_data/"
INPUT_MODEL_PATH = DATA_PATH + "processed_data/"
MODEL_SAVE_PATH = DATA_PATH + "trained_models/"
RESULTS_PATH = DATA_PATH + "model_results/"

# Country outlines read from the shapefile stored at the project root.
path_to_shapefile = PROJECT_ROOT + "ne_110m_admin_0_countries.shp"
world = gpd.read_file(path_to_shapefile)

# PINN results and the simulated Holocene data set.
df_pinn = pd.read_csv(RESULTS_PATH + "df_pinn_simulated_Holocene.csv")
df_simulated = pd.read_csv(INPUT_MODEL_PATH + "df_simulation_Holocene.csv")
# The kriging frame is not read from disk here; it is rebuilt further down
# as df_kriging_Holocene.




In [5]:
class Preprocess:
    def __init__(self, df, time_label=None):
        '''
        Hold a private copy of ``df`` together with the climate period to keep.

        ``time_label`` is the value matched against the ``period`` column in
        ``preprocess_kriging``: 1 stands for the Holocene and 2 for the
        Last Glacial Maximum.
        '''
        self.time = time_label
        self.df = df.copy()

    def preprocess_kriging(self):
        '''
        Clean the kriging interpolation table and store the result in ``self.df``.

        Steps: average the rows sharing the same (period, lat, lon), keep the
        rows of the selected period, rename ``Flux`` to ``dep``, rescale it,
        and shift longitudes above 180 down by 360.
        '''
        # Move the index levels into columns, average duplicates, flatten again
        averaged = (
            self.df.reset_index()
            .groupby(['period', 'lat', 'lon'])
            .mean()
            .reset_index()
        )
        # Keep only the requested period and give the flux column its new name
        in_period = averaged['period'] == self.time
        selected = averaged[in_period].rename(columns={"Flux": "dep"})
        # Unit conversion from kg/m^2/s to g/m^2/a
        selected["dep"] = selected["dep"].values * 1000 * 365 * 24 * 60 * 60
        # Bring longitudes into the (-180, 180) range
        beyond_180 = selected['lon'] > 180
        selected.loc[beyond_180, 'lon'] = selected['lon'] - 360
        self.df = selected

    def normalize_data(self):
        '''Add ``log_dep`` (base-10 log of ``dep``) and its z-score ``log_dep_norm``.'''
        self.df['log_dep'] = np.log10(self.df['dep'])
        # StandardScaler expects a 2-D input, hence the single-column reshape
        log_column = np.array(self.df['log_dep']).reshape(-1, 1)
        self.df['log_dep_norm'] = StandardScaler().fit_transform(log_column)



# Load the helpers defined in functions_plot.py. Python source files are UTF-8
# by default, so decode the file as UTF-8 instead of relying on the platform's
# locale encoding.
with open("functions_plot.py", 'r', encoding="utf-8") as file:
    content = file.read()

# Execute the content of the .py file in this notebook's namespace.
# Compiling with the real file name makes tracebacks point at
# functions_plot.py instead of "<string>".
exec(compile(content, "functions_plot.py", "exec"))
## Preprocess the datasets

# Landing page of the PANGAEA data set (DOI 10.1594/PANGAEA.847983)
url_search = "https://doi.pangaea.de/10.1594/PANGAEA.847983"

# requests applies no timeout by default; without one a stalled connection
# would block the kernel indefinitely.
REQUEST_TIMEOUT_S = 60

# Get the html code and stop on an HTTP error status instead of parsing an error page
response = requests.get(url_search, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(response.text, "html.parser")

# Find the anchor tag with the download link
download_link = soup.find('a', id='static-download-link')
if download_link is None or not download_link.get('href'):
    raise RuntimeError(
        "No usable 'static-download-link' anchor found on " + url_search
    )

# Extract the URL from the href attribute
url = download_link['href']

# Download the data file itself
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()

# Open the downloaded bytes with xarray and flatten them into a table
df_kriging_raw = xr.open_dataset(response.content).to_dataframe()

# Period label 1 selects the Holocene (see Preprocess)
preprocessor_kriging_Holocene = Preprocess(df_kriging_raw, 1)
preprocessor_kriging_Holocene.preprocess_kriging()
preprocessor_kriging_Holocene.normalize_data()
df_kriging_Holocene = preprocessor_kriging_Holocene.df


# Quick sanity summary of the processed table
print("Shape Holocene:", df_kriging_Holocene.shape)
print("Missing values Holocene:\n", df_kriging_Holocene[['lat', 'lon', 'log_dep']].isna().sum())
print("Preview Holocene:\n", df_kriging_Holocene[['lat', 'lon', 'log_dep']].dropna().head())
## Visualise the datasets


## Export the processed data
# Hand the path to pandas instead of a text handle opened without newline='':
# on Windows such a handle turns the writer's "\r\n" into "\r\r\n".
df_kriging_Holocene.to_csv(INPUT_MODEL_PATH + 'df_kriging_Holocene.csv', index=False)



Shape Holocene: (8192, 7)
Missing values Holocene:
 lat        0
lon        0
log_dep    0
dtype: int64
Preview Holocene:
        lat      lon   log_dep
0 -87.8638   0.0000 -3.220816
1 -87.8638   2.8125 -3.228133
2 -87.8638   5.6250 -3.228456
3 -87.8638   8.4375 -3.242660
4 -87.8638  11.2500 -3.244719


In [6]:
# Simulated data: (lon, lat) pairs and the values attached to them
coords_sim = df_simulated[['lon', 'lat']].to_numpy()
values_sim = df_simulated['log_dep_norm'].to_numpy()

# PINN output: (lon, lat) pairs and values
# NOTE(review): this column is 'PINN_log_dep' while the other two sources use
# 'log_dep_norm' - confirm all three are on the same scale.
coords_pinn = df_pinn[['lon', 'lat']].to_numpy()
values_pinn = df_pinn['PINN_log_dep'].to_numpy()

# Kriging: (lon, lat) pairs and values
coords_kriging = df_kriging_Holocene[['lon', 'lat']].to_numpy()
values_kriging = df_kriging_Holocene['log_dep_norm'].to_numpy()


def _nearest_at_simulated(source_coords, source_values):
    """Value of the closest source point for every simulated coordinate."""
    return griddata(source_coords, source_values, coords_sim, method='nearest')


# NOTE(review): the nearest point is searched on raw lon/lat values; the kriging
# longitudes were shifted so that none exceeds 180 - confirm df_simulated and
# df_pinn follow the same longitude convention.

# PINN values resampled at the simulated-data coordinates
values_pinn_interp = _nearest_at_simulated(coords_pinn, values_pinn)

# Kriging values resampled at the simulated-data coordinates
values_kriging_interp = _nearest_at_simulated(coords_kriging, values_kriging)

In [7]:
from sklearn.metrics import mean_squared_error, mean_absolute_error


def _error_summary(reference, estimate):
    """Return (MSE, RMSE, MAE) of ``estimate`` measured against ``reference``."""
    mse = mean_squared_error(reference, estimate)
    return mse, np.sqrt(mse), mean_absolute_error(reference, estimate)


# PINN error against the simulated data
mse_pinn, rmse_pinn, mae_pinn = _error_summary(values_sim, values_pinn_interp)

# Kriging error against the simulated data
mse_kriging, rmse_kriging, mae_kriging = _error_summary(values_sim, values_kriging_interp)

print("Errore PINN:")
print(f"MSE: {mse_pinn}, RMSE: {rmse_pinn}, MAE: {mae_pinn}")

print("Errore Kriging:")
print(f"MSE: {mse_kriging}, RMSE: {rmse_kriging}, MAE: {mae_kriging}")


Errore PINN:
MSE: 3.2906948993119864, RMSE: 1.8140272598039937, MAE: 1.5472675438541847
Errore Kriging:
MSE: 0.4140805453705464, RMSE: 0.6434909054295533, MAE: 0.5174693481601811
