In [15]:
import pandas as pd
from datetime import datetime, timedelta

# Column labels, assigned positionally to the whitespace-separated fields of the
# data file. Only 'date' (3rd field) and 'value' (11th field) are used further
# down; the other labels are placeholders.
col_names = [
    'col1', 'col2', 'date', 'col4', 'col5', 'col6', 'col7',
    'col8', 'col9', 'col10', 'value', 'col12', 'col13', 'col14',
]

# Source of data NASA
# https://climate.nasa.gov/vital-signs/sea-level/

# Read the file into a DataFrame, skipping the first 50 lines
# (presumably the file's header block — confirm if the file version changes).
# sep=r'\s+' splits on runs of whitespace; it is the documented replacement for
# delim_whitespace=True, which is deprecated since pandas 2.2.
df = pd.read_csv('GMSL_TPJAOS_5.1_199209_202307.txt', sep=r'\s+', skiprows=50, names=col_names)

# Function to parse fractional year
def parse_fractional_year(fractional_year):
    """Convert a fractional year (e.g. 2001.5) to a ``datetime``.

    The integer part selects the calendar year; the fractional part is
    interpreted as the elapsed share of that year's actual length, so leap
    years are handled via the real distance between the two January 1sts.

    Args:
        fractional_year: Numeric year with a fractional component.

    Returns:
        datetime: 1 January of the integer year plus the elapsed fraction.
    """
    whole_year = int(fractional_year)
    year_start = datetime(whole_year, 1, 1)
    next_year_start = year_start.replace(year=whole_year + 1)

    # Length of this particular year in seconds (365 or 366 days)
    seconds_in_year = (next_year_start - year_start).total_seconds()

    elapsed = timedelta(seconds=seconds_in_year * (fractional_year - whole_year))
    return year_start + elapsed

# Convert the fractional-year 'date' column into datetime objects
df['date'] = df['date'].apply(parse_fractional_year)

# Offset added to every reading before unit conversion. The original note
# describes it as "the value at 1992-01-01" — TODO confirm against the source data.
BASELINE_OFFSET = 38.56

# Parse 'value' as float, add the offset, then divide by 1000 to get the rise
# in meters. Plain Series arithmetic is used instead of a per-element
# .apply(lambda ...): same numbers, no Python-level call per row.
df['value'] = (df['value'].astype(float) + BASELINE_OFFSET) / 1000

# Keep only columns 'date' and 'value'
df = df[['date', 'value']]

print(df)

                           date    value
0    1993-01-05 04:58:03.936000 -0.00022
1    1993-01-15 02:56:30.912003 -0.00124
2    1993-01-25 00:54:57.887998 -0.00108
3    1993-02-03 22:53:56.399997 -0.00111
4    1993-02-13 20:52:23.375999 -0.00019
...                         ...      ...
1119 2023-06-02 17:31:32.304001  0.10347
1120 2023-06-12 15:29:59.279996  0.10409
1121 2023-06-22 13:28:26.255999  0.10457
1122 2023-07-02 11:26:53.232001  0.10491
1123 2023-07-12 09:25:20.207997  0.10514

[1124 rows x 2 columns]


In [16]:
# Label every row with the calendar year of its 'date', stored as a string,
# so it can serve as the grouping key
df['year'] = df['date'].dt.year.astype(str)

# Average 'value' within each year, then flatten the grouped result back into
# a two-column frame ('year', 'value') with a fresh integer index
yearly_groups = df.groupby('year')
yearly_mean = yearly_groups['value'].mean()
df_mean = yearly_mean.reset_index()

print(df_mean)

    year     value
0   1993  0.004032
1   1994  0.008008
2   1995  0.011184
3   1996  0.013254
4   1997  0.017289
5   1998  0.014912
6   1999  0.018524
7   2000  0.021773
8   2001  0.027346
9   2002  0.031483
10  2003  0.034851
11  2004  0.036811
12  2005  0.041605
13  2006  0.042957
14  2007  0.043664
15  2008  0.045843
16  2009  0.050947
17  2010  0.052251
18  2011  0.051597
19  2012  0.062485
20  2013  0.064906
21  2014  0.068561
22  2015  0.079547
23  2016  0.081713
24  2017  0.082859
25  2018  0.087085
26  2019  0.094070
27  2020  0.095107
28  2021  0.099425
29  2022  0.101945
30  2023  0.103037


In [17]:
# Flatten the yearly means into plain Python rows: [year, mean value]
data_list = df_mean.values.tolist()

# Walk the rows once, collecting the first field as x and the second as y
x_values = []
y_values = []
for row in data_list:
    x_values.append(row[0])
    y_values.append(row[1])

print(x_values)
print(y_values)

['1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']
[0.004031891891891895, 0.00800756756756757, 0.011183513513513516, 0.013254444444444447, 0.017289189189189192, 0.014911621621621624, 0.018523783783783786, 0.021772702702702703, 0.027346216216216218, 0.031483333333333335, 0.03485108108108108, 0.036810810810810814, 0.0416045945945946, 0.04295694444444445, 0.04366351351351352, 0.04584277777777778, 0.0509472972972973, 0.052250810810810816, 0.05159675675675676, 0.062485405405405404, 0.06490648648648649, 0.06856083333333333, 0.0795472972972973, 0.08171324324324325, 0.08285891891891892, 0.08708486486486486, 0.09407027027027026, 0.09510666666666667, 0.09942513513513514, 0.1019454054054054, 0.103037]


In [18]:
# Import the class, matching the `from datetime import datetime` used by the
# loading cell. A plain `import datetime` here would rebind the global name
# `datetime` to the module, and parse_fractional_year (which calls
# datetime(year, 1, 1)) would then fail if it were called again afterwards.
from datetime import datetime

# Final year covered by the forecast range (inclusive)
FORECAST_END_YEAR = 2100

# Most recent year with data (years are stored as strings) and its mean value
last_year = int(x_values[-1])
last_value = y_values[-1]

# Payload that gets serialized to JSON
data = {
   'timestamp': datetime.now().isoformat(),
   'sealevels': y_values, 
   'last_sealevel': last_value,
   'years': x_values,
   # Every year after the last observed one, up to and including FORECAST_END_YEAR
   'forecast_years': [str(year) for year in range(last_year+1, FORECAST_END_YEAR + 1)],
   # Source: https://sealevel.nasa.gov/ipcc-ar6-sea-level-projection-tool?psmsl_id=32&data_layer=scenario&boxinfo=true
   # NOTE(review): presumably projected rise in meters by the end of the forecast range — confirm against the source
   'best_case': 0.42,
   'worst_case': 0.8
}

In [19]:
import json

# Serialize before touching the file: with allow_nan=False a NaN/Infinity in
# `data` raises ValueError, and doing that here means the existing output file
# is not opened (and truncated) or left half-written when serialization fails.
payload = json.dumps(data, indent=2, allow_nan=False)

with open('../data/sealevels.json', 'w', encoding='utf-8') as f:
    f.write(payload)
        