# Load the ENSO (MEI v2) file into the SQL database

Load the necessary packages.

In [None]:
import sqlite3
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl

Load the necessary dataset.

In [None]:
# Download the NOAA PSL MEI v2 table and cache it locally for the cells below.
url = "https://www.psl.noaa.gov/enso/mei/data/meiv2.data"
# requests has no default timeout; without one a stalled server would hang
# this cell forever.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Save the data to a file; the encoding is pinned so the result does not
    # depend on the platform's locale default.
    with open('../../Data/meiv2.data', 'w', encoding='utf-8') as file:
        file.write(response.text)
else:
    # Best effort: any previously downloaded copy is left untouched.
    print("Failed to retrieve data")

In [None]:

# Parse the whitespace-delimited MEI v2 table: no header row, skip the first
# line, and drop the 3 trailing footer lines (skipfooter requires the python
# engine). sep=r'\s+' replaces the deprecated delim_whitespace=True.
enso_mei = pd.read_csv(
    '../../Data/meiv2.data',
    sep=r'\s+',
    header=None,
    skipfooter=3,
    engine='python',
    skiprows=1,
)
# Use the first column (the year) as the index and drop its "0" label
enso_mei = enso_mei.set_index(0)
enso_mei.index.name = None
# Label the 12 value columns Jan..Dec. Per the original note these are
# bi-monthly periods (Dec/Jan, Jan/Feb, ...), so presumably 'Jan' stands for
# Dec/Jan -- TODO confirm against the NOAA description.
enso_mei.columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Remove the last remaining row
enso_mei = enso_mei.iloc[:-1]
# Turn the -999.00 sentinel into NaN. Passing None as the replacement is
# version-dependent in pandas (older releases pad-fill instead, newer ones
# produce object-dtype columns); NaN keeps the columns numeric everywhere.
enso_mei = enso_mei.replace(-999.00, float('nan'))
# Convert the year index to datetime (YYYY -> January 1st of that year)
enso_mei.index = pd.to_datetime(enso_mei.index, format='%Y')
display(enso_mei.tail())

In [None]:
# Reshape the wide table (one row per year, one column per month) into long
# format: one (Year, Month) pair per row with its MEI value.
month_labels = list(enso_mei.columns)

# Walk the table row by row, then month by month, so the output keeps the
# chronological Jan..Dec ordering within each year.
observations = [
    (timestamp.year, label, yearly_values[label])
    for timestamp, yearly_values in enso_mei.iterrows()
    for label in month_labels
]

# Assemble the three columns in one go and index by (Year, Month)
enso_mei_long = pd.DataFrame({
    'Year': [obs[0] for obs in observations],
    'Month': [obs[1] for obs in observations],
    'MEI': [obs[2] for obs in observations],
}).set_index(['Year', 'Month'])
display(enso_mei_long.head(20))

# Optional CSV export (disabled)
#enso_mei_long.to_csv('enso_mei_long.csv')

In [None]:
from datetime import datetime


def _month_abbr_to_number(abbr):
    """Convert a month abbreviation such as 'Jan' to its zero-padded number ('01')."""
    return datetime.strptime(abbr, '%b').strftime('%m')


# Bring Year and Month back as ordinary columns
enso_mei_long = enso_mei_long.reset_index()

# Two-digit month number as text (Jan -> '01', ..., Dec -> '12')
enso_mei_long['Month_num'] = enso_mei_long['Month'].apply(_month_abbr_to_number)

# Integer date key in YYYYMM form, built by concatenating year and month text
year_text = enso_mei_long['Year'].astype(str)
month_text = enso_mei_long['Month_num'].astype(str)
enso_mei_long['Date'] = (year_text + month_text).astype(int)
enso_mei_long

In [None]:
# Z-score the MEI values (subtract the mean, divide by the standard deviation).
# Which is actually not needed, because the index is supposed to be normalized already.
mei_values = enso_mei_long['MEI']
mei_mean = mei_values.mean()
mei_std = mei_values.std()
enso_mei_long['MEI_norm'] = (mei_values - mei_mean) / mei_std

## Open SQL connection

In [None]:
# Path of the SQLite database file, relative to this notebook
db_path = "../../Database/thesis_database.db"
# Open the connection used by the cells below
conn = sqlite3.connect(database=db_path)

In [None]:
# Write the long-format table to the 'enso_mei' table, replacing any existing
# table of that name; the DataFrame index is not stored.
enso_mei_long.to_sql(
    name='enso_mei',
    con=conn,
    if_exists='replace',
    index=False,
)

## Close SQL connection

In [None]:
# Release the SQLite connection opened earlier in this notebook.
conn.close()


## Store to pickle

In [None]:
# Persist the long-format ENSO DataFrame as a pickle file
pickle_path = "../../Data/pickle/enso_mei_long.pkl"
with open(pickle_path, mode="wb") as pickle_file:
    pkl.dump(enso_mei_long, pickle_file)
