# Load ENSO (MEI v2) data into the SQL database

Load the necessary packages.

In [25]:
import sqlite3
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Download the necessary dataset.

In [None]:
# Download the MEI v2 data file and cache it locally for the parsing cell.
url = "https://www.psl.noaa.gov/enso/mei/data/meiv2.data"
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Save the data to the same path the read_csv cell loads from
    # ('../../Data/'); the previous '../Data/' target meant a fresh download
    # was never the file that got parsed.
    # NOTE(review): confirm '../../Data/' is the intended data directory.
    with open('../../Data/meiv2.data', 'w', encoding='utf-8') as file:
        file.write(response.text)
else:
    print("Failed to retrieve data")

In [30]:

# Read the data into a DataFrame: whitespace-delimited, no header row.
# skiprows=1 drops the first line of the file and skipfooter=3 drops the last
# three lines (skipfooter is only supported by the python engine).
# sep=r'\s+' replaces the deprecated delim_whitespace=True.
enso_mei = pd.read_csv(
    '../../Data/meiv2.data',
    sep=r'\s+',
    header=None,
    skipfooter=3,
    engine='python',
    skiprows=1,
)
# Use the first column (the year) as the index and drop its "0" label
enso_mei = enso_mei.set_index(0)
enso_mei.index.name = None
# Label the 12 value columns by month name (original author's note: the values
# are bi-monthly periods - Dec/Jan, Jan/Feb, etc.)
enso_mei.columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Remove the last remaining row
enso_mei = enso_mei.iloc[:-1]
# Turn the -999.00 missing-value sentinel into NaN. An explicit NaN (rather
# than None) keeps the columns float64 and avoids older pandas versions
# treating value=None as "forward-fill".
enso_mei = enso_mei.replace(-999.00, float('nan'))
# Convert the year index to datetimes (YYYY -> YYYY-01-01)
enso_mei.index = pd.to_datetime(enso_mei.index, format='%Y')
display(enso_mei.tail())

Unnamed: 0,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
2019-01-01,0.04,0.46,0.71,0.29,0.2,0.28,0.2,0.26,0.13,0.22,0.44,0.4
2020-01-01,0.22,0.13,0.09,-0.14,-0.36,-0.87,-0.88,-0.94,-1.21,-1.19,-1.14,-1.16
2021-01-01,-1.18,-0.93,-0.81,-1.04,-1.16,-1.29,-1.53,-1.35,-1.45,-1.5,-1.43,-1.19
2022-01-01,-1.06,-0.96,-1.28,-1.76,-1.88,-2.07,-2.1,-1.79,-1.76,-1.71,-1.48,-1.25
2023-01-01,-1.11,-0.81,-0.67,-0.41,-0.14,0.23,0.25,0.43,0.59,,,


In [31]:
#Reformat data to long form: one row per (Year, Month) pair with a single MEI column
#Work on a copy so the wide table keeps its datetime index
wide = enso_mei.copy()
#Replace the datetime index with the plain calendar year
wide.index = pd.Index(wide.index.year.astype('int64'), name='Year')
#Unpivot the twelve month columns into (Year, Month, MEI) rows
enso_mei_long = wide.reset_index().melt(id_vars='Year', var_name='Month', value_name='MEI')
#melt emits all Jan rows, then all Feb rows, ...; a stable sort on Year restores
#year-by-year order while keeping the months in column order within each year
enso_mei_long = enso_mei_long.sort_values('Year', kind='mergesort')
#Ensure MEI is numeric (missing values become NaN) even if a source column was object dtype
enso_mei_long['MEI'] = pd.to_numeric(enso_mei_long['MEI'])
#Set the index as the year and month
enso_mei_long = enso_mei_long.set_index(['Year', 'Month'])
display(enso_mei_long.head(20))

#Save CSV file
#enso_mei_long.to_csv('enso_mei_long.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,MEI
Year,Month,Unnamed: 2_level_1
1979,Jan,0.47
1979,Feb,0.27
1979,Mar,-0.04
1979,Apr,0.26
1979,May,0.35
1979,Jun,-0.14
1979,Jul,-0.13
1979,Aug,0.5
1979,Sep,0.42
1979,Oct,0.28


In [35]:
#Add Date column %Y%m (YYYYMM)
#Move Year and Month out of the index so they can be used as regular columns
enso_mei_long.reset_index(inplace=True)
#Transform month column to number (Jan to 1)
enso_mei_long['Month'] = pd.to_datetime(enso_mei_long['Month'], format='%b').dt.month
#Zero-pad the month so the result is always six characters (197901, not 19791)
enso_mei_long['Date'] = enso_mei_long['Year'].astype(str) + enso_mei_long['Month'].astype(str).str.zfill(2)
enso_mei_long.head()

Unnamed: 0,Year,Month,MEI,Date
0,1979,Jan,0.47,1979Jan
1,1979,Feb,0.27,1979Feb
2,1979,Mar,-0.04,1979Mar
3,1979,Apr,0.26,1979Apr
4,1979,May,0.35,1979May


## Open SQL connection

In [None]:
# Path to the SQLite database file (relative, so it depends on the working directory).
db_path = "../Database/thesis_database.db"
# Open the connection; it stays open until conn.close() is called.
conn = sqlite3.connect(db_path)

### Transform dataset to the necessary format

## Close SQL connection

In [None]:
# Close the SQLite connection held in conn.
conn.close()