In [None]:
from datetime import date
import requests
import pandas as pd
from dotenv import load_dotenv
import os
import sqlite3
import urllib3
from bs4 import BeautifulSoup
import re

In [None]:
# Load settings through python-dotenv before any os.getenv() lookups run.
# NOTE(review): EL_NINO_DB_URL (read in the next cell) is presumably defined in a local .env file — confirm.
load_dotenv()

In [None]:
# Open the SQLite database whose location is given by the EL_NINO_DB_URL environment variable.
# os.getenv() returns None when the variable is unset, which sqlite3.connect() rejects with an
# unhelpful TypeError, and an empty string would silently open a throw-away temporary database.
# Fail early with an actionable message instead.
el_nino_db_url = os.getenv("EL_NINO_DB_URL")
if not el_nino_db_url:
    raise RuntimeError(
        "Environment variable EL_NINO_DB_URL is not set; cannot open the SQLite database."
    )
connection = sqlite3.connect(el_nino_db_url)

In [None]:
# Download the weekly SST index listing from NOAA CPC.
# The timeout keeps the notebook from hanging forever on a stalled connection, and
# raise_for_status() stops the run on an HTTP error instead of letting an error page
# reach the fixed-width parser further down.
url_weekly_sst = "https://www.cpc.ncep.noaa.gov/data/indices/wksst9120.for"
request_weekly_sst = requests.get(url_weekly_sst, timeout=30)
request_weekly_sst.raise_for_status()

In [None]:
# Break the downloaded response body into a list of text lines for line-by-line parsing.
lines_weekly_sst = request_weekly_sst.text.splitlines()

In [None]:
def parse_weekly_sst_data_line(line: str) -> dict:
    """Convert one fixed-width row of the weekly SST listing into a record.

    The row is stripped of surrounding whitespace and then cut purely by
    character position: the first nine characters are a ``%d%b%Y`` date, and
    each of the four region groups contributes two adjacent 4-character
    numeric fields.

    Args:
        line: A single data row from the weekly SST text file.

    Returns:
        A dict holding ``event_calendar_date`` (as parsed by
        ``pd.to_datetime``) followed by float values for ``nino12``,
        ``nino3``, ``nino34`` and ``nino4``, each with an ``_sst`` and a
        ``_var`` key.

    Raises:
        ValueError: If the date or one of the numeric fields cannot be parsed.
    """
    row = line.strip()

    # (output key, start column, end column) within the stripped row.
    numeric_columns = (
        ('nino12_sst', 14, 18),
        ('nino12_var', 18, 22),
        ('nino3_sst', 27, 31),
        ('nino3_var', 31, 35),
        ('nino34_sst', 40, 44),
        ('nino34_var', 44, 48),
        ('nino4_sst', 53, 57),
        ('nino4_var', 57, 61),
    )

    record = {'event_calendar_date': pd.to_datetime(row[:9], format='%d%b%Y')}
    for key, start, stop in numeric_columns:
        record[key] = float(row[start:stop])
    return record

In [None]:
# Parse every row after the first four lines (presumably the file's header) into a DataFrame.
# Blank or whitespace-only lines are skipped so that a stray empty line in the download
# does not crash the fixed-width parser with a ValueError.
weekly_sst_df = pd.DataFrame(
    [
        parse_weekly_sst_data_line(line)
        for line in lines_weekly_sst[4:]
        if line.strip()
    ]
)

In [None]:
# Append the parsed weekly SST rows to the 'sst_weekly' table (the DataFrame index is not written).
weekly_sst_df.to_sql(
    name='sst_weekly',
    con=connection,
    if_exists='append',
    index=False,
)


In [None]:
# Download the monthly equatorial heat-content index listing from NOAA CPC.
# The timeout prevents an indefinite hang on a stalled connection, and raise_for_status()
# aborts on an HTTP error instead of handing an error page to the parser.
url_eq_temp_month = 'https://www.cpc.ncep.noaa.gov/products/analysis_monitoring/ocean/index/heat_content_index.txt'
eq_temp_month_request = requests.get(url_eq_temp_month, timeout=30)
eq_temp_month_request.raise_for_status()

In [None]:
# Break the downloaded response body into a list of text lines for line-by-line parsing.
lines_eq_temp_month = eq_temp_month_request.text.splitlines()


In [None]:
# Display the first five downloaded lines as the cell output (nothing is assigned here).
lines_eq_temp_month[:5]

In [None]:
def parse_monthly_equatorial_temperature_average(line: str) -> dict:
    """Convert one whitespace-separated monthly row into a record.

    The first two fields are read as year and month and combined into the
    first day of that month; the next three fields are read as floats.

    Args:
        line: A single data row such as ``"1979 1 0.06 0.14 0.26"``.

    Returns:
        A dict with ``event_calendar_date`` (a ``datetime.date``) followed by
        the float columns ``e130_w80``, ``e160_w80`` and ``w180_w100``.

    Raises:
        IndexError: If the row has fewer than five fields.
        ValueError: If a field is not numeric or the year/month is invalid.
    """
    fields = line.split()
    first_of_month = date(int(fields[0]), int(fields[1]), 1)

    record = {'event_calendar_date': first_of_month}
    # The three value columns sit in fields 2, 3 and 4, in this order.
    for position, column in enumerate(('e130_w80', 'e160_w80', 'w180_w100'), start=2):
        record[column] = float(fields[position])
    return record

In [None]:
# Parse every row after the first two lines (presumably the file's header) into a DataFrame.
# Blank or whitespace-only lines are skipped so that a stray empty line in the download
# does not crash the parser with an IndexError.
eq_temp_month_df = pd.DataFrame(
    [
        parse_monthly_equatorial_temperature_average(line)
        for line in lines_eq_temp_month[2:]
        if line.strip()
    ]
)

In [None]:
# Append the parsed monthly rows to the 'equatorial_average_temperature_monthly' table
# (the DataFrame index is not written).
eq_temp_month_df.to_sql(
    name='equatorial_average_temperature_monthly',
    con=connection,
    if_exists='append',
    index=False,
)

In [None]:
# Download the ONI listing from NOAA CPC and split it into text lines.
# The timeout prevents an indefinite hang on a stalled connection, and raise_for_status()
# aborts on an HTTP error instead of handing an error page to the parser.
url_monthly_oni = 'https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt'
monthly_oni_request = requests.get(url_monthly_oni, timeout=30)
monthly_oni_request.raise_for_status()
lines_monthly_oni = monthly_oni_request.text.splitlines()

In [None]:
def parse_monthly_ony(line: str) -> dict:
    """Convert one whitespace-separated ONI row into a record.

    A row starts with a three-letter season code followed by a year and two
    numeric values. The season code is translated to a month number by its
    position in the sequence DJF, JFM, ..., NDJ (DJF -> 1, NDJ -> 12), and the
    record is dated on the first day of that month in the given year.

    Args:
        line: A single data row such as ``"DJF 1950 24.72 -1.53"``.

    Returns:
        A dict with ``event_calendar_date`` (a ``datetime.date``),
        ``total_oni`` and ``anomaly_oni`` (both floats).

    Raises:
        IndexError: If the row has fewer than four fields.
        KeyError: If the season code is not one of the twelve known codes.
        ValueError: If the year or a numeric value cannot be parsed.
    """
    season_codes = (
        'DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ',
        'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ',
    )
    # Season code -> month number, numbered 1..12 in the order listed above.
    season_to_month = {code: number for number, code in enumerate(season_codes, start=1)}

    fields = line.split()
    year = int(fields[1])
    month = season_to_month[fields[0]]

    record = {'event_calendar_date': date(year, month, 1)}
    record['total_oni'] = float(fields[2])
    record['anomaly_oni'] = float(fields[3])
    return record

In [None]:
# Parse every row after the first line (presumably the column header) into a DataFrame.
# Blank or whitespace-only lines are skipped so that a stray empty line in the download
# does not crash the parser with an IndexError.
monthly_oni_df = pd.DataFrame(
    [
        parse_monthly_ony(line)
        for line in lines_monthly_oni[1:]
        if line.strip()
    ]
)


In [None]:
# Append the parsed ONI rows to the 'oni_monthly' table (the DataFrame index is not written).
monthly_oni_df.to_sql(
    name='oni_monthly',
    con=connection,
    if_exists='append',
    index=False,
)


In [None]:
# Scrape NOAA PSL's "past ENSO events" page and flatten it into one
# {'year_id', 'year_enso_type'} record per listed year.
http = urllib3.PoolManager()
url = 'https://psl.noaa.gov/enso/past_events.html'
# Bound the request time and stop on an HTTP error rather than parsing an error page.
response = http.request('GET', url, timeout=30.0)
if response.status >= 400:
    raise RuntimeError(f"Request for {url} failed with HTTP status {response.status}")
# Name the parser explicitly: without it bs4 picks whichever parser happens to be
# installed (and emits a warning), so the parse tree could differ between machines.
soup = BeautifulSoup(response.data, 'html.parser')

# Each matching <div> is one column of the page: an <h4> heading plus a list of years.
categories = soup.select('div.text-center.col-md-4.col-sm-4.col-xs-4')

# Page heading -> value stored in the 'year_enso_type' column.
category_by_heading = {
    "El Niño": 'elnino',
    "Neutral": 'neutral',
    "La Niña": 'lanina',
}
# A usable token is a single year ("1997") or an inclusive range ("1997-1998").
# Matching the full shape rejects strings such as "----" or nine plain digits, which the
# previous length/character test let through and which then crashed or produced bogus ranges.
year_token_pattern = re.compile(r'\d{4}(?:-\d{4})?')

years_categories = []
for k in categories:
    # NOTE: '[\t><br/\n]' is a character class, so the markup is split on tab, newline
    # and the single characters '>', '<', 'b', 'r' and '/'. Year tokens contain none of
    # those characters, so they come through intact; the delimiter set is kept unchanged.
    tokens = [
        token
        for token in re.split('[\t><br/\n]', str(k))
        if year_token_pattern.fullmatch(token)
    ]
    # For a single year the first and last four characters coincide, giving a one-year range.
    extended_tokens = [range(int(token[:4]), int(token[-4:]) + 1) for token in tokens]
    years = [year for list_years in extended_tokens for year in list_years]

    tag = k.find('h4')
    # Tolerate a missing heading and surrounding whitespace. An unrecognised heading maps
    # to None, the same value the original if/elif chain left in place for unknown headings.
    heading = tag.get_text(strip=True) if tag is not None else ''
    category = category_by_heading.get(heading)

    for year in years:
        years_categories.append({'year_id': year, 'year_enso_type': category})

In [None]:
# Build a DataFrame from the scraped year/category records and append it to the
# 'enso_past_events' table (the DataFrame index is not written).
past_enso_events_pd = pd.DataFrame(data=years_categories)
past_enso_events_pd.to_sql(
    name='enso_past_events',
    con=connection,
    if_exists='append',
    index=False,
)



In [None]:
# Close the SQLite connection opened near the top of the notebook now that all tables are written.
connection.close()