In [17]:
import requests
pacific_raw = requests.get("https://www.nhc.noaa.gov/data/hurdat/hurdat2-nepac-1949-2018-122019.txt")
pacific_raw.raise_for_status()  # check that we actually got something back

In [18]:
import io
from collections import Counter

c = Counter()
for line in io.StringIO(pacific_raw.text).readlines():
    c[line[:2]] += 1
c

Counter({'EP': 1050, '19': 18050, 'CP': 77, '20': 10520})

In [19]:
pacific_storms_r = []
pacific_storm_r = {'header': None, 'data': []}

for i, line in enumerate(io.StringIO(pacific_raw.text).readlines()):
    if line[:2] == 'EP' or line[:2] == 'CP':
        pacific_storms_r.append(pacific_storm_r.copy())
        pacific_storm_r['header'] = line
        pacific_storm_r['data'] = []
    else:
        pacific_storm_r['data'].append(line)

pacific_storms_r = pacific_storms_r[1:]

In [20]:
import pandas as pd

pacific_storm_dfs = []
for storm_dict in pacific_storms_r:
    storm_id, storm_name, storm_entries_n = storm_dict['header'].split(",")[:3]
    # remove hanging newline ('\n'), split fields
    data = [[entry.strip() for entry in datum[:-1].split(",")] for datum in storm_dict['data']]
    frame = pd.DataFrame(data)
    frame['id'] = storm_id
    frame['name'] = storm_name
    pacific_storm_dfs.append(frame)

pacific_storms = pd.concat(pacific_storm_dfs)

In [21]:
pacific_storms = pacific_storms.reindex(columns=pacific_storms.columns[-2:] | pacific_storms.columns[:-2])


In [22]:
pacific_storms.columns = [
        "id",
        "name",
        "date",
        "hours_minutes",
        "record_identifier",
        "status_of_system",
        "latitude",
        "longitude",
        "maximum_sustained_wind_knots",
        "maximum_pressure",
        "34_kt_ne",
        "34_kt_se",
        "34_kt_sw",
        "34_kt_nw",
        "50_kt_ne",
        "50_kt_se",
        "50_kt_sw",
        "50_kt_nw",
        "64_kt_ne",
        "64_kt_se",
        "64_kt_sw",
        "64_kt_nw",
        "na"
]

In [23]:
del pacific_storms['na']

pd.set_option("max_columns", None)



In [24]:
import numpy as np
pacific_storms = pacific_storms.replace(to_replace='-999', value=np.nan)

In [25]:
pacific_storms = pacific_storms.replace(to_replace="", value=np.nan)
pacific_storms['record_identifier'].value_counts(dropna=False)

NaN    28419
L        105
I          6
T          4
S          3
Name: record_identifier, dtype: int64

In [26]:

pacific_storms['latitude'] = pacific_storms['latitude'].map(lambda lat: lat[:-1] if lat[-1] == "N" else -lat[:-1])
pacific_storms['longitude']= pacific_storms['longitude'].map(lambda long: long[:-1] if long[-1] == "E" else "-" + long[:-1])

In [27]:
pacific_storms['date'] = pd.to_datetime(pacific_storms['date'])

In [28]:
pacific_storms['date'] = pacific_storms\
    .apply(
        lambda srs: srs['date'].replace(hour=int(srs['hours_minutes'][:2]), minute=int(srs['hours_minutes'][2:])), 
        axis='columns'
    )

In [29]:
del pacific_storms['hours_minutes']

In [30]:
pacific_storms['name'] = pacific_storms['name'].map(lambda n: n.strip())

In [31]:
pacific_storms.index = range(len(pacific_storms.index))
pacific_storms.index.name = "index"

In [33]:
pacific_storms.to_csv("../pacific_storms.csv", encoding='utf-8')

In [36]:
import requests
atlantic_raw = requests.get("https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2018-120319.txt")
atlantic_raw.raise_for_status()  # check that we actually got something back

In [37]:
import io
from collections import Counter

c = Counter()
for line in io.StringIO(atlantic_raw.text).readlines():
    c[line[:2]] += 1


In [38]:
import io

atlantic_storms_r = []
atlantic_storm_r = {'header': None, 'data': []}

for i, line in enumerate(io.StringIO(atlantic_raw.text).readlines()):
    if line[:2] == 'AL':
        atlantic_storms_r.append(atlantic_storm_r.copy())
        atlantic_storm_r['header'] = line
        atlantic_storm_r['data'] = []
    else:
        atlantic_storm_r['data'].append(line)

atlantic_storms_r = atlantic_storms_r[1:]


In [39]:
import pandas as pd

atlantic_storm_dfs = []
for storm_dict in atlantic_storms_r:
    storm_id, storm_name, storm_entries_n = storm_dict['header'].split(",")[:3]
    # remove hanging newline ('\n'), split fields
    data = [[entry.strip() for entry in datum[:-1].split(",")] for datum in storm_dict['data']]
    frame = pd.DataFrame(data)
    frame['id'] = storm_id
    frame['name'] = storm_name
    atlantic_storm_dfs.append(frame)

In [40]:
atlantic_storms = pd.concat(atlantic_storm_dfs)

In [41]:
atlantic_storms = atlantic_storms.reindex(columns=atlantic_storms.columns[-2:] | atlantic_storms.columns[:-2])

In [42]:
atlantic_storms.columns = [
        "id",
        "name",
        "date",
        "hours_minutes",
        "record_identifier",
        "status_of_system",
        "latitude",
        "longitude",
        "maximum_sustained_wind_knots",
        "maximum_pressure",
        "34_kt_ne",
        "34_kt_se",
        "34_kt_sw",
        "34_kt_nw",
        "50_kt_ne",
        "50_kt_se",
        "50_kt_sw",
        "50_kt_nw",
        "64_kt_ne",
        "64_kt_se",
        "64_kt_sw",
        "64_kt_nw",
        "na"
]

In [43]:
del atlantic_storms['na']
pd.set_option("max_columns", None)

In [44]:
import numpy as np
atlantic_storms = atlantic_storms.replace(to_replace='-999', value=np.nan)

In [45]:
atlantic_storms = atlantic_storms.replace(to_replace="", value=np.nan)

In [46]:
atlantic_storms['latitude'] = atlantic_storms['latitude'].map(lambda lat: lat[:-1] if lat[-1] == "N" else -lat[:-1])
atlantic_storms['longitude']= atlantic_storms['longitude'].map(lambda long: long[:-1] if long[-1] == "E" else "-" + long[:-1])

In [47]:
atlantic_storms['date'] = pd.to_datetime(atlantic_storms['date'])

In [48]:
atlantic_storms['date'] = atlantic_storms\
    .apply(
        lambda srs: srs['date'].replace(hour=int(srs['hours_minutes'][:2]), minute=int(srs['hours_minutes'][2:])), 
        axis='columns'
    )

In [49]:

del atlantic_storms['hours_minutes']

In [50]:
atlantic_storms['name'] = atlantic_storms['name'].map(lambda n: n.strip())

In [52]:
atlantic_storms.index = range(len(atlantic_storms.index))
atlantic_storms.index.name = "index"
atlantic_storms.to_csv("../atlantic_storms.csv", encoding='utf-8')