# Hurricane Katrina  
## Analytics Project I - Project 1 - Hurricane Katrina

In [1]:
import pandas as pd
import numpy as np

## Dataset Creation

In [2]:
import tropycal.tracks as tracks
import datetime as dt

In [3]:
basin = tracks.TrackDataset(basin='north_atlantic',source='hurdat',include_btk=False)

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (1.36 seconds)


In [4]:
print(basin)

<tropycal.tracks.Dataset>
Dataset Summary:
    Basin:             north_atlantic
    Source:            hurdat
    Number of storms:  1952
    Maximum wind:      165 knots (Allen 1980)
    Minimum pressure:  882 hPa (Wilma 2005)
    Year range:        1851 — 2022


In [5]:
basin.to_dataframe()

Unnamed: 0_level_0,all_storms,named_storms,hurricanes,major_hurricanes,ace,start_time,end_time
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1851,6,6,3,1,36.1,1851-06-25 00:00:00,1851-10-19 18:00:00
1852,5,5,5,1,73.4,1852-08-19 00:00:00,1852-10-11 18:00:00
1853,8,8,4,2,76.4,1853-08-05 12:00:00,1853-10-22 06:00:00
1854,5,5,3,1,31.1,1854-06-25 00:00:00,1854-10-22 18:00:00
1855,5,5,4,1,18.2,1855-08-06 12:00:00,1855-09-17 06:00:00
...,...,...,...,...,...,...,...
2018,16,15,8,2,132.5,2018-05-25 12:00:00,2018-10-31 12:00:00
2019,20,18,6,3,132.3,2019-05-20 18:00:00,2019-11-24 18:00:00
2020,31,30,14,7,180.3,2020-05-16 18:00:00,2020-11-18 12:00:00
2021,21,21,7,4,145.3,2021-05-22 06:00:00,2021-11-07 06:00:00


#### import hurricane cost dataset (hc_df):  
https://www.ncei.noaa.gov/access/billions/events/US/1980-2023?disasters[]=tropical-cyclone

In [6]:
hc_df = pd.read_csv('events-US-1980-2023.csv', skiprows=1)

In [7]:
hc_df.head()

Unnamed: 0,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
0,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13
1,Hurricane Alicia (August 1983),Tropical Cyclone,19830817,19830820,9150.0,21
2,Hurricane Elena (September 1985),Tropical Cyclone,19850830,19850903,3671.6,4
3,Hurricane Gloria (September 1985),Tropical Cyclone,19850926,19850928,2414.0,11
4,Hurricane Juan (October 1985),Tropical Cyclone,19851027,19851031,4205.8,63


In [8]:
import re

def parse_name_and_year_split(name_str):
    # Use regex to extract the name and year
    match = re.search(r'(Hurricane|Tropical Storm)?\s*([A-Za-z\s]+)\s\((\w+\s(\d{4}))\)', name_str)
    if match:
        return match.group(2).strip(), match.group(4)
    return None, None

In [9]:
hc_df['name'], hc_df['year'] = zip(*hc_df['Name'].apply(parse_name_and_year_split))

In [10]:
hc_df

Unnamed: 0,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths,name,year
0,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen,1980
1,Hurricane Alicia (August 1983),Tropical Cyclone,19830817,19830820,9150.0,21,Alicia,1983
2,Hurricane Elena (September 1985),Tropical Cyclone,19850830,19850903,3671.6,4,Elena,1985
3,Hurricane Gloria (September 1985),Tropical Cyclone,19850926,19850928,2414.0,11,Gloria,1985
4,Hurricane Juan (October 1985),Tropical Cyclone,19851027,19851031,4205.8,63,Juan,1985
...,...,...,...,...,...,...,...,...
56,Hurricane Nicholas (September 2021),Tropical Cyclone,20210914,20210918,1137.9,0,Nicholas,2021
57,Hurricane Fiona (September 2022),Tropical Cyclone,20220917,20220918,2585.3,25,Fiona,2022
58,Hurricane Ian (September 2022),Tropical Cyclone,20220928,20220930,115154.0,152,Ian,2022
59,Hurricane Nicole (November 2022),Tropical Cyclone,20221110,20221111,1050.6,5,Nicole,2022


In [11]:
hurricaneNames = list(hc_df['name'])
hurricaneNames

['Allen',
 'Alicia',
 'Elena',
 'Gloria',
 'Juan',
 'Allison',
 'Hugo',
 'Bob',
 'Andrew',
 'Iniki',
 'Alberto',
 'Erin',
 'Marilyn',
 'Opal',
 'Fran',
 'Frances',
 'Bonnie',
 'Georges',
 'Floyd',
 'Allison',
 'Lili',
 'Isidore',
 'Isabel',
 'Charley',
 'Frances',
 'Ivan',
 'Jeanne',
 'Dennis',
 'Katrina',
 'Rita',
 'Wilma',
 'Dolly',
 'Gustav',
 'Ike',
 'Irene',
 'Lee',
 'Isaac',
 'Sandy',
 'Matthew',
 'Harvey',
 'Irma',
 'Maria',
 'Florence',
 'Michael',
 'Dorian',
 'Imelda',
 'Hanna',
 'Isaias',
 'Laura',
 'Sally',
 'Delta',
 'Zeta',
 'Eta',
 'Elsa',
 'Fred',
 'Ida',
 'Nicholas',
 'Fiona',
 'Ian',
 'Nicole',
 'Idalia']

##### there are multiple hurricanes with repeating names:

In [12]:
for name in hurricaneNames:
    storm = basin.search_name(name)
    print(name, storm)

Allen [1980]
Alicia [1983]
Elena [1965, 1979, 1985]
Gloria [1976, 1979, 1985]
Juan [1985, 2003]
Allison [1989, 1995, 2001]
Hugo [1989]
Bob [1979, 1985, 1991]
Andrew [1986, 1992]
Iniki []
Alberto [1982, 1988, 1994, 2000, 2006, 2012, 2018]
Erin [1989, 1995, 2001, 2007, 2013, 2019]
Marilyn [1995]
Opal [1995]
Fran [1973, 1984, 1990, 1996]
Frances [1961, 1968, 1976, 1980, 1986, 1992, 1998, 2004]
Bonnie [1980, 1986, 1992, 1998, 2004, 2010, 2016, 2022]
Georges [1980, 1998]
Floyd [1981, 1987, 1993, 1999]
Allison [1989, 1995, 2001]
Lili [1984, 1990, 1996, 2002]
Isidore [1984, 1990, 1996, 2002]
Isabel [1985, 2003]
Charley [1980, 1986, 1992, 1998, 2004]
Frances [1961, 1968, 1976, 1980, 1986, 1992, 1998, 2004]
Ivan [1980, 1998, 2004]
Jeanne [1980, 1998, 2004]
Dennis [1981, 1987, 1993, 1999, 2005]
Katrina [1981, 1999, 2005]
Rita [2005]
Wilma [2005]
Dolly [1953, 1954, 1968, 1974, 1996, 2002, 2008, 2014, 2020]
Gustav [1984, 1990, 1996, 2002, 2008]
Ike [2008]
Irene [1959, 1971, 1981, 1999, 2005, 2011]

...but we are interested only in hurricanes we have cost data for:

In [13]:
hNamesDates = hc_df[['name', 'year']]
hNamesDates.head()
hNamesDates

Unnamed: 0,name,year
0,Allen,1980
1,Alicia,1983
2,Elena,1985
3,Gloria,1985
4,Juan,1985
...,...,...
56,Nicholas,2021
57,Fiona,2022
58,Ian,2022
59,Nicole,2022


In [14]:
hNamesDates['year']=hNamesDates['year'].astype(int)
hNamesDates['name']=hNamesDates['name'].astype(str)
hNamesDates.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hNamesDates['year']=hNamesDates['year'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hNamesDates['name']=hNamesDates['name'].astype(str)


name    object
year     int32
dtype: object

In [15]:
storm = basin.get_storm(('Michael', 2018))
storm.to_dict()

{'id': 'AL142018',
 'operational_id': 'AL142018',
 'name': 'MICHAEL',
 'year': 2018,
 'season': 2018,
 'basin': 'north_atlantic',
 'source_info': 'NHC Hurricane Database',
 'source': 'hurdat',
 'time': [datetime.datetime(2018, 10, 6, 18, 0),
  datetime.datetime(2018, 10, 7, 0, 0),
  datetime.datetime(2018, 10, 7, 6, 0),
  datetime.datetime(2018, 10, 7, 12, 0),
  datetime.datetime(2018, 10, 7, 18, 0),
  datetime.datetime(2018, 10, 8, 0, 0),
  datetime.datetime(2018, 10, 8, 6, 0),
  datetime.datetime(2018, 10, 8, 12, 0),
  datetime.datetime(2018, 10, 8, 18, 0),
  datetime.datetime(2018, 10, 9, 0, 0),
  datetime.datetime(2018, 10, 9, 6, 0),
  datetime.datetime(2018, 10, 9, 12, 0),
  datetime.datetime(2018, 10, 9, 18, 0),
  datetime.datetime(2018, 10, 10, 0, 0),
  datetime.datetime(2018, 10, 10, 6, 0),
  datetime.datetime(2018, 10, 10, 12, 0),
  datetime.datetime(2018, 10, 10, 17, 30),
  datetime.datetime(2018, 10, 10, 18, 0),
  datetime.datetime(2018, 10, 11, 0, 0),
  datetime.datetime(20

In [16]:
# import tropycal.tracks as tracks

# Create a TrackDataset object for the North Atlantic basin
# basin = tracks.TrackDataset(basin='north_atlantic')

# Create a dictionary to store the storm data
stormsData = {}
for index, row in hNamesDates.iterrows():
    try:
         #print(row['name'], row['year'])
        # Create a dictionary to store the storm data
        stormsData[row['name']] = {}
        # Load the storm data
        stormsData[row['name']] = basin.get_storm((row['name'],row['year']))
    except Exception as e:
        print(e)
        continue
stormsData

Storm not found
Storm not found


{'Allen': <tropycal.tracks.Storm>
 Storm Summary:
     Maximum Wind:      165 knots
     Minimum Pressure:  899 hPa
     Start Time:        0000 UTC 01 August 1980
     End Time:          1800 UTC 11 August 1980
 
 Variables:
     time        (datetime) [1980-07-31 12:00:00 .... 1980-08-11 18:00:00]
     extra_obs   (int32) [0 .... 0]
     special     (str) [ .... ]
     type        (str) [DB .... TD]
     lat         (float64) [11.0 .... 28.9]
     lon         (float64) [-30.0 .... -102.9]
     vmax        (int32) [25 .... 30]
     mslp        (float64) [nan .... 1008.0]
     wmo_basin   (str) [north_atlantic .... north_atlantic]
 
 More Information:
     id:              AL041980
     operational_id:  AL041980
     name:            ALLEN
     year:            1980
     season:          1980
     basin:           north_atlantic
     source_info:     NHC Hurricane Database
     source:          hurdat
     ace:             52.3
     realtime:        False
     invest:          False
  

In [17]:
stormsData['Allen'].to_dataframe()

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin
0,1980-07-31 12:00:00,0,,DB,11.0,-30.0,25,,north_atlantic
1,1980-07-31 18:00:00,0,,DB,10.9,-32.2,25,,north_atlantic
2,1980-08-01 00:00:00,0,,TD,10.8,-34.3,30,1010.0,north_atlantic
3,1980-08-01 06:00:00,0,,TD,10.7,-36.4,30,1009.0,north_atlantic
4,1980-08-01 12:00:00,0,,TD,10.7,-38.6,30,1008.0,north_atlantic
5,1980-08-01 18:00:00,0,,TD,10.7,-40.7,30,1006.0,north_atlantic
6,1980-08-02 00:00:00,0,,TS,11.0,-42.8,35,1005.0,north_atlantic
7,1980-08-02 06:00:00,0,,TS,11.4,-44.8,45,1000.0,north_atlantic
8,1980-08-02 12:00:00,0,,TS,11.9,-46.9,55,995.0,north_atlantic
9,1980-08-02 18:00:00,0,,TS,12.3,-49.1,60,990.0,north_atlantic


In [18]:
# Modified approach to handle potential non-storm objects in stormsData

dfs = []

for storm_name, storm_obj in stormsData.items():
    # Check if the object has a to_dataframe method
    if hasattr(storm_obj, 'to_dataframe'):
        df_storm = storm_obj.to_dataframe()
        df_storm['storm_name'] = storm_name
        dfs.append(df_storm)

# If we have any dataframes in the list, concatenate them
if dfs:
    master_df = pd.concat(dfs, ignore_index=True)
    # Display the first few rows of the resulting master DataFrame
    output = master_df.head()
else:
    output = "No valid storm objects found in stormsData."

output

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_name
0,1980-07-31 12:00:00,0,,DB,11.0,-30.0,25,,north_atlantic,Allen
1,1980-07-31 18:00:00,0,,DB,10.9,-32.2,25,,north_atlantic,Allen
2,1980-08-01 00:00:00,0,,TD,10.8,-34.3,30,1010.0,north_atlantic,Allen
3,1980-08-01 06:00:00,0,,TD,10.7,-36.4,30,1009.0,north_atlantic,Allen
4,1980-08-01 12:00:00,0,,TD,10.7,-38.6,30,1008.0,north_atlantic,Allen


In [19]:
master_df['year']=master_df['time'].dt.year

In [23]:
master_df

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_name,year
0,1980-07-31 12:00:00,0,,DB,11.0,-30.0,25,,north_atlantic,Allen,1980
1,1980-07-31 18:00:00,0,,DB,10.9,-32.2,25,,north_atlantic,Allen,1980
2,1980-08-01 00:00:00,0,,TD,10.8,-34.3,30,1010.0,north_atlantic,Allen,1980
3,1980-08-01 06:00:00,0,,TD,10.7,-36.4,30,1009.0,north_atlantic,Allen,1980
4,1980-08-01 12:00:00,0,,TD,10.7,-38.6,30,1008.0,north_atlantic,Allen,1980
...,...,...,...,...,...,...,...,...,...,...,...
2730,2022-11-10 19:00:00,1,L,TS,29.2,-83.0,40,989.0,north_atlantic,Nicole,2022
2731,2022-11-11 00:00:00,0,L,TS,30.1,-84.0,35,992.0,north_atlantic,Nicole,2022
2732,2022-11-11 06:00:00,0,,TD,31.2,-84.6,30,996.0,north_atlantic,Nicole,2022
2733,2022-11-11 12:00:00,0,,TD,33.2,-84.6,25,999.0,north_atlantic,Nicole,2022


In [21]:
master_df.to_csv('Hurricane_dataset.csv')

In [25]:
master_df.dtypes

time          datetime64[ns]
extra_obs              int64
special               object
type                  object
lat                  float64
lon                  float64
vmax                   int64
mslp                 float64
wmo_basin             object
storm_name            object
year                   int32
dtype: object

In [26]:
hc_df.dtypes

Name                                             object
Disaster                                         object
Begin Date                                        int64
End Date                                          int64
Total CPI-Adjusted Cost (Millions of Dollars)    object
Deaths                                            int64
name                                             object
year                                             object
dtype: object

In [27]:
hc_df['year']=hc_df['year'].astype(int)

In [28]:
merged_df = pd.merge(master_df, hc_df, left_on=['storm_name', 'year'], right_on=['name', 'year'])


In [33]:
merged_df

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_name,year,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths,name
0,1980-07-31 12:00:00,0,,DB,11.0,-30.0,25,,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen
1,1980-07-31 18:00:00,0,,DB,10.9,-32.2,25,,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen
2,1980-08-01 00:00:00,0,,TD,10.8,-34.3,30,1010.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen
3,1980-08-01 06:00:00,0,,TD,10.7,-36.4,30,1009.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen
4,1980-08-01 12:00:00,0,,TD,10.7,-38.6,30,1008.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2165.4,13,Allen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2730,2022-11-10 19:00:00,1,L,TS,29.2,-83.0,40,989.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,20221110,20221111,1050.6,5,Nicole
2731,2022-11-11 00:00:00,0,L,TS,30.1,-84.0,35,992.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,20221110,20221111,1050.6,5,Nicole
2732,2022-11-11 06:00:00,0,,TD,31.2,-84.6,30,996.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,20221110,20221111,1050.6,5,Nicole
2733,2022-11-11 12:00:00,0,,TD,33.2,-84.6,25,999.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,20221110,20221111,1050.6,5,Nicole


In [34]:
merged_df.dtypes

time                                             datetime64[ns]
extra_obs                                                 int64
special                                                  object
type                                                     object
lat                                                     float64
lon                                                     float64
vmax                                                      int64
mslp                                                    float64
wmo_basin                                                object
storm_name                                               object
year                                                      int32
Name                                                     object
Disaster                                                 object
Begin Date                                                int64
End Date                                                  int64
Total CPI-Adjusted Cost (Millions of Dol

In [39]:
merged_df[['Begin Date','End Date']]= merged_df[['Begin Date','End Date']].astype(str)
merged_df.dtypes

time                                             datetime64[ns]
extra_obs                                                 int64
special                                                  object
type                                                     object
lat                                                     float64
lon                                                     float64
vmax                                                      int64
mslp                                                    float64
wmo_basin                                                object
storm_name                                               object
year                                                      int32
Name                                                     object
Disaster                                                 object
Begin Date                                               object
End Date                                                 object
Total CPI-Adjusted Cost (Millions of Dol

In [44]:
from datetime import datetime

def conv_to_date(int_value):
    str_value = str(int_value)
    date = datetime.strptime(str_value, '%Y%m%d')
    return date.strftime('%Y-%m-%d')

merged_df[['Begin Date', 'End Date']] = merged_df[['Begin Date', 'End Date']].applymap(conv_to_date)


In [47]:
merged_df

Unnamed: 0,time,extra_obs,special,type,lat,lon,vmax,mslp,wmo_basin,storm_name,year,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths,name
0,1980-07-31 12:00:00,0,,DB,11.0,-30.0,25,,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,1980-08-07,1980-08-11,2165.4,13,Allen
1,1980-07-31 18:00:00,0,,DB,10.9,-32.2,25,,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,1980-08-07,1980-08-11,2165.4,13,Allen
2,1980-08-01 00:00:00,0,,TD,10.8,-34.3,30,1010.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,1980-08-07,1980-08-11,2165.4,13,Allen
3,1980-08-01 06:00:00,0,,TD,10.7,-36.4,30,1009.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,1980-08-07,1980-08-11,2165.4,13,Allen
4,1980-08-01 12:00:00,0,,TD,10.7,-38.6,30,1008.0,north_atlantic,Allen,1980,Hurricane Allen (August 1980),Tropical Cyclone,1980-08-07,1980-08-11,2165.4,13,Allen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2730,2022-11-10 19:00:00,1,L,TS,29.2,-83.0,40,989.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,2022-11-10,2022-11-11,1050.6,5,Nicole
2731,2022-11-11 00:00:00,0,L,TS,30.1,-84.0,35,992.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,2022-11-10,2022-11-11,1050.6,5,Nicole
2732,2022-11-11 06:00:00,0,,TD,31.2,-84.6,30,996.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,2022-11-10,2022-11-11,1050.6,5,Nicole
2733,2022-11-11 12:00:00,0,,TD,33.2,-84.6,25,999.0,north_atlantic,Nicole,2022,Hurricane Nicole (November 2022),Tropical Cyclone,2022-11-10,2022-11-11,1050.6,5,Nicole


In [52]:
merged_df.shape

(2735, 18)

In [69]:
# Convert the 'time', 'Begin Date', and 'End Date' columns to datetime format
merged_df['time'] = pd.to_datetime(merged_df['time'])
merged_df['Begin Date'] = pd.to_datetime(merged_df['Begin Date'])
merged_df['End Date'] = pd.to_datetime(merged_df['End Date'])

# Filter the DataFrame so dates are on or between Begin and End dates
filtered_df = merged_df[(merged_df['time'] >= merged_df['Begin Date']) & (merged_df['time'] <= merged_df['End Date'])]

filtered_df.shape


(873, 18)

In [70]:
filtered_df.drop(['extra_obs','special','name','Disaster','Name','wmo_basin'],axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(['extra_obs','special','name','Disaster','Name','wmo_basin'],axis=1,inplace=True)


In [75]:
filtered_df

Unnamed: 0,time,type,lat,lon,vmax,mslp,storm_name,year,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
26,1980-08-07 00:00:00,HU,20.1,-81.9,135,945.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13
27,1980-08-07 06:00:00,HU,20.4,-83.6,145,935.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13
28,1980-08-07 12:00:00,HU,21.0,-84.8,155,910.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13
29,1980-08-07 18:00:00,HU,21.8,-86.4,165,899.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13
30,1980-08-08 00:00:00,HU,22.2,-87.9,155,920.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13
...,...,...,...,...,...,...,...,...,...,...,...,...
2727,2022-11-10 07:45:00,HU,27.6,-80.3,65,980.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5
2728,2022-11-10 12:00:00,TS,28.0,-81.6,55,984.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5
2729,2022-11-10 18:00:00,TS,29.0,-82.8,40,989.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5
2730,2022-11-10 19:00:00,TS,29.2,-83.0,40,989.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5


In [74]:
# get_season function
def get_season(date):
    """Determine the season based on month for Northern Hemisphere"""
    if date.month in [12, 1, 2]:
        return "Winter"
    elif date.month in [3, 4, 5]:
        return "Spring"
    elif date.month in [6, 7, 8]:
        return "Summer"
    else:  # [9, 10, 11]
        return "Fall"

In [76]:
# Create season column based on time
filtered_df['season'] = filtered_df['time'].apply(get_season)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['season'] = filtered_df['time'].apply(get_season)


In [77]:
filtered_df

Unnamed: 0,time,type,lat,lon,vmax,mslp,storm_name,year,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths,season
26,1980-08-07 00:00:00,HU,20.1,-81.9,135,945.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13,Summer
27,1980-08-07 06:00:00,HU,20.4,-83.6,145,935.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13,Summer
28,1980-08-07 12:00:00,HU,21.0,-84.8,155,910.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13,Summer
29,1980-08-07 18:00:00,HU,21.8,-86.4,165,899.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13,Summer
30,1980-08-08 00:00:00,HU,22.2,-87.9,155,920.0,Allen,1980,1980-08-07,1980-08-11,2165.4,13,Summer
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2727,2022-11-10 07:45:00,HU,27.6,-80.3,65,980.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5,Fall
2728,2022-11-10 12:00:00,TS,28.0,-81.6,55,984.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5,Fall
2729,2022-11-10 18:00:00,TS,29.0,-82.8,40,989.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5,Fall
2730,2022-11-10 19:00:00,TS,29.2,-83.0,40,989.0,Nicole,2022,2022-11-10,2022-11-11,1050.6,5,Fall


In [82]:
arranged_df = filtered_df[['storm_name','year','Begin Date','End Date','time','season','type','lat','lon','vmax','mslp','Total CPI-Adjusted Cost (Millions of Dollars)','Deaths']]
arranged_df

Unnamed: 0,storm_name,year,Begin Date,End Date,time,season,type,lat,lon,vmax,mslp,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
26,Allen,1980,1980-08-07,1980-08-11,1980-08-07 00:00:00,Summer,HU,20.1,-81.9,135,945.0,2165.4,13
27,Allen,1980,1980-08-07,1980-08-11,1980-08-07 06:00:00,Summer,HU,20.4,-83.6,145,935.0,2165.4,13
28,Allen,1980,1980-08-07,1980-08-11,1980-08-07 12:00:00,Summer,HU,21.0,-84.8,155,910.0,2165.4,13
29,Allen,1980,1980-08-07,1980-08-11,1980-08-07 18:00:00,Summer,HU,21.8,-86.4,165,899.0,2165.4,13
30,Allen,1980,1980-08-07,1980-08-11,1980-08-08 00:00:00,Summer,HU,22.2,-87.9,155,920.0,2165.4,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2727,Nicole,2022,2022-11-10,2022-11-11,2022-11-10 07:45:00,Fall,HU,27.6,-80.3,65,980.0,1050.6,5
2728,Nicole,2022,2022-11-10,2022-11-11,2022-11-10 12:00:00,Fall,TS,28.0,-81.6,55,984.0,1050.6,5
2729,Nicole,2022,2022-11-10,2022-11-11,2022-11-10 18:00:00,Fall,TS,29.0,-82.8,40,989.0,1050.6,5
2730,Nicole,2022,2022-11-10,2022-11-11,2022-11-10 19:00:00,Fall,TS,29.2,-83.0,40,989.0,1050.6,5


In [84]:
arranged_df.shape

(873, 13)