In [2]:
# Dependencies
import pandas as pd
import geopandas as gpd
from pathlib import Path

In [3]:
# Wildfire perimeters: relative location of the GeoJSON source file.
# NOTE(review): the file name ends in "Perimete" (no trailing "r") - confirm
# that it matches the file on disk.
file_js = Path(
    'Resources/California_Fire_Perimete.geojson'
)

In [4]:
# Step 1: read the wildfire perimeter GeoJSON from `file_js` with geopandas
wildfire_gdf = gpd.read_file(filename=file_js)

In [5]:
# Preview the leading five rows to get a feel for the structure
print(wildfire_gdf.head(n=5))

   OBJECTID   YEAR_ STATE AGENCY UNIT_ID  FIRE_NAME   INC_NUM  \
0         1  2023.0    CA    CDF     SKU  WHITWORTH  00004808   
1         2  2023.0    CA    LRA     BTU     KAISER  00010225   
2         3  2023.0    CA    CDF     AEU    JACKSON  00017640   
3         4  2023.0    CA    CDF     AEU     CARBON  00018821   
4         5  2023.0    CA    CDF     AEU    LIBERTY  00018876   

                      ALARM_DATE                      CONT_DATE  CAUSE  \
0  Sat, 17 Jun 2023 00:00:00 GMT  Sat, 17 Jun 2023 00:00:00 GMT    5.0   
1  Fri, 02 Jun 2023 00:00:00 GMT  Fri, 02 Jun 2023 00:00:00 GMT    5.0   
2  Sat, 01 Jul 2023 00:00:00 GMT  Sun, 02 Jul 2023 00:00:00 GMT    2.0   
3  Tue, 11 Jul 2023 00:00:00 GMT  Tue, 11 Jul 2023 00:00:00 GMT    9.0   
4  Tue, 11 Jul 2023 00:00:00 GMT  Wed, 12 Jul 2023 00:00:00 GMT   14.0   

   C_METHOD  OBJECTIVE  GIS_ACRES COMMENTS COMPLEX_NAME  \
0       1.0        1.0   5.729125     None         None   
1       1.0        1.0  13.602380     None    

In [6]:
# Collect the column labels of the wildfire data and show them
column_names = wildfire_gdf.columns
print(
    "Column Names:",
    column_names,
)

Column Names: Index(['OBJECTID', 'YEAR_', 'STATE', 'AGENCY', 'UNIT_ID', 'FIRE_NAME',
       'INC_NUM', 'ALARM_DATE', 'CONT_DATE', 'CAUSE', 'C_METHOD', 'OBJECTIVE',
       'GIS_ACRES', 'COMMENTS', 'COMPLEX_NAME', 'IRWINID', 'FIRE_NUM',
       'COMPLEX_ID', 'DECADES', 'geometry'],
      dtype='object')


In [7]:
# Report the smallest and largest value found in the 'YEAR_' column
fire_years = wildfire_gdf['YEAR_']
min_year = fire_years.min()
max_year = fire_years.max()

print(f"Year Range: {min_year} to {max_year}")

Year Range: 1878.0 to 2023.0


In [10]:
# Step 2: process wildfire data - give every column a readable label.
# NOTE(review): 'geometry' is relabelled through the same plain column mapping
# as the attribute fields; confirm nothing downstream relies on it remaining
# the frame's active geometry column.
wildfire_gdf = wildfire_gdf.rename(
    mapper={
        'OBJECTID': 'ID',
        'YEAR_': 'Year',
        'STATE': 'State',
        'AGENCY': 'Agency',
        'UNIT_ID': 'Unit ID',
        'FIRE_NAME': 'Fire Name',
        'INC_NUM': 'Incident Number',
        'ALARM_DATE': 'Alarm Date',
        'CONT_DATE': 'Containment Date',
        'CAUSE': 'Cause',
        'C_METHOD': 'Collection Method',
        'OBJECTIVE': 'Management Objective',
        'GIS_ACRES': 'GIS Acres',
        'COMMENTS': 'Comments',
        'COMPLEX_NAME': 'Complex Name',
        'IRWINID': 'IRWIN ID',
        'FIRE_NUM': 'Fire Number',
        'COMPLEX_ID': 'Complex ID',
        'DECADES': 'Decades',
        'geometry': 'Geometry',
    },
    axis='columns',
)

In [11]:
# Verify that the new column labels are in place
column_names = wildfire_gdf.columns
print(
    "Column Names:",
    column_names,
)

Column Names: Index(['ID', 'Year', 'State', 'Agency', 'Unit ID', 'Fire Name',
       'Incident Number', 'Alarm Date', 'Containment Date', 'Cause',
       'Collection Method', 'Management Objective', 'GIS Acres', 'Comments',
       'Complex Name', 'IRWIN ID', 'Fire Number', 'Complex ID', 'Decades',
       'Geometry'],
      dtype='object')


In [12]:
# Narrow the frame to the fields used in the analysis. Compared with the
# renamed columns this leaves out 'Collection Method', 'Management Objective',
# 'IRWIN ID' and 'Complex ID'.
wildfire_gdf = wildfire_gdf[[
    'ID',
    'Year',
    'State',
    'Agency',
    'Unit ID',
    'Fire Name',
    'Incident Number',
    'Alarm Date',
    'Containment Date',
    'Cause',
    'GIS Acres',
    'Comments',
    'Complex Name',
    'Fire Number',
    'Decades',
    'Geometry',
]]

In [None]:
# Calculate the centroid of each geometry (for polygons).
# Centroids are planar computations, so geometries held in a geographic
# (longitude/latitude) CRS are first projected to NAD83 / California Albers
# (EPSG:3310); the resulting points are then converted back to the source CRS
# so that Latitude/Longitude stay in the original coordinate system.
perimeters = wildfire_gdf['Geometry']
source_crs = perimeters.crs
if source_crs is not None and source_crs.is_geographic:
    centroids = perimeters.to_crs(epsg=3310).centroid.to_crs(source_crs)
else:
    # Already projected (or CRS unknown): compute the centroid as-is
    centroids = perimeters.centroid
wildfire_gdf['Centroid'] = centroids

# Extract latitude and longitude from the centroid
wildfire_gdf['Latitude'] = centroids.y
wildfire_gdf['Longitude'] = centroids.x

# Convert the geometry column to WKT (Well-Known Text). Rows with a missing
# geometry stay missing instead of raising AttributeError on `None.wkt`.
# After this statement 'Geometry' holds strings, so this cell cannot be re-run
# without reloading the data first.
wildfire_gdf['Geometry'] = perimeters.apply(
    lambda geom: geom.wkt if geom is not None else None
)

In [None]:
# Check the transformed data. Only the leading rows are shown (as the cell's
# rich output) rather than printing the whole frame, whose 'Geometry' column
# now carries long WKT strings.
wildfire_gdf.head()

In [None]:
# List every column now present, including the ones derived above
column_names = wildfire_gdf.columns
print(
    "Column Names:",
    column_names,
)

In [16]:
# SF rainfall
# Location of the rainfall CSV file
sf_rain = Path('Resources/sf_rainfall.csv')
# Load it with pandas' default settings (no explicit `encoding` is passed)
df_sf_rain = pd.read_csv(filepath_or_buffer=sf_rain)
# Show the first rows of the rainfall DataFrame
df_sf_rain.head(n=5)

In [17]:
# LA rainfall
# Location of the rainfall CSV file
la_rain = Path('Resources/la_rainfall.csv')
# Load it with pandas' default settings (no explicit `encoding` is passed)
df_la_rain = pd.read_csv(filepath_or_buffer=la_rain)
# Show the first rows of the rainfall DataFrame
df_la_rain.head(n=5)

In [18]:
# San Diego rainfall
# Location of the rainfall CSV file
sdg_rain = Path('Resources/sdg_rainfall.csv')
# Load it with pandas' default settings (no explicit `encoding` is passed)
df_sdg_rain = pd.read_csv(filepath_or_buffer=sdg_rain)
# Show the first rows of the rainfall DataFrame
df_sdg_rain.head(n=5)

Unnamed: 0,Year,Precipitation (inches)
0,1896,26.42
1,1897,27.07
2,1898,17.29
3,1899,20.19
4,1900,24.48


In [None]:
# Stack the three rainfall tables row-wise into one frame; the original row
# labels are discarded in favour of a fresh consecutive index.
df_rain = pd.concat(
    objs=(df_sf_rain, df_la_rain, df_sdg_rain),
    axis='index',
    ignore_index=True,
)
df_rain.head(n=5)

In [None]:
# Rainfall station geo data
# Location of the weather-station CSV file
rainfall_station = Path('Resources/weather_station_geo.csv')
# Load it with pandas' default settings (no explicit `encoding` is passed)
df_rain_geo = pd.read_csv(filepath_or_buffer=rainfall_station)
# Show the first rows of the station DataFrame
df_rain_geo.head(n=5)

In [None]:
# Relabel the station columns by position so that the key column is called
# 'STATION_ID', the name used when merging with df_rain later.
# NOTE(review): assumes the CSV has exactly these seven columns in this
# order - confirm against the file.
df_rain_geo.columns = [
    'STATION NAME',
    'STATION_ID',
    'ELEV (FEET)',
    'LATITUDE',
    'LONGITUDE',
    'COUNTY',
    'OPERATOR AGENCY',
]

In [None]:
# Attach the station metadata to the rainfall rows on 'STATION_ID' (left join,
# so every rainfall row is kept), then inspect the merged result
df_precip = df_rain.merge(df_rain_geo, how='left', on='STATION_ID')
df_precip.head(n=5)

In [None]:
# Parse both date columns into pandas datetimes, then summarise the frame
for date_col in ('DATE TIME', 'OBS DATE'):
    df_precip[date_col] = pd.to_datetime(df_precip[date_col])
df_precip.info()

In [None]:
# Add 'YEAR MONTH': each 'DATE TIME' value reduced to a monthly period and
# converted back to a timestamp, i.e. a datetime carrying only year and month
month_periods = df_precip['DATE TIME'].dt.to_period('M')
df_precip['YEAR MONTH'] = month_periods.dt.to_timestamp()
df_precip.head(n=5)

In [None]:
# Average temperature data: one CSV location per series, assigned in the same
# order as the names on the left-hand side
(
    sf_temp_path,
    la_temp_path,
    sd_temp_path,
    sac_temp_path,
    bf_temp_path,
    erk_temp_path,
    ca_temp_path,
) = (
    Path('Resources/avg-temps-sf.csv'),
    Path('Resources/avg-temps-la.csv'),
    Path('Resources/avg-temps-sd.csv'),
    Path('Resources/avg-temps-sac.csv'),
    Path('Resources/avg-temps-bf.csv'),
    Path('Resources/avg-temps-erk.csv'),
    Path('Resources/avg-temps-ca.csv'),
)

In [None]:
# Load every temperature CSV, skipping the first three lines of each file.
# Files are read in the same order as the names on the left-hand side.
(
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
) = (
    pd.read_csv(csv_path, skiprows=3)
    for csv_path in (
        sf_temp_path,
        la_temp_path,
        sd_temp_path,
        sac_temp_path,
        bf_temp_path,
        erk_temp_path,
        ca_temp_path,
    )
)

In [None]:
# Gather all temperature frames in one list so they can be processed in a loop
cities_temps = [
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
]

In [None]:
# Parse the 'Date' values (format '%Y%m') into datetimes, split them into
# 'month' and 'year' columns, then remove the raw 'Date' column.
# Each frame is modified in place, so the frames referenced by the list (and
# by the individual *_temps_df names) all receive the change.
for city in cities_temps:
    city['date'] = pd.to_datetime(city['Date'], format='%Y%m')
    city['month'] = city['date'].dt.month
    city['year'] = city['date'].dt.year
    # drop(..., inplace=True) returns None, so its result must not be
    # assigned back to the loop variable
    city.drop(columns='Date', inplace=True)

In [None]:
# Remove the 'date' column from each frame, modifying the frames in place.
for city in cities_temps:
    # drop(..., inplace=True) returns None, so its result must not be
    # assigned back to the loop variable
    city.drop(columns='date', inplace=True)

In [None]:
# Relabel the 'Value' column as 'temperature' in every frame. `rename` returns
# a new frame, so each name is rebound to its renamed copy.
(
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
) = (
    temps.rename(columns={'Value': 'temperature'})
    for temps in (
        sf_temps_df,
        la_temps_df,
        sd_temps_df,
        sac_temps_df,
        bf_temps_df,
        erk_temps_df,
        ca_temps_df,
    )
)

In [None]:
# Keep only 'year', 'month' and 'temperature', in that column order, for
# every frame
(
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
) = (
    temps[['year', 'month', 'temperature']]
    for temps in (
        sf_temps_df,
        la_temps_df,
        sd_temps_df,
        sac_temps_df,
        bf_temps_df,
        erk_temps_df,
        ca_temps_df,
    )
)

In [None]:
# Print every temperature frame, one after another, for a visual check
print(
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
    sep='\n',
)