In [2]:
# Dependencies
import pandas as pd
import geopandas as gpd
from pathlib import Path

In [3]:
# Wildfire Data
# Relative path to the wildfire perimeter GeoJSON file that is loaded below
# NOTE(review): "Perimete" in the file name looks truncated — confirm it matches the file on disk
file_js = Path('Resources/California_Fire_Perimete.geojson')

In [4]:
# Step 1: Load Wildfire GeoJSON Data
# Read the GeoJSON at file_js into a GeoDataFrame (one row per record, plus a geometry column)
wildfire_gdf = gpd.read_file(file_js)

In [5]:
# Display the first few rows to understand the structure.
# Leave the frame as the cell's last expression instead of wrapping it in
# print(): the notebook then renders it with its rich display rather than a
# wrapped plain-text dump.
wildfire_gdf.head()

   OBJECTID   YEAR_ STATE AGENCY UNIT_ID  FIRE_NAME   INC_NUM  \
0         1  2023.0    CA    CDF     SKU  WHITWORTH  00004808   
1         2  2023.0    CA    LRA     BTU     KAISER  00010225   
2         3  2023.0    CA    CDF     AEU    JACKSON  00017640   
3         4  2023.0    CA    CDF     AEU     CARBON  00018821   
4         5  2023.0    CA    CDF     AEU    LIBERTY  00018876   

                      ALARM_DATE                      CONT_DATE  CAUSE  \
0  Sat, 17 Jun 2023 00:00:00 GMT  Sat, 17 Jun 2023 00:00:00 GMT    5.0   
1  Fri, 02 Jun 2023 00:00:00 GMT  Fri, 02 Jun 2023 00:00:00 GMT    5.0   
2  Sat, 01 Jul 2023 00:00:00 GMT  Sun, 02 Jul 2023 00:00:00 GMT    2.0   
3  Tue, 11 Jul 2023 00:00:00 GMT  Tue, 11 Jul 2023 00:00:00 GMT    9.0   
4  Tue, 11 Jul 2023 00:00:00 GMT  Wed, 12 Jul 2023 00:00:00 GMT   14.0   

   C_METHOD  OBJECTIVE  GIS_ACRES COMMENTS COMPLEX_NAME  \
0       1.0        1.0   5.729125     None         None   
1       1.0        1.0  13.602380     None    

In [6]:
# Extract column names
# Capture the frame's column labels (a pandas Index) and print them for inspection
column_names = wildfire_gdf.columns
print("Column Names:", column_names)

Column Names: Index(['OBJECTID', 'YEAR_', 'STATE', 'AGENCY', 'UNIT_ID', 'FIRE_NAME',
       'INC_NUM', 'ALARM_DATE', 'CONT_DATE', 'CAUSE', 'C_METHOD', 'OBJECTIVE',
       'GIS_ACRES', 'COMMENTS', 'COMPLEX_NAME', 'IRWINID', 'FIRE_NUM',
       'COMPLEX_ID', 'DECADES', 'geometry'],
      dtype='object')


In [7]:
# Report the earliest and latest values found in the raw YEAR_ column
min_year, max_year = wildfire_gdf['YEAR_'].min(), wildfire_gdf['YEAR_'].max()

print(f"Year Range: {min_year} to {max_year}")

Year Range: 1878.0 to 2023.0


In [10]:
# Step 2: Process Wildfire Data - Change column names
# Map the raw upper-case field names to human-readable column labels.
#
# A GeoDataFrame tracks its active geometry column by name, and a plain
# rename() does not update that reference. Because 'geometry' is renamed here
# too, the frame is re-pointed at the renamed column with set_geometry();
# without that step, .geometry / .plot() / spatial operations no longer find
# the geometry. Re-running this cell is safe: rename() ignores labels that are
# already gone and set_geometry('Geometry') simply re-selects the same column.
wildfire_gdf = wildfire_gdf.rename(columns={
    'OBJECTID': 'ID',
    'YEAR_': 'Year',
    'STATE': 'State',
    'AGENCY': 'Agency',
    'UNIT_ID': 'Unit ID',
    'FIRE_NAME': 'Fire Name',
    'INC_NUM': 'Incident Number',
    'ALARM_DATE': 'Alarm Date',
    'CONT_DATE': 'Containment Date',
    'CAUSE': 'Cause',
    'C_METHOD': 'Collection Method',
    'OBJECTIVE': 'Management Objective',
    'GIS_ACRES': 'GIS Acres',
    'COMMENTS': 'Comments',
    'COMPLEX_NAME': 'Complex Name',
    'IRWINID': 'IRWIN ID',
    'FIRE_NUM': 'Fire Number',
    'COMPLEX_ID': 'Complex ID',
    'DECADES': 'Decades',
    'geometry': 'Geometry'
}).set_geometry('Geometry')

In [11]:
# Confirm column name changes
# Re-read the column labels after the rename so the new names can be checked
column_names = wildfire_gdf.columns
print("Column Names:", column_names)

Column Names: Index(['ID', 'Year', 'State', 'Agency', 'Unit ID', 'Fire Name',
       'Incident Number', 'Alarm Date', 'Containment Date', 'Cause',
       'Collection Method', 'Management Objective', 'GIS Acres', 'Comments',
       'Complex Name', 'IRWIN ID', 'Fire Number', 'Complex ID', 'Decades',
       'Geometry'],
      dtype='object')


In [12]:
# Narrow the frame to the columns used in the analysis
analysis_columns = [
    'ID',
    'Year',
    'State',
    'Agency',
    'Unit ID',
    'Fire Name',
    'Incident Number',
    'Alarm Date',
    'Containment Date',
    'Cause',
    'GIS Acres',
    'Comments',
    'Complex Name',
    'Fire Number',
    'Decades',
    'Geometry',
]
wildfire_gdf = wildfire_gdf[analysis_columns]

In [16]:
# Rainfall Data
# Relative path to the rainfall CSV file
file_rain= Path('Resources/ca_rainfall.csv')

In [17]:
# Load the rainfall CSV into a DataFrame
# NOTE(review): no `encoding` argument is passed, so pandas' default is used — confirm that suits this file
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably a saved index;
# consider index_col=0 if no later cell relies on that column
df_rain = pd.read_csv(file_rain)

In [18]:
# Preview the first rows of the rainfall DataFrame
df_rain.head()

Unnamed: 0,Year,Precipitation (inches)
0,1896,26.42
1,1897,27.07
2,1898,17.29
3,1899,20.19
4,1900,24.48
