In [1]:
# Dependencies
import pandas as pd
import geopandas as gpd
from pathlib import Path

In [2]:
# Wildfire Data
# Relative path to the wildfire-perimeter GeoJSON file.
# NOTE(review): the recorded run of the load cell failed with
# "No such file or directory" for this path, and the file name looks truncated
# ("Perimete") — confirm it matches the file actually present in Resources/.
file_js = Path('Resources/California_Fire_Perimete.geojson')

In [3]:
# Step 1: Load Wildfire GeoJSON Data
# Read the file referenced by `file_js` with geopandas and bind the result
# to `wildfire_gdf`, the frame that the following wildfire cells operate on.
wildfire_gdf = gpd.read_file(file_js)

DriverError: Resources\California_Fire_Perimete.geojson: No such file or directory

In [None]:
# Print the first five rows to get a feel for the structure of the data
print(wildfire_gdf.head(n=5))

In [None]:
# Extract the column labels of the wildfire frame and print them,
# so the raw field names are visible before any renaming.
column_names = wildfire_gdf.columns
print("Column Names:", column_names)

In [None]:
# Report the earliest and latest value found in the 'YEAR_' column
fire_years = wildfire_gdf['YEAR_']
min_year, max_year = fire_years.min(), fire_years.max()

print(f"Year Range: {min_year} to {max_year}")

In [None]:
# Step 2: Process Wildfire Data - Change column names
# Map the raw field names to readable labels.
#
# A GeoDataFrame tracks its active geometry column by name. Renaming
# 'geometry' alone leaves the frame pointing at a column that no longer
# exists, so the renamed column is re-registered with `set_geometry` to keep
# spatial accessors (e.g. `.geometry`) working.
wildfire_gdf = wildfire_gdf.rename(columns={
    'OBJECTID': 'ID',
    'YEAR_': 'Year',
    'STATE': 'State',
    'AGENCY': 'Agency',
    'UNIT_ID': 'Unit ID',
    'FIRE_NAME': 'Fire Name',
    'INC_NUM': 'Incident Number',
    'ALARM_DATE': 'Alarm Date',
    'CONT_DATE': 'Containment Date',
    'CAUSE': 'Cause',
    'C_METHOD': 'Collection Method',
    'OBJECTIVE': 'Management Objective',
    'GIS_ACRES': 'GIS Acres',
    'COMMENTS': 'Comments',
    'COMPLEX_NAME': 'Complex Name',
    'IRWINID': 'IRWIN ID',
    'FIRE_NUM': 'Fire Number',
    'COMPLEX_ID': 'Complex ID',
    'DECADES': 'Decades',
    'geometry': 'Geometry',
}).set_geometry('Geometry')

In [None]:
# Confirm the column name changes by printing the current column labels
column_names = wildfire_gdf.columns
print("Column Names:", column_names)

In [None]:
# Restrict the wildfire frame to the columns used in the analysis
analysis_columns = [
    'ID', 'Year', 'State', 'Agency', 'Unit ID', 'Fire Name',
    'Incident Number', 'Alarm Date', 'Containment Date', 'Cause',
    'GIS Acres', 'Comments', 'Complex Name', 'Fire Number',
    'Decades', 'Geometry',
]
wildfire_gdf = wildfire_gdf[analysis_columns]

In [None]:
# Rainfall Data
# Relative location of the rainfall CSV file
file_rain = Path('Resources/ca_rainfall.csv')

In [None]:
# Load the rainfall CSV into a DataFrame.
# NOTE(review): no `encoding` argument is passed, so pandas' default is used;
# if this file needs a specific encoding, pass it explicitly — TODO confirm.
df_rain = pd.read_csv(file_rain)

In [None]:
# Preview the first five rows of the rainfall DataFrame
df_rain.head(n=5)

In [4]:
# Average Temperature Data
# One relative CSV path per temperature series; the names on the left are
# matched positionally with the file names on the right.
(
    sf_temp_path,
    la_temp_path,
    sd_temp_path,
    sac_temp_path,
    bf_temp_path,
    erk_temp_path,
    ca_temp_path,
) = map(Path, (
    'Resources/avg-temps-sf.csv',
    'Resources/avg-temps-la.csv',
    'Resources/avg-temps-sd.csv',
    'Resources/avg-temps-sac.csv',
    'Resources/avg-temps-bf.csv',
    'Resources/avg-temps-erk.csv',
    'Resources/avg-temps-ca.csv',
))

In [5]:
# Load every temperature CSV, skipping the first three lines of each file.
# Files are read in the same order as the target names are listed.
(
    sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
    bf_temps_df, erk_temps_df, ca_temps_df,
) = [
    pd.read_csv(csv_path, skiprows=3)
    for csv_path in (
        sf_temp_path, la_temp_path, sd_temp_path, sac_temp_path,
        bf_temp_path, erk_temp_path, ca_temp_path,
    )
]

In [6]:
# Gather every temperature DataFrame into one list so they can be processed in a loop
cities_temps = [
    sf_temps_df,
    la_temps_df,
    sd_temps_df,
    sac_temps_df,
    bf_temps_df,
    erk_temps_df,
    ca_temps_df,
]

In [7]:
# Parse 'Date' (formatted as YYYYMM) into a datetime 'date' column, derive
# separate 'month' and 'year' columns from it, then drop the original 'Date'.
# Each frame is modified in place so the individually named DataFrames (the
# same objects held in `cities_temps`) receive the new columns.
for city in cities_temps:
    city['date'] = pd.to_datetime(city['Date'], format='%Y%m')
    city['month'] = city['date'].dt.month
    city['year'] = city['date'].dt.year
    # An in-place drop returns None, so its result must not be assigned back
    # to the loop variable.
    city.drop(columns='Date', inplace=True)

In [8]:
# Remove the intermediate 'date' column from every frame, in place
for city in cities_temps:
    # An in-place drop returns None; do not rebind the loop variable to it.
    city.drop(columns='date', inplace=True)

In [10]:
# Rename each 'Value' column to 'temperature'.
# `rename` returns a new frame, so every name is rebound to its renamed copy.
(
    sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
    bf_temps_df, erk_temps_df, ca_temps_df,
) = [
    temps.rename(columns={'Value': 'temperature'})
    for temps in (
        sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
        bf_temps_df, erk_temps_df, ca_temps_df,
    )
]

In [11]:
# Put the columns of every frame in the order year, month, temperature
(
    sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
    bf_temps_df, erk_temps_df, ca_temps_df,
) = [
    temps[['year', 'month', 'temperature']]
    for temps in (
        sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
        bf_temps_df, erk_temps_df, ca_temps_df,
    )
]

In [17]:
# Print every temperature frame in turn as a sanity check
for temps in (
    sf_temps_df, la_temps_df, sd_temps_df, sac_temps_df,
    bf_temps_df, erk_temps_df, ca_temps_df,
):
    print(temps)

     year  month  temperature
0    1945      7         62.5
1    1945      8         61.8
2    1945      9         63.5
3    1945     10         60.3
4    1945     11         54.7
..    ...    ...          ...
944  2024      3         54.9
945  2024      4         57.2
946  2024      5         60.0
947  2024      6         62.7
948  2024      7         66.6

[949 rows x 3 columns]
     year  month  temperature
0    1945      1         53.3
1    1945      2         54.8
2    1945      3         52.9
3    1945      4         57.1
4    1945      5         60.2
..    ...    ...          ...
950  2024      3         57.4
951  2024      4         58.7
952  2024      5         61.4
953  2024      6         65.9
954  2024      7         68.3

[955 rows x 3 columns]
     year  month  temperature
0    1945      1         52.1
1    1945      2         53.7
2    1945      3         52.7
3    1945      4         55.2
4    1945      5         59.5
..    ...    ...          ...
950  2024      3      