In [104]:
import geopandas as gpd

# Read the fire data
gdf = gpd.read_file("nfdb/NFDB_point_20250519.shp")

# read in the historical fire management zones
zones = gpd.read_file("historical_fire_zones\Historical_Fire_Management_Zones.shp")

# Ensure both GeoDataFrames use the same CRS
gdf = gdf.to_crs(zones.crs)

# join the two GeoDataFrames
data = gpd.sjoin(gdf, zones, how="inner", predicate="within")

In [105]:
# drop the data in FMZ_DESIGN == 'Parks Zone'
data = data[data.FMZ_DESIGN != "Parks Zone"]

data.NFDBFIREID.duplicated().sum()

# show me the rows where NFDBFIREID is duplicated
duplicates = data[data.NFDBFIREID.duplicated(keep=False)]

# remove the duplicates
data = data[~data.NFDBFIREID.duplicated(keep="first")]
#

In [112]:
data.to_csv('test.csv')

In [113]:
# select only rows with CAUSE = N or U

data = data[(data.CAUSE == "N") | (data.CAUSE == "U")]

In [114]:
# drop rows where the REP_DATE is null
joined = data[data.REP_DATE.notnull()]

# make a new column for whether the SIZE_HA is greater than 100\
joined["SIZE_HA_100"] = joined["SIZE_HA"].apply(lambda x: True if x > 100 else False)

# remove the rows with YEAR before 1975
joined = joined[joined.YEAR >= 1976]

# make dictionary to change values of FMZ_DESIGN
fmz_dict = {
    "Hudson Bay Zone": "Extensive",
    "Great Lakes/St. Lawrence Zone": "Intensive Measured",
    "Boreal Zone": "Intensive Measured",
    "Northern Boreal Zone": "Intensive Measured"
}

# make a new column in the joined GeoDataFrame
joined["FMZ_ZONE"] = joined["FMZ_DESIGN"].map(fmz_dict)

season_dict = {1: "Winter",
               2: "Winter",
               3: "Spring",
                4: "Spring",
                5: "Spring",
                6: "Summer",
                7: "Summer",
                8: "Summer",
                9: "Fall",
                10: "Fall",
                11: "Fall",
                12: "Winter"}

# make a new column for the season based on the column called "MONTH"
joined["SEASON"] = joined["MONTH"].map(season_dict)


# make a new column based on the column year which is whether it is after 2006 or not
def after_2005(year):
    if year > 2005:
        return "After 2005"
    else:
        return "Before 2005"

joined["AFTER_2005"] = joined["YEAR"].apply(after_2006)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [115]:
# select the columns that we want to keep

joined = joined[['YEAR', 'REP_DATE', 'SIZE_HA', 'FMZ_ZONE', 'SEASON', 'AFTER_2005', 'SIZE_HA_100']]

joined

Unnamed: 0,YEAR,REP_DATE,SIZE_HA,FMZ_ZONE,SEASON,AFTER_2005,SIZE_HA_100
1514,2024,2024-09-16,2.477798,Extensive,Fall,After 2006,False
11306,2024,2024-06-25,247.000000,Extensive,Summer,After 2006,True
11309,2024,2024-07-13,22.000000,Extensive,Summer,After 2006,False
11310,2024,2024-09-11,148.000000,Extensive,Fall,After 2006,True
11311,2024,2024-09-16,0.700000,Intensive Measured,Fall,After 2006,False
...,...,...,...,...,...,...,...
439305,2019,2019-07-10,0.400000,Intensive Measured,Summer,After 2006,False
439311,2019,2019-07-24,0.500000,Intensive Measured,Summer,After 2006,False
439313,2019,2019-07-25,0.500000,Intensive Measured,Summer,After 2006,False
439314,2019,2019-08-03,0.100000,Intensive Measured,Summer,After 2006,False


In [118]:
import pandas as pd
# make a table for the total area burned by year with another column as the number of fires and the number of fires greater than 100 ha
joined_grouped = joined.groupby(['YEAR']).agg(
    {'SIZE_HA': 'sum'}).reset_index()

# now make the same columns by for the fires in the Intensive Measured zone
joined_grouped_intensive = joined[joined.FMZ_ZONE == "Intensive Measured"].groupby(['YEAR']).agg(
    {'REP_DATE':'count', 'SIZE_HA_100': 'sum'}).reset_index()

# rename the columns
joined_grouped_intensive = joined_grouped_intensive.rename(
    columns={'REP_DATE': 'Number of Fires in the IM Zones', 'SIZE_HA_100': 'Number of Large Fires in the IM Zones'})

# do the same for the Extensive zone
joined_grouped_extensive = joined[joined.FMZ_ZONE == "Extensive"].groupby(['YEAR']).agg(
    {'REP_DATE':'count', 'SIZE_HA_100': 'sum'}).reset_index()

# rename the columns
joined_grouped_extensive = joined_grouped_extensive.rename(
    columns={'REP_DATE': 'Number of Fires in the Ext Zones', 'SIZE_HA_100': 'Number of Large Fires in the Ext Zones'})

# now merge the two dataframes
joined_grouped = pd.merge(joined_grouped, joined_grouped_intensive, on='YEAR', how='left')

# now merge the two dataframes

joined_grouped = pd.merge(joined_grouped, joined_grouped_extensive, on='YEAR', how='left')


In [123]:
# I want to fit a weibull distribution to the data by the before or after 2005, and also the season

# make a functoin that fit the weibull distribution to the data

import numpy as np
from scipy.stats import weibull_min

def fit_weibull(data, col):
    # fit the weibull distribution to the data
    params = weibull_min.fit(data[col], floc=0)
    # get the shape, scale and location parameters
    shape, loc, scale = params
    return shape, scale
# get only the data with HA_100 == True

joined_100 = joined[joined.SIZE_HA_100 == True]

# now group the data by the BEFORE_2005 and SEASON columns
joined_grouped_weibull = joined_100.groupby(['AFTER_2005', 'SEASON', 'FMZ_ZONE'])

# apply the fit_weibull function to each group
weibull_params = joined_grouped_weibull.apply(lambda x: fit_weibull(x, 'SIZE_HA')).reset_index()

weibull_params

  weibull_params = joined_grouped_weibull.apply(lambda x: fit_weibull(x, 'SIZE_HA')).reset_index()


Unnamed: 0,AFTER_2005,SEASON,FMZ_ZONE,0
0,After 2006,Fall,Extensive,"(1.036474636793074, 884.4855418638231)"
1,After 2006,Fall,Intensive Measured,"(0.7001139651543715, 870.3779407837735)"
2,After 2006,Spring,Extensive,"(0.45436451040525005, 3684.471332362743)"
3,After 2006,Spring,Intensive Measured,"(0.6572399897172694, 1435.4785522533834)"
4,After 2006,Summer,Extensive,"(0.5869645570366842, 1818.4538900965576)"
5,After 2006,Summer,Intensive Measured,"(0.5229476938023473, 2503.612258309916)"
6,Before 2006,Fall,Extensive,"(0.7083396929283436, 1164.9329143736966)"
7,Before 2006,Fall,Intensive Measured,"(0.6458415597860223, 1272.0697623560854)"
8,Before 2006,Spring,Extensive,"(0.6052468482638573, 5731.67076906436)"
9,Before 2006,Spring,Intensive Measured,"(0.5152018146827372, 3191.610252290389)"


In [119]:
joined

Unnamed: 0,YEAR,REP_DATE,SIZE_HA,FMZ_ZONE,SEASON,AFTER_2005,SIZE_HA_100
1514,2024,2024-09-16,2.477798,Extensive,Fall,After 2006,False
11306,2024,2024-06-25,247.000000,Extensive,Summer,After 2006,True
11309,2024,2024-07-13,22.000000,Extensive,Summer,After 2006,False
11310,2024,2024-09-11,148.000000,Extensive,Fall,After 2006,True
11311,2024,2024-09-16,0.700000,Intensive Measured,Fall,After 2006,False
...,...,...,...,...,...,...,...
439305,2019,2019-07-10,0.400000,Intensive Measured,Summer,After 2006,False
439311,2019,2019-07-24,0.500000,Intensive Measured,Summer,After 2006,False
439313,2019,2019-07-25,0.500000,Intensive Measured,Summer,After 2006,False
439314,2019,2019-08-03,0.100000,Intensive Measured,Summer,After 2006,False
