In [4]:
import pandas as pd

This data publication contains a spatial database of wildfires that occurred in the United States from 1992 to 2015. It is the third update of a publication originally generated to support the national Fire Program Analysis (FPA) system. The wildfire records were acquired from the reporting systems of federal, state, and local fire organizations. The following core data elements were required for records to be included in this data publication: discovery date, final fire size, and a point location at least as precise as Public Land Survey System (PLSS) section (1-square mile grid). The data were transformed to conform, when possible, to the data standards of the National Wildfire Coordinating Group (NWCG). Basic error-checking was performed and redundant records were identified and removed, to the degree possible. The resulting product, referred to as the Fire Program Analysis fire-occurrence database (FPA FOD), includes 1.88 million geo-referenced wildfire records, representing a total of 140 million acres burned during the 24-year period.

These data were collected using funding from the U.S. Government and can be used without additional permissions or fees. If you use these data in a publication, presentation, or other research product please use the following citation:

Short, Karen C. 2017. Spatial wildfire occurrence data for the United States, 1992-2015 [FPAFOD20170508]. 4th Edition. Fort Collins, CO: Forest Service Research Data Archive. https://doi.org/10.2737/RDS-2013-0009.4

In [5]:
# Load the raw wildfire table. low_memory=False makes pandas infer each column's
# dtype from the whole file rather than chunk by chunk, which avoids mixed-type
# columns (the truncated warning in this cell's output looks like the
# DtypeWarning raised by the default chunked parser -- TODO confirm).
df = pd.read_csv("fire_data.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape
0,1,1,FS-1418826,FED,FS-FIRESTAT,FS,USCAPNF,Plumas National Forest,511,Plumas National Forest,...,A,40.036944,-121.005833,5.0,USFS,CA,63,63.0,Plumas,
1,2,2,FS-1418827,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.933056,-120.404444,5.0,USFS,CA,61,61.0,Placer,
2,3,3,FS-1418835,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.984167,-120.735556,13.0,STATE OR PRIVATE,CA,17,17.0,El Dorado,
3,4,4,FS-1418845,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.559167,-119.913333,5.0,USFS,CA,3,3.0,Alpine,
4,5,5,FS-1418847,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.559167,-119.933056,5.0,USFS,CA,3,3.0,Alpine,


In [4]:
# List every column label of the loaded table.
df.columns

Index(['OBJECTID', 'FOD_ID', 'FPA_ID', 'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM',
       'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID',
       'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT',
       'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID',
       'LOCAL_INCIDENT_ID', 'FIRE_CODE', 'FIRE_NAME',
       'ICS_209_INCIDENT_NUMBER', 'ICS_209_NAME', 'MTBS_ID', 'MTBS_FIRE_NAME',
       'COMPLEX_NAME', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY',
       'DISCOVERY_TIME', 'STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR', 'CONT_DATE',
       'CONT_DOY', 'CONT_TIME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE',
       'LONGITUDE', 'OWNER_CODE', 'OWNER_DESCR', 'STATE', 'COUNTY',
       'FIPS_CODE', 'FIPS_NAME', 'Shape'],
      dtype='object')

LATITUDE = Latitude (NAD83) for point location of the fire (decimal degrees).

LONGITUDE = Longitude (NAD83) for point location of the fire (decimal degrees).

FIRE_YEAR = Calendar year in which the fire was discovered or confirmed to exist.

DISCOVERY_DATE = Date on which the fire was discovered or confirmed to exist.

DISCOVERY_DOY = Day of year on which the fire was discovered or confirmed to exist.

DISCOVERY_TIME = Time of day that the fire was discovered or confirmed to exist.

`STAT_CAUSE_CODE` = Code for the (statistical) cause of the fire.

STATE = Two-letter alphabetic code for the state in which the fire burned (or originated), based on the nominal designation in the fire report.

`STAT_CAUSE_DESCR` = Description of the (statistical) cause of the fire.

`FIRE_SIZE_CLASS` = Code for fire size based on the number of acres within the final fire perimeter (A=greater than 0 but less than or equal to 0.25 acres, B=0.26-9.9 acres, C=10.0-99.9 acres, D=100-299 acres, E=300 to 999 acres, F=1000 to 4999 acres, and G=5000+ acres).

FIPS_CODE = Three-digit code from the Federal Information Processing Standards (FIPS) publication 6-4 for representation of counties and equivalent entities.

FIPS_NAME = County name from the FIPS publication 6-4 for representation of counties and equivalent entities.

`SOURCE_REPORTING_UNIT` = Code for the agency unit preparing the fire report, based on code/name in the source dataset.

`SOURCE_REPORTING_UNIT_NAME` = Name of reporting agency unit preparing the fire report, based on code/name in the source dataset.

In [15]:
# Narrow the raw table down to the location, discovery-timing, cause,
# size-class, county and reporting-unit columns described above.
fire = df.loc[:, [
    'LATITUDE',
    'LONGITUDE',
    'FIRE_YEAR',
    'DISCOVERY_DATE',
    'DISCOVERY_DOY',
    'DISCOVERY_TIME',
    'STATE',
    'STAT_CAUSE_CODE',
    'STAT_CAUSE_DESCR',
    'FIRE_SIZE_CLASS',
    'FIPS_CODE',
    'FIPS_NAME',
    'SOURCE_REPORTING_UNIT',
    'SOURCE_REPORTING_UNIT_NAME',
]]

In [16]:
# Preview the first five rows of the reduced table.
fire.head(n=5)

Unnamed: 0,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,DISCOVERY_TIME,STATE,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIRE_SIZE_CLASS,FIPS_CODE,FIPS_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME
0,40.036944,-121.005833,2005,2453403.5,33,1300.0,CA,9.0,Miscellaneous,A,63.0,Plumas,511,Plumas National Forest
1,38.933056,-120.404444,2004,2453137.5,133,845.0,CA,1.0,Lightning,A,61.0,Placer,503,Eldorado National Forest
2,38.984167,-120.735556,2004,2453156.5,152,1921.0,CA,5.0,Debris Burning,A,17.0,El Dorado,503,Eldorado National Forest
3,38.559167,-119.913333,2004,2453184.5,180,1600.0,CA,1.0,Lightning,A,3.0,Alpine,503,Eldorado National Forest
4,38.559167,-119.933056,2004,2453184.5,180,1600.0,CA,1.0,Lightning,A,3.0,Alpine,503,Eldorado National Forest


In [17]:
# Persist the reduced table. index=False keeps the row index out of the file;
# in the preview above it is just 0, 1, 2, ..., and writing it would add an
# unnamed extra column that comes back as 'Unnamed: 0' when the CSV is re-read.
fire.to_csv('fire.csv', index=False)

In [21]:
# DataFrame.size is the total number of cells (rows x columns), not the number
# of fire records; use len(fire) or fire.shape for the record count.
fire.size

26326510

In [9]:
# Distinct state codes present in the data, in order of first appearance.
fire.loc[:, 'STATE'].unique()

array(['CA', 'NM', 'OR', 'NC', 'WY', 'CO', 'WA', 'MT', 'UT', 'AZ', 'SD',
       'AR', 'NV', 'ID', 'MN', 'TX', 'FL', 'SC', 'LA', 'OK', 'KS', 'MO',
       'NE', 'MI', 'KY', 'OH', 'IN', 'VA', 'IL', 'TN', 'GA', 'AK', 'ND',
       'WV', 'WI', 'AL', 'NH', 'PA', 'MS', 'ME', 'VT', 'NY', 'IA', 'DC',
       'MD', 'CT', 'MA', 'NJ', 'HI', 'DE', 'PR', 'RI'], dtype=object)

In [10]:
# Work on an independent copy restricted to the nine columns shown in the
# preview below. Selecting them explicitly keeps this cell reproducible on a
# fresh kernel: `fire` as defined above also carries string columns
# (STAT_CAUSE_DESCR, FIPS_NAME, SOURCE_REPORTING_UNIT_NAME, ...) that would
# otherwise flow into the one-hot encoding step further down.
CA_fire = fire[[
    'LATITUDE', 'LONGITUDE', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY',
    'DISCOVERY_TIME', 'STAT_CAUSE_CODE', 'FIRE_SIZE_CLASS', 'STATE',
]].copy()

In [11]:
# Preview the first five rows of the working copy.
CA_fire.head(n=5)

Unnamed: 0,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,DISCOVERY_TIME,STAT_CAUSE_CODE,FIRE_SIZE_CLASS,STATE
0,40.036944,-121.005833,2005,2453403.5,33,1300.0,9.0,A,CA
1,38.933056,-120.404444,2004,2453137.5,133,845.0,1.0,A,CA
2,38.984167,-120.735556,2004,2453156.5,152,1921.0,5.0,A,CA
3,38.559167,-119.913333,2004,2453184.5,180,1600.0,1.0,A,CA
4,38.559167,-119.933056,2004,2453184.5,180,1600.0,1.0,A,CA


In [12]:
# Keep only the rows whose STATE code is 'CA'.
CA_fire = CA_fire[CA_fire['STATE'].eq('CA')]

In [13]:
# Preview the first five rows after the state filter.
CA_fire.head(n=5)

Unnamed: 0,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,DISCOVERY_TIME,STAT_CAUSE_CODE,FIRE_SIZE_CLASS,STATE
0,40.036944,-121.005833,2005,2453403.5,33,1300.0,9.0,A,CA
1,38.933056,-120.404444,2004,2453137.5,133,845.0,1.0,A,CA
2,38.984167,-120.735556,2004,2453156.5,152,1921.0,5.0,A,CA
3,38.559167,-119.913333,2004,2453184.5,180,1600.0,1.0,A,CA
4,38.559167,-119.933056,2004,2453184.5,180,1600.0,1.0,A,CA


In [14]:
# Sanity check: list the distinct STATE codes left in the working frame.
CA_fire.loc[:, 'STATE'].unique()

array(['CA'], dtype=object)

In [15]:
# Distinct fire size class codes present in the working frame.
CA_fire.loc[:, 'FIRE_SIZE_CLASS'].unique()

array(['A', 'B', 'G', 'C', 'F', 'D', 'E'], dtype=object)

In [23]:
# One-hot encode only the two categorical columns. Naming them explicitly
# (instead of letting get_dummies pick up every object-dtype column) keeps the
# result stable if CA_fire ever carries additional string columns; the new
# columns are prefixed with the source column name (FIRE_SIZE_CLASS_*, STATE_*).
CA_fire = pd.get_dummies(CA_fire, columns=['FIRE_SIZE_CLASS', 'STATE'])

In [24]:
# STATE_CA is constant because only 'CA' rows remain, so drop it.
# Re-running this cell raises KeyError once the column is gone.
CA_fire = CA_fire.drop(columns='STATE_CA')

In [25]:
# Preview the first five rows of the encoded frame.
CA_fire.head(n=5)

Unnamed: 0,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,DISCOVERY_TIME,STAT_CAUSE_CODE,FIRE_SIZE_CLASS_A,FIRE_SIZE_CLASS_B,FIRE_SIZE_CLASS_C,FIRE_SIZE_CLASS_D,FIRE_SIZE_CLASS_E,FIRE_SIZE_CLASS_F,FIRE_SIZE_CLASS_G
0,40.036944,-121.005833,2005,2453403.5,33,1300.0,9.0,1,0,0,0,0,0,0
1,38.933056,-120.404444,2004,2453137.5,133,845.0,1.0,1,0,0,0,0,0,0
2,38.984167,-120.735556,2004,2453156.5,152,1921.0,5.0,1,0,0,0,0,0,0
3,38.559167,-119.913333,2004,2453184.5,180,1600.0,1.0,1,0,0,0,0,0,0
4,38.559167,-119.933056,2004,2453184.5,180,1600.0,1.0,1,0,0,0,0,0,0


In [28]:
# List the column labels of the encoded frame.
CA_fire.columns

Index(['LATITUDE', 'LONGITUDE', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY',
       'DISCOVERY_TIME', 'STAT_CAUSE_CODE', 'FIRE_SIZE_CLASS_A',
       'FIRE_SIZE_CLASS_B', 'FIRE_SIZE_CLASS_C', 'FIRE_SIZE_CLASS_D',
       'FIRE_SIZE_CLASS_E', 'FIRE_SIZE_CLASS_F', 'FIRE_SIZE_CLASS_G'],
      dtype='object')

In [36]:
# NOTE(review): new_fire.csv is not written anywhere in the visible notebook
# (only fire.csv is) -- confirm where this file comes from.
new_df = pd.read_csv(filepath_or_buffer="new_fire.csv")

In [37]:
# Total number of cells (rows x columns) in the reloaded table, not its row count.
new_df.size

18804650