# Wildfire Prediction

### Get the dataset

In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 

In [6]:

# Load the MODIS 2021 Canada detections; the path is relative, so the CSV must
# sit in the notebook's working directory.
wildfires = pd.read_csv("modis_2021_Canada.csv")

# Preview the first rows to check the columns and their value formats.
wildfires.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,54.8753,-117.4533,321.9,1.1,1.0,2021-01-01,522,Terra,MODIS,91,6.03,266.1,0.0,N,0
1,54.2008,-118.9019,313.9,1.2,1.1,2021-01-01,522,Terra,MODIS,88,6.03,267.7,23.4,N,0
2,55.3087,-111.709,313.8,2.4,1.5,2021-01-02,1934,Terra,MODIS,0,6.03,265.3,67.6,D,0
3,54.4509,-107.1336,305.0,4.0,1.9,2021-01-02,1934,Terra,MODIS,34,6.03,265.1,94.0,D,0
4,54.4552,-107.1247,322.5,4.0,1.9,2021-01-02,1934,Terra,MODIS,0,6.03,265.8,197.2,D,0


In [11]:
# Count missing values per column to decide whether any imputation is needed.
wildfires.isnull().sum()

latitude      0
longitude     0
brightness    0
scan          0
track         0
acq_date      0
acq_time      0
satellite     0
instrument    0
confidence    0
version       0
bright_t31    0
frp           0
daynight      0
type          0
dtype: int64

In [15]:
# Summary statistics for the numeric columns; a column with zero std (constant
# value) carries no information for a model.
wildfires.describe()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_time,confidence,version,bright_t31,frp,type
count,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0,97100.0
mean,53.557203,-109.326503,333.503459,1.670557,1.229398,1404.042894,67.413913,6.03,294.31491,108.334157,0.004985
std,3.968219,12.862361,28.765267,0.866015,0.260027,668.91279,30.852951,0.0,9.833665,281.002078,0.099721
min,42.047,-140.9847,300.0,1.0,1.0,141.0,0.0,6.03,264.4,0.0,0.0
25%,50.8556,-120.8718,314.0,1.1,1.0,607.0,49.0,6.03,289.0,17.7,0.0
50%,52.2603,-105.45585,326.0,1.3,1.1,1812.0,75.0,6.03,294.5,37.8,0.0
75%,55.465825,-96.46295,344.1,1.9,1.4,1943.0,95.0,6.03,300.0,91.4,0.0
max,68.4008,-54.8199,507.9,4.8,2.0,2311.0,100.0,6.03,400.1,12808.4,2.0


### Remove 'instrument' and 'version' from the dataset, as they aren't useful (`version` is constant at 6.03, and `instrument` appears to always be MODIS)

In [20]:
# Drop both unused columns in one call. Reassigning (instead of `inplace=True`)
# and passing `errors='ignore'` keeps this cell safe to re-run: once the columns
# are gone, a second execution is a no-op rather than a KeyError.
wildfires = wildfires.drop(columns=['instrument', 'version'], errors='ignore')
wildfires.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,confidence,bright_t31,frp,daynight,type
0,54.8753,-117.4533,321.9,1.1,1.0,2021-01-01,522,Terra,91,266.1,0.0,N,0
1,54.2008,-118.9019,313.9,1.2,1.1,2021-01-01,522,Terra,88,267.7,23.4,N,0
2,55.3087,-111.709,313.8,2.4,1.5,2021-01-02,1934,Terra,0,265.3,67.6,D,0
3,54.4509,-107.1336,305.0,4.0,1.9,2021-01-02,1934,Terra,34,265.1,94.0,D,0
4,54.4552,-107.1247,322.5,4.0,1.9,2021-01-02,1934,Terra,0,265.8,197.2,D,0


### Change satellite and daynight values to integers, and replace acq_date with a month column

In [31]:
# Encode the two string columns as integer category codes. Codes follow the
# sorted order of the labels, so for `daynight` 'D' becomes 0 and 'N' becomes 1.
for column in ('satellite', 'daynight'):
    wildfires[column] = wildfires[column].astype('category').cat.codes

# Replace the acquisition date (formatted YYYY-MM-DD) with its month number,
# parsed with the vectorised datetime accessor instead of a per-row lambda.
# The guard keeps the cell re-runnable after `acq_date` has been removed.
if 'acq_date' in wildfires.columns:
    wildfires['month'] = pd.to_datetime(wildfires['acq_date'], format='%Y-%m-%d').dt.month
    wildfires = wildfires.drop(columns='acq_date')

wildfires.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_time,satellite,confidence,bright_t31,frp,daynight,type,month
0,54.8753,-117.4533,321.9,1.1,1.0,522,1,91,266.1,0.0,1,0,1
1,54.2008,-118.9019,313.9,1.2,1.1,522,1,88,267.7,23.4,1,0,1
2,55.3087,-111.709,313.8,2.4,1.5,1934,1,0,265.3,67.6,0,0,1
3,54.4509,-107.1336,305.0,4.0,1.9,1934,1,34,265.1,94.0,0,0,1
4,54.4552,-107.1247,322.5,4.0,1.9,1934,1,0,265.8,197.2,0,0,1


In [32]:
# Pairwise correlation matrix of all (now numeric) columns, used to spot
# redundant features: pairs with |r| close to 1 carry nearly the same information.
wildfires.corr()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_time,satellite,confidence,bright_t31,frp,daynight,type,month
latitude,1.0,-0.307643,0.032445,0.035222,0.038626,0.082573,0.036753,-0.052645,-0.0187,0.019916,-0.087393,-0.108377,0.073
longitude,-0.307643,1.0,0.006891,-0.027327,-0.028239,-0.048188,-0.033966,-0.150344,0.102708,-0.026073,-0.067138,0.093504,-0.135966
brightness,0.032445,0.006891,1.0,-0.066879,-0.065368,0.228201,-0.088433,0.397022,0.600865,0.653885,-0.235988,-0.042871,-0.08073
scan,0.035222,-0.027327,-0.066879,1.0,0.983855,0.077492,-0.019243,-0.094234,-0.111862,0.231463,-0.085997,-0.018168,-0.045187
track,0.038626,-0.028239,-0.065368,0.983855,1.0,0.083708,-0.017634,-0.096394,-0.110528,0.228622,-0.092989,-0.018758,-0.045479
acq_time,0.082573,-0.048188,0.228201,0.077492,0.083708,1.0,-0.459672,-0.153145,0.339904,0.104029,-0.955178,-0.006437,0.010789
satellite,0.036753,-0.033966,-0.088433,-0.019243,-0.017634,-0.459672,1.0,0.051623,-0.128841,-0.046525,0.28391,0.004633,-0.009546
confidence,-0.052645,-0.150344,0.397022,-0.094234,-0.096394,-0.153145,0.051623,1.0,0.147495,0.17158,0.172209,-0.040122,-0.000388
bright_t31,-0.0187,0.102708,0.600865,-0.111862,-0.110528,0.339904,-0.128841,0.147495,1.0,0.340096,-0.35916,-0.015062,-0.223897
frp,0.019916,-0.026073,0.653885,0.231463,0.228622,0.104029,-0.046525,0.17158,0.340096,1.0,-0.105253,-0.016887,-0.049749


### Prepare the dataset

### Do stuff