# ========================================

# Import Dependencies

# ========================================

In [1]:
import os
import csv
# import math
import datetime
import numpy as np
import pandas as pd
# from datetime import datetime
# from datetime import timedelta
import matplotlib.pyplot as plt
# import time
# import requests
# import datefinder

# # FOR SQL LITE# ========================================
# from sqlalchemy import create_engine
# from datetime import date

# # FOR PLOTTING
# import matplotlib.pyplot as plt
# from matplotlib import style
# style.use("fivethirtyeight")
# from matplotlib import rcParams
# rcParams['figure.figsize'] = 10, 8

# ========================================

# Step 1 - Import Idaho Fire Data

# ========================================

## 1.1 File path for the Idaho Fire Data

In [41]:
# File path for the CSV File; where, ID = Idaho, frs = fires, pth = path

ID_frs_file_pth = os.path.join("Data", "fires_Idaho.csv")

## 1.2 List of Columns to Import from the Idaho Fire CSV

In [42]:
# List of columns to import

ID_frs_lst_Clmns = ["FOD_ID",
                    "FIRE_NAME",
                    "FIRE_SIZE", # Thousands of Acres Burned
                    "FIRE_SIZE_CLASS",
                    "FIRE_YEAR",
                    "DISCOVERY_DATE_CONVERTED",
                    "CONT_DATE_CONVERTED", # Containment Date
                    "LATITUDE",
                    "LONGITUDE",
                    "FIPS_CODE", # County Code
                    "FIPS_NAME", # County Name
                    "STAT_CAUSE_CODE", # Fire Start Cause
                    "STAT_CAUSE_DESCR", # Fire Start Description 
                    "OWNER_CODE", # Land Owner Code, Who's Land was Damaged
                    "OWNER_DESCR", # Land Owner Description, Who's Land was Damaged
                    "DISCOVERY_DOY",
                    "CONT_DOY"] 

## 1.3 Columns to Convert to a Datetime Data Type

In [43]:
parse_dates = ["DISCOVERY_DATE_CONVERTED",
               "CONT_DATE_CONVERTED"]

## 1.4 Import the Idaho Fire CSV

In [44]:
# Open the CSV Files, Convert to a Dataframe, and Save as a Variable, fires_Idaho_df
# Convert the Columne "DISCOVERY_DATE_CONVERTED" to a Datetime Datatype
# Reference: 
#     - Parsing Dates
#         - https://stackoverflow.com/questions/21269399/datetime-dtypes-in-pandas-read-csv
#     - Usecols
#         - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html


ID_frs_df = pd.read_csv(ID_frs_file_pth, usecols = ID_frs_lst_Clmns, parse_dates=parse_dates)

## 1.5 Check the Columns and Data Types of the Imported File

In [45]:
# View the Data in the Dataframe to Make Sure it Imported Correctly
print(ID_frs_df.keys())
print(ID_frs_df.dtypes)
ID_frs_df.head()

Index(['FOD_ID', 'FIRE_NAME', 'FIRE_YEAR', 'DISCOVERY_DOY', 'STAT_CAUSE_CODE',
       'STAT_CAUSE_DESCR', 'CONT_DOY', 'FIRE_SIZE', 'FIRE_SIZE_CLASS',
       'LATITUDE', 'LONGITUDE', 'OWNER_CODE', 'OWNER_DESCR', 'FIPS_CODE',
       'FIPS_NAME', 'DISCOVERY_DATE_CONVERTED', 'CONT_DATE_CONVERTED'],
      dtype='object')
FOD_ID                               int64
FIRE_NAME                           object
FIRE_YEAR                            int64
DISCOVERY_DOY                        int64
STAT_CAUSE_CODE                    float64
STAT_CAUSE_DESCR                    object
CONT_DOY                             int64
FIRE_SIZE                          float64
FIRE_SIZE_CLASS                     object
LATITUDE                           float64
LONGITUDE                          float64
OWNER_CODE                         float64
OWNER_DESCR                         object
FIPS_CODE                            int64
FIPS_NAME                           object
DISCOVERY_DATE_CONVERTED    datetime6

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_YEAR,DISCOVERY_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,CONT_DOY,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,FIPS_CODE,FIPS_NAME,DISCOVERY_DATE_CONVERTED,CONT_DATE_CONVERTED
0,155,LOOP,2005,197,9.0,Miscellaneous,197,0.1,A,44.488611,-111.256111,5.0,USFS,43,Fremont,2005-07-16,2005-07-16
1,172,EAST MINK,2005,183,4.0,Campfire,183,0.1,A,42.736389,-112.384444,5.0,USFS,5,Bannock,2005-07-02,2005-07-02
2,176,INMAN,2005,185,4.0,Campfire,185,0.1,A,42.839722,-112.176667,5.0,USFS,5,Bannock,2005-07-04,2005-07-04
3,177,WHITEROCK,2005,185,4.0,Campfire,185,0.1,A,42.691389,-112.368611,5.0,USFS,5,Bannock,2005-07-04,2005-07-04
4,178,CLEAR CREEK,2005,188,4.0,Campfire,188,0.1,A,42.736389,-112.072222,5.0,USFS,5,Bannock,2005-07-07,2005-07-07


## 1.6 Check to Make Sure FOD_ID is a Unique Idenitfier

If the Column "FOD_ID" is EQUAL to the Length of the "ID_frs_df" then PRINT "FOD_ID is a Unique Idenitfier"

Else If the Column "FOD_ID" is NOT EQUAL to the Length of the "ID_frs_df" then PRINT "NO!!!!!!! FOD_ID is NOT Unique Idenitfier"

In [46]:
if len(ID_frs_df) == len(pd.DataFrame(ID_frs_df["FOD_ID"].unique())):
    print("FOD_ID is a Unique Idenitfier")
elif len(ID_frs_df) == len(pd.DataFrame(ID_frs_df["FOD_ID"].unique())):
    print("NO!!!!!!! FOD_ID is NOT Unique Idenitfier")

FOD_ID is a Unique Idenitfier


In [None]:
len(ID_frs_df)

# ========================================

# Step 2 - Sum the Number of Fires by Month and Year

# ========================================

## 2.1 - Rename the "DISCOVERY_DATE_CONVERTED" Column to "time"

In [None]:
# ID_frs_df = ID_frs_df.rename(columns = {"DISCOVERY_DATE_CONVERTED": "time"})
# print(ID_frs_df.dtypes)
# ID_frs_df.head()

## 2.2 - Check the Data

### 2.2.1 - Check to see if how many rows for each month are in the ID_frs_df

In [47]:
# https://stackoverflow.com/questions/25873772/how-to-filter-a-dataframe-of-dates-by-a-particular-month-day

ID_frs_df_fltrd_nmbr_mnth = ID_frs_df[ID_frs_df["DISCOVERY_DATE_CONVERTED"].dt.month == 7]
ID_frs_df_fltrd_nmbr_mnth = ID_frs_df_fltrd_nmbr_mnth[ID_frs_df_fltrd_nmbr_mnth["DISCOVERY_DATE_CONVERTED"].dt.year == 2015]
print(f"Number of columns for 7/2015: {len(ID_frs_df_fltrd_nmbr_mnth)}")

Number of columns for 7/2015: 247


### 2.2.2 - Check to see if how many rows for each year are in the ID_frs_df

In [48]:
# https://stackoverflow.com/questions/25873772/how-to-filter-a-dataframe-of-dates-by-a-particular-month-day

ID_frs_df_fltrd_nmbr_yr= ID_frs_df[ID_frs_df["DISCOVERY_DATE_CONVERTED"].dt.year == 2006]
print(f"Number of columns for 2015: {len(ID_frs_df_fltrd_nmbr_yr)}")

Number of columns for 2015: 1437


## 2.3 - Group the Data and Rename a Column

### 2.3.1 - Groupby the Month and Count the Number of Fires (Rows) in Each Month

In [49]:
# https://stackoverflow.com/questions/38792122/how-to-group-and-count-rows-by-month-and-year-using-pandas
# https://stackoverflow.com/questions/19384532/get-statistics-for-each-group-such-as-count-mean-etc-using-pandas-groupby

ID_frs_df_mnthly = pd.DataFrame(ID_frs_df['DISCOVERY_DATE_CONVERTED'].groupby(ID_frs_df.DISCOVERY_DATE_CONVERTED.dt.to_period("M")).agg('count').reset_index(name='numb_fires'))

print(ID_frs_df_mnthly.dtypes)
ID_frs_df_mnthly

DISCOVERY_DATE_CONVERTED    period[M]
numb_fires                      int64
dtype: object


Unnamed: 0,DISCOVERY_DATE_CONVERTED,numb_fires
0,1992-06,1
1,1992-07,2
2,1993-08,2
3,1994-07,4
4,1994-08,15
...,...,...
189,2015-07,247
190,2015-08,272
191,2015-09,49
192,2015-10,49


### 2.3.1.1 - Convert the "DISCOVERY_DATE_CONVERTED" Column into a Datatime Data Type

In [55]:
# https://stackoverflow.com/questions/29394730/converting-periodindex-to-datetimeindex
# https://www.geeksforgeeks.org/python-pandas-period-to_timestamp/

ID_frs_df_mnthly_test = ID_frs_df_mnthly
yr_mnth_date_lst = []

ID_frs_df_mnthly_cnvrtd_dtetime = pd.DataFrame(columns= ["DISCOVERY_DATE_CONVERTED", "numb_fires"])
ID_frs_df_mnthly_cnvrtd_dtetime

# Converting the Period Time Data Type to a Datetime Data Type
for ID_frs_df_mnthly_row in ID_frs_df_mnthly_test.index:
    time_period_M_to_convert = ID_frs_df_mnthly_test["DISCOVERY_DATE_CONVERTED"][ID_frs_df_mnthly_row]
    converted_time_data = time_period_M_to_convert.to_timestamp()
    yr_mnth_date = datetime.datetime.strftime(converted_time_data, "%Y-%m")
    
#     ID_frs_annual_df_2["time"] = converted_time_data
    ID_frs_df_mnthly_cnvrtd_dtetime = ID_frs_df_mnthly_cnvrtd_dtetime.append({"DISCOVERY_DATE_CONVERTED": converted_time_data,
                                                                                "numb_fires": ID_frs_df_mnthly_test["numb_fires"][ID_frs_df_mnthly_row],
                                                                                "date": yr_mnth_date},
                                                                                ignore_index = True)

    
    
    
#     print(yr_mnth_date)
#     yr_mnth_date_lst.append(yr_mnth_date)
    
    
# print(len(ID_frs_df_mnthly_cnvrtd_dtetime))
print(ID_frs_df_mnthly_cnvrtd_dtetime.dtypes)
ID_frs_df_mnthly_cnvrtd_dtetime.head()
                                                   
                                                   
# ID_frs_annual_df["time"].to_timestamp()

# ID_frs_annual_df['time'] = ID_frs_annual_df.as_of_date.values.astype('datetime64[M]')
# ID_frs_annual_df['time'] = ID_frs_annual_df.to_timestamp(axis='time')

DISCOVERY_DATE_CONVERTED    datetime64[ns]
numb_fires                          object
date                                object
dtype: object


Unnamed: 0,DISCOVERY_DATE_CONVERTED,numb_fires,date
0,1992-06-01,1,1992-06
1,1992-07-01,2,1992-07
2,1993-08-01,2,1993-08
3,1994-07-01,4,1994-07
4,1994-08-01,15,1994-08


In [59]:
ID_frs_df_mnthly_cnvrtd_dtetime = ID_frs_df_mnthly_cnvrtd_dtetime[["date", "numb_fires"]]
ID_frs_df_mnthly_cnvrtd_dtetime

Unnamed: 0,date,numb_fires
0,1992-06,1
1,1992-07,2
2,1993-08,2
3,1994-07,4
4,1994-08,15
...,...,...
189,2015-07,247
190,2015-08,272
191,2015-09,49
192,2015-10,49


### 2.3.2 - Groupby the Year and Count the Number of Fires (Rows) in Each Year

In [None]:
# https://stackoverflow.com/questions/38792122/how-to-group-and-count-rows-by-month-and-year-using-pandas
# https://stackoverflow.com/questions/19384532/get-statistics-for-each-group-such-as-count-mean-etc-using-pandas-groupby

ID_frs_df_annlly = pd.DataFrame(ID_frs_df['time'].groupby(ID_frs_df.time.dt.to_period("Y")).agg('count').reset_index(name='numb_fires'))

print(ID_frs_df_annlly.dtypes)
ID_frs_df_annlly.head()

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

In [None]:
***************************************************************
# ID_frs_df_annlly['time'] = ID_frs_df_annlly['time'].values.astype('datetime64[M]')
# ID_frs_df_annlly.head()

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ========================================

# Step 3 - Import Historic Weather Data

# ========================================

# ========================================

## 3.1 - Import Meteostat Library

In [2]:
# Import Meteostat library and dependencies
from meteostat import Stations, Daily, Hourly, units

## 3.2 - Pull the Data from the Meteostat Library

## +++++++++++++++++++++++++++++++++++++++++++++++

### 3.2.1 - Creates a Dataframe with Only the 9 Idaho Cites and Their Latitude and Longitude

In [3]:
# Creates a Dataframe with only the 9 Idaho Cites and their Latitude and Longitude

# ID_citis_clmns_nm = ["city_name", "lat", "long"]

# ID_cities_lat_long_df = pd.DataFrame([["Clayton City", 44.259014, -114.399725],
#                                     ["Salmon City", 45.178110, -113.902660],
#                                     ["Elk City City", 45.826944, -115.436667],
#                                     ["Wallace City", 47.474167, -115.928056],
#                                     ["McCall City", 44.910833, -116.103056],
#                                     ["Grangevill City", 45.916667, -116.116667],
#                                     ["Sandpoint City", 48.266667, -116.566667],
#                                     ["Coeur d'Alene City", 47.692778, -116.78],
#                                     ["Yewllo Pine City", 44.965, -115.493611]], 
#                                      columns = ID_citis_clmns_nm)

ID_cnty_clmns_nm = ["county_name", "lat", "long"]

ID_cnty_lst = [["Ada County", 43.450107, -116.239985],
                ["Adams County", 44.910456, -116.450687],
                ["Bannock County", 42.670000, -112.220000],
                ["Bear Lake County", 42.290000, -111.330000],
                ["Benewah County", 47.231425, -116.660000],
                ["Bingham County", 43.220009904309855, -112.39999485062445],
                ["Blaine County", 43.390041, -113.980009],
                ["Boise County", 44.009990, -115.740035],
                ["Bonner County", 48.298223, -116.600687],
                ["Bonneville County", 43.380000, -111.600000],
                ["Boundary County", 48.799944, -116.451361],
                ["Butte County", 43.720000, -113.170000],
                ["Camas County", 43.470000, -114.810000],
                ["Canyon County", 43.641323, -116.710685],
                ["Caribou County", 42.760000, -111.550000],
                ["Cassia County", 42.270000, -113.610000],
                ["Clark County", 44.280000, -112.380000],
                ["Clearwater County", 46.678716, -115.660687],
                ["Custer County", 44.230000, -114.290000],
                ["Elmore County", 43.340000, -115.470000],
                ["Franklin County", 42.180000, -111.810000],
                ["Fremont County", 44.220000, -111.480000],
                ["Gem County", 44.060000, -116.410000],
                ["Gooding County", 42.970000, -114.800000],
                ["Idaho County", 45.850000, -115.460000],
                ["Jefferson County", 43.820000, -112.310000],
                ["Jerome County", 42.690000, -114.260000],
                ["Kootenai County", 47.690863, -116.702060],
                ["Latah County", 46.845284, -116.676700],
                ["Lemhi County", 44.971417, -113.950000],
                ["Lewis County", 46.250684, -116.430687],
                ["Lincoln County", 42.980000, -114.130000],
                ["Madison County", 43.790000, -111.660000],
                ["Minidoka County", 42.850000, -113.640000],
                ["Nez Perce County", 46.338771, -116.750000],
                ["Oneida County", 42.210000, -112.520000],
                ["Owyhee County", 42.559999, -116.169998],
                ["Payette County", 44.010000, -116.760000],
                ["Power County", 42.690000, -112.840000],
                ["Shoshone County", 47.360001, -115.889313],
                ["Teton County", 43.750000, -111.210000],
                ["Twin Falls County", 42.350000, -114.660000],
                ["Valley County", 44.751948, -115.560687],
                ["Washington County", 44.461028, -116.779313]]



ID_cnty_lat_long_df = pd.DataFrame(ID_cnty_lst, columns = ID_cnty_clmns_nm)


# ID_cities_lat_long_df
ID_cnty_lat_long_df

Unnamed: 0,county_name,lat,long
0,Ada County,43.450107,-116.239985
1,Adams County,44.910456,-116.450687
2,Bannock County,42.67,-112.22
3,Bear Lake County,42.29,-111.33
4,Benewah County,47.231425,-116.66
5,Bingham County,43.22001,-112.399995
6,Blaine County,43.390041,-113.980009
7,Boise County,44.00999,-115.740035
8,Bonner County,48.298223,-116.600687
9,Bonneville County,43.38,-111.6


### 3.2.2 - List of Columns for the New Weather Data Dataframe

In [4]:
# List of columns to import from the Meteostat Library

ID_wthr_lst_clmn_nm = ["county_name", "avg_temp", "prcp", "humidity", "dew_point", "time_daily", "time_hourly"]

### 3.2.3 - Loop Through Each Latitude and Longitude and Pull Weather from the Nearest Weather Station from the Meteostat Library

In [None]:
# Take the data and put it into a dataframe

# Idaho_county_Idaho_LatLong_df["LATITUDE"][59]

ID_city_wthr_df = pd.DataFrame(columns = ID_wthr_lst_clmn_nm)

for ID_cities_lat_long_row in ID_cities_lat_long_df.index:
#         print(frs_row)
#         print(Idaho_county_Idaho_df_2["FOD_ID"][frs_row])
    # Set time period
    start = datetime(1992, 1, 1)
#         print(start)
    end = datetime(2015, 12, 31)
#         print(end)


    lat = 44.259014 # Idaho_county_Idaho_df["LATITUDE"][frs_row]
#         print(Idaho_county_Idaho_df_2["LATITUDE"][frs_row])
    lon = -114.399725 #Idaho_county_Idaho_df["LONGITUDE"][frs_row]
#         print(Idaho_county_Idaho_df_2["LONGITUDE"][frs_row])

    # Get closest weather station to Vancouver, BC
    stations = Stations()
    stations = stations.nearby(lat, lon)
    stations = stations.inventory('daily', (start, end))
    station = stations.fetch(1)

    # Get daily data for 2018 at the selected weather station
    daily_data = Daily(station, start, end)
    daily_data = daily_data.aggregate("1M") # Agg. Weekly data Commit out for daily, use 1M for monthly, 1W for weekly
# daily_data = daily_data.normalize()
    daily_data = daily_data.fetch()
    daily_data = daily_data.reset_index(level=0)
        
       # Get Hourly data for Humidity and Dew Point readings
    hourly_data = Hourly(station, start= start, end = end)
    hourly_data = hourly_data.normalize()
    hourly_data = hourly_data.aggregate("1M")
    hourly_data = hourly_data.fetch()
    hourly_data = hourly_data.reset_index(level=0)
# #         print(data["tavg"])
        
    for wthr_row in daily_data.index:
#             print(data["tavg"][wthr_row])
#             print(data["prcp"][wthr_row])
#             print(data["time"][wthr_row])

        ID_city_wthr_df = ID_city_wthr_df.append({# This data is coming from the "daily_data"
                                                            "time_daily": daily_data["time"][wthr_row],
                                                            "avg_temp": daily_data["tavg"][wthr_row],
                                                            "prcp": daily_data["prcp"][wthr_row],

                                                            # This data is coming from the "hourly_data"
                                                            "time_hourly": hourly_data["time"][wthr_row],
                                                            "humidity": hourly_data["rhum"][wthr_row],
                                                            "dew_point": hourly_data["dwpt"][wthr_row],

                                                            # This data is coming from the "ID_cities_lat_long_df"
                                                            "city_name": ID_cities_lat_long_df["city_name"][ID_cities_lat_long_row]},
                                                            ignore_index = True)

In [5]:
# Take the data and put it into a dataframe

# Idaho_county_Idaho_LatLong_df["LATITUDE"][59]

ID_cnty_wthr_df = pd.DataFrame(columns = ID_wthr_lst_clmn_nm)

for ID_cnty_lat_long_df_index in ID_cnty_lat_long_df.index:
#         print(index_row)
#     print(ID_cnty_lat_long_df["lat"][index_row])
#     print(ID_cnty_lat_long_df["long"][index_row])
    
    # Set time period
    start = datetime.datetime(1997, 1, 1)
#         print(start)
    end = datetime.datetime(2015, 12, 31)
#         print(end)


    lat = ID_cnty_lat_long_df["lat"][ID_cnty_lat_long_df_index] # 44.259014
#     print(lat)
#         print(Idaho_county_Idaho_df_2["LATITUDE"][frs_row])
    long = ID_cnty_lat_long_df["long"][ID_cnty_lat_long_df_index] # -114.399725 
#     print(long)
#         print(Idaho_county_Idaho_df_2["LONGITUDE"][frs_row])

    # Get closest weather station to Vancouver, BC
    stations = Stations()
    stations = stations.nearby(lat, long)
    stations = stations.inventory('daily', (start, end))
    station = stations.fetch(1)
#     print(station)

    # Get daily data for 2018 at the selected weather station
    daily_data = Daily(station, start, end)
    daily_data = daily_data.aggregate("1M") # Agg. Weekly data Commit out for daily, use 1M for monthly, 1W for weekly
# daily_data = daily_data.normalize()
    daily_data = daily_data.convert(units.imperial)
    daily_data = daily_data.fetch()
    daily_data = daily_data.reset_index(level=0)
#     print(daily_data)
#     print("------------------------------------------------------------------------")
        
       # Get Hourly data for Humidity and Dew Point readings
    hourly_data = Hourly(station, start= start, end = end)
    hourly_data = hourly_data.normalize()
    hourly_data = hourly_data.aggregate("1M")
    hourly_data = hourly_data.fetch()
    hourly_data = hourly_data.reset_index(level=0)
#     print(hourly_data)
#     print("========================================================================")
#     print("************************************************************************")
#     print("========================================================================")
        
    for wthr_row in daily_data.index:
#         print(wthr_row)
#             print(data["tavg"][wthr_row])
#             print(data["prcp"][wthr_row])
#             print(data["time"][wthr_row])

        ID_cnty_wthr_df = ID_cnty_wthr_df.append({# This data is coming from the "daily_data"
                                                    "time_daily": daily_data["time"][wthr_row],
                                                    "avg_temp": daily_data["tavg"][wthr_row],
                                                    "prcp": daily_data["prcp"][wthr_row],

                                                    # This data is coming from the "hourly_data"
                                                    "time_hourly": hourly_data["time"][wthr_row],
                                                    "humidity": hourly_data["rhum"][wthr_row],
                                                    "dew_point": hourly_data["dwpt"][wthr_row],

                                                    # This data is coming from the "ID_cnty_lat_long_df"
                                                    "county_name": ID_cnty_lat_long_df["county_name"][ID_cnty_lat_long_df_index]},
                                                    ignore_index = True)

In [6]:
ID_cnty_wthr_df

Unnamed: 0,county_name,avg_temp,prcp,humidity,dew_point,time_daily,time_hourly
0,Ada County,32.0,2.736,78.303763,-3.504435,1997-01-31,1997-01-31
1,Ada County,36.5,0.189,64.885417,-3.931250,1997-02-28,1997-02-28
2,Ada County,45.3,0.524,51.401882,-3.071774,1997-03-31,1997-03-31
3,Ada County,48.9,1.890,53.180556,-0.886389,1997-04-30,1997-04-30
4,Ada County,63.4,1.138,48.289367,4.744684,1997-05-31,1997-05-31
...,...,...,...,...,...,...,...
10010,Washington County,78.2,0.181,29.114247,4.916801,2015-08-31,2015-08-31
10011,Washington County,66.2,0.516,40.326389,3.564861,2015-09-30,2015-09-30
10012,Washington County,59.0,0.925,54.932796,4.642204,2015-10-31,2015-10-31
10013,Washington County,37.3,1.717,70.156944,-2.590833,2015-11-30,2015-11-30


### 3.2.4 - Check the Data

In [7]:
print(ID_cnty_wthr_df.dtypes)
print("============================")
print("============================")
print(len(ID_cnty_wthr_df))
ID_cnty_wthr_df.head()

county_name            object
avg_temp              float64
prcp                  float64
humidity              float64
dew_point             float64
time_daily     datetime64[ns]
time_hourly    datetime64[ns]
dtype: object
10015


Unnamed: 0,county_name,avg_temp,prcp,humidity,dew_point,time_daily,time_hourly
0,Ada County,32.0,2.736,78.303763,-3.504435,1997-01-31,1997-01-31
1,Ada County,36.5,0.189,64.885417,-3.93125,1997-02-28,1997-02-28
2,Ada County,45.3,0.524,51.401882,-3.071774,1997-03-31,1997-03-31
3,Ada County,48.9,1.89,53.180556,-0.886389,1997-04-30,1997-04-30
4,Ada County,63.4,1.138,48.289367,4.744684,1997-05-31,1997-05-31


### 3.2.5 - Drop the "time_hourly" Column

In [8]:
# Drops the time_hourly column and then reformats the time_daily to month-year
# https://stackoverflow.com/questions/55282655/how-to-convert-python-date-format-b-y-back-to-y-m-d

ID_cnty_wthr_df_drp_tmhrly = ID_cnty_wthr_df
ID_cnty_wthr_df_drp_tmhrly = ID_cnty_wthr_df_drp_tmhrly[["avg_temp", "prcp", "humidity", "dew_point", "time_daily"]]
# ID_city_wthr_df_new_datetime['time_daily'] = pd.to_datetime(ID_city_wthr_df['time_daily'],format='%Y-%m-%d').dt.strftime('%Y-%m')

print(ID_cnty_wthr_df_drp_tmhrly.dtypes)
ID_cnty_wthr_df_drp_tmhrly.head()

avg_temp             float64
prcp                 float64
humidity             float64
dew_point            float64
time_daily    datetime64[ns]
dtype: object


Unnamed: 0,avg_temp,prcp,humidity,dew_point,time_daily
0,32.0,2.736,78.303763,-3.504435,1997-01-31
1,36.5,0.189,64.885417,-3.93125,1997-02-28
2,45.3,0.524,51.401882,-3.071774,1997-03-31
3,48.9,1.89,53.180556,-0.886389,1997-04-30
4,63.4,1.138,48.289367,4.744684,1997-05-31


In [None]:
# # print(ID_city_wthr_df_new_datetime.dtypes)
# print("============================")
# print("============================")

# ID_city_wthr_df_new_datetime_2 = ID_city_wthr_df_drp_tmhrly
# ID_city_wthr_df_new_datetime_2['time_daily'] = pd.to_datetime(ID_city_wthr_df_new_datetime_2['time_daily'])
# # ID_city_wthr_df_new_datetime_2
# print(ID_city_wthr_df_new_datetime_2.dtypes)
# ID_city_wthr_df_new_datetime_2

### 3.2.5 - Set the Index to the "time_daily" Column

In [9]:
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly = ID_cnty_wthr_df_drp_tmhrly
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly.set_index("time_daily")
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly.head()

Unnamed: 0_level_0,avg_temp,prcp,humidity,dew_point
time_daily,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1997-01-31,32.0,2.736,78.303763,-3.504435
1997-02-28,36.5,0.189,64.885417,-3.93125
1997-03-31,45.3,0.524,51.401882,-3.071774
1997-04-30,48.9,1.89,53.180556,-0.886389
1997-05-31,63.4,1.138,48.289367,4.744684


## 3.3 - Group the Data by Month and by Year

### 3.3.1 - Group the Data by Month and Average the Weather Data for Each Month

In [10]:
# DataFrameGroupBy (grouped by Month)
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly.groupby(pd.Grouper(freq="M"))
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly

# Avergae the Data for Each Month
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg = pd.DataFrame(ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly["avg_temp", "prcp", "humidity", "dew_point"].mean())
# ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg[200:250]


  


### 3.3.2 - Group the Data by Year and Average the Weather Data for Each Month

In [None]:
# # DataFrameGroupBy (grouped by Year)
# ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly = ID_city_wthr_df_drp_tmhrly_indx_tmdaly.groupby(pd.Grouper(freq="Y"))
# ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly

# # Avergae the Data for Each Year
# ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg = pd.DataFrame(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly["avg_temp", "prcp", "humidity", "dew_point"].mean())
# ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg.head()


### 3.2.7 - Reset the Index from the "time_daily" Column

#### 3.2.7.1 Reset the Index for the Monthly Dataframe

In [24]:
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg.reset_index()
print(ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg.dtypes)
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg.head()

index                  int64
time_daily    datetime64[ns]
avg_temp             float64
prcp                 float64
humidity             float64
dew_point            float64
date                  object
dtype: object


Unnamed: 0,index,time_daily,avg_temp,prcp,humidity,dew_point,date
0,0,1997-01-31,30.082051,2.217854,78.8006,-4.567051,2015-12
1,1,1997-02-28,33.382051,0.353707,71.178455,-4.291505,2015-12
2,2,1997-03-31,42.292308,1.035585,58.164373,-3.043872,2015-12
3,3,1997-04-30,45.738462,1.665829,57.112693,-1.460228,2015-12
4,4,1997-05-31,59.920513,1.093634,52.850319,4.295041,2015-12


In [39]:
# ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test[["avg_temp", "prcp", "humidity", "dew_point", "time_daily"]]
yr_mnth_date_lst = []

for index in ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test.index:
#     ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test["date"] = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test
    yr_mnth_date = datetime.datetime.strftime(ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test["time_daily"][index], "%Y-%m")
#     print(yr_mnth_date)
    
    yr_mnth_date_lst.append(yr_mnth_date)
    
yr_mnth_date_lst
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test["date"] = yr_mnth_date_lst
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test

Unnamed: 0,avg_temp,prcp,humidity,dew_point,time_daily,date
0,30.082051,2.217854,78.800600,-4.567051,1997-01-31,1997-01
1,33.382051,0.353707,71.178455,-4.291505,1997-02-28,1997-02
2,42.292308,1.035585,58.164373,-3.043872,1997-03-31,1997-03
3,45.738462,1.665829,57.112693,-1.460228,1997-04-30,1997-04
4,59.920513,1.093634,52.850319,4.295041,1997-05-31,1997-05
...,...,...,...,...,...,...
223,73.245455,0.379795,36.460167,5.492461,2015-08-31,2015-08
224,63.113636,0.715909,43.361013,3.332986,2015-09-30,2015-09
225,55.747727,1.084250,59.380000,4.525904,2015-10-31,2015-10
226,34.452273,1.391068,71.776756,-2.996882,2015-11-30,2015-11


In [57]:
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test = ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test[["date", "avg_temp", "prcp", "humidity", "dew_point"]]
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point
0,1997-01,30.082051,2.217854,78.800600,-4.567051
1,1997-02,33.382051,0.353707,71.178455,-4.291505
2,1997-03,42.292308,1.035585,58.164373,-3.043872
3,1997-04,45.738462,1.665829,57.112693,-1.460228
4,1997-05,59.920513,1.093634,52.850319,4.295041
...,...,...,...,...,...
223,2015-08,73.245455,0.379795,36.460167,5.492461
224,2015-09,63.113636,0.715909,43.361013,3.332986
225,2015-10,55.747727,1.084250,59.380000,4.525904
226,2015-11,34.452273,1.391068,71.776756,-2.996882


#### 3.2.7.2 Reset the Index for the Yearly Dataframe

In [None]:
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg = ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg.reset_index()
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg.head()

### 3.2.8 Reformat the time_daily Column to a DataTime Data Type

In [None]:
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly = ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly["time_daily"] = pd.to_datetime(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly['time_daily'],format='%Y-%m-%d').dt.strftime('%Y')

# Rename the "time_daily" Column to "time"
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly = ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.rename(columns = {"time_daily": "time"})
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.head()

# ========================================

# Step 4 - Combine the Weather Data () with Idaho Fire Data (ID_frs_df) 

# ========================================

## 4.1 - Weather Data

### 4.1.1 - Convert the "time" Column into a Datatime Data Type 

In [58]:
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly["time"] = pd.to_datetime(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly['time'])

NameError: name 'ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly' is not defined

In [None]:
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly["time"] = pd.to_datetime(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly["time"],format='%Y-%m-%d').dt.strftime('%Y')
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.head()

### 4.1.2 - Check the Weather Data

In [None]:
print(f"Number of Rows: {len(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly)}")
print("=========================")
print("Column Data Types:")
print(ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.dtypes)
ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.reset_index()

ID_city_wthr_df_drp_tmhrly_indx_tmdaly_grpby_yrly_avg_rfrmt_tmdaly.head()

## +++++++++++++++++++++++++++++++++++++++++++++++

In [None]:
# from pandas.tseries.offset import BYearBegin
# ts = pd.Timestamp('2020-05-24 05:01:15')

## +++++++++++++++++++++++++++++++++++++++++++++++

## 4.2 - Idaho Fire Data

### 4.2.1 - Change the "time" Column from Period Time Data Type to a Datetime Data Type

In [None]:
# https://stackoverflow.com/questions/29394730/converting-periodindex-to-datetimeindex

ID_frs_df_annlly_dtetime = pd.DataFrame(columns= ["time", "numb_fires"])
ID_frs_df_annlly_dtetime

# Converting the Period Time Data Type to a Datetime Data Type
for ID_frs_df_annlly_row in ID_frs_df_annlly.index:
    time_data_to_convert = ID_frs_df_annlly["time"][ID_frs_df_annlly_row]
    converted_time_data = time_data_to_convert.to_timestamp()
    
#     ID_frs_annual_df_2["time"] = converted_time_data
    ID_frs_df_annlly_dtetime = ID_frs_df_annlly_dtetime.append({"time": converted_time_data,
                                                    "numb_fires": ID_frs_df_annlly["numb_fires"][ID_frs_df_annlly_row]},
                                                            ignore_index = True)

print(ID_frs_df_annlly_dtetime.dtypes)
ID_frs_df_annlly_dtetime.head()
                                                   
                                                   
# ID_frs_annual_df["time"].to_timestamp()

# ID_frs_annual_df['time'] = ID_frs_annual_df.as_of_date.values.astype('datetime64[M]')
# ID_frs_annual_df['time'] = ID_frs_annual_df.to_timestamp(axis='time')

In [None]:
ID_frs_df_annlly_dtetime["time"] = pd.to_datetime(ID_frs_df_annlly_dtetime['time'],format='%Y-%m-%d').dt.strftime('%Y')
ID_frs_df_annlly_dtetime.head()

In [None]:
print(f"Number of Rows: {len(ID_frs_df_annlly_dtetime)}")
print("=========================")
print("Column Data Types:")
print(ID_frs_df_annlly_dtetime.dtypes)
ID_frs_df_annlly_dtetime.head()

In [84]:
# wthr_fire_merged_df = pd.DataFrame()
wthr_fire_merged_df = pd.merge(ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test, ID_frs_df_mnthly_cnvrtd_dtetime, how="outer", on="date")
print(len(wthr_fire_merged_df))
print(wthr_fire_merged_df.dtypes)

wthr_fire_merged_df.head()

241
date           object
avg_temp      float64
prcp          float64
humidity      float64
dew_point     float64
numb_fires     object
dtype: object


Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires
0,1997-01,30.082051,2.217854,78.8006,-4.567051,
1,1997-02,33.382051,0.353707,71.178455,-4.291505,
2,1997-03,42.292308,1.035585,58.164373,-3.043872,
3,1997-04,45.738462,1.665829,57.112693,-1.460228,1.0
4,1997-05,59.920513,1.093634,52.850319,4.295041,


In [85]:
# Check for Null Values in the Merged Dataframe


# Columns with Null Values
# wthr_fire_merged_df.isnull().any(axis=0)

# Rows with Null Values
# wthr_fire_merged_df.isnull().any(axis=1)

# Dataframe of the Null Values
wthr_fire_merged_df[wthr_fire_merged_df.isnull().any(axis=1)]

# wthrwthr_fire_merged_df_fire_merged_df.head()

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires
0,1997-01,30.082051,2.217854,78.8006,-4.567051,
1,1997-02,33.382051,0.353707,71.178455,-4.291505,
2,1997-03,42.292308,1.035585,58.164373,-3.043872,
4,1997-05,59.920513,1.093634,52.850319,4.295041,
10,1997-11,38.333333,0.65725,73.033276,-1.371623,
11,1997-12,27.169048,0.6535,77.268028,-4.378815,
12,1998-01,34.952381,2.056295,76.705594,-1.475552,
13,1998-02,35.980952,1.187023,74.638511,-1.165564,
14,1998-03,39.933333,1.04725,68.038839,-1.270818,
22,1998-11,40.942857,1.554159,72.9303,0.348553,


In [86]:
# Replace the Null Values in the "numb_fires" Column with 0
wthr_fire_merged_df['numb_fires'] = wthr_fire_merged_df['numb_fires'].fillna(0)
wthr_fire_merged_df[wthr_fire_merged_df.isnull().any(axis=1)]

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires
228,1992-06,,,,,1
229,1992-07,,,,,2
230,1993-08,,,,,2
231,1994-07,,,,,4
232,1994-08,,,,,15
233,1994-09,,,,,1
234,1995-06,,,,,2
235,1995-07,,,,,4
236,1995-08,,,,,5
237,1995-09,,,,,1


In [91]:
# Delete the Rows with a Null Values
wthr_fire_merged_df = wthr_fire_merged_df.dropna()
wthr_fire_merged_df[wthr_fire_merged_df.isnull().any(axis=1)]

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires


In [92]:
wthr_fire_merged_df

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires
0,1997-01,30.082051,2.217854,78.800600,-4.567051,0
1,1997-02,33.382051,0.353707,71.178455,-4.291505,0
2,1997-03,42.292308,1.035585,58.164373,-3.043872,0
3,1997-04,45.738462,1.665829,57.112693,-1.460228,1
4,1997-05,59.920513,1.093634,52.850319,4.295041,0
...,...,...,...,...,...,...
223,2015-08,73.245455,0.379795,36.460167,5.492461,272
224,2015-09,63.113636,0.715909,43.361013,3.332986,49
225,2015-10,55.747727,1.084250,59.380000,4.525904,49
226,2015-11,34.452273,1.391068,71.776756,-2.996882,2


In [15]:
ID_cnty_wthr_df_drp_tmhrly_indx_tmdaly_grpby_mnthly_avg_test

'1997-01'

In [94]:
# File path for the CSV File; where, ID = Idaho, frs = fires, pth = path

wthr_fire_merged_df_file_pth = os.path.join("Data", "wthr_fire_mnthly_merged_df.csv")

wthr_fire_merged_df.to_csv(wthr_fire_merged_df_file_pth)

# ========================================

# Step ? - Merge the all the data sets

# ========================================

In [105]:
mnth_strmflw_df_file_pth = os.path.join("Data", "mnth_strmflw_df.csv")
mnthl_lghtnng_strk_df_file_pth = os.path.join("Data", "mnthl_lghtnng_strk_df.csv")

mnth_strmflw_df = pd.read_csv(mnth_strmflw_df_file_pth, usecols = ["date", "strms_blw_extndd_wghtd_avg"]) 
print(mnth_strmflw_df.dtypes)
mnth_strmflw_df.head()


mnthl_lghtnng_strk_df = pd.read_csv(mnthl_lghtnng_strk_df_file_pth, usecols = ["date", "number_of_strikes"])
print(mnthl_lghtnng_strk_df.dtypes)
mnthl_lghtnng_strk_df

date                           object
strms_blw_extndd_wghtd_avg    float64
dtype: object
date                 object
number_of_strikes     int64
dtype: object


Unnamed: 0,date,number_of_strikes
0,1997-05,1
1,1998-07,2
2,1998-08,2
3,1999-08,1
4,2001-08,1
5,2002-06,4
6,2002-07,16
7,2002-08,10
8,2003-05,14
9,2003-07,13


In [107]:
# Merge the Weather & Fire Dataframe ("wthr_fire_merged_df") with the Lightning Dataframe ("mnth_strmflw_df")

wthr_fire_mnth_strmflw_merged_df = pd.merge(wthr_fire_merged_df, mnth_strmflw_df, how="outer", on="date")
wthr_fire_mnth_strmflw_merged_df

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires,strms_blw_extndd_wghtd_avg
0,1997-01,30.082051,2.217854,78.800600,-4.567051,0,9.079257
1,1997-02,33.382051,0.353707,71.178455,-4.291505,0,11.851520
2,1997-03,42.292308,1.035585,58.164373,-3.043872,0,5.375434
3,1997-04,45.738462,1.665829,57.112693,-1.460228,1,1.763397
4,1997-05,59.920513,1.093634,52.850319,4.295041,0,0.563885
...,...,...,...,...,...,...,...
223,2015-08,73.245455,0.379795,36.460167,5.492461,272,43.084198
224,2015-09,63.113636,0.715909,43.361013,3.332986,49,44.911618
225,2015-10,55.747727,1.084250,59.380000,4.525904,49,56.129587
226,2015-11,34.452273,1.391068,71.776756,-2.996882,2,56.625477


In [110]:
# Check for Null Values in the Merged Dataframe


# Columns with Null Values
# wthr_fire_mnth_strmflw_merged_df.isnull().any(axis=0)

# Rows with Null Values
# wthr_fire_mnth_strmflw_merged_df.isnull().any(axis=1)

# # Dataframe of the Null Values
wthr_fire_mnth_strmflw_merged_df[wthr_fire_mnth_strmflw_merged_df.isnull().any(axis=1)]

# # wthrwthr_fire_merged_df_fire_merged_df.head()

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires,strms_blw_extndd_wghtd_avg


In [111]:
# Merge the Weather, Fire & Streamflow Dataframe ("wthr_fire_mnth_strmflw_merged_df") with 
# the Lightning Strike Dataframe ("mnthl_lghtnng_strk_df")

wthr_fire_mnth_strmflw_lghtnng_strk_merged_df = pd.merge(wthr_fire_mnth_strmflw_merged_df, mnthl_lghtnng_strk_df, how="outer", on="date")
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires,strms_blw_extndd_wghtd_avg,number_of_strikes
0,1997-01,30.082051,2.217854,78.800600,-4.567051,0,9.079257,
1,1997-02,33.382051,0.353707,71.178455,-4.291505,0,11.851520,
2,1997-03,42.292308,1.035585,58.164373,-3.043872,0,5.375434,
3,1997-04,45.738462,1.665829,57.112693,-1.460228,1,1.763397,
4,1997-05,59.920513,1.093634,52.850319,4.295041,0,0.563885,1.0
...,...,...,...,...,...,...,...,...
223,2015-08,73.245455,0.379795,36.460167,5.492461,272,43.084198,11.0
224,2015-09,63.113636,0.715909,43.361013,3.332986,49,44.911618,
225,2015-10,55.747727,1.084250,59.380000,4.525904,49,56.129587,
226,2015-11,34.452273,1.391068,71.776756,-2.996882,2,56.625477,


In [112]:
# Check for Null Values in the Merged Dataframe


# Columns with Null Values
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.isnull().any(axis=0)

# Rows with Null Values
# wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.isnull().any(axis=1)

# # Dataframe of the Null Values
# wthr_fire_mnth_strmflw_lghtnng_strk_merged_df[wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.isnull().any(axis=1)]

# # wthrwthr_fire_merged_df_fire_merged_df.head()

date                          False
avg_temp                      False
prcp                          False
humidity                      False
dew_point                     False
numb_fires                    False
strms_blw_extndd_wghtd_avg    False
number_of_strikes              True
dtype: bool

In [114]:
# Replace the Null Values in the "wthr_fire_mnth_strmflw_lghtnng_strk_merged_df" Column with 0
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df['number_of_strikes'] = wthr_fire_mnth_strmflw_lghtnng_strk_merged_df['number_of_strikes'].fillna(0)
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df[wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.isnull().any(axis=1)]

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires,strms_blw_extndd_wghtd_avg,number_of_strikes


In [129]:
# Reference: 
#     - How to add new columns to Pandas dataframe?
#         - https://re-thought.com/how-to-add-new-columns-in-a-dataframe-in-pandas/

fr_tre_no_fr_flse_lst = []

for index in wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.index:
#     print(index)
    if wthr_fire_mnth_strmflw_lghtnng_strk_merged_df["numb_fires"][index] > 0:
        fr_tre_no_fr_flse_lst.append("True")
        
    elif wthr_fire_mnth_strmflw_lghtnng_strk_merged_df["numb_fires"][index] == 0:
        fr_tre_no_fr_flse_lst.append("False")

wthr_fire_mnth_strmflw_lghtnng_strk_merged_df["fire_true_no_fire_false"] = fr_tre_no_fr_flse_lst
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df.head()

Unnamed: 0,date,avg_temp,prcp,humidity,dew_point,numb_fires,strms_blw_extndd_wghtd_avg,number_of_strikes,fire_true_no_fire_false
0,1997-01,30.082051,2.217854,78.8006,-4.567051,0.0,9.079257,0.0,False
1,1997-02,33.382051,0.353707,71.178455,-4.291505,0.0,11.85152,0.0,False
2,1997-03,42.292308,1.035585,58.164373,-3.043872,0.0,5.375434,0.0,False
3,1997-04,45.738462,1.665829,57.112693,-1.460228,0.0,1.763397,0.0,False
4,1997-05,59.920513,1.093634,52.850319,4.295041,1.0,0.563885,1.0,True


In [130]:
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df_FINAL = wthr_fire_mnth_strmflw_lghtnng_strk_merged_df[["fire_true_no_fire_false", "avg_temp", "prcp", "humidity", "dew_point", "strms_blw_extndd_wghtd_avg", "number_of_strikes"]]
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df_FINAL

Unnamed: 0,fire_true_no_fire_false,avg_temp,prcp,humidity,dew_point,strms_blw_extndd_wghtd_avg,number_of_strikes
0,False,30.082051,2.217854,78.800600,-4.567051,9.079257,0.0
1,False,33.382051,0.353707,71.178455,-4.291505,11.851520,0.0
2,False,42.292308,1.035585,58.164373,-3.043872,5.375434,0.0
3,False,45.738462,1.665829,57.112693,-1.460228,1.763397,0.0
4,True,59.920513,1.093634,52.850319,4.295041,0.563885,1.0
...,...,...,...,...,...,...,...
223,True,73.245455,0.379795,36.460167,5.492461,43.084198,11.0
224,False,63.113636,0.715909,43.361013,3.332986,44.911618,0.0
225,False,55.747727,1.084250,59.380000,4.525904,56.129587,0.0
226,False,34.452273,1.391068,71.776756,-2.996882,56.625477,0.0


In [131]:
wthr_fire_mnth_strmflw_lghtnng_strk_merged_df_FINAL_file_pth = os.path.join("Data", "mnth_wthr_fire_strmflw_lghtnng_strk_mrgd_df_FINAL.csv")

wthr_fire_mnth_strmflw_lghtnng_strk_merged_df_FINAL.to_csv(wthr_fire_mnth_strmflw_lghtnng_strk_merged_df_FINAL_file_pth)

# ========================================

# Step 5 - Visualize the Data by Plotting the Data

# ========================================

## 5.1 - Histogram of Idaho Fire Data

### 5.1.1 - Histogram of the number of fires by month

In [75]:
# Create a histogram of the number of fires by month

# Get fire year data
fire_month = ID_frs_df_mnthly_cnvrtd_dtetime['DISCOVERY_DATE_CONVERTED']
# Plot as Histogram
plt.figure(figsize=[15,10])
plt.hist(fire_month, bins=194, color="#fed8b1") # 194 = len(ID_frs_df_mnthly_cnvrtd_dtetime)
plt.suptitle("Number of Fires in the Idaho, Month", fontsize=19)
plt.title("(Between 1992 and 2015)", fontsize=19)
plt.xlabel("Year", fontsize=14)
plt.ylabel("# of Fires", fontsize=14)
# Set tick values and labels
# plt.xlim(1992,2015)
# plt.yticks( (0,20,40,60,80,100,150,200), 
#             ('0','20', '40','60', '80','100',"150",'200')    )
plt.savefig( "Number of Fires in the US.jpg" )
plt.show()

KeyError: 'DISCOVERY_DATE_CONVERTED'

### 5.1.2 - Histogram of the Number of Fires by Year

In [None]:
# Create a histogram of the number of fires by year

# Get fire year data
fire_year = ID_frs_df['FIRE_YEAR']
# Plot as Histogram
plt.figure(figsize=[15,10])
plt.hist(fire_year, bins=24, color="#fed8b1")
plt.suptitle("Number of Fires in the Idaho, Year", fontsize=19)
plt.title("(Between 1992 and 2015)", fontsize=19)
plt.xlabel("Year", fontsize=14)
plt.ylabel("# of Fires", fontsize=14)
# Set tick values and labels
plt.xlim(1992,2015)
# plt.yticks( (0,20,40,60,80,100,150,200), 
#             ('0','20', '40','60', '80','100',"150",'200')    )
plt.savefig( "Number of Fires in the US.jpg" )
plt.show()

## 5.2 - Line Chart of Idaho Fire Sizes Over Time

### 5.2.1 - Line Chart of the Number of Fires by Month

In [None]:

# Get maximum and average fire size for each year
size_avg = {} # defaultdict(float)
size_max = {} # defaultdict(float)
for ID_frs_df_mnthly_cnvrtd_dtetime_ROW in range(1992,2016):
    # Dataframe including data from fires in the given year
    df = ID_frs_df_mnthly_cnvrtd_dtetime[ID_frs_df_mnthly_cnvrtd_dtetime['DISCOVERY_DATE_CONVERTED'] == yr]
    # All fire sizes in the given year
    sz = np.array(df['FIRE_SIZE'])
    # Get average and maximum fire size for given year
    # and add to dictionary with given year as the key
    size_avg[yr] = sum(sz)/len(sz)
    size_max[yr] = max(sz)
# Plot average fire size, yearly
fit, ax = plt.subplots(figsize=(12,4))
ax.plot([yr for yr in range(1992,2016)], list(size_avg.values()))                
plt.xlabel("Year", fontsize=14)
plt.ylabel("Size (in acres burned)", fontsize=14)
plt.suptitle("Average Fire Size, Yearly", fontsize=19)                 
ax.grid()
plt.savefig("Avg Fire Size.jpg")
plt.show()
# Plot maximum fire size, yearly
fit, ax = plt.subplots(figsize=(12,4))
ax.plot([yr for yr in range(1992,2016)], list(size_max.values()))
plt.xlabel("Year", fontsize=14)
plt.ylabel("Size (in thousands of acres burned)", fontsize=14)
plt.suptitle("Maximum Fire Size, Yearly", fontsize=19)
plt.title("(in thousands)", fontsize=9)                      
ax.grid()
plt.savefig("Max Fire Size.jpg")
plt.show()

### 5.2.2 - Line Chart of the Number of Fires by Year

In [None]:

# Get maximum and average fire size for each year
size_avg = {} # defaultdict(float)
size_max = {} # defaultdict(float)
for yr in range(1992,2016):
    # Dataframe including data from fires in the given year
    df = ID_frs_df[ID_frs_df['FIRE_YEAR'] == yr]
    # All fire sizes in the given year
    sz = np.array(df['FIRE_SIZE'])
    # Get average and maximum fire size for given year
    # and add to dictionary with given year as the key
    size_avg[yr] = sum(sz)/len(sz)
    size_max[yr] = max(sz)
# Plot average fire size, yearly
fit, ax = plt.subplots(figsize=(12,4))
ax.plot([yr for yr in range(1992,2016)], list(size_avg.values()))                
plt.xlabel("Year", fontsize=14)
plt.ylabel("Size (in acres burned)", fontsize=14)
plt.suptitle("Average Fire Size, Yearly", fontsize=19)                 
ax.grid()
plt.savefig("Avg Fire Size.jpg")
plt.show()
# Plot maximum fire size, yearly
fit, ax = plt.subplots(figsize=(12,4))
ax.plot([yr for yr in range(1992,2016)], list(size_max.values()))
plt.xlabel("Year", fontsize=14)
plt.ylabel("Size (in thousands of acres burned)", fontsize=14)
plt.suptitle("Maximum Fire Size, Yearly", fontsize=19)
plt.title("(in thousands)", fontsize=9)                      
ax.grid()
plt.savefig("Max Fire Size.jpg")
plt.show()

## 5.3 - GeoGraphical Trends, County Level

In [None]:
state_fire_year = ID_frs_df[['FIPS_NAME', 'FIRE_YEAR']]
top_states = []
for year in range(1992, 2016):
    labels = state_fire_year[state_fire_year['FIRE_YEAR']==year].index.tolist() 
    year_subset = state_fire_year.loc[labels, :] 
    agg_year_subset = year_subset.groupby('FIPS_NAME').count()["FIRE_YEAR"].reset_index(name="# of Fires")
    sorted_subset = agg_year_subset.sort_values(by= "# of Fires", ascending = False)
    top_entry = sorted_subset[:1]
    top_state = list(top_entry['FIPS_NAME'])[0]
    top_states.append(top_state)
    print(top_states)

In [None]:
years = list(range(1992, 2016))
plt.figure(figsize=(20, 6))
Latah_years, Latah_list = [], []
Clearwater_years, Clearwater_list = [], []
Ada_years, Ada_list = [], []
Idaho_years, Idaho_list = [], []
Elmore_years, Elmore_list = [], []
Shoshone_years, Shoshone_list = [], []

for index in range(len(top_states)):
    state = top_states[index]
    if state=="Latah":
        Latah_years.append(years[index])
        Latah_list.append(top_states[index])
    elif state=="Clearwater":
        Clearwater_years.append(years[index])
        Clearwater_list.append(top_states[index])
    elif state=="Ada":
        Ada_years.append(years[index])
        Ada_list.append(top_states[index])
    elif state=="Idaho":
        Idaho_years.append(years[index])
        Idaho_list.append(top_states[index])
    elif state=="Elmore":
        Elmore_years.append(years[index])
        Elmore_list.append(top_states[index])
    elif state=="Shoshone":
        Shoshone_years.append(years[index])
        Shoshone_list.append(top_states[index])
plt.scatter(Latah_years, Latah_list, linewidths='10', color='green', 
            label="Latah", marker='s')
plt.scatter(Clearwater_years, Clearwater_list, linewidths='10', color='blue', 
            label="Clearwater",  marker='s')
plt.scatter(Ada_years, Ada_list, linewidths='10', color='red', 
            label = "Ada",  marker='s')
plt.scatter(Idaho_years, Idaho_list, linewidths='10', color='purple', 
            label = "Idaho",  marker='s')
plt.scatter(Elmore_years, Elmore_list, linewidths='10', color='black', 
            label = "Elmore",  marker='s')
plt.scatter(Shoshone_years, Shoshone_list, linewidths='10', color='pink', 
            label = "Shoshone",  marker='s')
plt.xticks(size='14')
plt.xticks(np.arange(1992, 2016, step=1))
plt.xlabel('Year', size='20', labelpad=20)
plt.yticks(size='18')
plt.ylabel('County', size='20', labelpad=15)
plt.grid(axis='y', linestyle='--', linewidth=0.5)
plt.title("Counties with Highest Number of Fires from 1992-2015", 
          size="20")
plt.show()

## 5.4 - GeoGraphical Trends, County Level

In [None]:
disc_dates = ID_frs_df.groupby('FIRE_YEAR').mean()["DISCOVERY_DOY"].reset_index(name="average")
cont_dates = ID_frs_df.groupby('FIRE_YEAR').mean()["CONT_DOY"].reset_index(name="average")
avg_disc_dates = disc_dates["average"]
avg_cont_dates = cont_dates["average"]
years = disc_dates["FIRE_YEAR"]

In [None]:
plt.figure(figsize=(20, 10))
plt.plot(years, avg_disc_dates, color="green", label="Discovery Date")
plt.xlabel("Year", size="18")
plt.xticks(np.arange(1992, 2016, step=1))
plt.ylabel("Day of Year", size="18")
plt.xticks(size="14")
plt.title("Average Discovery Date of Fires from 1992-2015", size="20")
plt.plot(years, avg_cont_dates, color="red", label="Containment Date")
plt.xlabel("Year", size="18")
plt.xticks(size="14")
plt.xticks(np.arange(1992, 2016, step=1))
plt.title("Average Discovery Date and Containment Date of Fires from 1992-2015", size="20")
plt.hlines(151, 1992, 2015, linestyles='dashed', color = 'black')
plt.hlines(242, 1992, 2015, linestyles='dashed', color = 'black')
plt.axhspan(59, 151, facecolor='green', alpha=0.1, label = "spring season")
plt.axhspan(152, 242, facecolor='yellow', alpha=0.1, label = "summer season")
plt.axhspan(243, 333, facecolor='orange', alpha=0.1, label = "fall season")
plt.legend(fontsize="16", loc="upper right")

## 5.5 - Line Chart of Idaho Fire Sizes Over Time

In [None]:
cause_types = ID_frs_df.groupby('STAT_CAUSE_DESCR').count()["FIRE_YEAR"].reset_index(name="count")
cause_types = cause_types.sort_values(by="count", ascending = False)

In [None]:
cause = cause_types["STAT_CAUSE_DESCR"]
count = cause_types["count"]
total_count = cause_types["count"]
plt.figure(figsize=(20, 6))
total_cause_count = sum(list(total_count))
plt.bar(cause, (count/total_cause_count), color="darkred", alpha = 0.5)
plt.xticks(rotation=30, ha='right', size="15")
plt.xlabel("Fire Cause", size="20")
plt.ylabel("Percentage of Fires", size="20")
plt.title("Distribution of Fire Causes from 1992-2015", size="20")
plt.show()

# +++++++++++++++++++++++++++++++++++++++++

# Annual Average

In [None]:
plt.figure(figsize=[15,10])
plt.plot(wthr_fire_merged_df['avg_temp'],label='avg_temp')
plt.plot(wthr_fire_merged_df['prcp'],label='prcp')
plt.plot(wthr_fire_merged_df['humidity'],label='humidity')
plt.plot(wthr_fire_merged_df['dew_point'],label='dew_point')
# plt.plot(wthr_fire_merged_df_annl_fire['numb_fires'],label='numb_fires')

plt.legend(loc=2)

# Annul averages

In [None]:
plt.figure(figsize=[15,10])

fig, ax1 = plt.subplots()

# color = 'tab:red'
# ax1.set_xlabel('time (year)')
# ax1.set_ylabel('Weather Data')
# ax1.plot(wthr_fire_merged_annl_df['time_daily'], wthr_fire_merged_annl_df['avg_temp'])
# ax1.tick_params(axis='y', labelcolor=color)

plt.plot(wthr_fire_merged_df['avg_temp'],label='avg_temp')
plt.plot(wthr_fire_merged_df['prcp'],label='prcp')
plt.plot(wthr_fire_merged_df['humidity'],label='humidity')
plt.plot(wthr_fire_merged_df['dew_point'],label='dew_point')

# plt.plot(wthr_fire_merged_annl_df['numb_fires'],label='numb_fires')

plt.plot()
# plt.plot(wthr_fire_merged_df_annl_fire['numb_fires'],label='numb_fires')

plt.legend(loc=2)

In [None]:
plt.figure(figsize=[15,10])
numb_fires_handle = plt.plot(wthr_fire_merged_df['numb_fires'],label='numb_fires', color = "red")
prcp_handle = plt.plot(wthr_fire_merged_df['prcp'], label='prcp', color = "blue")
avg_temp_handle = plt.plot(wthr_fire_merged_df['avg_temp'], label='prcp', color = "green")
humidity_handle = plt.plot(wthr_fire_merged_df['humidity'], label='prcp', color = "purple")
humidity_handle = plt.plot(wthr_fire_merged_df['dew_point'], label='prcp', color = "turquoise")



plt.legend(handles = [numb_fires_handle[0], prcp_handle[0]], labels = ["Fires", "Prcp"], loc = "best")

In [None]:
# Multiple Yaxis With Spines
# https://matplotlib.org/3.1.1/gallery/ticks_and_spines/multiple_yaxis_with_spines.html#sphx-glr-gallery-ticks-and-spines-multiple-yaxis-with-spines-py
# Change figure size
# https://stackoverflow.com/questions/332289/how-do-you-change-the-size-of-figures-drawn-with-matplotlib

# plt.figure(figsize=[15,10])

# Create some mock data
t = wthr_fire_merged_df['time']
data1 = wthr_fire_merged_df['numb_fires']
data2 = wthr_fire_merged_df['dew_point']
data3 = wthr_fire_merged_df['humidity']
data4 = wthr_fire_merged_df['prcp']
data5 = wthr_fire_merged_df['avg_temp']

fig, ax1 = plt.subplots(figsize=[15,10])


color = 'tab:red'
ax1.set_xlabel('time (s)')
ax1.set_ylabel('numb_fires', color=color)
ax1.plot(t, data1, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
ax3 = ax1.twinx()
ax4 = ax1.twinx()
ax5 = ax1.twinx()


# Offset the right spine of par2.  The ticks and label have already been
# placed on the right by twinx above.
ax2.spines["right"].set_position(("axes", 1.1))
ax3.spines["right"].set_position(("axes", 1.2))
ax4.spines["right"].set_position(("axes", 1.3))
ax5.spines["right"].set_position(("axes", 1.4))
# Having been created by twinx, par2 has its frame off, so the line of its
# detached spine is invisible.  First, activate the frame but make the patch
# and spines invisible.
make_patch_spines_invisible(ax2)
make_patch_spines_invisible(ax3)
make_patch_spines_invisible(ax4)
make_patch_spines_invisible(ax5)
# Second, show the right spine.
ax2.spines["right"].set_visible(True)
ax3.spines["right"].set_visible(True)
ax4.spines["right"].set_visible(True)
ax5.spines["right"].set_visible(True)

# Legend
# p1, = ax1.plot([0, 1, 2], [0, 1, 2], "r-", label="Number Fires")
# p2, = ax2.plot([0, 1, 2], [0, 3, 2], "g-", label="Dew Point")
# p3, = ax3.plot([0, 1, 2], [50, 30, 15], "p-", label="Humidity")
# p4, = ax4.plot([0, 1, 2], [75, 65, 40], "b-", label="Prcp")
# p5, = ax4.plot([0, 1, 2], [100, 90, 65], "o-", label="Average Temp.")

# Axis Limits
# ax1.set_xlim(0, 20)
ax1.set_ylim(0, 1500)
ax2.set_ylim(0, 10)
ax3.set_ylim(1, 65)


color = 'tab:green'
ax2.set_ylabel('dew_point', color=color)  # we already handled the x-label with ax1
ax2.plot(t, data2, color=color)
ax2.tick_params(axis='y', labelcolor=color)

color = 'tab:purple'
ax3.set_ylabel('humidity', color=color)  # we already handled the x-label with ax1
ax3.plot(t, data3, color=color)
ax3.tick_params(axis='y', labelcolor=color)

color = 'tab:blue'
ax4.set_ylabel('prcp', color=color)  # we already handled the x-label with ax1
ax4.plot(t, data4, color=color)
ax4.tick_params(axis='y', labelcolor=color)

color = 'tab:orange'
ax5.set_ylabel('avg_temp (c)', color=color)  # we already handled the x-label with ax1
ax5.plot(t, data5, color=color)
ax5.tick_params(axis='y', labelcolor=color)

# Lines for the Legend
# lines = [p1, p2, p3, p4, p5]

# ax1.legend(lines, [l.get_label() for l in lines])

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()

In [None]:
def make_patch_spines_invisible(ax):
    ax.set_frame_on(True)
    ax.patch.set_visible(False)
    for sp in ax.spines.values():
        sp.set_visible(False)


fig, host = plt.subplots()
fig.subplots_adjust(right=0.75)

par1 = host.twinx()
par2 = host.twinx()

# Offset the right spine of par2.  The ticks and label have already been
# placed on the right by twinx above.
par2.spines["right"].set_position(("axes", 1.2))
# Having been created by twinx, par2 has its frame off, so the line of its
# detached spine is invisible.  First, activate the frame but make the patch
# and spines invisible.
make_patch_spines_invisible(par2)
# Second, show the right spine.
par2.spines["right"].set_visible(True)

p1, = host.plot(wthr_fire_merged_df['avg_temp'], "b-", label="Density")
p2, = par1.plot(wthr_fire_merged_df['dew_point'], "r-", label="Temperature")
p3, = par2.plot(wthr_fire_merged_df['time'], "g-", label="Velocity")

host.set_xlim(0, 2)
host.set_ylim(0, 2)
par1.set_ylim(0, 4)
par2.set_ylim(1, 65)

host.set_xlabel("Distance")
host.set_ylabel("Density")
par1.set_ylabel("Temperature")
par2.set_ylabel("Velocity")

host.yaxis.label.set_color(p1.get_color())
par1.yaxis.label.set_color(p2.get_color())
par2.yaxis.label.set_color(p3.get_color())

tkw = dict(size=4, width=1.5)
host.tick_params(axis='y', colors=p1.get_color(), **tkw)
par1.tick_params(axis='y', colors=p2.get_color(), **tkw)
par2.tick_params(axis='y', colors=p3.get_color(), **tkw)
host.tick_params(axis='x', **tkw)

lines = [p1, p2, p3]

host.legend(lines, [l.get_label() for l in lines])

plt.show()

In [None]:
plt.figure(figsize=[15,10])
plt.plot(wthr_fire_merged_df['numb_fires'], wthr_fire_merged_df['avg_temp'], label='dew_point')
plt.xlabel("numb_fires")
plt.ylabel('prcp')
plt.legend(loc=2)

In [None]:
plt.figure(figsize=[15,10])
# plt.grid(True)
plt.plot(wthr_fire_merged_df_moving_average['avg_temp'],label='avg_temp')
plt.plot(wthr_fire_merged_df_moving_average['numb_fires'],label='numb_fires')
plt.plot(wthr_fire_merged_df_moving_average['humidity'],label='humidity')
plt.plot(wthr_fire_merged_df_moving_average['dew_point'],label='dew_point')
plt.plot(wthr_fire_merged_df_moving_average['prcp'],label='prcp')
plt.legend(loc=2)

In [None]:
plt.figure(figsize=[15,10])
# plt.grid(True)
# plt.plot(wthr_fire_merged_df_moving_average['numb_fires'], wthr_fire_merged_df_moving_average['avg_temp'],label='avg_temp')
plt.plot(wthr_fire_merged_df_moving_average['numb_fires'],label='numb_fires')
plt.plot(wthr_fire_merged_df_moving_average['humidity'],label='humidity')
# plt.plot(wthr_fire_merged_df_moving_average['dew_point'],label='dew_point')
# plt.plot(wthr_fire_merged_df_moving_average['prcp'],label='prcp')
plt.legend(loc=2)

## 3D Plot of the Data

In [None]:
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

In [None]:
fig = plt.figure(1, figsize=(7, 7))

axes = Axes3D(fig, elev=20, azim=45)

# Note axes.scatter takes in list of data so the dataframe columns were converted into a list
# https://stackoverflow.com/questions/36589521/how-to-surface-plot-3d-plot-from-dataframe

axes.scatter(wthr_fire_merged_df_moving_average["avg_temp"].to_list(), 
             wthr_fire_merged_df_moving_average["humidity"].to_list(), 
             wthr_fire_merged_df_moving_average["numb_fires"].to_list(), 
#              c="y", 
#              cmap=cm.coolwarm)
             cmap=cm.get_cmap("Spectral"))
plt.show()

# ========================================

# Step 4 - Multiple Liner Regression Machine Learning

# ========================================

In [None]:
X = wthr_fire_merged_df[["avg_temp", "humidity", "prcp", "dew_point"]].values.tolist()

X

In [None]:
y = wthr_fire_merged_df[["numb_fires"]].values.tolist()
type(y)
y

# There's no need to scale the data b/c the magintude of the change between the each features are the same and the magintude of the change between the features and the number of fires are the same. Everything is counted in increments of one.
### Fit the training data to the StandardScaler

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
X_scaler = StandardScaler().fit(X)
y_scaler = StandardScaler().fit(y)

### Create variables to hold the scaled train & test data

In [None]:
X_scaled = X_scaler.transform(X)
# X_test_scaled = X_scaler.transform(X_test)
y_scaled = y_scaler.transform(y)
# y_test_scaled = y_scaler.transform(y_test)

### Step 2) Split data into training and testing data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
print(type(X_train))
type(y_train)

### Step 4) Fit the Model to the training data and make predictions using the test data

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
model = LinearRegression()

model.fit(X, y)

print(model.score(X, y))

In [None]:
# Score for the Test data
model.score(X_test, y_test)

# Create a Table to look at the Data

In [None]:
predictions = model.predict(X_train)

In [None]:
# type(y_train)
error = predictions- y_train
type(error)

In [None]:
pd.DataFrame({"Predicted": predictions, "Actual": y, "Error": predictions - y})

# Plot the residuals

In [None]:
# create a residuals plot using the predictions for both test and train data
plt.scatter(model.predict(X_train), model.predict(X_train) - y_train, c="blue", label="Training Data")
plt.scatter(model.predict(X_test), model.predict(X_test) - y_test, c="orange", label="Testing Data")
plt.legend()

# create a horizontal line at y=0 to show how much error is in each prediction
plt.hlines(y=0, xmin = min(X_test), xmax = max(X_test))
plt.title("Residual Plot")
plt.xlabel("Prediction")
plt.show()

In [None]:
preds = model.predict(X_train)

plt.scatter(preds, preds-y_train, c="red")
plt.hlines(y=0, xmin = min(preds), xmax = max(preds))
plt.xlabel("Predicted Value")
plt.ylabel("Residual/Error")

#### Create a DataFrame with each predicted y-value, actual y-value, and error.

In [None]:
pd.DataFrame({"Predicted": predictions, "Actual": y, "Error": predictions - y})

# Quantify your model using the scaled data

In [None]:
from sklearn.metrics import mean_squared_error

predictions = model.predict(X_test)
MSE = mean_squared_error(y_test, predictions)
r2 = model.score(X_test, y_test)

print(f"MSE: {MSE}, R2: {r2}")

# ========================================

# Step 4 - Multiple Liner Regression Machine Learning

# ========================================

# ========================================

# Step 4 - Export the Data to a CSV File

# ========================================

In [None]:
# File path to export the CSV File
export_file_pth = os.path.join("Data", "wthr_fire_merged_df.csv")

wthr_fire_merged_df.to_csv(export_file_pth)

In [None]:
ID_cmbnd_wthr_frs_df.head()

In [None]:
len(ID_cmbnd_wthr_frs_df)

In [None]:
data

In [None]:
empty_df

In [None]:
# https://stackoverflow.com/questions/20461165/how-to-convert-index-of-a-pandas-dataframe-into-a-column
data_new = data.reset_index(level=0)
print(data_new.dtypes)
data_new

# ========================================

# ========================================

### 2.2.1.2 - Combine the List of Columns from the Weather Data and Fire Dataframe

In [None]:
# Combine the List of Columns from the Weather Data and Fire Dataframe

ID_cmbnd_wthr_frs_lst_Clmns = ID_wthr_lst_Clmns + ID_frs_lst_Clmns
print(ID_cmbnd_wthr_frs_lst_Clmns)

In [None]:
data_new_groupby_time = data_new.groupby('time').agg({'prcp': ['mean']})
data_new_groupby_time

# ========================================

# ========================================

# ========================================

### Generate and associate cities using the lat lng coordinates

In [None]:
from citipy import citipy

# Pull lat lng columns from df
location_df = fires_Idaho_df[['LATITUDE', 'LONGITUDE']]

In [None]:
### Generate a random cities from Lat Long table make sure we have up to 500. Delete any duplicates: 
# Make lists needed
cityList = []
countryList = []

# Create a loop to find a city for each lat long pair
for index, row in location_df.iterrows():
#     print(f"Searching for nearest city to lat long {row['Latitude'], row['Longitude']}...")
    city = citipy.nearest_city(row["LATITUDE"], row["LONGITUDE"])
    cityName = city.city_name
    location_df.loc[index,"CITY"] = cityName
    cityList.append(cityName)

# ### Trying to identify and remove any duplicate cities 
# CleanLatLong_df = location_df.drop_duplicates(subset = ['CITY'])
# print(location_df["CITY"].value_counts())
          
# # Update my cities and country list and lat long lists
# citySeries = CleanLatLong_df['CITY']
# LatSeries = CleanLatLong_df['LATITUDE']
# LongSeries = CleanLatLong_df['LONGITUDE']
# cityList = citySeries.values.tolist()
# LatList = LatSeries.values.tolist()
# LongList = LongSeries.values.tolist()

# citySeries

In [None]:
location_df

In [None]:
# ========================================location_df['CITY'].value_counts()
# location_df['CITY'].unique()