In [2]:
import pandas as pd
import os
import geopandas as gpd
import geoplot
import geoplot.crs as gcrs
import matplotlib.pyplot as plt
import rasterio
from statsmodels.tsa.seasonal import STL
from arch.unitroot import PhillipsPerron
import numpy as np
from sklearn.linear_model import LinearRegression
import datetime as dt
from matplotlib.colors import TwoSlopeNorm
import time

In [4]:
#Load Data
os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\notebooks\\crop_fires\\Classification_Fires')
fire_data_classified = pd.read_csv('fires_data_classified.csv', index_col=0)
#Select only the agricultural fires
fire_data = fire_data_classified[fire_data_classified['agricultural'] == 1]

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\geospatial_internship\\datasets')
telangana_shape = gpd.read_file('telangana_shapefile.geojson')

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\src\\data_preprocessing\\tsdm\\')
district_boundaries = gpd.read_file('District_Boundary.shp')
district_boundaries = district_boundaries.reset_index()

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\notebooks\\crop_fires\\Mandal_boundaries_pre')
mandal_boundaries = gpd.read_file('mandal_boundaries.shp')
mandal_boundaries = mandal_boundaries.reset_index()

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\notebooks\\crop_fires\\DPPD_Jesse')
one_km_grid = gpd.read_file('telangana_1km_grid.geojson')
one_km_grid = one_km_grid.reset_index()

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\notebooks\\crop_fires\\DPPD_Jesse')
ten_km_grid = gpd.read_file('telangana_10km_grid.geojson')
ten_km_grid = ten_km_grid.reset_index()

In [5]:
def Trend_Score(df, index): 
    df_selected = df[df['index'] == index] 
    df_selected = df_selected.sort_values(by=['ModifiedDateTime'])
    df_selected.index = df_selected['ModifiedDateTime']

    if not df_selected.empty:

        X = df_selected['Fires']

        #Decompose using STL Seasonal-Trend Decomposition using LOESS
        stl = STL(df_selected['Fires'], seasonal=7)
        res = stl.fit()

        df_trend = res.trend.to_frame().reset_index().dropna()
        df_trend['ModifiedDateTime_num'] = df_trend['ModifiedDateTime'].map(dt.datetime.toordinal)

        X = np.array(df_trend['ModifiedDateTime_num'])
        X = X.reshape(-1, 1)
        y = np.array(df_trend['trend'])
        y = y.reshape(-1, 1)
        reg = LinearRegression().fit(X, y)

        y_pred_trend = reg.predict(X)

        slope, intercept = np.polyfit(np.array(df_trend['ModifiedDateTime_num']), y_pred_trend,1)

        line_slope = slope[0]

    else:
        line_slope = 'Unknown'

    return(line_slope)

In [6]:
def fire_counts(beginyear, endyear, fire_data, boundaries, level):
    st = time.time()
    #Create geodataframe from the data
    geo_fire_data = gpd.GeoDataFrame(fire_data,geometry = gpd.points_from_xy(fire_data.longitude,fire_data.latitude), crs = {'init': 'epsg:4326'}) 

    #Make sure the geometry columns are in the right format
    geo_fire_data = geo_fire_data[['geometry', 'acq_date', 'fireID']]
    geo_fire_data['geometry'] = geo_fire_data['geometry'].to_crs(epsg = 4326)

    #Load date into date format
    geo_fire_data['acq_date'] =  pd.to_datetime(geo_fire_data['acq_date'])
    geo_fire_data['year'] = (geo_fire_data['acq_date']).dt.year
    geo_fire_data['month'] = (geo_fire_data['acq_date']).dt.month
    geo_fire_data['day'] = (geo_fire_data['acq_date']).dt.day

    #Select complete years (2015-2020)
    geo_fire_data = geo_fire_data[(geo_fire_data['acq_date'] >= str(beginyear)+'-01-01') & (geo_fire_data['acq_date'] < str(endyear)+'-01-01')]

    boundaries = gpd.GeoDataFrame(boundaries,geometry = boundaries.geometry, crs = {'init': 'epsg:4326'}) 

    fires_per_boundaries= gpd.sjoin(geo_fire_data, boundaries, how="inner")

    #Create the right time format: we count per month per year
    fires_per_boundaries['day'] = 1 #pd.Series(pd.to_numeric(fires_per_mandal['day'], errors='coerce'), dtype='int64')
    fires_per_boundaries['year'] = pd.Series(pd.to_numeric(fires_per_boundaries['year'], errors='coerce'), dtype='int64')
    fires_per_boundaries['month'] = pd.Series(pd.to_numeric(fires_per_boundaries['month'], errors='coerce'), dtype='int64')
    fires_per_boundaries['ModifiedDateTime'] = pd.to_datetime(fires_per_boundaries[['year', 'month', 'day']].astype('int64').astype('str'), yearfirst=True)

    #Sum amount of fires per mandal per month per year make sure that if no fire happens at a specific time write a zero
    fires_per_boundaries_count = fires_per_boundaries.groupby(['index', 'ModifiedDateTime'])['fireID'].count().unstack(fill_value=0).stack().reset_index()

    fires_per_boundaries_count['Fires'] = fires_per_boundaries_count[0] 

    #Delete column
    del fires_per_boundaries_count[0]
    
    fires_per_boundaries_count['ModifiedDateTime'] =  pd.to_datetime(fires_per_boundaries_count['ModifiedDateTime'])

    #DPPD Scores: The negativer the number, we observe a higher decrease in fires over the years
    #The completely white mandals are the mandals in which no agricultural fires are observed
    ids= []
    scores = []

    for i in range(0, len(boundaries['index'])):
        ids.append(boundaries['index'].iloc[i])
        score = Trend_Score(fires_per_boundaries_count, i)
        scores.append(score)
    
    DPPD_df = pd.DataFrame({'index': ids, 'Slope Score': scores}) 


    DPPD_df = DPPD_df[DPPD_df['Slope Score'] != 'Unknown']
    DPPD_df = DPPD_df.sort_values('Slope Score')
    DPPD_df = DPPD_df.merge(boundaries[['index', 'geometry']], how='left', on=['index'])
    DPPD_df = gpd.GeoDataFrame(DPPD_df, geometry = DPPD_df.geometry, crs = {'init': 'epsg:4326'}) 
    DPPD_df['Slope Score'] = DPPD_df['Slope Score'].astype('float')

    if level == '1kmgrid':
        text = 'Changes in Amount of Fires over the Years STL one km resolution ('+ str(beginyear) +'-'+str(endyear)+')'
    elif level == '10kmgrid':
        text = 'Changes in Amount of Fires over the Years STL ten km resolution ('+ str(beginyear) +'-'+str(endyear)+')'
    elif level == 'mandal':
        text = 'Changes in Amount of Fires over the Years STL mandal level ('+ str(beginyear) +'-'+str(endyear)+')'
    elif level == 'district':
        text = 'Changes in Amount of Fires over the Years STL district level ('+ str(beginyear) +'-'+str(endyear)+')'

    vmin, vmax, vcenter = DPPD_df['Slope Score'].min(), DPPD_df['Slope Score'].max(), 0
    norm = TwoSlopeNorm(vmin= vmin , vcenter=vcenter, vmax= vmax) 
    # create a normalized colorbar
    cmap = 'RdYlGn_r'
    cbar = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
    DPPD_df.plot(column = 'Slope Score', 
                        legend = True, 
                        figsize = [20,10],\
                        legend_kwds = {'label': 'Deviance'}, 
                        cmap = 'RdYlGn_r',
                        norm = norm)
    plt.title(text)
    plt.savefig(text + '.png')
    
    print('Scores are calculated in',  str((time.time()-st)/60) , ' minutes')
    
    return(plt.show())
 


In [None]:
# Levels Available: 1kmgrid, 10kmgrid, mandal, district

In [8]:
fire_counts(2015, 2021, fire_data, one_km_grid, '1kmgrid')

  return _prepare_from_string(" ".join(pjargs))
  boundaries = gpd.GeoDataFrame(boundaries,geometry = boundaries.geometry, crs = {'init': 'epsg:4326'})
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  fires_per_boundaries= gpd.sjoin(geo_fire_data, boundaries, how="inner")
