In [7]:
import geopandas as gpd
import pandas as pd
import rasterio
from rasterstats import zonal_stats
import matplotlib.pyplot as plt
from rasterio.plot import show
import os

In [6]:
#Load data
os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\geospatial_internship\\datasets')
fire_data = pd.read_csv('telangana_fires.csv')
telangana_shape = gpd.read_file('telangana_shapefile.geojson')

os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\src\\data_preprocessing\\tsdm\\')
district_boundaries = gpd.read_file('District_Boundary.shp')

In [11]:
#Create geodataframe from the data
geo_fire_data = gpd.GeoDataFrame(fire_data,geometry = gpd.points_from_xy(fire_data.longitude,fire_data.latitude), crs = {'init': 'epsg:4326'}) 

#Make sure the
geo_fire_data['geometry'] = geo_fire_data['geometry'].geometry.to_crs(epsg = 4326)
district_boundaries['geometry'] = district_boundaries['geometry'].geometry.to_crs(epsg = 4326)

  return _prepare_from_string(" ".join(pjargs))


# Classification (Using Shivangs Approach)

###Reference : https://github.com/luckyw0w/Data4Policy/tree/main/Geospatial%20Data%20Science%20Internship/ShivangPandey

So, the buffer function of geopandas (based on the shapely library) is to create a buffer polygon around a point. The first parameter of the buffer is to tell length in the degree to mark the point in a square pixel and 'cap_style' to tell the type of polygon. here 'cap_style = 3' means square polygon for buffer point. 

We need to create a polygon, because MODIS Thermal Anomalies & Fire Daily data is calculated on 1km resolution and given esri LULC data is at 10m resolution and we can say it like that 1 pixel of MODIS fire data should be as big as 10000 pixels of LULC map.
How we are going to use that? by creating a polygon as a size of 10000 pixels of LULC map.


In [4]:
polygons = []

#-------create a buffer of square of 1km size using buffer function from shapely-----#

for i in geo_fire_data.geometry:
    p1 = i
    buffer = p1.buffer(0.004504505, cap_style = 3)         #500m = 0.004504505 and cap_style 3 is square box of same of 1km side length
    polygons.append(buffer)

#create a new column in GeoDataFrame newdf and dump polygon buffer of respective point values
geo_fire_data['geometry buffered'] = polygons 

Calculating Probability will be done as follows, 10000 pixels of LULC can have pixels of all classes. We just have to take the ratio of the number of pixels of crop class which is 5 and the total number of pixels within polygon:
P(A) = No. of crop class(5) pixels / Total no. of pixels.

###Reference: https://www.arcgis.com/home/item.html?id=d6642f8a4f6d4685a24ae2dc0c73d4ac

In [8]:
os.chdir('c:\\Users\\Jesse\\OneDrive\\Documenten\\Master BAOR\\Thesis\\GitHub\\dicra\\analytics\\notebooks\\crop_fires')
tiff = 'vmcx_mosiac_reprj.tif'

data = geo_fire_data
data['index'] = data.index

#------------------Checking probability in mosaic tiff file----------------------#
import time                                      #to calculate time taken to run the model
start_time = time.time()                         #start time of the program

index_list = []                               #declaring empty to get corresponding id of fire point
flag_list = []                                #empty list to store class of corresponding fire points based polygon
    
for j in range(len(data)):                #iterating all rows of dataframe to get point info

    stats = zonal_stats(data.iloc[j].geometry, tiff, stats="*", categorical=True)         #getting statistics from the raster point 
    i = stats[0]                                                                          #storing statsistical dictionary in a value
    if i['count'] != 0:                                                                   #check if polygon is within the tiff file or not
        index = data.iloc[j]['index']                                                     #get id of polygon
        index_list.append(index)                                                          #store id in a list
        if (5 in i) or (4 in i): 
            #print(i)                                                         #check if crop class or flooded vegitation is in polygon region or not
            flag = 1                                                                      #Mark whether occurence is in crop field
        else:
            flag = 0                                                                      # if crop class is not in polygon, return 0 pixels
        flag_list.append(flag)                                                            #store class value in a list
        
print("--- %s seconds ---" % (time.time() - start_time))                                  #print total time taken to run code 

#dictionary created wth fireID and class (1,0)
id_class = {key: value for key, value in zip(index_list, flag_list)}

#creating tuple of each key and  value pair
data_items = id_class.items()                                                
#dumping all tuples in a list
data_list = list(data_items)

#creating DataFrame with id and class values
class_df = pd.DataFrame(data_list, columns= ['fireID','class (1,0)'])




{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 'majority': 5.0, 'minority': 5.0, 'unique': 1, 'range': 0.0, 'nodata': 0.0, 'nan': 0}
{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 'majority': 5.0, 'minority': 5.0, 'unique': 1, 'range': 0.0, 'nodata': 0.0, 'nan': 0}
{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 'majority': 5.0, 'minority': 5.0, 'unique': 1, 'range': 0.0, 'nodata': 0.0, 'nan': 0}
{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 'majority': 5.0, 'minority': 5.0, 'unique': 1, 'range': 0.0, 'nodata': 0.0, 'nan': 0}
{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 'majority': 5.0, 'minority': 5.0, 'unique': 1, 'range': 0.0, 'nodata': 0.0, 'nan': 0}
{5: 1, 'min': 5.0, 'max': 5.0, 'mean': 5.0, 'count': 1, 'sum': 5.0, 'std': 0.0, 'median': 5.0, 

In [6]:
geo_fire_data['agricultural'] = class_df['class (1,0)']

In [7]:
geo_fire_data[['fireID', 'latitude', 'longitude', 'brightness', 'scan', 'track',
       'acq_date', 'acq_time', 'satellite', 'instrument', 'confidence',
       'version', 'bright_t31', 'frp', 'daynight', 'type', 'geometry','agricultural']].to_csv('fires_data_classified.csv')