In [58]:
#  Load the "autoreload" extension so that code can change
%load_ext autoreload
%reload_ext autoreload
#  always reload modules so that as you change code in src, it gets loaded
%autoreload 2
%matplotlib inline

import sys
sys.path.append('../')
import src
from src.imports import *
from src.gen_functions import *
from src.features.map_dataset import MapDataset
# import the Dataset object class
from src.features.dataset import Dataset
from src.features.landuse import *
from src.visualization.mapper import *
from src.visualization.vis_data import *

from src.data.fire_data import *

import shutil
import pyproj

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Separate fire data by year, select a country, and add provinces

In [38]:
# Input folders: world base maps and the M6_proc (MODIS) / V1_proc (VIIRS) fire archives
map_folder = '../data/world_maps/'
mfire_folder = '../data/fire_map/world_2000-2020/M6_proc/'
vfire_folder = '../data/fire_map/world_2000-2020/V1_proc/'

# Output folders: the per-year fire files live below the pollution-map folder
poll_folder = '../data/poll_map/'
thfire_folder = f'{poll_folder}th_fire_years/'
report_folder = '../reports/map/'

In [4]:
# Process the raw fire data. Call this function after loading new data from NASA.
# `instr` is assigned here because this cell runs before any other cell defines it;
# without the assignment the call raises NameError on a fresh kernel (Restart & Run All).
instr = 'MODIS'
add_merc_to_fire(instr=instr)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 461.39it/s]


In [33]:
def split_1fire_by_year(file, start_stop_dates, save_prefix, save_folder, timezone='Asia/Bangkok', chunk=1E6):
    """Split one raw fire CSV into per-year CSV files.

    The file is read in chunks. Each chunk is passed through ``process_fire_data``
    and then sliced with every ``(year, start_date, end_date)`` window. Rows that
    fall inside a window are merged into the file
    ``<save_folder>/th_fire_<save_prefix><year>.csv``.

    Note: the code slices the processed chunk with ``.loc[start:end]`` and reloads
    saved files with a ``datetime`` index, so it relies on ``process_fire_data``
    returning a frame indexed by datetime (presumably named ``datetime`` — confirm
    against that helper).

    Args:
        file: path of the raw fire CSV to read.
        start_stop_dates: iterable of ``(year, start_date, end_date)`` tuples; the
            two dates are used as ``.loc`` slice bounds on the chunk index.
        save_prefix: string inserted between ``th_fire_`` and the year in the filename.
        save_folder: folder that receives the per-year files.
        timezone: forwarded to ``process_fire_data``.
        chunk: number of rows per chunk; converted to ``int`` before use.

    Returns:
        list of the filenames written, one entry per write (names may repeat).
    """
    save_filenames = []
    # read_csv expects an integer chunksize, while the default (1E6) is a float
    chunk = int(chunk)
    # load the fire data in chunks and append the rows to the proper year file
    for fire_df in pd.read_csv(file, chunksize=chunk):
        fire_df = process_fire_data(filename=None, fire=fire_df, and_save=False, timezone=timezone, to_drop=False)
        for year, start_date, end_date in start_stop_dates:
            # os.path.join keeps the result unchanged when save_folder already ends with
            # a separator and inserts one when it does not
            save_filename = os.path.join(save_folder, 'th_fire_' + save_prefix + str(year) + '.csv')
            sub_fire = fire_df.loc[start_date:end_date]
            if len(sub_fire) == 0:
                continue

            if os.path.exists(save_filename):
                # rows for this year were already saved: merge them with the new rows
                exist_sub_fire = pd.read_csv(save_filename)
                exist_sub_fire['datetime'] = pd.to_datetime(exist_sub_fire['datetime'])
                exist_sub_fire = exist_sub_fire.set_index('datetime')
                sub_fire = pd.concat([sub_fire, exist_sub_fire])
                # DataFrame.drop_duplicates() compares columns only and ignores the index,
                # which would merge detections that differ only in their timestamp.
                # Include the index in the comparison so only true repeats are removed.
                is_repeat = sub_fire.assign(_row_time=sub_fire.index).duplicated().values
                sub_fire = sub_fire[~is_repeat]

            sub_fire.to_csv(save_filename, index=True)
            save_filenames.append(save_filename)
    return save_filenames

In [39]:
def split_fires_by_year(year_range, save_folder, instr='MODIS', start_season='07-01', end_season='06-30', timezone='Asia/Bangkok'):
    """Split every raw fire CSV of one instrument into per-season files.

    A season labelled ``y`` runs from ``<y>-<start_season>`` to
    ``<y+1>-<end_season>``. Every ``*.csv`` in the instrument's raw folder is
    passed to ``split_1fire_by_year``.

    Warning: the instrument-specific output folder is deleted and recreated on
    every call.

    Args:
        year_range: iterable of season start years.
        save_folder: base output folder. Every ``'s/'`` in it is replaced by
            ``'s_m/'`` (MODIS) or ``'s_v/'`` (VIIRS) to get the actual folder.
        instr: ``'MODIS'`` or ``'VIIRS'``.
        start_season: ``MM-DD`` on which a season starts.
        end_season: ``MM-DD`` on which a season ends, in the following year.
        timezone: forwarded to ``split_1fire_by_year``.

    Returns:
        numpy array of the unique filenames written.

    Raises:
        ValueError: if ``instr`` is not a supported instrument. This is checked
            before any folder is touched.
    """
    # per-instrument settings: (raw data folder, filename prefix, folder suffix)
    instr_settings = {
        'MODIS': ('../data/fire_map/world_2000-2020/M6_proc/', 'm_', 's_m/'),
        'VIIRS': ('../data/fire_map/world_2000-2020/V1_proc/', 'v_', 's_v/'),
    }
    # Validate first: previously an unknown instrument fell through to the rmtree
    # below with the unmodified save_folder and then failed on an unbound local.
    if instr not in instr_settings:
        raise ValueError(f'unknown instr {instr!r}; expected one of {sorted(instr_settings)}')
    raw_folder, save_prefix, folder_suffix = instr_settings[instr]

    # build the (year, start date, stop date) triples
    start_stop_dates = [(y, f'{y}-{start_season}', f'{y+1}-{end_season}') for y in year_range]
    print(start_stop_dates)

    save_folder = save_folder.replace('s/', folder_suffix)

    # start from an empty output folder; makedirs also creates missing parents
    if os.path.exists(save_folder):
        shutil.rmtree(save_folder)
    os.makedirs(save_folder)

    files = glob(raw_folder + '*.csv')
    print('there are ', len(files) , 'files')

    save_filenames = []
    for file in tqdm(files):
        save_filenames += split_1fire_by_year(file, start_stop_dates, save_prefix, save_folder=save_folder, timezone=timezone)

    return np.unique(save_filenames)

In [31]:
# Instrument whose fire archive is split below
instr = 'MODIS'

# MODIS year range: 2003 up to, but not including, the current calendar year
year_range = np.arange(2003, datetime.now().year)
print(year_range)

[2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
 2017 2018 2019 2020]


In [32]:
# Rebuild the per-year MODIS fire files. NOTE: split_fires_by_year deletes the existing
# output folder before writing; the recorded run below took about 19 minutes for 30 raw files.
split_fires_by_year(year_range=year_range, save_folder=thfire_folder, instr=instr)


  0%|                                                                                                                                | 0/30 [00:00<?, ?it/s][A

[(2003, '2003-07-01', '2004-06-30'), (2004, '2004-07-01', '2005-06-30'), (2005, '2005-07-01', '2006-06-30'), (2006, '2006-07-01', '2007-06-30'), (2007, '2007-07-01', '2008-06-30'), (2008, '2008-07-01', '2009-06-30'), (2009, '2009-07-01', '2010-06-30'), (2010, '2010-07-01', '2011-06-30'), (2011, '2011-07-01', '2012-06-30'), (2012, '2012-07-01', '2013-06-30'), (2013, '2013-07-01', '2014-06-30'), (2014, '2014-07-01', '2015-06-30'), (2015, '2015-07-01', '2016-06-30'), (2016, '2016-07-01', '2017-06-30'), (2017, '2017-07-01', '2018-06-30'), (2018, '2018-07-01', '2019-06-30'), (2019, '2019-07-01', '2020-06-30'), (2020, '2020-07-01', '2021-06-30')]
there are  30 files
before drop (1000000, 19)
after drop (1000000, 11)
before drop (1000000, 19)
after drop (1000000, 11)
before drop (1000000, 19)
after drop (1000000, 11)
before drop (1000000, 19)
after drop (1000000, 11)
before drop (1000000, 19)
after drop (1000000, 11)
before drop (772188, 19)
after drop (772188, 11)



  3%|███▉                                                                                                                 | 1/30 [06:22<3:04:52, 382.51s/it][A

before drop (944228, 19)
after drop (944228, 11)



  7%|███████▊                                                                                                             | 2/30 [07:36<2:15:15, 289.84s/it][A

before drop (696234, 19)
after drop (696234, 11)



 10%|███████████▋                                                                                                         | 3/30 [08:21<1:37:28, 216.61s/it][A

before drop (1000000, 19)
after drop (1000000, 11)
before drop (27201, 19)
after drop (27201, 11)



 13%|███████████████▌                                                                                                     | 4/30 [09:16<1:12:50, 168.10s/it][A

before drop (748354, 19)
after drop (748354, 11)



 17%|███████████████████▊                                                                                                   | 5/30 [09:50<53:15, 127.83s/it][A

before drop (578216, 19)
after drop (578216, 11)



 20%|████████████████████████                                                                                                | 6/30 [10:18<39:11, 97.99s/it][A

before drop (759575, 19)
after drop (759575, 11)



 23%|████████████████████████████                                                                                            | 7/30 [10:54<30:21, 79.19s/it][A

before drop (748082, 19)
after drop (748082, 11)



 27%|████████████████████████████████                                                                                        | 8/30 [11:28<24:04, 65.67s/it][A

before drop (740016, 19)
after drop (740016, 11)



 30%|████████████████████████████████████                                                                                    | 9/30 [12:05<19:55, 56.95s/it][A

before drop (789762, 19)
after drop (789762, 11)



 33%|███████████████████████████████████████▋                                                                               | 10/30 [12:39<16:43, 50.19s/it][A

before drop (661330, 19)
after drop (661330, 11)



 37%|███████████████████████████████████████████▋                                                                           | 11/30 [13:10<14:02, 44.33s/it][A

before drop (781680, 19)
after drop (781680, 11)



 40%|███████████████████████████████████████████████▌                                                                       | 12/30 [13:44<12:26, 41.45s/it][A

before drop (928306, 19)
after drop (928306, 11)



 43%|███████████████████████████████████████████████████▌                                                                   | 13/30 [14:02<09:45, 34.41s/it][A

before drop (593516, 19)



 47%|███████████████████████████████████████████████████████▌                                                               | 14/30 [14:05<06:37, 24.86s/it][A
 50%|███████████████████████████████████████████████████████████▌                                                           | 15/30 [14:05<04:21, 17.44s/it][A

after drop (450545, 11)
before drop (9597, 19)
after drop (0, 11)



 53%|███████████████████████████████████████████████████████████████▍                                                       | 16/30 [14:05<02:51, 12.24s/it][A

before drop (15991, 19)
after drop (0, 11)
before drop (798163, 19)
after drop (798163, 11)



 57%|███████████████████████████████████████████████████████████████████▍                                                   | 17/30 [14:39<04:01, 18.60s/it][A

before drop (686811, 19)
after drop (686811, 11)



 60%|███████████████████████████████████████████████████████████████████████▍                                               | 18/30 [15:50<06:52, 34.41s/it][A

before drop (586567, 19)
after drop (586567, 11)



 63%|███████████████████████████████████████████████████████████████████████████▎                                           | 19/30 [16:17<05:52, 32.08s/it][A

before drop (680281, 19)
after drop (680281, 11)



 67%|███████████████████████████████████████████████████████████████████████████████▎                                       | 20/30 [16:47<05:15, 31.60s/it][A

before drop (198399, 19)
after drop (198399, 11)



 70%|███████████████████████████████████████████████████████████████████████████████████▎                                   | 21/30 [17:02<03:58, 26.54s/it][A

before drop (43053, 19)
after drop (43053, 11)



 73%|███████████████████████████████████████████████████████████████████████████████████████▎                               | 22/30 [17:21<03:14, 24.36s/it][A

before drop (187075, 17)
after drop (187075, 10)



 77%|███████████████████████████████████████████████████████████████████████████████████████████▏                           | 23/30 [17:31<02:20, 20.01s/it][A

before drop (154751, 18)
after drop (154751, 11)



 80%|███████████████████████████████████████████████████████████████████████████████████████████████▏                       | 24/30 [17:44<01:48, 18.04s/it][A

before drop (138692, 18)
after drop (138692, 11)



 83%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                   | 25/30 [17:54<01:17, 15.47s/it][A

before drop (69509, 18)
after drop (69509, 11)



 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 26/30 [18:03<00:54, 13.61s/it][A

before drop (26420, 18)
after drop (26420, 11)



 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████            | 27/30 [18:14<00:38, 12.96s/it][A

before drop (14127, 18)
after drop (14127, 11)



 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 28/30 [18:22<00:22, 11.40s/it][A

before drop (18862, 17)
after drop (18862, 10)



 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 29/30 [18:30<00:10, 10.17s/it][A

before drop (230388, 17)
after drop (230388, 10)



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [18:39<00:00, 37.30s/it][A


In [45]:
# glob returns paths in arbitrary order; sort them so that positional indexing
# (save_filenames[0], save_filenames[-1]) is reproducible across runs and platforms
save_filenames = sorted(glob(thfire_folder.replace('s/', 's_m/') + '*.csv'))
print(len(save_filenames))
print(save_filenames)

18
['../data/poll_map/th_fire_years_m\\th_fire_m_2003.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2004.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2005.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2006.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2007.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2008.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2009.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2010.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2011.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2012.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2013.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2014.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2015.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2016.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2017.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2018.csv', '../data/poll_map/th_fire_years_m\\th_fire_m_2019.csv', '../data/poll_map/th_fire_years_m\\th_fire_m

In [48]:
# Preview the last file in the list
fire = pd.read_csv(save_filenames[-1])
fire.head()

Unnamed: 0,datetime,latitude,longitude,brightness,scan,track,acq_time,confidence,frp,long_km,lat_km,country
0,2021-01-22 08:10:00,15.211,145.764,312.9,1.6,1.2,110,17,12.0,16226.0,1713.0,
1,2021-01-22 08:10:00,15.213,145.771,313.9,1.6,1.2,110,0,14.9,16227.0,1713.0,
2,2021-01-22 08:15:00,-4.061,138.412,300.7,1.0,1.0,115,26,3.6,15407.0,-452.0,
3,2021-01-22 09:45:00,39.436,118.458,300.1,1.3,1.1,245,29,8.6,13186.0,4784.0,
4,2021-01-22 09:45:00,26.918,119.341,303.4,1.0,1.0,245,54,4.5,13284.0,3113.0,


In [56]:
def select_fire_country(save_filenames_list:list, country='Thailand'):
    """Reduce each fire CSV to the hotspots of one country, overwriting the file.

    For every path the datetime and position columns are kept, rows repeating
    the same (datetime, latitude, longitude) are dropped, ``add_countries``
    labels the rows, and only rows whose ``country`` equals ``country`` are
    written back.

    Args:
        save_filenames_list: paths of the CSV files to rewrite in place.
        country: value of the ``country`` column to keep.
    """
    kept_columns = ['datetime', 'latitude', 'longitude', 'long_km', 'lat_km']
    dedup_keys = ['datetime', 'latitude', 'longitude']
    for csv_path in tqdm(save_filenames_list):
        hotspots = pd.read_csv(csv_path)[kept_columns].drop_duplicates(dedup_keys)
        hotspots = add_countries(hotspots)
        in_country = hotspots['country'] == country
        # save over the old file
        hotspots[in_country].to_csv(csv_path, index=False)

In [55]:
# Overwrite each per-year file, keeping only the hotspots labelled as Thailand
select_fire_country(save_filenames_list= save_filenames, country='Thailand')

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=683951.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=787868.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=605473.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=881399.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=811753.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=675804.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=743127.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=544356.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=899562.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=915570.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=1549899.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=2382160.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=3410602.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=618983.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=617562.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=703388.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=802186.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=567327.0, style=ProgressStyle(descript…




In [59]:
# load the Thailand provincial boundary geodatabase
filename = map_folder + 'THA.gdb'
# select the province level (layer 2)
prov_map = gpd.read_file(filename, driver='FileGDB', layer=2)
# Relabel the geometries as EPSG:4326. set_crs with allow_override=True replaces the
# stored CRS without reprojecting any coordinates (to_crs would be the conversion).
crs = pyproj.CRS('EPSG:4326')
prov_map['geometry'] = prov_map['geometry'].set_crs(crs, allow_override=True)

In [60]:
def locate_province(p, gdf, col='admin1Name_en'):
    """Find the province hosting a hotspot.

    Args:
        p: Point object to locate.
        gdf: geopandas dataframe with a ``geometry`` column and a label column.
        col: name of the column holding the label to return.

    Returns:
        The ``col`` value of the first row whose geometry contains ``p``, or
        ``np.nan`` when no geometry contains it or the containment test fails.

    Raises:
        KeyError: if ``col`` is not a column of ``gdf``.
    """
    # Look the label column up outside the try block so a misspelled column name
    # fails loudly instead of silently labelling every hotspot as NaN.
    names = gdf[col]
    try:
        inside = gdf['geometry'].contains(p)
    except Exception:
        # Best effort for a point that cannot be tested. Unlike a bare ``except:``,
        # this lets KeyboardInterrupt through so a long apply can be cancelled.
        return np.nan

    hits = names[inside].values
    if len(hits) == 0:
        return np.nan
    return hits[0]

In [61]:
def add_provinces(save_filenames_list, prov_map, col='admin1Name_en'):
    """Add a ``province`` column to each fire CSV, overwriting the file.

    Every row's (longitude, latitude) pair is turned into a Point and looked up
    with ``locate_province``. The temporary ``geometry`` column is removed
    before the file is saved.

    Args:
        save_filenames_list: paths of the CSV files to update in place.
        prov_map: geopandas dataframe passed to ``locate_province`` as ``gdf``.
        col: label column of ``prov_map`` passed to ``locate_province``.
    """
    for csv_path in tqdm(save_filenames_list):
        hotspots = pd.read_csv(csv_path)
        lon_lat_pairs = zip(hotspots['longitude'], hotspots['latitude'])
        hotspots['geometry'] = [Point(lon, lat) for lon, lat in lon_lat_pairs]
        # look up the province of every point
        hotspots['province'] = hotspots['geometry'].swifter.apply(locate_province, gdf=prov_map, col=col)
        hotspots.drop(columns='geometry').to_csv(csv_path, index=False)

In [62]:
# Add a 'province' column to every per-year file in place.
# The recorded run below took about 1h18m for 18 files.
add_provinces(save_filenames_list=save_filenames, prov_map=prov_map, col='admin1Name_en')


  0%|                                                                                                                                | 0/18 [00:00<?, ?it/s][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=48571.0, style=ProgressStyle(descripti…





  6%|██████▌                                                                                                              | 1/18 [06:45<1:54:50, 405.30s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=52797.0, style=ProgressStyle(descripti…





 11%|█████████████                                                                                                        | 2/18 [14:22<1:52:16, 421.01s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=24935.0, style=ProgressStyle(descripti…







 17%|███████████████████▌                                                                                                 | 3/18 [17:58<1:29:49, 359.29s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=42902.0, style=ProgressStyle(descripti…





 22%|██████████████████████████                                                                                           | 4/18 [23:44<1:22:55, 355.37s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=26002.0, style=ProgressStyle(descripti…







 28%|████████████████████████████████▌                                                                                    | 5/18 [27:14<1:07:34, 311.88s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=25540.0, style=ProgressStyle(descripti…


 33%|███████████████████████████████████████▋                                                                               | 6/18 [30:45<56:16, 281.36s/it][A




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=33873.0, style=ProgressStyle(descripti…





 39%|██████████████████████████████████████████████▎                                                                        | 7/18 [35:15<50:57, 277.99s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=18556.0, style=ProgressStyle(descripti…


 44%|████████████████████████████████████████████████████▉                                                                  | 8/18 [38:00<40:40, 244.09s/it][A




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=30950.0, style=ProgressStyle(descripti…





 50%|███████████████████████████████████████████████████████████▌                                                           | 9/18 [42:09<36:52, 245.82s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=32645.0, style=ProgressStyle(descripti…





 56%|█████████████████████████████████████████████████████████████████▌                                                    | 10/18 [47:02<34:39, 259.91s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=31065.0, style=ProgressStyle(descripti…





 61%|████████████████████████████████████████████████████████████████████████                                              | 11/18 [51:36<30:49, 264.14s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=29846.0, style=ProgressStyle(descripti…





 67%|██████████████████████████████████████████████████████████████████████████████▋                                       | 12/18 [56:03<26:29, 264.86s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=30951.0, style=ProgressStyle(descripti…





 72%|███████████████████████████████████████████████████████████████████████████████████▊                                | 13/18 [1:00:41<22:24, 268.92s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=18577.0, style=ProgressStyle(descripti…


 78%|██████████████████████████████████████████████████████████████████████████████████████████▏                         | 14/18 [1:03:36<16:02, 240.62s/it][A




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=16922.0, style=ProgressStyle(descripti…


 83%|████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 15/18 [1:06:16<10:49, 216.40s/it][A




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=32557.0, style=ProgressStyle(descripti…





 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████             | 16/18 [1:11:08<07:58, 239.05s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=32454.0, style=ProgressStyle(descripti…





 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▌      | 17/18 [1:15:54<04:13, 253.18s/it][A

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=15902.0, style=ProgressStyle(descripti…


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [1:18:20<00:00, 261.16s/it][A







In [65]:
# Spot-check the first file in the list after the country and province steps
df = pd.read_csv(save_filenames[0])
df.head()

Unnamed: 0,datetime,latitude,longitude,long_km,lat_km,country,province
0,2003-07-04 11:07:00,15.127,100.29,11164.231732,1703.840795,Thailand,Chai Nat
1,2003-07-04 14:08:00,15.0607,100.5349,11191.493875,1696.196592,Thailand,Lop Buri
2,2003-07-04 14:08:00,16.1572,100.0197,11134.142073,1822.934595,Thailand,Nakhon Sawan
3,2003-07-04 14:08:00,16.1602,100.0244,11134.665275,1823.282289,Thailand,Nakhon Sawan
4,2003-07-06 10:54:00,14.6489,101.0417,11247.910593,1648.770053,Thailand,Saraburi
