In [34]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import datetime

from datacube import Datacube
import matplotlib.pyplot as plt

sys.path.append("../scripts")
from dea_plotting import map_shapefile
from dea_spatialtools import xr_rasterize

**Table of contents** <a id="top"></a>
- [Data](#data)
- [Auxiliary functions](#auxiliary)
- [Mapping train linescan to test linescan](#train_test_match)

In [35]:
# Open a datacube connection for this notebook.
dc = Datacube(app="Getting started")

# Collect every dataset of the 'linescan' product, ordered by center_time and
# then by id, so that positions in this list are deterministic.
linescan_datasets = sorted(
    dc.find_datasets(product='linescan'),
    key=lambda dataset: (dataset.center_time, dataset.id),
)

## Data <a id="data"></a>

[Back to top](#top)

In [36]:
# Training table, indexed by the 'id' column of the CSV.
train_df = pd.read_csv('resources/challenge1_train.csv', index_col='id')
# Vector file read into a GeoDataFrame (presumably the fire boundary
# polygons, going by the file name).
vector_file = 'resources/fire_boundaries.shp'
gdf = gpd.read_file(vector_file)

In [37]:
# Preview the first rows of the training table.
train_df.head()

Unnamed: 0_level_0,label,dateTimeLocal,dateTimeUTC
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,ROSEDALE_P1_201901041439_MGA94_55,4/01/2019 14:39,4/01/2019 3:39
1,ROSEDALE_1_P1_201901041446_MGA94_55,4/01/2019 14:46,4/01/2019 3:46
2,ROSEDALE_3_P1_201901041501_MGA94_55,4/01/2019 15:01,4/01/2019 4:01
3,POINT_H_62_P1_201901162128_MGA94_55,16/01/2019 21:28,16/01/2019 10:28
4,NUNNETT_73_P1_201901171134_MGA94_55,17/01/2019 11:34,17/01/2019 0:34


In [38]:
# Preview the first rows of the GeoDataFrame read from the vector file.
gdf.head()

Unnamed: 0,Source,SourceName,Type,dtString,Comments,dtUTC,dtLocal,ID,event,geometry
0,Linescan,aberfeldy west 200 p1_201901260955_mga94_55.jpg,Initial ignition,2019/01/26 09:55:00.000,originally Latrobe 87 - Jericho - Jim Track,2019-01-25 22:55:00,2019-01-26 09:55:00,0,Latrobe86,"POLYGON ((431455.605 5829806.837, 431460.570 5..."
1,Linescan,aberfeldy west 200 p1_201901260955_mga94_55.jpg,Initial ignition,2019/01/26 09:55:00.000,originally Latrobe 86 - Jericho - Thomson - Jo...,2019-01-25 22:55:00,2019-01-26 09:55:00,1,Latrobe86,"POLYGON ((433874.449 5827986.798, 433882.377 5..."
2,Linescan,aberfeldy west 200 p1_201901260955_mga94_55.jpg,Initial ignition,2019/01/26 09:55:00.000,originally Latrobe 85 - Jericho - Cream Can Hill,2019-01-25 22:55:00,2019-01-26 09:55:00,2,Latrobe86,"POLYGON ((430459.914 5831285.492, 430457.641 5..."
3,Linescan,aberfeldy west 214 p1_201901261750_mga94_55.jpg,Spot,2019/01/26 17:50:00.000,originally Latrobe 87 - Jericho - Jim Track,2019-01-26 06:50:00,2019-01-26 17:50:00,3,Latrobe86,"POLYGON ((431937.376 5829943.371, 431941.068 5..."
4,Linescan,aberfeldy west 214 p1_201901261750_mga94_55.jpg,Main run,2019/01/26 17:50:00.000,originally Latrobe 87 - Jericho - Jim Track,2019-01-26 06:50:00,2019-01-26 17:50:00,4,Latrobe86,"POLYGON ((431541.651 5829804.025, 431547.139 5..."


## Auxiliary functions <a id="auxiliary"></a>

[Back to top](#top)

In [39]:
def get_linescan(index=None, label=None, resolution=(-10,10), linescan_datasets=linescan_datasets):
    """
    Load the ``linescan`` variable of one dataset of the 'linescan' product.

    The dataset is selected either by its position in ``linescan_datasets``
    or by its label. When both are given, ``index`` takes precedence.

    :param index: position in ``linescan_datasets`` of the dataset to load
    :param label: label passed to ``dc.load``; only used when ``index`` is None
    :param resolution: resolution passed to ``dc.load``
    :param linescan_datasets: sequence of datasets that ``index`` refers to
        (the default is bound to the module-level list when the function is defined)
    :return: the ``linescan`` attribute of the loaded data, or None when
        neither ``index`` nor ``label`` is given
    """
    # Build the one query argument that differs between the two lookup modes.
    if index is not None:
        selector = {'id': linescan_datasets[index].id}
    elif label is not None:
        selector = {'label': label}
    else:
        return None

    loaded = dc.load(product='linescan', output_crs='epsg:28355',
                     resolution=resolution, **selector)
    return loaded.linescan

def plot_linescan(index=None, label=None, resolution=(-10,10), linescan_datasets=linescan_datasets):
    """
    Load a linescan with ``get_linescan`` and plot it.

    All arguments are forwarded unchanged to ``get_linescan``.

    :param index: position in ``linescan_datasets`` of the dataset to plot
    :param label: label of the dataset to plot; only used when ``index`` is None
    :param resolution: resolution forwarded to ``get_linescan``
    :param linescan_datasets: sequence of datasets that ``index`` refers to
    :return: None
    """
    # get_linescan returns None when neither index nor label is supplied, in
    # which case the .plot() call below raises AttributeError.
    get_linescan(index, label, resolution, linescan_datasets).plot()
    
def clean_name(name):
    """
    Normalise a source image name.

    The name is upper-cased, a trailing ".JPG" extension (matched
    case-insensitively) is removed, and spaces are replaced with underscores.

    :param name: source name string, or None
    :return: the normalised name, or None when ``name`` is None
    """
    if name is None:
        return None

    upper_name = name.upper()
    if upper_name.endswith(".JPG"):
        # Drop the four-character extension before replacing spaces.
        upper_name = upper_name[:-4]
    return upper_name.replace(' ', '_')
    
    
    




[Back to top](#top)

In [40]:
# Show the label column of the training table.
train_df.label

id
0              ROSEDALE_P1_201901041439_MGA94_55
1            ROSEDALE_1_P1_201901041446_MGA94_55
2            ROSEDALE_3_P1_201901041501_MGA94_55
3            POINT_H_62_P1_201901162128_MGA94_55
4            NUNNETT_73_P1_201901171134_MGA94_55
                         ...                    
124    MACALISTER97_806_P1_201903161522_MGA94_55
125    MACALISTER97_807_P1_201903161532_MGA94_55
126    MACALISTER99_623_P1_201903051858_MGA94_55
127    MACALISTER99_646_P1_201903070440_MGA94_55
128    MACALISTER99_683_P1_201903080529_MGA94_55
Name: label, Length: 129, dtype: object

## Mapping train linescan to test linescan <a id="train_test_match"></a>

[Back to top](#top)

In [41]:
# Normalise the source image names with clean_name (upper case, no ".JPG"
# suffix, spaces -> underscores). Mapping over the single column avoids the
# row-by-row DataFrame.apply(axis=1).
gdf['SourceNameClean'] = gdf['SourceName'].map(clean_name)

# Parse the timestamp columns in one vectorised call each. pd.to_datetime
# leaves an already-parsed datetime column unchanged, so this cell can be
# re-run safely; datetime.strptime raised TypeError on a second run because
# the column no longer held strings.
gdf['dtUTC'] = pd.to_datetime(gdf['dtUTC'], format='%Y-%m-%d %H:%M:%S')
gdf['dtLocal'] = pd.to_datetime(gdf['dtLocal'], format='%Y-%m-%d %H:%M:%S')

In [42]:
# Map every training label to the matching cleaned source name in gdf, or to
# None when the label does not appear in gdf.SourceNameClean.
# The lookup set is built once rather than on every iteration.
known_source_names = set(gdf.SourceNameClean)

label_to_srcname_dict = {}

# Iterate over the label values directly: train_df.label[i] is a label-based
# lookup on the 'id' index and only works while the ids are exactly 0..n-1.
for label in train_df.label:
    label_to_srcname_dict[label] = label if label in known_source_names else None

In [43]:
# Inspect the label -> source-name mapping; None marks a label that is not
# present in gdf.SourceNameClean.
label_to_srcname_dict

{'ROSEDALE_P1_201901041439_MGA94_55': None,
 'ROSEDALE_1_P1_201901041446_MGA94_55': None,
 'ROSEDALE_3_P1_201901041501_MGA94_55': None,
 'POINT_H_62_P1_201901162128_MGA94_55': None,
 'NUNNETT_73_P1_201901171134_MGA94_55': None,
 'NUNNETT_88_P1_201901171656_MGA94_55': None,
 'NUNNETT_96_P1_201901172230_MGA94_55': None,
 'NUNNETT_107_P1_201901181517_MGA94_55': None,
 'NUNNETT_121_P1_201901191642_MGA94_55': None,
 'NUNNETT_128_P1_201901192201_MGA94_55': None,
 'NUNNETT_173_P1_201901251120_MGA94_55': None,
 'NUNNETT_174_P1_201901251128_MGA94_55': None,
 'NUNNETT_175_P1_201901251137_MGA94_55': None,
 'NUNNETT_176_P1_201901251157_MGA94_55': None,
 'NUNNETT_177_P1_201901251211_MGA94_55': None,
 'NUNNETT_179_P1_201901251234_MGA94_55': None,
 'NUNNETT_180_P1_201901251246_MGA94_55': None,
 'NUNNETT_181_P1_201901251254_MGA94_55': None,
 'NUNNETT_182_P1_201901251321_MGA94_55': None,
 'NUNNETT_183_P1_201901251342_MGA94_55': None,
 'NUNNETT_184_P1_201901251400_MGA94_55': None,
 'NUNNETT_185_P1_20190

In [54]:
# Select rows whose upper-cased source name contains a comma, an ampersand or
# the word COMPOSITE; rows with a missing SourceName are excluded (na=False).
# The alternation has no capture group: "(COMPOSITE)" selects the same rows
# but makes str.contains emit a UserWarning about match groups.
composites = gdf[gdf.SourceName.str.upper().str.contains(",|&|COMPOSITE", na=False)]
list(composites.SourceNameClean.unique())

['COMPOSITE_WALLHALLA_397,398_&_401_20190225_(1311_TO_1342HRS)',
 'MACALISTER_681_&_682,_(684_&_685_MINOR)_COMPOSITE_0517-0539HRS',
 'COMPOSITE_MACALISTER97_803,_806_&__807_1455-1532HRS',
 'MACALISTER91_751,_752,_755,_760,_761_&_762_COMPOSITE_1549-1730HRS',
 'MACALISTER91_766,767_&_770_COMPOSITE_1239-1314HRS',
 'MACALISTER91_775,_776,_779_&_783_COMPOSITE_1345-1643HRS',
 'MACALISTER91_789,_790_&_793_COMPOSITE_1330-1358HRS',
 'MACALISTER91_804,_805_&_808_COMPOSITE_1503-1539HRS',
 'MACALISTER91_646,648_&_649_COMPOSITE_-_0440HRS_TO_0453HRS',
 'MACALISTER_695,698_&_699_COMPOSITE_1542_-_1610HRS',
 'MACALISTER_696,697,700_-_1545-1613HRS',
 'COMPOSITE_WALLHALLA_380_&_381_20190206_(1347_&_1356HRS)']