# Camera Data Test

## Declare Paths

In [None]:
# Dataset root; every other location below is derived from it.
working_dir = "/Volumes/Samsung_T5/WeatherNet_V2"
data_dir = "/".join((working_dir, "npy_dataset"))

# One folder of .npy files per camera.
flir_path = "/".join((data_dir, "flir"))
top_path = "/".join((data_dir, "top"))
bottom_path = "/".join((data_dir, "bottom"))

# Weather feature table and the matching label table.
weather_data_dir = "/".join((data_dir, "weather", "30_min"))
weather_data_input_path = "/".join((weather_data_dir, "scaled_30_min_weather.csv"))
weather_data_labels_path = "/".join((weather_data_dir, "30_labels.csv"))

## Import Modules

In [None]:
import os
from tqdm import trange

# Data Manipulation
from PIL import Image
import pandas as pd
import glob 

# Numeric operations 
import numpy as np 
from random import shuffle 
from itertools import chain 

# Data Generator
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils import to_categorical 

# Deep Learning Layers
from tensorflow.keras.layers import Input, ConvLSTM2D, Concatenate,Dropout,TimeDistributed, SeparableConv2D, GlobalAveragePooling2D,Dense,GlobalAveragePooling3D,MaxPooling2D, LSTM

# General TF and Keras
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.losses import categorical_crossentropy
from sklearn.metrics import classification_report

## Generator Class

In [None]:
class SeqDataGenerator(Sequence):
    """Keras ``Sequence`` yielding camera sequences, a weather window and labels.

    Every batch is ``([X_flir, X_bottom, X_top, X_df], y)`` where

    * the three camera arrays have shape ``(n_samples, *dim)``,
    * ``X_df`` has shape ``(n_samples, seq_len, df_data.shape[1])``,
    * ``y`` has shape ``(n_samples, n_class)`` and integer dtype.

    Parameters
    ----------
    sample_ids : sequence of str
        Sample identifiers. ``<ID>.npy`` is loaded from each camera folder and
        ``ID`` is looked up in ``df_data.index`` and ``labels.index``.
    labels : pandas.DataFrame
        Label rows addressed with ``labels.loc[ID]``; each row must provide
        ``n_class`` values.
    seq_len : int
        Number of consecutive ``df_data`` rows (ending at the sample's own
        row) returned for each sample.
    df_data : pandas.DataFrame
        Weather table whose index contains the sample IDs.
    flir_path, top_path, bottom_path : str
        Folders holding the per-sample ``.npy`` files for each camera.
    batch_size : int
        Number of samples per batch.
    n_class : int
        Width of the label vector.
    dim : tuple
        Shape of a single loaded ``.npy`` array.
    shuffle : bool
        Whether the sample order is re-drawn at the end of every epoch.
    """

    def __init__(self,sample_ids,labels,seq_len,df_data,flir_path,top_path,
                  bottom_path,batch_size=32,n_class=3,dim=(4,3,480,640),shuffle=True):
        # Newer Keras versions expect Sequence subclasses to initialise the
        # base class; on older versions this is a harmless no-op.
        super().__init__()

        self.dim = dim
        self.batch_size = batch_size
        self.sample_ids = sample_ids
        self.labels = labels
        self.n_class = n_class
        self.shuffle = shuffle

        self.seq_len = seq_len
        self.df_data = df_data

        self.flir_path = flir_path
        self.top_path = top_path
        self.bottom_path = bottom_path

        # Build the initial sample ordering.
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.sample_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Only whole batches are produced; a trailing partial batch is dropped.
        return int(np.floor(len(self.sample_ids) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        sample_ids_temp = [self.sample_ids[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(sample_ids_temp,indexes)

        return X, y

    def __data_generation(self, sample_ids_temp,indexes):
        """Load the arrays for the given sample IDs.

        Parameters
        ----------
        sample_ids_temp : list of str
            IDs of the samples that make up this batch.
        indexes : array-like
            Positions of those IDs in ``self.sample_ids`` (kept for interface
            compatibility; not needed to load the data).

        Returns
        -------
        ([X_flir, X_bottom, X_top, X_df], y)

        Raises
        ------
        IndexError
            If a sample ID is not present in ``df_data.index``.
        ValueError
            If fewer than ``seq_len`` weather rows exist up to and including
            the sample's row.
        """
        # Size the buffers from the IDs actually requested so that no
        # uninitialised rows can ever be returned.
        n_samples = len(sample_ids_temp)

        X_flir = np.empty((n_samples, *self.dim))
        X_bottom = np.empty((n_samples, *self.dim))
        X_top = np.empty((n_samples, *self.dim))

        # X_df : (n_samples, seq_len, self.df_data.shape[1])
        X_df = np.empty((n_samples, self.seq_len, self.df_data.shape[1]))

        y = np.empty((n_samples, self.n_class), dtype=int)

        for i, ID in enumerate(sample_ids_temp):
            file_name = ID + ".npy"

            # Camera sequences
            X_flir[i,] = np.load(self.flir_path + '/' + file_name)
            X_bottom[i,] = np.load(self.bottom_path + '/' + file_name)
            X_top[i,] = np.load(self.top_path + '/' + file_name)

            # Weather window: the seq_len rows ending at this sample's row.
            matches = np.where(self.df_data.index == ID)[0]
            if matches.size == 0:
                raise IndexError(
                    "sample id {!r} not found in df_data.index".format(ID))
            stop = matches[0] + 1
            start = stop - self.seq_len
            if start < 0:
                # A negative start would wrap around to the end of the table
                # instead of selecting the preceding rows.
                raise ValueError(
                    "sample id {!r} has only {} weather row(s) of history; "
                    "seq_len={} required".format(ID, stop, self.seq_len))
            X_df[i,] = self.df_data.iloc[start:stop, :].values

            # Label vector
            y[i, :] = self.labels.loc[ID].values

        return [X_flir,X_bottom,X_top,X_df], y

## Get names of images

In [None]:
def _npy_basenames(directory):
    """Return the sorted base names of the ``.npy`` files directly inside ``directory``."""
    return sorted(os.path.basename(path) for path in glob.glob(directory + "/*.npy"))


flir_files = _npy_basenames(flir_path)
top_files = _npy_basenames(top_path)
bottom_files = _npy_basenames(bottom_path)

if flir_files == top_files == bottom_files:
    print("ALL SAMPLES FOR EACH CLASS ARE MATCHING!")
else:
    # The generator loads "<ID>.npy" from all three folders, so a file that is
    # missing from any of them will fail at batch time; surface it here.
    _all_names = set(flir_files) | set(top_files) | set(bottom_files)
    _common_names = set(flir_files) & set(top_files) & set(bottom_files)
    _unmatched = sorted(_all_names - _common_names)
    print("WARNING: {} sample file(s) are not present in all three camera folders, e.g. {}"
          .format(len(_unmatched), _unmatched[:5]))

image_names = bottom_files

## Load Weather df

In [None]:
# Read the weather table and promote the CSV's unnamed first column to the
# index (the column itself is removed from the frame by set_index).
weather_df = pd.read_csv(weather_data_input_path).set_index("Unnamed: 0")
weather_df.head()

### Weather df Columns
["Average 60 m temperature",
"Average 60 m wind speed",
"Vector-averaged 60 m wind speed",
"Vector-averaged 60 m wind direction",
"Standard deviation of 60 m wind direction",
"Total precipitaion for the period",
"Estimated heat flux",
"Estimated friction velocity",
"Average 10 m temperature",
"Average 10 m wind speed",
"Vector-averaged 10 m wind speed",
"Vector-averaged 10 m wind direction",
"Standard deviation of 10 m wind direction",
"Average global irrradiation",
"Average net radiation",
"Estimated surface roughness length",
"Average 10 m vapor pressure",
"Average 10 m dew point temperature",
"target",
"month_1",
"month_2",
"month_3",
"month_4",
"month_5",
"hour_min_00_00",
"hour_min_00_15",
"hour_min_00_30",
"hour_min_00_45",
"hour_min_01_00",
"hour_min_01_15",
"hour_min_01_30",
"hour_min_01_45",
"hour_min_02_00",
"hour_min_02_15",
"hour_min_02_30",
"hour_min_02_45",
"hour_min_03_00",
"hour_min_03_15",
"hour_min_03_30",
"hour_min_03_45",
"hour_min_04_00",
"hour_min_04_15",
"hour_min_04_30",
"hour_min_04_45",
"hour_min_05_00",
"hour_min_05_15",
"hour_min_05_30",
"hour_min_05_45",
"hour_min_06_00",
"hour_min_06_15",
"hour_min_06_30",
"hour_min_06_45",
"hour_min_07_00",
"hour_min_07_15",
"hour_min_07_30",
"hour_min_07_45",
"hour_min_08_00",
"hour_min_08_15",
"hour_min_08_30",
"hour_min_08_45",
"hour_min_09_00",
"hour_min_09_15",
"hour_min_09_30",
"hour_min_09_45",
"hour_min_10_00",
"hour_min_10_15",
"hour_min_10_30",
"hour_min_10_45",
"hour_min_11_00",
"hour_min_11_15",
"hour_min_11_30",
"hour_min_11_45",
"hour_min_12_00",
"hour_min_12_15",
"hour_min_12_30",
"hour_min_12_45",
"hour_min_13_00",
"hour_min_13_15",
"hour_min_13_30",
"hour_min_13_45",
"hour_min_14_00",
"hour_min_14_15",
"hour_min_14_30",
"hour_min_14_45",
"hour_min_15_00",
"hour_min_15_15",
"hour_min_15_30",
"hour_min_15_45",
"hour_min_16_00",
"hour_min_16_15",
"hour_min_16_30",
"hour_min_16_45",
"hour_min_17_00",
"hour_min_17_15",
"hour_min_17_30",
"hour_min_17_45",
"hour_min_18_00",
"hour_min_18_15",
"hour_min_18_30",
"hour_min_18_45",
"hour_min_19_00",
"hour_min_19_15",
"hour_min_19_30",
"hour_min_19_45",
"hour_min_20_00",
"hour_min_20_15",
"hour_min_20_30",
"hour_min_20_45",
"hour_min_21_00",
"hour_min_21_15",
"hour_min_21_30",
"hour_min_21_45",
"hour_min_22_00",
"hour_min_22_15",
"hour_min_22_30",
"hour_min_22_45",
"hour_min_23_00",
"hour_min_23_15",
"hour_min_23_30",
"hour_min_23_45"]

### Drop columns depending on test

In [None]:
# Columns removed from the weather table for this test. The spellings
# (including "precipitaion" and "irrradiation") are kept exactly as written
# because they are used as column labels when dropping.
drop_col = [
    # 60 m measurements
    "Average 60 m temperature",
    "Average 60 m wind speed",
    "Vector-averaged 60 m wind speed",
    "Vector-averaged 60 m wind direction",
    "Standard deviation of 60 m wind direction",
    # precipitation / flux
    "Total precipitaion for the period",
    "Estimated heat flux",
    "Estimated friction velocity",
    # 10 m measurements
    "Average 10 m temperature",
    "Average 10 m wind speed",
    "Vector-averaged 10 m wind speed",
    "Vector-averaged 10 m wind direction",
    "Standard deviation of 10 m wind direction",
    # radiation / surface / humidity
    "Average global irrradiation",
    "Average net radiation",
    "Estimated surface roughness length",
    "Average 10 m vapor pressure",
    "Average 10 m dew point temperature",
]

In [None]:
# Keep only the columns that are not listed in drop_col.
# NOTE(review): the column list above includes "target", which is not in
# drop_col and therefore stays among the model inputs - confirm that is intended.
weather_df = weather_df.drop(columns=drop_col)
weather_df.head()

## Load labels

In [None]:
# Read the label table, index it by its 'time_stamp' column (which is removed
# from the frame in the process) and name the remaining column 'target'.
labels = pd.read_csv(weather_data_labels_path).set_index('time_stamp')
labels.columns = ['target']
labels.head()

In [None]:
# Sample IDs are the image file names without their extension. splitext strips
# only the final ".npy", so an ID that itself contains dots stays intact and
# still matches the "<ID>.npy" file the generator loads.
image_ids = [os.path.splitext(name)[0] for name in image_names]

# Keep, in their original order, only the IDs that have a label row.
labelled_ids = set(labels.index)
label_index = [sample_id for sample_id in image_ids if sample_id in labelled_ids]
labels = labels.loc[label_index]

# One-hot encode 'target' into target_<value> columns; the original column is
# replaced by the encoded ones.
labels = pd.get_dummies(labels, columns=['target'], prefix='target')

labels.head()

## Split image file list between train, val, and test

In [None]:
# Hold-out sizes, counted from the end of the ordered sample list:
# [ ... train ... | 250 validation | 750 test ]
n_test = 750
n_val = 250
n_holdout = n_test + n_val

train_image_names = label_index[:-n_holdout]
train_labels = labels.iloc[:-n_holdout, :]

# The validation slice needs an explicit start AND stop so that it selects the
# same rows as val_image_names (a leading ':' would turn the second bound
# into a step).
val_image_names = label_index[-n_holdout:-n_test]
val_labels = labels.iloc[-n_holdout:-n_test, :]

test_image_names = label_index[-n_test:]
test_labels = labels.iloc[-n_test:, :]

## Declare Generators

In [None]:
# dim ~ (# frames per seq, channels, height, width)
params = dict(dim=(4, 3, 480, 640),
              batch_size=32,
              n_class=3,
              shuffle=False)

seq_len = 4

# Sample IDs for each split come straight from the label index.
train_sample_ids = train_labels.index.values
val_sample_ids = val_labels.index.values
test_sample_ids = test_labels.index.values

# Positional arguments that are identical for all three generators.
_shared_gen_args = (seq_len, weather_df, flir_path, top_path, bottom_path)

train_gen = SeqDataGenerator(train_sample_ids, train_labels, *_shared_gen_args, **params)
val_gen = SeqDataGenerator(val_sample_ids, val_labels, *_shared_gen_args, **params)
test_gen = SeqDataGenerator(test_sample_ids, test_labels, *_shared_gen_args, **params)

In [None]:
# Pull the first training batch and report the shape of every array in it.
X, y = train_gen[0]
X_flir, X_bottom, X_top, X_df = X
for caption, batch_array in (("Flir seq shape: ", X_flir),
                             ("Bottom seq shape: ", X_bottom),
                             ("Top seq shape: ", X_top),
                             ("Weather data shape: ", X_df),
                             ("Target data shape: ", y)):
    print(caption, batch_array.shape)

In [None]:
# Pull the first test batch and report the shape of every array in it.
X, y = test_gen[0]
X_flir, X_bottom, X_top, X_df = X
for caption, batch_array in (("Flir seq shape: ", X_flir),
                             ("Bottom seq shape: ", X_bottom),
                             ("Top seq shape: ", X_top),
                             ("Weather data shape: ", X_df),
                             ("Target data shape: ", y)):
    print(caption, batch_array.shape)

## Declare Model

In [None]:
# Camera inputs: shape=(batch_size, time_steps, channels, row, col)
def _l1_l2():
    """Return a fresh L1/L2 regularizer with the penalties used throughout the model."""
    return regularizers.l1_l2(l1=1e-5, l2=1e-4)


def _camera_branch(frames):
    """Per-frame separable conv + max-pool feature extractor for one camera input.

    `frames` is a (time, channels, rows, cols) tensor (batch axis implicit).
    The result is (time, rows/2, cols/2, 12), i.e. channels-last.
    """
    # The stored arrays are channels-first, whereas the layers below are told
    # to work channels-last; move the channel axis to the end so the
    # convolution runs over the image rows/cols, whatever the global Keras
    # image_data_format setting is.
    x = tf.keras.layers.Permute((1, 3, 4, 2))(frames)
    # NOTE(review): SeparableConv2D documents depthwise_/pointwise_regularizer;
    # confirm that kernel_regularizer actually has an effect on this layer.
    x = TimeDistributed(SeparableConv2D(12, (4, 4),
                                        kernel_regularizer=_l1_l2(),
                                        padding="same",
                                        data_format="channels_last"))(x)
    x = TimeDistributed(MaxPooling2D(pool_size=(2, 2),
                                     data_format="channels_last"))(x)
    return x


input_flir = Input(shape=(seq_len,3,480,640,))
x_flir = _camera_branch(input_flir)

input_bottom = Input(shape=(seq_len,3,480,640,))
x_bottom = _camera_branch(input_bottom)

input_top = Input(shape=(seq_len,3,480,640,))
x_top = _camera_branch(input_top)

# Weather branch: one LSTM over the seq_len weather rows.
input_weather = Input(shape=(seq_len,weather_df.shape[1]))
x_LSTM = LSTM(60,kernel_regularizer=_l1_l2(),return_sequences=False)(input_weather)

# Stack the three camera feature maps along the (last) channel axis and model
# their evolution over time.
x_concat = Concatenate(axis=-1)([x_flir,x_bottom,x_top])
x_ConvLSTM2D = ConvLSTM2D(16,(4,4),padding="same",data_format="channels_last",
                          kernel_regularizer=_l1_l2(),return_sequences=True)(x_concat)
x_ConvLSTM2D = ConvLSTM2D(32,(2,2),padding="same",data_format="channels_last",
                          kernel_regularizer=_l1_l2(),return_sequences=False)(x_ConvLSTM2D)

# Collapse the spatial axes, then fuse with the weather features.
x_flat = GlobalAveragePooling2D(data_format="channels_last")(x_ConvLSTM2D)
x_flat = Concatenate(axis=-1)([x_flat, x_LSTM])

x_flat = Dropout(.2)(x_flat)
yh = Dense(3,activation="softmax",kernel_regularizer=_l1_l2())(x_flat)

In [None]:
# Assemble the four-input model (three cameras + weather) around the softmax output.
model = Model([input_flir,input_bottom,input_top,input_weather],yh)

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model.compile(loss=categorical_crossentropy,
                 optimizer=Adam(learning_rate=.001),
                 metrics=["accuracy"])

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
from ann_visualizer.visualize import ann_viz
# Call the function under the name it was imported with.
# NOTE(review): confirm ann_viz supports this multi-input functional model;
# plot_model is already imported and could be used instead.
ann_viz(model)

## Train Model

In [None]:
# Keep only the weights of the epoch with the best validation accuracy.
checkpoint = ModelCheckpoint(working_dir+"/model.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
# Model.fit accepts Sequence generators directly; fit_generator is deprecated.
model.fit(train_gen,validation_data=val_gen,callbacks = [checkpoint],epochs=1)

In [None]:
# Persist the trained model (path is relative to the current working directory).
model.save("weathernet_v2.h5")

## Test Model

In [None]:
# Run the model over every batch produced by test_gen.
# NOTE: this rebinds `yh`, the name the model-declaration cell uses for the
# output tensor; re-running the `Model(...)` cell after this one would pick up
# the prediction array instead of that tensor.
yh = model.predict(test_gen)

In [None]:
target_names = ['low', 'mid', 'high']

# Compare only as many label rows as there are predictions (the generator
# yields whole batches only, so yh may be shorter than test_labels).
n_predicted = yh.shape[0]
y_true = np.argmax(test_labels.values[:n_predicted, :], axis=1)
y_pred = np.argmax(yh, axis=1)

print(classification_report(y_true, y_pred, target_names=target_names))