In [None]:
import scipy.io as sio
import numpy as np
import os
import fileinput
import plotly.graph_objs as go
import sklearn
import struct
import re
import csv
import pickle
import pandas as pd
from scipy.cluster.vq import whiten
from sklearn import cluster
from sklearn import manifold
from sklearn.decomposition import PCA
from sklearn import decomposition
import matplotlib.pyplot as plt
import urllib
from matplotlib.backends.backend_pdf import PdfPages
import urllib3
from sklearn.cross_decomposition import CCA
import plotly.plotly as py
import http
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles
import plotly.graph_objs as go
from numpy import genfromtxt
%matplotlib inline

In [None]:
# Load the FOLD_3 feature files (.mat) from Google Cloud Storage.

# `urllib.request` is a submodule: a bare `import urllib` does not guarantee
# that it is loaded, so import it explicitly.
import urllib.request

# Names of the feature files, one per line (the list lives on the virtual machine).
# np.atleast_1d keeps the loop below working when the list holds a single name.
my_data = np.atleast_1d(genfromtxt(r'/home/uriot_thomas/FOLD3.txt', delimiter='\n', dtype=str))

url =  "https://storage.googleapis.com/bucket-pinouche/ImperialData/FOLD_3"

dic = {}               # file name -> dictionary returned by scipy.io.loadmat
number_of_files = 0

for filename in my_data:
    print(filename)
    local_path = os.path.basename(url)
    # Close the HTTP response and the local file *before* parsing: reading the
    # file back while it is still open for writing can see unflushed data.
    with urllib.request.urlopen(url+'/'+filename) as f:
        with open(local_path, "wb") as local_file:
            local_file.write(f.read())
    dic[filename] = sio.loadmat(local_path)

    number_of_files = number_of_files + 1

In [None]:
# The Annotation folder is stored on the virtual machine instance directly (ON THE VIRTUAL MACHINE)

# The feature files are listed in consecutive pairs (the pairing loop further
# down reads them as person 1 / person 2), so every second name identifies one
# video sequence. Dropping the last 10 characters of the file name gives the
# name of the folder that holds the sequence's annotation file.
even_names_files = [name[:-10] for name in my_data[::2]]

dic2 = {}              # sequence name -> dictionary returned by scipy.io.loadmat
path = r'/home/uriot_thomas/two'

for filename in even_names_files:
    dic2[filename] = sio.loadmat(path+'/'+filename+'/'+'meanAnnotation.mat')
number_of_files = len(even_names_files)

# Per-sequence annotation arrays and their number of rows.
mean_annotations = [dic2[filename]['annotations'] for filename in even_names_files]
sequences_shape = [annotation.shape[0] for annotation in mean_annotations]

# Concatenate the list directly: wrapping sequences of different lengths in
# np.asarray first builds a ragged array, which recent NumPy versions reject.
array_annotations_FOLD_3 = np.concatenate(mean_annotations, axis=0)

print([number_of_files, array_annotations_FOLD_3.shape])

In [None]:
# Getting the features data into usable matrix format

def dataMatrix(array1, array2):
    """Build the per-frame feature matrix for a pair of participants.

    Both inputs are indexed as ``[descriptor, component, frame]``. For every
    frame the first 49 descriptors of a participant are flattened into one row
    (descriptor-major), and the rows of the two participants are placed side
    by side.

    Parameters
    ----------
    array1, array2 : numpy.ndarray
        3-D arrays with at least 49 entries along axis 0 and the same number
        of frames along axis 2.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, 2 * 49 * n_components)``; with 128
        components per descriptor this is ``(n_frames, 12544)``.

    Raises
    ------
    IndexError
        If an input holds fewer than 49 descriptors.
    ValueError
        If the two inputs do not have the same number of frames.
    """
    n_sift = 49  # number of descriptors kept per frame

    if array1.shape[2] != array2.shape[2]:
        raise ValueError(
            "both participants must have the same number of frames, got "
            "%d and %d" % (array1.shape[2], array2.shape[2])
        )

    def flatten(sift_array):
        if sift_array.shape[0] < n_sift:
            raise IndexError(
                "expected at least %d descriptors, got %d" % (n_sift, sift_array.shape[0])
            )
        # (descriptor, component, frame) -> (frame, descriptor, component),
        # then merge the last two axes into one row per frame.
        per_frame = np.transpose(sift_array[:n_sift], (2, 0, 1))
        return per_frame.reshape(sift_array.shape[2], -1)

    frame_vector = np.concatenate([flatten(array1), flatten(array2)], axis=1)

    return(frame_vector)

In [None]:

# Collect the SIFT arrays of every loaded file and build one data matrix.
# The file list is walked in pairs: an even position is stored as person 1,
# the following odd position as person 2, and the pair is then converted with
# dataMatrix. A trailing unpaired file (odd number of names) is ignored.

key=0
concat = []

for val in my_data:
    if(key%2 == 0):
        person1 = dic[val]['SIFT']
    else:
        person2 = dic[val]['SIFT']
        concat.append(dataMatrix(person1, person2))
    key +=1
        
# Stack the per-sequence matrices along the frame axis.
Big_list = np.concatenate(concat)
print(Big_list.shape)

# The dimensions of the Big_list (data matrix) is 20640 by 12544

In [None]:
# Instead of concatenating the vectors of the 2 people, use their average as the data matrix. Note that later on,
# the response we have for each frame is the same for the 2 people (i.e. there is one conflict intensity for the overall
# interaction and not a separate conflict value for each of the interlocutors). So this approach may be a better way.

# The first 49*128 columns belong to person 1, the remaining ones to person 2
# (this is the column layout produced by dataMatrix).
Person1_columns = Big_list[:,:(49*128)]
Person2_columns = Big_list[:,(49*128):]
print([Person1_columns.shape,Person2_columns.shape])
Average_Big_list_FOLD_3 = (Person1_columns + Person2_columns)/2

# Plot of an example of the ground truth of conflict intensity

In [None]:
# Plot the annotation of one whole video sequence as an example.

# Length of the last axis of the example file's SIFT array; it is used as the
# number of annotation rows to plot.
n_frames = len(dic['20120213_seq1_01_01.mat']['SIFT'][1][1])

mean_contents1 = array_annotations_FOLD_3[:n_frames]
range_array = np.arange(n_frames)

fig = plt.figure(figsize=(17, 6))
plt.plot(range_array, mean_contents1)
plt.axis([0, n_frames, 0, 1])
fig.suptitle('Ground truth conflict intensity between the 2 participants')
plt.xlabel('Number of frames')
plt.ylabel('Conflict intensity')
plt.show()
fig.savefig('meanplot.png', bbox_inches='tight')

# PCA

In [None]:
# Dimensionality reduction of the averaged data matrix with a truncated SVD.
# NOTE(review): TruncatedSVD does not centre the data, so this is not exactly
# PCA unless the matrix has been centred beforehand - confirm this is intended.

n_components = 1200
svd = decomposition.TruncatedSVD(n_components=n_components, algorithm='arpack')
svd.fit(Average_Big_list_FOLD_3)
# Fraction of the total variance retained by the n_components components.
print(svd.explained_variance_ratio_.sum())

# Recorded from an earlier run with 500 components: 81%

In [None]:
# Scree plot: explained-variance ratio of each retained component.

variance_ratios = svd.explained_variance_ratio_
range_array = np.arange(variance_ratios.shape[0])

fig = plt.figure(figsize=(17, 6))
plt.plot(range_array, variance_ratios)
plt.axis([0, variance_ratios.shape[0], 0, 0.1])
fig.suptitle('Scree plot')
plt.xlabel('Components')
plt.ylabel('Percentage of variance explained')
plt.show()
fig.savefig('scree.png', bbox_inches='tight')

In [None]:
# Project the data onto the fitted components: the result has shape
# (number of frames, n_components).

svd_data = np.dot(Average_Big_list_FOLD_3,np.transpose(svd.components_))

In [None]:
# Save the reduced data matrix into a pickle file.
import pickle

# The context manager flushes and closes the file as soon as the dump is done.
with open("PCA_data.p", "wb") as pickle_file:
    pickle.dump(svd_data, pickle_file)

In [None]:
# Load the reduced data matrix back from the pickle file.
# NOTE: unpickling can execute arbitrary code - only load files created here.

with open("PCA_data.p", "rb") as pickle_file:
    svd_data = pickle.load(pickle_file)

In [None]:
# Using t-SNE on the reduced data to visualize it (on at most 10000 data points for speed purposes)

# Cap the sample at the number of available rows: np.random.choice with
# replace=False raises ValueError when asked for more samples than rows.
sample_size = min(10000, svd_data.shape[0])
random_indices = np.random.choice(svd_data.shape[0], sample_size, replace=False)
sampled_rows = svd_data[random_indices, :]
# Labels of the same sampled rows, used later as hover text.
sampled_annotations = array_annotations_FOLD_3[random_indices, :]
tsne_svd = manifold.TSNE(n_components=2, verbose=0, perplexity=30, n_iter=2500)
tsne_results_svd = tsne_svd.fit_transform(sampled_rows)

In [None]:
import plotly

# Credentials are read from the environment instead of being stored in the
# notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel.
# SECURITY: the API key that used to be hard-coded here has been exposed and
# should be revoked and regenerated.
plotly.tools.set_credentials_file(username=os.environ['PLOTLY_USERNAME'], api_key=os.environ['PLOTLY_API_KEY'])

In [None]:
# Cluster the sampled rows with k-means (3 clusters) and show the clusters on
# the 2-D t-SNE embedding of the same rows.
kmeans = cluster.KMeans(n_clusters=3) # k-means
kmeans.fit(sampled_rows)

c = kmeans.labels_            # cluster index of every sampled row (marker colour)
x = tsne_results_svd[:,0]     # first t-SNE coordinate
y = tsne_results_svd[:,1]     # second t-SNE coordinate
#z = tsne_results_svd[:,2]
# Hover text: first column of the annotation of every sampled row.
t = [str(s[0]) for s in sampled_annotations]

trace1 = go.Scatter(x=x,y=y,text=t, mode='markers',marker=dict(size=12,color=c, colorscale = 'Viridis', opacity=0.8))
data = [trace1]
layout = go.Layout(margin=dict(l=0,r=0,b=0,t=0))
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='t-SNE PCA with k-Means')

# Kernel PCA

In [None]:
# Transform the data with a cosine kernel and perform PCA in the kernel space. Could again use k-means and plot the data
# in 2 or 3 dimensions using t-SNE as was done for PCA above.
# NOTE(review): an earlier version of this comment mentioned an RBF Gaussian kernel; the code uses kernel="cosine".

kpca = KernelPCA(kernel="cosine")
X = kpca.fit_transform(Average_Big_list_FOLD_3)

In [None]:
# See which share of the summed kernel-PCA eigenvalues the first 1000 components account for,
# and keep those 1000 components.

print(np.sum(kpca.lambdas_[:1000])/np.sum(kpca.lambdas_))
Kernel_data = X[:,:1000]

In [None]:
# Save the kernel-PCA data (the KernelPCA cell above uses a cosine kernel).

# The context manager flushes and closes the file as soon as the dump is done.
with open("KPCA_data.p", "wb") as pickle_file:
    pickle.dump(Kernel_data, pickle_file)

In [None]:
# Load the saved kernel-PCA data. The variable keeps its historical name
# `Kernel_cubic`; the file is the one written from `Kernel_data`.
# NOTE: unpickling can execute arbitrary code - only load files created here.

with open("KPCA_data.p", "rb") as pickle_file:
    Kernel_cubic = pickle.load(pickle_file)

# Canonical correlation analysis

In [None]:
# Load the names of the FOLD_4 feature files (one name per line; the list is
# stored on the virtual machine).

my_data_Fold_4 = genfromtxt(r'/home/uriot_thomas/FOLD4.txt', delimiter='\n', dtype=str)

In [None]:

# Load the FOLD_4 feature files (.mat) from Google Cloud Storage.
# NOTE: this cell overwrites `dic` and `number_of_files`, which previously
# described FOLD_3.

# `urllib.request` is a submodule: a bare `import urllib` does not guarantee
# that it is loaded, so import it explicitly.
import urllib.request

url =  "https://storage.googleapis.com/bucket-pinouche/ImperialData/FOLD_4"

dic = {}               # file name -> dictionary returned by scipy.io.loadmat
number_of_files = 0

for filename in my_data_Fold_4 :
    print(filename)
    local_path = os.path.basename(url)
    # Close the HTTP response and the local file *before* parsing: reading the
    # file back while it is still open for writing can see unflushed data.
    with urllib.request.urlopen(url+'/'+filename) as f:
        with open(local_path, "wb") as local_file:
            local_file.write(f.read())
    dic[filename] = sio.loadmat(local_path)

    number_of_files = number_of_files + 1

In [None]:

# Create a matrix similar to Average_Big_list but from the FOLD_4 data. CCA is a supervised method (it uses the labels) and
# we do not want to use the labels to transform data which will then be trained on with the same labels (introduces bias).

# Collect the SIFT arrays of every loaded file. The file list is walked in
# pairs: an even position is person 1, the following odd position is person 2.
# NOTE: this overwrites `Big_list`, which previously held the FOLD_3 matrix.

key=0
concat = []

for val in my_data_Fold_4:
    if(key%2 == 0):
        person1 = dic[val]['SIFT']
    else:
        person2 = dic[val]['SIFT']
        concat.append(dataMatrix(person1, person2))
    key +=1
        
Big_list = np.concatenate(concat)
print(Big_list.shape)

In [None]:
# Take the average of the two people. The first 49*128 columns belong to
# person 1 and the remaining ones to person 2 (column layout of dataMatrix).

Person1_columns = Big_list[:,:(49*128)]
Person2_columns = Big_list[:,(49*128):]
print([Person1_columns.shape,Person2_columns.shape])
Average_Big_list_FOLD_4 = (Person1_columns + Person2_columns)/2

In [None]:
# The Annotation folder is stored on the virtual machine instance directly (ON THE VIRTUAL MACHINE) for FOLD_4

# The feature files are listed in consecutive pairs (person 1 / person 2), so
# every second name identifies one video sequence. Dropping the last 10
# characters of the file name gives the name of the sequence's annotation folder.
even_names_files = [name[:-10] for name in my_data_Fold_4[::2]]

dic2 = {}              # sequence name -> dictionary returned by scipy.io.loadmat
path = r'/home/uriot_thomas/two'

for filename in even_names_files:
    dic2[filename] = sio.loadmat(path+'/'+filename+'/'+'meanAnnotation.mat')
number_of_files = len(even_names_files)

mean_annotations = [dic2[filename]['annotations'] for filename in even_names_files]

# Keep the FOLD_4 sequence lengths under their own name. `sequences_shape`
# holds the FOLD_3 lengths, and the LSTM section uses it to split
# array_annotations_FOLD_3 back into sequences; overwriting it here would make
# that split use the lengths of the wrong fold.
sequences_shape_FOLD_4 = [annotation.shape[0] for annotation in mean_annotations]

# Concatenate the list directly: wrapping sequences of different lengths in
# np.asarray first builds a ragged array, which recent NumPy versions reject.
array_annotations_FOLD_4 = np.concatenate(mean_annotations, axis=0)

print([number_of_files, array_annotations_FOLD_4.shape])

In [None]:
# Perform CCA: fit on FOLD_4 (features and labels) so that the transformation
# applied to FOLD_3 afterwards has not seen the FOLD_3 labels.

# NOTE(review): `new` repeats the label column once per feature column and is
# only used by the print below, not by the fit - a very large temporary for a
# debugging print; consider removing it.
new = np.repeat(array_annotations_FOLD_4,Average_Big_list_FOLD_4.shape[1] , axis=1)
print(new[2000,:],array_annotations_FOLD_4[2000])
# NOTE(review): n_components=2 is requested although the labels look like a
# single column - confirm the installed scikit-learn version accepts this.
cca = CCA(n_components=2, scale=True, max_iter=500, tol=1e-06, copy=True)
cca.fit(Average_Big_list_FOLD_4, array_annotations_FOLD_4)  # THIS IS THE TRAINING PART WHICH NEEDS TO BE DONE ON A DIFFERENT FOLD

In [None]:
# Transform the FOLD_3 data with the CCA model that was fitted on FOLD_4.

X_c, Y_c = cca.transform(Average_Big_list_FOLD_3, array_annotations_FOLD_3) # HERE THIS IS THE USUAL DATA ON FOLD_3
# Canonical variates of the first frame.
print(X_c[0,:])

# Variational autoencoder

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import backend as K
from keras import metrics
from sklearn.model_selection import KFold
from keras.datasets import mnist

In [None]:
# Hyper-parameters of the variational autoencoder.
batch_size = 32 # The encoder input is created with a fixed batch shape, so presumably the number of training rows must be a multiple of this - TODO confirm
original_dim = 1200 # number of variables in the training data
latent_dim = 30 # size of the latent vector z
intermediate_dim = 600 # width of the hidden layer of the encoder and of the decoder
epochs = 20 # NOTE(review): the fit cells below pass literal epoch counts and do not read this value
epsilon_std = 1.0 # Standard deviation of epsilon to be used in the reparametrization trick

In [None]:
# Defining the encoder network: one ReLU hidden layer followed by two linear
# heads of size latent_dim.

x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
# Second head; `sampling` below uses exp(z_log_sigma) as the scale of the noise.
z_log_sigma = Dense(latent_dim)(h)

In [None]:
# Defining a sampling function to sample the latent value z using the reparametrization trick

def sampling(args):
    """Draw z = z_mean + exp(z_log_sigma) * epsilon with Gaussian epsilon.

    `args` is the pair (z_mean, z_log_sigma). epsilon is drawn with the fixed
    shape (batch_size, latent_dim), mean 0 and standard deviation epsilon_std,
    all read from the notebook globals.
    """
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_sigma) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])

In [None]:
# Defining the decoder network. The two layers are kept as objects so that the
# stand-alone generator defined later can reuse them.

decoder_h = Dense(intermediate_dim, activation='relu')
# NOTE(review): a sigmoid output is bounded to (0, 1); confirm that the
# training data lies in that range.
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

In [None]:
# end-to-end autoencoder: input -> sampled latent z -> reconstruction
vae = Model(x, x_decoded_mean)

# encoder, from inputs to latent space (outputs z_mean, not a sampled z)
encoder = Model(x, z_mean)

# generator, from latent space to reconstructed inputs; it reuses the decoder
# layers decoder_h and decoder_mean, so it shares their weights with `vae`
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

In [None]:
# Defining the loss function: reconstruction term plus the KL divergence
# between the Gaussian variational factor (parameters given by the encoder)
# and a standard normal prior.
# NOTE(review): the reconstruction term is a binary cross-entropy (Bernoulli
# likelihood), not a normal likelihood; it assumes targets in [0, 1].

def vae_loss(x, x_decoded_mean):
    """Return reconstruction cross-entropy plus the KL regulariser.

    `sampling` scales the noise by exp(z_log_sigma), i.e. z_log_sigma is the
    log of the standard deviation. The KL term therefore uses
    log(sigma^2) = 2 * z_log_sigma and sigma^2 = exp(2 * z_log_sigma).
    Using z_log_sigma directly would treat it as a log-variance and
    contradict the sampling step.
    """
    xent_loss = metrics.binary_crossentropy(x, x_decoded_mean)
    kl_loss = - 0.5 * K.mean(1 + 2 * z_log_sigma - K.square(z_mean) - K.exp(2 * z_log_sigma), axis=-1)
    return xent_loss + kl_loss

vae.compile(optimizer='rmsprop', loss=vae_loss)

In [None]:
# Define a function that makes a train and validations sets out of the input data

def MakeData(data,labels):
    """Randomly hold out one fifth of the rows as a validation set.

    Returns (val_set, train_set, val_labels, train_labels); the validation
    rows are drawn without replacement and removed from the training arrays.
    """
    n_rows = data.shape[0]
    held_out = np.random.choice(n_rows, round(n_rows/5), replace=False)

    val_set, val_labels = data[held_out, :], labels[held_out, :]
    train_set = np.delete(data, held_out, 0)
    train_labels = np.delete(labels, held_out, 0)

    return val_set , train_set, val_labels, train_labels

In [None]:
# Fit the VAE on the reduced data (svd_data). Could do a loop and take the average of the validation losses to get more consistent
# values for the validation loss but this does not matter for very large data sets and it is also computationally very expensive.
# NOTE(review): the encoder input has a fixed batch shape, so the number of rows in x_train and x_val presumably
# has to be a multiple of batch_size - confirm. The number of epochs is the literal 10, not the `epochs` constant.

x_val, x_train, y_val, y_train = MakeData(svd_data,array_annotations_FOLD_3)
print([x_train.shape,x_val.shape,y_val.shape,y_train.shape])
variational = vae.fit(x_train, x_train,epochs=10,batch_size=batch_size,validation_data=(x_val, x_val), verbose=0,initial_epoch=0)

In [None]:
# Names of the recorded training curves and the loss of the VAE on the validation set.
print([variational.history.keys(),vae.evaluate(x_val,x_val,verbose=0)])

In [None]:
# Plot the training and validation loss of the VAE per epoch.

fig = plt.figure(figsize=(17, 6))
plt.plot(variational.history['loss'])
plt.plot(variational.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# NOTE: several later cells save to this same file name and overwrite it.
fig.savefig('newfiguseless.png', bbox_inches='tight')

In [None]:
# Fit on all the data. This continues training the same `vae` object that was
# already fitted on the training split above; the weights are not reset.

vae.fit(svd_data, svd_data,epochs=35,batch_size=batch_size,verbose=0)

In [None]:
# Retrieve the latent representation once the model has been trained. The
# encoder model outputs z_mean, so these are latent means, not sampled values.

VAE_data = encoder.predict(svd_data, batch_size=batch_size)
print(VAE_data.shape)

In [None]:
# Save the latent representation produced by the VAE on svd_data.
# NOTE(review): the file name mentions 100 latents and 650 hidden units, while
# the configuration cell sets latent_dim = 30 and intermediate_dim = 600 -
# confirm which configuration this file is meant to hold.

# The context manager flushes and closes the file as soon as the dump is done.
with open("VAE_SVD_100_650.p", "wb") as pickle_file:
    pickle.dump(VAE_data, pickle_file)

In [None]:
# Retrieve the saved VAE latent representation.
# NOTE: unpickling can execute arbitrary code - only load files created here.

with open("VAE_SVD_100_650.p", "rb") as pickle_file:
    VAE_SVD_100_650 = pickle.load(pickle_file)

# MLP

In [None]:
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import scale
from sklearn.utils import resample
from sklearn.neural_network import MLPRegressor
#from hyperopt import STATUS_OK, hp, fmin, tpe, Trials, space_eval

In [None]:
# Pick the validation row indices once (one fifth of the rows, drawn without
# replacement) so that every model below is evaluated on the same rows.
# NOTE: no random seed is set, so a fresh run draws different indices; the
# following cells persist them to disk for reuse.

validation_set_size = round(svd_data.shape[0]/5)
random_indices_val = np.random.choice(svd_data.shape[0], validation_set_size, replace=False)

In [None]:
# Save the random indices so that the same validation rows can be reused.

# The context manager flushes and closes the file as soon as the dump is done.
with open("Random_indices_MLP.p", "wb") as pickle_file:
    pickle.dump(random_indices_val, pickle_file)

In [None]:
# Retrieve the saved validation indices.
# NOTE: unpickling can execute arbitrary code - only load files created here.

with open("Random_indices_MLP.p", "rb") as pickle_file:
    random_indices_val = pickle.load(pickle_file)

In [None]:
# Make a validation and training set using the indices defined above
    
def MakeData(data,labels,indices=None):
    """Split data and labels into a validation part and a training part.

    Parameters
    ----------
    data, labels : numpy.ndarray
        2-D arrays with one row per sample.
    indices : array-like of int, optional
        Rows that form the validation set. When omitted, one fifth of the rows
        is drawn at random without replacement. This keeps the two-argument
        call used in the VAE section working after this definition replaces
        the earlier function of the same name.

    Returns
    -------
    tuple
        (val_set, train_set, val_labels, train_labels).
    """
    if indices is None:
        indices = np.random.choice(data.shape[0], round(data.shape[0]/5), replace=False)

    val_set = data[indices, :]
    train_set = np.delete(data, indices, 0)
    val_labels = labels[indices, :]
    train_labels = np.delete(labels, indices, 0)

    return val_set , train_set, val_labels, train_labels

In [None]:
# Multi layer perceptron to predict the conflict intensity labels. Need to taylor it for the number of hidden layers according
# to what the input data is.

def MLP_Layers(predictors,labels,random_indices_val,number_of_neurons1):
    """Train a one-hidden-layer MLP regressor and return its validation MSE.

    Parameters
    ----------
    predictors, labels : numpy.ndarray
        2-D arrays with one row per sample.
    random_indices_val : array-like of int
        Rows used as the validation set; all other rows are used for training.
    number_of_neurons1 : int
        Number of units in the single hidden layer.

    Returns
    -------
    float
        Mean squared error of the predictions on the validation rows.
    """
    model_dnn = MLPRegressor(hidden_layer_sizes=(number_of_neurons1,),max_iter=20,early_stopping=True)
    x_val, x_train, y_val, y_train = MakeData(predictors,labels,random_indices_val)

    # A single-column label array is flattened so that the regressor receives
    # a 1-D target instead of a column vector.
    if y_train.ndim == 2 and y_train.shape[1] == 1:
        y_train = np.ravel(y_train)
        y_val = np.ravel(y_val)

    model_dnn.fit(x_train, y_train)
    predictions = model_dnn.predict(x_val)
    # Argument order follows the scikit-learn signature (y_true, y_pred).
    return(mean_squared_error(y_val, predictions))
    


In [None]:
# Run the MLP function on the saved VAE latents (70 hidden units) and return the validation MSE.
# The MSE is used to choose the best model and to tune the number of neurons and layers.

MLP_Layers(VAE_SVD_100_650, array_annotations_FOLD_3, random_indices_val, 70)

In [None]:
# Evaluate the same MLP on svd_data (the reduced data without the VAE step).


MLP_Layers(svd_data, array_annotations_FOLD_3, random_indices_val, 70)

# Kept for reference: fit of a larger two-hidden-layer model on all the data.
#model_dnn = MLPRegressor(hidden_layer_sizes=(450,225,),max_iter=3000,early_stopping=True)
#model_dnn.fit(svd_data, array_annotations_FOLD_3)

In [None]:
# Evaluate the MLP on the PCA + VAE data with 30 latents.
# FIXME(review): `VAE_SVD_30_600` is not assigned anywhere in the visible part
# of this notebook, so this cell raises NameError on a fresh kernel unless the
# variable is created (e.g. loaded from a pickle) elsewhere.

MLP_Layers(VAE_SVD_30_600, array_annotations_FOLD_3, random_indices_val, 70)

# Kept for reference: fit of a 20-unit model on all the data.
#model_dnn_VAE30 = MLPRegressor(hidden_layer_sizes=(20,),max_iter=3000,early_stopping=True)
#model_dnn_VAE30.fit(VAE_SVD_30_600, array_annotations_FOLD_3)

# LSTM

In [None]:
from keras.models import load_model
import h5py
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers.recurrent import GRU
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import RMSprop
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Define a function that puts 19 video sequences in training set and keep the 20th as validation

def Makelist(data,sequences_shape=None):
    """Split a row-wise concatenated array back into its sequences.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array holding all sequences stacked one after another along axis 0.
    sequences_shape : sequence of int, optional
        Number of rows of each sequence, in stacking order. When omitted, the
        notebook-level `sequences_shape` is used.

    Returns
    -------
    list of numpy.ndarray
        One slice of `data` per sequence, in order.
    """
    if sequences_shape is None:
        # The parameter shadows the notebook variable of the same name.
        sequences_shape = globals()['sequences_shape']

    list_data = []
    start = 0
    for length in sequences_shape:
        # Each sequence starts where the previous one ended. Using the length
        # itself as the start offset would slice the wrong rows.
        list_data.append(data[start:start+length,:])
        start += length
    return(list_data)
        
        
def Dotraining(Mylist):
    """Build leave-one-out folds from a list of sequences.

    Prints the number of sequences, then returns (train_data, val_data):
    val_data[k] is the k-th sequence and train_data[k] is the list of all
    other sequences, in their original order.
    """
    n_sequences = len(Mylist)
    print(n_sequences)

    val_data = [Mylist[held_out] for held_out in range(n_sequences)]
    train_data = [
        [sequence for position, sequence in enumerate(Mylist) if position != held_out]
        for held_out in range(n_sequences)
    ]
    return train_data, val_data

In [None]:
# Build the leave-one-out folds and the flat containers used by the LSTM cells.

l = Makelist(array_annotations_FOLD_3,sequences_shape)
train,val = Dotraining(l)

# All training sequences of all folds in one flat list: fold k occupies the
# positions k*(len(l)-1) ... (k+1)*(len(l)-1)-1.
flat_list = [item for sublist in train for item in sublist]

# Store the sequences in a 1-D object array, filled element by element.
# np.array(flat_list) would try to build a regular array from sequences of
# different lengths, which recent NumPy versions reject.
flat_array = np.empty(len(flat_list), dtype=object)
for position, sequence in enumerate(flat_list):
    flat_array[position] = sequence

# For every fold, its training sequences concatenated along the frame axis.
data_list = [np.concatenate(fold_sequences, axis=0) for fold_sequences in train]

In [None]:
# Define the function to split the inputs and outputs of the LSTM
input_length = 10


def _split2sequences(data, length_x, length_y, split):
    """Cut `data` into sliding windows; shared by the two public splitters."""
    xN = []
    yN = []

    for i in range(len(data) - length_x):
        # Input window: length_x consecutive rows starting at row i.
        xN.append(data[i: i + length_x])
        # Target: the length_y rows ending at the row right after the window.
        yN.append(data[i+1+length_x-length_y:i + length_x + 1])

    train_size = int(len(xN) * split)

    xN = np.array(xN)
    yN = np.array(yN)
    X_train, X_test = xN[:train_size], xN[train_size:]
    Y_train, Y_test = yN[:train_size], yN[train_size:]

    return xN, yN, X_train, X_test, Y_train, Y_test


def split2sequencesTrain(data, length_x=input_length, length_y=1, split=1):
    """Turn a sequence into sliding input windows and next-step targets.

    Parameters
    ----------
    data : array-like
        Sequence indexed along its first axis.
    length_x : int
        Number of rows in each input window.
    length_y : int
        Number of rows in each target, ending at the row that follows the window.
    split : float
        Fraction of the windows (from the start) placed in the training part.
        The default of 1 puts every window in the training part.

    Returns
    -------
    tuple
        (xN, yN, X_train, X_test, Y_train, Y_test), where xN and yN hold all
        windows and targets.
    """
    return _split2sequences(data, length_x, length_y, split)


def split2sequencesValidation(data, length_x=input_length, length_y=1, split=0):
    """Same as split2sequencesTrain, but `split` defaults to 0.

    With the default, every window ends up in the test part (X_test, Y_test).
    """
    return _split2sequences(data, length_x, length_y, split)

In [None]:
# Define the LSTM model here: one LSTM layer followed by a single ReLU output unit.
# NOTE: `epochs` and `batch_size` below overwrite the values set for the VAE
# earlier in the notebook.

N_HIDDEN = 32  # units of the LSTM layer
N_HIDDEN2 = 16  # not used by the model below
N_DENSE = 1 # number of outputs (1 for label array); the Dense layer below uses the literal 1
epochs = 100  # read by TrainLSTM1
batch_size = 16  # NOTE(review): the fit calls below pass batch_size=32 explicitly
input_dim = 1  # number of features per time step

model = Sequential()
model.add(LSTM(N_HIDDEN, input_shape = (input_length,input_dim)))
#model.add(LSTM(32))
model.add(Dense(1, activation='relu'))
# NOTE(review): the learning rate is 1e-6, which is very small - confirm it is intended.
model.compile(loss='mse', optimizer=RMSprop(lr=0.000001, rho=0.9, epsilon=1e-08), metrics=['mae'])

In [None]:
# Define a function that takes as input the video sequence to be taken as validation. The 19 remaining video sequences are then
# automatically pointed to and fed to the LSTM sequence by sequence such that the LSTM input does not have any overlap 
# (i.e, when there is a transition from 2 video sequences, there is no time depency and so we do not want our LSTM to learn from
# these erroneous inputs).


model.save('my_lstm.h5')

def TrainLSTM1(fold_number):
    """Train the saved LSTM sequence by sequence and track the validation loss.

    The model stored in 'my_lstm.h5' is loaded once, fitted for one pass over
    each training sequence of the fold in every epoch (so no input window
    spans two different video sequences), and saved back after every epoch.

    Parameters
    ----------
    fold_number : int
        Index of the sequence in `val` that is held out for validation.

    Returns
    -------
    list
        One entry per epoch: the result of `model.evaluate` on the held-out
        sequence after that epoch.
    """
    # The validation windows do not change between epochs: build them once.
    valid = val[fold_number]
    xN, yN, X_train, X_test1, Y_train, Y_test1 = split2sequencesValidation(valid)
    Y_test1 = np.reshape(Y_test1, (Y_test1.shape[0], Y_test1.shape[1]))

    # `flat_array` stores len(val) - 1 training sequences per fold. Deriving
    # the count from `val` avoids relying on `number_of_files`, which other
    # cells overwrite.
    n_train_sequences = len(val) - 1

    model = load_model('my_lstm.h5')

    # Created once, before the epoch loop, so that every epoch's loss is kept.
    list_val = list()

    for count in range(0,epochs):
        for index2 in range(0,n_train_sequences):
            xN, yN, X_train, X_test, Y_train, Y_test = split2sequencesTrain(flat_array[index2+n_train_sequences*fold_number])
            Y_train = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
            model.fit(X_train, Y_train, batch_size=32, epochs=1, verbose=0, validation_data=None)

        model.save('my_lstm.h5')
        new_loss = model.evaluate(X_test1, Y_test1,verbose=0)
        list_val.append(new_loss)

    return(list_val)
        

In [None]:
# Call the function with the first video sequence as validation set and the remaining sequences as training

val_values = TrainLSTM1(0)

In [None]:
# The above function works but saving and loading the model to the hardware takes to much time. Instead we first apply the
# Split function to the 19 video sequences separately and then concatenate them to directly input into the model.fit

def SplitVidByVid(fold_number):
    """Build LSTM training and validation windows for one leave-one-out fold.

    Every training sequence is windowed separately and the windows are then
    concatenated, so no window spans two different video sequences.

    Parameters
    ----------
    fold_number : int
        Index of the sequence in `val` that is held out for validation.

    Returns
    -------
    tuple
        (X_test1, Y_test1, Full_splitted_data_X, Full_splitted_data_Y):
        validation windows and targets, then training windows and targets.
    """
    valid = val[fold_number]
    xN, yN, X_train, X_test1, Y_train, Y_test1 = split2sequencesValidation(valid)
    Y_test1 = np.reshape(Y_test1, (Y_test1.shape[0], Y_test1.shape[1]))

    # `flat_array` stores len(val) - 1 training sequences per fold. Deriving
    # the count from `val` avoids relying on `number_of_files`, which other
    # cells overwrite.
    n_train_sequences = len(val) - 1

    my_list_Y_train = []
    my_list_X_train = []

    for index2 in range(0,n_train_sequences):
        xN, yN, X_train, X_test, Y_train, Y_test = split2sequencesTrain(flat_array[index2+n_train_sequences*fold_number])
        Y_train = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
        my_list_Y_train.append(Y_train)
        my_list_X_train.append(X_train)

    Full_splitted_data_Y = np.concatenate(my_list_Y_train)
    Full_splitted_data_X = np.concatenate(my_list_X_train)
    return X_test1, Y_test1, Full_splitted_data_X, Full_splitted_data_Y 


In [None]:
# Windows for fold 0: the first video sequence is the validation set.
# Note the order of the returned values: validation first, then training.
X_test, Y_test, X_train, Y_train = SplitVidByVid(0)

print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)

In [None]:
# Callbacks: multiply the learning rate by 0.2 when val_loss has not improved
# for 5 epochs, and stop training when it has not improved for 10 epochs.
red = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=0, mode='auto', epsilon=0.00001, cooldown=0, min_lr=0.00000001)
monitor = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')
# Because of early stopping, the history may hold fewer than 100 epochs.
lstm = model.fit(X_train, Y_train, batch_size=32, epochs=100, verbose=0, validation_data=(X_test,Y_test), callbacks=[monitor,red])

In [None]:
# Training and validation loss of the last epoch that was actually run.
# A fixed index (98) fails with IndexError whenever early stopping ends the
# training before that epoch, and it is not the last epoch of a full run.
print([lstm.history['loss'][-1],lstm.history['val_loss'][-1]])

In [None]:
# Plot the validation and training loss of the LSTM side by side

fig = plt.figure(figsize=(17, 6))
plt.plot(lstm.history['loss'])
plt.plot(lstm.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# NOTE: the same file name is used by several plotting cells, which overwrite it.
fig.savefig('newfiguseless.png', bbox_inches='tight')

In [None]:
# Plot the validation and training loss from the 30th epoch onwards on an appropriately scaled y axis.
# The x axis therefore starts at 0 for epoch 30. If training stopped before
# epoch 30, the slices are empty and the plot shows nothing.

fig = plt.figure(figsize=(17, 6))
plt.plot(lstm.history['loss'][30:])
plt.plot(lstm.history['val_loss'][30:])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# NOTE: the same file name is used by several plotting cells, which overwrite it.
fig.savefig('newfiguseless.png', bbox_inches='tight')

In [None]:
# Predict the conflict intensity labels for the held-out video sequence

predicted_values = model.predict(X_test, verbose=0)
# The first input_length labels have no prediction (they only serve as input),
# so the predictions are compared with the labels from row input_length onwards.
print([predicted_values.shape, val[0][(input_length):,].shape])

In [None]:
# Plot the true intensity labels together with the ones predicted by the LSTM

fig = plt.figure(figsize=(17, 6))
plt.plot(predicted_values)
# The first input_length labels have no prediction, so they are skipped.
plt.plot(val[0][(input_length):,])
plt.title('Model prediction vs real labels')
plt.ylabel('Conflict intensity')
# The x axis indexes the frames of the held-out sequence, not training epochs.
plt.xlabel('Frames')
plt.legend(['Predicted labels', 'True labels'], loc='upper left')
plt.show()
fig.savefig('newfiguseless.png', bbox_inches='tight')

In [None]:
# Function that use the first lstm predicted value as an input value in next lstm prediction

from numpy import zeros, newaxis

def AbsolutePredict(test):
    """Predict window by window, feeding each prediction into the next window.

    The prediction made for window `index` replaces the last time step of
    window `index + 1` before that window is predicted. The last window only
    receives a value and is not predicted itself, so the result holds
    ``test.shape[0] - 1`` predictions.

    Parameters
    ----------
    test : numpy.ndarray
        Input windows; the first axis indexes the windows. The array passed in
        is left untouched: the function works on a copy.

    Returns
    -------
    list
        The raw outputs of `model.predict`, one per predicted window.
    """
    # Work on a copy. Writing the predictions into the caller's array would
    # corrupt it for later use and make repeated calls give different results.
    test = np.array(test, copy=True)
    predicted_val = list()

    for index in range(0,(test.shape[0]-1)):

        predicted_val.append(model.predict(test[index,:][newaxis,...], verbose=0))
        test[index+1,(input_length-1)] = predicted_val[index]

    return(predicted_val)
    
    

In [None]:
# Run the feed-forward prediction on the validation windows and drop the last
# axis of the stacked outputs, keeping the first two dimensions.
AbsPred_array2 = np.asarray(AbsolutePredict(X_test))
AbsPred_array2 = np.reshape(AbsPred_array2, (AbsPred_array2.shape[0], AbsPred_array2.shape[1]))

In [None]:
# Compare the feed-forward ("absolute") predictions, the true labels and the
# ordinary one-step predictions on the held-out sequence.
fig = plt.figure(figsize=(17, 6))
plt.plot(AbsPred_array2)
plt.plot(val[0][(input_length):(val[0].shape[0]-1),])
plt.plot(predicted_values[:(val[0].shape[0]-1),])
plt.title('Model prediction vs real labels')
plt.ylabel('Conflict intensity')
# The x axis indexes the frames of the held-out sequence, not training epochs.
plt.xlabel('Frames')
plt.legend([' Absolute prediction','True labels', 'Predicted values'], loc='upper left')
plt.show()
fig.savefig('newfiguseless.png', bbox_inches='tight')