In [None]:
# Ownership metadata for this notebook.
# String literals must be delimited by plain ASCII quotes; typographic
# (curly) quotes are not valid Python and raise a SyntaxError.
__author__ = "Manali Chakraborty"
__version__ = "0.1.0"
__maintainer__ = "Manali Chakraborty"
__email__ = "mc8153@rit.edu"
__status__ = "Dev"

In [1]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


In [2]:
# Convert a series into a supervised-learning table: lagged input columns
# (t-n_in ... t-1) placed side by side with the columns to forecast
# (t ... t+n_out-1), e.g. feeding the (t-1) row to predict the (t) row.
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) series as a supervised-learning dataset.

    Args:
        data: Observations ordered in time; a list of scalars, a list of rows,
            a 1-D/2-D array, or anything else ``DataFrame`` accepts.
        n_in: Number of lag steps used as input (columns ``t-n_in .. t-1``).
        n_out: Number of steps to forecast (columns ``t .. t+n_out-1``).
        dropnan: When true, drop the rows that contain NaN after shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, with ``j`` counting variables from 1.
    """
    df = DataFrame(data)
    # Count the variables on the frame itself so that lists of rows and 1-D
    # inputs get the right number of column names, not only 2-D arrays.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # shifting leaves NaN in the first n_in and last n_out-1 rows
    if dropnan:
        agg = agg.dropna()
    return agg


In [3]:
 
# Columns removed from every CSV right after loading.
unused_columns = ["newCol", "ID"]

# NOTE(review): both paths are absolute and machine-specific; consider a
# configurable data directory so the notebook runs elsewhere.

# Main dataset: read, drop the unused columns, keep the raw value matrix.
dataset = read_csv('/Users/manali/CLionProjects/exact/datasets/2020_tactic/dummy.csv').drop(columns=unused_columns)
values = dataset.values
print(dataset.head(5))

# Validation dataset: identical treatment.
validation = read_csv('/Users/manali/CLionProjects/exact/datasets/2020_tactic/dummy_validate.csv').drop(columns=unused_columns)
values_validation = validation.values


      Hours   Minutes   Seconds   Latency      Cost  Reliability  Friday  \
0  0.826087  0.915254  0.719857  0.010629  0.294723            1       0   
1  0.826087  0.915254  0.752826  0.001839  0.643501            1       0   
2  0.826087  0.915254  0.753299  0.000026  0.642214            1       0   
3  0.826087  0.932203  0.408255  0.036898  0.553411            1       0   
4  0.826087  0.932203  0.412858  0.000258  0.364221            1       0   

   Monday  Saturday  Sunday  ...  Server_4.0  Server_5.0  Server_6.0  \
0       1         0       0  ...           0           0           0   
1       1         0       0  ...           0           0           0   
2       1         0       0  ...           0           0           0   
3       1         0       0  ...           0           0           0   
4       1         0       0  ...           0           0           0   

   Server_7.0  Server_8.0  Tactic_1.0  Tactic_2.0  Tactic_3.0  Tactic_4.0  \
0           0           0        

In [4]:
## Reframe both value matrices as supervised-learning tables and remove the
## (t) columns that are not prediction targets

# One lag step as input, one step as output.
reframed = series_to_supervised(values, n_in=1, n_out=1)
reframed_validation = series_to_supervised(values_validation, n_in=1, n_out=1)

# Positions of the columns we don't want to predict: 26-28 and 32-51.
drop_positions = [26, 27, 28] + list(range(32, 52))
reframed = reframed.drop(columns=reframed.columns[drop_positions])
reframed_validation = reframed_validation.drop(
    columns=reframed_validation.columns[drop_positions]
)
print(reframed.head(3))

   var1(t-1)  var2(t-1)  var3(t-1)  var4(t-1)  var5(t-1)  var6(t-1)  \
1   0.826087   0.915254   0.719857   0.010629   0.294723        1.0   
2   0.826087   0.915254   0.752826   0.001839   0.643501        1.0   
3   0.826087   0.915254   0.753299   0.000026   0.642214        1.0   

   var7(t-1)  var8(t-1)  var9(t-1)  var10(t-1)  ...  var20(t-1)  var21(t-1)  \
1        0.0        1.0        0.0         0.0  ...         0.0         0.0   
2        0.0        1.0        0.0         0.0  ...         0.0         0.0   
3        0.0        1.0        0.0         0.0  ...         0.0         0.0   

   var22(t-1)  var23(t-1)  var24(t-1)  var25(t-1)  var26(t-1)   var4(t)  \
1         1.0         0.0         0.0         0.0         0.0  0.001839   
2         0.0         1.0         0.0         0.0         0.0  0.000026   
3         0.0         0.0         1.0         0.0         0.0  0.036898   

    var5(t)  var6(t)  
1  0.643501      1.0  
2  0.642214      1.0  
3  0.553411      1.0  

[3 r

In [5]:
## Build the input/target arrays for the two reframed sets

# NOTE(review): `train` is taken from the validation frame and `test` from the
# main dataset frame -- confirm this assignment is intentional and not swapped.
test, train = reframed.values, reframed_validation.values

# The last three columns are the targets; everything before them is input.
train_X, train_y = train[:, :-3], train[:, -3:]
test_X, test_y = test[:, :-3], test[:, -3:]

# Reshape input to 3D [samples, timesteps, features] with a single timestep.
train_samples, train_features = train_X.shape
test_samples, test_features = test_X.shape
train_X = train_X.reshape((train_samples, 1, train_features))
test_X = test_X.reshape((test_samples, 1, test_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(9190, 1, 26) (9190, 3) (36761, 1, 26) (36761, 3)


In [6]:
# Collapse the timestep and feature axes so every sample becomes one flat row.
n_samples, n_steps, n_features = train_X.shape
n_input = n_steps * n_features
X = train_X.reshape((n_samples, n_input))

In [7]:
### Run the same preparation steps on the testing dataset

# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
test_dataset = read_csv('/Users/manali/CLionProjects/exact/datasets/2020_tactic/dummy_1.csv').drop(columns=["newCol", "ID"])
test_values = test_dataset.values

# One lag step as input, one step as output.
reframed_test = series_to_supervised(test_values, n_in=1, n_out=1)
# Same column positions as dropped from the other reframed sets: 26-28 and 32-51.
non_target_positions = [26, 27, 28] + list(range(32, 52))
reframed_test = reframed_test.drop(columns=reframed_test.columns[non_target_positions])

# Last three columns are the targets; the rest is input.
testset = reframed_test.values
testset_X, testset_y = testset[:, :-3], testset[:, -3:]
# 3D [samples, timesteps, features] view of the inputs, single timestep.
testset_rows, testset_features = testset_X.shape
testdataReshaped = testset_X.reshape((testset_rows, 1, testset_features))


## SVR algorithm with RBF kernel. 

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

# SVR predicts a single target, so it is wrapped in MultiOutputRegressor to
# cover all target columns of train_y.
regressor = SVR(kernel='rbf')
regr = MultiOutputRegressor(regressor)

# flatten the test input to 2D: (samples, timesteps * features)
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))

regr.fit(X, train_y)
out = regr.predict(X2)

# mean_squared_error takes (y_true, y_pred): ground truth first.
rmse = sqrt(mean_squared_error(testset_y, out))
print('Test RMSE: %.3f' % rmse)

## SVR with Linear kernel 

In [8]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

# SVR predicts a single target, so it is wrapped in MultiOutputRegressor to
# cover all target columns of train_y.
regressor = SVR(kernel='linear')
regr = MultiOutputRegressor(regressor)

# flatten the test input to 2D: (samples, timesteps * features)
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))

regr.fit(X, train_y)
out = regr.predict(X2)

# mean_squared_error takes (y_true, y_pred): ground truth first.
rmse = sqrt(mean_squared_error(testset_y, out))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 0.084


## KNN Regression

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor

# K-nearest-neighbours regressor with default settings, wrapped so that one
# estimator is fitted per target column of train_y.
knn = KNeighborsRegressor()
regr_knn = MultiOutputRegressor(knn)

# flatten the test input to 2D: (samples, timesteps * features)
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))

regr_knn.fit(X, train_y)
# Predict once; an extra predict call whose result is discarded only costs
# another full neighbour search.
out = regr_knn.predict(X2)

# mean_squared_error takes (y_true, y_pred): ground truth first.
rmse = sqrt(mean_squared_error(testset_y, out))
print('Test RMSE: %.3f' % rmse)

## Saving the predictions to a file

In [9]:
## Collect the predictions in a DataFrame and threshold the reliability column
import pandas as pd

# One column per predicted target, taken from the columns of `out`.
dataset = pd.DataFrame({'predicted_Latency': out[:, 0], 'predicted_Cost': out[:, 1],
                        'predicted_Reliability': out[:, 2]})

# Assign through a single .loc[rows, column] call. The chained form
# `dataset[col].loc[mask] = value` may write to a temporary copy and leave
# `dataset` unchanged.
# NOTE(review): a prediction of exactly 0.5 matches neither rule and is left
# as 0.5 -- confirm which class it should fall into.
is_high = dataset['predicted_Reliability'] > 0.5
is_low = dataset['predicted_Reliability'] < .5
dataset.loc[is_high, 'predicted_Reliability'] = 1
dataset.loc[is_low, 'predicted_Reliability'] = 0

In [None]:
# Place the loaded test rows and the prediction columns side by side; concat
# along the columns aligns the two frames on their row index.
# NOTE(review): presumably `dataset` has one row fewer than `test_dataset`
# (the reframing step drops rows with NaN) -- verify that pairing row i of the
# inputs with prediction i is the intended alignment.
frames = [test_dataset, dataset]
result = pd.concat(frames, axis="columns")

In [None]:
import numpy as np

# Preview the first rows of the combined table.
print(result.head(10))
# Write the table as comma-separated text without the row index.
# NOTE(review): the file name says KNN, but `out` holds the predictions of
# whichever model cell ran last -- confirm the name matches the model used.
result.to_csv('/Users/manali/JupyterProjects/predictions_KNN.csv', sep=',', index=False)