In [1]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


In [2]:
# Function for converting series data to a supervised data of format, t-1, t, t+1
## Basically feeding in the (t-1)th data to predict the t data
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg


In [3]:
 
# load dataset
dataset = read_csv('normalized_tva_server_1_tactic_1_train.csv')
dataset= dataset.drop(columns=["ID"])
values = dataset.values
print(dataset.head(5))
## Load Validation
validation = read_csv('normalized_tva_server_1_tactic_1_test.csv')
validation= validation.drop(columns=["ID"])
values_validation = validation.values


      Hours   Minutes   Seconds   Latency      Cost  Reliability
0  0.695652  0.576271  0.758364  0.015102  0.193359            1
1  0.695652  0.644068  0.861176  0.015117  0.310547            1
2  0.695652  0.661017  0.854314  0.015297  0.169922            1
3  0.695652  0.728814  0.320405  0.014803  0.191406            1
4  0.695652  0.796610  0.353964  0.014817  0.167969            1


In [4]:
## Calling the function to do the preprocessing the data and removing unwanted columns


# frame as supervised learning
reframed = series_to_supervised(values, 1, 1)
reframed_validation = series_to_supervised(values_validation, 1, 1)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[6,7,8]], axis=1, inplace=True)
reframed_validation.drop(reframed_validation.columns[[6,7,8]], axis=1, inplace=True)
print(reframed.head(3))

   var1(t-1)  var2(t-1)  var3(t-1)  var4(t-1)  var5(t-1)  var6(t-1)   var4(t)  \
1   0.695652   0.576271   0.758364   0.015102   0.193359        1.0  0.015117   
2   0.695652   0.644068   0.861176   0.015117   0.310547        1.0  0.015297   
3   0.695652   0.661017   0.854314   0.015297   0.169922        1.0  0.014803   

    var5(t)  var6(t)  
1  0.310547      1.0  
2  0.169922      1.0  
3  0.191406      1.0  


In [5]:
## Splitting the data into training and validation sets


test = reframed.values
train = reframed_validation.values
# split into input and outputs
train_X, train_y = train[:, :-3], train[:,-3:]
test_X, test_y = test[:, :-3], test[:,-3:]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(2604, 1, 6) (2604, 3) (12156, 1, 6) (12156, 3)


In [6]:
# flatten input
n_input = train_X.shape[1] * train_X.shape[2]
X = train_X.reshape((train_X.shape[0], n_input))

In [7]:
### Doing the same process for testing dataset

test_dataset = read_csv('normalized_tva_server_1_tactic_1_validation.csv')
test_dataset= test_dataset.drop(columns=["ID"])
test_values = test_dataset.values
reframed_test = series_to_supervised(test_values, 1, 1)
reframed_test.drop(reframed_test.columns[[6,7,8]], axis=1, inplace=True)
testset = reframed_test.values
testset_X, testset_y = testset[:, :-3], testset[:,-3:]
testdataReshaped = testset_X.reshape((testset_X.shape[0], 1, testset_X.shape[1]))


## SVR algorithm with RBF kernel. 

In [8]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
regressor = SVR(kernel='rbf')
# flatten input
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))
regr = MultiOutputRegressor(regressor)

regr.fit(X,train_y)
out= regr.predict(X2)

rmse = sqrt(mean_squared_error(out,testset_y))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 0.064


## SVR with Linear kernel 

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
regressor = SVR(kernel='linear')
# flatten input
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))
regr = MultiOutputRegressor(regressor)

regr.fit(X,train_y)
out= regr.predict(X2)

rmse = sqrt(mean_squared_error(out,testset_y))
print('Test RMSE: %.3f' % rmse)

## KNN Regression

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
knn = KNeighborsRegressor()
regr_knn = MultiOutputRegressor(knn)

# flatten input
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))

regr_knn.fit(X,train_y)
regr_knn.predict(testset_X)
out= regr_knn.predict(X2)

rmse = sqrt(mean_squared_error(out,testset_y))
print('Test RMSE: %.3f' % rmse)

# Saving to file

In [9]:
## Feeding the test dataset for predictions
import pandas as pd

dataset = pd.DataFrame({'predicted_Latency': out[:, 0], 'predicted_Cost': out[:, 1],
                       'predicted_Reliability': out[:, 2]})
dataset['predicted_Reliability'].loc[dataset['predicted_Reliability'] >0.5] = 1
dataset['predicted_Reliability'].loc[dataset['predicted_Reliability'] <.5] = 0

In [10]:
frames = [test_dataset, dataset]
result = pd.concat(frames,axis =1)

In [11]:
import numpy as np
print(result.head(10))
result.to_csv('/Users/manali/JupyterProjects/predictions_SVR_RBF_Server_1_Tactic_1.csv', sep=',', index=0)

     Hours   Minutes   Seconds   Latency      Cost  Reliability  \
0  0.26087  0.118644  0.391044  0.017155  0.160156            1   
1  0.26087  0.135593  0.558481  0.014948  0.167969            1   
2  0.26087  0.135593  0.754070  0.015292  0.164062            1   
3  0.26087  0.169492  0.727705  0.016283  0.169922            1   
4  0.26087  0.169492  0.738709  0.015571  0.173828            1   
5  0.26087  0.220339  0.591174  0.015474  0.171875            1   
6  0.26087  0.254237  0.085589  0.016747  0.175781            1   
7  0.26087  0.271186  0.970546  0.015070  0.162109            1   
8  0.26087  0.288136  0.392530  0.014855  0.164062            1   
9  0.26087  0.305085  0.287840  0.016296  0.167969            1   

   predicted_Latency  predicted_Cost  predicted_Reliability  
0           0.065029        0.238948                    1.0  
1           0.065029        0.243761                    1.0  
2           0.065029        0.235521                    1.0  
3           0.