In [17]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [18]:
# Function for converting series data to a supervised data of format, t-1, t, t+1
## Basically feeding in the (t-1)th data to predict the t data
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg


In [19]:
 
# load dataset
dataset = read_csv('/Users/sakshikarnawat/Desktop/valet-tool/parse_tactics/normalized_tva_server_1_tactic_1_train.csv')
dataset= dataset.drop(columns=["timestamp","ping_timestamp","ping_success"])
values = dataset.values
print(dataset.head(5))
## Load Validation
validation = read_csv('/Users/sakshikarnawat/Desktop/valet-tool/parse_tactics/normalized_tva_server_1_tactic_1_test.csv')
validation= validation.drop(columns=["timestamp","ping_timestamp","ping_success"])
values_validation = validation.values


   time_since_last_recording   latency      cost  reliability  \
0                   0.000000  0.015102  0.193359            1   
1                   0.016458  0.015117  0.310547            1   
2                   0.003947  0.015297  0.169922            1   
3                   0.013916  0.014803  0.191406            1   
4                   0.016191  0.014817  0.167969            1   

   time_since_last_ping  ping_time  
0                   0.0   0.000000  
1                   0.0   0.000000  
2                   0.0   0.000000  
3                   0.0   0.000000  
4                   0.0   0.491071  


In [20]:
## Calling the function to do the preprocessing the data and removing unwanted columns


# frame as supervised learning
reframed = series_to_supervised(values, 1, 1)
reframed_validation = series_to_supervised(values_validation, 1, 1)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[6,10,11]], axis=1, inplace=True)
reframed_validation.drop(reframed_validation.columns[[6,10,11]], axis=1, inplace=True)
print(reframed.head(3))

   var1(t-1)  var2(t-1)  var3(t-1)  var4(t-1)  var5(t-1)  var6(t-1)   var2(t)  \
1   0.000000   0.015102   0.193359        1.0        0.0        0.0  0.015117   
2   0.016458   0.015117   0.310547        1.0        0.0        0.0  0.015297   
3   0.003947   0.015297   0.169922        1.0        0.0        0.0  0.014803   

    var3(t)  var4(t)  
1  0.310547      1.0  
2  0.169922      1.0  
3  0.191406      1.0  


In [21]:
## Splitting the data into training and validation sets


test = reframed.values
train = reframed_validation.values
# split into input and outputs
train_X, train_y = train[:, :-3], train[:,-3:]
test_X, test_y = test[:, :-3], test[:,-3:]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(2604, 1, 6) (2604, 3) (12156, 1, 6) (12156, 3)


In [22]:
# flatten input
n_input = train_X.shape[1] * train_X.shape[2]
X = train_X.reshape((train_X.shape[0], n_input))

In [23]:
### Doing the same process for testing dataset

test_dataset = read_csv('/Users/sakshikarnawat/Desktop/valet-tool/parse_tactics/normalized_tva_server_1_tactic_1_validation.csv')
test_dataset= test_dataset.drop(columns=["timestamp","ping_timestamp","ping_success"])
test_values = test_dataset.values
reframed_test = series_to_supervised(test_values, 1, 1)
reframed_test.drop(reframed_test.columns[[6,10,11]], axis=1, inplace=True)
testset = reframed_test.values
testset_X, testset_y = testset[:, :-3], testset[:,-3:]
testdataReshaped = testset_X.reshape((testset_X.shape[0], 1, testset_X.shape[1]))


## SVR algorithm with RBF kernel. 

In [24]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
regressor = SVR(kernel='rbf')
# flatten input
n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))
regr = MultiOutputRegressor(regressor)

regr.fit(X,train_y)
out= regr.predict(X2)

rmse = sqrt(mean_squared_error(out,testset_y))
print('Test RMSE: ' , rmse)

mse = mean_squared_error(out,testset_y)
print('Test MSE: ' , mse)

mae = mean_absolute_error(out,testset_y)
print('Test MSE: ' , mae)

Test RMSE:  0.05984670763705947
Test MSE:  0.003581628414995672
Test MSE:  0.025856263099595125


## SVR with Linear kernel 

In [9]:
# from sklearn.svm import SVR
# from sklearn.multioutput import MultiOutputRegressor
# regressor = SVR(kernel='linear')
# # flatten input
# n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
# X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))
# regr = MultiOutputRegressor(regressor)

# regr.fit(X,train_y)
# out= regr.predict(X2)

# rmse = sqrt(mean_squared_error(out,testset_y))
# print('Test RMSE: %.3f' % rmse)

## KNN Regression

In [10]:
# from sklearn.neighbors import KNeighborsRegressor
# from sklearn.multioutput import MultiOutputRegressor
# knn = KNeighborsRegressor()
# regr_knn = MultiOutputRegressor(knn)

# # flatten input
# n_input = testdataReshaped.shape[1] * testdataReshaped.shape[2]
# X2 = testdataReshaped.reshape((testdataReshaped.shape[0], n_input))

# regr_knn.fit(X,train_y)
# regr_knn.predict(testset_X)
# out= regr_knn.predict(X2)

# rmse = sqrt(mean_squared_error(out,testset_y))
# print('Test RMSE: %.3f' % rmse)

# Saving to file

In [11]:
## Feeding the test dataset for predictions
import pandas as pd

dataset = pd.DataFrame({'predicted_Latency': out[:, 0], 'predicted_Cost': out[:, 1],
                       'predicted_Reliability': out[:, 2]})
dataset['predicted_Reliability'].loc[dataset['predicted_Reliability'] >0.5] = 1
dataset['predicted_Reliability'].loc[dataset['predicted_Reliability'] <.5] = 0

In [12]:
frames = [test_dataset, dataset]
result = pd.concat(frames,axis =1)

In [13]:
import numpy as np
print(result.head(10))
# result.to_csv('/Users/manali/Desktop/PingPredictions/predictions_SVR_RBF_Server_3_Tactic_1_Normalized.csv', sep=',', index=0)

   time_since_last_recording   latency      cost  reliability  \
0                   0.005620  0.060326  0.046875            1   
1                   0.012578  0.196410  0.027344            1   
2                   0.002275  0.157676  0.031250            1   
3                   0.000937  0.124168  0.039062            1   
4                   0.003078  0.127279  0.023438            1   
5                   0.001873  0.069212  0.048828            1   
6                   0.018800  0.041113  0.064453            1   
7                   0.001204  0.045181  0.062500            1   
8                   0.001806  0.094119  0.048828            1   
9                   0.004014  0.043100  0.056641            1   

   time_since_last_ping  ping_time  predicted_Latency  predicted_Cost  \
0              0.010126   0.039746           0.135849        0.067383   
1              0.007816   0.039680           0.139878        0.067383   
2              0.001631   0.039549           0.139061        0.06