In [None]:
import pandas as pd
import numpy as np
from sklearn import metrics 
from sklearn.model_selection import train_test_split
from matplotlib import pyplot


def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows containing NaN or +/-inf.

    Args:
        df: The ``pd.DataFrame`` to clean. It is not modified.

    Returns:
        A new ``pd.DataFrame`` holding only the rows of ``df`` in which no
        value is missing or infinite, with every column cast to
        ``np.float64``. The original index labels of the kept rows are
        preserved.

    Raises:
        AssertionError: If ``df`` is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # dropna() without inplace=True returns a new frame, so the caller's
    # DataFrame is not mutated as a side effect of cleaning.
    without_missing = df.dropna()
    # Missing values are already gone, so only the infinities need matching.
    # axis is passed by keyword: pandas 2.0+ rejects the positional any(1).
    indices_to_keep = ~without_missing.isin([np.inf, -np.inf]).any(axis=1)
    return without_missing[indices_to_keep].astype(np.float64)


def rank_feature_importance(m, type="Logistic Regression"):
  """Print and bar-plot one importance score per feature of a fitted model.

  Args:
    m: Fitted estimator. Its ``feature_importances_`` attribute is read when
      ``type`` is "Random Forest"; for any other value the first row of
      ``coef_`` is used instead.
    type: Model label; only the exact string "Random Forest" selects
      ``feature_importances_``.
  """
  importance = m.feature_importances_ if type == "Random Forest" else m.coef_[0]

  # One line per feature: its positional index followed by its score.
  for position, score in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (position, score))

  # Bar chart with the feature's positional index on the x axis.
  positions = list(range(len(importance)))
  pyplot.bar(positions, importance)
  pyplot.show()

  
# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('dataset_sdn.csv')

# Columns that are excluded from the feature set before training.
unneeded_features = [
    'byteperflow', 'tot_dur', 'packetins', 'tx_bytes', 'rx_bytes',
    'pktperflow', 'pktrate', 'src', 'dst', 'Protocol', 'dt', 'switch',
    'dur', 'dur_nsec', 'flows', 'Pairflow', 'port_no', 'tx_kbps',
    'rx_kbps', 'tot_kbps',
]

# Drop them in one call; like the per-column delete, this raises KeyError
# if any listed column is absent from the file.
df = df.drop(columns=unneeded_features)

# Clean dataframe: remove every row holding a missing or infinite value and
# cast what remains to float64. One pass is sufficient - repeating it on an
# already-clean frame changes nothing.
df = df.dropna()
# axis is passed by keyword: pandas 2.0+ rejects the positional any(1).
indices_to_keep = ~df.isin([np.inf, -np.inf]).any(axis=1)
df = df[indices_to_keep].astype(np.float64)

# Separate the feature matrix from the target column.
x = df.drop(columns='label')
y = df['label']

# A fixed seed makes the split - and every metric computed from it -
# identical across "Restart & Run All" executions.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)


In [None]:
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training split only, then apply that same
# fitted scaler to the test split so both share one [-1, 1] mapping.
scaling = MinMaxScaler(feature_range=(-1, 1))
x_train = scaling.fit_transform(x_train)
x_test = scaling.transform(x_test)

In [None]:
from sklearn.linear_model import LogisticRegression
# Train logistic regression on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
clf = LogisticRegression(max_iter=10000).fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy_percentage = accuracy * 100
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)

print("Logistic Regression Accuracy:", accuracy_percentage)
print("Precision:", precision)
print("Recall:", recall)

Logistic Regression Accuracy: 69.04205159658066
Precision: 0.6204580884998201
Recall: 0.5130391670798216


In [None]:
from sklearn.ensemble import RandomForestClassifier
# Train a random forest on the training split. The forest is built with
# randomness, so a fixed random_state is required for the metrics below to be
# reproducible from one run to the next.
clf = RandomForestClassifier(random_state=42)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy_percentage = 100 * accuracy
print("Random Forest Accuracy:", accuracy_percentage)
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))

Random Forest Accuracy: 99.78916701805497
Precision: 0.9966329966329966
Recall: 0.9979176995537927


In [None]:
from sklearn import svm 
# Train a support-vector classifier with the RBF kernel; fit() returns the
# estimator itself, so construction and fitting can be chained.
clf = svm.SVC(kernel='rbf', verbose=3).fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy_percentage = accuracy * 100
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)

print("SVM Accuracy:", accuracy_percentage)
print("Precision:", precision)
print("Recall:", recall)

[LibSVM]SVM Accuracy: 84.97335837773605
Precision: 0.8157328689951859
Recall: 0.7896876549330689


In [None]:
from sklearn import svm 
# Train a support-vector classifier with the linear kernel; fit() returns the
# estimator itself, so construction and fitting can be chained.
clf = svm.SVC(kernel='linear', verbose=3).fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy_percentage = accuracy * 100
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)

print("SVM Accuracy:", accuracy_percentage)
print("Precision:", precision)
print("Recall:", recall)

[LibSVM]SVM Accuracy: 68.58971901713498
Precision: 0.5918585446419897
Recall: 0.6040654437283094


In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Train an extra-trees ensemble on the training split. The trees are built
# with randomness, so a fixed random_state is required for the metrics below
# to be reproducible from one run to the next.
clf = ExtraTreesClassifier(random_state=42)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Score the predictions against the held-out labels.
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy_percentage = 100 * accuracy
print("Extra Trees Accuracy:", accuracy_percentage)
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))

Extra Trees Accuracy: 99.78916701805497
Precision: 0.9966329966329966
Recall: 0.9979176995537927
