In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from itertools import combinations
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from keras.optimizers import Adam


In [2]:
# Read the CSV export into a DataFrame (path is relative to the working directory)
csv_path = 'processed_Infilteration_2.csv'
data = pd.read_csv(csv_path)

In [3]:

# Target vector: the 'Label' column
y = data['Label']
# Feature matrix: every remaining column except 'Timestamp', which is not used as a feature
X = data.drop(columns=['Label', 'Timestamp'])

In [4]:

# Standardise the feature columns with a StandardScaler.
# NOTE(review): the scaler is fitted on every row of X, i.e. before any
# train/test split, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [5]:

# Split data into training and testing sets (80 % / 20 %, fixed seed).
# The split is taken from the *unscaled* features so the scaler can be fitted
# on the training rows only; fitting it on the full dataset lets test-set
# statistics leak into training and inflates the reported scores.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# both splits. `scaler` is rebound so any later use of it stays consistent
# with how X_train / X_test were scaled.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)  # ndarray, as before
X_test = scaler.transform(X_test_raw)    # ndarray, as before

In [6]:

# Oversample with SMOTE (fixed seed). Only the training split is resampled;
# the test split is left as drawn.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train, y_train = resampled

In [7]:

# Absolute pairwise correlation between the numeric columns of `data`.
# numeric_only=True makes the exclusion of non-numeric columns explicit:
# pandas 1.5 only warns when it has to drop them implicitly, and pandas >= 2.0
# raises instead of dropping them.
corr_matrix = data.corr(numeric_only=True).abs()

# Keep only the strict upper triangle (k=1 excludes the diagonal); every other
# entry becomes NaN, so each column pair appears exactly once.
upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
upper_tri = corr_matrix.where(upper_mask)

  corr_matrix = data.corr().abs()


In [8]:

# Rank the feature columns by absolute correlation with 'Label'.
#
# The ranking reads the full correlation matrix rather than its upper-triangle
# view: the triangle mask blanks the 'Label' column for every row at or after
# 'Label', so it only gives the right answer when 'Label' is the last column.
# Candidates are limited to columns that exist in X, because the selected
# names are later resolved with X.columns.get_loc(...).
# Columns whose correlation is undefined (NaN) are never selected, so a list
# can be shorter than its nominal size if too few candidates remain.
# NOTE(review): corr_matrix is computed on all rows of `data`, including rows
# that end up in the test split — confirm this is acceptable.
feature_candidates = [col for col in X.columns if col in corr_matrix.index]
label_corr = corr_matrix.loc[feature_candidates, 'Label'].dropna()

# One ranking, sliced three ways (the smaller sets are prefixes of the larger ones)
ranked_features = label_corr.nlargest(30).index.tolist()

# Select top 15 features based on correlation with label
best_features_15 = ranked_features[:15]

# Select top 20 features based on correlation with label
best_features_20 = ranked_features[:20]

# Select top 30 features based on correlation with label
best_features_30 = ranked_features[:30]

In [None]:
from sklearn.svm import SVC

# Train and evaluate one linear SVM per candidate feature set.
# NOTE(review): kernel SVC training cost grows quickly with the number of
# samples; this may be slow on the SMOTE-enlarged training set — confirm runtime.
for feature_set in (best_features_15, best_features_20, best_features_30):
    # Translate the selected column names into positional indices of X,
    # since X_train / X_test are plain arrays at this point
    column_positions = np.array([X.columns.get_loc(col) for col in feature_set])
    X_train_subset = X_train[:, column_positions]
    X_test_subset = X_test[:, column_positions]

    # Fit the classifier on the reduced training matrix
    svm = SVC(kernel='linear', random_state=42)
    svm.fit(X_train_subset, y_train)

    # Score on the held-out split and report
    y_pred = svm.predict(X_test_subset)
    accuracy = accuracy_score(y_test, y_pred)
    print('Feature set:', feature_set)
    print('Accuracy:', accuracy)

    # Confusion matrix as an annotated heatmap, one new figure per feature set
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', ax=ax)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    ax.set_title('Confusion matrix')
    plt.show()
