In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
import seaborn as sns
from geneticalgorithm2 import geneticalgorithm2 as ga



In [11]:
# Load data
data = pd.read_csv('final_dataset_Infilteration.csv')
X = data.drop(['Timestamp', 'Label'], axis=1)
y = data['Label']
y = y.values


# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [12]:
# Define LSTM model
def create_model(input_dim):
    model = Sequential()
    model.add(LSTM(50, input_shape=(1, input_dim)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [31]:

# Define fitness function for genetic algorithm
def fitness_function(X, y, chromosome):
    # Get selected feature indices
    feature_indices = [i for i, x in enumerate(chromosome) if x.all() == 1]
    X_subset = X[:, feature_indices]

    # Reshape data to match LSTM input shape
    X_subset = X_subset.reshape((X_subset.shape[0], 1, X_subset.shape[1]))

    # Create and fit LSTM model
    model = create_model(X_subset.shape[2])
    model.fit(X_subset, y, epochs=10, batch_size=32, verbose=0)

    # Make predictions and calculate accuracy
    y_pred = model.predict_classes(X_subset)
    accuracy = accuracy_score(y, y_pred)
    if np.all(y == 1):
        return 1.0
    elif np.all(y == 0):
        return 0.0
    else:
        return accuracy

In [28]:
# Define genetic algorithm object
dimension = X.shape[1]
algorithm_param = {'max_num_iteration': 1000, 'population_size': 100, 'mutation_probability': 0.1, 'elit_ratio': 0.01, 'crossover_probability': 0.5, 'parents_portion': 0.3, 'crossover_type': 'uniform', 'mutation_type': 'uniform_by_center', 'selection_type': 'roulette', 'max_iteration_without_improv': None}
model_ga = ga(function=fitness_function, dimension=dimension, variable_type='bool', variable_boundaries=np.array([[0, 1]] * dimension), algorithm_parameters=algorithm_param)



In [40]:

# Run genetic algorithm
best_chromosome, best_fitness = model_ga.run(X_train, y_train, np.all(y_train == 1), progress_bar_stream=None)



TypeError: geneticalgorithm2.run() got multiple values for argument 'progress_bar_stream'

In [None]:
# Get best feature set
best_features = [i for i, x in enumerate(model_ga.best_variable) if x == 1]
X_train_subset = X_train[:, best_features]
X_test_subset = X_test[:, best_features]



In [None]:
# Reshape data to match LSTM input shape
X_train_subset = X_train_subset.reshape((X_train_subset.shape[0], 1, X_train_subset.shape[1]))
X_test_subset = X_test_subset.reshape((X_test_subset.shape[0], 1, X_test_subset.shape[1]))


In [None]:

# Create and fit LSTM model
model = create_model(X_train_subset.shape[2])
model.fit(X_train_subset, y_train, epochs=10, batch_size=32, verbose=1)



In [None]:
# Make predictions and calculate accuracy
y_pred = model.predict_classes(X_test_subset)
accuracy = accuracy_score(y_test, y_pred)
print('Best feature set:', best_features)

In [None]:
# Get the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix
sns.heatmap(cm, annot=True, cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
