In [1]:
import pandas as pd
import glob
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [2]:
# Grayson's path
train_folder = "C:\\Users\\grays\\Will-Grayson GitHub Repo\\will-grayson-ML\\two-class\\data\\train\\"
test_folder = "C:\\Users\\grays\\Will-Grayson GitHub Repo\\will-grayson-ML\\two-class\\data\\test\\"

# Will's path (comment out if not in use)
#train_folder = "C:\\Users\\willg\\OneDrive\\CSCI\\summer-2024-work\\will-grayson-ML\\train\\"
#test_folder = "C:\\Users\\willg\\OneDrive\\CSCI\\summer-2024-work\\will-grayson-ML\\test\\"

# Function to load and concatenate CSV files from a list of file paths
def load_and_concat(files, sample_fraction=None):
    df_list = []
    for file in files:
        df = pd.read_csv(file)
        if sample_fraction:
            df = df.sample(frac=sample_fraction, random_state=42)  # Random sampling
        df_list.append(df)
    return pd.concat(df_list, ignore_index=True)

# List all CSV files in the train and test folders
train_files = glob.glob(train_folder + "*.csv")
test_files = glob.glob(test_folder + "*.csv")

# Load and concatenate training and testing data
train_df = load_and_concat(train_files, sample_fraction=0.1)  # Use 10% of the data
test_df = load_and_concat(test_files, sample_fraction=0.1)

In [3]:
feature_columns = [col for col in train_df.columns if col not in ['benign', 'attack']]

# Separate features and labels
X_train = train_df[feature_columns]
y_train = train_df['attack']
X_test = test_df[feature_columns]
y_test = test_df['attack']

# Create a scaler object
scaler = MinMaxScaler()

# Fit the scaler to the training features and transform both training and testing features
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Build the DNN model
model = Sequential()
model.add(Dense(128, input_dim=len(feature_columns), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Train the model
model.fit(X_train_scaled, y_train, epochs=25, batch_size=128, validation_split=0.2)

# Make predictions
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")

# Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Epoch 1/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.9952 - loss: 0.0119 - val_accuracy: 0.9995 - val_loss: 0.0016
Epoch 2/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9958 - loss: 0.0104 - val_accuracy: 0.9995 - val_loss: 0.0015
Epoch 3/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.9958 - loss: 0.0101 - val_accuracy: 0.9996 - val_loss: 0.0017
Epoch 4/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9959 - loss: 0.0097 - val_accuracy: 0.9996 - val_loss: 0.0017
Epoch 5/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9960 - loss: 0.0091 - val_accuracy: 0.9996 - val_loss: 0.0012
Epoch 6/25
[1m4476/4476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.9960 - loss: 0.0093 - val_accuracy: 0.9999 - val_loss: 6.8499e-04
Epoch 7/25
