In [None]:
# Install the notebook's dependencies with the explicit %pip magic, so the cell
# does not depend on IPython's automagic setting being enabled.
%pip install pandas numpy scikit-learn matplotlib tensorflow

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D, LSTM
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [None]:
# Load the dataset and discard unusable rows. Infinite values are mapped to NaN
# first so that dropna() removes those rows as well: dropna() on its own keeps
# +/-inf, and the StandardScaler applied later raises on infinite input.
df = (
    pd.read_csv("All.csv")
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# "URL_Type_obf_Type" is the label column; every other column is used as a feature.
X = df.drop(columns=["URL_Type_obf_Type"])
y = df["URL_Type_obf_Type"]

In [None]:
# Map each class label to an integer id, then expand the ids into one-hot rows
# (the format required by the categorical cross-entropy loss used by the models).
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)
y_cat = to_categorical(y_encoded)

In [None]:
# Standardize every feature column, then append a trailing axis of length 1 so
# each sample has shape (n_features, 1), matching the models' input_shape.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
X_reshaped = X_scaled[:, :, np.newaxis]

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on the
# one-hot labels and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped,
    y_cat,
    test_size=0.2,
    random_state=42,
    stratify=y_cat,
)

In [None]:
# Model definitions: CNN, LSTM, and hybrid CNN+LSTM classifiers
def cnn_model(input_shape, num_classes):
    """Build and compile a 1-D convolutional classifier.

    Layer stack: Conv1D(64 filters, kernel size 3, ReLU) -> MaxPooling1D(2)
    -> Flatten -> Dense(64, ReLU) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single sample, handed to the first layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(units=64, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=num_classes, activation='softmax'),
    ]
    net = Sequential(stack)
    net.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return net

def lstm_model(input_shape, num_classes):
    """Build and compile a recurrent (LSTM) classifier.

    Layer stack: LSTM(64) -> Dense(64, ReLU) -> Dropout(0.5)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single sample, handed to the LSTM layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    stack = [
        LSTM(units=64, input_shape=input_shape),
        Dense(units=64, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=num_classes, activation='softmax'),
    ]
    net = Sequential(stack)
    net.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return net

def cnn_lstm_model(input_shape, num_classes):
    """Build and compile a hybrid classifier: convolution followed by an LSTM.

    Layer stack: Conv1D(64 filters, kernel size 3, ReLU) -> MaxPooling1D(2)
    -> LSTM(64) -> Dense(64, ReLU) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single sample, handed to the first layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(units=64),
        Dense(units=64, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=num_classes, activation='softmax'),
    ]
    net = Sequential(stack)
    net.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return net

In [None]:
# Training
input_shape = (X_train.shape[1], 1)
num_classes = y_cat.shape[1]

cnn = cnn_model(input_shape, num_classes)
lstm = lstm_model(input_shape, num_classes)
cnn_lstm = cnn_lstm_model(input_shape, num_classes)

# Every fit() call below passes `callbacks`, so it has to exist before training
# starts. Stop a run once validation loss has not improved for 10 epochs and
# roll the model back to its best weights instead of always spending all 100
# epochs. One instance is shared: EarlyStopping resets its state when training begins.
# NOTE(review): validation_data is the test split, so early stopping is tuned on
# test data — consider carving a separate validation split out of X_train.
callbacks = [
    EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
]

history_cnn = cnn.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=64, verbose=0, callbacks=callbacks)
history_lstm = lstm.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=64, verbose=0, callbacks=callbacks)
history_cnn_lstm = cnn_lstm.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=64, verbose=0, callbacks=callbacks)

In [None]:
def evaluate_model(model, X_test, y_test, model_name):
    """Score a trained classifier on held-out data and print a per-class report.

    Args:
        model: Trained model exposing ``predict`` and ``evaluate``.
        X_test: Test features to feed to the model.
        y_test: One-hot encoded test labels (decoded with argmax over axis 1).
        model_name: Display name used in the printed header and the result dict.

    Returns:
        dict with keys 'Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'
        (precision/recall/F1 are macro-averaged) and 'Loss' (first value
        returned by ``model.evaluate``).

    Note:
        Class names for the report are read from the module-level ``le``
        LabelEncoder.
    """
    y_pred = model.predict(X_test, verbose=0)
    y_pred_labels = np.argmax(y_pred, axis=1)
    y_true_labels = np.argmax(y_test, axis=1)

    # zero_division=0 yields the same scores as the default but without a
    # warning when some class is never predicted.
    acc = accuracy_score(y_true_labels, y_pred_labels)
    prec = precision_score(y_true_labels, y_pred_labels, average='macro', zero_division=0)
    rec = recall_score(y_true_labels, y_pred_labels, average='macro', zero_division=0)
    f1 = f1_score(y_true_labels, y_pred_labels, average='macro', zero_division=0)
    loss = model.evaluate(X_test, y_test, verbose=0)[0]

    # Pass the full label set explicitly: without `labels`, classification_report
    # raises when a class is missing from both y_true and y_pred because the
    # number of observed classes no longer matches len(target_names).
    class_ids = np.arange(len(le.classes_))
    # target_names must be strings for the report's column formatting.
    class_names = [str(name) for name in le.classes_]

    print(f"\nClassification Report for {model_name}:\n")
    print(classification_report(
        y_true_labels,
        y_pred_labels,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,
    ))

    return {
        'Model': model_name,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Loss': loss
    }

In [None]:
# Evaluate the three trained networks on the test split, in a fixed order.
cnn_metrics, lstm_metrics, cnn_lstm_metrics = (
    evaluate_model(net, X_test, y_test, label)
    for net, label in ((cnn, 'CNN'), (lstm, 'LSTM'), (cnn_lstm, 'CNN+LSTM'))
)

In [None]:
def plot_grouped_metrics(metrics_list, title=None):
    """Draw a grouped bar chart comparing models across the evaluation metrics.

    Args:
        metrics_list: List of dicts, one per model, each with the keys
            'Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score' and 'Loss'.
            All metrics except 'Loss' are multiplied by 100 before plotting.
        title: Optional chart title. Defaults to
            "Performance Comparison: <model A> vs <model B> vs ...".

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'Loss']
    model_names = [m['Model'] for m in metrics_list]
    num_metrics = len(metrics_names)
    num_models = len(model_names)

    # Bars are 0.25 wide for up to three models (the original layout) and
    # shrink beyond that so one metric's group never runs into the next.
    bar_width = min(0.25, 0.8 / max(num_models, 1))
    index = np.arange(num_metrics)
    # Tick position = middle of each group, for any number of models.
    group_center = index + bar_width * max(num_models - 1, 0) / 2

    # Extract metric values per model (Loss is plotted on its raw scale).
    model_scores = [
        [m[metric] * 100 if metric != 'Loss' else m[metric] for metric in metrics_names]
        for m in metrics_list
    ]

    if title is None:
        title = 'Performance Comparison: ' + ' vs '.join(str(name) for name in model_names)

    fig, ax = plt.subplots(figsize=(10, 6))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # Cycled when there are more models
    for i, (name, scores) in enumerate(zip(model_names, model_scores)):
        positions = index + i * bar_width
        ax.bar(positions, scores, width=bar_width, label=name, color=colors[i % len(colors)])
        # Annotate each bar with its value just above the top edge.
        for x_pos, value in zip(positions, scores):
            ax.text(x_pos, value + 1, f'{value:.2f}', ha='center', va='bottom', fontsize=8)

    # Labels and settings
    ax.set_xlabel('Metrics', fontsize=12)
    ax.set_ylabel('Score', fontsize=12)
    ax.set_title(title, fontsize=14)
    ax.set_xticks(group_center)
    ax.set_xticklabels(metrics_names)
    ax.set_ylim(0, 110)
    if num_models:
        # Skip the legend for an empty chart to avoid matplotlib's "no artists" warning.
        ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.6)

    fig.tight_layout()
    plt.show()

# Collect the per-model results in plotting order, then draw the comparison chart.
all_metrics = [cnn_metrics, lstm_metrics, cnn_lstm_metrics]
plot_grouped_metrics(all_metrics)
