In [2]:
#A2
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings

# Suppress warnings
warnings.filterwarnings('ignore')

# Load the dataset (update the path as per your file location)
data = pd.read_csv("C:/Users/Dell/Downloads/DCT_withoutduplicate 7.csv")

# Separate features and target
X = data.drop(columns=['LABEL'])
y = data['LABEL']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 1. Hyperparameter tuning for Perceptron
perceptron = Perceptron()

# Define hyperparameter search space for Perceptron
param_dist_perceptron = {
    'penalty': ['l2', 'l1', 'elasticnet'],  # Regularization type
    'alpha': [0.0001, 0.001, 0.01, 0.1],   # Strength of regularization
    'max_iter': [1000, 2000, 3000]          # Maximum number of iterations
}

# Setup RandomizedSearchCV for Perceptron
random_search_perceptron = RandomizedSearchCV(perceptron, param_distributions=param_dist_perceptron, 
                                              n_iter=10, scoring='accuracy', cv=5, random_state=42)
random_search_perceptron.fit(X_train, y_train)

# Evaluate the best Perceptron model
best_perceptron = random_search_perceptron.best_estimator_
y_pred_perceptron = best_perceptron.predict(X_test)

# Perceptron performance metrics
perceptron_accuracy = accuracy_score(y_test, y_pred_perceptron)
perceptron_precision = precision_score(y_test, y_pred_perceptron, average='weighted', zero_division=1)
perceptron_recall = recall_score(y_test, y_pred_perceptron, average='weighted', zero_division=1)
perceptron_f1 = f1_score(y_test, y_pred_perceptron, average='weighted', zero_division=1)

print("Best Perceptron Parameters:", random_search_perceptron.best_params_)
print("Perceptron - Accuracy:", perceptron_accuracy)
print("Perceptron - Precision:", perceptron_precision)
print("Perceptron - Recall:", perceptron_recall)
print("Perceptron - F1 Score:", perceptron_f1)

### 2. Hyperparameter tuning for MLPClassifier
mlp = MLPClassifier()

# Define hyperparameter search space for MLP
param_dist_mlp = {
    'hidden_layer_sizes': [(50, 50), (100,), (50, 100)],  # Different hidden layer configurations
    'activation': ['tanh', 'relu'],                       # Activation functions
    'solver': ['adam', 'sgd'],                            # Optimizers
    'alpha': [0.0001, 0.001, 0.01],                      # L2 regularization
    'learning_rate': ['constant', 'adaptive']             # Learning rate schedule
}

# Setup RandomizedSearchCV for MLP
random_search_mlp = RandomizedSearchCV(mlp, param_distributions=param_dist_mlp, 
                                       n_iter=10, scoring='accuracy', cv=5, random_state=42)
random_search_mlp.fit(X_train, y_train)

# Evaluate the best MLP model
best_mlp = random_search_mlp.best_estimator_
y_pred_mlp = best_mlp.predict(X_test)

# MLP performance metrics
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
mlp_precision = precision_score(y_test, y_pred_mlp, average='weighted', zero_division=1)
mlp_recall = recall_score(y_test, y_pred_mlp, average='weighted', zero_division=1)
mlp_f1 = f1_score(y_test, y_pred_mlp, average='weighted', zero_division=1)

print("Best MLP Parameters:", random_search_mlp.best_params_)
print("MLP - Accuracy:", mlp_accuracy)
print("MLP - Precision:", mlp_precision)
print("MLP - Recall:", mlp_recall)
print("MLP - F1 Score:", mlp_f1)

### 3. Comparing results
comparison_results = pd.DataFrame({
    'Model': ['Perceptron', 'MLPClassifier'],
    'Accuracy': [perceptron_accuracy, mlp_accuracy],
    'Precision': [perceptron_precision, mlp_precision],
    'Recall': [perceptron_recall, mlp_recall],
    'F1 Score': [perceptron_f1, mlp_f1]
})

print("\nComparison of Perceptron and MLP Classifier:")
print(comparison_results)


Best Perceptron Parameters: {'penalty': 'l1', 'max_iter': 2000, 'alpha': 0.001}
Perceptron - Accuracy: 0.30666666666666664
Perceptron - Precision: 0.7984675458530317
Perceptron - Recall: 0.30666666666666664
Perceptron - F1 Score: 0.278979159426382
Best MLP Parameters: {'solver': 'adam', 'learning_rate': 'adaptive', 'hidden_layer_sizes': (100,), 'alpha': 0.001, 'activation': 'relu'}
MLP - Accuracy: 0.7933333333333333
MLP - Precision: 0.8252496639163307
MLP - Recall: 0.7933333333333333
MLP - F1 Score: 0.7938015746249985

Comparison of Perceptron and MLP Classifier:
           Model  Accuracy  Precision    Recall  F1 Score
0     Perceptron  0.306667   0.798468  0.306667  0.278979
1  MLPClassifier  0.793333   0.825250  0.793333  0.793802


In [2]:
#A3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset (replace with the correct file path)
file_path = r'C:\Users\Dell\Downloads\DCT_withoutduplicate 7.csv'
data = pd.read_csv(file_path)

# Filter the dataset to include only records with LABEL 3333 and 3334
data_filtered = data[data['LABEL'].isin([3333, 3334])]

# Map labels 3333 to 0 and 3334 to 1 using .loc
data_filtered.loc[:, 'LABEL'] = data_filtered['LABEL'].map({3333: 0, 3334: 1})

# Split the dataset into features and labels
X = data_filtered.drop('LABEL', axis=1)
y = data_filtered['LABEL']

# Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a dictionary of classifiers to evaluate
classifiers = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(algorithm='SAMME'),
    "XGBoost": XGBClassifier(eval_metric='logloss'),
    "Naive Bayes": GaussianNB(),
    "CatBoost": CatBoostClassifier(silent=True)  # Set silent=True to suppress output
}

# Create a list to store the results
results = []

# Train and evaluate each classifier
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    
    # Calculate performance metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # Store the results in the list
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    })

# Convert the list of results to a DataFrame
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


      Classifier  Accuracy  Precision    Recall  F1-Score
0            SVM  0.777778   0.833333  0.777778  0.738095
1  Decision Tree  1.000000   1.000000  1.000000  1.000000
2  Random Forest  1.000000   1.000000  1.000000  1.000000
3       AdaBoost  1.000000   1.000000  1.000000  1.000000
4        XGBoost  1.000000   1.000000  1.000000  1.000000
5    Naive Bayes  0.944444   0.948718  0.944444  0.943030
6       CatBoost  1.000000   1.000000  1.000000  1.000000
