In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb
import pandas as pd

# Load your dataset
df = pd.read_csv("M:/Grad/Final.csv")
df = df.drop("Unnamed: 0", axis=1)

# Shuffling
df = df.sample(frac=1, random_state=42)
# Display the columns and their data types
print(df.dtypes)
df.isna().sum()

# Calculate the mean for '0_Kurtosis' and '0_Skewness' columns
mean_kurtosis = df['0_Kurtosis'].mean()
mean_skewness = df['0_Skewness'].mean()

# Fill null values in '0_Kurtosis' and '0_Skewness' columns with their respective means
df['0_Kurtosis'].fillna(mean_kurtosis, inplace=True)
df['0_Skewness'].fillna(mean_skewness, inplace=True)

# Assuming the target variable is in the 'Class' column
X = df.drop('Class',axis=1)
y = df['Class']
# Replace 'your_dataset.csv' with the actual path or URL of your dataset
# Assuming dataset is already loaded as X and y

# Define the parameter grids
test_size_values = [0.2, 0.3,0.1]  # Test size values to try
random_state_values = [42, 100,2022]  # Random state values to try
learning_rate_values = [0.01, 0.1,0.001]  # Learning rate values to try
max_depth_values = [2,3, 5]  # Max depth values to try
n_estimators_values = [50,100, 200]  # Number of estimators values to try

best_accuracy = 0  # Initialize best accuracy

# Iterate over all parameter combinations
for test_size in test_size_values:
    for random_state in random_state_values:
        for learning_rate in learning_rate_values:
            for max_depth in max_depth_values:
                for n_estimators in n_estimators_values:
                    # Split the data into training and testing sets
                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

                    # Initialize and train the XGBoost model
                    model = xgb.XGBClassifier(
                        objective='multi:softmax',  
                        learning_rate=learning_rate,
                        max_depth=max_depth,
                        n_estimators=n_estimators,
                        eval_metric='mlogloss'
                    )
                    model.fit(X_train, y_train)

                    # Make predictions on the test set
                    y_pred = model.predict(X_test)

                    # Evaluate the model
                    accuracy_test = accuracy_score(y_test, y_pred)

                    # Check if current accuracy is better than the best accuracy
                    if accuracy_test > best_accuracy:
                        best_accuracy = accuracy_test
                        best_parameters = {
                            'test_size': test_size,
                            'random_state': random_state,
                            'learning_rate': learning_rate,
                            'max_depth': max_depth,
                            'n_estimators': n_estimators
                        }

# Print the best parameters and the corresponding best accuracy
print("Best Parameters:")
print(best_parameters)
print("Best Test Accuracy:", best_accuracy)


1                         int64
2                         int64
3                         int64
4                         int64
5                         int64
                         ...   
0_Wavelet variance_5    float64
0_Wavelet variance_6    float64
0_Wavelet variance_7    float64
0_Wavelet variance_8    float64
0_Zero crossing rate    float64
Length: 1829, dtype: object
Best Parameters:
{'test_size': 0.1, 'random_state': 2022, 'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 200}
Best Test Accuracy: 0.8029197080291971


In [2]:
accuracy_train = accuracy_score(y_train, model.predict(X_train))
print(f'Training Accuracy: {accuracy_train:.2f}')

Training Accuracy: 0.85
