| Column Name         | Description                                                                                     |
|---------------------|-------------------------------------------------------------------------------------------------|
| Booking_ID | Unique identifier for each booking.                                                               |
| number of adults           | The count of adult guests in the booking.                                                                              |
| number of children         |The count of child guests in the booking.                                                                            |
| number of weekend nights       | The number of nights during the weekend (typically Friday and Saturday nights).                                             |
| number of week nights             | The number of nights during the weekdays (Sunday through Thursday nights).                         |
| type of meal   | The meal plan included in the booking.                      |
| car parking space                 | Whether the booking includes a parking space for a car.                                                            |
| room type         | The type of room booked.                                                          |
| lead time     | Number of days that elapsed between date of the booking into the PMS and the arrival date                                                           |
| market segment type    | The market segment the booking belongs to                                                   |
| repeated     | Indicates whether the booking is a repeat booking.                                                    |
| P-C   | Probability of Cancelation - The likelihood that the booking will be canceled.                                                  |
| P-not-C    | Probability of Not Cancelation - The likelihood that the booking will not be canceled.                                                   |
| average price      | The average price per night for the booking.                                                              |
| special requests                | Any additional requests or requirements made by the guest.                                                         |
| date of reservation                 | Date when the booking was made.                                                        |
| booking status               | The current status of the booking (e.g., confirmed, canceled, pending).                                                                   |


In [618]:
import pandas as pd
import numpy as np
import pickle

import seaborn as sns
import matplotlib.pyplot as plt

from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, LabelEncoder, MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, classification_report
from sklearn.ensemble import  RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV


from scipy.stats import zscore, skew, randint, uniform




# Step 1 **Data Preprocessing**

In [644]:
df = pd.read_csv(r"C:\Users\youss\Desktop\eng\Machine_learning_course\Cellula technologies\first inten project.csv")
df1 = df.copy()
df2 = df.copy()

In [645]:
df.rename(columns ={"average price " : "average price" }, inplace= True)    # for the extra white space
df.describe()

Unnamed: 0,number of adults,number of children,number of weekend nights,number of week nights,car parking space,lead time,repeated,P-C,P-not-C,average price,special requests
count,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0,36285.0
mean,1.844839,0.10536,0.810693,2.204602,0.030977,85.239851,0.02563,0.023343,0.153369,103.421636,0.619733
std,0.518813,0.402704,0.87059,1.410946,0.173258,85.938796,0.158032,0.368281,1.753931,35.086469,0.786262
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,1.0,0.0,17.0,0.0,0.0,0.0,80.3,0.0
50%,2.0,0.0,1.0,2.0,0.0,57.0,0.0,0.0,0.0,99.45,0.0
75%,2.0,0.0,2.0,3.0,0.0,126.0,0.0,0.0,0.0,120.0,1.0
max,4.0,10.0,7.0,17.0,1.0,443.0,1.0,13.0,58.0,540.0,5.0


In [646]:
df['market segment type'].unique()

array(['Offline', 'Online', 'Corporate', 'Aviation', 'Complementary'],
      dtype=object)

In [647]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36285 entries, 0 to 36284
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Booking_ID                36285 non-null  object 
 1   number of adults          36285 non-null  int64  
 2   number of children        36285 non-null  int64  
 3   number of weekend nights  36285 non-null  int64  
 4   number of week nights     36285 non-null  int64  
 5   type of meal              36285 non-null  object 
 6   car parking space         36285 non-null  int64  
 7   room type                 36285 non-null  object 
 8   lead time                 36285 non-null  int64  
 9   market segment type       36285 non-null  object 
 10  repeated                  36285 non-null  int64  
 11  P-C                       36285 non-null  int64  
 12  P-not-C                   36285 non-null  int64  
 13  average price             36285 non-null  float64
 14  specia

In [648]:
df['room type'].unique()

array(['Room_Type 1', 'Room_Type 4', 'Room_Type 2', 'Room_Type 6',
       'Room_Type 5', 'Room_Type 7', 'Room_Type 3'], dtype=object)

In [649]:
print(f"The size of my Dataset: {df.shape}")

The size of my Dataset: (36285, 17)


## 1.1 Check Null

In [650]:
has_null = df.isnull().values.any()  # False
print(f"DataFrame has null values: {has_null}")

DataFrame has null values: False


## 1.2 Data Type

In [651]:
df.dtypes

Booking_ID                   object
number of adults              int64
number of children            int64
number of weekend nights      int64
number of week nights         int64
type of meal                 object
car parking space             int64
room type                    object
lead time                     int64
market segment type          object
repeated                      int64
P-C                           int64
P-not-C                       int64
average price               float64
special requests              int64
date of reservation          object
booking status               object
dtype: object

Each Column has its appropriate data type

In [652]:
# Check for duplicates in the entire DataFrame
duplicates = df[df.duplicated()]

print("Duplicate rows in the DataFrame:")
print(duplicates)

Duplicate rows in the DataFrame:
Empty DataFrame
Columns: [Booking_ID, number of adults, number of children, number of weekend nights, number of week nights, type of meal, car parking space, room type, lead time, market segment type, repeated, P-C, P-not-C, average price, special requests, date of reservation, booking status]
Index: []


Note: Data has no dublicates

In [653]:
Sscaler = StandardScaler()
Mscaler = MinMaxScaler()

## Normalize and Scale the columns of high variance
df['lead time'] = Sscaler.fit_transform(df[['lead time']])
df['lead time'] = Mscaler.fit_transform(df[['lead time']])
df['average price'] = Sscaler.fit_transform(df[['average price']])
df['average price'] = Mscaler.fit_transform(df[['average price']])



In [654]:

with open('Sscaler.pkl', 'wb') as f:  # open a text file
    pickle.dump(Sscaler, f) # serialize the list


with open('Mscaler.pkl', 'wb') as f:  # open a text file
    pickle.dump(Mscaler, f) # serialize the list

# Step 2 **Check an Handle Outliers**

## 2.1 Check Outliers

In [655]:
numeric_cols = df.select_dtypes(include=[np.number]).columns
cat_cols = df.select_dtypes(include=['object'])

skewness_values = df[numeric_cols].apply(skew)
print("Skewness of numeric columns:")
print(skewness_values)

Skewness of numeric columns:
number of adults            -0.332819
number of children           4.707481
number of weekend nights     0.737605
number of week nights        1.598619
car parking space            5.414240
lead time                    1.292382
repeated                     6.003535
P-C                         25.202317
P-not-C                     19.252059
average price                0.667158
special requests             1.145051
dtype: float64


Notice the high Skewness in the data.
To handle outliers we have 2 methods:

1.   Use ***Zscore*** for normal distributed data

2.   Use ***IQR*** for skewed data




In the following code we check for skweness and apply the suitable method (using a threshold of 0.5)

In [656]:
numeric_cont_cols = ['lead time', 'average price']
skewed_cols = df[numeric_cont_cols].apply(skew)

outliers_zscore = pd.DataFrame()
outliers_IQR = pd.DataFrame()

for col in numeric_cont_cols:

  if(np.abs(skewed_cols[col]) < 0.5):
    # Use Z-score for near-normal distributions
    z_scores = np.abs(zscore(df[col]))
    outliers_rows = df.loc[z_scores > 4, col]
    # Store the outliers rows with its outlier value for any further processing
    outliers_zscore = pd.concat([outliers_zscore, outliers_rows], axis=1)       #column wise

  else:
    Q1 = df[col].quantile(.25)
    Q3 = df[col].quantile(.75)
    IQR = Q3 - Q1
    # Used factor 2 instead of 1.5 due to the high percentage of outliers
    outliers_col = df.loc[(df[col] < (Q1 - 2 * IQR)) | (df[col] > (Q3 + 2 * IQR)), col]
    outliers_IQR = pd.concat([outliers_IQR, outliers_col], axis=1)


# Combine outlier indices from both methods
outlier_indices_zscore = outliers_zscore.index
outlier_indices_IQR = outliers_IQR.index
total_outlier_indices = outlier_indices_zscore.union(outlier_indices_IQR)

# Remove outliers from the original DataFrame
df_cleaned = df.drop(total_outlier_indices)

print("Outliers detected using Z-score:")
print(outliers_zscore)
print("\nOutliers detected using IQR:")
print(outliers_IQR)

outliers_count = len(total_outlier_indices)
total_rows = df.shape[0]
cleaned_rows = df_cleaned.shape[0]
percentage_outliers = (outliers_count / total_rows) * 100


print(f'Total number of rows has outliers: "{outliers_count}" ')
print(f'The size of the Dataset after extracting outliers: "{df_cleaned.shape}"')

print(f'The percentage of outliers removed: {percentage_outliers:.2f}%')
df = df_cleaned

Outliers detected using Z-score:
Empty DataFrame
Columns: []
Index: []

Outliers detected using IQR:
       lead time  average price
5       0.781038            NaN
118     1.000000            NaN
199     0.943567            NaN
205     0.977427            NaN
369     0.871332            NaN
...          ...            ...
36217        NaN       0.000000
36221        NaN       0.385000
36227        NaN       0.427778
36250        NaN       0.000000
36269        NaN       0.400000

[1442 rows x 2 columns]
Total number of rows has outliers: "1442" 
The size of the Dataset after extracting outliers: "(34843, 17)"
The percentage of outliers removed: 3.97%


Note:


*   Low Percentage: In many cases, datasets with well-behaved, normally distributed data might have outlier percentages in the range of 1% to 5%.
*   Higher Percentage: Datasets with skewed distributions or data prone to extreme values (e.g., income data, social media metrics) might have outlier percentages exceeding 5% or more.

-ChatGPT



# Step 3 **Feature Engineering**

In [657]:
df['date of reservation'] = pd.to_datetime(df['date of reservation'], format='%m/%d/%Y',errors ='coerce')
df.drop(columns=['Booking_ID'], inplace=True)
df.drop(columns=['repeated'], inplace=True)
df.drop(columns=['P-not-C'], inplace=True)


# Extract date features
# df['reservation_year'] = df['date of reservation'].dt.year
df['reservation_month'] = df['date of reservation'].dt.month
# df['reservation_day'] = df['date of reservation'].dt.day
df['reservation_day_of_week'] = df['date of reservation'].dt.dayofweek
df['is_weekend'] = df['reservation_day_of_week'].apply(lambda x: 1 if x in [4,5] else 0)      # for friday and saturday
df.drop(columns=['date of reservation'], inplace=True)

# Create new features
df['stay_duration'] = df['number of weekend nights'] + df['number of week nights']
df['total_guests'] = df['number of adults'] + df['number of children']

# Handle missing values if any
num_rows_with_nan = df.isna().any(axis=1).sum()
print(f"DataFrame has null values: {num_rows_with_nan}")
if num_rows_with_nan:
  df.dropna(inplace=True)


DataFrame has null values: 37


# Step 4: **Transformation For The Categorical Data**

In [658]:
# First encode the binary categories
df['booking status'] = df['booking status'].map({'Canceled': 0, 'Not_Canceled': 1})
label_encoder = LabelEncoder()
df['type of meal'] = label_encoder.fit_transform(df['type of meal'])
df['market segment type'] = label_encoder.fit_transform(df['market segment type'])


# Second encode the multivalue categories
tmp_cols = df.select_dtypes(include='object').columns
encoded_df = pd.get_dummies(data = df, prefix = tmp_cols, columns = tmp_cols)
del(tmp_cols)

In [659]:
# df2 = df.copy()

# df2['room type'] = label_encoder.fit_transform(df2['room type'])
# df2['booking status'] = label_encoder.fit_transform(df2['booking status'])
# df2.drop(columns=['reservation_year','reservation_day'],inplace=True)

# encoded_df = df2.copy()

# df2

In [660]:
encoded_df.columns

Index(['number of adults', 'number of children', 'number of weekend nights',
       'number of week nights', 'type of meal', 'car parking space',
       'lead time', 'market segment type', 'P-C', 'average price',
       'special requests', 'booking status', 'reservation_month',
       'reservation_day_of_week', 'is_weekend', 'stay_duration',
       'total_guests', 'room type_Room_Type 1', 'room type_Room_Type 2',
       'room type_Room_Type 3', 'room type_Room_Type 4',
       'room type_Room_Type 5', 'room type_Room_Type 6',
       'room type_Room_Type 7'],
      dtype='object')

In [661]:
encoded_df

Unnamed: 0,number of adults,number of children,number of weekend nights,number of week nights,type of meal,car parking space,lead time,market segment type,P-C,average price,...,is_weekend,stay_duration,total_guests,room type_Room_Type 1,room type_Room_Type 2,room type_Room_Type 3,room type_Room_Type 4,room type_Room_Type 5,room type_Room_Type 6,room type_Room_Type 7
0,1,1,2,5,0,0,0.505643,3,0,0.162963,...,1,7,2,True,False,False,False,False,False,False
1,1,0,1,3,2,0,0.011287,4,0,0.197556,...,0,4,1,True,False,False,False,False,False,False
2,2,1,1,3,0,0,0.002257,4,0,0.092593,...,0,4,3,True,False,False,False,False,False,False
3,1,0,0,2,0,0,0.476298,4,0,0.185185,...,1,2,1,True,False,False,False,False,False,False
4,1,0,1,2,2,0,0.108352,4,0,0.142593,...,0,3,1,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36279,2,0,1,1,2,0,0.108352,4,0,0.175000,...,0,2,2,True,False,False,False,False,False,False
36281,2,0,1,3,0,0,0.076749,4,0,0.199167,...,0,4,2,True,False,False,False,False,False,False
36282,2,0,1,3,0,0,0.187359,4,0,0.195574,...,0,4,2,False,False,False,True,False,False,False
36283,3,0,0,4,0,0,0.273138,3,0,0.179444,...,1,4,3,True,False,False,False,False,False,False


In [662]:
df1 = encoded_df.copy()
encoded_df.dtypes

number of adults              int64
number of children            int64
number of weekend nights      int64
number of week nights         int64
type of meal                  int32
car parking space             int64
lead time                   float64
market segment type           int32
P-C                           int64
average price               float64
special requests              int64
booking status                int64
reservation_month           float64
reservation_day_of_week     float64
is_weekend                    int64
stay_duration                 int64
total_guests                  int64
room type_Room_Type 1          bool
room type_Room_Type 2          bool
room type_Room_Type 3          bool
room type_Room_Type 4          bool
room type_Room_Type 5          bool
room type_Room_Type 6          bool
room type_Room_Type 7          bool
dtype: object

In [663]:
num_rows_with_nan = encoded_df.isna().any(axis=1).sum()
print(f"DataFrame has null values: {num_rows_with_nan}")


DataFrame has null values: 0


# Step 5: **Train Test Split**

In [664]:
y = df['booking status'].to_numpy()

X = np.array(encoded_df.drop(columns='booking status', inplace = False))
print(X.shape)
print(y.shape)

(34806, 23)
(34806,)


In [665]:
X_train, X_, y_train, y_ = train_test_split(X, y, train_size = 0.8)
X_cv, X_test, y_cv, y_test = train_test_split(X_,y_, train_size = 0.5)
del(X_,y_)

In [666]:
print("X train shape: ",X_train.shape)
print("y train shape: ",y_train.shape)

print("X CV shape:",X_cv.shape)
print("X CV shape:",y_cv.shape)

print("X test shape:",X_test.shape)
print("X test shape:",y_test.shape)


X train shape:  (27844, 23)
y train shape:  (27844,)
X CV shape: (3481, 23)
X CV shape: (3481,)
X test shape: (3481, 23)
X test shape: (3481,)


# Step 6: **Modeling And Accuracy Calculation**

## ***Random Forest Classifier***

In [667]:
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
# Accuracy calculation
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: ', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy:  0.8974432634300489
              precision    recall  f1-score   support

           0       0.87      0.81      0.84      1151
           1       0.91      0.94      0.92      2330

    accuracy                           0.90      3481
   macro avg       0.89      0.88      0.88      3481
weighted avg       0.90      0.90      0.90      3481



# **Task 3**

In [668]:
X = df1.drop('booking status', axis=1)
y = df1['booking status']
print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (34806, 23)
y shape:  (34806,)


## ***ANOVA***  Feature Selection

In [677]:
selector = SelectKBest(f_classif, k=23)  # selecting top 10 features
X_new = selector.fit_transform(X, y)

# Split the dataset again with the selected features
X_train_new, x_, y_train_new, y_ = train_test_split(X_new, y, test_size=0.4, random_state=42)
X_cv_new, X_test_new, y_cv_new, y_test_new =  train_test_split(X_new, y, test_size=0.5, random_state=42)





## Here is the required output from the feature selection method X and Y 
###################################################################################
# Train the model with selected features
rfc.fit(X_train_new, y_train_new)

# Evaluate the model
y_pred = rfc.predict(X_train_new)
print(f"Optimized Model Accuracy (train): {accuracy_score(y_train_new, y_pred)}")
y_pred = rfc.predict(X_test_new)
print(f"Optimized Model Accuracy (test): {accuracy_score(y_test_new, y_pred)}")
# Overfitting test
y_pred_train = rfc.predict(X_train_new)
y_pred_cv = rfc.predict(X_cv_new)
jtrain = mean_squared_error(y_train_new, y_pred_train)
jcv = mean_squared_error(y_cv_new, y_pred_cv)
print("Jtrain: ",jtrain)
print("Jcv: ",jcv)



Optimized Model Accuracy (train): 0.9947804434228799
Optimized Model Accuracy (test): 0.9120841234269953
Jtrain:  0.005219556577120145
Jcv:  0.005401367580302247


In [678]:
# Get the boolean mask of selected features
selected_mask = selector.get_support()

# Get the names of selected features if X is a DataFrame
selected_features = X.columns[selected_mask]
selected_features

Index(['number of adults', 'number of children', 'number of weekend nights',
       'number of week nights', 'type of meal', 'car parking space',
       'lead time', 'market segment type', 'P-C', 'average price',
       'special requests', 'reservation_month', 'reservation_day_of_week',
       'is_weekend', 'stay_duration', 'total_guests', 'room type_Room_Type 1',
       'room type_Room_Type 2', 'room type_Room_Type 3',
       'room type_Room_Type 4', 'room type_Room_Type 5',
       'room type_Room_Type 6', 'room type_Room_Type 7'],
      dtype='object')

In [671]:

with open('model.pkl', 'wb') as f:  # open a text file
    pickle.dump(rfc, f) # serialize the list
nnmodel = pickle.load('nn_model')

In [None]:
nnmodel = pickle.load('nn_model')

In [672]:
from sklearn.feature_selection import mutual_info_classif  # Import the function

# Calculate mutual information between each feature and the target
mutual_info = mutual_info_classif(X_train, y_train)

# Create a DataFrame to show feature importance
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Mutual_Info': mutual_info
})

# Sort features by their mutual information score
feature_importance = feature_importance.sort_values(by='Mutual_Info', ascending=False)

# Select the highest n features
n = 10
Selected_features_Minfo = feature_importance.head(n)['Feature']
df_Minfo = df1[Selected_features_Minfo]
# Show the Selected Features
print(df_Minfo.columns)

Index(['lead time', 'average price', 'special requests', 'reservation_month',
       'market segment type', 'room type_Room_Type 1', 'stay_duration',
       'total_guests', 'P-C', 'type of meal'],
      dtype='object')


## ***Random Forest*** finetuning

In [679]:
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [10, 20, 30, 40, 50, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rfc_random = RandomizedSearchCV(estimator=rfc, param_distributions=param_grid, n_iter=100, cv=3, verbose=2, random_state=42, n_jobs=-1)
rfc_random.fit(X_train_new, y_train_new)

best_rf = rfc_random.best_estimator_



Fitting 3 folds for each of 100 candidates, totalling 300 fits


In [680]:
# Evaluate the model
y_pred = best_rf.predict(X_train_new)
print(f"Optimized Model Accuracy (train): {accuracy_score(y_train_new, y_pred)}")
y_pred = best_rf.predict(X_test_new)
print(f"Optimized Model Accuracy (test): {accuracy_score(y_test_new, y_pred)}")
# Overfitting test
y_pred_train = rfc.predict(X_train_new)
y_pred_cv = rfc.predict(X_cv_new)
jtrain = mean_squared_error(y_train_new, y_pred_train)
# jcv = mean_squared_error(y_cv_new, y_pred_cv)
print("Jtrain: ",jtrain)
# print("Jcv: ",jcv)

Optimized Model Accuracy (train): 0.9802231480151319
Optimized Model Accuracy (test): 0.9092110555651325
Jtrain:  0.005219556577120145


## ***XGBOOST*** finetuning

In [681]:
# Define the parameter distributions
param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': randint(3, 10),
    'learning_rate': uniform(0.01, 0.3),
    'subsample': uniform(0.6, 0.4),
    'colsample_bytree': uniform(0.6, 0.4)
}

# Initialize the XGBClassifier
xgb = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=xgb, param_distributions=param_dist,
                                   n_iter=100, cv=5, scoring='accuracy',
                                   random_state=42, n_jobs=-1, verbose=2)

# Fit the model
random_search.fit(X_train_new, y_train_new)

# Use the best model
best_xgb = random_search.best_estimator_


Fitting 5 folds for each of 100 candidates, totalling 500 fits


Parameters: { "use_label_encoder" } are not used.



In [682]:
# Evaluate the model
y_pred = best_xgb.predict(X_train_new)
print(f"Optimized Model Accuracy (train): {accuracy_score(y_train_new, y_pred)}")
y_pred = best_xgb.predict(X_test_new)
print(f"Optimized Model Accuracy (test): {accuracy_score(y_test_new, y_pred)}")
# Overfitting test
y_pred_train = best_xgb.predict(X_train_new)
y_pred_cv = best_xgb.predict(X_cv_new)
jtrain = mean_squared_error(y_train_new, y_pred_train)
jcv = mean_squared_error(y_cv_new, y_pred_cv)
print("Jtrain: ",jtrain)
print("Jcv: ",jcv)

Optimized Model Accuracy (train): 0.9720825551884308
Optimized Model Accuracy (test): 0.9071999080618284
Jtrain:  0.027917444811569218
Jcv:  0.027696374188358328


In [None]:
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix, classification_report

def model_evaluation_classification(model, X_train, y_train, X_cv, y_cv, X_test, y_test):
    # Evaluate on training data
    y_train_pred = model.predict(X_train)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    print(f"Training Accuracy: {train_accuracy}")
    
    # Evaluate on cross-validation data
    y_cv_pred = model.predict(X_cv)
    cv_accuracy = accuracy_score(y_cv, y_cv_pred)
    print(f"Cross-Validation Accuracy: {cv_accuracy}")
    
    # Evaluate on test data
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print(f"Test Accuracy: {test_accuracy}")
    
    # Additional metrics
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))
    print("Classification Report:\n", classification_report(y_test, y_test_pred))
