In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [25]:
# Load the dataset
data = pd.read_csv('car_evaluation.csv')

# Display the first few rows
data.columns = ['buying','maint','doors','persons','lug_boot','safety','class']
print(data.head())

  buying  maint doors persons lug_boot safety  class
0  vhigh  vhigh     2       2    small    med  unacc
1  vhigh  vhigh     2       2    small   high  unacc
2  vhigh  vhigh     2       2      med    low  unacc
3  vhigh  vhigh     2       2      med    med  unacc
4  vhigh  vhigh     2       2      med   high  unacc


In [26]:
# Separate the target variable and features
y = data['safety']  # Make sure this is the correct column name
X = data.drop('safety', axis=1)  # Drop the target variable before encoding

In [29]:
# Check for missing values
print(data.isnull().sum())

# Display the data types
print(data.dtypes)

buying_low        0
buying_med        0
buying_vhigh      0
maint_low         0
maint_med         0
maint_vhigh       0
doors_3           0
doors_4           0
doors_5more       0
persons_4         0
persons_more      0
lug_boot_med      0
lug_boot_small    0
safety_low        0
safety_med        0
class_good        0
class_unacc       0
class_vgood       0
dtype: int64
buying_low        bool
buying_med        bool
buying_vhigh      bool
maint_low         bool
maint_med         bool
maint_vhigh       bool
doors_3           bool
doors_4           bool
doors_5more       bool
persons_4         bool
persons_more      bool
lug_boot_med      bool
lug_boot_small    bool
safety_low        bool
safety_med        bool
class_good        bool
class_unacc       bool
class_vgood       bool
dtype: object


In [30]:
# Convert categorical variables to numerical using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Check the new feature columns
print("New feature columns after one-hot encoding:", X.columns)

New feature columns after one-hot encoding: Index(['buying_low', 'buying_med', 'buying_vhigh', 'maint_low', 'maint_med',
       'maint_vhigh', 'doors_3', 'doors_4', 'doors_5more', 'persons_4',
       'persons_more', 'lug_boot_med', 'lug_boot_small', 'class_good',
       'class_unacc', 'class_vgood'],
      dtype='object')


In [31]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

In [32]:
# Make Predictions
y_pred = rf_classifier.predict(X_test)

In [33]:
# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Confusion Matrix:
[[33 35 55]
 [29 42 37]
 [50 48 17]]

Classification Report:
              precision    recall  f1-score   support

        high       0.29      0.27      0.28       123
         low       0.34      0.39      0.36       108
         med       0.16      0.15      0.15       115

    accuracy                           0.27       346
   macro avg       0.26      0.27      0.26       346
weighted avg       0.26      0.27      0.26       346

Accuracy: 0.27
