In [1]:
# 1: Import the Necessary Libraries

import pandas as pd                  
import numpy as np                    
from sklearn.datasets import load_iris 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier         
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# 2: Load the Iris Dataset

# load_iris() hands back a dictionary-like container. The entries used below are
# 'data' (the measurement matrix), 'feature_names' (its column labels) and
# 'target' (the class label of each row).
iris = load_iris()

# Wrap the measurements in a DataFrame whose columns carry the feature names.
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])

# Append the class labels as an extra 'species' column.
df['species'] = iris['target']


# 3: Data Preprocessing

# Blank out a single measurement (row label 0) so that the cleaning step
# below has an incomplete row to remove.
df.at[0, 'sepal length (cm)'] = np.nan

# Show the number of missing entries in each column before cleaning.
print("Missing values before cleaning:", df.isna().sum(), sep="\n")

# Drop, in place, every row that contains at least one missing value.
df.dropna(axis=0, how='any', inplace=True)

# Show the counts again to confirm that no missing entries remain.
print("\nMissing values after cleaning:", df.isna().sum(), sep="\n")


# 4: Split the Data into Training and Testing Sets
# Labels (y) are the 'species' column; features (X) are every other column.
y = df['species']
X = df.drop('species', axis=1)

# Hold out 20% of the rows for testing. The fixed random_state keeps the
# split the same from run to run.
# NOTE(review): no `stratify` argument is passed, so the class proportions in
# the test set are not forced to match those of the full data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


#5: Train the Decision Tree Classifier

# Build a decision tree with a fixed random_state and fit it to the training
# data in one expression; fit() returns the estimator itself, so `clf` is
# the trained model.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)


#6 Make Predictions and Evaluate the Model

# Predict a species label for every row of the held-out test features.
y_pred = clf.predict(X_test)

# Score the predictions against the true test labels. Precision and recall
# use average='macro', i.e. the per-class scores are averaged with every
# class weighted equally.
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
accuracy = accuracy_score(y_test, y_pred)

# Print the headline metrics, one per line.
print("\nModel Evaluation:")
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(label, value)

# Print the per-class breakdown (precision, recall, F1-score and support).
report = classification_report(y_test, y_pred)
print("\nClassification Report:", report, sep="\n")


Missing values before cleaning:
sepal length (cm)    1
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64

Missing values after cleaning:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64

Model Evaluation:
Accuracy: 0.9
Precision: 0.9285714285714285
Recall: 0.8888888888888888

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.67      0.80         9
           2       0.79      1.00      0.88        11

    accuracy                           0.90        30
   macro avg       0.93      0.89      0.89        30
weighted avg       0.92      0.90      0.90        30

