# 1. Loading the Data 

In [None]:
from sklearn.datasets import load_iris
import pandas as pd

# Fetch the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Translate each sample's target index into its species name, then build the
# feature table with that name attached as a 'species' column.
species_names = iris.target_names[iris.target]
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    species=species_names
)

# Preview the first 5 rows of the table.
print(iris_df.head(5))

# 2. Exploratory Data Analysis 

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
iris_df.info()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram grid of the DataFrame's feature columns, 20 bins each,
# under a shared figure title.
iris_df.hist(bins=20, figsize=(12, 8))
plt.suptitle('Feature Distribution')
plt.show()

In [None]:
# Two scatter plots coloured by species: sepal length vs. width first,
# then petal length vs. width. Each entry is (x column, y column, title).
scatter_specs = [
    ('sepal length (cm)', 'sepal width (cm)', 'Sepal Length vs. Sepal Width'),
    ('petal length (cm)', 'petal width (cm)', 'Petal Length vs. Petal Width'),
]
for x_col, y_col, title in scatter_specs:
    sns.scatterplot(data=iris_df, x=x_col, y=y_col, hue='species')
    plt.title(title)
    plt.show()

In [None]:
# Pairwise relationships between all features, coloured by species.
sns.pairplot(data=iris_df, hue='species')
plt.show()

# 3. Preparing the data for machine learning

In [None]:
# Feature matrix: the four measurement columns.
X = iris_df.loc[:, [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]]
# Target vector: the species name of each sample.
y = iris_df.loc[:, 'species']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# transformation to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Wrap the scaled training array in a DataFrame that reuses the original
# feature column names, and preview its first 5 rows.
X_train_scaled_df = pd.DataFrame(
    data=X_train_scaled,
    columns=X_train.columns,
)
print(X_train_scaled_df.head(n=5))

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train a decision tree with a fixed seed for reproducibility.
# NOTE: the model is fitted on the unscaled X_train, not on X_train_scaled,
# so the scaled arrays computed earlier are not used by this classifier.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X=X_train, y=y_train)

In [None]:
# Predict the species of the held-out test samples and show the first five.
y_pred = dt_classifier.predict(X=X_test)
print("First few predictions:", y_pred[0:5])

In [None]:
from sklearn.tree import plot_tree

# Draw the fitted decision tree on a wide figure, colouring nodes (filled=True)
# and labelling splits and leaves with the dataset's feature and class names.
plt.figure(figsize=(20, 10))
plot_tree(
    dt_classifier,
    filled=True,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
)
# Render explicitly, as the other plotting cells do. Ending the cell on
# plt.show() also stops the notebook from echoing the list that plot_tree
# returns.
plt.show()

# 4. Evaluating the model

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Score the test-set predictions; every metric uses macro averaging.
macro_avg = {'average': 'macro'}
precision = precision_score(y_test, y_pred, **macro_avg)
recall = recall_score(y_test, y_pred, **macro_avg)
f1 = f1_score(y_test, y_pred, **macro_avg)

# Report each score on its own line, rounded to two decimals.
print(
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1-Score: {f1:.2f}",
    sep="\n",
)