In [None]:
#IBRAHIM SULTAN
#TASK 1 IRIS FLOWER CLASSIFICATION
#OASIS INFOBYTE


# **Iris Flower Classification Dataset**

The Iris flower classification dataset comprises 150 samples of Iris flowers, categorized into three species:
1. Iris setosa
1. Iris versicolor
1. Iris virginica

The dataset contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other two; the latter are NOT linearly separable from each other.
Attribute Information:

1. Sepal length in cm
1. sepal width in cm
1. petal length in cm
1. petal width in cm
1. class

The dataset is widely used as a benchmark in machine learning for supervised classification tasks aiming to accurately classify Iris flowers based on their measurements.

 **Import Modules**

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

**Load the dataset**

In [None]:
# Read the Iris CSV from the Kaggle input directory and preview the first rows
iris_csv_path = '/kaggle/input/iriscsv/Iris.csv'
df = pd.read_csv(iris_csv_path)
df.head()

In [None]:
# Drop the 'Id' column as it is not required for analysis.
# errors="ignore" keeps this cell re-runnable: on a second run, when 'Id' has
# already been removed, the drop is a no-op instead of raising KeyError.
df = df.drop(columns=["Id"], errors="ignore")
df.head()

In [None]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape


In [None]:
# Preview the first 10 rows of the dataframe
df.head(n=10)


In [None]:
# Summary statistics of the data, as reported by DataFrame.describe()
df.describe()

In [None]:
# Basic info about the dataset: column names, non-null counts and dtypes
df.info()

In [None]:
# Number of samples in each class (one entry per distinct 'Species' value)
df["Species"].value_counts()

**Preprocessing the Dataset**

In [None]:
# Replace the textual class labels in 'Species' with integer codes
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
encoded_species = le.fit_transform(df['Species'])
df['Species'] = encoded_species
df['Species']

In [None]:
# Count missing values per column
df.isna().sum()

In [None]:
# Show the frame after label encoding; 'Species' now holds the integer codes
df

In [None]:
# Class counts after encoding. value_counts is a method: without the call
# parentheses the cell only displays the bound-method repr, not the counts.
df['Species'].value_counts()

**Exploratory Data Analysis (EDA)**

In [None]:
# Histogram of sepal length, titled and labelled so the figure stands on its own
ax = df['SepalLengthCm'].hist()
ax.set(title='Sepal length distribution', xlabel='Sepal length (cm)', ylabel='Number of samples');


In [None]:
# Histogram of sepal width, titled and labelled so the figure stands on its own
ax = df['SepalWidthCm'].hist()
ax.set(title='Sepal width distribution', xlabel='Sepal width (cm)', ylabel='Number of samples');

In [None]:
# Histogram of petal length, titled and labelled so the figure stands on its own
ax = df['PetalLengthCm'].hist()
ax.set(title='Petal length distribution', xlabel='Petal length (cm)', ylabel='Number of samples');

In [None]:
# Histogram of petal width, titled and labelled so the figure stands on its own
ax = df['PetalWidthCm'].hist()
ax.set(title='Petal width distribution', xlabel='Petal width (cm)', ylabel='Number of samples');

In [None]:
# Plot the histograms of all features together on one set of axes.
# Each series is drawn semi-transparent and given a legend entry; drawn opaque
# and unlabelled, overlapping bars hide one another and the features cannot be
# told apart.
fig, ax = plt.subplots()
for feature in ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']:
    df[feature].hist(ax=ax, alpha=0.6, label=feature)
ax.set(title='Feature distributions', xlabel='Measurement (cm)', ylabel='Number of samples')
ax.legend();

In [None]:
# Pairwise scatter plots of every feature pair, coloured by the 'Species' column
sns.pairplot(data=df, hue='Species')

**Correlation Matrix**

A correlation matrix is a table showing correlation coefficients between variables. Each cell in the table shows the correlation between two variables, with a value in the range -1 to 1. If two variables are highly correlated, one of the two can be dropped.

In [None]:
# Pairwise Pearson correlation between all columns of the frame
df.corr(method='pearson')

In [None]:
# Render the correlation matrix as an annotated heatmap on a 5x4 inch figure
fig, ax = plt.subplots(figsize=(5, 4))
corr = df.corr()
sns.heatmap(data=corr, annot=True, cmap='coolwarm', ax=ax)

**Model Training**

In [None]:
# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split
X = df.drop(columns=['Species'])  # features: every column except the label
Y = df['Species']                 # target: the class label column
# random_state pins the shuffle so the accuracies reported below are
# reproducible across runs; stratify=Y keeps the class proportions the same in
# the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.40, random_state=42, stratify=Y
)

In [None]:
# Feature matrix: every column of df except 'Species'
X

In [None]:
# Target vector: the 'Species' column of df
Y

**Model 1**

In [None]:
# Logistic Regression Model
from sklearn.linear_model import LogisticRegression
# Warnings are silenced globally in the import cell, so a solver that stops at
# its iteration cap before converging would go unnoticed. Give it a generous
# cap instead of relying on the default.
model1 = LogisticRegression(max_iter=1000)
model1.fit(x_train, y_train)
# score() returns mean accuracy as a fraction; scale to a percentage
accuracy_logreg = model1.score(x_test, y_test) * 100
print("Accuracy (Logistic Regression): ", accuracy_logreg)


**Model 2**

In [None]:
# K-nearest Neighbours Model (KNN): fit on the training split, then report
# the test-split accuracy as a percentage
from sklearn.neighbors import KNeighborsClassifier
model2 = KNeighborsClassifier().fit(x_train, y_train)
accuracy_knn = 100 * model2.score(x_test, y_test)
print("Accuracy (KNN): ", accuracy_knn)

**Model 3**

In [None]:
# Decision Tree Model
from sklearn.tree import DecisionTreeClassifier
model3 = DecisionTreeClassifier()
model3.fit(x_train, y_train)
accuracy_decision_tree = model3.score(x_test, y_test) * 100
print("Accuracy (Decision Tree): ", accuracy_decision_tree)

**Project Report**

In [None]:
# Model Comparison - Visualization
# Pair each model name with its test accuracy, then derive the bar labels and
# heights from that single mapping.
accuracy_by_model = {
    'Logistic Regression': accuracy_logreg,
    'KNN': accuracy_knn,
    'Decision Tree': accuracy_decision_tree,
}
models = list(accuracy_by_model)
accuracies = list(accuracy_by_model.values())

plt.bar(models, accuracies, color=['blue', 'green', 'orange'])
plt.title("Model Comparison - Accuracy")
plt.xlabel("Models")
plt.ylabel("Accuracy")
plt.ylim([0, 100])  # accuracies are percentages
plt.show()

THANK YOU