In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score
%matplotlib inline

In [None]:
# Load the Iris CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(
    "/kaggle/input/iris-flower-dataset/IRIS.csv"
)

In [None]:
# Preview the first five rows to check that the file parsed as expected.
df.head(n=5)

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Print a concise summary of the frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

In [None]:
# How many rows belong to each species.
df.loc[:, "species"].value_counts()

In [None]:
# Horizontal bar chart of the number of rows per species.
sns.countplot(
    data=df,
    y="species",
)
plt.show()

In [None]:
# Sepal length against sepal width, one colour per species.
sns.scatterplot(
    x="sepal_length",
    y="sepal_width",
    hue="species",
    data=df,
)
plt.show()

In [None]:
# Petal length against petal width, one colour per species.
sns.scatterplot(
    x="petal_length",
    y="petal_width",
    hue="species",
    data=df,
)
plt.show()

In [None]:
# Distribution of sepal length with a KDE curve overlaid.
sns.histplot(
    x="sepal_length",
    data=df,
    kde=True,
    color="red",
)
plt.show()

In [None]:
# Distribution of sepal width with a KDE curve overlaid.
sns.histplot(
    x="sepal_width",
    data=df,
    kde=True,
    color="navy",
)
plt.show()

In [None]:
# Distribution of petal width with a KDE curve overlaid.
sns.histplot(
    x="petal_width",
    data=df,
    kde=True,
    color="darkorange",
)
plt.show()

In [None]:
# Distribution of petal length with a KDE curve overlaid.
sns.histplot(
    x="petal_length",
    data=df,
    kde=True,
    color="darkgreen",
)
plt.show()

In [None]:
# Pairwise feature relationships coloured by species. plt.show() renders the
# grid and keeps the PairGrid repr out of the cell output, as the other
# plotting cells in this notebook already do.
sns.pairplot(df, hue="species")
plt.show()

In [None]:
# Encode the species names as integer class ids. This one mapping is the source
# of truth for both the codes and the display labels, so the two cannot drift.
species_codes = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}

# Map through the dict rather than Series.replace: replacing strings with ints
# on an object column emits a downcasting FutureWarning in recent pandas.
# Values with no entry (for example already-encoded ints when this cell is
# re-run) pass through unchanged, which is what replace() did as well.
df["species"] = df["species"].map(lambda name: species_codes.get(name, name))

# Display labels ordered by class id (dicts preserve insertion order).
label_name = list(species_codes)

In [None]:
# Pearson correlation between all columns (species is already numeric here),
# displayed as a colour-graded table.
corr = df.corr(method="pearson")
corr.style.background_gradient(cmap="coolwarm")

In [None]:
# Features are every column except the target; the target is the encoded species.
y = df["species"]
X = df.drop(columns=["species"])

In [None]:
# Shape of the target Series (one entry per row of df).
y.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# The three classifiers compared below. Seeds are pinned (to the same value the
# train/test split uses) so a fresh "Restart & Run All" reproduces the metrics.

# max_iter is raised above the default of 100 because the features are not
# scaled; NOTE(review): the default cap is expected to raise a
# ConvergenceWarning on this data - confirm and tune if needed.
logistic_model = LogisticRegression(max_iter=1000)

# probability=True enables predict_proba (needed for the ROC cells); the
# probability calibration shuffles data internally, hence the seed.
svm_model = SVC(probability=True, random_state=42)

# Bootstrap sampling and feature sub-sampling are random, hence the seed.
random_forest_model = RandomForestClassifier(random_state=42)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Logistic regression

In [None]:
# Fit logistic regression on the training split (fit returns the estimator),
# then predict the held-out rows and report plain accuracy.
pred1 = logistic_model.fit(X_train, y_train).predict(X_test)

print(f"Accuracy Score : {accuracy_score(y_test,pred1)}")

# confusion matrix

In [None]:

# Confusion matrix for the logistic regression predictions, drawn as an
# annotated heatmap with the species names on both axes.
cf = confusion_matrix(y_test, pred1)
sns.heatmap(
    cf,
    annot=True,
    fmt="d",
    cmap="cividis",
    xticklabels=label_name,
    yticklabels=label_name,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

# classification report

In [None]:
# Per-class precision, recall and F1 for the logistic regression predictions.
print(
    classification_report(
        y_test,
        pred1,
        target_names=label_name,
    )
)

# ROC AUC score

In [None]:
# One-vs-rest ROC AUC for logistic regression, computed from the per-class
# probabilities on the held-out rows.
x = logistic_model.predict_proba(X_test)
roc_auc = roc_auc_score(
    y_test,
    x,
    multi_class="ovr",
)

print("ROC AUC Score:", roc_auc)

# ROC curve

In [None]:
import scikitplot as skplt

# ROC curves for logistic regression, drawn by scikit-plot from the predicted
# class probabilities.
y_probas = logistic_model.predict_proba(X_test)
skplt.metrics.plot_roc(
    y_test,
    y_probas,
)
plt.show()

# SVM

In [None]:
# Fit the SVM on the training split (fit returns the estimator), then predict
# the held-out rows and report plain accuracy.
pred2 = svm_model.fit(X_train, y_train).predict(X_test)
print(f"Accuracy Score : {accuracy_score(y_test,pred2)}")

# confusion matrix

In [None]:

# Confusion matrix for the SVM predictions, drawn as an annotated heatmap with
# the species names on both axes.
cf = confusion_matrix(y_test, pred2)
sns.heatmap(
    cf,
    annot=True,
    fmt="d",
    cmap="hot",
    xticklabels=label_name,
    yticklabels=label_name,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

# classification report

In [None]:
# Per-class precision, recall and F1 for the SVM predictions.
print(
    classification_report(
        y_test,
        pred2,
        target_names=label_name,
    )
)

# ROC AUC score

In [None]:
# One-vs-rest ROC AUC for the SVM. The probabilities must come from svm_model
# itself (this section previously re-scored the logistic regression), and they
# are stored under their own name so the target Series `y` is not overwritten;
# `y` is still needed if the train/test split cell is re-run.
svm_probas = svm_model.predict_proba(X_test)
roc_auc = roc_auc_score(y_test, svm_probas, multi_class='ovr')

print("ROC AUC Score:", roc_auc)

# ROC curve

In [None]:
import scikitplot as skplt

# ROC curves for the SVM, drawn by scikit-plot from the predicted class
# probabilities.
y_probas = svm_model.predict_proba(X_test)
skplt.metrics.plot_roc(
    y_test,
    y_probas,
)
plt.show()

# cross validation

In [None]:
# Full feature matrix and target vector (no hold-out) for cross-validation.
label_data = df["species"]
train_data = df.drop(columns=["species"])

In [None]:
# Shape of the cross-validation target Series (one entry per row of df).
label_data.shape

In [None]:
from sklearn.model_selection import cross_val_score, KFold

# Shuffled 5-fold cross-validation of the SVM on the full data set; the seed
# fixes the fold assignment.
num_folds = 5
kf = KFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=42,
)
cross_val_results = cross_val_score(
    estimator=svm_model,
    X=train_data,
    y=label_data,
    cv=kf,
)
print(f'Cross-Validation Results (Accuracy): {cross_val_results}')
print(f'Mean Accuracy: {cross_val_results.mean()}')

# random forest model

In [None]:
# Fit the random forest on the training split (fit returns the estimator), then
# predict the held-out rows and report plain accuracy.
pred3 = random_forest_model.fit(X_train, y_train).predict(X_test)
print(f"Accuracy Score : {accuracy_score(y_test,pred3)}")

# Confusion matrix

In [None]:
# Confusion matrix for the random forest predictions. It must be built from
# pred3 (the forest's output); pred2 holds the SVM predictions.
cf = confusion_matrix(y_test, pred3)
sns.heatmap(cf, annot=True, fmt="d", cmap="spring", xticklabels=label_name, yticklabels=label_name)
plt.title('Confusion Matrix')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

# Classification report

In [None]:
# Per-class precision, recall and F1 for the random forest predictions.
print(
    classification_report(
        y_test,
        pred3,
        target_names=label_name,
    )
)

# ROC AUC score

In [None]:
# One-vs-rest ROC AUC for the random forest. The probabilities must come from
# random_forest_model; scoring logistic_model here would just repeat the
# logistic regression result.
z = random_forest_model.predict_proba(X_test)
roc_auc = roc_auc_score(y_test, z, multi_class='ovr')

print("ROC AUC Score:", roc_auc)

# ROC curve

In [None]:
import scikitplot as skplt

# ROC curves for the random forest, drawn by scikit-plot from the predicted
# class probabilities.
y_probas = random_forest_model.predict_proba(X_test)
skplt.metrics.plot_roc(
    y_test,
    y_probas,
)
plt.show()

# cross validation

In [None]:
from sklearn.model_selection import cross_val_score, KFold

# Shuffled 5-fold cross-validation of the random forest on the full data set;
# the seed fixes the fold assignment.
num_folds = 5
kf = KFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=42,
)
cross_val_results = cross_val_score(
    estimator=random_forest_model,
    X=train_data,
    y=label_data,
    cv=kf,
)
print(f'Cross-Validation Results (Accuracy): {cross_val_results}')
print(f'Mean Accuracy: {cross_val_results.mean()}')

# custom data prediction

### Iris-setosa:0
### Iris-versicolor:1
### Iris-virginica:2

In [None]:
import numpy as np

# One hand-written measurement to classify.
# NOTE(review): the four values are assumed to follow the column order of X
# (the feature frame the models were trained on) - confirm against df.head().
test = np.array([[5.1, 3.5, 1.4, 0.3]])

# The forest was fitted on a DataFrame, so pass the sample as a frame with the
# same column names; a bare ndarray makes scikit-learn warn that X has no valid
# feature names.
test_frame = pd.DataFrame(test, columns=X.columns)

# Returns the integer class id (see the species mapping listed above).
random_forest_model.predict(test_frame)