<a href="https://colab.research.google.com/github/tugcebyrl/MachineLearning/blob/main/IrisDatasetAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Steps to download Kaggle datasets in Google Colab
Follow the steps below to download and use kaggle datasets in Google Colab:
* Go to your Kaggle account, scroll to the API section, and click "Expire API Token" to remove any previous tokens.

* Click "Create New API Token". This downloads a kaggle.json file to your machine.

* Go to your Google Colab notebook and run the following commands:

Mount your Google Drive. The following code mounts your Google Drive:
from google.colab import drive
drive.mount('/content/gdrive')

Now upload the kaggle.json file:
from google.colab import files
files.upload() #this will prompt you to upload the kaggle.json

make sure kaggle.json file is present:
!ls -lha kaggle.json

Install kaggle API client:
!pip install -q kaggle

* The Kaggle API client expects the file to be in ~/.kaggle

so move it there:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

we need to set permissions:
!chmod 600 /root/.kaggle/kaggle.json

check your directory before downloading the datasets:
!pwd

list all available datasets:
!kaggle datasets list

Download the required dataset from Kaggle (the command below is an example; this notebook itself downloads uciml/iris):
!kaggle datasets download -d varsharainer/dna-sequencing-classifier

If your file is a zip file you can unzip with the following code:
!unzip dna-sequencing-classifier.zip

In [None]:
# Import Modules
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
from google.colab import files
# Opens Colab's upload prompt; select kaggle.json.
# The result is assigned rather than left as the cell's last expression:
# files.upload() returns a {filename: file-contents} mapping, and echoing it
# would print the contents of kaggle.json (the API key) into the cell output.
uploaded = files.upload()

In [None]:
# Confirm that kaggle.json is present in the current working directory
!ls -lha kaggle.json

In [None]:
!pip install -q kaggle

In [None]:
# Create the directory the Kaggle client reads its credentials from (-p: no error if it already exists)
!mkdir -p ~/.kaggle

In [None]:
# Copy the uploaded token into ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Show the working directory the dataset archive will be downloaded into
!pwd

In [None]:
# List datasets via the Kaggle CLI (also a quick check that the credentials work)
!kaggle datasets list

In [None]:
# Download the uciml/iris dataset archive into the current directory
!kaggle datasets download -d uciml/iris

In [None]:
!unzip iris.zip

In [None]:
# Read the extracted Iris CSV into a DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer='/content/Iris.csv')
df.head(n=5)

In [None]:
# Remove the 'Id' column so it is not treated as a feature.
# errors='ignore' makes the cell safe to re-run: once 'Id' is gone, a second
# drop would otherwise raise KeyError.
df = df.drop(columns=['Id'], errors='ignore')
df.head()

In [None]:
# Display summary statistics for the columns of the DataFrame
df.describe()

In [None]:
# Basic info about the dataset: column names, non-null counts and dtypes
df.info()

In [None]:
# Display the number of samples in each class of the 'Species' column
df['Species'].value_counts()

# Preprocessing the dataset

In [None]:
# Count missing values per column
df.isna().sum()

# Exploratory Data Analysis
* Histogram
* Scatter

In [None]:
# Histogram of the sepal length column
df['SepalLengthCm'].hist()

In [None]:
# Histogram of the sepal width column
df['SepalWidthCm'].hist()

In [None]:
# Histogram of the petal length column
df['PetalLengthCm'].hist()

In [None]:
# Histogram of the petal width column
df['PetalWidthCm'].hist()

In [None]:
# Plot colour and class label for each species, matched by position
colors = ['red', 'orange', 'blue']
species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [None]:
# One scatter series per species so each class gets its own colour and legend entry
for name, colour in zip(species, colors):
  subset = df[df['Species'] == name]
  plt.scatter(subset['SepalLengthCm'], subset['SepalWidthCm'], c=colour, label=name)
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.legend()

In [None]:
# Petal length against petal width, one coloured series per species
for name, colour in zip(species, colors):
  subset = df[df['Species'] == name]
  plt.scatter(subset['PetalLengthCm'], subset['PetalWidthCm'], c=colour, label=name)
plt.xlabel("Petal Length")
plt.ylabel("Petal Width")
plt.legend()

In [None]:
# Petal length against sepal length, one coloured series per species
for name, colour in zip(species, colors):
  subset = df[df['Species'] == name]
  plt.scatter(subset['PetalLengthCm'], subset['SepalLengthCm'], c=colour, label=name)
plt.xlabel("Petal Length")
plt.ylabel("Sepal Length")
plt.legend()

In [None]:
# Sepal width against petal width, one coloured series per species
for name, colour in zip(species, colors):
  subset = df[df['Species'] == name]
  plt.scatter(subset['SepalWidthCm'], subset['PetalWidthCm'], c=colour, label=name)
plt.xlabel("Sepal width")
plt.ylabel("Petal width")
plt.legend()

# Correlation Matrix

In [None]:
df = df.select_dtypes(include=[float,int])
df.corr()

In [None]:
corr= df.corr()
fig,ax=plt.subplots(figsize=(5,4))
sns.heatmap(corr,annot=True,ax=ax)

# Label Encoder


In [None]:
# Create the encoder used to turn the class labels into integer codes
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

In [None]:
# Fit the encoder on 'Species' and overwrite the column with the encoded values
df['Species']=le.fit_transform(df['Species'])
df.head()

# Model Training

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the remaining 70% are used for training.
# Features are every column except the target.
X=df.drop(columns=['Species'])
Y=df['Species']
# The target split is named `y_train` (lower case) because the model-fitting cells
# reference `y_train`. random_state pins the shuffle so the reported accuracies
# are reproducible from run to run.
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=42)

In [None]:
# Logistic Regression classifier with scikit-learn's default settings
from sklearn.linear_model import LogisticRegression
model=LogisticRegression()

In [None]:
# Fit the logistic regression model on the training split
model.fit(x_train,y_train)

In [None]:
# Accuracy of logistic regression on the held-out test split, shown as a percentage
lr_accuracy = model.score(x_test, y_test)
print("Accuracy: ", lr_accuracy*100)

In [None]:
# k-nearest-neighbours classifier with scikit-learn's default settings
from sklearn.neighbors import KNeighborsClassifier
modelKNN=KNeighborsClassifier()

In [None]:
# Fit the KNN model on the training split
modelKNN.fit(x_train,y_train)

In [None]:
# Accuracy of KNN on the held-out test split, shown as a percentage
knn_accuracy = modelKNN.score(x_test, y_test)
print("Accuracy: ", knn_accuracy*100)

In [None]:
# Decision tree classifier with scikit-learn's default settings
from sklearn.tree import DecisionTreeClassifier
modelDT=DecisionTreeClassifier()

In [None]:
# Fit the decision tree on the training split
modelDT.fit(x_train,y_train)

In [None]:
# Accuracy of the decision tree on the held-out test split, shown as a percentage
dt_accuracy = modelDT.score(x_test, y_test)
print("Accuracy: ", dt_accuracy*100)