In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


In [32]:
# Address of the Iris data file hosted by the UCI Machine Learning Repository.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "iris/iris.data"
)

In [33]:
# Column labels applied to the data when it is loaded: four measurements, then the class label.
columns = [
    'Sepal length',
    'Sepal width',
    'Petal length',
    'Petal width',
    'Species',
]

In [34]:
# Load the data set, labelling the columns with the names declared above.
# `header=None` spells out what passing `names=` already implies: every line of the file is data.
df = pd.read_csv(url, header=None, names=columns)
# Show the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [35]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(150, 5)

In [36]:
# Data type pandas inferred for each column.
df.dtypes

Sepal length    float64
Sepal width     float64
Petal length    float64
Petal width     float64
Species          object
dtype: object

In [37]:
# Count the missing values in each column (summing down the rows is the default axis).
df.isna().sum()

Sepal length    0
Sepal width     0
Petal length    0
Petal width     0
Species         0
dtype: int64

In [38]:
# Distinct class labels present in the Species column.
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [39]:
# Number of rows per class label, to check how balanced the classes are.
df.groupby('Species').size()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
dtype: int64

In [40]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [41]:
# Preview the first rows again before separating the features from the label.
df.head()

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [42]:
# Feature matrix: every column of the frame except the class label.
X = df.drop('Species', axis='columns')

In [43]:
# Label frame: select the class column directly rather than dropping every
# feature by name, so a column added to `df` later cannot leak into the target.
# Double brackets keep this a one-column DataFrame, as it was before.
target = df[['Species']]

In [44]:
# Preview the first rows of the label frame.
target.head()

Unnamed: 0,Species
0,Iris-setosa
1,Iris-setosa
2,Iris-setosa
3,Iris-setosa
4,Iris-setosa


In [45]:
# Candidate classifiers to compare, each paired with the short label used when
# its score is reported. All estimators use library defaults unless noted.
models = [
    # NOTE(review): recent scikit-learn releases deprecate `multi_class`; confirm
    # against the installed version before upgrading (the documented replacement
    # for one-vs-rest is wrapping the estimator in OneVsRestClassifier).
    ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('KNN', KNeighborsClassifier()),
    ('SVC', SVC()),
    ('NB', GaussianNB()),
    # The tree-based estimators are randomized; a fixed seed makes their scores
    # repeat exactly on every "Restart & Run All".
    ('RF', RandomForestClassifier(random_state=42)),
    ('DT', DecisionTreeClassifier(random_state=42)),
]

In [46]:
# Score each candidate with 10-fold cross-validation and report its mean accuracy.
# The label frame has a single column; flatten it once into the 1-D array passed as `y`.
labels = target.values.ravel()
for name, model in models:
    accuracy = cross_val_score(model, X, labels, scoring='accuracy', cv=10)
    mean_percent = accuracy.mean() * 100  # fraction -> percentage
    print("Acuracia de %s: é %.2f percentual" % (name, mean_percent))

Acuracia de LR: é 95.33 percentual
Acuracia de KNN: é 96.67 percentual
Acuracia de SVC: é 97.33 percentual
Acuracia de NB: é 95.33 percentual
Acuracia de RF: é 96.00 percentual
Acuracia de DT: é 95.33 percentual
