<a href="https://colab.research.google.com/github/victorhcunha/reandomforests/blob/main/Random_Forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importação de Bibliotecas

In [None]:
#Importação de bibliotecas
from sklearn import datasets #Dados
from sklearn.model_selection import train_test_split #Divisão de dados
from sklearn.ensemble import RandomForestClassifier #Arvore Aleatória
import pandas as pd #Tabela de Dados e importancia da feature
from sklearn import metrics #acuracia


# Dados

In [None]:
# Load the Iris dataset through scikit-learn's `datasets` module
dataIris = datasets.load_iris()

In [None]:
# Show the class names on the first line and the feature names on the second.
print(dataIris.target_names, dataIris.feature_names, sep='\n')

['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [None]:
# Build the working table: one column per measurement (taken from the matching
# column of the raw array, in order) plus the numeric class label.
data = pd.DataFrame({
    **{
        label: dataIris.data[:, position]
        for position, label in enumerate(
            ('sepal length', 'sepal width', 'petal length', 'petal width')
        )
    },
    'species': dataIris.target,
})
data.head()  # shows the first 5 rows (the pandas default)

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


# Divisão de dados

In [None]:
# Separate the four measurement columns (features) from the class labels (target).
X = data[['sepal length', 'sepal width', 'petal length', 'petal width']]
y = data['species']

# Hold out 25% of the rows for testing. The fixed random_state makes the shuffle
# repeatable, so a fresh kernel run reproduces the same partition and scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [None]:
# Print the feature table X followed by the class-label series y
print(X,y)

     sepal length  sepal width  petal length  petal width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns] 0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: species, Length: 150, dtype: int64


# Floresta Aleatória e Cálculo de Acurácia

In [None]:
# Build a random forest of 100 trees. The seed makes the forest's internal
# randomness repeatable, so the reported accuracy does not drift between runs.
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model on the training split
clf.fit(X_train, y_train)

# Predict the classes of the held-out test split
y_pred = clf.predict(X_test)

# Fraction of test samples whose predicted class matches the true class
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9736842105263158


In [None]:
# Pair each feature name with the importance the fitted forest assigned to it,
# then rank from most to least important.
# NOTE(review): labels come from dataIris.feature_names, so this assumes `clf`
# was fitted on those columns in that same order — confirm if the split changes.
feature_imp = (
    pd.Series(dict(zip(dataIris.feature_names, clf.feature_importances_)))
    .sort_values(ascending=False)
)
feature_imp

petal length (cm)    0.451040
petal width (cm)     0.411592
sepal length (cm)    0.110936
sepal width (cm)     0.026432
dtype: float64

# Calculando para vários tamanhos

In [None]:
# Forest sizes to compare: 100 to 1000 trees in steps of 100.
numberTree = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
accuracy = []

for n_trees in numberTree:
    # Same seed for every size, so differences in accuracy come from the
    # number of trees rather than from run-to-run randomness.
    trees = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    trees.fit(X_train, y_train)
    y_pred = trees.predict(X_test)
    # Record the test-set accuracy for this forest size
    accuracy.append(metrics.accuracy_score(y_test, y_pred))

In [None]:
# Tabulate the accuracy obtained for each forest size.
# NOTE(review): this rebinds `data`, which previously held the iris measurements.
data = pd.DataFrame({'Número de Árvores': numberTree, 'Acurácia': accuracy})
data.head(len(data))  # one row per forest size, so this shows the whole table

Unnamed: 0,Número de Árvores,Acurácia
0,100,0.947368
1,200,0.973684
2,300,0.947368
3,400,0.947368
4,500,0.973684
5,600,0.973684
6,700,0.973684
7,800,0.973684
8,900,0.973684
9,1000,0.973684


# Calculando para quantidades menores de árvores

In [None]:
# Forest sizes to compare: every size from 1 to 20 trees.
numberTree = range(1, 21)
accuracy = []

for n_trees in numberTree:
    # Same seed for every size, so differences in accuracy come from the
    # number of trees rather than from run-to-run randomness.
    trees = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    trees.fit(X_train, y_train)
    y_pred = trees.predict(X_test)
    # Record the test-set accuracy for this forest size
    accuracy.append(metrics.accuracy_score(y_test, y_pred))

# Tabulate accuracy per forest size and show every row
data = pd.DataFrame({
    'Número de Árvores': numberTree,
    'Acurácia': accuracy,
})
data.head(len(numberTree))

Unnamed: 0,Número de Árvores,Acurácia
0,1,0.947368
1,2,0.947368
2,3,0.921053
3,4,0.947368
4,5,0.973684
5,6,0.947368
6,7,0.947368
7,8,0.947368
8,9,0.973684
9,10,0.947368


# Random Forest com uma feature a menos

In [None]:
# Rebuild the measurement table without 'sepal width'
# (`data` was rebound to an accuracy table by the earlier cells).
data = pd.DataFrame({
    'sepal length': dataIris.data[:, 0],
    'petal length': dataIris.data[:, 2],
    'petal width': dataIris.data[:, 3],
    'species': dataIris.target
})

# The original had a bare `data.head()` here; mid-cell it displayed nothing
# (only a cell's last expression is rendered), so it was dropped.

X = data[['petal length', 'petal width', 'sepal length']]  # "sepal width" removed
y = data['species']  # class labels

# Hold out 25% of the rows for testing. The fixed random_state makes the shuffle
# repeatable, so a fresh kernel run reproduces the same partition and scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)


In [None]:
# Forest sizes to compare on the reduced feature set: 100 to 1000 trees in steps of 100.
numberTree = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
accuracy = []

for n_trees in numberTree:
    # Same seed for every size, so differences in accuracy come from the
    # number of trees rather than from run-to-run randomness.
    trees = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    trees.fit(X_train, y_train)
    y_pred = trees.predict(X_test)
    # Record the test-set accuracy for this forest size
    accuracy.append(metrics.accuracy_score(y_test, y_pred))

# Tabulate accuracy per forest size and show every row
data = pd.DataFrame({
    'Número de Árvores': numberTree,
    'Acurácia': accuracy,
})
data.head(len(numberTree))

Unnamed: 0,Número de Árvores,Acurácia
0,100,0.947368
1,200,0.947368
2,300,0.947368
3,400,0.947368
4,500,0.947368
5,600,0.947368
6,700,0.947368
7,800,0.947368
8,900,0.947368
9,1000,0.947368
