# Importando as bibliotecas iniciais necessárias

In [44]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

# Importando a base de dados

In [45]:
df = pd.read_csv('dataset/ev_charging_patterns.csv')
# print(df)

# Organizando os conjuntos de treinamento e teste

In [27]:
# Como funciona o iloc
np.array(df.iloc[0:5, 0:4])

array([['User_1', 'BMW i3', 108.46300741284072, 'Station_391'],
       ['User_2', 'Hyundai Kona', 100.0, 'Station_428'],
       ['User_3', 'Chevy Bolt', 75.0, 'Station_181'],
       ['User_4', 'Hyundai Kona', 50.0, 'Station_327'],
       ['User_5', 'Hyundai Kona', 50.0, 'Station_108']], dtype=object)

In [28]:
df["charger_type"] = df["charger_type"].astype('category').cat.codes
df["vehicle_model"] = df["vehicle_model"].astype('category').cat.codes
df["time_of_day"] = df["time_of_day"].astype('category').cat.codes
# df['charging_start_time'] = pd.to_datetime(df['charging_start_time'])
# df['charging_end_time'] = pd.to_datetime(df['charging_end_time'])


X = np.array(df[[ 
        # "vehicle_model",
        "distance_driven_since_last_charge_km",
        "charging_cost_usd",
        # "battery_capacity_kwh",
        # "charging_start_time",
        # "charging_end_time",
        # "time_of_day",
        "state_of_charge_start_%",
        "state_of_charge_end_%"
        # "temperature_c"
]])

# Criando um vetor com as espécies de todos os registros de flores
y = np.array(df['user_type'])


In [29]:
from sklearn.model_selection import train_test_split

# Dividindo os conjuntos de treinamento e teste
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(X, y, test_size=0.3)

In [30]:
print(X_treinamento.shape)
print(X_treinamento[0:5,])

(924, 4)
[[240.79521178  13.09959827  78.27386156  76.89957103]
 [156.41701742  26.22041153  74.68253406  64.64710567]
 [ 36.42403857  29.02733721  49.92314429  56.18148263]
 [280.97620242  21.90827594  28.90083255  97.35343585]
 [ 46.81290396  36.29871225  75.48580234  56.12136493]]


In [31]:
print(X_teste.shape)
# print(X_teste[0:5,])

(396, 4)


In [32]:
print(y_treinamento.shape)
# print(y_treinamento[0:5,])

(924,)


In [33]:
print(y_teste.shape)
print(y_teste[0:5],)

(396,)
['Commuter' 'Commuter' 'Casual Driver' 'Commuter' 'Commuter']


# Importando a biblioteca necessária para trabalhar com Árvores de Decisão

In [34]:
from sklearn import tree

# Configurando o modelo a ser induzido

In [35]:
modeloarvoredecisao = tree.DecisionTreeClassifier(criterion='entropy') 

# Treinando o modelo

In [36]:
modeloarvoredecisao.fit(X_treinamento, y_treinamento)

# Verificando score do modelo

In [37]:
modeloarvoredecisao.score(X_treinamento, y_treinamento)

0.9956709956709957

# Fazendo a predição com o modelo induzido (dedução)

In [38]:
y_predicao = modeloarvoredecisao.predict(X_teste)
print(y_predicao)

['Commuter' 'Commuter' 'Long-Distance Traveler' 'Long-Distance Traveler'
 'Long-Distance Traveler' 'Long-Distance Traveler' 'Commuter' 'Commuter'
 'Commuter' 'Casual Driver' 'Casual Driver' 'Casual Driver'
 'Long-Distance Traveler' 'Casual Driver' 'Casual Driver' 'Casual Driver'
 'Casual Driver' 'Commuter' 'Commuter' 'Casual Driver'
 'Long-Distance Traveler' 'Commuter' 'Commuter' 'Commuter' 'Commuter'
 'Long-Distance Traveler' 'Commuter' 'Commuter' 'Casual Driver'
 'Long-Distance Traveler' 'Commuter' 'Commuter' 'Long-Distance Traveler'
 'Casual Driver' 'Long-Distance Traveler' 'Casual Driver' 'Commuter'
 'Commuter' 'Long-Distance Traveler' 'Long-Distance Traveler'
 'Casual Driver' 'Casual Driver' 'Long-Distance Traveler' 'Commuter'
 'Commuter' 'Commuter' 'Casual Driver' 'Commuter' 'Long-Distance Traveler'
 'Commuter' 'Casual Driver' 'Long-Distance Traveler' 'Commuter'
 'Long-Distance Traveler' 'Long-Distance Traveler' 'Commuter'
 'Casual Driver' 'Long-Distance Traveler' 'Casual Driver'

# Acurácia e outras métricas da predição

In [39]:
from sklearn import metrics

In [40]:
print("Acurácia:",metrics.accuracy_score(y_teste, y_predicao))
print("Precisão:",metrics.precision_score(y_teste, y_predicao, average=None))
print("Recall:",metrics.recall_score(y_teste, y_predicao, average=None))
print("F1-score:",metrics.f1_score(y_teste, y_predicao, average=None))

Acurácia: 0.31313131313131315
Precisão: [0.26315789 0.35036496 0.32539683]
Recall: [0.2892562  0.31788079 0.33064516]
F1-score: [0.27559055 0.33333333 0.328     ]


# Relatório da Predição

In [41]:
from sklearn.metrics import classification_report

In [42]:
print(classification_report(y_teste, y_predicao))

                        precision    recall  f1-score   support

         Casual Driver       0.26      0.29      0.28       121
              Commuter       0.35      0.32      0.33       151
Long-Distance Traveler       0.33      0.33      0.33       124

              accuracy                           0.31       396
             macro avg       0.31      0.31      0.31       396
          weighted avg       0.32      0.31      0.31       396



# Matriz de Confusão

In [43]:
labels = ['BMW i3','Chevy Bolt', 'Hyundai Kona', 'Nissan Leaf', 'Tesla Model 3']
matrizconfusao = metrics.confusion_matrix(y_teste, y_predicao, labels=labels)

print(matrizconfusao)

ValueError: At least one label specified must be in y_true

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
ax= plt.subplot()
sns.heatmap(matrizconfusao, annot=True, ax = ax);

# labels, title and ticks
ax.set_xlabel('Predição');
ax.set_ylabel('Valor Real'); 
ax.set_title('Matriz de Confusão'); 
ax.xaxis.set_ticklabels( labels ); 
ax.yaxis.set_ticklabels( labels );