# Como podemos prever se uma entrega vai atrasar ou não.
Vamos usar a base da `E-Commerce Public Dataset by Olist`

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import pandas as pd
import numpy as np
import random
import sqlite3
from datetime import datetime
from haversine import haversine

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/brazilian-ecommerce/olist_customers_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_sellers_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_order_reviews_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_order_items_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_products_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_geolocation_dataset.csv
/kaggle/input/brazilian-ecommerce/product_category_name_translation.csv
/kaggle/input/brazilian-ecommerce/olist_orders_dataset.csv
/kaggle/input/brazilian-ecommerce/olist_order_payments_dataset.csv


In [2]:
# entradas dos dados
df_customers = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_customers_dataset.csv')
df_geolocation = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_geolocation_dataset.csv')
df_order_items = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_order_items_dataset.csv')
df_order_reviews = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_order_reviews_dataset.csv')
df_order_payments = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_order_payments_dataset.csv')
df_orders = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_orders_dataset.csv')
df_products = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_products_dataset.csv')
df_sellers = pd.read_csv('/kaggle/input/brazilian-ecommerce/olist_sellers_dataset.csv')
df_category_name_translation = pd.read_csv('/kaggle/input/brazilian-ecommerce/product_category_name_translation.csv')


### 1. Relacionamento das bases
<img src="https://i.imgur.com/HRhd2Y0.png" width="1000"/>


In [3]:
# Criando nosso banco de dados
conn = sqlite3.connect('olist.db')  # se conectando com o banco

In [4]:
# criando as tabelas usando as informações do kaggle

conn.execute("""CREATE TABLE IF NOT EXISTS geolocation (
    geolocation_zip_code_prefix TEXT PRIMARY KEY,
    geolocation_lat FLOAT,
    geolocation_lng FLOAT,
    geolocation_city TEXT,
    geolocation_state TEXT
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS sellers (
    seller_id TEXT PRIMARY KEY,
    seller_zip_code_prefix INTEGER,
    seller_city TEXT,
    seller_state TEXT,
    FOREIGN KEY (seller_zip_code_prefix) REFERENCES geolocation(geolocation_zip_code_prefix)
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS customers (
    customer_id TEXT PRIMARY KEY,
    customer_unique_id TEXT,
    customer_zip_code_prefix INTEGER,
    customer_city TEXT,
    customer_state TEXT,
    FOREIGN KEY (customer_zip_code_prefix) REFERENCES geolocation(geolocation_zip_code_prefix)
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS products (
    product_id TEXT PRIMARY KEY,
    product_category_name TEXT,
    product_name_length INTEGER,
    product_description_length INTEGER,
    product_photos_qty INTEGER,
    product_weight_g INTEGER,
    product_length_cm INTEGER,
    product_height_cm INTEGER,
    product_width_cm INTEGER
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS category_translation (
    product_category_name TEXT PRIMARY KEY,
    product_category_name_english TEXT
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS orders (
    order_id TEXT PRIMARY KEY,
    customer_id TEXT,
    order_status TEXT,
    order_purchase_timestamp TEXT,
    order_approved_at TEXT,
    order_delivered_carrier_date TEXT,
    order_delivered_customer_date TEXT,
    order_estimated_delivery_date TEXT,
    FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS order_items (
    order_id TEXT,
    order_item_id INTEGER,
    product_id TEXT,
    seller_id TEXT,
    shipping_limit_date TEXT,
    price REAL,
    freight_value REAL,
    PRIMARY KEY (order_id, order_item_id),
    FOREIGN KEY (order_id) REFERENCES orders(order_id),
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    FOREIGN KEY (seller_id) REFERENCES sellers(seller_id)
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS order_payments (
    order_id TEXT,
    payment_sequential INTEGER,
    payment_type TEXT,
    payment_installments INTEGER,
    payment_value FLOAT,
    PRIMARY KEY (order_id, payment_sequential),
    FOREIGN KEY (order_id) REFERENCES orders(order_id)
);
""")


conn.execute("""CREATE TABLE IF NOT EXISTS order_reviews (
    review_id TEXT PRIMARY KEY,
    order_id TEXT,
    review_score INTEGER,
    review_comment_title TEXT,
    review_comment_message TEXT,
    review_creation_date TEXT,
    review_answer_timestamp TEXT,
    FOREIGN KEY (order_id) REFERENCES orders(order_id)
);

""")

# ativando chave estrangeira
conn.execute("PRAGMA foreign_keys = ON;")
conn.close()


In [5]:
# alimentando o banco de dados 
conn = sqlite3.connect('olist.db')
df_geolocation.to_sql('geolocation', conn,if_exists='replace',index=False)
df_sellers.to_sql('sellers', conn,if_exists='replace',index=False)
df_customers.to_sql('customers', conn,if_exists='replace',index=False)
df_products.to_sql('products', conn,if_exists='replace',index=False)
df_category_name_translation.to_sql('category_translation', conn,if_exists='replace',index=False)
df_orders.to_sql('orders', conn,if_exists='replace',index=False)
df_order_items.to_sql('order_items', conn,if_exists='replace',index=False)
df_order_payments.to_sql('order_payments', conn,if_exists='replace',index=False)
df_order_reviews.to_sql('order_reviews', conn,if_exists='replace',index=False)
conn.close()

In [6]:
# teste simples na conexão
conn = sqlite3.connect('olist.db')
query = "SELECT * FROM orders LIMIT 5"
df_test = pd.read_sql(query, conn)
display(df_test)
conn.close()


Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00


In [7]:
conn = sqlite3.connect('olist.db')
query = '''
SELECT 
    o.order_id,
    JULIANDAY(o.order_delivered_customer_date) - JULIANDAY(o.order_approved_at) AS tempo_entrega,
    JULIANDAY(o.order_estimated_delivery_date) - JULIANDAY(o.order_approved_at) AS tempo_estimado,
    JULIANDAY(o.order_approved_at) - JULIANDAY(o.order_delivered_carrier_date) AS tempo_preparacao,
    oi.order_item_id as quantidade,
    strftime('%Y-%m', o.order_purchase_timestamp) as mes_referencia,
    strftime('%w', o.order_purchase_timestamp) as dia_semana,
    c.customer_id,
    c.customer_state,
    c.customer_zip_code_prefix,
    p.product_id,
    p.product_weight_g,
    p.product_length_cm,
    p.product_height_cm,
    p.product_width_cm,
    s.seller_id,
    s.seller_state,
    s.seller_zip_code_prefix,
    oi.freight_value,
    gs.seller_lat,
    gs.seller_lng,
    gc.customer_lat,
    gc.customer_lng
FROM orders AS o
JOIN order_items AS oi ON o.order_id = oi.order_id
JOIN customers AS c ON o.customer_id = c.customer_id
JOIN products AS p ON oi.product_id = p.product_id
JOIN sellers AS s ON oi.seller_id = s.seller_id
LEFT JOIN (
    SELECT geolocation_zip_code_prefix, 
           AVG(geolocation_lat) AS seller_lat, 
           AVG(geolocation_lng) AS seller_lng
    FROM geolocation
    GROUP BY geolocation_zip_code_prefix
) AS gs ON s.seller_zip_code_prefix = gs.geolocation_zip_code_prefix
LEFT JOIN (
    SELECT geolocation_zip_code_prefix, 
           AVG(geolocation_lat) AS customer_lat, 
           AVG(geolocation_lng) AS customer_lng
    FROM geolocation
    GROUP BY geolocation_zip_code_prefix
) AS gc ON c.customer_zip_code_prefix = gc.geolocation_zip_code_prefix
;
'''

df_freight_full = pd.read_sql_query(query, conn)
display(df_freight_full)
conn.close()

Unnamed: 0,order_id,tempo_entrega,tempo_estimado,tempo_preparacao,quantidade,mes_referencia,dia_semana,customer_id,customer_state,customer_zip_code_prefix,...,product_height_cm,product_width_cm,seller_id,seller_state,seller_zip_code_prefix,freight_value,seller_lat,seller_lng,customer_lat,customer_lng
0,e481f51cbdc54678b7cc49136f2d6af7,8.429144,15.536632,-2.366493,1,2017-10,1,9ef432eb6251297304e76186b10a928d,SP,3149,...,8.0,13.0,3504c0cb71d7fa48d967e0e4c94d59d9,SP,9350,8.72,-23.680729,-46.444238,-23.576983,-46.587161
1,53cdb2fc8bc7dce0b6741e2150273451,12.502292,17.858021,-0.462882,1,2018-07,2,b0830fb4747a6c6d20dea0b8c802d7ef,BA,47813,...,13.0,19.0,289cdb325fb7e7f891c38608bf9e0962,SP,31570,22.76,-19.807681,-43.980427,-12.177924,-44.660711
2,47770eb9100c2d0c44946d9cf07ec65d,9.382708,26.628206,-0.204595,1,2018-08,3,41ce2a54c0b03bf3443c3d931a367089,GO,75265,...,19.0,21.0,4869f7a5dfa277a7dca6462dcf3b52b2,SP,14840,19.22,-21.363502,-48.229601,-16.745150,-48.514783
3,949d5b44dbf5de918fe9c16f97b45f8a,13.196331,26.176400,-3.745833,1,2017-11,6,f88197465ea7920adcdbec7375364d82,RN,59296,...,10.0,20.0,66922902710d126a0e7d26b0e3805106,MG,31842,27.20,-19.837682,-43.924053,-5.774190,-35.271143
4,ad21c59c0840e6cb83a9ceb5573f8159,2.830938,12.069109,-0.893113,1,2018-02,2,8ab97904e6daea8866dbdbc4fb7aad2c,SP,9195,...,15.0,15.0,2c9e548be18521d1c43cde1c582c6de8,SP,8752,8.72,-23.543395,-46.262086,-23.676370,-46.514627
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112645,63943bddc261676b46f01ca7ac2f7bd8,22.185637,23.450961,-1.425058,1,2018-02,2,1fca14ff2861355f6e5f14306ff977a7,SP,11722,...,10.0,40.0,1f9ab4708f3056ede07124aad39a2554,SP,17602,20.10,-21.930548,-50.498348,-24.001500,-46.449864
112646,83c1379a015df1e13d02aae0204711ab,24.847234,30.372037,-1.241782,1,2017-08,0,1aa71eb042121263aafbe80c1b562c9c,BA,45920,...,90.0,22.0,d50d79cb34e38265a8649c383dcffd48,SP,8290,65.02,-23.553642,-46.452661,-17.898358,-39.373630
112647,11c177c8e97725db2631073c19f07b62,17.080938,37.099757,-3.749097,1,2018-01,1,b331b74b18dc79bcdf6532d51e1637c1,RJ,28685,...,20.0,20.0,a1043bafd471dff536d0c462352beb48,MG,37175,40.59,-20.940578,-45.827237,-22.562825,-42.694574
112648,11c177c8e97725db2631073c19f07b62,17.080938,37.099757,-3.749097,2,2018-01,1,b331b74b18dc79bcdf6532d51e1637c1,RJ,28685,...,20.0,20.0,a1043bafd471dff536d0c462352beb48,MG,37175,40.59,-20.940578,-45.827237,-22.562825,-42.694574


In [8]:
# Criando o indicador de volume por mês
total_mes = df_freight_full.mes_referencia.value_counts()
df_freight_full['volume_mes'] = df_freight_full.apply(lambda row: total_mes[row['mes_referencia']] ,axis=1)


In [9]:
# Dropando na em qualquer uma das colunas 
df_freight_full = df_freight_full.dropna(how='any').copy()

# criando indicado que vou usar para difinir se a entrega foi em tempo ou atrasada
df_freight_full['estimado_vs_realizado'] = df_freight_full['tempo_entrega']-df_freight_full['tempo_estimado']

# criando o indicador de distancia do frete
df_freight_full['freight_distance'] = df_freight_full.apply(lambda row: haversine((row['customer_lat'], row['customer_lng']),( row['seller_lat'], row['seller_lng'])),axis=1)

# calculando o metro cubico do produto
df_freight_full.loc[:, 'cubic_meters_cm'] = df_freight_full['product_length_cm'] * df_freight_full['product_height_cm'] * df_freight_full['product_width_cm'] / 1000000

# indicador de dia da semana, vou usa para saber se a compra foi fim de semana. garantindo que ele está no tipo inteiro
df_freight_full.loc[:,'dia_semana'] = df_freight_full['dia_semana'].astype(int)

In [10]:
# agrupando os dados para que cada order_id seja ma unica linha 
df_freight_full_grouped = df_freight_full.groupby('order_id') \
    .agg({'freight_distance': 'first',
          'volume_mes':'first',
          'tempo_preparacao': 'mean',
          'customer_state': 'first',
          'seller_state': 'first',
          'customer_state': 'first',
          'dia_semana': 'first',
          'quantidade': 'sum',
          'product_weight_g': 'sum','cubic_meters_cm': 'sum',
          'tempo_estimado': 'first',
          'estimado_vs_realizado': 'first'}) \
          .reset_index()

df_freight_full_grouped.loc[:, 'atrasado'] = (df_freight_full_grouped['estimado_vs_realizado'] >= 1).astype(int)

df_freight_full_grouped

Unnamed: 0,order_id,freight_distance,volume_mes,tempo_preparacao,customer_state,seller_state,dia_semana,quantidade,product_weight_g,cubic_meters_cm,tempo_estimado,estimado_vs_realizado,atrasado
0,00010242fe8c5a6d1ba2dd792cb16214,301.505097,4831,-6.367141,RJ,SP,3,1,650.0,0.003528,15.593345,-8.011250,0
1,00018f77f2f0320c557190d7a144bdd3,585.564745,2684,-8.145683,SP,SP,3,1,30000.0,0.060000,18.538044,-2.330278,0
2,000229ec398224ef6ca0657da4fc703e,312.343943,8208,-1.908542,MG,MG,0,1,3050.0,0.014157,21.382986,-13.444954,0
3,00024acbcdf0a6daa1e931b038114c75,293.168825,7248,-2.137292,SP,SP,3,1,200.0,0.002400,11.576181,-5.435660,0
4,00042b26cf59d7ce69dfabb4e55b4fd9,646.164355,1951,-11.816620,SP,PR,6,1,3750.0,0.042000,40.409572,-15.303808,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95964,fffc94f6ce00a00581880bf54a75a037,2754.541514,7975,-0.331933,MA,SC,1,1,10150.0,0.053400,22.825683,-7.043981,0
95965,fffcd46ef2263f404302a634eb57f7eb,351.729906,7092,-0.148056,PR,SP,6,1,8950.0,0.044460,14.811250,-8.144502,0
95966,fffce4705a9662cd70adb13d4a31832d,339.057429,5322,-1.915845,SP,PR,1,1,967.0,0.009576,16.281655,-12.484468,0
95967,fffe18544ffabc95dfada21779c9644f,72.336098,4910,-0.790521,SP,SP,1,1,100.0,0.008000,9.996852,-8.083565,0


In [11]:
# preparando indicador se comprador e vendedor são do mesmo estado
df_freight_full_grouped.loc[:, 'mesmo_estado'] = (df_freight_full_grouped['seller_state'] == df_freight_full_grouped['customer_state']).astype(int)

# criando indicador se a compra foi feita fim de semana
df_freight_full_grouped.loc[:, 'dia_util'] = ((df_freight_full_grouped['dia_semana'] < 6) & (df_freight_full_grouped['dia_semana'] > 0)).astype(int)

In [12]:
#colnas que vou usar
features = ['atrasado','freight_distance','volume_mes','tempo_preparacao','quantidade','product_weight_g','cubic_meters_cm','tempo_estimado','dia_util','mesmo_estado','customer_state']

# equilibrando os dados para o modelo não treinar desbalanceado
com_atraso = df_freight_full_grouped[features].loc[df_freight_full_grouped.atrasado == 1]
sem_atrado = df_freight_full_grouped[df_freight_full_grouped['atrasado'] == 0][features].sample(n=com_atraso.shape[0], random_state=42)
df_freight_equi = pd.concat([com_atraso, sem_atrado], ignore_index=True)


In [13]:
# criando o indicado de estado do cliente
df_encoded = pd.get_dummies(df_freight_equi, columns=['customer_state'], prefix='uf')
df_encoded[df_encoded.columns.difference(features)] = df_encoded[df_encoded.columns.difference(features)].astype(int)
print(df_encoded.columns)

Index(['atrasado', 'freight_distance', 'volume_mes', 'tempo_preparacao',
       'quantidade', 'product_weight_g', 'cubic_meters_cm', 'tempo_estimado',
       'dia_util', 'mesmo_estado', 'uf_AC', 'uf_AL', 'uf_AM', 'uf_AP', 'uf_BA',
       'uf_CE', 'uf_DF', 'uf_ES', 'uf_GO', 'uf_MA', 'uf_MG', 'uf_MS', 'uf_MT',
       'uf_PA', 'uf_PB', 'uf_PE', 'uf_PI', 'uf_PR', 'uf_RJ', 'uf_RN', 'uf_RO',
       'uf_RR', 'uf_RS', 'uf_SC', 'uf_SE', 'uf_SP', 'uf_TO'],
      dtype='object')


In [14]:
import plotly.express as px

In [15]:

# para o graficar ficar mais legivel vou separar os indicadores e os estados
features = ['atrasado','freight_distance','volume_mes','tempo_preparacao','quantidade','product_weight_g','cubic_meters_cm','tempo_estimado','dia_util','mesmo_estado']

# gerando uma matrix de correlação
corr = df_encoded[features].corr()

fig = px.imshow(
    corr.round(2),
    text_auto=True,  
    aspect='auto', 
    
    color_continuous_scale='RdBu_r', 
    width=1000, height=500
    )

fig.update_layout(margin=dict(l=50, r=50, t=50, b=50),
                  title=dict(text='Correlação entre os indicadores ', y=0.95, x=0.5, xanchor='center', yanchor='top')
                  )
fig.show()

In [16]:
# separando variaveis dependentes das independentes
independente = df_encoded.atrasado.astype(int).values
dependentes = df_encoded.drop(columns='atrasado').values
dependentes.shape

(12980, 36)

### Criando o modelo

In [17]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [18]:
# seprando os dados entre treino e teste com 30/70
x_train, x_test, y_train, y_test = train_test_split(dependentes, independente, test_size= 0.3,random_state=42)

# normalizando os dados 
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)


# Definindo a grelha de hiperparâmetros para testar o modelos com diferentes parametros 
param_grid = {
    'n_estimators': [int(x) for x in np.linspace(start=100, stop=1000, num=10)],
    'max_depth': [int(x) for x in np.linspace(10, 110, num=11)] + [None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Criando o modelo RandomForest
rf = RandomForestClassifier()

# Configurando o RandomizedSearchCV
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=param_grid,
                               n_iter=50, cv=3, verbose=0, random_state=42, n_jobs=-1)

# Treinando o modelo com o RandomizedSearchCV
rf_random.fit(x_train, y_train)

# avaliando o modelo
y_pred = rf_random.predict(x_test)

# avaliando o modelo
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

# Definindo o modelo com os melhores hiperparâmetros encontrados
best_rf = rf_random.best_estimator_

# Realizando a validação cruzada com 20 folds
cv_scores = cross_val_score(best_rf, x_train, y_train, cv=20, scoring='accuracy', n_jobs=-1)

# Exibindo os resultados
print("Acurácias em cada fold:", cv_scores)
print(f"Acurácia média: {cv_scores.mean():.3f}")
print(f"Desvio padrão da acurácia: {cv_scores.std():.2f}")

# Acuracia inicial: 62,9%
# Adicionando total de vendas no mÊs :69%
# adicionando tepo de preparo do pacote : 74% 
# Depois de adcionar dia util, comprador e vendedor do mesmo estado, estado dos clientes : 75% ()

# Saída:
# Accuracy: 0.7506420133538778
# Confusion Matrix:
#  [[1492  426]
#  [ 545 1431]]
# Classification Report:
#                precision    recall  f1-score   support

#            0       0.73      0.78      0.75      1918
#            1       0.77      0.72      0.75      1976

#     accuracy                           0.75      3894
#    macro avg       0.75      0.75      0.75      3894
# weighted avg       0.75      0.75      0.75      3894

# Acurácias em cada fold: [0.74285714 0.75164835 0.73406593 0.80879121 0.77582418 0.73846154
#  0.76431718 0.77312775 0.7246696  0.79295154 0.76651982 0.75991189
#  0.76872247 0.75991189 0.78193833 0.78854626 0.74889868 0.78854626
#  0.75770925 0.75991189]
# Acurácia média: 0.764
# Desvio padrão da acurácia: 0.02

Accuracy: 0.748587570621469
Confusion Matrix:
 [[1493  425]
 [ 554 1422]]
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.78      0.75      1918
           1       0.77      0.72      0.74      1976

    accuracy                           0.75      3894
   macro avg       0.75      0.75      0.75      3894
weighted avg       0.75      0.75      0.75      3894

Acurácias em cada fold: [0.74945055 0.75384615 0.72967033 0.7978022  0.77802198 0.74505495
 0.76651982 0.77312775 0.73348018 0.79955947 0.76431718 0.75550661
 0.75991189 0.76211454 0.78193833 0.78854626 0.74669604 0.78854626
 0.76431718 0.75110132]
Acurácia média: 0.764
Desvio padrão da acurácia: 0.02
