In [1]:
import random
import time

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from catboost import CatBoostClassifier
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from xgboost.sklearn import XGBClassifier

  import pandas.util.testing as tm


In [2]:
city = 'Curitiba'

# Both inputs live in the per-city output directory.
output_dir = f'../../data/output/{city}'
data_path = f'{output_dir}/normalized_data_X_FE_gps_gtfs_waze.csv'
y_path = f'{output_dir}/y_FE.csv'

# Feature matrix and label frame, read as-is from disk.
data, y = pd.read_csv(data_path), pd.read_csv(y_path)
print(data.shape, y.shape, sep='\n')
data.head()

(6211570, 96)
(6211570, 1)


Unnamed: 0,tripNum,shapeSequence,shapeLat,shapeLon,distanceTraveledShape,gpsPointId,gpsLat,gpsLon,distanceToShapePoint,stopPointId,...,alertSubtype_code,alertType_code,alertRoadType_code,jamBlockType_code,busCodeSB_code,alertSubtypeSB_code,alertTypeSB_code,alertRoadTypeSB_code,jamBlockTypeSB_code,problemSB_code
0,0.0,0.899945,0.916733,0.788385,0.014696,0.0,0.348779,0.517589,0.0,0.550342,...,0.814815,0.666667,0.25,0.25,0.015395,0.814815,0.666667,0.25,0.333333,0.0
1,0.0,0.899947,0.921318,0.772879,0.022706,0.0,0.350518,0.513454,0.0,0.5501,...,0.814815,0.666667,0.25,0.25,0.015395,0.814815,0.666667,0.25,0.333333,0.0
2,0.0,0.899949,0.927984,0.771251,0.028732,0.0,0.353047,0.51302,0.0,0.65371,...,0.814815,0.666667,0.25,0.25,0.015395,0.814815,0.666667,0.25,0.333333,0.0
3,0.0,0.899954,0.938787,0.776257,0.037058,0.012792,0.3571,0.514376,4.8e-05,0.653771,...,0.814815,0.666667,0.25,0.25,0.015395,0.814815,0.666667,0.25,0.333333,0.333333
4,0.0,0.899958,0.948479,0.772553,0.044445,0.0,0.360821,0.513367,0.0,0.653831,...,0.814815,0.666667,0.25,0.25,0.015395,0.814815,0.666667,0.25,0.333333,0.0


In [13]:
# same data of rvm test: keep only the first `data_size` rows of features and labels.
# This rebinds `data` and `y`, so the full frames are gone until they are reloaded.
data_size = 5000

data, y = data[:data_size], y[:data_size]
print(data.shape, y.shape, sep='\n')

(5000, 420)
(5000, 1)


In [3]:
# FILTERING HIGHER HEADWAYS (2% of the data)
# two_hours = 120
# data = data[data.headway <= two_hours]

In [3]:
# Columns that are targets/labels rather than model inputs.
target_col = ['headway']
bb_col = ['busBunching']
hd_threshold = ["headwayThreshold"]

# Every remaining column is a feature. The list follows the DataFrame's own column
# order so it is identical on every kernel run; building it through set arithmetic
# would yield an order that depends on string hashing.
_non_feature_cols = set(target_col) | set(bb_col) | set(hd_threshold)
features = [col for col in data.columns if col not in _non_feature_cols]

In [3]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The split itself is pinned by random_state=7; seeding the stdlib `random`
# module is kept from the original cell but train_test_split does not read it.
random.seed(15)
train_X, test_X, train_Y, test_Y = train_test_split(
    data,
    y,
    random_state=7,
    test_size=0.20,
)

In [4]:
# Load the persisted test split; this rebinds test_X / test_Y if they already exist.
test_X, test_Y = (
    pd.read_csv(f'../../data/output/{city}/{split_file}')
    for split_file in ('test_X.csv', 'test_Y.csv')
)

In [4]:
# Drop the notebook's references to the full feature and label frames.
del data, y

### Ensemble Model

### Majority Voting: Random Forest, CatBoost, XGBoost

In [4]:
# Pre-trained base models for the voting ensemble, one per algorithm.
# joblib.load unpickles the file, so only point these paths at trusted artifacts.
# Alternative RF artifact kept from the original cell (cityA):
# rf = joblib.load('../RandomForest/Models/Saved_' + city + '_RF_25_15_09_gps_gtfs.pkl')
rf = joblib.load(f'../RandomForest/Models/Saved_{city}_RF_25_5_05_gps_gtfs_waze.pkl')
catBoost = joblib.load(f'../CatBoost/Models/Saved_{city}_CatBoost_GRID_gps_gtfs_waze.pkl')
xgb = joblib.load(f'../XGBoost/Models/Saved_{city}_XGBoost_GRID_gps_gtfs_waze.pkl')

In [7]:
# prediction time: a one-row frame (first test row) that can stand in for test_X
# in the prediction cell when timing a single prediction.
one_test = test_X.iloc[[0]]
one_test

Unnamed: 0,tripNum,shapeSequence,shapeLat,shapeLon,distanceTraveledShape,gpsPointId,gpsLat,gpsLon,distanceToShapePoint,stopPointId,...,alertSubtype_code,alertType_code,alertRoadType_code,jamBlockType_code,busCodeSB_code,alertSubtypeSB_code,alertTypeSB_code,alertRoadTypeSB_code,jamBlockTypeSB_code,problemSB_code
1920043,0.340909,0.593839,0.58512,0.893964,0.014765,0.0,0.222986,0.545744,0.0,0.606325,...,0.814815,0.666667,0.25,0.25,0.273762,0.814815,0.666667,0.25,0.333333,0.0


In [5]:
# Timestamp taken before the base models predict; `end` is taken in the voting cell.
start = time.time()

# Each base model labels the whole test set (swap test_X for one_test to time one row).
rf_pred, cat_pred, xgb_pred = (
    estimator.predict(test_X) for estimator in (rf, catBoost, xgb)
)

In [9]:
# Optional: keep each base model's raw predictions for the statistical test
# (the original note mentions QP1; the file names use the q2 tag).
preds_rf_pd = pd.DataFrame(rf_pred, columns=['pred'])
preds_cat_pd = pd.DataFrame(cat_pred, columns=['pred'])
preds_xgb_pd = pd.DataFrame(xgb_pred, columns=['pred'])

preds_rf_pd.to_csv(f'outputs/rf_pred_q2_{city}.csv', index=False)
preds_cat_pd.to_csv(f'outputs/cat_pred_q2_{city}.csv', index=False)
preds_xgb_pd.to_csv(f'outputs/xgb_pred_q2_{city}.csv', index=False)

In [6]:
def _majority(votes):
    """Return the label that occurs most often in `votes`; ties go to the earliest entry."""
    return max(votes, key=votes.count)


# One vote per base model for every test row, in the order RF, CatBoost, XGBoost.
final_predictions = [
    _majority([rf_pred[row], cat_pred[row], xgb_pred[row]])
    for row in range(len(rf_pred))
]

# Closes the timing window opened by `start` in the prediction cell.
end = time.time()
# print("Prediction execution time: " + str(end - start) + " sec")

In [7]:
def get_quality(bb_pred, bb_label=None):
    """Print accuracy, precision, recall and F-measure for bus-bunching predictions.

    Parameters
    ----------
    bb_pred : array-like
        Predicted bus-bunching labels, one per evaluated row.
    bb_label : array-like, optional
        Ground-truth labels to score against. When omitted, the notebook-level
        ``test_Y`` is used, which is what every existing call relies on.

    Returns
    -------
    None
        The four scores are only written to stdout.
    """
    if bb_label is None:
        # Looked up at call time, so a test_Y reloaded in a later cell is picked up.
        bb_label = test_Y

    # Bus Bunching
    print("Accuracy: " + str(accuracy_score(bb_label, bb_pred)))
    print("Precision: " + str(precision_score(bb_label, bb_pred)))
    print("Recall: " + str(recall_score(bb_label, bb_pred)))
    print("F-measure: " + str(f1_score(bb_label, bb_pred)))

Experiments after finding the parameters with GridSearch on 5% of the data

Curitiba

In [8]:
# Run tag: _gps_gtfs. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9799301947816735
Precision: 0.8863489361702128
Recall: 0.7404200258783716
F-measure: 0.8068392224916524


In [8]:
# Run tag: _gps_gtfs_weather. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_weather_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9799084611458939
Precision: 0.886375636592802
Recall: 0.7399508026560878
F-measure: 0.8065716057036577


In [8]:
# Run tag: _gps_gtfs_waze. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_waze_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9793095787377426
Precision: 0.8855824563222562
Recall: 0.7286610075502282
F-measure: 0.7994945240101095


In [12]:
# Run tag: 5%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_5p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9437372516127163
Precision: 0.5765829501238061
Recall: 0.023176783403716816
F-measure: 0.044562305210782444


In [17]:
# Run tag: 25%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_25p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9576733418443324
Precision: 0.818576762889559
Recall: 0.32417637105603664
F-measure: 0.4644279443069433


In [22]:
# Run tag: 50%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_50p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9696896275820767
Precision: 0.8969723477669242
Recall: 0.5248759402238052
F-measure: 0.6622354977889006


In [27]:
# Run tag: 75%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_75p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9834679477169218
Precision: 0.9261979353910944
Recall: 0.7692701446060658
F-measure: 0.8404716409563313


In [12]:
# Run tag: 100% grid 5% Curitiba. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/ens_pred_q1_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9791389294493985
Precision: 0.8847346627626951
Recall: 0.7261016081559527
F-measure: 0.797607147319755


In [9]:
# Run tag: 100% grid 3% Curitiba. Store the ensemble votes, then report their quality.
# The target file name is the same one the "grid 5%" cell writes to.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/ens_pred_q1_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.977140239907141
Precision: 0.8835528723015001
Recall: 0.6867010763696342
F-measure: 0.7727880053444705


City A

In [8]:
# City A, run tag: _gps_gtfs. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9202972145731467
Precision: 0.9202712805724661
Recall: 0.4467169556989916
F-measure: 0.6014693820567859


In [8]:
# City A, run tag: _gps_gtfs_weather. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_weather_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9195538291330311
Precision: 0.9215398816311017
Recall: 0.4399512662985717
F-measure: 0.5955714636628416


In [9]:
# City A, run tag: _gps_gtfs_waze. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q4/ens_pred_q4_gps_gtfs_waze_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9189046756501132
Precision: 0.9218964853418404
Recall: 0.43448168597869197
F-measure: 0.590612777053455


In [9]:
# City A, run tag: 5%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_5p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.869701633702932
Precision: 0.7430582714118107
Recall: 0.049252145060528295
F-measure: 0.09238099868721739


In [14]:
# City A, run tag: 25%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_25p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.8771110575966663
Precision: 0.7763546798029557
Recall: 0.12256007465588303
F-measure: 0.21169991268721874


In [19]:
# City A, run tag: 52%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_52p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9106157534892
Precision: 0.8777091587042647
Recall: 0.3905176659667678
F-measure: 0.5405356918605695


In [24]:
# City A, run tag: 75%. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/q3/ens_pred_q3_75p_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9181228994126208
Precision: 0.9393710399348951
Recall: 0.41890245483059857
F-measure: 0.5794191466475439


In [13]:
# Run tag: 100% grid CityA. Store the ensemble votes, then report their quality.
preds_pd = pd.DataFrame(final_predictions, columns=['pred'])
preds_pd.to_csv(f'outputs/ens_pred_q1_{city}.csv', index=False)

get_quality(final_predictions)

Accuracy: 0.9183916349942588
Precision: 0.9223371136313098
Recall: 0.43007491510485524
F-measure: 0.5866171660921065


In [8]:
get_quality(final_predictions) #gps_gtfs_waze Curitiba

Accuracy: 0.9804582416361725
Precision: 0.881313549498228
Recall: 0.7567148686885922
F-measure: 0.8142753318287879


In [9]:
get_quality(final_predictions) #gps_gtfs_weather Curitiba

Accuracy: 0.9810933467706232
Precision: 0.8843647941181297
Recall: 0.7662130842184589
F-measure: 0.8210601697368621


In [8]:
get_quality(final_predictions) #gps_gtfs Curitiba

Accuracy: 0.9810522943474838
Precision: 0.8826213528719089
Recall: 0.7673505950603592
F-measure: 0.8209594365383006


In [11]:
get_quality(final_predictions) #30000

Accuracy: 0.9741666666666666
Precision: 0.9376498800959233
Recall: 0.7519230769230769
F-measure: 0.8345784418356457


In [20]:
get_quality(final_predictions) #25000

Accuracy: 0.9718
Precision: 0.9235880398671097
Recall: 0.702020202020202
F-measure: 0.7977044476327116


In [28]:
get_quality(final_predictions) #20000

Accuracy: 0.97025
Precision: 0.8767772511848341
Recall: 0.6654676258992805
F-measure: 0.7566462167689161


In [36]:
get_quality(final_predictions) #15000

Accuracy: 0.973
Precision: 0.9272727272727272
Recall: 0.5828571428571429
F-measure: 0.7157894736842105


In [11]:
get_quality(final_predictions) #10000

Accuracy: 0.983
Precision: 0.9032258064516129
Recall: 0.4745762711864407
F-measure: 0.6222222222222222


In [19]:
get_quality(final_predictions) #5000

Accuracy: 0.992
Precision: 0.75
Recall: 0.3
F-measure: 0.4285714285714285


In [8]:
# Score the ensemble votes against test_Y with the four classification metrics,
# evaluated in the order accuracy, precision, recall, F1.
accuracy, precision, recall, f_measure = (
    metric(test_Y, final_predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [12]:
# Recife: RF, CatBoost, XGBoost gps+gtfs+waze
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9516694761750202
Precision: 0.90644620492423
Recall: 0.7148041579179304
F-measure: 0.7992985303921853


In [9]:
# Recife: RF, CatBoost, XGBoost gps+gtfs+weather
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9526327361819305
Precision: 0.9059648667077962
Recall: 0.7232547891230526
F-measure: 0.8043647475999655


In [9]:
# Recife: RF, CatBoost, XGBoost gps+gtfs
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9528456306037476
Precision: 0.9054412525879917
Recall: 0.7255359411048034
F-measure: 0.8055663486307185


In [14]:
# Recife: RF, CatBoost, XGBoost 75p
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9534249826368894
Precision: 0.9362982431871628
Recall: 0.7018171449309174
F-measure: 0.8022757915635694


In [9]:
# Recife: RF, CatBoost, XGBoost 52p
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.906270613240637
Precision: 0.813345452601187
Recall: 0.394328226663556
F-measure: 0.531145251396648


In [19]:
# Recife: RF, CatBoost, XGBoost 25p
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.889539205729303
Precision: 0.7804680540934489
Recall: 0.24983798636493249
F-measure: 0.37850999489455284


In [24]:
# Recife: RF, CatBoost, XGBoost 5p
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.8700192302994133
Precision: 0.6177612994350282
Recall: 0.09070171345620448
F-measure: 0.15817906466852016


In [9]:
# Curitiba: RF, CatBoost, XGBoost
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9804928544635253
Precision: 0.8819922101599403
Recall: 0.7566579931464972
F-measure: 0.8145319292230453


In [8]:
# Random Forest, KNN, CatBoost, XGBoost
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9512401972588971
Precision: 0.9091453275690057
Recall: 0.7086606008761698
F-measure: 0.796480545398925


In [None]:
# Random Forest, CatBoost, XGBoost - 1000, to compare with rvm
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

In [9]:
# Random Forest, CatBoost, XGBoost - 3000, to compare with rvm
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9733333333333334
Precision: 0.9634146341463414
Recall: 0.8586956521739131
F-measure: 0.9080459770114943


In [16]:
# Random Forest, CatBoost, XGBoost - all data
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9518439797994604
Precision: 0.9075627487746307
Recall: 0.7151670684604816
F-measure: 0.7999594061702621


In [11]:
# Random Forest, KNN, XGBoost
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9497394660887106
Precision: 0.9118849666076053
Recall: 0.6937294242683464
F-measure: 0.7879867500920132


In [10]:
# Random Forest, Gradient Boosting, XGBoost
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9403023100789804
Precision: 0.9032754864397866
Recall: 0.6233507011950126
F-measure: 0.7376493504501603


## Old tests

### Stacking

#### 1. rf>xgboost>knn(final) 1 day
Accuracy: 0.9653001359376118, 0.9996422694426558, 0.9997853616655935, 0.9995707233311869, 0.9997138155541246, 0.9997853616655935
Precision: 0.9405241935483871, 0.42857142857142855, 1.0, 1.0, 0.75, 1.0
Recall: 0.6865342163355408, 0.75, 0.5, 0.25, 0.5, 0.4
F-measure: 0.7937048064653338, 0.5454545454545454, 0.6666666666666666, 0.4, 0.6, 0.5714285714285715

In [5]:
# First stacking stage: reload the saved Random Forest classifier and label the
# training rows with it; the array is displayed as the cell output.
rf_model_path = '../RandomForest/Saved_RF_100_5_08_BB_class.pkl'
rf = joblib.load(rf_model_path)
rf_pred = rf.predict(train_X)

rf_pred



array([0., 1., 0., ..., 0., 0., 0.])

In [6]:
train_X['rf'] = rf_pred

In [6]:
train_X['rf'].head(5)

223035     0.0
704879     1.0
1412767    0.0
486302     0.0
203578     1.0
Name: rf, dtype: float64

In [8]:
# XGBoost stage of the stack. Hyper-parameters are gathered in one mapping and
# handed to the estimator unchanged.
stac_params = dict(
    learning_rate=0.1,
    n_estimators=120,
    max_depth=50,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=-1,
    scale_pos_weight=1,
    seed=23,
)
rf_xgboost_stac = XGBClassifier(**stac_params)

# Fit on the current training frame, then label that same frame so the result can be
# appended as a feature for the next stage.
rf_xgboost_stac.fit(train_X, train_Y)
xgb_pred = rf_xgboost_stac.predict(train_X)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


MemoryError: Unable to allocate 3.59 GiB for an array with shape (421, 1146106) and data type float64

In [None]:
train_X['xgboost'] = xgb_pred

In [10]:
# Last stage of the rf>xgboost>knn stack: a default KNN using every available core.
rf_xgboost_knn_final = KNeighborsClassifier(n_jobs=-1)

# Hand scikit-learn a flat 1-D target instead of relying on its implicit conversion
# of a column vector (which emits DataConversionWarning).
# Assumes train_Y is the single-column label frame from the split; confirm.
rf_xgboost_knn_final.fit(train_X, np.ravel(train_Y))

  This is separate from the ipykernel package so we can avoid doing imports until


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
                     weights='uniform')

In [11]:
# Rebuild the stacked feature columns on the test set: the Random Forest output goes in
# first because the XGBoost stage is asked to predict on a frame that already has 'rf'.
# NOTE(review): test_X is mutated in place (pandas reports SettingWithCopyWarning in the
# output), so running this cell a second time passes the extra columns to rf.predict.
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
xgb_pred_test = rf_xgboost_stac.predict(test_X)
test_X['xgboost'] = xgb_pred_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [12]:
# Final stage of the stack: the KNN model labels the test frame; the array is shown
# as the cell output.
pred_array = rf_xgboost_knn_final.predict(test_X)
pred_array

array([0., 0., 0., ..., 0., 0., 0.])

In [13]:
# Bus Bunching: score pred_array against test_Y with the four classification metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [14]:
# Report the scores currently held in the metric variables.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9863346927094513
Precision: 0.9850498338870431
Recall: 0.8727005150846211
F-measure: 0.925477955520874


In [16]:
# Persist the fitted final-stage KNN next to the notebook; joblib.dump returns the
# list of written file names, which is displayed as the cell output.
final_model_path = 'Saved_rf_xgboost_knn_final.pkl'
joblib.dump(rf_xgboost_knn_final, final_model_path)

['Saved_rf_xgboost_knn_final.pkl']

#### 1.1. rf>xgboost>catboost

In [11]:
# Final CatBoost stage of the rf>xgboost>catboost stack.
# The chained assignment also binds the same classifier object to the name `model`.
rf_xgboost_cb_final = model = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)

# NOTE(review): presumably train_X already carries the 'rf' and 'xgboost' columns added
# by earlier cells; confirm before running on a fresh kernel.
rf_xgboost_cb_final.fit(train_X, train_Y)


# Recreate the stage outputs on the test frame in stage order; test_X is mutated in
# place (pandas reports SettingWithCopyWarning in the output).
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
xgb_pred_test = rf_xgboost_stac.predict(test_X)
test_X['xgboost'] = xgb_pred_test


# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = rf_xgboost_cb_final.predict(test_X)
pred_array

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


array([0., 1., 0., ..., 0., 0., 0.])

In [12]:
# Bus Bunching: score pred_array against test_Y, then report the four metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9517846485671507
Precision: 0.9034213098729228
Recall: 0.7187184073411619
F-measure: 0.8005543765429425


#### 1.2. rf>xgboost>knn>catboost

In [9]:
# KNN stage: fit on the current training frame and append its predictions for that same
# frame as the 'knn' feature.
# NOTE(review): these are in-sample predictions, so the next stage trains on a feature
# produced by a model that has already seen the labels; out-of-fold predictions would
# avoid that. Confirm whether this was intended.
rf_xgboost_knn = KNeighborsClassifier(n_jobs=-1)
rf_xgboost_knn.fit(train_X, train_Y)
knn_pred = rf_xgboost_knn.predict(train_X)
train_X['knn'] = knn_pred


# Final CatBoost stage; the chained assignment also binds the classifier to `model`.
rf_xgboost_knn_cb_final = model = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)

rf_xgboost_knn_cb_final.fit(train_X, train_Y)

# Recreate every stage output on the test frame in stage order (rf, xgboost, knn);
# test_X is mutated in place, which is what triggers the SettingWithCopyWarning output.
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
xgb_pred_test = rf_xgboost_stac.predict(test_X)
test_X['xgboost'] = xgb_pred_test
knn_pred_test = rf_xgboost_knn.predict(test_X)
test_X['knn'] = knn_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = rf_xgboost_knn_cb_final.predict(test_X)
pred_array

  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pa

array([0., 1., 0., ..., 0., 0., 0.])

In [10]:
# Bus Bunching: score pred_array against test_Y, then report the four metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9517811584946619
Precision: 0.9033130273316611
Recall: 0.7187961738859943
F-measure: 0.8005600946964228


#### 1.3. rf>xgboost>catboost>knn

In [9]:
# CatBoost stage: fit on the current training frame and append its predictions for that
# same frame as the 'cb' feature. The chained assignment also binds `model`.
# NOTE(review): in-sample predictions are used as a stacking feature; confirm intent.
rf_xgboost_cb = model = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)
rf_xgboost_cb.fit(train_X, train_Y)
cb_pred = rf_xgboost_cb.predict(train_X)
train_X['cb'] = cb_pred


# Final KNN stage, trained on the frame that now includes 'cb'.
rf_xgboost_cb_knn = KNeighborsClassifier(n_jobs=-1)
rf_xgboost_cb_knn.fit(train_X, train_Y)


# Recreate every stage output on the test frame in stage order (rf, xgboost, cb);
# test_X is mutated in place, which is what triggers the SettingWithCopyWarning output.
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
xgb_pred_test = rf_xgboost_stac.predict(test_X)
test_X['xgboost'] = xgb_pred_test
cb_pred_test = rf_xgboost_cb.predict(test_X)
test_X['cb'] = cb_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = rf_xgboost_cb_knn.predict(test_X)
pred_array

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: 

array([0., 1., 0., ..., 0., 0., 0.])

In [10]:
# Bus Bunching: score pred_array against test_Y, then report the four metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9515857144352888
Precision: 0.9035577771243752
Recall: 0.7169297768100164
F-measure: 0.7994970080652156


#### 2. rf>knn>xgboost (BEST ON THE SMALLEST DATASET)


In [4]:
# First stage of this stack: reload the saved Random Forest classifier and label the
# training rows with it; the array is displayed as the cell output.
rf_model_path = '../RandomForest/Saved_RF_100_5_08_BB_class.pkl'
rf = joblib.load(rf_model_path)
rf_pred = rf.predict(train_X)

rf_pred

array([0., 1., 0., ..., 0., 0., 0.])

In [7]:
# rf>knn>xgboost stack. Start by appending the Random Forest predictions as a feature.
train_X['rf'] = rf_pred

# KNN stage: fit on the frame with 'rf', then append its predictions for that same
# frame as the 'knn' feature.
# NOTE(review): in-sample predictions are used as stacking features; confirm intent.
rf_knn = KNeighborsClassifier(n_jobs=-1)
rf_knn.fit(train_X, train_Y)
rf_knn_pred = rf_knn.predict(train_X)
train_X['knn'] = rf_knn_pred
# to test: lower n to 100 and delete the prediction arrays
# (translated from Portuguese; "n" presumably means n_estimators, confirm)

# Final XGBoost stage, trained on the frame with both extra columns.
rf_knn_xgboost = XGBClassifier(
     learning_rate =0.1,
     n_estimators=120, 
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective= 'binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
rf_knn_xgboost.fit(train_X, train_Y)
# NOTE(review): this training-set prediction is not read again in this cell, and the
# recorded output of the cell ends in a MemoryError.
rf_knn_xgboost_pred = rf_knn_xgboost.predict(train_X)


# Recreate the stage outputs on the test frame in stage order (rf, knn); test_X is
# mutated in place.
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
rf_knn_pred_test = rf_knn.predict(test_X)
test_X['knn'] = rf_knn_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = rf_knn_xgboost.predict(test_X)
pred_array

  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


MemoryError: Unable to allocate array with shape (483656732,) and data type float32

In [None]:
# Bus Bunching: score pred_array against test_Y with the four classification metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [None]:
# Report the scores currently held in the metric variables.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

In [10]:
# Run tag: 1day
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9881948916076412
Precision: 0.9830097087378641
Recall: 0.8940397350993378
F-measure: 0.9364161849710982


#### 2.1 rf>catboost>xgboost

In [5]:
# rf>catboost>xgboost stack. Start by appending the Random Forest predictions as a
# feature (rf_pred must already exist in the kernel).
train_X['rf'] = rf_pred

# CatBoost stage: fit on the frame with 'rf', then append its predictions for that same
# frame as the 'cb' feature.
# NOTE(review): in-sample predictions are used as stacking features; confirm intent.
rf_cb = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)
rf_cb.fit(train_X, train_Y)
rf_cb_pred = rf_cb.predict(train_X)
train_X['cb'] = rf_cb_pred
# to test: lower n to 100 and delete the prediction arrays
# (translated from Portuguese; "n" presumably means n_estimators, confirm)

# Final XGBoost stage, trained on the frame with both extra columns.
rf_cb_xgboost = XGBClassifier(
     learning_rate =0.1,
     n_estimators=120, 
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective= 'binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
rf_cb_xgboost.fit(train_X, train_Y)
# NOTE(review): this training-set prediction is not read again in this cell.
rf_cb_xgboost_pred = rf_cb_xgboost.predict(train_X)


# Recreate the stage outputs on the test frame in stage order (rf, cb); test_X is
# mutated in place, which is what triggers the SettingWithCopyWarning output.
rf_pred_test = rf.predict(test_X)
test_X['rf'] = rf_pred_test
rf_cb_pred_test = rf_cb.predict(test_X)
test_X['cb'] = rf_cb_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = rf_cb_xgboost.predict(test_X)
pred_array

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c

array([0., 1., 0., ..., 0., 0., 0.])

In [6]:
# Bus Bunching: score pred_array against test_Y, then report the four metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9514740321156471
Precision: 0.8999254384543067
Recall: 0.7195997615159292
F-measure: 0.799723438580318


#### 3. knn>xgboost>rf

In [5]:
# knn>xgboost>rf stack. KNN stage: fit on the training frame and append its predictions
# for that same frame as the 'knn' feature.
# NOTE(review): in-sample predictions are used as stacking features; confirm intent.
knn = KNeighborsClassifier(n_jobs=-1)
knn.fit(train_X, train_Y)
knn_pred = knn.predict(train_X)
train_X['knn'] = knn_pred


# XGBoost stage, trained on the frame with 'knn'; its predictions become 'xgboost'.
knn_xgboost = XGBClassifier(
     learning_rate =0.1,
     n_estimators=120, 
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective= 'binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
knn_xgboost.fit(train_X, train_Y)
knn_xgboost_pred = knn_xgboost.predict(train_X)
train_X['xgboost'] = knn_xgboost_pred


# Final Random Forest stage.
# NOTE(review): no random_state is passed, so the fitted forest can differ between runs.
knn_xgboost_rf = RandomForestClassifier(n_estimators=100, min_samples_split=5,
                           max_features=0.8, n_jobs=-1)
knn_xgboost_rf.fit(train_X, train_Y)
# NOTE(review): this training-set prediction is not read again in this cell.
knn_xgboost_rf_pred = knn_xgboost_rf.predict(train_X)


# Recreate the stage outputs on the test frame in stage order (knn, xgboost); test_X is
# mutated in place, which is what triggers the SettingWithCopyWarning output.
knn_pred_test = knn.predict(test_X)
test_X['knn'] = knn_pred_test
knn_xgboost_pred_test = knn_xgboost.predict(test_X)
test_X['xgboost'] = knn_xgboost_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = knn_xgboost_rf.predict(test_X)
pred_array

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


array([0., 0., 0., ..., 0., 0., 0.])

In [6]:
# Bus Bunching: score pred_array against test_Y with the four classification metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [7]:
# Report the scores currently held in the metric variables.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9580024325677899
Precision: 0.8641509433962264
Recall: 0.6740250183958794
F-measure: 0.7573377428689542


####  4. xgboost>rf>knn

In [4]:
# xgboost>rf>knn stack. XGBoost stage: fit on the training frame and append its
# predictions for that same frame as the 'xgboost' feature.
# NOTE(review): in-sample predictions are used as stacking features; confirm intent.
xgboost = XGBClassifier(
     learning_rate =0.1,
     n_estimators=120, 
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective= 'binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
xgboost.fit(train_X, train_Y)
xgboost_pred = xgboost.predict(train_X)
train_X['xgboost'] = xgboost_pred


# Random Forest stage, trained on the frame with 'xgboost'; its predictions become 'rf'.
# NOTE(review): no random_state is passed, so the fitted forest can differ between runs.
xgboost_rf = RandomForestClassifier(n_estimators=100, min_samples_split=5,
                           max_features=0.8, n_jobs=-1)
xgboost_rf.fit(train_X, train_Y)
xgboost_rf_pred = xgboost_rf.predict(train_X)
train_X['rf'] = xgboost_rf_pred


# Final KNN stage.
xgboost_rf_knn = KNeighborsClassifier(n_jobs=-1)
xgboost_rf_knn.fit(train_X, train_Y)
# NOTE(review): this training-set prediction is not read again in this cell.
xgboost_rf_knn_pred = xgboost_rf_knn.predict(train_X)


# Recreate the stage outputs on the test frame in stage order (xgboost, rf); test_X is
# mutated in place, which is what triggers the SettingWithCopyWarning output.
xgboost_pred_test = xgboost.predict(test_X)
test_X['xgboost'] = xgboost_pred_test
xgboost_rf_pred_test = xgboost_rf.predict(test_X)
test_X['rf'] = xgboost_rf_pred_test

# Final prediction on the augmented test frame, displayed as the cell output.
pred_array = xgboost_rf_knn.predict(test_X)
pred_array

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

array([0., 0., 0., ..., 0., 0., 0.])

In [5]:
# Bus Bunching: score pred_array against test_Y with the four classification metrics.
accuracy, precision, recall, f_measure = (
    metric(test_Y, pred_array)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [6]:
# Report the scores currently held in the metric variables.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f_measure)

Accuracy: 0.9649424053802675
Precision: 0.9263984298331698
Recall: 0.6946284032376747
F-measure: 0.7939444911690496


In [18]:
# all 1. (author's run label, translated from "todos 1."; what was set to 1
# is not shown in this cell - TODO confirm)
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),
    sep="\n",
)

Accuracy: 0.9508562892851284
Precision: 0.9031334342702916
Recall: 0.711278741218861
F-measure: 0.7958062036862483


#### 4.1 xgboost>rf>catboost

In [None]:
# Stacked ensemble 4.1: XGBoost -> Random Forest -> CatBoost.
# Each level is fit on the features plus the hard class predictions of the
# previous levels, which are appended as extra columns ('xgboost', 'rf').
#
# The stack is built on private copies of train_X / test_X, with any stacked
# columns left behind by an earlier section removed. Writing the predictions
# straight into the shared frames made the result depend on which cells had
# run before (the first-level model was fit on stale 'xgboost' / 'rf'
# columns) and triggered pandas' SettingWithCopyWarning.
#
# NOTE(review): every level predicts on the same rows it was fit on, so the
# next level sees optimistic features; out-of-fold predictions would avoid
# that. Left unchanged here because it alters the experiment itself.
stacked_columns = ['xgboost', 'rf', 'cat']
stack_train_X = train_X.drop(columns=stacked_columns, errors='ignore').copy()
stack_test_X = test_X.drop(columns=stacked_columns, errors='ignore').copy()

# scikit-learn expects 1-D labels and warns on a column vector.
# Assumes train_Y is a single-column frame (y is loaded with shape (n, 1)).
stack_train_y = train_Y.values.ravel()

# Level 1: XGBoost on the base features.
xgboost = XGBClassifier(
     learning_rate=0.1,
     n_estimators=120,
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective='binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
xgboost.fit(stack_train_X, stack_train_y)
xgboost_pred = xgboost.predict(stack_train_X)
stack_train_X['xgboost'] = xgboost_pred


# Level 2: random forest on base features + XGBoost predictions.
# random_state makes the forest reproducible across runs.
xgboost_rf = RandomForestClassifier(n_estimators=100, min_samples_split=5,
                           max_features=0.8, n_jobs=-1, random_state=23)
xgboost_rf.fit(stack_train_X, stack_train_y)
xgboost_rf_pred = xgboost_rf.predict(stack_train_X)
stack_train_X['rf'] = xgboost_rf_pred


# Level 3 (final): CatBoost on base features + both earlier predictions.
xgboost_rf_cb = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)
xgboost_rf_cb.fit(stack_train_X, stack_train_y)
xgboost_rf_cb_pred = xgboost_rf_cb.predict(stack_train_X)


# Replay the stack on the test features, adding the columns in the same order
# as during training so every model sees the layout it was fit on.
xgboost_pred_test = xgboost.predict(stack_test_X)
stack_test_X['xgboost'] = xgboost_pred_test
xgboost_rf_pred_test = xgboost_rf.predict(stack_test_X)
stack_test_X['rf'] = xgboost_rf_pred_test

pred_array = xgboost_rf_cb.predict(stack_test_X)
pred_array

In [None]:
# Bus Bunching
# Score the test-set predictions in pred_array against test_Y:
# accuracy, precision, recall and F1, evaluated in that order.
accuracy, precision, recall, f_measure = (
    score_fn(test_Y, pred_array)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

In [None]:
# Report the four scores computed by the metrics cell, one per line.
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),
    sep="\n",
)

#### 4.2 xgboost>rf>catboost>knn

In [None]:
# Stacked ensemble 4.2: XGBoost -> Random Forest -> CatBoost -> KNN.
# Each level is fit on the features plus the hard class predictions of the
# previous levels, appended as extra columns ('xgboost', 'rf', 'cat').
#
# The stack is built on private copies of train_X / test_X, with any stacked
# columns left behind by an earlier section removed. Writing the predictions
# straight into the shared frames made the result depend on which cells had
# run before (the first-level model was fit on stale prediction columns) and
# triggered pandas' SettingWithCopyWarning.
#
# NOTE(review): every level predicts on the same rows it was fit on, so the
# next level sees optimistic features; out-of-fold predictions would avoid
# that. Left unchanged here because it alters the experiment itself.
stacked_columns = ['xgboost', 'rf', 'cat']
stack_train_X = train_X.drop(columns=stacked_columns, errors='ignore').copy()
stack_test_X = test_X.drop(columns=stacked_columns, errors='ignore').copy()

# scikit-learn expects 1-D labels and warns on a column vector.
# Assumes train_Y is a single-column frame (y is loaded with shape (n, 1)).
stack_train_y = train_Y.values.ravel()

# Level 1: XGBoost on the base features.
xgboost = XGBClassifier(
     learning_rate=0.1,
     n_estimators=120,
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective='binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)
xgboost.fit(stack_train_X, stack_train_y)
xgboost_pred = xgboost.predict(stack_train_X)
stack_train_X['xgboost'] = xgboost_pred


# Level 2: random forest on base features + XGBoost predictions.
# random_state makes the forest reproducible across runs.
xgboost_rf = RandomForestClassifier(n_estimators=100, min_samples_split=5,
                           max_features=0.8, n_jobs=-1, random_state=23)
xgboost_rf.fit(stack_train_X, stack_train_y)
xgboost_rf_pred = xgboost_rf.predict(stack_train_X)
stack_train_X['rf'] = xgboost_rf_pred


# Level 3: CatBoost on base features + both earlier predictions.
xgboost_rf_cb = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)
xgboost_rf_cb.fit(stack_train_X, stack_train_y)
xgboost_rf_cb_pred = xgboost_rf_cb.predict(stack_train_X)
stack_train_X['cat'] = xgboost_rf_cb_pred


# Level 4 (final): KNN on base features + all three earlier predictions.
xgboost_rf_cb_knn = KNeighborsClassifier(n_jobs=-1)
xgboost_rf_cb_knn.fit(stack_train_X, stack_train_y)
# Stored under this section's own name; the original reused
# xgboost_rf_knn_pred and so overwrote the xgboost>rf>knn section's result.
xgboost_rf_cb_knn_pred = xgboost_rf_cb_knn.predict(stack_train_X)


# Replay the stack on the test features, adding the columns in the same order
# as during training so every model sees the layout it was fit on.
xgboost_pred_test = xgboost.predict(stack_test_X)
stack_test_X['xgboost'] = xgboost_pred_test
xgboost_rf_pred_test = xgboost_rf.predict(stack_test_X)
stack_test_X['rf'] = xgboost_rf_pred_test
xgboost_rf_cb_pred_test = xgboost_rf_cb.predict(stack_test_X)
stack_test_X['cat'] = xgboost_rf_cb_pred_test

pred_array = xgboost_rf_cb_knn.predict(stack_test_X)
pred_array

In [None]:
# Bus Bunching
# Score the test-set predictions in pred_array against test_Y:
# accuracy, precision, recall and F1, evaluated in that order.
accuracy, precision, recall, f_measure = (
    score_fn(test_Y, pred_array)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

In [None]:
# Report the four scores computed by the metrics cell, one per line.
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),
    sep="\n",
)

### Boosting

In [None]:
# check predictions and add weights
def checkPredictions(pred, y_true=None, correct_weight=0.1,
                     false_negative_weight=0.5, false_positive_weight=0.4):
    """Turn a model's predictions into per-sample weights for the next model.

    Samples the model got right receive a small weight; samples it got wrong
    receive a larger one, so the next model in the chain focuses on them.

    Parameters
    ----------
    pred : array-like
        Predicted labels, one per sample. If 2-D, the first column is used.
    y_true : array-like or DataFrame, optional
        True labels aligned with ``pred``. If 2-D, the first column is used.
        Defaults to the notebook-level ``train_Y``.
    correct_weight : float, default 0.1
        Weight for samples where the prediction equals the true label.
    false_negative_weight : float, default 0.5
        Weight for wrong predictions whose predicted value is 0.0.
    false_positive_weight : float, default 0.4
        Weight for every other wrong prediction.

    Returns
    -------
    list of float
        One weight per element of ``pred``.

    Raises
    ------
    IndexError
        If ``pred`` has more elements than ``y_true``.
    """
    if y_true is None:
        y_true = train_Y  # notebook-level training labels

    predicted = np.asarray(pred)
    if predicted.ndim > 1:
        predicted = predicted.reshape(len(predicted), -1)[:, 0]

    actual = np.asarray(y_true)
    if actual.ndim > 1:
        actual = actual.reshape(len(actual), -1)[:, 0]

    if len(predicted) > len(actual):
        raise IndexError(
            "pred has %d elements but only %d true labels are available"
            % (len(predicted), len(actual)))
    # Only the first len(pred) labels are compared, as the loop version did.
    actual = actual[:len(predicted)]

    weights = np.where(
        predicted == actual,
        correct_weight,
        np.where(predicted == 0.0, false_negative_weight, false_positive_weight),
    )
    return weights.tolist()

#### 1. xgboost>rf>catboost>knn

In [None]:
# Boosting-style stack, level 1: XGBoost on the base features.
#
# Start the section from clean feature frames. Earlier sections write their
# stacked prediction columns into train_X / test_X, and without this reset the
# first-level model would be fit on those stale columns. test_X is reset here
# as well so that it keeps the same column layout as the frame this model is
# fit on. The copies also detach both frames from the frame they were sliced
# from, which avoids pandas' SettingWithCopyWarning on the column assignments.
stacked_columns = ['xgboost', 'rf', 'cat']
train_X = train_X.drop(columns=stacked_columns, errors='ignore').copy()
test_X = test_X.drop(columns=stacked_columns, errors='ignore').copy()

xgboost = XGBClassifier(
     learning_rate=0.1,
     n_estimators=120,
     max_depth=50,
     min_child_weight=1,
     gamma=0,
     subsample=0.8,
     colsample_bytree=0.8,
     objective='binary:logistic',
     nthread=-1,
     scale_pos_weight=1,
     seed=23)

# scikit-learn expects 1-D labels and warns on a column vector.
# Assumes train_Y is a single-column frame (y is loaded with shape (n, 1)).
xgboost.fit(train_X, train_Y.values.ravel())
xgb_pred = xgboost.predict(train_X)

# Expose the level-1 predictions as a feature for the next level.
train_X['xgboost'] = xgb_pred

In [None]:
# Boosting-style stack, level 2: random forest fit on train_X (which by now
# carries the 'xgboost' column), with per-sample weights derived from the
# XGBoost predictions via checkPredictions.
xgb_rf = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=5,
    max_features=0.8,
    n_jobs=-1,
)
xgb_rf.fit(train_X, train_Y, sample_weight=checkPredictions(xgb_pred))

# Keep the level-2 predictions and expose them as a feature for the next level.
rf_pred = xgb_rf.predict(train_X)
train_X['rf'] = rf_pred

In [None]:
# Boosting-style stack, level 3: CatBoost fit on train_X (which by now carries
# the 'xgboost' and 'rf' columns), with per-sample weights derived from the
# random-forest predictions via checkPredictions.
# The same estimator is also bound to the name `model`.
# NOTE(review): learning_rate=0.9 together with iterations=10000 looks
# unusually aggressive - confirm this is intended.
xgb_rf_cb = model = CatBoostClassifier(
    custom_loss=['Accuracy', 'Precision', 'Recall', 'F1'],
    iterations=10000,
    learning_rate=0.9,
    random_seed=42,
    depth=8,
    l2_leaf_reg=5,
    logging_level='Silent'
)

xgb_rf_cb.fit(train_X, train_Y, sample_weight=checkPredictions(rf_pred))
cat_pred = xgb_rf_cb.predict(train_X)

# Expose the level-3 predictions as a feature for the next level.
train_X['cat'] = cat_pred

In [None]:
# Boosting-style stack, final level: KNN fit on train_X including the
# 'xgboost', 'rf' and 'cat' prediction columns. No sample weights are passed
# at this level.
xgb_rf_cb_knn = KNeighborsClassifier(n_jobs=-1)
xgb_rf_cb_knn.fit(train_X, train_Y)

In [None]:
# Replay the stack on the test features: each model predicts on test_X and its
# predictions are added as a column before the next model runs, in the same
# order the columns were added to train_X.
xgboost_pred_test = xgboost.predict(test_X)
test_X['xgboost'] = xgboost_pred_test

xgb_rf_pred_test = xgb_rf.predict(test_X)
test_X['rf'] = xgb_rf_pred_test

xgb_rf_cb_pred_test = xgb_rf_cb.predict(test_X)
test_X['cat'] = xgb_rf_cb_pred_test

# Final prediction comes from the KNN level.
pred_array = xgb_rf_cb_knn.predict(test_X)
#pred_array = xgb_rf_cb.predict(test_X) # without knn
pred_array

In [None]:
# Bus Bunching
# Score the test-set predictions in pred_array against test_Y:
# accuracy, precision, recall and F1, evaluated in that order.
accuracy, precision, recall, f_measure = (
    score_fn(test_Y, pred_array)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

In [None]:
# boosting: xgb > rf (weighted) > cat (weighted)
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),  # 0.79 (value noted by the author)
    sep="\n",
)

In [None]:
# boosting: xgb > rf (weighted) > cat (weighted) > knn
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),  # 0.79 (value noted by the author)
    sep="\n",
)

In [None]:
# boosting: rf (no weights) > xgb (with weights, kernel died) > knn (takes no weights) > cat (with weights)
print(
    "Accuracy: " + str(accuracy),
    "Precision: " + str(precision),
    "Recall: " + str(recall),
    "F-measure: " + str(f_measure),  # 0.8 (value noted by the author)
    sep="\n",
)