# Model preparation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os

In [2]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import mean_squared_error, r2_score, precision_score, confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

Define which columns of the complete feature data frame are kept for modelling.

In [3]:
# Columns earmarked for removal. The list is kept for reference; the visible
# cells below select columns through `kept_features` instead.
remove_features = [
    'person', 'time', 'amount', 'offer_id', 'time_received', 'time_viewed',
    'mobile', 'email', 'social', 'web', 'converted',
    'delta_time_reception_viewed', 'prev_person',
    'delta_time_viewed_completion', 'gender', 'O', 'time_completed',
    'reward', 'potential_reward', 'duration', 'offer_type', 'difficulty',
    'is_transaction',
]

# Columns carried into modelling. ORDER MATTERS: the offer columns come last,
# because later cells treat the final remaining column as the label.
kept_features = (
    ['person']
    + ['age', 'income', 'F', 'M', 'member_since_month']
    + ['viewed', 'is_completed']
    + ['count_offers_completed', 'count_offers_viewed', 'count_transactions']
    + ['avg_spending', 'avg_reward']
    + ['delta_time_reception_viewed_avg', 'delta_time_viewed_completion_avg']
    + ['bogo', 'discount', 'informational']
)

Read in the features produced by the feature engineering step.

In [4]:
# Local working directory: features.csv is read from here and the per-offer
# train/validation/test CSV files are written here further down.
data_dir = './data'

In [5]:
# Engineered feature table; the first CSV column is used as the frame index.
features_path = os.path.join(data_dir, 'features.csv')
features = pd.read_csv(features_path, index_col=0)

For the first test we only look at the BOGO data to check model performance. To train on the demographic features only, set `run_demographic_only` to `True` in the next cell.

In [6]:
# Switch read by the preparation cells below: when True, only the demographic
# columns (plus the label) are kept as model input.
run_demographic_only = False

In [7]:
# Keep the rows that have a bogo label, turn +/-inf into NaN so dropna() below
# removes them as well, and restrict the frame to the modelling columns.
bogo_rows = features[features.bogo.notnull()]
df = bogo_rows.replace([np.inf, -np.inf], np.nan)[kept_features]

# Drop the identifier, the other offer types' labels and 'is_completed';
# 'bogo' stays as the last column and serves as the label later on.
unwanted_columns = ['person', 'discount', 'informational', 'is_completed']
df = df.drop(unwanted_columns, axis=1)
if run_demographic_only:
    demographic = ['age', 'income', 'F', 'M', 'member_since_month', 'bogo']
    df = df[demographic].copy()
df = df.dropna()

In [8]:
# Sanity check of the remaining columns; 'bogo' must be the last one because
# the split cell below takes the final column as the label.
df.columns

Index([u'age', u'income', u'F', u'M', u'member_since_month', u'viewed',
       u'count_offers_completed', u'count_offers_viewed',
       u'count_transactions', u'avg_spending', u'avg_reward',
       u'delta_time_reception_viewed_avg', u'delta_time_viewed_completion_avg',
       u'bogo'],
      dtype='object')

To get an overview of which models might deliver good results, let's look at basic models without any optimization. For this I chose sklearn and create a confusion matrix and a classification report per model to get an idea of how each one performs. The metrics are computed on the test data.

Scaling the data with ```MinMaxScaler```

In [9]:
# Min-max scale every column of `df`, including the trailing label column.
# The result is wrapped in a new DataFrame without column names, so the cells
# below address columns by position.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split, so the test rows influence the scaling seen by the training data --
# consider fitting on the training rows only.
scaler = MinMaxScaler()
scaler.fit(df)
scaled = pd.DataFrame(scaler.transform(df))

  return self.partial_fit(X, y)


Splitting into train and test sets. The training set is split further into train and validation sets.

In [10]:
# The last column of the scaled frame is the label, everything before it is a feature.
df_X = scaled.iloc[:, :-1]
df_y = scaled.iloc[:, -1:]
print('length of df_y: {}, \nlength of df_X: {}'.format(len(df_y), len(df_X)))

# Hold out 30% as the test set, then hold out 30% of the remainder as the
# validation set; both splits use the same fixed seed.
split_options = dict(test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, **split_options)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, **split_options)
print('train: {},\nvalid: {},\ntest: {}'.format(len(y_train), len(y_valid), len(y_test)))

length of df_y: 16717, 
length of df_X: 16717
train: 8190,
valid: 3511,
test: 5016


## Testing different models

checking ```LogisticRegression```

In [12]:
# Untuned logistic-regression baseline, scored on the held-out test set.
lm = linear_model.LogisticRegression()
# y_train is a single-column frame; squeeze() hands fit() a 1-D label vector.
train_labels = y_train.values.squeeze()
lm.fit(X_train.values, train_labels)
y_pred = lm.predict(X_test)
for metric in (confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

[[1135  904]
 [ 606 2371]]
              precision    recall  f1-score   support

         0.0       0.65      0.56      0.60      2039
         1.0       0.72      0.80      0.76      2977

    accuracy                           0.70      5016
   macro avg       0.69      0.68      0.68      5016
weighted avg       0.69      0.70      0.69      5016



checking different parameters for ``support vector classifiers``

In [None]:
# Untuned support-vector baselines: every kernel/gamma combination is fitted on
# the training split and scored on the held-out test set.
for kernel in ['rbf', 'poly']:
    for gamma in [1, 5]:
        # Label each report with BOTH settings; printing only the kernel made
        # the two gamma runs of the same kernel indistinguishable in the output.
        print('kernel: {}, gamma: {}'.format(kernel, gamma))
        svm = SVC(kernel=kernel, gamma=gamma)
        # y_train is a single-column frame; squeeze() yields the 1-D labels fit() expects.
        svm.fit(X_train.values, y_train.values.squeeze())
        # Predict on the plain array, matching the input type used for fit().
        y_pred = svm.predict(X_test.values)
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))

testing the ``stochastic gradient descent classifier``

In [None]:
# Untuned SGD classifier baseline, scored on the held-out test set.
# A fixed seed (the same value used for the splits) makes the stochastic
# optimisation reproducible between runs.
sgdc = SGDClassifier(random_state=42)
# y_train is a single-column frame; squeeze() yields the 1-D labels fit() expects.
sgdc.fit(X_train.values, y_train.values.squeeze())
# Bug fix: predictions must come from `sgdc`. The original called
# `svm.predict`, which re-scored the last SVC of the previous cell (or raised a
# NameError when that cell had not been run).
y_pred = sgdc.predict(X_test.values)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

testing ``random forest classifier``

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Untuned random-forest baseline (19 trees), scored on the held-out test set.
# The seed is fixed (same value as the splits) so the bootstrap samples and
# feature choices -- and therefore the reported metrics -- are reproducible.
forest = RandomForestClassifier(n_estimators=19, random_state=42)
# y_train is a single-column frame; squeeze() yields the 1-D labels fit() expects.
forest.fit(X_train.values, y_train.values.squeeze())
# Predict on the plain array, matching the input type used for fit().
y_pred = forest.predict(X_test.values)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

testing ``xgboost``

In [None]:
# Left disabled on purpose: see the note below -- xgboost does not install in
# the AWS environment this notebook runs in.
#! pip install xgboost

xgboost does not seem to install in the aws environment

In [None]:
# Local xgboost baseline, disabled because the package is not installable in
# this environment (see the note above). It mirrors the other baseline cells:
# fit on the training split, then report on the test split.
#from xgboost import XGBClassifier
#xgb = XGBClassifier(gamma=6)
#xgb.fit(X_train.values,y_train.values.squeeze())
#y_pred = xgb.predict(X_test.values)
#print(confusion_matrix(y_test,y_pred))
#print(classification_report(y_test,y_pred))

# Model transferal to Sagemaker and hyperparameter tuning

Without any further configuration, the models perform quite similarly. The xgboost model had the highest precision and recall. Let's quickly recap what precision and recall are, with $TP$ = true positives, $FP$ = false positives and $FN$ = false negatives:  
$\text{recall} = \frac{TP}{TP + FN}$  
$\text{precision} = \frac{TP}{TP + FP}$

In [11]:
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer

# SageMaker session: used below for the S3 uploads, the default bucket and the region name.
session = sagemaker.Session()
# Execution role of the current environment; it is handed to the Estimator further down.
role = sagemaker.get_execution_role()

No handlers could be found for logger "sagemaker"


uploading the data to an S3 bucket to be able to work with the data in sagemaker

In [12]:
# Display the local data directory the export and upload cells below work with.
data_dir

'./data'

We repeat the steps above for every offer type to create the data that can be fed to the model.

In [13]:
# Build and export one train/validation/test data set per offer type.
#
# For every offer type the loop: selects the rows labelled for that offer,
# cleans and scales them, splits them 70/30 (test) and again 70/30 (validation)
# and writes header-less CSV files into `data_dir`:
#   <offer>_train.csv / <offer>_validation.csv  -> label first, then features
#   <offer>_test.csv                            -> features only
#   <offer>_y_test.csv                          -> test labels only
offer_types = ['bogo', 'discount', 'informational']
demographic_features = ['age', 'income', 'F', 'M', 'member_since_month']

for offer_type in offer_types:
    # Rows without a value in this offer's column do not belong to this offer type.
    df = features[features[offer_type].notnull()].copy()
    # Treat +/-inf like missing values so dropna() below removes them too.
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df[kept_features]

    # Drop the identifier, 'is_completed' and the label columns of the OTHER
    # offer types. kept_features lists the offer columns last, so the current
    # offer's label ends up as the final column of the frame.
    remaining_offers = [other for other in offer_types if other != offer_type]
    df = df.drop(['person', 'is_completed'] + remaining_offers, axis=1)
    if run_demographic_only:
        # Bug fix: the label column must follow the current offer type. The
        # original hard-coded 'bogo', which was dropped just above for the
        # discount and informational runs and therefore raised a KeyError.
        demographic = demographic_features + [offer_type]
        df = df[demographic].copy()
    df = df.dropna()

    # NOTE(review): the scaler is fitted on all rows before splitting, so the
    # test rows influence the scaling -- consider fitting on the training rows only.
    scaler = MinMaxScaler()
    scaled = pd.DataFrame(scaler.fit_transform(df))
    df_y = scaled.iloc[:, -1:]
    df_X = scaled.iloc[:, :-1]
    print('offer type: {} \nlength of df_y: {}, \nlength of df_X: {}'.format(offer_type, len(df_y), len(df_X)))
    X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.3, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=42)
    print('train: {},\nvalid: {},\ntest: {}'.format(len(y_train), len(y_val), len(y_test)))

    # Write the splits to the data directory. Train/validation files carry the
    # label in the first column; the test features and labels go to separate files.
    X_test.to_csv(os.path.join(data_dir, '{}_test.csv'.format(offer_type)), header=False, index=False)
    y_test.to_csv(os.path.join(data_dir, '{}_y_test.csv'.format(offer_type)), header=False, index=False)
    pd.concat([y_val, X_val], axis=1).to_csv(os.path.join(data_dir, '{}_validation.csv'.format(offer_type)), header=False, index=False)
    pd.concat([y_train, X_train], axis=1).to_csv(os.path.join(data_dir, '{}_train.csv'.format(offer_type)), header=False, index=False)

offer type: bogo 
length of df_y: 16717, 
length of df_X: 16717
train: 8190,
valid: 3511,
test: 5016
offer type: discount 
length of df_y: 14088, 
length of df_X: 14088
train: 6902,
valid: 2959,
test: 4227
offer type: informational 
length of df_y: 8080, 
length of df_X: 8080
train: 3959,
valid: 1697,
test: 2424


Let's write the S3 locations into a dictionary of the form `{offer: {'train': ..., 'test': ..., 'validation': ...}}`.

In [14]:
# Upload the exported CSV files and remember where each one landed:
# locations[offer_type][usage] -> S3 URI returned by the upload.
prefix = 'capstone_starbucks_20201106'
locations = {}
for offer_type in offer_types:
    usage_dict = {}
    for usage in ('train', 'test', 'validation'):
        local_file = os.path.join(data_dir, '{}_{}.csv'.format(offer_type, usage))
        location = session.upload_data(local_file, key_prefix=prefix)
        print(location)
        usage_dict[usage] = location
    locations[offer_type] = usage_dict

s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_train.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_test.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_validation.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_train.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_test.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_validation.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_train.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_test.csv
s3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_validation.csv


In [15]:
# Display the nested {offer_type: {usage: S3 URI}} mapping built in the previous cell.
locations

{'bogo': {'test': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_test.csv',
  'train': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_train.csv',
  'validation': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/bogo_validation.csv'},
 'discount': {'test': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_test.csv',
  'train': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_train.csv',
  'validation': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/discount_validation.csv'},
 'informational': {'test': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_test.csv',
  'train': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_train.csv',
  'validation': 's3://sagemaker-us-east-2-330335126841/capstone_starbucks_20201106/informational_validation.csv'}}

train an xgboost model

In [16]:
# Look up the XGBoost training container image for the session's region.
container = get_image_uri(session.boto_region_name, 'xgboost')

# Model artifacts are stored below the project prefix in the session's default bucket.
model_output_path = 's3://{}/{}/output'.format(session.default_bucket(), prefix)

# Estimator settings: a single ml.m4.xlarge training instance, running in the
# current SageMaker session.
estimator_options = dict(
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=model_output_path,
    sagemaker_session=session,
)
# Positional arguments: the training container image, then the IAM role to use.
xgb = sagemaker.estimator.Estimator(container, role, **estimator_options)

In [17]:
# Fixed starting hyperparameters for the plain (untuned) training runs.
# NOTE(review): 'reg:linear' is a regression objective although the label is
# binary; the evaluation cells compensate by rounding the predictions. Consider
# 'binary:logistic' -- confirm that the tuning metric still fits before switching.
base_hyperparameters = dict(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    objective='reg:linear',
    early_stopping_rounds=10,
    num_round=200,
)
xgb.set_hyperparameters(**base_hyperparameters)

## just train one model plain

connect the right input data to sagemaker

In [18]:
# Pick the S3 locations of the bogo data set for the single-model run below.
offer_type = 'bogo'
offer_locations = locations[offer_type]
train_location = offer_locations['train']
val_location = offer_locations['validation']
test_location = offer_locations['test']

In [56]:
# Wrap the S3 locations so that SageMaker is told the channels contain CSV data.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=val_location, content_type='csv')

# One input channel for training and one for validation.
training_channels = {
    'train': s3_input_train,
    'validation': s3_input_validation,
}
xgb.fit(training_channels)

2020-11-06 14:02:33 Starting - Starting the training job...
2020-11-06 14:02:36 Starting - Launching requested ML instances......
2020-11-06 14:03:40 Starting - Preparing the instances for training...
2020-11-06 14:04:30 Downloading - Downloading input data...
2020-11-06 14:04:46 Training - Downloading the training image..[34mArguments: train[0m
[34m[2020-11-06:14:05:08:INFO] Running standalone xgboost training.[0m
[34m[2020-11-06:14:05:08:INFO] File size need to be processed in the node: 1.5mb. Available memory size in the node: 8469.97mb[0m
[34m[2020-11-06:14:05:08:INFO] Determined delimiter of CSV input is ','[0m
[34m[14:05:08] S3DistributionType set as FullyReplicated[0m
[34m[14:05:08] 8190x13 matrix with 106470 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-11-06:14:05:08:INFO] Determined delimiter of CSV input is ','[0m
[34m[14:05:08] S3DistributionType set as FullyReplicated[0m
[34m[14:05:08] 3511x13 matrix with 4

In [57]:
# Batch-transform the test features with the model that was just trained.
xgb_transformer = xgb.transformer(instance_count=1, instance_type='ml.m4.xlarge')
# The test file is CSV and is split into records line by line.
transform_options = dict(content_type='text/csv', split_type='Line')
xgb_transformer.transform(test_location, **transform_options)
xgb_transformer.wait()

.............................[32m2020-11-06T14:10:29.446:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34mArguments: serve[0m
[34m[2020-11-06 14:10:29 +0000] [1] [INFO] Starting gunicorn 19.7.1[0m
[35mArguments: serve[0m
[35m[2020-11-06 14:10:29 +0000] [1] [INFO] Starting gunicorn 19.7.1[0m
[34m[2020-11-06 14:10:29 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[34m[2020-11-06 14:10:29 +0000] [1] [INFO] Using worker: gevent[0m
[34m[2020-11-06 14:10:29 +0000] [36] [INFO] Booting worker with pid: 36[0m
[34m[2020-11-06 14:10:29 +0000] [37] [INFO] Booting worker with pid: 37[0m
[34m[2020-11-06:14:10:29:INFO] Model loaded successfully for worker : 36[0m
[34m[2020-11-06:14:10:29:INFO] Model loaded successfully for worker : 37[0m
[34m[2020-11-06:14:10:29:INFO] Sniff delimiter as ','[0m
[34m[2020-11-06:14:10:29:INFO] Determined delimiter of CSV input is ','[0m
[34m[2020-11-06 14:10:29 +0000] [38] [INFO] Boot

In [23]:
def get_y_test(offer_type, usage='test'):
    """Load the exported label column for one offer type.

    Reads ``<data_dir>/<offer_type>_y_<usage>.csv`` without a header row.

    Parameters
    ----------
    offer_type : str
        Offer type used as file-name prefix, e.g. ``'bogo'``.
    usage : str, optional
        Split name used in the file name. Defaults to ``'test'``, the only
        split for which the export cell writes a separate label file.

    Returns
    -------
    pandas.DataFrame
        Frame with the file's contents (columns are numbered, no header).
    """
    file_name = '{}_y_{}.csv'.format(offer_type, usage)
    return pd.read_csv(os.path.join(data_dir, file_name), header=None)

In [58]:
# Copy everything below the transform job's S3 output path into the local data
# directory; IPython substitutes the two $-variables before running the command.
!aws s3 cp --recursive $xgb_transformer.output_path $data_dir


download: s3://sagemaker-us-east-2-330335126841/xgboost-2020-11-06-14-05-45-916/bogo_test.csv.out to data/bogo_test.csv.out


In [59]:
# Compare the downloaded batch-transform output with the exported bogo test labels.
y_pred = pd.read_csv(os.path.join(data_dir, 'bogo_test.csv.out'), header=None)
y_test = get_y_test('bogo', 'test')
# The predictions are rounded to whole numbers before they are scored as classes.
predicted_labels = y_pred.round()
print(confusion_matrix(y_test, predicted_labels))
print(classification_report(y_test, predicted_labels))

In [62]:
# Re-prints the metrics for the `y_test` / `y_pred` left behind by the previous
# cell (same two statements); the predictions are rounded before scoring.
print(confusion_matrix(y_test,y_pred.round()))
print(classification_report(y_test,y_pred.round()))

[[1346  693]
 [ 516 2461]]
              precision    recall  f1-score   support

         0.0       0.72      0.66      0.69      2039
         1.0       0.78      0.83      0.80      2977

   micro avg       0.76      0.76      0.76      5016
   macro avg       0.75      0.74      0.75      5016
weighted avg       0.76      0.76      0.76      5016



## train one model per offer type

In [38]:
# NOTE(review): `trained_models` is only assigned by the training loop in the
# next cell, so on a fresh kernel run top to bottom this display cell raises a
# NameError -- it belongs below that loop.
trained_models

{'bogo': 'xgboost-2020-11-06-15-17-27-487',
 'discount': 'xgboost-2020-11-06-15-26-25-194',
 'informational': 'xgboost-2020-11-06-15-35-21-812'}

In [32]:
trained_models = {}
for offer_type in offer_types:
    train_location = locations[offer_type]['train']
    val_location = locations[offer_type]['validation']
    test_location = locations[offer_type]['test']
    
    s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
    s3_input_validation = sagemaker.s3_input(s3_data=val_location, content_type='csv')

    xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})
    xgb_transformer = xgb.transformer(instance_count = 1, instance_type = 'ml.m4.xlarge')
    trained_models[offer_type] = xgb_transformer.model_name
    xgb_transformer.transform(test_location, content_type='text/csv', split_type='Line')
    xgb_transformer.wait()
    !aws s3 cp --recursive $xgb_transformer.output_path $data_dir

2020-11-06 15:17:28 Starting - Starting the training job...
2020-11-06 15:17:31 Starting - Launching requested ML instances......
2020-11-06 15:18:53 Starting - Preparing the instances for training......
2020-11-06 15:19:58 Downloading - Downloading input data
2020-11-06 15:19:58 Training - Downloading the training image...
2020-11-06 15:20:24 Uploading - Uploading generated training model[34mArguments: train[0m
[34m[2020-11-06:15:20:19:INFO] Running standalone xgboost training.[0m
[34m[2020-11-06:15:20:19:INFO] File size need to be processed in the node: 1.5mb. Available memory size in the node: 8477.59mb[0m
[34m[2020-11-06:15:20:19:INFO] Determined delimiter of CSV input is ','[0m
[34m[15:20:19] S3DistributionType set as FullyReplicated[0m
[34m[15:20:19] 8190x13 matrix with 106470 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-11-06:15:20:19:INFO] Determined delimiter of CSV input is ','[0m
[34m[15:20:19] S3DistributionT

In [37]:
# Report the test metrics of every offer type from the downloaded transform outputs.
for offer_type in offer_types:
    print(offer_type)
    prediction_file = os.path.join(data_dir, '{}_test.csv.out'.format(offer_type))
    y_pred = pd.read_csv(prediction_file, header=None)
    y_test = get_y_test(offer_type, 'test')
    # The predictions are rounded to whole numbers before they are scored as classes.
    predicted_labels = y_pred.round()
    print(confusion_matrix(y_test, predicted_labels))
    print(classification_report(y_test, predicted_labels))

bogo
[[1331  708]
 [ 491 2486]]
              precision    recall  f1-score   support

         0.0       0.73      0.65      0.69      2039
         1.0       0.78      0.84      0.81      2977

   micro avg       0.76      0.76      0.76      5016
   macro avg       0.75      0.74      0.75      5016
weighted avg       0.76      0.76      0.76      5016

discount
[[ 497  572]
 [ 343 2815]]
              precision    recall  f1-score   support

         0.0       0.59      0.46      0.52      1069
         1.0       0.83      0.89      0.86      3158

   micro avg       0.78      0.78      0.78      4227
   macro avg       0.71      0.68      0.69      4227
weighted avg       0.77      0.78      0.77      4227

informational
[[1309  187]
 [ 636  292]]
              precision    recall  f1-score   support

         0.0       0.67      0.88      0.76      1496
         1.0       0.61      0.31      0.42       928

   micro avg       0.66      0.66      0.66      2424
   macro avg       

## train the model with hyper parameter tuning

In [64]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner

# Search space explored by the tuning job.
tuning_ranges = {
    'max_depth': IntegerParameter(3, 12),
    'eta': ContinuousParameter(0.05, 0.5),
    'min_child_weight': IntegerParameter(2, 8),
    'subsample': ContinuousParameter(0.5, 0.9),
    'gamma': ContinuousParameter(0, 10),
}

# The tuner trains up to 20 models (3 at a time) based on the `xgb` estimator
# and compares them by validation RMSE, lower being better.
xgb_hyperparameter_tuner = HyperparameterTuner(
    estimator=xgb,
    objective_metric_name='validation:rmse',
    objective_type='Minimize',
    max_jobs=20,
    max_parallel_jobs=3,
    hyperparameter_ranges=tuning_ranges,
)

In [65]:
# Tune on the bogo data set, selected explicitly. Previously this cell reused
# whatever train/validation locations earlier cells had left behind; when the
# notebook runs top to bottom that is the LAST offer type of the per-offer
# training loop (informational), while the evaluation further down reads the
# bogo prediction and label files.
tuning_offer_type = 'bogo'
train_location = locations[tuning_offer_type]['train']
val_location = locations[tuning_offer_type]['validation']
test_location = locations[tuning_offer_type]['test']

# Wrap the S3 locations so that SageMaker is told the channels contain CSV data.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=val_location, content_type='csv')

xgb_hyperparameter_tuner.fit({'train': s3_input_train, 'validation': s3_input_validation})

In [66]:
# Block until the tuning job started in the previous cell has finished.
xgb_hyperparameter_tuner.wait()

................................................................................................................................................................................................................................................................................................!


In [67]:
# Name of the tuner's best training job; the tuner was configured to minimise 'validation:rmse'.
xgb_hyperparameter_tuner.best_training_job()

u'xgboost-201106-1413-015-b8fd35ee'

u'xgboost-201106-1413-015-b8fd35ee'

In [25]:
# NOTE(review): `xgb_attached` is only created by the attach cell that follows,
# so on a fresh kernel run top to bottom this display cell raises a NameError --
# it belongs below that cell.
xgb_attached.base_job_name

u'xgboost-201106-1413-015-b8fd35ee'

In [19]:
# Re-attach to a finished training job by name. The hard-coded name matches the
# best_training_job() output shown above.
# NOTE(review): a re-run of the tuning job would produce a different name; use
# xgb_hyperparameter_tuner.best_training_job() when the tuner is still in scope.
xgb_attached = sagemaker.estimator.Estimator.attach(u'xgboost-201106-1413-015-b8fd35ee')

2020-11-06 14:31:34 Starting - Preparing the instances for training
2020-11-06 14:31:34 Downloading - Downloading input data
2020-11-06 14:31:34 Training - Training image download completed. Training in progress.
2020-11-06 14:31:34 Uploading - Uploading generated training model
2020-11-06 14:31:34 Completed - Training job completed[34mArguments: train[0m
[34m[2020-11-06:14:31:22:INFO] Running standalone xgboost training.[0m
[34m[2020-11-06:14:31:22:INFO] Setting up HPO optimized metric to be : rmse[0m
[34m[2020-11-06:14:31:22:INFO] File size need to be processed in the node: 1.5mb. Available memory size in the node: 8480.8mb[0m
[34m[2020-11-06:14:31:22:INFO] Determined delimiter of CSV input is ','[0m
[34m[14:31:22] S3DistributionType set as FullyReplicated[0m
[34m[14:31:22] 8190x13 matrix with 106470 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2020-11-06:14:31:22:INFO] Determined delimiter of CSV input is ','[0m
[34m[14:

In [26]:
# Display the model name of the current `xgb_transformer`.
# NOTE(review): in file order this cell precedes the cell that builds the
# transformer for the tuned model, so which transformer is shown depends on
# execution order -- confirm and move if needed.
xgb_transformer.model_name

u'xgboost-201106-1413-015-b8fd35ee'

In [20]:
# Alternative to the hard-coded attach above, usable while the tuner object is still in scope:
#xgb_attached = sagemaker.estimator.Estimator.attach(xgb_hyperparameter_tuner.best_training_job())

# Score the bogo test features, selected explicitly. Previously this relied on
# a `test_location` left behind by earlier cells, which points at the last
# offer type of the per-offer training loop when the notebook runs top to
# bottom, while the evaluation below reads the bogo prediction and label files.
test_location = locations['bogo']['test']
xgb_transformer = xgb_attached.transformer(instance_count=1, instance_type='ml.m4.xlarge')
xgb_transformer.transform(test_location, content_type='text/csv', split_type='Line')
xgb_transformer.wait()

...........................
[32m2020-11-06T14:50:55.380:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34mArguments: serve[0m
[34m[2020-11-06 14:50:55 +0000] [1] [INFO] Starting gunicorn 19.7.1[0m
[34m[2020-11-06 14:50:55 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)[0m
[34m[2020-11-06 14:50:55 +0000] [1] [INFO] Using worker: gevent[0m
[34m[2020-11-06 14:50:55 +0000] [37] [INFO] Booting worker with pid: 37[0m
[34m[2020-11-06 14:50:55 +0000] [38] [INFO] Booting worker with pid: 38[0m
[34m[2020-11-06 14:50:55 +0000] [39] [INFO] Booting worker with pid: 39[0m
[34m[2020-11-06:14:50:55:INFO] Model loaded successfully for worker : 37[0m
[34m[2020-11-06:14:50:55:INFO] Model loaded successfully for worker : 38[0m
[34m[2020-11-06:14:50:55:INFO] Model loaded successfully for worker : 39[0m
[34m[2020-11-06 14:50:55 +0000] [40] [INFO] Booting worker with pid: 40[0m
[34m[2020-11-06:14:50:55:INFO] Sniff delimiter as ',

In [21]:
# Copy the tuned model's transform output from S3 into the local data directory;
# IPython substitutes the two $-variables before running the command.
!aws s3 cp --recursive $xgb_transformer.output_path $data_dir

download: s3://sagemaker-us-east-2-330335126841/xgboost-201106-1413-015-b8fd35ee-2020-11-06-14-46-30-851/bogo_test.csv.out to data/bogo_test.csv.out


In [24]:
# Score the tuned model: downloaded bogo predictions versus the exported bogo test labels.
prediction_file = os.path.join(data_dir, 'bogo_test.csv.out')
y_pred = pd.read_csv(prediction_file, header=None)
y_test = get_y_test('bogo', 'test')
# The predictions are rounded to whole numbers before they are scored as classes.
predicted_labels = y_pred.round()
print(confusion_matrix(y_test, predicted_labels))
print(classification_report(y_test, predicted_labels))

[[1345  694]
 [ 492 2485]]
              precision    recall  f1-score   support

         0.0       0.73      0.66      0.69      2039
         1.0       0.78      0.83      0.81      2977

   micro avg       0.76      0.76      0.76      5016
   macro avg       0.76      0.75      0.75      5016
weighted avg       0.76      0.76      0.76      5016

