In [1]:
import pymysql
from sqlalchemy import create_engine
import pandas as pd
import getpass  # To get the password without showing the input
password = getpass.getpass()

 ········


In [2]:
connection_string = 'mysql+pymysql://root:' + password + '@localhost/sakila'
engine = create_engine(connection_string)
query = '''SELECT f.title, c.name AS category, rental_duration, rental_rate, length, rating
FROM film f
JOIN inventory i
USING (film_id)
JOIN rental r
USING (inventory_id)
JOIN film_category fc
USING (film_id)
JOIN category c
USING (category_id);'''

data = pd.read_sql_query(query, engine)
data.head(50)

data.isna().value_counts()

title  category  rental_duration  rental_rate  length  rating
False  False     False            False        False   False     16044
dtype: int64

In [3]:
import statistics as stats

grouped_data = data.groupby('title').agg({
    'rental_duration': 'mean',
    'rental_rate': 'mean',
    'length': 'mean',
    'category': lambda x: stats.mode(x)[0][0],
    'rating': lambda x: stats.mode(x)[0][0]
}).reset_index()

print(grouped_data)

                 title  rental_duration  rental_rate  length category rating
0     ACADEMY DINOSAUR              6.0         0.99    86.0        D      P
1       ACE GOLDFINGER              3.0         4.99    48.0        H      G
2     ADAPTATION HOLES              7.0         2.99    50.0        D      N
3     AFFAIR PREJUDICE              5.0         2.99   117.0        H      G
4          AFRICAN EGG              6.0         2.99   130.0        F      G
..                 ...              ...          ...     ...      ...    ...
953     YOUNG LANGUAGE              6.0         0.99   183.0        D      G
954         YOUTH KICK              4.0         0.99   179.0        M      N
955       ZHIVAGO CORE              6.0         0.99   105.0        H      N
956  ZOOLANDER FICTION              5.0         2.99   101.0        C      R
957          ZORRO ARK              3.0         4.99    50.0        C      N

[958 rows x 6 columns]


In [4]:
query = '''SELECT f.title, c.name AS category, rental_duration, rental_rate, length, rating
FROM film f
JOIN film_category fc
USING (film_id)
JOIN category c
USING (category_id)
WHERE film_id IN (
    SELECT f.film_id
    FROM film f
    LEFT JOIN inventory i
    ON f.film_id = i.film_id
    WHERE i.film_id IS NULL
);;'''

missing_data = pd.read_sql_query(query, engine)
missing_data.head(50)

Unnamed: 0,title,category,rental_duration,rental_rate,length,rating
0,ARK RIDGEMONT,Action,6,0.99,68,NC-17
1,FIREHOUSE VIETNAM,Action,7,0.99,103,G
2,SKY MIRACLE,Action,7,2.99,132,PG
3,ARGONAUTS TOWN,Animation,7,0.99,127,PG-13
4,FLOATS GARDEN,Animation,6,2.99,145,PG-13
5,SISTER FREDDY,Children,5,4.99,152,PG-13
6,WALLS ARTIST,Children,7,4.99,135,PG
7,ALICE FANTASIA,Classics,6,0.99,94,NC-17
8,TADPOLE PARK,Classics,6,2.99,155,PG
9,VOLUME HOUSE,Classics,7,4.99,132,PG


In [5]:
data = pd.concat([grouped_data, missing_data], axis=0)

In [6]:
data.head()

Unnamed: 0,title,rental_duration,rental_rate,length,category,rating
0,ACADEMY DINOSAUR,6.0,0.99,86.0,D,P
1,ACE GOLDFINGER,3.0,4.99,48.0,H,G
2,ADAPTATION HOLES,7.0,2.99,50.0,D,N
3,AFFAIR PREJUDICE,5.0,2.99,117.0,H,G
4,AFRICAN EGG,6.0,2.99,130.0,F,G


In [7]:
data.value_counts()

title             rental_duration  rental_rate  length  category  rating
ACADEMY DINOSAUR  6.0              0.99         86.0    D         P         1
PERFECT GROOVE    7.0              2.99         82.0    C         P         1
PARK CITIZEN      3.0              4.99         109.0   A         P         1
PARTY KNOCK       7.0              2.99         107.0   C         P         1
PAST SUICIDES     5.0              4.99         157.0   F         P         1
                                                                           ..
FROGMEN BREAKING  5.0              0.99         111.0   T         R         1
FRONTIER CABIN    6.0              4.99         183.0   N         P         1
FROST HEAD        5.0              0.99         82.0    C         P         1
FUGITIVE MAGUIRE  7.0              4.99         83.0    T         R         1
ZORRO ARK         3.0              4.99         50.0    C         N         1
Length: 1000, dtype: int64

In [8]:
data.isna().value_counts()

title  rental_duration  rental_rate  length  category  rating
False  False            False        False   False     False     1000
dtype: int64

In [9]:
data.sort_values(by='title')
X = data.drop(['title'], axis = 1)

In [10]:
query = '''SELECT title,
       CASE
           WHEN MONTH(r.rental_date) = 5 AND YEAR(r.rental_date) = 2005 THEN 1
           ELSE 0 END AS rented_in_May
FROM film f
JOIN inventory
USING (film_id)
JOIN rental r
USING (inventory_id);'''

data_target = pd.read_sql_query(query, engine)
data_target = data_target.groupby(['title']).agg({'rented_in_May':sum})

missing_films_query = ''' SELECT title 
FROM film 
WHERE film_id IN (
    SELECT f.film_id
    FROM film f
    LEFT JOIN inventory i
    ON f.film_id = i.film_id
    WHERE i.film_id IS NULL
);'''

missing_films = pd.read_sql_query(missing_films_query, engine)
missing_films['rented_in_May'] = 0
missing_films

Unnamed: 0,title,rented_in_May
0,ALICE FANTASIA,0
1,APOLLO TEEN,0
2,ARGONAUTS TOWN,0
3,ARK RIDGEMONT,0
4,ARSENIC INDEPENDENCE,0
5,BOONDOCK BALLROOM,0
6,BUTCH PANTHER,0
7,CATCH AMISTAD,0
8,CHINATOWN GLADIATOR,0
9,CHOCOLATE DUCK,0


In [11]:
data_target.reset_index(inplace = True)
missing_films.reset_index(inplace = True)

In [12]:
dt = pd.concat([data_target, missing_films], axis=0)

In [13]:
dt.drop(['index'], axis = 1)

Unnamed: 0,title,rented_in_May
0,ACADEMY DINOSAUR,2
1,ACE GOLDFINGER,0
2,ADAPTATION HOLES,1
3,AFFAIR PREJUDICE,2
4,AFRICAN EGG,1
...,...,...
37,TREASURE COMMAND,0
38,VILLAIN DESPERATE,0
39,VOLUME HOUSE,0
40,WAKE JAWS,0


In [14]:
dt = dt.sort_values(by='title')

In [15]:
y = dt

In [16]:
def simplify(x):
    if x >= 1:
        return 1
    else:
        return 0

y['rented_in_May'] = y['rented_in_May'].apply(simplify)

In [17]:
y['rented_in_May'].value_counts()

1    686
0    314
Name: rented_in_May, dtype: int64

In [18]:
y = y['rented_in_May']

In [19]:
y

0      1
1      0
2      1
3      1
4      1
      ..
953    0
954    0
955    1
956    1
957    1
Name: rented_in_May, Length: 1000, dtype: int64

In [20]:
X.dtypes

rental_duration    float64
rental_rate        float64
length             float64
category            object
rating              object
dtype: object

In [21]:
import numpy as np

X_num = X.select_dtypes(include = np.number)

In [22]:
# here we start scaling, so need to do train-test-split before
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

In [23]:
X_train_num = X_train.select_dtypes(include = np.number)
X_train_cat = X_train.select_dtypes(include=[np.object])


X_test_num = X_test.select_dtypes(include = np.number)
X_test_cat = X_test.select_dtypes(include=[np.object])



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_train_cat = X_train.select_dtypes(include=[np.object])
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_test_cat = X_test.select_dtypes(include=[np.object])


In [24]:
from sklearn.preprocessing import MinMaxScaler

transformer = MinMaxScaler().fit(X_train_num) # need to keep transformer
X_train_normalized = transformer.transform(X_train_num)
X_train_norm = pd.DataFrame(X_train_normalized, columns=X_num.columns)
X_train_norm

Unnamed: 0,rental_duration,rental_rate,length
0,0.00,5.000000e-01,0.539568
1,0.50,5.000000e-01,0.870504
2,1.00,5.000000e-01,0.280576
3,0.25,1.000000e+00,0.093525
4,0.25,5.000000e-01,0.503597
...,...,...,...
745,0.75,0.000000e+00,0.115108
746,1.00,1.000000e+00,0.194245
747,0.00,5.000000e-01,0.661871
748,0.25,2.775558e-17,0.647482


In [25]:
transformer = MinMaxScaler().fit(X_test_num) # need to keep transformer
X_test_normalized = transformer.transform(X_test_num)
X_test_norm = pd.DataFrame(X_test_normalized, columns=X_num.columns)
X_test_norm

Unnamed: 0,rental_duration,rental_rate,length
0,0.75,5.000000e-01,0.892086
1,0.50,5.000000e-01,0.460432
2,0.50,1.000000e+00,0.302158
3,0.25,1.000000e+00,0.208633
4,0.50,5.000000e-01,0.633094
...,...,...,...
245,0.00,5.000000e-01,0.503597
246,0.50,2.775558e-17,0.625899
247,0.50,1.000000e+00,0.669065
248,0.75,2.775558e-17,0.359712


In [26]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(drop='first').fit(X_train_cat)

cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)
X_train_encoded = pd.DataFrame(encoder.transform(X_train_cat).toarray(),columns=cols)
X_train_encoded

Unnamed: 0,category_Action,category_Animation,category_C,category_Children,category_Classics,category_Comedy,category_D,category_Documentary,category_Drama,category_F,...,category_Sci-Fi,category_Sports,category_T,category_Travel,rating_N,rating_NC-17,rating_P,rating_PG,rating_PG-13,rating_R
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
746,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
747,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
748,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [27]:
X_test_cat

Unnamed: 0,category,rating
37,F,G
726,N,G
846,M,R
295,S,R
924,S,R
...,...,...
944,M,P
315,C,P
227,M,N
241,H,P


In [28]:
X_train_cat

Unnamed: 0,category,rating
673,C,N
107,A,R
898,N,N
441,N,R
439,S,P
...,...,...
534,G,N
584,C,P
493,S,G
527,C,P


In [29]:
cols = encoder.get_feature_names_out(input_features=X_test_cat.columns)
X_test_encoded = pd.DataFrame(encoder.transform(X_test_cat).toarray(),columns=cols)
X_test_encoded

Unnamed: 0,category_Action,category_Animation,category_C,category_Children,category_Classics,category_Comedy,category_D,category_Documentary,category_Drama,category_F,...,category_Sci-Fi,category_Sports,category_T,category_Travel,rating_N,rating_NC-17,rating_P,rating_PG,rating_PG-13,rating_R
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
246,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [30]:
X_train_transformed = pd.concat([X_train_norm, X_train_encoded], axis=1)

In [31]:
X_test_transformed = pd.concat([X_test_norm, X_test_encoded], axis=1)

In [32]:
X_train_transformed

Unnamed: 0,rental_duration,rental_rate,length,category_Action,category_Animation,category_C,category_Children,category_Classics,category_Comedy,category_D,...,category_Sci-Fi,category_Sports,category_T,category_Travel,rating_N,rating_NC-17,rating_P,rating_PG,rating_PG-13,rating_R
0,0.00,5.000000e-01,0.539568,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.50,5.000000e-01,0.870504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1.00,5.000000e-01,0.280576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.25,1.000000e+00,0.093525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.25,5.000000e-01,0.503597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,0.75,0.000000e+00,0.115108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
746,1.00,1.000000e+00,0.194245,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
747,0.00,5.000000e-01,0.661871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
748,0.25,2.775558e-17,0.647482,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [33]:
y_train

644    0
6      0
861    1
418    1
416    1
      ..
509    1
559    1
470    1
502    1
158    1
Name: rented_in_May, Length: 750, dtype: int64

In [34]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression(random_state=0, solver='lbfgs')
LR.fit(X_train_transformed, y_train)

In [35]:
y_test

3      0
694    1
811    0
279    1
886    1
      ..
905    1
299    0
211    1
225    1
467    1
Name: rented_in_May, Length: 250, dtype: int64

In [36]:
y_train

644    0
6      0
861    1
418    1
416    1
      ..
509    1
559    1
470    1
502    1
158    1
Name: rented_in_May, Length: 750, dtype: int64

In [37]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression(random_state=0, solver='lbfgs')
LR.fit(X_train_transformed, y_train)
LR.score(X_test_transformed, y_test)

0.652

In [38]:
X_train_transformed

Unnamed: 0,rental_duration,rental_rate,length,category_Action,category_Animation,category_C,category_Children,category_Classics,category_Comedy,category_D,...,category_Sci-Fi,category_Sports,category_T,category_Travel,rating_N,rating_NC-17,rating_P,rating_PG,rating_PG-13,rating_R
0,0.00,5.000000e-01,0.539568,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.50,5.000000e-01,0.870504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1.00,5.000000e-01,0.280576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.25,1.000000e+00,0.093525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.25,5.000000e-01,0.503597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,0.75,0.000000e+00,0.115108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
746,1.00,1.000000e+00,0.194245,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
747,0.00,5.000000e-01,0.661871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
748,0.25,2.775558e-17,0.647482,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [39]:
y_train

644    0
6      0
861    1
418    1
416    1
      ..
509    1
559    1
470    1
502    1
158    1
Name: rented_in_May, Length: 750, dtype: int64

In [40]:
X_test_transformed

Unnamed: 0,rental_duration,rental_rate,length,category_Action,category_Animation,category_C,category_Children,category_Classics,category_Comedy,category_D,...,category_Sci-Fi,category_Sports,category_T,category_Travel,rating_N,rating_NC-17,rating_P,rating_PG,rating_PG-13,rating_R
0,0.75,5.000000e-01,0.892086,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.50,5.000000e-01,0.460432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.50,1.000000e+00,0.302158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.25,1.000000e+00,0.208633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.50,5.000000e-01,0.633094,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,0.00,5.000000e-01,0.503597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
246,0.50,2.775558e-17,0.625899,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
247,0.50,1.000000e+00,0.669065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
248,0.75,2.775558e-17,0.359712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [41]:
y_test

3      0
694    1
811    0
279    1
886    1
      ..
905    1
299    0
211    1
225    1
467    1
Name: rented_in_May, Length: 250, dtype: int64

In [42]:
from sklearn.metrics import precision_score, recall_score, f1_score

pred = LR.predict(X_test_transformed)

print("precision: ", precision_score(y_test, pred))
print("recall: ", recall_score(y_test, pred))
print("f1: ", f1_score(y_test, pred))

precision:  0.652
recall:  1.0
f1:  0.7893462469733655


In [43]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,pred)

array([[  0,  87],
       [  0, 163]])

In [44]:
#!pip install imblearn
from imblearn.over_sampling import SMOTE

In [45]:
from sklearn.linear_model import LogisticRegression

sm = SMOTE(random_state=100, k_neighbors=3)
X_train_SMOTE,y_train_SMOTE = sm.fit_resample(X_train_transformed,y_train)

LR = LogisticRegression(random_state=0, solver='lbfgs')
LR.fit(X_train_SMOTE, y_train_SMOTE)
pred = LR.predict(X_test_transformed)

print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))

precision:  0.625
recall:  0.4601226993865031
f1:  0.5300353356890459


In [46]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,pred)

array([[42, 45],
       [88, 75]])