In [53]:
# Importing happens here
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pickle

In [9]:
# Data loading happens here
df = pd.read_csv('data/spam_results_linear.csv')

df['NNPrediction'] = df['NNPrediction'] * 100
df['BPrediction'] = df['BPrediction'] * 100

df

Unnamed: 0,Category,NNPrediction,BPrediction
0,ham,0.0,0.0
1,ham,0.0,0.0
2,spam,100.0,100.0
3,ham,0.0,0.0
4,ham,0.0,0.0
...,...,...,...
10757,ham,0.0,0.0
10758,ham,0.0,0.0
10759,ham,0.0,0.0
10760,ham,0.0,0.0


In [10]:
# Label encoding happens here
df_dummies = pd.get_dummies(df.Category)

df = pd.concat([df, df_dummies], axis = 1)

df = df.drop(['Category', 'ham'], axis = 1)
df

Unnamed: 0,NNPrediction,BPrediction,spam
0,0.0,0.0,0
1,0.0,0.0,0
2,100.0,100.0,1
3,0.0,0.0,0
4,0.0,0.0,0
...,...,...,...
10757,0.0,0.0,0
10758,0.0,0.0,0
10759,0.0,0.0,0
10760,0.0,0.0,0


In [14]:
# Splitting happens here
X_train, X_test, y_train, y_test = train_test_split(df[['NNPrediction', 'BPrediction']].values, df['spam'].values, test_size=0.2)

array([[100., 100.],
       [100., 100.],
       [  0.,   0.],
       ...,
       [  0.,   0.],
       [  0.,   0.],
       [  0.,   2.]])

In [15]:
# The model creation happens here
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression()

In [17]:
model.score(X_test, y_test)

0.9972131908964236

In [18]:
model.coef_

array([[0.26899585, 0.04702863]])

In [55]:
model.intercept_

array([-7.4326865])

In [52]:
model.predict([[10, 100]])

array([0], dtype=uint8)

In [54]:
# Exporting happens here
with open('spam_model_combiner.pickle', 'wb') as f:
    pickle.dump(model, f)