In [1]:
# General imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [41]:
# Read the raw event log CSV into a DataFrame and preview its first rows
event_log_file = './data/event_log.csv'
df = pd.read_csv(filepath_or_buffer=event_log_file)
df.head(n=5)

Unnamed: 0,id,timestamp,email_id,action
0,4591b11ba8cca67079c1a43be2992a8f89fce422,2018-05-25 14:59:02 UTC,3498910,open
1,2bbdb4cff0fe8cc3cb6c1757291e31806ecefa47,2018-06-25 17:47:23 UTC,3498910,open
2,0f467135eabd4e385f9c2dcd3f00a9f2a04c0115,2018-06-27 12:32:36 UTC,3498910,open
3,6f17b7dc6f220c09fba4d8fbc2491317eece2ae8,2018-07-02 22:47:28 UTC,3498910,open
4,a645dd2ac5c5e000c8b5c7739b3a54435fb313cc,2018-01-24 19:21:49 UTC,3498910,open


In [42]:
# Distinct values of the `action` column, as a plain Python list
df['action'].unique().tolist()

['open', 'click', 'received', 'spamreport', 'unsubscribe']

In [43]:
# Data treatment
# Derives the model features (week_day_code, hour_minute, id_code) and the
# binary `label` column from the raw event log held in `df`.

# New columns
# Parse the raw timestamp strings (e.g. "2018-05-25 14:59:02 UTC") into datetimes.
df['timestamp_dt'] = pd.to_datetime(df['timestamp'])

# `Series.dt.weekday_name` was removed in pandas 1.0; `day_name()` (pandas 0.23+)
# yields the same English day names.
df['week_day'] = pd.Categorical(df['timestamp_dt'].dt.day_name())
# Codes follow the sorted category order (alphabetical day names), not calendar order.
df['week_day_code'] = df['week_day'].cat.codes
# Time of day packed as HHMM, e.g. 14:59 -> 1459.
df['hour_minute'] = (df['timestamp_dt'].dt.hour * 100) + df['timestamp_dt'].dt.minute

# Integer code per distinct event id.
df['id'] = pd.Categorical(df['id'])
df['id_code'] = df['id'].cat.codes

# Create label column
# 0 groups 'open'/'click'; 1 groups 'received'/'spamreport'/'unsubscribe'.
labels = {0: ['open', 'click'], 1: ['received', 'spamreport', 'unsubscribe']}
# Invert to a flat action -> label lookup.
category_label = {
    action: label
    for label, actions in labels.items()
    for action in actions
}
# Vectorised lookup; actions missing from the mapping come back as NaN.
mapped_labels = df['action'].map(category_label)
unknown_actions = df.loc[mapped_labels.isna(), 'action'].unique()
if len(unknown_actions) > 0:
    # Keep the original strictness: an unmapped action is an error, not a silent NaN.
    raise KeyError('Unmapped action value(s): {}'.format(list(unknown_actions)))
df['label'] = mapped_labels.astype('int64')

print('Data Frame is ready!')

Data Frame is ready!


In [44]:
# Build the feature matrix X and the target vector y as NumPy arrays
target_column = 'label'
columns_in = ['id_code', 'hour_minute', 'week_day_code']

# Features: the selected columns, cast to float.
X = df[columns_in].values.astype(float)
# Target: the label column, copied out of the frame.
y = df[target_column].values.copy()

In [63]:
# Fit a k-nearest-neighbours classifier and report its held-out accuracy
from sklearn.neighbors import KNeighborsClassifier

# Hold out 40% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0,
)

# Train on the remaining 60% using the 2 nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train, y_train)

# Mean accuracy on the held-out rows.
acc = knn.score(X_test, y_test)
print('Accuracy:', acc)

len 189598
Score: 0.8633860759493671


In [66]:
# Save model
# Serialises the fitted classifier `knn` to disk with pickle.
import pickle

model_file = './models/knn_model.pickle'
# Context manager guarantees the handle is flushed and closed even if dump() raises;
# the previous bare open() left the file object to be closed by garbage collection.
with open(model_file, 'wb') as model_fh:
    pickle.dump(knn, model_fh)
print('Model successfully saved!')