# GaussianNB Classifier

In [1]:
import os

import numpy as np
import pandas as pd

## Load Data Files

In [2]:
# Read the training features, the training labels and the test features
# from their CSV files (paths are relative to the notebook's directory).
X_train, y_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../wechat_algo_data1/1.1_X_train.csv',
        '../wechat_algo_data1/1.1_y_train.csv',
        '../wechat_algo_data1/1.1_X_test.csv',
    )
)

## Train Model

In [3]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.naive_bayes import GaussianNB

In [4]:
# One independent GaussianNB is fitted per target column;
# n_jobs=-1 lets the per-target fits run in parallel on all available cores.
model = MultiOutputClassifier(
    estimator = GaussianNB(),
    n_jobs = -1,
)

In [5]:
# Fit the multi-output model on the full training set; %time reports how long
# the fit takes. Every column of X_train is passed to the model as a feature.
%time model.fit(X_train, y_train)

Wall time: 14.2 s


MultiOutputClassifier(estimator=GaussianNB(), n_jobs=-1)

In [6]:
# Accuracy on the *training* data (not a held-out set). For a
# MultiOutputClassifier a row only counts as correct when all of its
# targets are predicted correctly.
model.score(X = X_train, y = y_train)

0.9290347944938167

## Make Predictions

In [7]:
# Hard class predictions for the test set: one row per test sample,
# one column per target the model was fitted on.
y_test = model.predict(X = X_test)

In [8]:
# Wrap the predicted labels in a DataFrame with one named column per target.
# NOTE(review): the column names assume the targets in y_train are in this
# exact order -- confirm against y_train.columns.
#
# pd.concat(axis=1) aligns rows by index label, not by position, so the
# prediction frame explicitly reuses X_test's index. That keeps every
# prediction attached to the right (userid, feedid) pair even if X_test is
# ever filtered, sorted or otherwise given a non-default index.
y_test = pd.DataFrame(
    y_test,
    index = X_test.index,
    columns = ['read_comment', 'like', 'click_avatar', 'forward']
)

# Put the identifying columns next to the predicted labels.
output_data = pd.concat([X_test[['userid', 'feedid']], y_test], axis = 1)
output_data

Unnamed: 0,userid,feedid,read_comment,like,click_avatar,forward
0,116490,18545,0,0,0,0
1,229006,18545,0,0,0,0
2,17099,18545,0,0,0,0
3,229660,18545,0,0,0,0
4,176548,18545,0,0,0,0
...,...,...,...,...,...,...
419641,136289,32838,0,0,0,0
419642,27818,55309,0,0,0,0
419643,59066,97704,0,0,0,0
419644,94658,75442,0,0,0,0


In [9]:
# Predict class probabilities for the test set.
# For a MultiOutputClassifier the result is a list holding one
# (n_samples, n_classes) array per target.
probabilities = model.predict_proba(X = X_test)

In [10]:
# `probabilities` holds one (n_samples, n_classes) array per target. Keep only
# column 1 of each array and stack them side by side, giving one row per test
# sample and one column per target.
# NOTE(review): column 1 is taken to be the probability of the positive class;
# this assumes every target is binary with classes [0, 1] -- confirm via
# model.classes_.
#
# pd.concat(axis=1) aligns rows by index label, so the probability frame
# reuses X_test's index to stay attached to the right (userid, feedid) rows
# even when X_test does not have a default RangeIndex.
prob_df = pd.DataFrame(
    np.column_stack([target_proba[:, 1] for target_proba in probabilities]),
    index = X_test.index,
    columns = ['read_comment', 'like', 'click_avatar', 'forward'],
)

# Put the identifying columns next to the predicted probabilities.
output_data_prob = pd.concat([X_test[['userid', 'feedid']], prob_df], axis = 1)
output_data_prob

Unnamed: 0,userid,feedid,read_comment,like,click_avatar,forward
0,116490,18545,0.055735,0.022572,0.007739,0.008444
1,229006,18545,0.051637,0.021997,0.007573,0.008966
2,17099,18545,0.055154,0.021310,0.007593,0.008046
3,229660,18545,0.053196,0.022651,0.007666,0.009109
4,176548,18545,0.056748,0.023460,0.007769,0.008901
...,...,...,...,...,...,...
419641,136289,32838,0.043223,0.019687,0.004006,0.011708
419642,27818,55309,0.056759,0.020549,0.007685,0.007445
419643,59066,97704,0.055427,0.021747,0.008008,0.007241
419644,94658,75442,0.052184,0.020975,0.008163,0.008132


## Save Results to Files

In [11]:
# Save results
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
os.makedirs('../predictions', exist_ok = True)

# index = False: the row index is not written to the files.
output_data.to_csv('../predictions/2.0_GaussianNB_predictions.csv', index = False)
output_data_prob.to_csv('../predictions/2.0_GaussianNB_predictions_prob.csv', index = False)