# MultinomialNB Classifier

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

## Load Data Files

In [2]:
# Read the training features, training targets and test features (in that
# order) from the CSV files in the data directory.
X_train, y_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../wechat_algo_data1/1.1_X_train.csv',
        '../wechat_algo_data1/1.1_y_train.csv',
        '../wechat_algo_data1/1.1_X_test.csv',
    )
)

## Train Model

In [3]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.naive_bayes import MultinomialNB

In [4]:
# Wrap a MultinomialNB base estimator in MultiOutputClassifier so several
# target columns can be fitted at once; n_jobs=-1 requests all available cores.
model = MultiOutputClassifier(
    estimator=MultinomialNB(),
    n_jobs=-1,
)

In [5]:
# Fit the multi-output model on the training data; the %time magic reports
# how long the fit took.
%time model.fit(X_train, y_train)

Wall time: 13 s


MultiOutputClassifier(estimator=MultinomialNB(), n_jobs=-1)

In [6]:
# Score on the training data itself (not a held-out set), so this number is
# an optimistic estimate. NOTE(review): for a multi-output classifier the
# score is presumably exact-match accuracy across all targets -- confirm
# against the scikit-learn documentation.
model.score(X=X_train, y=y_train)

0.6243121985295745

## Make Predictions

In [7]:
# Predict a label for every target column of each test row.
y_test = model.predict(X=X_test)

In [8]:
# Label the predicted target columns and attach them to the identifying
# userid/feedid columns of the test set.
y_test = pd.DataFrame(
    y_test,
    # pd.concat(axis=1) aligns rows by index label, so reuse X_test's index
    # instead of relying on both frames happening to share a default RangeIndex.
    index=X_test.index,
    columns=['read_comment', 'like', 'click_avatar', 'forward'],
)
output_data = pd.concat([X_test[['userid', 'feedid']], y_test], axis=1)
output_data

Unnamed: 0,userid,feedid,read_comment,like,click_avatar,forward
0,116490,18545,1,1,1,1
1,229006,18545,1,1,1,1
2,17099,18545,1,1,1,1
3,229660,18545,1,1,1,1
4,176548,18545,1,1,1,1
...,...,...,...,...,...,...
419641,136289,32838,0,0,0,0
419642,27818,55309,1,1,1,1
419643,59066,97704,1,1,1,1
419644,94658,75442,1,1,1,1


In [9]:
# Predict class probabilities for the test set; the result is indexed by
# target first (see how it is unpacked in the following cell).
probabilities = model.predict_proba(X=X_test)

In [10]:
# Stack the per-target probability arrays into (n_targets, n_samples, n_classes)
# and keep the column at class index 1 for every target.
# NOTE(review): assumes every target has the same number of classes and that
# index 1 is the positive class -- confirm against model.classes_.
positive_probs = np.array(probabilities)[:, :, 1].T  # -> (n_samples, n_targets)

# Transpose the ndarray *before* building the frame: constructing a DataFrame
# with one column per test row and transposing it afterwards is needlessly slow.
prob_df = pd.DataFrame(
    positive_probs,
    # pd.concat(axis=1) aligns rows by index label, so reuse X_test's index.
    index=X_test.index,
    columns=['read_comment', 'like', 'click_avatar', 'forward'],
)
output_data_prob = pd.concat([X_test[['userid', 'feedid']], prob_df], axis=1)
output_data_prob

Unnamed: 0,userid,feedid,read_comment,like,click_avatar,forward
0,116490,18545,1.0,1.0,1.0,1.0
1,229006,18545,1.0,1.0,1.0,1.0
2,17099,18545,1.0,1.0,1.0,1.0
3,229660,18545,1.0,1.0,1.0,1.0
4,176548,18545,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...
419641,136289,32838,0.0,0.0,0.0,0.0
419642,27818,55309,1.0,1.0,1.0,1.0
419643,59066,97704,1.0,1.0,1.0,1.0
419644,94658,75442,1.0,1.0,1.0,1.0


## Save Results to Files

In [11]:
# Save results
# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise a fresh checkout fails on this cell.
Path('../predictions').mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame row index out of the written files.
output_data.to_csv('../predictions/2.0_MultinomialNB_predictions.csv', index=False)
output_data_prob.to_csv('../predictions/2.0_MultinomialNB_predictions_prob.csv', index=False)