# MultinomialNB Classifier

## Mount Google Drive

In [1]:
import os
from google.colab import drive
# Project data directory on Google Drive; all relative paths used later in
# the notebook are resolved against it once we chdir into it.
data_path = "/content/drive/My Drive/WeChat_Big_Data_Contest/wechat_algo_data1"

# Mount Drive into the Colab runtime (no forced remount if already mounted),
# then make the data directory the working directory.
drive.mount('/content/drive', force_remount=False)
os.chdir(data_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Load Data File

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Read the training features, training targets and test features, in that
# order, from CSV files in the current working directory.
X_train, y_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        './1.1_X_train.csv',
        './1.1_y_train.csv',
        './1.1_X_test.csv',
    )
)

## Train Model

In [4]:
import pickle
from sklearn.pipeline import make_pipeline
from sklearn.multioutput import MultiOutputClassifier
from sklearn.naive_bayes import MultinomialNB

In [5]:
# Wrap a default MultinomialNB in MultiOutputClassifier so that several
# target columns can be handled by a single estimator object.
model = MultiOutputClassifier(estimator=MultinomialNB())

In [6]:
# Fit the multi-output model on the full training set; the %time line magic
# reports CPU and wall-clock time for this single statement.
# NOTE(review): X_train is passed unmodified, so every column of the CSV is
# used as a feature. X_test contains 'userid' and 'feedid' columns (they are
# selected when building the output below); if X_train does too, raw IDs are
# being fed to MultinomialNB as if they were counts -- confirm this is intended.
%time model.fit(X_train, y_train)

CPU times: user 6.01 s, sys: 631 ms, total: 6.64 s
Wall time: 5.79 s


MultiOutputClassifier(estimator=MultinomialNB(alpha=1.0, class_prior=None,
                                              fit_prior=True),
                      n_jobs=None)

In [7]:
# Score on the *training* data, so this is not a generalization estimate.
# Per the scikit-learn documentation, MultiOutputClassifier.score counts a row
# as correct only when every target column is predicted correctly (exact-match
# accuracy), which is a harsh metric and can be far lower than the per-target
# accuracies.
model.score(X_train, y_train)

0.0039022219817154745

## Make Predictions

In [8]:
# Hard class predictions for the test features. Despite the name, y_test holds
# model output, not ground-truth labels.
y_test = model.predict(X_test)

In [9]:
# Label the predicted columns with the target names.
target_columns = ['read_comment', 'like', 'click_avatar', 'forward']
y_test = pd.DataFrame(y_test, columns=target_columns)

# Put the identifying columns from the test set next to the predictions;
# the two frames are joined column-wise on their row index.
id_columns = X_test[['userid', 'feedid']]
output_data = pd.concat([id_columns, y_test], axis=1)
output_data

Unnamed: 0,userid,feedid,read_comment,like,click_avatar,forward
0,14298,67227,1,0,1,0
1,159403,67227,0,0,0,1
2,1940,67227,1,0,1,0
3,58618,67227,1,0,1,0
4,79942,67227,1,0,1,0
...,...,...,...,...,...,...
421980,192875,27885,0,0,0,1
421981,170953,112511,1,0,1,0
421982,181387,103702,1,1,1,0
421983,244479,83014,0,1,0,1


In [10]:
# Predict class probabilities for the test features.
# Per the scikit-learn documentation, MultiOutputClassifier.predict_proba
# returns a list with one (n_samples, n_classes) array per target, rather than
# a single 2-D array.
probabilities = model.predict_proba(X_test)

In [11]:
# Stack the per-target probability arrays into one 3-D array and keep the
# column at index 1 of the last axis for every target (assumes each target has
# two classes, with index 1 being the positive one -- TODO confirm against
# the fitted estimators' classes_).
stacked_probs = np.array(probabilities)
positive_probs = stacked_probs[:, :, 1].T  # rows = samples, columns = targets

prob_df = pd.DataFrame(positive_probs)
prob_df.columns = ['read_comment', 'like', 'click_avatar', 'forward']

# Attach the identifying columns, joined column-wise on the row index.
id_columns_prob = X_test[['userid', 'feedid']]
output_data_prob = pd.concat([id_columns_prob, prob_df], axis=1)

## Save Results

In [12]:
# Save results
# Both files are written to ../predictions, relative to the current working
# directory. DataFrame.to_csv does not create missing parent directories, so
# make sure the folder exists first; otherwise a fresh checkout fails here,
# after all the training and prediction work has already been done.
os.makedirs('../predictions', exist_ok=True)

# Hard class predictions (one 0/1 column per target) ...
output_data.to_csv('../predictions/2.0.4_MultinomialNB_predictions.csv', index=False)
# ... and the corresponding predicted probabilities.
output_data_prob.to_csv('../predictions/2.0.4_MultinomialNB_predictions_prob.csv', index=False)