In [1]:
import pandas as pd
import numpy as np

from sklearn import linear_model
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error

<h3>Preprocessing</h3>

In [2]:
# Single seed for every random step in this cell, so the shuffle, the
# train/test split and therefore the reported errors are reproducible on
# Restart Kernel -> Run All.
SEED = 42

# The CSVs have no header row (header=None), so columns are numbered 0..N-1.
# A binary label column is appended: 1 for the hangover file, 0 for the normal file.
hangover_df = pd.read_csv('./train-embedding/hangover.csv', header=None)
hangover_df['target'] = 1
normal_df   = pd.read_csv('./train-embedding/normal.csv', header=None)
normal_df['target'] = 0

# ignore_index=True: both files are read with a default 0..n-1 row index, so a
# plain concat would leave duplicate row labels in the combined frame.
df = pd.concat([hangover_df, normal_df], ignore_index=True)
df = shuffle(df, random_state=SEED)

# Random split: each row independently goes to train with probability ~0.7,
# so the split sizes are approximately (not exactly) 70/30.
# RandomState is used instead of the global np.random state so that re-running
# this cell always produces the same mask.
msk = np.random.RandomState(SEED).rand(len(df)) <= 0.7
train_df = df[msk]
test_df = df[~msk]

In [3]:
# Preview the first ten rows of the shuffled, labelled frame.
df.iloc[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,target
18,0.060916,-0.0437,-0.072015,-0.005746,0.156927,0.036907,0.061266,-0.028009,-0.039283,0.015564,...,0.253954,-0.021074,-0.044485,-0.136814,0.112924,-0.123011,-0.006652,-0.131145,0.055034,0
86,0.011858,-0.079928,-0.020839,9.6e-05,0.063348,0.044836,0.016954,-0.029465,0.04787,-0.031126,...,0.092513,0.057035,0.050319,-0.046294,0.03778,0.041344,0.033409,0.0422,0.17296,1
73,0.017471,0.14508,0.024772,-0.006036,-0.00316,0.111001,-0.007143,0.097556,-0.138861,0.06327,...,0.132622,0.036354,0.032402,0.019163,0.072582,0.059501,-0.061626,0.119332,0.053256,0
91,0.114124,-0.077783,-0.086178,-0.052333,0.109709,0.066981,0.026119,-0.009682,-0.044196,-0.26069,...,0.185594,-0.066444,0.069844,0.005636,0.018122,-0.006824,-0.055215,-0.019155,0.020464,1
1,0.045909,0.051129,-0.098421,-0.042408,0.027225,0.012921,0.013856,0.040315,-0.10087,0.088504,...,0.1653,-0.049468,-0.102836,-0.027356,0.022993,-0.108741,0.034582,-0.071778,0.015519,0
38,0.02934,-0.027926,-0.131419,0.0273,0.079089,0.10734,-0.100474,-0.181568,0.041454,-0.0349,...,0.136788,0.06085,-0.046992,0.077491,0.074838,-0.098576,0.114444,0.079493,0.044993,1
124,0.039406,0.070009,0.003234,0.152126,-0.027763,0.046913,0.024327,0.015953,0.018388,0.029566,...,0.121268,-0.020258,0.018235,-0.130754,0.100945,0.119694,-0.047461,0.058365,0.095024,0
58,0.057448,-0.091783,0.006912,-0.080327,-0.074267,-0.018271,-0.047089,0.151978,-0.060895,0.1135,...,-0.082796,0.099487,-0.101574,0.089218,0.034067,0.165456,-0.05609,0.207459,0.10915,1
51,0.183762,0.152264,-0.107732,0.07171,0.001153,0.123411,0.045561,-0.026008,-0.101489,0.055489,...,-0.023929,-0.034631,0.050729,0.233128,0.053556,0.158357,-0.03305,0.127528,-0.066487,1
7,-0.035142,-0.006541,0.042246,-0.106633,0.167419,0.111144,-0.077174,-0.03158,0.023764,0.005187,...,0.114759,0.117179,-0.074285,-0.034403,0.030379,-0.00296,-0.023494,0.134103,0.103766,1


In [4]:
# Split each partition into its feature matrix ('x': every column except the
# label) and its label vector ('y': the 'target' column).
train = dict(x=train_df.drop('target', axis=1), y=train_df['target'])
test = dict(x=test_df.drop('target', axis=1), y=test_df['target'])

<h3>Linear Model</h3>

In [5]:
# L2-regularised linear regression fitted on the 0/1 target.
# NOTE(review): Ridge is a regressor, so its predictions are unbounded real
# values rather than class labels or probabilities -- confirm this is intended.
model = linear_model.Ridge(random_state=1, alpha=0.1)
# Left as the trailing expression so the cell displays the fitted estimator.
model.fit(X=train['x'], y=train['y'])

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=1, solver='auto', tol=0.001)

In [6]:
# Store the model's predictions next to the features/labels of each split.
train.update(pred=model.predict(train['x']))
test.update(pred=model.predict(test['x']))

# The "error" printed here is the mean squared error between the 0/1 labels
# and the raw regression outputs.
print('Train error', mean_squared_error(y_true=train['y'], y_pred=train['pred']))
print('Test error', mean_squared_error(y_true=test['y'], y_pred=test['pred']))

Train error 0.0736117901106
Test error 0.169497957561


In [7]:
# Side-by-side table of the held-out labels ('y') and the model's predictions
# ('pred'), indexed like the test labels.
tmp_df = test['y'].to_frame('y').assign(pred=test['pred'])

In [8]:
# Load the headerless embedding CSV for the "dima_drink" sample and display it.
dima_drink_df = pd.read_csv(
    filepath_or_buffer='./train-embedding/dima_drink.csv',
    header=None,
)
dima_drink_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,-0.061841,0.137985,0.100331,-0.100835,0.084029,0.169799,-0.038081,0.057994,-0.051805,-0.003714,...,0.060776,0.120232,0.002748,-0.020624,-0.043453,0.033686,0.059491,0.003029,0.052273,-0.018734


In [9]:
# Score the "dima_drink" sample with the fitted model (raw regression output).
model.predict(X=dima_drink_df)

array([ 1.0170866])

In [10]:
# Load the headerless embedding CSV for the "dima_not_drink" sample and display it.
dima_not_drink_df = pd.read_csv(
    filepath_or_buffer='./train-embedding/dima_not_drink.csv',
    header=None,
)
dima_not_drink_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.03724,-0.094667,0.030205,-0.033487,0.034872,0.115102,-0.190567,0.011716,-0.128619,-0.101789,...,0.045036,0.168485,0.089637,-0.004015,0.019652,-0.04558,0.087051,-0.024257,0.249144,0.190036


In [11]:
# Score the "dima_not_drink" sample with the fitted model (raw regression output).
model.predict(X=dima_not_drink_df)

array([ 0.77581497])

In [12]:
# Load the headerless embedding CSV for the "bred_drink" sample and display it.
bred_drink_df = pd.read_csv(
    filepath_or_buffer='./train-embedding/bred_drink.csv',
    header=None,
)
bred_drink_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.009605,-0.026152,-0.034901,-0.051751,0.127656,-0.051015,-0.014556,-0.043262,-0.02539,-0.052328,...,0.019675,0.038041,0.067293,0.019175,0.036273,-0.024942,0.04948,0.003278,0.143844,0.032759


In [13]:
# Score the "bred_drink" sample with the fitted model (raw regression output).
model.predict(X=bred_drink_df)

array([ 1.0753002])

In [14]:
# Load the headerless embedding CSV for the "pasha_drink" sample and display it.
pasha_drink_df = pd.read_csv(
    filepath_or_buffer='./train-embedding/pasha_drink.csv',
    header=None,
)
pasha_drink_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,-0.019857,0.156477,-0.109824,-0.052743,0.003061,0.230354,-0.134886,-0.115535,0.069287,0.058518,...,0.19203,0.104387,0.18918,-0.161128,0.054546,-0.028918,0.068993,-0.007898,0.09702,0.071359


In [15]:
# Score the "pasha_drink" sample with the fitted model (raw regression output).
model.predict(X=pasha_drink_df)

array([ 0.49005254])