In [49]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
import data_processor as dp
from implementations import *
from proj1_helpers import *

## Load the training data into feature matrix, class labels, and event ids:

In [51]:
# Path to the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = '../data/train.csv'
# Per the section heading, the three returned values are the class labels (y),
# the feature matrix (tX) and the event ids (ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Data Preprocessing

In [52]:
# Hold out the tail of the data for validation: the first 80% of the rows are
# used for training and the remaining 20% for validation. Rows are taken in
# file order (no shuffling).
n_train = int(tX.shape[0] * 0.8)

tX_train, tX_val = tX[:n_train], tX[n_train:]
y_train, y_val = y[:n_train], y[n_train:]

In [53]:
# Replace the -999 placeholder entries of the training features using the 'mean' method.
# `filler` is kept so the validation split can be filled with the same values.
# NOTE(review): tX_train is overwritten in place, so re-running this cell operates on
# already-filled data — confirm fill_nan is harmless when no -999 entries remain.
tX_train, filler = dp.fill_nan(tX_train, nan_value=-999, method='mean')

In [54]:
# Fill the -999 placeholders of the validation features with the `filler` obtained
# from the training split, so no statistics are computed on validation data.
# The second return value is not needed here.
tX_val, _ = dp.fill_nan(tX_val, nan_value=-999, method='use_filler', filler=filler)

In [55]:
# Outlier treatment is applied to the training split only; the second return value
# is discarded.
# NOTE(review): the shape check in the next cell still reports 200000 training rows,
# so this presumably alters values rather than dropping rows — confirm. The
# validation split does not receive the same treatment.
tX_train,_ = dp.remove_outliers(tX_train)

In [56]:
# Sanity check: shapes of the preprocessed training and validation feature matrices.
tX_train.shape,tX_val.shape

((200000, 30), (50000, 30))

#### After the minimal training, we try to do some extra data processing steps

Build polynomial features (degree 2) to augment the feature matrix, then add a column of ones.

In [62]:
# Feature augmentation, applied identically to both splits: degree-2 polynomial
# expansion followed by dp.add_ones.
# assumes dp.poly_features and dp.add_ones have no side effects, so processing one
# split fully before the other is equivalent — TODO confirm
tX_train_aug = dp.add_ones(dp.poly_features(tX_train, 2))
tX_val_aug = dp.add_ones(dp.poly_features(tX_val, 2))

In [63]:
# Sanity check: shapes of the augmented training and validation feature matrices.
tX_train_aug.shape, tX_val_aug.shape

((200000, 61), (50000, 61))

## Least Squares

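Missing values (-999) have been filled in the preprocessing step above. Here we fit ordinary least squares (LS) on the resulting features.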

In [11]:
# Fit least squares on the preprocessed (non-augmented) training features.
# The call returns the weight vector and a loss value, stored here as `mse`.
w, mse = least_squares(y_train, tX_train)

In [12]:
# Predict labels on the training split, show the predictions and their shape,
# then display the fraction of training samples classified correctly.
y_pred = predict_labels(w, tX_train)
print(y_pred, y_pred.shape, sep="\n")
(y_pred == y_train).mean()

[-1.  1. -1. ...  1. -1. -1.]
(200000,)


0.740765

In [13]:
# Predict labels on the validation split, show the predictions and their shape,
# then display the fraction of validation samples classified correctly.
y_pred = predict_labels(w, tX_val)
print(y_pred, y_pred.shape, sep="\n")
(y_pred == y_val).mean()

[-1. -1. -1. ... -1.  1. -1.]
(50000,)


0.73982

In [92]:
# Class balance check: number of training labels and how many of them are positive (> 0).
y_train.shape, np.sum(y_train > 0)

((200000,), 68548)

In [64]:
# Least squares on the degree-2 augmented features, evaluated on both splits.
# NOTE(review): the recorded output shows a training accuracy of about 0.386 and a
# loss of about 475, far worse than the non-augmented fit — possibly a numerical
# problem with the unscaled polynomial features; investigate before relying on it.
w2, mse2 = least_squares(y_train, tX_train_aug)
for message, features, labels in (
    ('Accuracy for deg train:', tX_train_aug, y_train),
    ('Accuracy for deg val:', tX_val_aug, y_val),
):
    y_pred2 = predict_labels(w2, features)
    print(message + str(np.mean(labels == y_pred2)))
print(mse2)

Accuracy for deg train:0.385785
Accuracy for deg val:0.64388
475.54004162132316


## Ridge Regression

In [14]:
# Sweep the ridge penalty over several orders of magnitude and print the validation
# accuracy for each value. `w`, `mse` and `y_pred` are left holding the results for
# the last penalty tried.
for lam in [1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3]:
    w, mse = ridge_regression(y_train, tX_train, lam)
    y_pred = predict_labels(w, tX_val)
    val_accuracy = np.mean(y_val == y_pred)
    print(lam, ": VAL=", val_accuracy, sep="")

0.0001: VAL=0.73982
0.001: VAL=0.73982
0.01: VAL=0.73986
0.1: VAL=0.73988
1: VAL=0.73988
10.0: VAL=0.73986
100.0: VAL=0.73982
1000.0: VAL=0.7393


## Least Squares GD

In [58]:
# Hyper-parameters for least-squares gradient descent.
# Weights start at zero, one per feature column. An earlier variant drew them from
# np.random.normal(loc=0, scale=1, size=(tX.shape[1],1)).
initial_w = np.zeros(tX.shape[1])
max_iters = 1000  # number of gradient steps
gamma = 3e-6  # step size

In [59]:
# Run least-squares gradient descent on the training split; the call logs the loss
# at every iteration (see output below).
# NOTE(review): labels, features and initial weights are cast to float16 before the
# call, which keeps only about three significant decimal digits of each feature —
# confirm this precision loss is intentional.
w, mse = least_squares_GD(np.float16(y_train), np.float16(tX_train), np.float16(initial_w), max_iters, gamma)

Gradient Descent(0/999): loss=0.5
Gradient Descent(1/999): loss=0.474999342622941
Gradient Descent(2/999): loss=0.4721726106390642
Gradient Descent(3/999): loss=0.46968451001295397
Gradient Descent(4/999): loss=0.4673086476825307
Gradient Descent(5/999): loss=0.4650369766970005
Gradient Descent(6/999): loss=0.4628639688549106
Gradient Descent(7/999): loss=0.46078441979937124
Gradient Descent(8/999): loss=0.4587934104117789
Gradient Descent(9/999): loss=0.4568862908711271
Gradient Descent(10/999): loss=0.45505866580197263
Gradient Descent(11/999): loss=0.45330638024552466
Gradient Descent(12/999): loss=0.4516255064060496
Gradient Descent(13/999): loss=0.45001233112954286
Gradient Descent(14/999): loss=0.44846334407405913
Gradient Descent(15/999): loss=0.4469752265333793
Gradient Descent(16/999): loss=0.4455448408778337
Gradient Descent(17/999): loss=0.4441692205781283
Gradient Descent(18/999): loss=0.4428455607799212
Gradient Descent(19/999): loss=0.4415712093986918
Gradient Descent(20/

Gradient Descent(162/999): loss=0.38751061897592604
Gradient Descent(163/999): loss=0.3873668465092516
Gradient Descent(164/999): loss=0.3872242900354348
Gradient Descent(165/999): loss=0.3870829378928184
Gradient Descent(166/999): loss=0.38694277855981807
Gradient Descent(167/999): loss=0.38680380065192727
Gradient Descent(168/999): loss=0.38666599291883963
Gradient Descent(169/999): loss=0.3865293442416863
Gradient Descent(170/999): loss=0.3863938436303811
Gradient Descent(171/999): loss=0.3862594802210677
Gradient Descent(172/999): loss=0.3861262432736638
Gradient Descent(173/999): loss=0.3859941221694984
Gradient Descent(174/999): loss=0.3858631064090354
Gradient Descent(175/999): loss=0.38573318560968184
Gradient Descent(176/999): loss=0.38560434950367317
Gradient Descent(177/999): loss=0.3854765879360355
Gradient Descent(178/999): loss=0.38534989086261817
Gradient Descent(179/999): loss=0.38522424834819313
Gradient Descent(180/999): loss=0.38509965056462153
Gradient Descent(181/9

Gradient Descent(322/999): loss=0.37456127473323847
Gradient Descent(323/999): loss=0.37452002187382427
Gradient Descent(324/999): loss=0.37447905792887787
Gradient Descent(325/999): loss=0.3744383804221285
Gradient Descent(326/999): loss=0.37439798689984854
Gradient Descent(327/999): loss=0.3743578749306421
Gradient Descent(328/999): loss=0.37431804210523495
Gradient Descent(329/999): loss=0.3742784860362688
Gradient Descent(330/999): loss=0.37423920435809443
Gradient Descent(331/999): loss=0.3742001947265695
Gradient Descent(332/999): loss=0.3741614548188566
Gradient Descent(333/999): loss=0.3741229823332237
Gradient Descent(334/999): loss=0.37408477498884707
Gradient Descent(335/999): loss=0.3740468305256157
Gradient Descent(336/999): loss=0.3740091467039371
Gradient Descent(337/999): loss=0.37397172130454537
Gradient Descent(338/999): loss=0.37393455212831184
Gradient Descent(339/999): loss=0.3738976369960561
Gradient Descent(340/999): loss=0.3738609737483596
Gradient Descent(341/9

Gradient Descent(483/999): loss=0.3704158391945908
Gradient Descent(484/999): loss=0.37040025733504267
Gradient Descent(485/999): loss=0.37038475319162495
Gradient Descent(486/999): loss=0.37036932617564927
Gradient Descent(487/999): loss=0.3703539757035065
Gradient Descent(488/999): loss=0.3703387011966221
Gradient Descent(489/999): loss=0.37032350208140924
Gradient Descent(490/999): loss=0.37030837778922476
Gradient Descent(491/999): loss=0.37029332775632334
Gradient Descent(492/999): loss=0.3702783514238143
Gradient Descent(493/999): loss=0.37026344823761753
Gradient Descent(494/999): loss=0.3702486176484192
Gradient Descent(495/999): loss=0.37023385911162987
Gradient Descent(496/999): loss=0.3702191720873407
Gradient Descent(497/999): loss=0.37020455604028235
Gradient Descent(498/999): loss=0.3701900104397823
Gradient Descent(499/999): loss=0.37017553475972353
Gradient Descent(500/999): loss=0.3701611284785036
Gradient Descent(501/999): loss=0.370146791078994
Gradient Descent(502/9

Gradient Descent(643/999): loss=0.36862024130871873
Gradient Descent(644/999): loss=0.3686120802533962
Gradient Descent(645/999): loss=0.36860394513334577
Gradient Descent(646/999): loss=0.3685958357930344
Gradient Descent(647/999): loss=0.3685877520781525
Gradient Descent(648/999): loss=0.36857969383560424
Gradient Descent(649/999): loss=0.36857166091349675
Gradient Descent(650/999): loss=0.3685636531611294
Gradient Descent(651/999): loss=0.36855567042898413
Gradient Descent(652/999): loss=0.3685477125687146
Gradient Descent(653/999): loss=0.368539779433137
Gradient Descent(654/999): loss=0.3685318708762186
Gradient Descent(655/999): loss=0.36852398675306974
Gradient Descent(656/999): loss=0.3685161269199318
Gradient Descent(657/999): loss=0.36850829123416945
Gradient Descent(658/999): loss=0.3685004795542598
Gradient Descent(659/999): loss=0.36849269173978316
Gradient Descent(660/999): loss=0.3684849276514132
Gradient Descent(661/999): loss=0.36847718715090877
Gradient Descent(662/99

Gradient Descent(804/999): loss=0.36755996781566064
Gradient Descent(805/999): loss=0.3675545940830857
Gradient Descent(806/999): loss=0.36754923176240994
Gradient Descent(807/999): loss=0.36754388080579475
Gradient Descent(808/999): loss=0.367538541165719
Gradient Descent(809/999): loss=0.3675332127949758
Gradient Descent(810/999): loss=0.36752789564667043
Gradient Descent(811/999): loss=0.3675225896742179
Gradient Descent(812/999): loss=0.3675172948313401
Gradient Descent(813/999): loss=0.36751201107206355
Gradient Descent(814/999): loss=0.36750673835071723
Gradient Descent(815/999): loss=0.3675014766219297
Gradient Descent(816/999): loss=0.367496225840627
Gradient Descent(817/999): loss=0.36749098596202995
Gradient Descent(818/999): loss=0.3674857569416525
Gradient Descent(819/999): loss=0.36748053873529846
Gradient Descent(820/999): loss=0.3674753312990602
Gradient Descent(821/999): loss=0.3674701345893153
Gradient Descent(822/999): loss=0.36746494856272505
Gradient Descent(823/999

Gradient Descent(964/999): loss=0.36681992233151633
Gradient Descent(965/999): loss=0.366815926147694
Gradient Descent(966/999): loss=0.3668119364996515
Gradient Descent(967/999): loss=0.36680795336866967
Gradient Descent(968/999): loss=0.3668039767361237
Gradient Descent(969/999): loss=0.3668000065834819
Gradient Descent(970/999): loss=0.36679604289230516
Gradient Descent(971/999): loss=0.3667920856442477
Gradient Descent(972/999): loss=0.36678813482105416
Gradient Descent(973/999): loss=0.3667841904045605
Gradient Descent(974/999): loss=0.36678025237669365
Gradient Descent(975/999): loss=0.3667763207194688
Gradient Descent(976/999): loss=0.36677239541499174
Gradient Descent(977/999): loss=0.3667684764454557
Gradient Descent(978/999): loss=0.3667645637931418
Gradient Descent(979/999): loss=0.3667606574404187
Gradient Descent(980/999): loss=0.36675675736974156
Gradient Descent(981/999): loss=0.36675286356365133
Gradient Descent(982/999): loss=0.36674897600477463
Gradient Descent(983/99

In [60]:
# Training accuracy of the gradient-descent weights.
y_pred = predict_labels(w, tX_train)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_train) == np.ravel(y_pred))

0.721525

In [64]:
# Validation accuracy of the gradient-descent weights.
y_pred = predict_labels(w, tX_val)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_val) == np.ravel(y_pred))

0.7235

## Least Squares SGD

In [82]:
# Hyper-parameters for least-squares stochastic gradient descent.
# Weights start at zero, one per feature column. An earlier variant drew them from
# np.random.normal(loc=0, scale=1, size=(tX.shape[1],1)).
initial_w = np.zeros(tX.shape[1])
max_iters = 1000  # number of SGD steps
gamma = 3e-6  # step size

In [83]:
# Run least-squares stochastic gradient descent on the training split; the call logs
# a loss value at every iteration (see output below).
# NOTE(review): inputs are cast to float16 before the call, which keeps only about
# three significant decimal digits of each feature — confirm this is intentional.
# NOTE(review): no random seed is set in the visible cells; if least_squares_SGD
# samples data randomly, this result is not reproducible — confirm.
w, mse = least_squares_SGD(np.float16(y_train), np.float16(tX_train), np.float16(initial_w), max_iters, gamma)

Gradient Descent(0/999): loss=0.5948267886066436
Gradient Descent(1/999): loss=23.918287456359867
Gradient Descent(2/999): loss=6.482131631984115
Gradient Descent(3/999): loss=1.072019238193035
Gradient Descent(4/999): loss=0.5620987787550688
Gradient Descent(5/999): loss=0.7132101563155653
Gradient Descent(6/999): loss=0.4416907335579395
Gradient Descent(7/999): loss=0.9546526320773362
Gradient Descent(8/999): loss=0.44025807523488997
Gradient Descent(9/999): loss=0.622820172495842
Gradient Descent(10/999): loss=0.7327715994256736
Gradient Descent(11/999): loss=0.4805811141961813
Gradient Descent(12/999): loss=2.547217366743088
Gradient Descent(13/999): loss=1.6598004800122974
Gradient Descent(14/999): loss=10.665810132827756
Gradient Descent(15/999): loss=3.6932393459528683
Gradient Descent(16/999): loss=3.2435152139967673
Gradient Descent(17/999): loss=1.271850050452948
Gradient Descent(18/999): loss=1.708876825043559
Gradient Descent(19/999): loss=0.6463847397458552
Gradient Descen

Gradient Descent(165/999): loss=0.4267675859737396
Gradient Descent(166/999): loss=0.5267206713360548
Gradient Descent(167/999): loss=0.5204919648849964
Gradient Descent(168/999): loss=0.6383018074393271
Gradient Descent(169/999): loss=0.5979811944115162
Gradient Descent(170/999): loss=0.5381523439627885
Gradient Descent(171/999): loss=0.4349727834844589
Gradient Descent(172/999): loss=0.5552750364941359
Gradient Descent(173/999): loss=0.47861381176471707
Gradient Descent(174/999): loss=0.5411288452345133
Gradient Descent(175/999): loss=0.8320371055948733
Gradient Descent(176/999): loss=0.750480238226056
Gradient Descent(177/999): loss=0.7821323653179407
Gradient Descent(178/999): loss=0.4649884309452772
Gradient Descent(179/999): loss=0.7176039593595267
Gradient Descent(180/999): loss=0.9474488932836055
Gradient Descent(181/999): loss=0.6946846099376681
Gradient Descent(182/999): loss=6.129021634216307
Gradient Descent(183/999): loss=3.102127190757394
Gradient Descent(184/999): loss=1

Gradient Descent(329/999): loss=1.426836585037708
Gradient Descent(330/999): loss=1.2919340658706429
Gradient Descent(331/999): loss=0.7967910266155005
Gradient Descent(332/999): loss=0.9917094595992567
Gradient Descent(333/999): loss=1.0230497395956517
Gradient Descent(334/999): loss=1.1999708811807634
Gradient Descent(335/999): loss=1.5353533273774385
Gradient Descent(336/999): loss=0.7344069871914387
Gradient Descent(337/999): loss=1.058842031903267
Gradient Descent(338/999): loss=0.49292584601759915
Gradient Descent(339/999): loss=1.0560212452411648
Gradient Descent(340/999): loss=1.3264357104158404
Gradient Descent(341/999): loss=0.42647337322771556
Gradient Descent(342/999): loss=0.5620027696287633
Gradient Descent(343/999): loss=0.4250540415519476
Gradient Descent(344/999): loss=0.44547410776257523
Gradient Descent(345/999): loss=0.9277320336997509
Gradient Descent(346/999): loss=0.5028871032416821
Gradient Descent(347/999): loss=0.537125194787979
Gradient Descent(348/999): loss

Gradient Descent(493/999): loss=0.5861444668972493
Gradient Descent(494/999): loss=0.8633255346494911
Gradient Descent(495/999): loss=0.434107495753765
Gradient Descent(496/999): loss=0.5409223221689464
Gradient Descent(497/999): loss=0.7926104688179493
Gradient Descent(498/999): loss=0.5347611611008644
Gradient Descent(499/999): loss=0.5471947792059183
Gradient Descent(500/999): loss=0.660281293503046
Gradient Descent(501/999): loss=0.4309758179187774
Gradient Descent(502/999): loss=0.42509896345257764
Gradient Descent(503/999): loss=0.5218977323716879
Gradient Descent(504/999): loss=0.5320056591552497
Gradient Descent(505/999): loss=0.5709220310157537
Gradient Descent(506/999): loss=0.45990055133521573
Gradient Descent(507/999): loss=0.49505604653179647
Gradient Descent(508/999): loss=0.42251314749717706
Gradient Descent(509/999): loss=0.47457670105814925
Gradient Descent(510/999): loss=0.5024171053534747
Gradient Descent(511/999): loss=0.6135193176233766
Gradient Descent(512/999): l

Gradient Descent(655/999): loss=0.47608476152181617
Gradient Descent(656/999): loss=3.7205459779953958
Gradient Descent(657/999): loss=2.748095379756093
Gradient Descent(658/999): loss=1.9833845343691108
Gradient Descent(659/999): loss=0.4927812957775594
Gradient Descent(660/999): loss=0.5729756646358968
Gradient Descent(661/999): loss=0.5507459906196593
Gradient Descent(662/999): loss=0.8963071347934005
Gradient Descent(663/999): loss=1.7739030647754668
Gradient Descent(664/999): loss=1.3548076179856063
Gradient Descent(665/999): loss=1.5690865710002182
Gradient Descent(666/999): loss=0.414597266292572
Gradient Descent(667/999): loss=0.4254462594604493
Gradient Descent(668/999): loss=0.46745072169601914
Gradient Descent(669/999): loss=0.4309669992125034
Gradient Descent(670/999): loss=0.6021123644351958
Gradient Descent(671/999): loss=0.6776998422741889
Gradient Descent(672/999): loss=0.915358979369998
Gradient Descent(673/999): loss=0.48318323985457423
Gradient Descent(674/999): loss

Gradient Descent(817/999): loss=1.2830815220963956
Gradient Descent(818/999): loss=1.4207433948522805
Gradient Descent(819/999): loss=1.3727242014908791
Gradient Descent(820/999): loss=1.4626854499870536
Gradient Descent(821/999): loss=1.1515178243899347
Gradient Descent(822/999): loss=1.6419404947167635
Gradient Descent(823/999): loss=3.107848146137595
Gradient Descent(824/999): loss=0.9489244478112459
Gradient Descent(825/999): loss=3.974800354542732
Gradient Descent(826/999): loss=0.8426971556770801
Gradient Descent(827/999): loss=2.573976096732616
Gradient Descent(828/999): loss=0.9773521203303339
Gradient Descent(829/999): loss=0.8947859337913989
Gradient Descent(830/999): loss=1.2459949031609296
Gradient Descent(831/999): loss=1.056648781234026
Gradient Descent(832/999): loss=1.579359762094617
Gradient Descent(833/999): loss=2.3103788545280697
Gradient Descent(834/999): loss=1.643555042654276
Gradient Descent(835/999): loss=1.1313159684103729
Gradient Descent(836/999): loss=0.945

Gradient Descent(980/999): loss=0.9501457487756012
Gradient Descent(981/999): loss=0.9206569346904754
Gradient Descent(982/999): loss=0.6303025648528336
Gradient Descent(983/999): loss=0.4760884416282178
Gradient Descent(984/999): loss=0.5046426100170612
Gradient Descent(985/999): loss=0.6728682587897777
Gradient Descent(986/999): loss=0.6069099208748339
Gradient Descent(987/999): loss=0.48249127831220634
Gradient Descent(988/999): loss=0.6110678273767233
Gradient Descent(989/999): loss=0.4923436442041396
Gradient Descent(990/999): loss=0.8132637150108813
Gradient Descent(991/999): loss=0.7598894645202161
Gradient Descent(992/999): loss=0.7755986096388101
Gradient Descent(993/999): loss=0.7989404966562987
Gradient Descent(994/999): loss=0.8482243826639653
Gradient Descent(995/999): loss=0.6950487541723253
Gradient Descent(996/999): loss=0.7698709402674436
Gradient Descent(997/999): loss=0.4538547476208211
Gradient Descent(998/999): loss=0.46295630400180815
Gradient Descent(999/999): lo

In [84]:
# Training accuracy of the SGD weights.
y_pred = predict_labels(w, tX_train)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_train) == np.ravel(y_pred))

0.660535

In [85]:
# Validation accuracy of the SGD weights.
y_pred = predict_labels(w, tX_val)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_val) == np.ravel(y_pred))

0.6607

## Logistic Regression

In [124]:
# Hyper-parameters for logistic regression.
# Weights start at zero, one per feature column. An earlier variant drew them from
# np.random.normal(loc=0, scale=1, size=(tX.shape[1],1)).
initial_w = np.zeros(tX.shape[1])
max_iters = 1000  # number of optimisation steps
gamma = 1e-7  # step size

In [125]:
# Boolean targets for logistic regression: True where the training label is positive.
y_train_lr = y_train>0

In [129]:
# Fit logistic regression on the boolean targets using mini-batches of 128 samples;
# the call logs the loss at every iteration (see output below).
# NOTE(review): the second return value is stored as `mse` like in the least-squares
# cells, but here it is presumably the logistic loss rather than a mean squared error.
w, mse = logistic_regression(y_train_lr, tX_train, initial_w, max_iters, gamma, batch_size=128)

Log Regression(0/999): loss=0.6733910493020513
Log Regression(1/999): loss=0.6658206432791178
Log Regression(2/999): loss=0.6791503506788689
Log Regression(3/999): loss=0.6599997082001471
Log Regression(4/999): loss=0.6581950792257761
Log Regression(5/999): loss=0.6581358323539759
Log Regression(6/999): loss=0.657872374609963
Log Regression(7/999): loss=0.6638171659941786
Log Regression(8/999): loss=0.6530769247076579
Log Regression(9/999): loss=0.6470175924950776
Log Regression(10/999): loss=0.6619592757522682
Log Regression(11/999): loss=0.6434968285415273
Log Regression(12/999): loss=0.6414838294920946
Log Regression(13/999): loss=0.6394832706106824
Log Regression(14/999): loss=0.6499576706465814
Log Regression(15/999): loss=0.6392728133854805
Log Regression(16/999): loss=0.6365353798713459
Log Regression(17/999): loss=0.6380752153355235
Log Regression(18/999): loss=0.6364315777310956
Log Regression(19/999): loss=0.6323909844668028
Log Regression(20/999): loss=0.6414278635441819
Log

Log Regression(170/999): loss=0.5725698990971164
Log Regression(171/999): loss=0.5751018045132454
Log Regression(172/999): loss=0.5798455875367179
Log Regression(173/999): loss=0.5731842694144367
Log Regression(174/999): loss=0.5690740248825776
Log Regression(175/999): loss=0.5692588224689149
Log Regression(176/999): loss=0.5691484261996678
Log Regression(177/999): loss=0.5683368152379623
Log Regression(178/999): loss=0.5735821025487755
Log Regression(179/999): loss=0.5680545778431715
Log Regression(180/999): loss=0.5679840940359132
Log Regression(181/999): loss=0.5696246172215338
Log Regression(182/999): loss=0.5683601821421993
Log Regression(183/999): loss=0.5698337932165771
Log Regression(184/999): loss=0.5736013324042647
Log Regression(185/999): loss=0.568695223458761
Log Regression(186/999): loss=0.5668666548122351
Log Regression(187/999): loss=0.5711204534457353
Log Regression(188/999): loss=0.5674750557767996
Log Regression(189/999): loss=0.5689221913026937
Log Regression(190/99

Log Regression(338/999): loss=0.5529299421056523
Log Regression(339/999): loss=0.551423767019834
Log Regression(340/999): loss=0.5511634823743833
Log Regression(341/999): loss=0.5528587721887784
Log Regression(342/999): loss=0.5511030574177666
Log Regression(343/999): loss=0.5515041954231795
Log Regression(344/999): loss=0.553328224732677
Log Regression(345/999): loss=0.557056835203508
Log Regression(346/999): loss=0.5518336598094145
Log Regression(347/999): loss=0.5516307613255022
Log Regression(348/999): loss=0.5533149010034227
Log Regression(349/999): loss=0.5506017314382581
Log Regression(350/999): loss=0.5507796843572594
Log Regression(351/999): loss=0.5524852517325275
Log Regression(352/999): loss=0.5555562642975636
Log Regression(353/999): loss=0.553323379309213
Log Regression(354/999): loss=0.5509344897123575
Log Regression(355/999): loss=0.5506075924308131
Log Regression(356/999): loss=0.550502317472411
Log Regression(357/999): loss=0.5499844363229643
Log Regression(358/999): 

Log Regression(508/999): loss=0.5482018652037288
Log Regression(509/999): loss=0.5443908167531063
Log Regression(510/999): loss=0.544823643687272
Log Regression(511/999): loss=0.5462198256678031
Log Regression(512/999): loss=0.544852954290529
Log Regression(513/999): loss=0.5454176506232196
Log Regression(514/999): loss=0.5448528317474249
Log Regression(515/999): loss=0.545228865687054
Log Regression(516/999): loss=0.5448248268641037
Log Regression(517/999): loss=0.5457697225902574
Log Regression(518/999): loss=0.5510062661813774
Log Regression(519/999): loss=0.5451594289175157
Log Regression(520/999): loss=0.5440902510040845
Log Regression(521/999): loss=0.5443468309149502
Log Regression(522/999): loss=0.5467015070199387
Log Regression(523/999): loss=0.5471649597166164
Log Regression(524/999): loss=0.5439919701009941
Log Regression(525/999): loss=0.5457287156283142
Log Regression(526/999): loss=0.5438390500964412
Log Regression(527/999): loss=0.5474478029694035
Log Regression(528/999)

Log Regression(677/999): loss=0.5415280555080819
Log Regression(678/999): loss=0.5428139809099993
Log Regression(679/999): loss=0.5420659784025293
Log Regression(680/999): loss=0.541117830993882
Log Regression(681/999): loss=0.5415540233487737
Log Regression(682/999): loss=0.5424899412771108
Log Regression(683/999): loss=0.5543170720195805
Log Regression(684/999): loss=0.5413376988024428
Log Regression(685/999): loss=0.5476965636540082
Log Regression(686/999): loss=0.5461552381250254
Log Regression(687/999): loss=0.5414753485521636
Log Regression(688/999): loss=0.5421525921474162
Log Regression(689/999): loss=0.5409378995215233
Log Regression(690/999): loss=0.5430563274071031
Log Regression(691/999): loss=0.5411481154002322
Log Regression(692/999): loss=0.5414932355803901
Log Regression(693/999): loss=0.5415081396870821
Log Regression(694/999): loss=0.5409798286019922
Log Regression(695/999): loss=0.5421625142286651
Log Regression(696/999): loss=0.5416912099677251
Log Regression(697/99

Log Regression(845/999): loss=0.539407067637474
Log Regression(846/999): loss=0.5391530577375016
Log Regression(847/999): loss=0.5397054083943332
Log Regression(848/999): loss=0.5401301041765436
Log Regression(849/999): loss=0.5392690600035875
Log Regression(850/999): loss=0.5431549131566071
Log Regression(851/999): loss=0.5390469908724074
Log Regression(852/999): loss=0.5420708909758349
Log Regression(853/999): loss=0.5389906581195077
Log Regression(854/999): loss=0.5425893711672887
Log Regression(855/999): loss=0.5389702151303317
Log Regression(856/999): loss=0.5422342178226913
Log Regression(857/999): loss=0.5406554120319438
Log Regression(858/999): loss=0.544182865720048
Log Regression(859/999): loss=0.5413989707524254
Log Regression(860/999): loss=0.5454459315523573
Log Regression(861/999): loss=0.5430414735164323
Log Regression(862/999): loss=0.5391048371960266
Log Regression(863/999): loss=0.5399528703900972
Log Regression(864/999): loss=0.5395874020248194
Log Regression(865/999

In [130]:
# Training accuracy of the logistic-regression weights (compared against the
# original -1/+1 style labels in y_train, not the boolean targets).
y_pred = predict_labels(w, tX_train)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_train) == np.ravel(y_pred))

0.71897

In [131]:
# Validation accuracy of the logistic-regression weights.
y_pred = predict_labels(w, tX_val)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_val) == np.ravel(y_pred))

0.72184

## Logistic Regression Regularized

In [158]:
# Boolean targets for regularized logistic regression: True where the training label
# is positive (same conversion as in the plain logistic-regression section).
y_train_lr = y_train>0

In [177]:
# Hyper-parameters for regularized logistic regression.
# Weights start at zero, one per feature column. An earlier variant drew them from
# np.random.normal(loc=0, scale=1, size=(tX.shape[1],1)).
initial_w = np.zeros(tX.shape[1])
# NOTE(review): the recorded training log counts iterations as "/999", so it was
# presumably produced with max_iters = 1000 — re-run to refresh the output.
max_iters = 5000
gamma = 1e-7  # step size
lambda_ = 1  # regularization strength

In [178]:
# Fit regularized logistic regression on the boolean targets using mini-batches of
# 64 samples; the call logs the loss at every iteration (see output below).
# NOTE(review): the second return value is stored as `mse`, but here it is presumably
# the (regularized) logistic loss rather than a mean squared error.
w, mse = reg_logistic_regression(y_train_lr, tX_train, lambda_, initial_w, max_iters, gamma, batch_size=64)

Log Regression(0/999): loss=0.6847434861837708
Log Regression(1/999): loss=0.6740543586581709
Log Regression(2/999): loss=0.675833238043273
Log Regression(3/999): loss=0.667895811244757
Log Regression(4/999): loss=0.6652965741887917
Log Regression(5/999): loss=0.6739612263753906
Log Regression(6/999): loss=0.6617645032451517
Log Regression(7/999): loss=0.6597164490202116
Log Regression(8/999): loss=0.6610331107981331
Log Regression(9/999): loss=0.6582369194092249
Log Regression(10/999): loss=0.6585558529670379
Log Regression(11/999): loss=0.666904190729708
Log Regression(12/999): loss=0.6714953629361665
Log Regression(13/999): loss=0.6581118092206235
Log Regression(14/999): loss=0.6532562951414744
Log Regression(15/999): loss=0.653325106966108
Log Regression(16/999): loss=0.6516822742983085
Log Regression(17/999): loss=0.6539942115949839
Log Regression(18/999): loss=0.6500018643140412
Log Regression(19/999): loss=0.6487127794352043
Log Regression(20/999): loss=0.6487937402884153
Log Re

Log Regression(170/999): loss=0.5915051709336991
Log Regression(171/999): loss=0.5939238873924769
Log Regression(172/999): loss=0.5906280579767912
Log Regression(173/999): loss=0.5901596642237159
Log Regression(174/999): loss=0.5943488381869433
Log Regression(175/999): loss=0.5960959092662437
Log Regression(176/999): loss=0.5896919798954876
Log Regression(177/999): loss=0.589649543168129
Log Regression(178/999): loss=0.5950302151935393
Log Regression(179/999): loss=0.5894572805837844
Log Regression(180/999): loss=0.590072520916506
Log Regression(181/999): loss=0.5915200875001166
Log Regression(182/999): loss=0.58902674952098
Log Regression(183/999): loss=0.5889047512984115
Log Regression(184/999): loss=0.5888224680001533
Log Regression(185/999): loss=0.5891209318101802
Log Regression(186/999): loss=0.5896810116957186
Log Regression(187/999): loss=0.5889089521623677
Log Regression(188/999): loss=0.5891032241094253
Log Regression(189/999): loss=0.5886109934198536
Log Regression(190/999):

Log Regression(338/999): loss=0.5701382928882396
Log Regression(339/999): loss=0.5701251333169712
Log Regression(340/999): loss=0.5707901798363242
Log Regression(341/999): loss=0.5708013026778506
Log Regression(342/999): loss=0.5717623213177188
Log Regression(343/999): loss=0.5706446069651603
Log Regression(344/999): loss=0.573073151068592
Log Regression(345/999): loss=0.5779157393241955
Log Regression(346/999): loss=0.5703947314615396
Log Regression(347/999): loss=0.5698723167569223
Log Regression(348/999): loss=0.5743860894859782
Log Regression(349/999): loss=0.5719225424657641
Log Regression(350/999): loss=0.5722172775796193
Log Regression(351/999): loss=0.5767199628208879
Log Regression(352/999): loss=0.5700351359791782
Log Regression(353/999): loss=0.5714949778286968
Log Regression(354/999): loss=0.5690895144688847
Log Regression(355/999): loss=0.5691860236518651
Log Regression(356/999): loss=0.5699299546430547
Log Regression(357/999): loss=0.5689672542222726
Log Regression(358/99

Log Regression(507/999): loss=0.5607570180991815
Log Regression(508/999): loss=0.5619392158802953
Log Regression(509/999): loss=0.5627063138593864
Log Regression(510/999): loss=0.5643456878062221
Log Regression(511/999): loss=0.5604185211714874
Log Regression(512/999): loss=0.5623241880799337
Log Regression(513/999): loss=0.5591746220674955
Log Regression(514/999): loss=0.5622710923512323
Log Regression(515/999): loss=0.5584849898664467
Log Regression(516/999): loss=0.5589054217593626
Log Regression(517/999): loss=0.5592891110398046
Log Regression(518/999): loss=0.5582230310448656
Log Regression(519/999): loss=0.5582808437024969
Log Regression(520/999): loss=0.5585401921707166
Log Regression(521/999): loss=0.5590105769651184
Log Regression(522/999): loss=0.5587670326055649
Log Regression(523/999): loss=0.5623199732221874
Log Regression(524/999): loss=0.5580024299331711
Log Regression(525/999): loss=0.5588581635147677
Log Regression(526/999): loss=0.5579811489792861
Log Regression(527/9

Log Regression(675/999): loss=0.5527336636447413
Log Regression(676/999): loss=0.5552969173125292
Log Regression(677/999): loss=0.5542067800893327
Log Regression(678/999): loss=0.5543601307347544
Log Regression(679/999): loss=0.5535750990547301
Log Regression(680/999): loss=0.5523779110978952
Log Regression(681/999): loss=0.5515899762118474
Log Regression(682/999): loss=0.5518208068346944
Log Regression(683/999): loss=0.5514279434698005
Log Regression(684/999): loss=0.5550626741722032
Log Regression(685/999): loss=0.5557215095598765
Log Regression(686/999): loss=0.5542149533567488
Log Regression(687/999): loss=0.5532555842681216
Log Regression(688/999): loss=0.5529341945975622
Log Regression(689/999): loss=0.5516851372529357
Log Regression(690/999): loss=0.5538954964746738
Log Regression(691/999): loss=0.5553461310563369
Log Regression(692/999): loss=0.5511288620602012
Log Regression(693/999): loss=0.5511081159670322
Log Regression(694/999): loss=0.551206164490814
Log Regression(695/99

Log Regression(844/999): loss=0.5478433564367267
Log Regression(845/999): loss=0.5478944219436075
Log Regression(846/999): loss=0.5501566725818431
Log Regression(847/999): loss=0.547686288726132
Log Regression(848/999): loss=0.5474699173616534
Log Regression(849/999): loss=0.5473236431251356
Log Regression(850/999): loss=0.5478893879304243
Log Regression(851/999): loss=0.5476404785069625
Log Regression(852/999): loss=0.5565461030281751
Log Regression(853/999): loss=0.5501860171913776
Log Regression(854/999): loss=0.5513615320212831
Log Regression(855/999): loss=0.5474317477158329
Log Regression(856/999): loss=0.547713117918195
Log Regression(857/999): loss=0.5517380539693602
Log Regression(858/999): loss=0.5489450444664251
Log Regression(859/999): loss=0.5503649098888881
Log Regression(860/999): loss=0.5473155699780443
Log Regression(861/999): loss=0.5512164616506866
Log Regression(862/999): loss=0.5487554432223954
Log Regression(863/999): loss=0.5475626613897722
Log Regression(864/999

In [179]:
# Training accuracy of the regularized logistic-regression weights.
y_pred = predict_labels(w, tX_train)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_train) == np.ravel(y_pred))

0.714205

In [180]:
# Validation accuracy of the regularized logistic-regression weights.
y_pred = predict_labels(w, tX_val)
# Flatten both sides before comparing. Comparing an (N, 1) column against an (N,)
# vector would silently broadcast to an N x N matrix instead of comparing
# element-wise, so this form is correct whichever shape predict_labels returns.
np.mean(np.ravel(y_val) == np.ravel(y_pred))

0.71508

## Do your crazy machine learning thing here :) ...

## Generate predictions and save output in csv format for submission:

In [10]:
# NOTE(review): the path is still an empty placeholder; this cell cannot load anything
# until it points at the test CSV.
DATA_TEST_PATH = '' # TODO: download test data and supply path here
# The first returned value (labels) is ignored for the test set; only the feature
# matrix and the event ids are kept.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [31]:
# NOTE(review): the output path is still an empty placeholder and must be set before
# this cell can write a submission file.
OUTPUT_PATH = '' # TODO: fill in desired name of output file for submission
# Give the test features the same missing-value treatment as the validation split:
# fill the -999 placeholders with the values learned on the training data. A new
# name is used so re-running the cell never fills already-filled data.
tX_test_filled, _ = dp.fill_nan(tX_test, nan_value=-999, method='use_filler', filler=filler)
# The notebook never defines `weights`; trained weights are stored in `w`, which holds
# whichever model was fitted last. Predictions are flattened so one label is written
# per event id regardless of whether `w` is a vector or a column.
y_pred = np.ravel(predict_labels(w, tX_test_filled))
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)