In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv' # TODO: download train data and supply path here 
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Data Preprocessing

In [3]:
import data_processor as dp

In [4]:
from implementations import *

In [5]:
ind = np.arange(len(y))
np.random.shuffle(ind)
ind_train = ind[:int(len(ind)*0.9)]
ind_test = ind[int(len(ind)*0.9):]

In [6]:
tX_train = tX[ind_train]
y_train = y[ind_train]

tX_val = tX[ind_test]
y_val = y[ind_test]

In [7]:
tX_train_f, filler = dp.fill_nan(tX_train, nan_value=-999, method='mean')

In [8]:
tX_val_f, _ = dp.fill_nan(tX_val, nan_value=-999, method='use_filler', filler=filler)

In [9]:
tX_train.shape,tX_val.shape

((225000, 30), (25000, 30))

## Least Squares

Fill NaN. Apply LS

In [19]:
w, mse = least_squares(y_train, tX_train)

In [20]:
y_pred = predict_labels(w, tX_train)
print(y_pred)
print(y_pred.shape)
np.mean(y_train==y_pred)

[-1. -1.  1. ...  1. -1. -1.]
(225000,)


0.7404266666666667

In [21]:
y_pred = predict_labels(w, tX_val)
print(y_pred)
print(y_pred.shape)
np.mean(y_val==y_pred)

[-1.  1.  1. ... -1. -1. -1.]
(25000,)


0.74284

## Ridge Regression

In [59]:
for l in [1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4]:
    w, mse = ridge_regression(y_train, tX_train, l)
    y_pred_train = predict_labels(w, tX_train)
    y_pred_val = predict_labels(w, tX_val)
    print(str(l)+": MSE_TRAIN="+str(mse)+ " TRAIN="+str(np.mean(y_train==y_pred_train))+" VAL="+str(np.mean(y_val==y_pred_val)))

0.0001: MSE_TRAIN=0.34383321387881927 TRAIN=0.7404311111111112 VAL=0.74284
0.001: MSE_TRAIN=0.34383321491499214 TRAIN=0.7404355555555555 VAL=0.74284
0.01: MSE_TRAIN=0.3438332573871147 TRAIN=0.7404444444444445 VAL=0.7428
0.1: MSE_TRAIN=0.3438334437031598 TRAIN=0.7404088888888889 VAL=0.74268
1: MSE_TRAIN=0.34383351744321655 TRAIN=0.7404044444444444 VAL=0.74268
10.0: MSE_TRAIN=0.3438335333866524 TRAIN=0.7403866666666666 VAL=0.74268
100.0: MSE_TRAIN=0.34383418712302466 TRAIN=0.7403511111111111 VAL=0.74276
1000.0: MSE_TRAIN=0.3438868882829171 TRAIN=0.7398933333333333 VAL=0.7418
10000.0: MSE_TRAIN=0.3455044343817568 TRAIN=0.7372133333333334 VAL=0.73792


## Least Squares GD

In [26]:
from implementations import *

In [27]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 1000
gamma = 3e-6

In [29]:
w, mse = least_squares_GD(y_train, tX_train, initial_w, max_iters, gamma)

Gradient Descent(0/999): loss=0.5000000000000003
Gradient Descent(1/999): loss=0.47514767720269957
Gradient Descent(2/999): loss=0.4723188235681864
Gradient Descent(3/999): loss=0.4698163342630682
Gradient Descent(4/999): loss=0.4674265566178349
Gradient Descent(5/999): loss=0.46514164772825245
Gradient Descent(6/999): loss=0.4629560507024571
Gradient Descent(7/999): loss=0.4608645305822573
Gradient Descent(8/999): loss=0.45886213900965495
Gradient Descent(9/999): loss=0.45694419825845206
Gradient Descent(10/999): loss=0.45510628631722694
Gradient Descent(11/999): loss=0.4533442227997835
Gradient Descent(12/999): loss=0.45165405563508226
Gradient Descent(13/999): loss=0.4500320484930448
Gradient Descent(14/999): loss=0.4484746689051728
Gradient Descent(15/999): loss=0.4469785770412906
Gradient Descent(16/999): loss=0.44554061510593923
Gradient Descent(17/999): loss=0.4441577973200208
Gradient Descent(18/999): loss=0.44282730045524227
Gradient Descent(19/999): loss=0.44154645489074
Grad

Gradient Descent(164/999): loss=0.38704765922617085
Gradient Descent(165/999): loss=0.3869059128114636
Gradient Descent(166/999): loss=0.386765359410277
Gradient Descent(167/999): loss=0.38662598767417256
Gradient Descent(168/999): loss=0.3864877863886289
Gradient Descent(169/999): loss=0.386350744470261
Gradient Descent(170/999): loss=0.38621485096415037
Gradient Descent(171/999): loss=0.38608009504127805
Gradient Descent(172/999): loss=0.38594646599605514
Gradient Descent(173/999): loss=0.3858139532439485
Gradient Descent(174/999): loss=0.3856825463191928
Gradient Descent(175/999): loss=0.3855522348725884
Gradient Descent(176/999): loss=0.3854230086693781
Gradient Descent(177/999): loss=0.38529485758720106
Gradient Descent(178/999): loss=0.38516777161411636
Gradient Descent(179/999): loss=0.38504174084669796
Gradient Descent(180/999): loss=0.38491675548819293
Gradient Descent(181/999): loss=0.3847928058467415
Gradient Descent(182/999): loss=0.384669882333658
Gradient Descent(183/999)

Gradient Descent(326/999): loss=0.37417480208337905
Gradient Descent(327/999): loss=0.3741345481699916
Gradient Descent(328/999): loss=0.3740945751709227
Gradient Descent(329/999): loss=0.3740548806934829
Gradient Descent(330/999): loss=0.3740154623666381
Gradient Descent(331/999): loss=0.37397631784080665
Gradient Descent(332/999): loss=0.37393744478765994
Gradient Descent(333/999): loss=0.3738988408999242
Gradient Descent(334/999): loss=0.3738605038911842
Gradient Descent(335/999): loss=0.3738224314956887
Gradient Descent(336/999): loss=0.37378462146815833
Gradient Descent(337/999): loss=0.373747071583595
Gradient Descent(338/999): loss=0.3737097796370922
Gradient Descent(339/999): loss=0.373672743443649
Gradient Descent(340/999): loss=0.37363596083798484
Gradient Descent(341/999): loss=0.3735994296743547
Gradient Descent(342/999): loss=0.3735631478263694
Gradient Descent(343/999): loss=0.3735271131868135
Gradient Descent(344/999): loss=0.37349132366746873
Gradient Descent(345/999): 

Gradient Descent(486/999): loss=0.37014173290708646
Gradient Descent(487/999): loss=0.3701264430507177
Gradient Descent(488/999): loss=0.37011122992861756
Gradient Descent(489/999): loss=0.3700960929618928
Gradient Descent(490/999): loss=0.37008103157661537
Gradient Descent(491/999): loss=0.3700660452037776
Gradient Descent(492/999): loss=0.37005113327924855
Gradient Descent(493/999): loss=0.37003629524372883
Gradient Descent(494/999): loss=0.37002153054270864
Gradient Descent(495/999): loss=0.3700068386264241
Gradient Descent(496/999): loss=0.3699922189498144
Gradient Descent(497/999): loss=0.3699776709724799
Gradient Descent(498/999): loss=0.36996319415864004
Gradient Descent(499/999): loss=0.3699487879770923
Gradient Descent(500/999): loss=0.36993445190117064
Gradient Descent(501/999): loss=0.36992018540870497
Gradient Descent(502/999): loss=0.3699059879819809
Gradient Descent(503/999): loss=0.3698918591076998
Gradient Descent(504/999): loss=0.36987779827693884
Gradient Descent(505/

Gradient Descent(646/999): loss=0.36838473696645174
Gradient Descent(647/999): loss=0.3683767837023548
Gradient Descent(648/999): loss=0.36836885609418757
Gradient Descent(649/999): loss=0.36836095398783547
Gradient Descent(650/999): loss=0.3683530772303914
Gradient Descent(651/999): loss=0.36834522567014444
Gradient Descent(652/999): loss=0.3683373991565697
Gradient Descent(653/999): loss=0.3683295975403185
Gradient Descent(654/999): loss=0.36832182067320807
Gradient Descent(655/999): loss=0.3683140684082111
Gradient Descent(656/999): loss=0.3683063405994469
Gradient Descent(657/999): loss=0.36829863710217026
Gradient Descent(658/999): loss=0.3682909577727631
Gradient Descent(659/999): loss=0.3682833024687236
Gradient Descent(660/999): loss=0.3682756710486576
Gradient Descent(661/999): loss=0.36826806337226836
Gradient Descent(662/999): loss=0.3682604793003478
Gradient Descent(663/999): loss=0.3682529186947665
Gradient Descent(664/999): loss=0.36824538141846536
Gradient Descent(665/99

Gradient Descent(806/999): loss=0.3673601733720534
Gradient Descent(807/999): loss=0.3673549618441861
Gradient Descent(808/999): loss=0.36734976160044536
Gradient Descent(809/999): loss=0.36734457259292586
Gradient Descent(810/999): loss=0.3673393947740395
Gradient Descent(811/999): loss=0.36733422809651384
Gradient Descent(812/999): loss=0.3673290725133885
Gradient Descent(813/999): loss=0.3673239279780141
Gradient Descent(814/999): loss=0.3673187944440483
Gradient Descent(815/999): loss=0.3673136718654545
Gradient Descent(816/999): loss=0.3673085601964984
Gradient Descent(817/999): loss=0.3673034593917468
Gradient Descent(818/999): loss=0.3672983694060641
Gradient Descent(819/999): loss=0.36729329019461054
Gradient Descent(820/999): loss=0.3672882217128398
Gradient Descent(821/999): loss=0.3672831639164961
Gradient Descent(822/999): loss=0.3672781167616131
Gradient Descent(823/999): loss=0.3672730802045103
Gradient Descent(824/999): loss=0.3672680542017914
Gradient Descent(825/999): 

Gradient Descent(966/999): loss=0.3666444291967611
Gradient Descent(967/999): loss=0.3666405744042259
Gradient Descent(968/999): loss=0.3666367260181864
Gradient Descent(969/999): loss=0.36663288401996885
Gradient Descent(970/999): loss=0.366629048390995
Gradient Descent(971/999): loss=0.36662521911277995
Gradient Descent(972/999): loss=0.36662139616693284
Gradient Descent(973/999): loss=0.3666175795351552
Gradient Descent(974/999): loss=0.36661376919924094
Gradient Descent(975/999): loss=0.366609965141075
Gradient Descent(976/999): loss=0.3666061673426339
Gradient Descent(977/999): loss=0.3666023757859838
Gradient Descent(978/999): loss=0.36659859045328036
Gradient Descent(979/999): loss=0.36659481132676863
Gradient Descent(980/999): loss=0.36659103838878176
Gradient Descent(981/999): loss=0.3665872716217407
Gradient Descent(982/999): loss=0.36658351100815345
Gradient Descent(983/999): loss=0.3665797565306145
Gradient Descent(984/999): loss=0.3665760081718043
Gradient Descent(985/999)

In [30]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

0.7219155555555555

In [31]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.72184

## Least Squares SGD

In [32]:
from implementations import *

In [33]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 5000
gamma = 3e-7

In [35]:
w, mse = least_squares_SGD(y_train, tX_train, initial_w, max_iters, gamma, batch_size=64)

Gradient Descent(0/999): loss=0.4978445564319043
Gradient Descent(1/999): loss=0.49123923762061833
Gradient Descent(2/999): loss=0.4880131989135347
Gradient Descent(3/999): loss=0.4845408339254324
Gradient Descent(4/999): loss=0.4865696244391807
Gradient Descent(5/999): loss=0.482804517193521
Gradient Descent(6/999): loss=0.48129068967045385
Gradient Descent(7/999): loss=0.4812130371856222
Gradient Descent(8/999): loss=0.48232716417095733
Gradient Descent(9/999): loss=0.4825403986654286
Gradient Descent(10/999): loss=0.4812578237833007
Gradient Descent(11/999): loss=0.4793890881185568
Gradient Descent(12/999): loss=0.4778084057884684
Gradient Descent(13/999): loss=0.4750920367974531
Gradient Descent(14/999): loss=0.4758001275271161
Gradient Descent(15/999): loss=0.47444480301903796
Gradient Descent(16/999): loss=0.4738897010827102
Gradient Descent(17/999): loss=0.47340607204806545
Gradient Descent(18/999): loss=0.4733425343451806
Gradient Descent(19/999): loss=0.47345163269696033
Gradi

Gradient Descent(163/999): loss=0.44479296097341187
Gradient Descent(164/999): loss=0.4450521803310013
Gradient Descent(165/999): loss=0.44483408582899486
Gradient Descent(166/999): loss=0.4450814962856125
Gradient Descent(167/999): loss=0.4453018177058671
Gradient Descent(168/999): loss=0.4447993223783432
Gradient Descent(169/999): loss=0.44486231572298407
Gradient Descent(170/999): loss=0.44440007150573485
Gradient Descent(171/999): loss=0.44472924434548655
Gradient Descent(172/999): loss=0.4447099156371695
Gradient Descent(173/999): loss=0.4438578428253266
Gradient Descent(174/999): loss=0.4435315866956234
Gradient Descent(175/999): loss=0.44369885213643306
Gradient Descent(176/999): loss=0.443308257692218
Gradient Descent(177/999): loss=0.44300010268246703
Gradient Descent(178/999): loss=0.44285566323550085
Gradient Descent(179/999): loss=0.4426808287296232
Gradient Descent(180/999): loss=0.44255065627128665
Gradient Descent(181/999): loss=0.4427642856489929
Gradient Descent(182/99

Gradient Descent(325/999): loss=0.4278609897521303
Gradient Descent(326/999): loss=0.42779406528120495
Gradient Descent(327/999): loss=0.4276754421324031
Gradient Descent(328/999): loss=0.4276411017242782
Gradient Descent(329/999): loss=0.4276320191109741
Gradient Descent(330/999): loss=0.42761395913962785
Gradient Descent(331/999): loss=0.42745753284331794
Gradient Descent(332/999): loss=0.42774808290928823
Gradient Descent(333/999): loss=0.427788650592864
Gradient Descent(334/999): loss=0.4271120367898014
Gradient Descent(335/999): loss=0.4270699318739413
Gradient Descent(336/999): loss=0.427236418410412
Gradient Descent(337/999): loss=0.4267612694181999
Gradient Descent(338/999): loss=0.426704476657892
Gradient Descent(339/999): loss=0.42671167884985506
Gradient Descent(340/999): loss=0.42656256350692234
Gradient Descent(341/999): loss=0.4266029614300968
Gradient Descent(342/999): loss=0.4263973210817566
Gradient Descent(343/999): loss=0.4263075124421799
Gradient Descent(344/999): l

Gradient Descent(487/999): loss=0.4177956887663558
Gradient Descent(488/999): loss=0.41769621363244397
Gradient Descent(489/999): loss=0.4179964946325642
Gradient Descent(490/999): loss=0.4179759650692734
Gradient Descent(491/999): loss=0.4178820222695865
Gradient Descent(492/999): loss=0.4180289532707104
Gradient Descent(493/999): loss=0.4183522268532387
Gradient Descent(494/999): loss=0.41826402246754235
Gradient Descent(495/999): loss=0.41765882827360834
Gradient Descent(496/999): loss=0.4174765794940306
Gradient Descent(497/999): loss=0.41753773504496255
Gradient Descent(498/999): loss=0.4173037013342604
Gradient Descent(499/999): loss=0.4172808343194396
Gradient Descent(500/999): loss=0.4184548031033199
Gradient Descent(501/999): loss=0.41820692025392964
Gradient Descent(502/999): loss=0.41765841754852706
Gradient Descent(503/999): loss=0.4175677639418083
Gradient Descent(504/999): loss=0.417149716141366
Gradient Descent(505/999): loss=0.4170116146030017
Gradient Descent(506/999):

Gradient Descent(649/999): loss=0.410148646797067
Gradient Descent(650/999): loss=0.410062240962285
Gradient Descent(651/999): loss=0.4099868485611347
Gradient Descent(652/999): loss=0.40996956900807463
Gradient Descent(653/999): loss=0.40997028183605916
Gradient Descent(654/999): loss=0.40987434793461025
Gradient Descent(655/999): loss=0.40990187924941207
Gradient Descent(656/999): loss=0.40980453332226024
Gradient Descent(657/999): loss=0.4096999101090274
Gradient Descent(658/999): loss=0.40966767674698457
Gradient Descent(659/999): loss=0.4096238413646568
Gradient Descent(660/999): loss=0.4095764511922953
Gradient Descent(661/999): loss=0.4095869760561112
Gradient Descent(662/999): loss=0.40963625318995944
Gradient Descent(663/999): loss=0.4094798084650021
Gradient Descent(664/999): loss=0.40939146222628736
Gradient Descent(665/999): loss=0.40974363312610707
Gradient Descent(666/999): loss=0.409745612548248
Gradient Descent(667/999): loss=0.4105115441205169
Gradient Descent(668/999)

Gradient Descent(811/999): loss=0.40475525729244444
Gradient Descent(812/999): loss=0.4046635449411427
Gradient Descent(813/999): loss=0.4046172779220924
Gradient Descent(814/999): loss=0.4046749651045595
Gradient Descent(815/999): loss=0.4046346785925506
Gradient Descent(816/999): loss=0.4045372115585433
Gradient Descent(817/999): loss=0.404492050851792
Gradient Descent(818/999): loss=0.404507379660633
Gradient Descent(819/999): loss=0.4047839027677783
Gradient Descent(820/999): loss=0.40440659763661024
Gradient Descent(821/999): loss=0.4043849523605578
Gradient Descent(822/999): loss=0.4045285057510985
Gradient Descent(823/999): loss=0.4044100862038184
Gradient Descent(824/999): loss=0.4043327675691818
Gradient Descent(825/999): loss=0.40441389293964053
Gradient Descent(826/999): loss=0.4043913385939749
Gradient Descent(827/999): loss=0.40453568992761013
Gradient Descent(828/999): loss=0.4047707480440102
Gradient Descent(829/999): loss=0.40470588051498857
Gradient Descent(830/999): l

Gradient Descent(973/999): loss=0.40032485522469
Gradient Descent(974/999): loss=0.4004700702046661
Gradient Descent(975/999): loss=0.4002649992546339
Gradient Descent(976/999): loss=0.4002024784358968
Gradient Descent(977/999): loss=0.4005074822956666
Gradient Descent(978/999): loss=0.4001717596099878
Gradient Descent(979/999): loss=0.4001173744643487
Gradient Descent(980/999): loss=0.40020159142598005
Gradient Descent(981/999): loss=0.4000763310875739
Gradient Descent(982/999): loss=0.400128355062376
Gradient Descent(983/999): loss=0.4000477518601404
Gradient Descent(984/999): loss=0.4000102611611286
Gradient Descent(985/999): loss=0.3999947193680409
Gradient Descent(986/999): loss=0.3999846126166332
Gradient Descent(987/999): loss=0.39994242982539996
Gradient Descent(988/999): loss=0.3999495622167426
Gradient Descent(989/999): loss=0.39993773167350893
Gradient Descent(990/999): loss=0.39997973309878604
Gradient Descent(991/999): loss=0.39987431870917145
Gradient Descent(992/999): lo

In [36]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

0.682

In [37]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.68116

## Logistic Regression

In [38]:
from implementations import *

In [47]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 1000
gamma = 1.5e-7

In [45]:
y_train_lr = y_train>0

In [48]:
w, mse = logistic_regression(y_train_lr, tX_train, initial_w, max_iters, gamma, batch_size=64)

Log Regression(0/999): loss=0.6669003579525978
Log Regression(1/999): loss=0.6633099924185696
Log Regression(2/999): loss=0.6671924049787051
Log Regression(3/999): loss=0.6625946626484256
Log Regression(4/999): loss=0.6555295559377617
Log Regression(5/999): loss=0.6537296167985533
Log Regression(6/999): loss=0.6512040560434509
Log Regression(7/999): loss=0.669039467151037
Log Regression(8/999): loss=0.6545967006305298
Log Regression(9/999): loss=0.6662910205997788
Log Regression(10/999): loss=0.671063695532839
Log Regression(11/999): loss=0.6534930625490328
Log Regression(12/999): loss=0.6442113514190045
Log Regression(13/999): loss=0.6502131136760555
Log Regression(14/999): loss=0.6811939006131291
Log Regression(15/999): loss=0.6503067418237227
Log Regression(16/999): loss=0.6391423637090622
Log Regression(17/999): loss=0.6362725304905471
Log Regression(18/999): loss=0.653626634791935
Log Regression(19/999): loss=0.6486101647910342
Log Regression(20/999): loss=0.6315788207595077
Log R

Log Regression(170/999): loss=0.5756956018932959
Log Regression(171/999): loss=0.5951193760522699
Log Regression(172/999): loss=0.5727527707600798
Log Regression(173/999): loss=0.5694921814001845
Log Regression(174/999): loss=0.5755948804533964
Log Regression(175/999): loss=0.5923561218540812
Log Regression(176/999): loss=0.5715374797465191
Log Regression(177/999): loss=0.5743034042865179
Log Regression(178/999): loss=0.5694402342310692
Log Regression(179/999): loss=0.5821561537038866
Log Regression(180/999): loss=0.5858227375795596
Log Regression(181/999): loss=0.5709957685153568
Log Regression(182/999): loss=0.5766654130162381
Log Regression(183/999): loss=0.5692215456407501
Log Regression(184/999): loss=0.5694268649071155
Log Regression(185/999): loss=0.5674342357603802
Log Regression(186/999): loss=0.5862894127663627
Log Regression(187/999): loss=0.5786403954398477
Log Regression(188/999): loss=0.5686122643535161
Log Regression(189/999): loss=0.5881487475307083
Log Regression(190/9

Log Regression(338/999): loss=0.5507694166378119
Log Regression(339/999): loss=0.5529003783780055
Log Regression(340/999): loss=0.5522306479733862
Log Regression(341/999): loss=0.5535085384676557
Log Regression(342/999): loss=0.5516356195404456
Log Regression(343/999): loss=0.551980499101624
Log Regression(344/999): loss=0.5656792345289345
Log Regression(345/999): loss=0.5751878511371592
Log Regression(346/999): loss=0.5506807716745037
Log Regression(347/999): loss=0.5510272250199534
Log Regression(348/999): loss=0.5513022909092896
Log Regression(349/999): loss=0.5502512059005009
Log Regression(350/999): loss=0.5591084226523675
Log Regression(351/999): loss=0.5511578783568112
Log Regression(352/999): loss=0.5553077436919296
Log Regression(353/999): loss=0.5523258700441149
Log Regression(354/999): loss=0.5571658584450149
Log Regression(355/999): loss=0.554000776022056
Log Regression(356/999): loss=0.5641749816349942
Log Regression(357/999): loss=0.5633548246254152
Log Regression(358/999

Log Regression(506/999): loss=0.5604156682213456
Log Regression(507/999): loss=0.5439440019308672
Log Regression(508/999): loss=0.5456442473149347
Log Regression(509/999): loss=0.5580595975377055
Log Regression(510/999): loss=0.5476082132709089
Log Regression(511/999): loss=0.5436789007672072
Log Regression(512/999): loss=0.543491752256352
Log Regression(513/999): loss=0.5646121768787361
Log Regression(514/999): loss=0.5447078496614186
Log Regression(515/999): loss=0.5436576973476529
Log Regression(516/999): loss=0.5442288206391822
Log Regression(517/999): loss=0.5517763871436627
Log Regression(518/999): loss=0.5444760411240853
Log Regression(519/999): loss=0.5433443334122119
Log Regression(520/999): loss=0.543841158687368
Log Regression(521/999): loss=0.5464316021983273
Log Regression(522/999): loss=0.5463025816565533
Log Regression(523/999): loss=0.5432956873008453
Log Regression(524/999): loss=0.5496647339000572
Log Regression(525/999): loss=0.543810480195864
Log Regression(526/999)

Log Regression(674/999): loss=0.5428736767007154
Log Regression(675/999): loss=0.551038394278235
Log Regression(676/999): loss=0.5516699626371613
Log Regression(677/999): loss=0.5409041826971458
Log Regression(678/999): loss=0.5445383818937107
Log Regression(679/999): loss=0.5405644974128889
Log Regression(680/999): loss=0.5521384626566209
Log Regression(681/999): loss=0.54039874981917
Log Regression(682/999): loss=0.5409340254443054
Log Regression(683/999): loss=0.5407752946696782
Log Regression(684/999): loss=0.5451025401595405
Log Regression(685/999): loss=0.5430722208924603
Log Regression(686/999): loss=0.5527040920235816
Log Regression(687/999): loss=0.5503944723286819
Log Regression(688/999): loss=0.5473269969324154
Log Regression(689/999): loss=0.541712452018906
Log Regression(690/999): loss=0.5696246244597589
Log Regression(691/999): loss=0.5487913074491185
Log Regression(692/999): loss=0.541791507935627
Log Regression(693/999): loss=0.5471340422909883
Log Regression(694/999): 

Log Regression(842/999): loss=0.5389072391609611
Log Regression(843/999): loss=0.5405467987482644
Log Regression(844/999): loss=0.5467330514441289
Log Regression(845/999): loss=0.538877098388668
Log Regression(846/999): loss=0.5401601630839546
Log Regression(847/999): loss=0.5459100567189282
Log Regression(848/999): loss=0.5596430998786941
Log Regression(849/999): loss=0.5415876190572702
Log Regression(850/999): loss=0.5423994656876077
Log Regression(851/999): loss=0.5524695358459355
Log Regression(852/999): loss=0.5524755180429668
Log Regression(853/999): loss=0.5445148082996143
Log Regression(854/999): loss=0.5413455899699575
Log Regression(855/999): loss=0.5395703366310561
Log Regression(856/999): loss=0.5398988266518367
Log Regression(857/999): loss=0.5437558541888022
Log Regression(858/999): loss=0.5389673090269185
Log Regression(859/999): loss=0.5389993746099844
Log Regression(860/999): loss=0.5397761052756934
Log Regression(861/999): loss=0.549665323240502
Log Regression(862/999

In [49]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

0.7269155555555555

In [50]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.727

## Logistic Regression Regularized

In [51]:
from implementations import *

In [52]:
y_train_lr = y_train>0

### lambda = 1

In [53]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 5000
gamma = 2e-7
lambda_ = 1

In [65]:
w, mse = reg_logistic_regression(y_train_lr, tX_train, lambda_, initial_w, max_iters, gamma, batch_size=64)

Log Regression(0/4999): loss=0.6784966228564583
Log Regression(1/4999): loss=0.6728777545647245
Log Regression(2/4999): loss=0.6708599776250701
Log Regression(3/4999): loss=0.6660818263001932
Log Regression(4/4999): loss=0.6649078174277324
Log Regression(5/4999): loss=0.6704532069215893
Log Regression(6/4999): loss=0.6678752254563738
Log Regression(7/4999): loss=0.6694979935716667
Log Regression(8/4999): loss=0.6665048256311488
Log Regression(9/4999): loss=0.6864026823746251
Log Regression(10/4999): loss=0.6587512639394648
Log Regression(11/4999): loss=0.6565441552690753
Log Regression(12/4999): loss=0.6578398256117568
Log Regression(13/4999): loss=0.6547958507810951
Log Regression(14/4999): loss=0.6677523108259908
Log Regression(15/4999): loss=0.6528702864391277
Log Regression(16/4999): loss=0.6535367575972096
Log Regression(17/4999): loss=0.6505342941981145
Log Regression(18/4999): loss=0.6531550543678706
Log Regression(19/4999): loss=0.6540315843090969
Log Regression(20/4999): loss=

Log Regression(168/4999): loss=0.5913439115681086
Log Regression(169/4999): loss=0.5909536986259676
Log Regression(170/4999): loss=0.5937213943878525
Log Regression(171/4999): loss=0.5902936894654216
Log Regression(172/4999): loss=0.5901597914087361
Log Regression(173/4999): loss=0.5905063737293693
Log Regression(174/4999): loss=0.589952027658257
Log Regression(175/4999): loss=0.5903084911694756
Log Regression(176/4999): loss=0.590204336695589
Log Regression(177/4999): loss=0.5907896204674562
Log Regression(178/4999): loss=0.5908814362408069
Log Regression(179/4999): loss=0.5928667646624776
Log Regression(180/4999): loss=0.594538289215381
Log Regression(181/4999): loss=0.5892790135638756
Log Regression(182/4999): loss=0.5906767139308614
Log Regression(183/4999): loss=0.5885797999049345
Log Regression(184/4999): loss=0.5884338620044091
Log Regression(185/4999): loss=0.5882080438819548
Log Regression(186/4999): loss=0.5889101640622264
Log Regression(187/4999): loss=0.5925123731764742
Log

Log Regression(333/4999): loss=0.571252259314384
Log Regression(334/4999): loss=0.5720077486159174
Log Regression(335/4999): loss=0.5723709556180596
Log Regression(336/4999): loss=0.5757301222377004
Log Regression(337/4999): loss=0.5711117668922824
Log Regression(338/4999): loss=0.5728015430116175
Log Regression(339/4999): loss=0.570675002841907
Log Regression(340/4999): loss=0.5707911015097233
Log Regression(341/4999): loss=0.5705621632101079
Log Regression(342/4999): loss=0.570464487800399
Log Regression(343/4999): loss=0.5709135343718872
Log Regression(344/4999): loss=0.5702127804404351
Log Regression(345/4999): loss=0.5706047499878729
Log Regression(346/4999): loss=0.5700222104557217
Log Regression(347/4999): loss=0.5710026654459901
Log Regression(348/4999): loss=0.5700534882019314
Log Regression(349/4999): loss=0.5699120661677908
Log Regression(350/4999): loss=0.571142427381001
Log Regression(351/4999): loss=0.5705791227849579
Log Regression(352/4999): loss=0.5707069530198827
Log 

Log Regression(499/4999): loss=0.5637242267113239
Log Regression(500/4999): loss=0.560898344407372
Log Regression(501/4999): loss=0.5601668831195317
Log Regression(502/4999): loss=0.5605058887371627
Log Regression(503/4999): loss=0.5605171938980315
Log Regression(504/4999): loss=0.5600223472760419
Log Regression(505/4999): loss=0.5612766512714116
Log Regression(506/4999): loss=0.5621813029103014
Log Regression(507/4999): loss=0.5625395578953822
Log Regression(508/4999): loss=0.5606370686192802
Log Regression(509/4999): loss=0.5598064069667685
Log Regression(510/4999): loss=0.5599460660278602
Log Regression(511/4999): loss=0.5600354911005548
Log Regression(512/4999): loss=0.5660438805362955
Log Regression(513/4999): loss=0.5604976708842115
Log Regression(514/4999): loss=0.5603675711175682
Log Regression(515/4999): loss=0.5699975575439977
Log Regression(516/4999): loss=0.5778670361674876
Log Regression(517/4999): loss=0.5676115700491734
Log Regression(518/4999): loss=0.5610090342173989
L

Log Regression(665/4999): loss=0.5602341627163508
Log Regression(666/4999): loss=0.5553961935063999
Log Regression(667/4999): loss=0.5539017214747416
Log Regression(668/4999): loss=0.5539187887560787
Log Regression(669/4999): loss=0.5545400229843725
Log Regression(670/4999): loss=0.5552219488423459
Log Regression(671/4999): loss=0.5579205443548472
Log Regression(672/4999): loss=0.5536761401415047
Log Regression(673/4999): loss=0.5536931758008755
Log Regression(674/4999): loss=0.553513779481798
Log Regression(675/4999): loss=0.5538362921290907
Log Regression(676/4999): loss=0.5538429252799129
Log Regression(677/4999): loss=0.5537105280928998
Log Regression(678/4999): loss=0.5552863597272875
Log Regression(679/4999): loss=0.5533513090002976
Log Regression(680/4999): loss=0.5557320840939914
Log Regression(681/4999): loss=0.5543382306943101
Log Regression(682/4999): loss=0.5537848103909838
Log Regression(683/4999): loss=0.5538487412461147
Log Regression(684/4999): loss=0.5601593131265589
L

Log Regression(831/4999): loss=0.5496711792064161
Log Regression(832/4999): loss=0.5500987188978191
Log Regression(833/4999): loss=0.5503905063085334
Log Regression(834/4999): loss=0.5495951728890114
Log Regression(835/4999): loss=0.5500044646923857
Log Regression(836/4999): loss=0.5543901209839704
Log Regression(837/4999): loss=0.5529786340179351
Log Regression(838/4999): loss=0.5502764597274238
Log Regression(839/4999): loss=0.5532393967631901
Log Regression(840/4999): loss=0.5494233372099698
Log Regression(841/4999): loss=0.5496546051585562
Log Regression(842/4999): loss=0.5494073189114573
Log Regression(843/4999): loss=0.554936968408945
Log Regression(844/4999): loss=0.5551534660851126
Log Regression(845/4999): loss=0.5551763955205227
Log Regression(846/4999): loss=0.5537651052081507
Log Regression(847/4999): loss=0.549716078256842
Log Regression(848/4999): loss=0.5500850048822601
Log Regression(849/4999): loss=0.5519893020028582
Log Regression(850/4999): loss=0.549253897087549
Log

Log Regression(997/4999): loss=0.5489670847356679
Log Regression(998/4999): loss=0.5510808263857452
Log Regression(999/4999): loss=0.5505558116310918
Log Regression(1000/4999): loss=0.5484326146855707
Log Regression(1001/4999): loss=0.5482150904413872
Log Regression(1002/4999): loss=0.549240430456087
Log Regression(1003/4999): loss=0.5494108437399274
Log Regression(1004/4999): loss=0.5496944741082016
Log Regression(1005/4999): loss=0.5475073638619259
Log Regression(1006/4999): loss=0.5471550819875318
Log Regression(1007/4999): loss=0.5471121939556366
Log Regression(1008/4999): loss=0.5479134482149142
Log Regression(1009/4999): loss=0.5500765910108603
Log Regression(1010/4999): loss=0.5479839245357511
Log Regression(1011/4999): loss=0.5481864530430532
Log Regression(1012/4999): loss=0.5512518024485734
Log Regression(1013/4999): loss=0.5474574057850329
Log Regression(1014/4999): loss=0.5478076014617571
Log Regression(1015/4999): loss=0.5501992332386777
Log Regression(1016/4999): loss=0.5

Log Regression(1159/4999): loss=0.545787650385867
Log Regression(1160/4999): loss=0.5461666079939301
Log Regression(1161/4999): loss=0.5495745402303325
Log Regression(1162/4999): loss=0.5496531570190849
Log Regression(1163/4999): loss=0.5474240035103046
Log Regression(1164/4999): loss=0.5456108288624152
Log Regression(1165/4999): loss=0.5454800948441723
Log Regression(1166/4999): loss=0.5463937321283888
Log Regression(1167/4999): loss=0.5478239288272
Log Regression(1168/4999): loss=0.5454884878675017
Log Regression(1169/4999): loss=0.5456633459930224
Log Regression(1170/4999): loss=0.5467255588863774
Log Regression(1171/4999): loss=0.5458905505137147
Log Regression(1172/4999): loss=0.5473629633062504
Log Regression(1173/4999): loss=0.5458682482375803
Log Regression(1174/4999): loss=0.5457913415528953
Log Regression(1175/4999): loss=0.5456930191103688
Log Regression(1176/4999): loss=0.5454586179826976
Log Regression(1177/4999): loss=0.5455267754143096
Log Regression(1178/4999): loss=0.5

Log Regression(1321/4999): loss=0.5475567314862018
Log Regression(1322/4999): loss=0.5481986250978664
Log Regression(1323/4999): loss=0.5498915884692421
Log Regression(1324/4999): loss=0.5442330333934642
Log Regression(1325/4999): loss=0.54630872834265
Log Regression(1326/4999): loss=0.5446039259025917
Log Regression(1327/4999): loss=0.5441891148518238
Log Regression(1328/4999): loss=0.5440226589845009
Log Regression(1329/4999): loss=0.5439750689403238
Log Regression(1330/4999): loss=0.5446981773964197
Log Regression(1331/4999): loss=0.5518572810832788
Log Regression(1332/4999): loss=0.5524841121299108
Log Regression(1333/4999): loss=0.5455958001413855
Log Regression(1334/4999): loss=0.5446219639792904
Log Regression(1335/4999): loss=0.5492590067289007
Log Regression(1336/4999): loss=0.5468526653877224
Log Regression(1337/4999): loss=0.5453474387382841
Log Regression(1338/4999): loss=0.5456442294396284
Log Regression(1339/4999): loss=0.5500542910549533
Log Regression(1340/4999): loss=0

Log Regression(1484/4999): loss=0.5451372848011964
Log Regression(1485/4999): loss=0.5433136654122434
Log Regression(1486/4999): loss=0.5439165262977989
Log Regression(1487/4999): loss=0.5435638867719212
Log Regression(1488/4999): loss=0.5448818100406757
Log Regression(1489/4999): loss=0.5463490137957496
Log Regression(1490/4999): loss=0.5607543576151693
Log Regression(1491/4999): loss=0.5495716472399713
Log Regression(1492/4999): loss=0.5467793730018339
Log Regression(1493/4999): loss=0.5453748837213621
Log Regression(1494/4999): loss=0.5472610232522819
Log Regression(1495/4999): loss=0.5448568517279594
Log Regression(1496/4999): loss=0.543418649307963
Log Regression(1497/4999): loss=0.5441330413111084
Log Regression(1498/4999): loss=0.5435478019871882
Log Regression(1499/4999): loss=0.545108445431921
Log Regression(1500/4999): loss=0.5432990391697522
Log Regression(1501/4999): loss=0.5463524489427874
Log Regression(1502/4999): loss=0.5432229421699072
Log Regression(1503/4999): loss=0

Log Regression(1646/4999): loss=0.5457718174712617
Log Regression(1647/4999): loss=0.5444084072667349
Log Regression(1648/4999): loss=0.54298308288193
Log Regression(1649/4999): loss=0.5425578682665539
Log Regression(1650/4999): loss=0.5446994545899146
Log Regression(1651/4999): loss=0.5448128271136129
Log Regression(1652/4999): loss=0.5429451918512943
Log Regression(1653/4999): loss=0.5425487060528079
Log Regression(1654/4999): loss=0.5434146795599509
Log Regression(1655/4999): loss=0.5425353277189573
Log Regression(1656/4999): loss=0.5426548552349083
Log Regression(1657/4999): loss=0.5433850051224098
Log Regression(1658/4999): loss=0.5478307774772786
Log Regression(1659/4999): loss=0.5438029266077613
Log Regression(1660/4999): loss=0.5511444716000761
Log Regression(1661/4999): loss=0.5458775899360534
Log Regression(1662/4999): loss=0.5466946983589956
Log Regression(1663/4999): loss=0.5428853992397483
Log Regression(1664/4999): loss=0.542527309523876
Log Regression(1665/4999): loss=0.

Log Regression(1808/4999): loss=0.5451721012455576
Log Regression(1809/4999): loss=0.5513311755486996
Log Regression(1810/4999): loss=0.5483335817734512
Log Regression(1811/4999): loss=0.5488804673392723
Log Regression(1812/4999): loss=0.5432017378657991
Log Regression(1813/4999): loss=0.5424440474033688
Log Regression(1814/4999): loss=0.5423170652916111
Log Regression(1815/4999): loss=0.5423453341753116
Log Regression(1816/4999): loss=0.5422741316894325
Log Regression(1817/4999): loss=0.5424793986881559
Log Regression(1818/4999): loss=0.5422411833195894
Log Regression(1819/4999): loss=0.542531752897721
Log Regression(1820/4999): loss=0.5429988494924691
Log Regression(1821/4999): loss=0.5422061584104246
Log Regression(1822/4999): loss=0.5454157570642875
Log Regression(1823/4999): loss=0.547787301777442
Log Regression(1824/4999): loss=0.5428907073848972
Log Regression(1825/4999): loss=0.5439886040136238
Log Regression(1826/4999): loss=0.5424732486438574
Log Regression(1827/4999): loss=0

Log Regression(1970/4999): loss=0.5447306095407446
Log Regression(1971/4999): loss=0.5440708141660722
Log Regression(1972/4999): loss=0.5420749834860887
Log Regression(1973/4999): loss=0.5426112917841309
Log Regression(1974/4999): loss=0.5417862082434349
Log Regression(1975/4999): loss=0.5423076461078433
Log Regression(1976/4999): loss=0.5417900435335147
Log Regression(1977/4999): loss=0.5419468817550696
Log Regression(1978/4999): loss=0.5418313323449636
Log Regression(1979/4999): loss=0.5419723855951579
Log Regression(1980/4999): loss=0.5418173465356454
Log Regression(1981/4999): loss=0.5439155669306065
Log Regression(1982/4999): loss=0.5506233915704812
Log Regression(1983/4999): loss=0.547603979615032
Log Regression(1984/4999): loss=0.5453777855227909
Log Regression(1985/4999): loss=0.5438855627431743
Log Regression(1986/4999): loss=0.5459629068586312
Log Regression(1987/4999): loss=0.5485147787922064
Log Regression(1988/4999): loss=0.5452109516109386
Log Regression(1989/4999): loss=

Log Regression(2133/4999): loss=0.5434718006887275
Log Regression(2134/4999): loss=0.5425792103874361
Log Regression(2135/4999): loss=0.5443474275261514
Log Regression(2136/4999): loss=0.5416244111137289
Log Regression(2137/4999): loss=0.5417471891353125
Log Regression(2138/4999): loss=0.5425783268048706
Log Regression(2139/4999): loss=0.5505754079428269
Log Regression(2140/4999): loss=0.5429156396355758
Log Regression(2141/4999): loss=0.5497513093213198
Log Regression(2142/4999): loss=0.5434979142787488
Log Regression(2143/4999): loss=0.5418450841612417
Log Regression(2144/4999): loss=0.5431424256044162
Log Regression(2145/4999): loss=0.5472300957353533
Log Regression(2146/4999): loss=0.5443529916567366
Log Regression(2147/4999): loss=0.5423197416926007
Log Regression(2148/4999): loss=0.5416172633988374
Log Regression(2149/4999): loss=0.5419382406206646
Log Regression(2150/4999): loss=0.5420812073531389
Log Regression(2151/4999): loss=0.5495232031549514
Log Regression(2152/4999): loss

Log Regression(2295/4999): loss=0.5414535688194247
Log Regression(2296/4999): loss=0.5413402941500168
Log Regression(2297/4999): loss=0.5413793813110434
Log Regression(2298/4999): loss=0.5417549531670421
Log Regression(2299/4999): loss=0.5413371291003092
Log Regression(2300/4999): loss=0.5414106768176369
Log Regression(2301/4999): loss=0.5418375989908968
Log Regression(2302/4999): loss=0.5436281015363047
Log Regression(2303/4999): loss=0.5414529983487
Log Regression(2304/4999): loss=0.5415209139519356
Log Regression(2305/4999): loss=0.5425690613436144
Log Regression(2306/4999): loss=0.541707379258598
Log Regression(2307/4999): loss=0.5416813557566503
Log Regression(2308/4999): loss=0.5413515478043778
Log Regression(2309/4999): loss=0.5416452239756457
Log Regression(2310/4999): loss=0.5414866629070343
Log Regression(2311/4999): loss=0.54213121413172
Log Regression(2312/4999): loss=0.5414635865598814
Log Regression(2313/4999): loss=0.5413993312254501
Log Regression(2314/4999): loss=0.542

Log Regression(2457/4999): loss=0.541604233700527
Log Regression(2458/4999): loss=0.5414758918654431
Log Regression(2459/4999): loss=0.5435743700487787
Log Regression(2460/4999): loss=0.5477102432823401
Log Regression(2461/4999): loss=0.5451396896794214
Log Regression(2462/4999): loss=0.5436083813731124
Log Regression(2463/4999): loss=0.542174420721865
Log Regression(2464/4999): loss=0.5412662692810469
Log Regression(2465/4999): loss=0.541224801308627
Log Regression(2466/4999): loss=0.5412741535692331
Log Regression(2467/4999): loss=0.5414689097058929
Log Regression(2468/4999): loss=0.5411518540745215
Log Regression(2469/4999): loss=0.5422093963627963
Log Regression(2470/4999): loss=0.5416064983377309
Log Regression(2471/4999): loss=0.545385251301042
Log Regression(2472/4999): loss=0.5420742127568524
Log Regression(2473/4999): loss=0.5414808824573022
Log Regression(2474/4999): loss=0.54178847629655
Log Regression(2475/4999): loss=0.5419507042274084
Log Regression(2476/4999): loss=0.541

Log Regression(2619/4999): loss=0.540989666565783
Log Regression(2620/4999): loss=0.5422335815876052
Log Regression(2621/4999): loss=0.5410180663084186
Log Regression(2622/4999): loss=0.5409627975218174
Log Regression(2623/4999): loss=0.5412385048122352
Log Regression(2624/4999): loss=0.5434920923449017
Log Regression(2625/4999): loss=0.5429147934294652
Log Regression(2626/4999): loss=0.5427263540408982
Log Regression(2627/4999): loss=0.5433853899403601
Log Regression(2628/4999): loss=0.5420678022270636
Log Regression(2629/4999): loss=0.5414796072795907
Log Regression(2630/4999): loss=0.5409583278013731
Log Regression(2631/4999): loss=0.5427492085667998
Log Regression(2632/4999): loss=0.5432301746406802
Log Regression(2633/4999): loss=0.5440205570555385
Log Regression(2634/4999): loss=0.5467466411502113
Log Regression(2635/4999): loss=0.5458727282102123
Log Regression(2636/4999): loss=0.54692783436325
Log Regression(2637/4999): loss=0.5409550067398021
Log Regression(2638/4999): loss=0.

Log Regression(2781/4999): loss=0.5410354357622843
Log Regression(2782/4999): loss=0.5417611685877636
Log Regression(2783/4999): loss=0.5421035967840091
Log Regression(2784/4999): loss=0.5410741318482946
Log Regression(2785/4999): loss=0.5417086786614896
Log Regression(2786/4999): loss=0.5409072791248728
Log Regression(2787/4999): loss=0.5415140644424142
Log Regression(2788/4999): loss=0.5423843122454137
Log Regression(2789/4999): loss=0.5420854529240714
Log Regression(2790/4999): loss=0.5421066255594812
Log Regression(2791/4999): loss=0.5410766925539341
Log Regression(2792/4999): loss=0.5476425450149572
Log Regression(2793/4999): loss=0.5426453726201322
Log Regression(2794/4999): loss=0.5480372266310725
Log Regression(2795/4999): loss=0.5416599915340509
Log Regression(2796/4999): loss=0.5426506964079605
Log Regression(2797/4999): loss=0.5436031803669535
Log Regression(2798/4999): loss=0.5473258317966038
Log Regression(2799/4999): loss=0.5416814381931363
Log Regression(2800/4999): loss

Log Regression(2943/4999): loss=0.541886191712387
Log Regression(2944/4999): loss=0.5415318908847851
Log Regression(2945/4999): loss=0.5431041044649894
Log Regression(2946/4999): loss=0.5433659391350115
Log Regression(2947/4999): loss=0.5469714107289719
Log Regression(2948/4999): loss=0.5431334442781163
Log Regression(2949/4999): loss=0.5416060389734687
Log Regression(2950/4999): loss=0.5461146448039937
Log Regression(2951/4999): loss=0.5441426401854158
Log Regression(2952/4999): loss=0.5470847882650426
Log Regression(2953/4999): loss=0.5453382909239147
Log Regression(2954/4999): loss=0.5428687580950444
Log Regression(2955/4999): loss=0.5473457923531133
Log Regression(2956/4999): loss=0.5420094112915178
Log Regression(2957/4999): loss=0.5412282657017718
Log Regression(2958/4999): loss=0.5414109579361516
Log Regression(2959/4999): loss=0.5416622919184623
Log Regression(2960/4999): loss=0.5414052576867299
Log Regression(2961/4999): loss=0.5412270119515549
Log Regression(2962/4999): loss=

Log Regression(3105/4999): loss=0.5424055367455085
Log Regression(3106/4999): loss=0.5416145039455004
Log Regression(3107/4999): loss=0.5406041417678296
Log Regression(3108/4999): loss=0.5443557698343895
Log Regression(3109/4999): loss=0.5422078586726077
Log Regression(3110/4999): loss=0.5451514818488947
Log Regression(3111/4999): loss=0.5420929041976432
Log Regression(3112/4999): loss=0.5437934289490344
Log Regression(3113/4999): loss=0.5417375396666924
Log Regression(3114/4999): loss=0.5417636002521812
Log Regression(3115/4999): loss=0.542653891609096
Log Regression(3116/4999): loss=0.5443322383201395
Log Regression(3117/4999): loss=0.5432128592603941
Log Regression(3118/4999): loss=0.5448979872134242
Log Regression(3119/4999): loss=0.5460585842291205
Log Regression(3120/4999): loss=0.5504546286018792
Log Regression(3121/4999): loss=0.5458852213093288
Log Regression(3122/4999): loss=0.5444740681488272
Log Regression(3123/4999): loss=0.5409474707978015
Log Regression(3124/4999): loss=

Log Regression(3267/4999): loss=0.541404599522358
Log Regression(3268/4999): loss=0.5413373507959988
Log Regression(3269/4999): loss=0.5406678882455588
Log Regression(3270/4999): loss=0.5422666170654095
Log Regression(3271/4999): loss=0.5407185573043405
Log Regression(3272/4999): loss=0.541888151810015
Log Regression(3273/4999): loss=0.5449795846609946
Log Regression(3274/4999): loss=0.5417712608626252
Log Regression(3275/4999): loss=0.5481343693870495
Log Regression(3276/4999): loss=0.555394000283974
Log Regression(3277/4999): loss=0.5512230137686146
Log Regression(3278/4999): loss=0.5410702422382322
Log Regression(3279/4999): loss=0.5409589391510383
Log Regression(3280/4999): loss=0.5426247195813537
Log Regression(3281/4999): loss=0.5428692960563483
Log Regression(3282/4999): loss=0.5504095492108734
Log Regression(3283/4999): loss=0.541041973675185
Log Regression(3284/4999): loss=0.5411809994304259
Log Regression(3285/4999): loss=0.5406423920773125
Log Regression(3286/4999): loss=0.5

Log Regression(3429/4999): loss=0.5417656189176131
Log Regression(3430/4999): loss=0.5411259934437762
Log Regression(3431/4999): loss=0.5404751822498576
Log Regression(3432/4999): loss=0.5406163254043193
Log Regression(3433/4999): loss=0.5417860282355076
Log Regression(3434/4999): loss=0.5465506927454773
Log Regression(3435/4999): loss=0.542245729958542
Log Regression(3436/4999): loss=0.5418787335941746
Log Regression(3437/4999): loss=0.5432486472960247
Log Regression(3438/4999): loss=0.5404918881452714
Log Regression(3439/4999): loss=0.5405174921391179
Log Regression(3440/4999): loss=0.5405334079390696
Log Regression(3441/4999): loss=0.5422904204614665
Log Regression(3442/4999): loss=0.5422045057161786
Log Regression(3443/4999): loss=0.541839931696302
Log Regression(3444/4999): loss=0.5418115846102481
Log Regression(3445/4999): loss=0.5459730150976578
Log Regression(3446/4999): loss=0.5407761589473179
Log Regression(3447/4999): loss=0.5414771214422657
Log Regression(3448/4999): loss=0

Log Regression(3591/4999): loss=0.5422597336641652
Log Regression(3592/4999): loss=0.5407245256309421
Log Regression(3593/4999): loss=0.5433254382286402
Log Regression(3594/4999): loss=0.541470157249919
Log Regression(3595/4999): loss=0.540844117553087
Log Regression(3596/4999): loss=0.5430197217192483
Log Regression(3597/4999): loss=0.5420228159276735
Log Regression(3598/4999): loss=0.5403885045440635
Log Regression(3599/4999): loss=0.5420565860392477
Log Regression(3600/4999): loss=0.5419850962982979
Log Regression(3601/4999): loss=0.5406936157047024
Log Regression(3602/4999): loss=0.5409637546274362
Log Regression(3603/4999): loss=0.5504972571988689
Log Regression(3604/4999): loss=0.5514226766305623
Log Regression(3605/4999): loss=0.5442794184358717
Log Regression(3606/4999): loss=0.5418054199227614
Log Regression(3607/4999): loss=0.5402857698113596
Log Regression(3608/4999): loss=0.5409042463215341
Log Regression(3609/4999): loss=0.5522258032911214
Log Regression(3610/4999): loss=0

Log Regression(3753/4999): loss=0.5408147033793915
Log Regression(3754/4999): loss=0.5403873001271778
Log Regression(3755/4999): loss=0.5403673388483335
Log Regression(3756/4999): loss=0.5405031318286986
Log Regression(3757/4999): loss=0.5414460007493471
Log Regression(3758/4999): loss=0.541424119311992
Log Regression(3759/4999): loss=0.5431876037306046
Log Regression(3760/4999): loss=0.5420842325005896
Log Regression(3761/4999): loss=0.5405866293415367
Log Regression(3762/4999): loss=0.5413259618894486
Log Regression(3763/4999): loss=0.5488725519486036
Log Regression(3764/4999): loss=0.5459520196949205
Log Regression(3765/4999): loss=0.5496023471955069
Log Regression(3766/4999): loss=0.5432380022536774
Log Regression(3767/4999): loss=0.5414576686083515
Log Regression(3768/4999): loss=0.5423323279083615
Log Regression(3769/4999): loss=0.5403578023982488
Log Regression(3770/4999): loss=0.5417621334222997
Log Regression(3771/4999): loss=0.5407856325984257
Log Regression(3772/4999): loss=

Log Regression(3915/4999): loss=0.5403676637859233
Log Regression(3916/4999): loss=0.5414845103467015
Log Regression(3917/4999): loss=0.5413026841730532
Log Regression(3918/4999): loss=0.5402321696286508
Log Regression(3919/4999): loss=0.5405971714519896
Log Regression(3920/4999): loss=0.5402588902293377
Log Regression(3921/4999): loss=0.5403110117586455
Log Regression(3922/4999): loss=0.542361800743062
Log Regression(3923/4999): loss=0.5431652028452253
Log Regression(3924/4999): loss=0.5412260342724589
Log Regression(3925/4999): loss=0.5406525627377795
Log Regression(3926/4999): loss=0.5403976759254189
Log Regression(3927/4999): loss=0.5402824446775532
Log Regression(3928/4999): loss=0.5406741185464622
Log Regression(3929/4999): loss=0.5404156131162352
Log Regression(3930/4999): loss=0.5407166034596728
Log Regression(3931/4999): loss=0.5406912182302103
Log Regression(3932/4999): loss=0.540240537377083
Log Regression(3933/4999): loss=0.5436029884970461
Log Regression(3934/4999): loss=0

Log Regression(4077/4999): loss=0.5436047397649634
Log Regression(4078/4999): loss=0.5440063052743882
Log Regression(4079/4999): loss=0.5417719183955774
Log Regression(4080/4999): loss=0.5422943208237584
Log Regression(4081/4999): loss=0.54038954200668
Log Regression(4082/4999): loss=0.5402263980934636
Log Regression(4083/4999): loss=0.5404925097746157
Log Regression(4084/4999): loss=0.540381135857016
Log Regression(4085/4999): loss=0.5401466540256984
Log Regression(4086/4999): loss=0.5408755421637306
Log Regression(4087/4999): loss=0.543691845133258
Log Regression(4088/4999): loss=0.5431286895231132
Log Regression(4089/4999): loss=0.5403083751068783
Log Regression(4090/4999): loss=0.5412275571718198
Log Regression(4091/4999): loss=0.5405839627812927
Log Regression(4092/4999): loss=0.5438672819172329
Log Regression(4093/4999): loss=0.5401466014968441
Log Regression(4094/4999): loss=0.5418458521488698
Log Regression(4095/4999): loss=0.5404051057938799
Log Regression(4096/4999): loss=0.5

Log Regression(4239/4999): loss=0.5432633105496097
Log Regression(4240/4999): loss=0.5411663317290543
Log Regression(4241/4999): loss=0.5402656710162611
Log Regression(4242/4999): loss=0.5403694469669886
Log Regression(4243/4999): loss=0.5401963860286627
Log Regression(4244/4999): loss=0.5403161765040443
Log Regression(4245/4999): loss=0.540564806361201
Log Regression(4246/4999): loss=0.5472805585940311
Log Regression(4247/4999): loss=0.5409449880624291
Log Regression(4248/4999): loss=0.5404476172705778
Log Regression(4249/4999): loss=0.5424110628620782
Log Regression(4250/4999): loss=0.5432343789380197
Log Regression(4251/4999): loss=0.541915735282912
Log Regression(4252/4999): loss=0.5466250845128692
Log Regression(4253/4999): loss=0.546240706787495
Log Regression(4254/4999): loss=0.5437338861117359
Log Regression(4255/4999): loss=0.5405178915093877
Log Regression(4256/4999): loss=0.5402395585316417
Log Regression(4257/4999): loss=0.5408505774592526
Log Regression(4258/4999): loss=0.

Log Regression(4401/4999): loss=0.5435149366527687
Log Regression(4402/4999): loss=0.5411132035897978
Log Regression(4403/4999): loss=0.5400187352534896
Log Regression(4404/4999): loss=0.5402848061882752
Log Regression(4405/4999): loss=0.5408038509782724
Log Regression(4406/4999): loss=0.5404226879251399
Log Regression(4407/4999): loss=0.5470928898165939
Log Regression(4408/4999): loss=0.552989055908269
Log Regression(4409/4999): loss=0.5411113899209191
Log Regression(4410/4999): loss=0.5440556739145249
Log Regression(4411/4999): loss=0.5466483054176965
Log Regression(4412/4999): loss=0.5446476615443084
Log Regression(4413/4999): loss=0.5530484530185766
Log Regression(4414/4999): loss=0.5511393964978037
Log Regression(4415/4999): loss=0.5422491924422568
Log Regression(4416/4999): loss=0.5400574622932551
Log Regression(4417/4999): loss=0.5406830544589298
Log Regression(4418/4999): loss=0.5426852232374301
Log Regression(4419/4999): loss=0.5417247444340305
Log Regression(4420/4999): loss=

Log Regression(4563/4999): loss=0.5401155364593095
Log Regression(4564/4999): loss=0.5404158436511216
Log Regression(4565/4999): loss=0.5401370992786341
Log Regression(4566/4999): loss=0.5418051084245501
Log Regression(4567/4999): loss=0.5434381889464661
Log Regression(4568/4999): loss=0.5415953044796102
Log Regression(4569/4999): loss=0.5419540331895129
Log Regression(4570/4999): loss=0.5400009747208119
Log Regression(4571/4999): loss=0.5403880484313079
Log Regression(4572/4999): loss=0.5400517322997024
Log Regression(4573/4999): loss=0.5422385505781123
Log Regression(4574/4999): loss=0.5420370811747895
Log Regression(4575/4999): loss=0.5447992782402019
Log Regression(4576/4999): loss=0.5406134526867618
Log Regression(4577/4999): loss=0.5401029497843226
Log Regression(4578/4999): loss=0.5410229556809029
Log Regression(4579/4999): loss=0.5403581372215877
Log Regression(4580/4999): loss=0.5409605559812692
Log Regression(4581/4999): loss=0.5403084776154552
Log Regression(4582/4999): loss

Log Regression(4725/4999): loss=0.5429823624515151
Log Regression(4726/4999): loss=0.54393710426243
Log Regression(4727/4999): loss=0.5449477680614165
Log Regression(4728/4999): loss=0.5402682012594097
Log Regression(4729/4999): loss=0.5400283070980063
Log Regression(4730/4999): loss=0.5410051074733527
Log Regression(4731/4999): loss=0.5403186667105088
Log Regression(4732/4999): loss=0.5400251878623503
Log Regression(4733/4999): loss=0.5401649323342126
Log Regression(4734/4999): loss=0.5407325125193421
Log Regression(4735/4999): loss=0.5406477136683345
Log Regression(4736/4999): loss=0.5405385672747391
Log Regression(4737/4999): loss=0.5402045807536722
Log Regression(4738/4999): loss=0.5400974036805627
Log Regression(4739/4999): loss=0.5420112698713885
Log Regression(4740/4999): loss=0.5408615153305306
Log Regression(4741/4999): loss=0.540137208122748
Log Regression(4742/4999): loss=0.5403393880841589
Log Regression(4743/4999): loss=0.5404710318919508
Log Regression(4744/4999): loss=0.

Log Regression(4887/4999): loss=0.5402863618921366
Log Regression(4888/4999): loss=0.543441649674543
Log Regression(4889/4999): loss=0.5414767785044818
Log Regression(4890/4999): loss=0.5411180907993698
Log Regression(4891/4999): loss=0.5402551408374878
Log Regression(4892/4999): loss=0.5407124541725742
Log Regression(4893/4999): loss=0.5414900652126782
Log Regression(4894/4999): loss=0.5436570324355459
Log Regression(4895/4999): loss=0.5424281085296577
Log Regression(4896/4999): loss=0.5428855704832396
Log Regression(4897/4999): loss=0.5414727448341016
Log Regression(4898/4999): loss=0.5408970558928979
Log Regression(4899/4999): loss=0.5404657294434442
Log Regression(4900/4999): loss=0.5404423602719173
Log Regression(4901/4999): loss=0.5430837714792239
Log Regression(4902/4999): loss=0.5424359772281476
Log Regression(4903/4999): loss=0.5438356371110523
Log Regression(4904/4999): loss=0.5415968136364168
Log Regression(4905/4999): loss=0.5402320373328016
Log Regression(4906/4999): loss=

In [70]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

0.7297333333333333

In [71]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.73176

### lambda = 10

In [57]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 5000
gamma = 1e-7
lambda_ = 10

Run the training process here

In [60]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

  


0.0

In [61]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.5781918208

### lambda = 0.1

In [31]:
initial_w = np.zeros(tX.shape[1])#np.random.normal(loc=0, scale=1, size=(tX.shape[1],1))
max_iters = 5000
gamma = 1e-7
lambda_ = 1

<Run the training step here>

Run the training process here

In [33]:
y_pred = predict_labels(w, tX_train)
np.mean(y_train.reshape(-1,1)==y_pred)

0.728385

In [45]:
y_pred = predict_labels(w, tX_val)
np.mean(y_val.reshape(-1,1)==y_pred)

0.7314

## After the minimal training, we try to do some extra data processing steps

We can begin with removing the outliers

In [10]:
from implementations import *

In [11]:
import data_processor as dp

In [12]:
tX_train_new, outlier_thresh = dp.remove_outliers(tX_train, conf_int=0.05)

In [13]:
# We use the same outlier threshold
tX_val_new, _ = dp.remove_outliers(tX_val, outlier_thresh=outlier_thresh)

In [14]:
tX_train_new.shape, tX_val_new.shape

((225000, 30), (25000, 30))

#### We add polynomial features to help with training

In [15]:
tX_train_aug, tX_val_aug = dp.poly_features(tX_train_new,3), dp.poly_features(tX_val_new,3)
tX_train_aug, tX_val_aug = dp.add_ones(tX_train_aug), dp.add_ones(tX_val_aug)

In [16]:
tX_train_aug.shape, tX_val_aug.shape

((225000, 91), (25000, 91))

#### Ridge Regression with polynomials, without outliers

In [17]:
w, mse = ridge_regression(y_train, tX_train_aug, lambda_=1)

In [18]:
y_pred = predict_labels(w, tX_train_aug)
print(y_pred)
print(y_pred.shape)
np.mean(y_train==y_pred)

[-1. -1.  1. ...  1. -1. -1.]
(225000,)


0.6872355555555556

In [19]:
y_pred = predict_labels(w, tX_val_aug)
print(y_pred)
print(y_pred.shape)
np.mean(y_val==y_pred)

[-1. -1. -1. ... -1.  1. -1.]
(25000,)


0.68652

Clearly removing outliers isn't doing a better job on the data. So we don't do that yet

#### We try to normalize the data, hoping that effect of certain features doesn't dull others'

In [81]:
from implementations import *
import data_processor as dp

In [82]:
tX_train_new, norm_stats = dp.standardize(tX_train_f)
# We use the same outlier threshold
tX_val_new, _ = dp.standardize(tX_val_f, norm_stats=norm_stats)

In [83]:
tX_train_new.shape, tX_val_new.shape

((225000, 30), (25000, 30))

#### We add polynomial features to help with training

In [84]:
tX_train_aug_poly, tX_val_aug_poly = dp.poly_features(tX_train_new,13), dp.poly_features(tX_val_new,13)
tX_train_aug_poly, norm_stats = dp.standardize(tX_train_aug_poly)
# We use the same outlier threshold
tX_val_aug_poly, _ = dp.standardize(tX_val_aug_poly, norm_stats=norm_stats)

tX_train_poly_std, tX_val_poly_std = dp.add_ones(tX_train_aug_poly), dp.add_ones(tX_val_aug_poly)

In [85]:
tX_train_poly_std.shape, tX_val_poly_std.shape

((225000, 391), (25000, 391))

#### Ridge Regression with polynomials, standardizing the data

In [86]:
ws_ridge_poly_std = []
for l in [1e-9,1e-8, 1e-7,1e-6,1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]:
    w, mse = ridge_regression(y_train, tX_train_poly_std, lambda_=l)
    ws_ridge_poly_std.append(w)
    #print(w)
    y_pred = predict_labels(w, tX_train_poly_std)
    #print(y_pred)
    #print(y_pred.shape)
    print("Lambda="+str(l))
    print(np.mean(y_train==y_pred))
    y_pred = predict_labels(w, tX_val_poly_std)
    print(np.mean(y_val==y_pred))

Lambda=1e-09
0.09902222222222222
0.09748
Lambda=1e-08
0.09448888888888889
0.09488
Lambda=1e-07
0.09209777777777778
0.09164
Lambda=1e-06
0.08792888888888889
0.0876
Lambda=1e-05
0.08610222222222222
0.08576
Lambda=0.0001
0.08498222222222222
0.08484
Lambda=0.001
0.0836
0.08348
Lambda=0.01
0.08057777777777778
0.081
Lambda=0.1
0.07503555555555555
0.07524
Lambda=1
0.06924444444444444
0.07008
Lambda=10.0
0.06581333333333333
0.06644
Lambda=100.0
0.06420888888888888
0.0638
Lambda=1000.0
0.061013333333333336
0.06104
Lambda=10000.0
0.05280888888888889
0.05192
Lambda=100000.0
0.04538222222222222
0.04468
Lambda=1000000.0
0.03361777777777778
0.03188
Lambda=10000000.0
0.0016044444444444444
0.0012


So some good lambda values are 1e-4 to 1e4, each giving good range of normalization

#### We can try using logistic regression here

In [61]:
tX_train_aug_poly, tX_val_aug_poly = dp.poly_features(tX_train_new,4), dp.poly_features(tX_val_new,4)
tX_train_aug_poly, stats2 = dp.standardize(tX_train_aug_poly)
tX_val_aug_poly, _ = dp.standardize(tX_val_aug_poly, norm_stats=stats2)
tX_train_poly_std, tX_val_poly_std = dp.add_ones(tX_train_aug_poly), dp.add_ones(tX_val_aug_poly)

In [62]:
tX_train_poly_std.shape, tX_val_poly_std.shape

((225000, 121), (25000, 121))

#### Regulaized Logistic Regression with polynomials, standardizing the data

In [47]:
from implementations import *

In [138]:
initial_w = np.zeros((tX_train_poly_std.shape[1],1))
#initial_w = np.random.normal(loc=0, scale=0.0001, size=(tX_train_poly_std.shape[1],1))
max_iters = 300
gamma = 3e-4
lambda_ = 0

In [139]:
y_train_lr = 0.5*(y_train+1)
print(y_train_lr.shape)

(225000,)


In [140]:
w, mse = reg_logistic_regression(y_train_lr, tX_train_poly_std, lambda_, w, max_iters, gamma, batch_size=64, lr_decay=True, lr_decay_rate=0.9)

Log Regression(0/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(1/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(2/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(3/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(4/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(5/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(6/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(7/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(8/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(9/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(10/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(11/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(12/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(13/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(14/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(15/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(16/299): loss=nan log_reg_loss=nan 

Log Regression(138/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(139/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(140/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(141/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(142/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(143/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(144/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(145/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(146/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(147/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(148/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(149/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(150/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(151/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(152/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(153/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(154/299):

Log Regression(274/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(275/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(276/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(277/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(278/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(279/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(280/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(281/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(282/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(283/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(284/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(285/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(286/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(287/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(288/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(289/299): loss=nan log_reg_loss=nan reg_l=0.0
Log Regression(290/299):

In [141]:
y_pred1 = predict_labels(w, tX_train_poly_std)
print(np.mean(y_train.reshape(-1,1)==y_pred1))

0.7693111111111111


In [142]:
y_pred = predict_labels(w, tX_val_poly_std)
print(np.mean(y_val.reshape(-1,1)==y_pred))

0.77372


In [120]:
w_good = w.copy()

In [122]:
import pickle
with open('weights.pkl','wb') as f:
    pickle.dump(w_good, f)

In [143]:
w_wut = w.copy()
import pickle
with open('weights_naned.pkl','wb') as f:
    pickle.dump(w_wut, f)

## Do your thing crazy machine learning thing here :) ...

In [87]:
import gc
gc.collect()

2361

In [38]:
tX_train_f, norm=dp.standardize(tX_train_f)
tX_val_f, _=dp.standardize(tX_val_f, norm)

In [20]:
tX_train_int,int_list = dp.interaction_terms(tX_train_f)

In [21]:
tX_train_cap, int_list=dp.var_cap(tX_train_int, int_list)

(435,)


In [22]:
tX_train_int=dp.generate_interactions(tX_train_f,int_list[:150])

In [23]:
tX_val_int=dp.generate_interactions(tX_val_f,int_list[:150])

In [24]:
#y_train_int,int_list = dp.interaction_terms(y_train_f)
#y_val_int,int_list = dp.interaction_terms(y_val)

In [39]:

tX_train_poly=dp.poly_features(tX_train_f,5)
tX_val_poly=dp.poly_features(tX_val_f,5)
                           
tX_train_poly, norm=dp.standardize(tX_train_poly)
tX_val_poly,_=dp.standardize(tX_val_poly, norm)
tX_train_poly, tX_val_poly = dp.add_ones(tX_train_poly), dp.add_ones(tX_val_poly)

In [40]:
tX_val_poly.shape


(25000, 211)

In [41]:
y_train_l = 0.5*(y_train+1)
y_val_l=0.5*(y_val+1)


print(y_train_l)
print(y_val_l)

[0. 0. 1. ... 0. 1. 0.]
[1. 1. 0. ... 0. 0. 1.]


## Logistic Regresion Regularized

In [None]:
max_iters=1500
gamma= 1e-4

initial_w = np.zeros((tX_train_int.shape[1],1))
weights=[]
for l in [1e-8, 1e-7,1e-6,1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]:
    print(l)
    ws, losses = reg_logistic_regression(y_train_l, tX_train_int, l, initial_w, max_iters, gamma, batch_size = 100)
    y_pred_train=predict_labels(ws[1499],tX_train_int)
    y_pred_val=predict_labels(ws[1499],tX_val_int)
    print(np.mean(y_train_l==y_pred_train.T))
    print(np.mean(y_val_l==y_pred_val.T))
    weights.append(ws)
    #y_pred_train = predict_labels(w, tX_train_f)
    #y_pred_val = predict_labels(w, tX_val_f)
    #print(str(l)+": MSE_TRAIN="+str(mse)+ " TRAIN="+str(np.mean(y_train==y_pred_train))+" VAL="+str(np.mean(y_val==y_pred_val)))

1e-08
Log Regression(0/1499): loss=0.6926725230091082
Log Regression(100/1499): loss=0.6646696115496871
Log Regression(200/1499): loss=0.6523643551717306
Log Regression(300/1499): loss=0.645099254928256
Log Regression(400/1499): loss=0.6406879637232095


In [96]:
y_pred_train=predict_labels(ws[999],tX_train_poly)
y_pred_val=predict_labels(ws[999],tX_val_poly)

print(y_train_l)
print(y_pred_train.T)
print(len(ws[1499]))
print(np.mean(y_train_l==y_pred_train.T))
print(np.mean(y_val_l==y_pred_val.T))


[1. 1. 0. ... 0. 1. 0.]
[[0. 0. 0. ... 0. 1. 0.]]
150
0.67228
0.67444


In [53]:
print(tX_val_poly.shape)

(25000, 301)


In [45]:
def plot(losses,ws,tX_val,y_val):
    # Plot two plots:
    # loss as a function of step
    # accuracy as a function of step  (on validation set)

    steps = [i for i in range(1,len(losses)+1)]

    ax1 = plt.plot(steps,losses)
    plt.title('Loss as a function of step')
    plt.xlabel('number of steps')
    plt.ylabel('loss (approximate)')
    plt.show()
    #compute accuracy
    accuracy = []
    for w in ws:
        y_pred = predict_labels(w,tX_val.T)
        accuracy.append(np.mean(y_val.reshape(-1,1)==y_pred))

        
    ax2 = plt.plot(steps,accuracy)
    plt.title('Accuracy as a function of step')
    plt.xlabel('number of steps')
    plt.ylabel('accuracy on validation set')
    plt.show()
print(tX_val_poly.shape)
print(ws[899].shape)
plot(losses,weights[1],tX_val_poly,y_val)

(25000, 211)
(751, 1)


IndexError: list index out of range

In [24]:
y_pred_train = np.dot(tX_train_f,l2[0])
y_pred_val = predict_labels(l2[0], tX_val_f)

print(str(np.mean(y_train==y_pred_train))+" VAL="+str(np.mean(y_val==y_pred_val)))

  after removing the cwd from sys.path.


0.0 VAL=0.66116


## Generate predictions and save ouput in csv format for submission:

In [18]:
DATA_TEST_PATH = '../data/test.csv' # TODO: download train data and supply path here 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [21]:
import gc
gc.collect()


344

In [19]:
tX_test, _ = dp.fill_nan(tX_test, nan_value=-999, method='use_filler', filler=filler)

tX_test,int_list = dp.interaction_terms(tX_test)
tX_test=dp.add_ones(tX_test)



#tX_test_new, _ = dp.standardize(tX_test, norm_stats=norm_stats)
#tX_test_aug_poly = dp.poly_features(tX_test_new,7)
#tX_test_poly_std = dp.add_ones(tX_test_aug_poly)


In [22]:
tX_test=dp.poly_features(tX_test,3)


MemoryError: 

In [None]:
y_pred = predict_labels(l1[0], tX_test)

In [18]:
#tX_val_f, _ = dp.fill_nan(tX_test, nan_value=-999, method='use_filler', filler=filler)
#tX_val_int, int_list = dp.interaction_terms(tX_val_f)
#tX_val_int=dp.poly_features(tX_val_int, 5)
#y_pred = predict_labels(l1[1], tX_val_f)

MemoryError: 

In [74]:
OUTPUT_PATH = 'out.csv' # TODO: fill in desired name of output file for submission
#y_pred = predict_labels(weights, tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)

In [28]:
##Logistic Regresion Regularized

## Logistic Regresion Regularized