In [32]:
import pytest
import regression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
import numpy as np

In [27]:
# Load the train/validation split for the six selected features.
X_train, X_val, y_train, y_val = regression.utils.loadDataset(
    features=[
        'Penicillin V Potassium 500 MG',
        'Computed tomography of chest and abdomen',
        'Plain chest X-ray (procedure)',
        'Low Density Lipoprotein Cholesterol',
        'Creatinine',
        'AGE_DIAGNOSIS'
    ],
    split_percent=0.8,
    split_seed=42
)

# Scale the data, since values vary across features. Note that we
# fit on the training data only and reuse the same scaler for X_val,
# so no validation statistics leak into the transformation.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)

# For testing purposes, once you've added your code.
# CAUTION: hyperparameters have not been optimized.
# num_feats is taken from the scaled training matrix rather than repeated
# as a literal, so it stays in sync with the feature list above.
log_model = regression.logreg.LogisticRegressor(
    num_feats=X_train.shape[1],
    learning_rate=0.00001,
    tol=0.01,
    max_iter=10,
    batch_size=10
)

# Append a constant column of ones to the validation matrix; later cells
# pass this augmented X_val to the model.
# (presumably this column pairs with a bias weight inside the model -- TODO confirm)
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])

# Keep only the first ten predictions as a quick preview.
preds = log_model.make_prediction(X_val)[0:10]

# Check that we get the expected number of predictions: the preview slice
# must be a flat vector of exactly ten values.
assert np.shape(preds) == (10,), f"unexpected preds shape: {np.shape(preds)}"

In [19]:
# Sanity check: the preview slice should hold exactly ten predictions.
np.asarray(preds).shape == (10,)

True

In [28]:
# Display the ten-element preview of the model's validation predictions.
preds

array([0.84940953, 0.51964634, 0.11676285, 0.83266918, 0.20462666,
       0.97990348, 0.30077071, 0.9744497 , 0.32878418, 0.57526369])

In [29]:
# Evaluate the model's loss on the full validation set: predict on X_val,
# then score those predictions against the true labels y_val.
losses = log_model.loss_function(
    y_val,
    log_model.make_prediction(X_val),
)
losses

0.7260681224143479

In [34]:
# Display the validation labels returned by loadDataset.
# NOTE(review): this dumps the entire label array into the output; a short
# slice (e.g. y_val[:20]) or a class count would keep the notebook readable.
y_val

array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0,

In [36]:
# Validate the hand-written loss against scikit-learn's log_loss.
# Predictions are computed once and shared, so both losses are guaranteed
# to be scored on exactly the same inputs.
val_probs = log_model.make_prediction(X_val)
my_losses = log_model.loss_function(y_val, val_probs)
sklearn_losses = log_loss(y_val, val_probs)

# Fail loudly if the two implementations disagree instead of relying on a
# visual comparison of the displayed numbers.
np.testing.assert_allclose(my_losses, sklearn_losses)

sklearn_losses

0.7260681224143479

In [37]:
# Display the custom loss value so it can be compared with the
# scikit-learn value shown by the previous cell.
my_losses

0.7260681224143479

In [40]:
# Compute the model's gradient on the validation data (labels first, then
# the design matrix that already carries the appended column of ones).
gradient = log_model.calculate_gradient(
    y_val,
    X_val,
)
gradient

array([-0.2784112 , -0.1937338 ,  0.01551079,  0.        ,  0.        ,
        0.06515538,  0.07522936])

In [43]:
# Inspect the model's current weight vector.
# (presumably one weight per feature plus a trailing bias weight -- TODO confirm)
log_model.W

array([-0.59419986,  0.10745435,  1.83399985, -0.95008541,  0.71772143,
        0.76705803,  0.38425104])

In [45]:
# Element-wise comparison of the model's weights against a freshly drawn
# standard-normal vector of the same length (6 + 1 entries).
# NOTE(review): two independent continuous random draws essentially never
# coincide, so this is all-True whether or not the weights were ever
# updated. To verify that training changes the weights, snapshot
# log_model.W before fitting and compare against that copy instead.
np.not_equal(log_model.W, np.random.randn(6 + 1))

array([ True,  True,  True,  True,  True,  True,  True])