# Exercise 1 - Logistic regression

## Objective

In this exercise, you have to implement 4 different functions:
* `softmax`: compute the softmax of a vector. This function takes as input a tensor and outputs a discrete probability distribution. 

* `cross_entropy`: calculate the cross entropy loss given a vector of predictions (after softmax) and a vector of ground truth (one-hot vector).

* `model`: takes a batch of images (a stack of images along the first dimension) and feeds it through the logistic regression model.

* `accuracy`: given a vector of predictions and a vector of ground truth, calculates the accuracy.

As always, you can run `python logistic.py` to check your implementation.

## Tips

You can leverage the `tf.boolean_mask` function to calculate the cross entropy. Keep in mind
that most elements of the ground truth vector are zeros, so only the predicted probability of
the true class contributes to the loss.

In [3]:
import tensorflow as tf

In [7]:
tf.version

<module 'tensorflow._api.v2.version' from '/Users/ruiwang/0_work/0_projects/env/lib/python3.9/site-packages/tensorflow/_api/v2/version/__init__.py'>

In [14]:
logits = tf.constant([1.0, 2.0, 3.0])
logits

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>

In [18]:
logits_exp = tf.math.exp(logits)
logits_exp

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 2.7182817,  7.389056 , 20.085537 ], dtype=float32)>

In [19]:
logits_sum = tf.math.reduce_sum(logits_exp)
logits_sum

<tf.Tensor: shape=(), dtype=float32, numpy=30.192875>

In [20]:
logits_exp / logits_sum

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>

In [23]:
# keepdims=True retains the reduced axis, so the sum comes back with shape (1,)
# instead of the scalar shape () produced by the plain reduce_sum above.
logits_sum = tf.math.reduce_sum(logits_exp, keepdims=True)
logits_sum

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([30.192875], dtype=float32)>

In [28]:
soft_logits = logits_exp / logits_sum
soft_logits

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>

In [41]:
scaled_logits = tf.constant([[0.1, 0.9], [0.8, 0.2]])
scaled_logits

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.1, 0.9],
       [0.8, 0.2]], dtype=float32)>

In [52]:
# Integer (int32) one-hot labels; this is the tensor later passed as the mask
# argument of tf.boolean_mask.
one_hot = tf.constant([[0, 1], [1, 0]])
# Float32 variant kept for reference. NOTE(review): the elementwise
# `one_hot * tf.math.log(scaled_logits)` cell shows a float32 result, so it was
# presumably executed with this float version -- confirm before re-running.
# one_hot = tf.constant([[0, 1], [1, 0]], dtype=tf.float32)
one_hot

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[0, 1],
       [1, 0]], dtype=int32)>

In [35]:
tf.math.log(scaled_logits)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-2.3025851 , -0.10536055],
       [-0.22314353, -1.609438  ]], dtype=float32)>

In [51]:
one_hot * tf.math.log(scaled_logits)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.        , -0.10536055],
       [-0.22314353, -0.        ]], dtype=float32)>

In [54]:
masked_prob = tf.boolean_mask(scaled_logits, one_hot)
masked_prob

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.9, 0.8], dtype=float32)>

In [55]:
tf.math.log(masked_prob)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([-0.10536055, -0.22314353], dtype=float32)>

In [59]:
-1 * tf.math.reduce_sum(tf.math.log(masked_prob), keepdims=True)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3285041], dtype=float32)>

In [62]:
X = tf.random.uniform([5, 5, 3])
X

<tf.Tensor: shape=(5, 5, 3), dtype=float32, numpy=
array([[[0.5335065 , 0.7880422 , 0.78750813],
        [0.171098  , 0.86527383, 0.1821984 ],
        [0.12809324, 0.71805584, 0.66652536],
        [0.00696361, 0.7039901 , 0.76400745],
        [0.76803076, 0.9383755 , 0.9983665 ]],

       [[0.55184126, 0.24155247, 0.8808218 ],
        [0.7940211 , 0.06668591, 0.5435699 ],
        [0.6001693 , 0.953964  , 0.903463  ],
        [0.4600737 , 0.2969998 , 0.0644629 ],
        [0.12009048, 0.19613028, 0.9363071 ]],

       [[0.3580625 , 0.4970411 , 0.7616929 ],
        [0.6750002 , 0.94700336, 0.05755413],
        [0.76035655, 0.20780087, 0.05950677],
        [0.62907803, 0.0131346 , 0.3874601 ],
        [0.36050296, 0.0964973 , 0.85440767]],

       [[0.9351901 , 0.99108076, 0.8628657 ],
        [0.32033944, 0.43172634, 0.5711131 ],
        [0.02393043, 0.5481515 , 0.53499794],
        [0.17507732, 0.11159575, 0.21764553],
        [0.8927821 , 0.8420936 , 0.22883523]],

       [[0.84226024, 

In [93]:
# W = tf.random.normal([5*5*3, 4])
W = tf.Variable(tf.random.normal([5*5*3, 4]))
W

<tf.Variable 'Variable:0' shape=(75, 4) dtype=float32, numpy=
array([[-0.08957626, -0.84186465,  0.19052394,  0.5263986 ],
       [ 1.6550697 ,  0.5275133 , -0.10698907,  1.8569247 ],
       [-1.0891522 , -0.24796969, -0.75228065,  1.2265072 ],
       [ 0.32929093, -0.00876161,  0.52925104,  0.687278  ],
       [ 2.4256089 , -0.06495755,  0.11742056,  0.75189906],
       [-1.508461  , -1.4522682 ,  0.6368607 ,  1.7530802 ],
       [-0.02698978,  0.8169654 ,  1.4449905 , -1.8721476 ],
       [ 0.6815486 ,  0.22876506, -0.8755212 , -1.7186878 ],
       [-0.02988823, -1.8517512 ,  0.87642473, -2.5927796 ],
       [ 1.2027833 , -0.2753757 ,  0.35653493, -0.5339298 ],
       [-0.9168538 , -0.89809966,  1.3880072 ,  0.0411942 ],
       [-0.49547035,  1.1303303 , -0.43632892,  0.13824527],
       [-1.8153014 ,  1.4603785 , -0.6538977 , -0.2654129 ],
       [-0.37866098, -0.7783946 ,  0.6107067 ,  0.8847017 ],
       [-0.338855  ,  0.81892306,  0.44336495, -0.4139337 ],
       [ 0.72317064, -1

In [94]:
# b = tf.random.normal([1, 4])
b = tf.Variable(tf.random.normal([4]))
b

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([ 0.87693435,  0.46967152, -1.519383  , -0.45698765], dtype=float32)>

In [95]:
X.shape, W.shape, W.shape[0]

(TensorShape([5, 5, 3]), TensorShape([75, 4]), 75)

In [96]:
tf.reshape(X, [-1, W.shape[0]])

<tf.Tensor: shape=(1, 75), dtype=float32, numpy=
array([[0.5335065 , 0.7880422 , 0.78750813, 0.171098  , 0.86527383,
        0.1821984 , 0.12809324, 0.71805584, 0.66652536, 0.00696361,
        0.7039901 , 0.76400745, 0.76803076, 0.9383755 , 0.9983665 ,
        0.55184126, 0.24155247, 0.8808218 , 0.7940211 , 0.06668591,
        0.5435699 , 0.6001693 , 0.953964  , 0.903463  , 0.4600737 ,
        0.2969998 , 0.0644629 , 0.12009048, 0.19613028, 0.9363071 ,
        0.3580625 , 0.4970411 , 0.7616929 , 0.6750002 , 0.94700336,
        0.05755413, 0.76035655, 0.20780087, 0.05950677, 0.62907803,
        0.0131346 , 0.3874601 , 0.36050296, 0.0964973 , 0.85440767,
        0.9351901 , 0.99108076, 0.8628657 , 0.32033944, 0.43172634,
        0.5711131 , 0.02393043, 0.5481515 , 0.53499794, 0.17507732,
        0.11159575, 0.21764553, 0.8927821 , 0.8420936 , 0.22883523,
        0.84226024, 0.4948367 , 0.6161331 , 0.6346066 , 0.12583041,
        0.5679556 , 0.44134998, 0.55585575, 0.46636605, 0.80558324,

In [97]:
tf.matmul(tf.reshape(X, [-1, W.shape[0]]), W)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[ 0.1802184 , -3.5734706 ,  0.15305923,  8.220545  ]],
      dtype=float32)>

In [98]:
logits = tf.matmul(tf.reshape(X, [-1,  W.shape[0]]), W) + b
logits 

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[ 1.0571527, -3.103799 , -1.3663237,  7.763557 ]], dtype=float32)>

In [99]:
softmax(logits)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[1.2214046e-03, 1.9045006e-05, 1.0823235e-04, 9.9865139e-01]],
      dtype=float32)>

In [100]:
y_hat = tf.constant([[0.1, 0.2, 0.7], [0.8, 0.1, 0.1]])
y_hat

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.1, 0.2, 0.7],
       [0.8, 0.1, 0.1]], dtype=float32)>

In [122]:
Y = tf.constant([2, 1])
Y

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([2, 1], dtype=int32)>

In [123]:
pred_class = tf.cast(tf.argmax(y_hat, axis=1), Y.dtype)
pred_class

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([2, 0], dtype=int32)>

In [124]:
compare_result = tf.cast(pred_class == Y, tf.int32)
compare_result

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 0], dtype=int32)>

In [126]:
# Fraction of correct predictions: the int32 count of matches divided by the
# number of labels. The division yields a float64 scalar (see the output below).
tf.math.reduce_sum(compare_result) / Y.shape[0]

<tf.Tensor: shape=(), dtype=float64, numpy=0.5>

In [127]:
import tensorflow as tf

from utils import check_softmax, check_acc, check_model, check_ce

def softmax(logits):
    """
    numerically stable softmax, applied independently along the last axis
    args:
    - logits [tensor]: floating point logits, e.g. a 1xN row or an NxC batch
      with one row of class scores per example
    returns:
    - soft_logits [tensor]: tensor with the same shape as logits whose entries
      along the last axis are non-negative and sum to 1
    """
    # Softmax is unchanged when a constant is subtracted from every entry of a
    # row, so shift each row by its maximum: the largest exponent becomes
    # exp(0) = 1 and tf.math.exp can no longer overflow to inf.
    row_max = tf.math.reduce_max(logits, axis=-1, keepdims=True)
    exp_shifted = tf.math.exp(logits - row_max)

    # Normalise per row (axis=-1). Summing over the whole tensor would make a
    # batch of N rows sum to 1 in total instead of 1 per example.
    row_total = tf.math.reduce_sum(exp_shifted, axis=-1, keepdims=True)
    return exp_shifted / row_total

def cross_entropy(scaled_logits, one_hot):
    """
    Cross entropy loss implementation (summed over the batch)
    args:
    - scaled_logits [tensor]: NxC tensor of probabilities (softmax output)
      where N batch size / C number of classes
    - one_hot [tensor]: NxC one hot tensor; any numeric or boolean dtype,
      non-zero entries mark the true class
    returns:
    - loss [tensor]: cross entropy, a tensor of shape (1,) holding the sum of
      -log(p_true) over all examples
    """
    # tf.boolean_mask is specified for boolean masks; cast explicitly so that
    # int32 / float32 one-hot labels are handled the same way.
    true_class_mask = tf.cast(one_hot, tf.bool)

    # Only the probability assigned to the true class contributes to the loss,
    # every other term is multiplied by a zero of the one-hot vector.
    true_class_prob = tf.boolean_mask(scaled_logits, true_class_mask)

    # Guard against log(0) = -inf when a probability underflows to exactly 0.
    # The floor is far below any probability that matters, so ordinary inputs
    # are left untouched.
    min_prob = 1e-12
    safe_prob = tf.math.maximum(true_class_prob, min_prob)

    # keepdims=True keeps the result as a 1-element tensor rather than a scalar.
    return -1 * tf.math.reduce_sum(tf.math.log(safe_prob), keepdims=True)

def model(X, W, b):
    """
    logistic regression model: softmax(flatten(X) @ W + b)
    args:
    - X [tensor]: input image(s); reshaped so that every row holds
      W.shape[0] values
    - W [tensor]: weights, 2D with W.shape[0] input features per row of X
    - b [tensor]: bias added to the result of the matrix product
    returns:
    - output [tensor]: result of softmax applied to the logits
    """
    # Number of input features expected by the weight matrix.
    n_features = W.shape[0]

    # Flatten the input into rows of n_features values; -1 lets TensorFlow
    # infer how many rows that produces.
    flat_input = tf.reshape(X, [-1, n_features])

    # Affine transform followed by softmax.
    scores = tf.matmul(flat_input, W) + b
    return softmax(scores)

def accuracy(y_hat, Y):
    """
    calculate accuracy
    args:
    - y_hat [tensor]: NxC tensor of models predictions
    - Y [tensor]: N tensor of ground truth classes
    returns:
    - acc [tensor]: accuracy, float64 scalar equal to the fraction of rows
      whose highest scoring class matches Y
    """
    # Index of the highest score in each row, cast to the label dtype so the
    # two tensors can be compared.
    pred_class = tf.cast(tf.argmax(y_hat, axis=1), Y.dtype)

    # tf.math.equal always compares elementwise and raises on incompatible
    # shapes, whereas `==` on tensors can silently evaluate to False.
    is_correct = tf.math.equal(pred_class, Y)

    # Averaging the 0/1 matches avoids reading the static shape Y.shape[0],
    # which is None when the batch dimension is only known at run time.
    # float64 matches the dtype produced by the int32 / int true division.
    acc = tf.math.reduce_mean(tf.cast(is_correct, tf.float64))
    return acc

if __name__ == '__main__':
    # Pair every checker imported from utils with the implementation it
    # validates. Order: softmax, cross entropy (NLL), model, accuracy.
    checks = (
        (check_softmax, softmax),
        (check_ce, cross_entropy),
        (check_model, model),
        (check_acc, accuracy),
    )
    for run_check, implementation in checks:
        run_check(implementation)

Softmax implementation is correct!
CE implementation is correct!
Model implementation is correct!
Accuracy implementation is correct!
