This notebook is used to make a Logistic Regression instance.

First of all, import all the necessary modules

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split

### Parameters

In [35]:
num_features = 784

learning_rate = 0.1
num_epochs = 400
batch_size = 2000

### Step 1: Preparing Data

Here is a MNIST dataset download from

https://www.kaggle.com/competitions/digit-recognizer/data?select=train.csv

In [36]:
df = pd.read_csv("mnist.csv")
 
df = df[(df["label"] == 1) | (df["label"] == 0)]
    
df.shape

(8816, 785)

### Step 2 : Seperate Data to X and y

In [37]:
X_train, X_test, y_train, y_test = train_test_split(df.drop("label", axis=1), df["label"], test_size=0.25)

### Step 3 : convert [0,255] to [0,1]

In [38]:
X_train, X_test = np.array(X_train, np.float32), np.array(X_test, np.float32)
y_train, y_test = np.array(y_train, np.float32), np.array(y_test, np.float32)

### Step 4 : Suffling Data and batch then by using tensorflow

In [39]:
def load_array(data_arrays, batch_size):
    return tf.data.Dataset.from_tensor_slices(data_arrays).shuffle(buffer_size=1000).batch(batch_size)

data_iter = load_array((X_train, y_train), batch_size)

### Step 5 : Defining the module and Loss function

In [40]:
# setting the module and Activation function

def sigmoid_module(X, w, b):  #@save
    """线性回归模型"""
    return tf.nn.sigmoid(tf.matmul(X, w) + b)

w = tf.Variable(tf.zeros([num_features,1]))
b = tf.Variable(tf.zeros(1))

# Setting Loss function
loss = tf.keras.losses.BinaryCrossentropy()

### Step 6 : Defining the Optimizer

In [41]:
def sgd(params, grads, lr, batch_size):  #@save
    """小批量随机梯度下降"""
    for param, grad in zip(params, grads):
        param.assign_sub(lr*grad/batch_size)

### Step 6 : Defining The M and Loss function

In [42]:
for epoch in range(num_epochs):
    # number of times we will trains this 
    for X, y in data_iter:
        # each batch
        with tf.GradientTape() as tape:
            y_pred = sigmoid_module(X, w, b)
            l = loss(y_pred, y)            
        dw, db = tape.gradient(l, [w, b])
        sgd([w, b], [dw, db], learning_rate, batch_size)
    l = loss(sigmoid_module(X_train, w, b), y_train)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 7.670411
epoch 2, loss 7.668716
epoch 3, loss 7.667484
epoch 4, loss 7.667148
epoch 5, loss 7.666644
epoch 6, loss 7.666422
epoch 7, loss 7.666276
epoch 8, loss 7.666114
epoch 9, loss 7.666080
epoch 10, loss 7.666067
epoch 11, loss 7.666062
epoch 12, loss 7.666053
epoch 13, loss 7.666042
epoch 14, loss 7.666025
epoch 15, loss 7.666000
epoch 16, loss 7.665970
epoch 17, loss 7.665913
epoch 18, loss 7.665837
epoch 19, loss 7.665744
epoch 20, loss 7.665665
epoch 21, loss 7.665704
epoch 22, loss 7.665602
epoch 23, loss 7.665580
epoch 24, loss 7.665284
epoch 25, loss 7.664994
epoch 26, loss 7.664987
epoch 27, loss 7.664961
epoch 28, loss 7.664938
epoch 29, loss 7.664911
epoch 30, loss 7.664999
epoch 31, loss 7.664937
epoch 32, loss 7.664876
epoch 33, loss 7.664692
epoch 34, loss 7.664554
epoch 35, loss 7.664512
epoch 36, loss 7.664443
epoch 37, loss 7.664429
epoch 38, loss 7.664411
epoch 39, loss 7.664360
epoch 40, loss 7.664359
epoch 41, loss 7.664454
epoch 42, loss 7.664406
e

epoch 335, loss 7.663632
epoch 336, loss 7.663632
epoch 337, loss 7.663631
epoch 338, loss 7.663632
epoch 339, loss 7.663631
epoch 340, loss 7.663631
epoch 341, loss 7.663631
epoch 342, loss 7.663631
epoch 343, loss 7.663631
epoch 344, loss 7.663631
epoch 345, loss 7.663631
epoch 346, loss 7.663631
epoch 347, loss 7.663632
epoch 348, loss 7.663632
epoch 349, loss 7.663632
epoch 350, loss 7.663631
epoch 351, loss 7.663631
epoch 352, loss 7.663632
epoch 353, loss 7.663631
epoch 354, loss 7.663631
epoch 355, loss 7.663631
epoch 356, loss 7.663631
epoch 357, loss 7.663632
epoch 358, loss 7.663632
epoch 359, loss 7.663632
epoch 360, loss 7.663631
epoch 361, loss 7.663632
epoch 362, loss 7.663631
epoch 363, loss 7.663631
epoch 364, loss 7.663632
epoch 365, loss 7.663632
epoch 366, loss 7.663631
epoch 367, loss 7.663631
epoch 368, loss 7.663631
epoch 369, loss 7.663631
epoch 370, loss 7.663632
epoch 371, loss 7.663631
epoch 372, loss 7.663631
epoch 373, loss 7.663631
epoch 374, loss 7.663631


In [43]:
def accuracy(y_pred, y_true):
    correct_prediction = tf.equal(np.around(y_pred), tf.cast(y_true, tf.int64)) # (256,) bool
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [44]:
print(accuracy(sigmoid_module(X_test, w, b), y_test))

tf.Tensor(0.5017869, shape=(), dtype=float32)
