This notebook is used to make a Logistic Regression instance.

First of all, import all the necessary modules

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split

### Parameters

In [3]:
num_features = 784

learning_rate = 0.1
num_epochs = 400
batch_size = 2000

### Step 1: Preparing Data

Here is a MNIST dataset download from

https://www.kaggle.com/competitions/digit-recognizer/data?select=train.csv

In [11]:
df = pd.read_csv("mnist.csv")
 
df = df[(df["label"] == 1) | (df["label"] == 0)]
    
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Step 2 : Seperate Data to X and y

In [12]:
X_train, X_test, y_train, y_test = train_test_split(df.drop("label", axis=1), df["label"], test_size=0.25)

### Step 3 : convert [0,255] to [0,1]

In [13]:
X_train, X_test = np.array(X_train, np.float32), np.array(X_test, np.float32)
y_train, y_test = np.array(y_train, np.float32), np.array(y_test, np.float32)

### Step 4 : Suffling Data and batch then by using tensorflow

In [14]:
def load_array(data_arrays, batch_size):
    return tf.data.Dataset.from_tensor_slices(data_arrays).shuffle(buffer_size=1000).batch(batch_size)

data_iter = load_array((X_train, y_train), batch_size)

### Step 5 : Defining the module and Loss function

In [22]:
# setting the module and Activation function

def sigmoid_module(X, w, b):  #@save
    """线性回归模型"""
    return tf.nn.sigmoid(tf.matmul(X, w) + b)

w = tf.Variable(tf.zeros([num_features, 1]))
b = tf.Variable(tf.zeros(1))

# Setting Loss function
loss = tf.keras.losses.BinaryCrossentropy()

### Step 6 : Defining the Optimizer

In [25]:
def sgd(params, grads, lr, batch_size):  #@save
    """小批量随机梯度下降"""
    for param, grad in zip(params, grads):
        param.assign_sub(lr*grad/batch_size)

### Step 7 : Run!

In [26]:
for epoch in range(num_epochs):
    # number of times we will trains this 
    for X, y in data_iter:
        # each batch
        with tf.GradientTape() as tape:
            y_pred = sigmoid_module(X, w, b)
            l = loss(y_pred, y)            
        dw, db = tape.gradient(l, [w, b])
        sgd([w, b], [dw, db], learning_rate, batch_size)
    l = loss(sigmoid_module(X_train, w, b), y_train)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 7.671878
epoch 2, loss 7.671756
epoch 3, loss 7.671586
epoch 4, loss 7.671362
epoch 5, loss 7.671185
epoch 6, loss 7.671130
epoch 7, loss 7.671044
epoch 8, loss 7.670995
epoch 9, loss 7.670968
epoch 10, loss 7.670907
epoch 11, loss 7.670854
epoch 12, loss 7.670837
epoch 13, loss 7.670818
epoch 14, loss 7.670806
epoch 15, loss 7.670816
epoch 16, loss 7.670794
epoch 17, loss 7.670814
epoch 18, loss 7.670825
epoch 19, loss 7.670820
epoch 20, loss 7.670804
epoch 21, loss 7.670797
epoch 22, loss 7.670792
epoch 23, loss 7.670789
epoch 24, loss 7.670787
epoch 25, loss 7.670785
epoch 26, loss 7.670777
epoch 27, loss 7.670780
epoch 28, loss 7.670773
epoch 29, loss 7.670778
epoch 30, loss 7.670784
epoch 31, loss 7.670793
epoch 32, loss 7.670801
epoch 33, loss 7.670813
epoch 34, loss 7.670825
epoch 35, loss 7.670840
epoch 36, loss 7.670853
epoch 37, loss 7.670856
epoch 38, loss 7.670859
epoch 39, loss 7.670854
epoch 40, loss 7.670854
epoch 41, loss 7.670851
epoch 42, loss 7.670835
e

epoch 334, loss 7.668730
epoch 335, loss 7.668730
epoch 336, loss 7.668730
epoch 337, loss 7.668730
epoch 338, loss 7.668730
epoch 339, loss 7.668730
epoch 340, loss 7.668730
epoch 341, loss 7.668729
epoch 342, loss 7.668729
epoch 343, loss 7.668728
epoch 344, loss 7.668728
epoch 345, loss 7.668728
epoch 346, loss 7.668728
epoch 347, loss 7.668728
epoch 348, loss 7.668728
epoch 349, loss 7.668728
epoch 350, loss 7.668728
epoch 351, loss 7.668728
epoch 352, loss 7.668728
epoch 353, loss 7.668728
epoch 354, loss 7.668728
epoch 355, loss 7.668727
epoch 356, loss 7.668727
epoch 357, loss 7.668728
epoch 358, loss 7.668727
epoch 359, loss 7.668728
epoch 360, loss 7.668727
epoch 361, loss 7.668727
epoch 362, loss 7.668728
epoch 363, loss 7.668728
epoch 364, loss 7.668728
epoch 365, loss 7.668728
epoch 366, loss 7.668728
epoch 367, loss 7.668727
epoch 368, loss 7.668727
epoch 369, loss 7.668727
epoch 370, loss 7.668727
epoch 371, loss 7.668727
epoch 372, loss 7.668727
epoch 373, loss 7.668727


In [27]:
def accuracy(y_pred, y_true):
    correct_prediction = tf.equal(np.around(y_pred), tf.cast(y_true, tf.int64)) # (256,) bool
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [28]:
print(accuracy(sigmoid_module(X_test, w, b), y_test))

tf.Tensor(0.5034782, shape=(), dtype=float32)


# Citation
1. Digit Recogniser MLP Classifier for Beginners -- KEERTHANA
    
    https://www.kaggle.com/code/thatsme123/digit-recogniser-mlp-classifier-for-beginners/notebook
    

2. Numpy硬核手写Logistic回归 -- Ma Fei
    
    https://zhuanlan.zhihu.com/p/346717919
    
    
3. 线性回归的从零开始实现  -- DIVE INTO DEEP LEARNING

    https://zh.d2l.ai/chapter_linear-networks/linear-regression-scratch.html
    
    
4. TensorFlow（2）逻辑回归实现  --  驼驼皮裤
    
    https://zhuanlan.zhihu.com/p/374588671