-
Notifications
You must be signed in to change notification settings - Fork 7
/
logistic_regression.py
112 lines (89 loc) · 3.47 KB
/
logistic_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import random
import numpy as np
from ..utils.base import BaseModel
from ..utils.func import sigmoid, judge
from ..utils.preprocessing import matrix_type_cast
class LogisticRegression(BaseModel):
    """Logistic regression model.

    The weights are learned either by full-batch gradient descent or by
    mini-batch stochastic gradient descent, selected with ``optimizer``.
    A bias column of ones is prepended to the features, so the learned
    weight vector has one more entry than the number of features.
    """

    def __init__(self, max_iter=1000, epslion=1e-6,
                 lr=1e-3, optimizer="gradient_descent",
                 batch=200):
        """
        :@param max_iter: maximum number of iterations.
        :type max_iter: int.
        :@param epslion: if the distance between the new weight and
                         the old weight is less than epslion, training
                         stops.
        :type epslion: float.
        :@param lr: learning rate.
        :type lr: float.
        :@param optimizer: optional optimization algorithm.
        :type optimizer: method in {gradient_descent, SGD},
                         default is gradient_descent method. Any value
                         other than "gradient_descent" selects SGD.
        :@param batch: number of samples randomly selected at each
                       SGD iteration.
        :type batch: int.
        """
        self.max_iter = max_iter
        self.epslion = epslion
        self.lr = lr
        self.optimizer = optimizer
        self.batch = batch

    @matrix_type_cast
    def fit(self, X, y):
        """Train the model and store the result in ``self.weight``.

        :@param X: features matrix.
        :type X: the N x M dimension np.array or list.
        :@param y: class label vector.
        :type y: the N dimension np.array or list.
        :return: the fitted model itself.
        :rtype: LogisticRegression.
        """
        if self.optimizer == "gradient_descent":
            self.weight = self.gradient_descent(X, y)
        else:
            self.weight = self.SGD(X, y)
        return self

    def gradient_descent(self, X, y):
        """Get the weight parameters via full-batch gradient descent.

        :@param X: features matrix.
        :type X: the N x M dimension np.array.
        :@param y: class label vector.
        :type y: the N dimension np.array.
        :return: the weight parameters (bias first).
        :rtype: the (M + 1) x 1 dimension np.array.
        """
        X_ = np.c_[np.ones((X.shape[0], 1)), X]
        target = np.asarray(y).reshape(-1, 1)
        weight = np.random.rand(X_.shape[1], 1)
        # Iterate with a local counter: decrementing self.max_iter would
        # consume the hyper-parameter and turn any later fit() into a no-op.
        for _ in range(self.max_iter):
            e_vec = sigmoid(X_ @ weight) - target
            _weight = weight - self.lr * X_.T @ e_vec
            if np.linalg.norm(weight - _weight) < self.epslion:
                return _weight
            weight = _weight
        return weight

    def SGD(self, X, y):
        """Get the weight parameters via mini-batch stochastic
        gradient descent.

        At every iteration ``self.batch`` distinct rows are drawn at
        random and the gradient is evaluated on those rows only.

        :@param X: features matrix.
        :type X: the N x M dimension np.array.
        :@param y: class label vector.
        :type y: the N dimension np.array.
        :return: the weight parameters (bias first).
        :rtype: the (M + 1) x 1 dimension np.array.
        :raises Exception: if ``self.batch`` exceeds the number of rows
                           in X.
        """
        n_samples = X.shape[0]
        if self.batch > n_samples:
            raise Exception("Batch greater than the X dimension!")
        X_ = np.c_[np.ones((n_samples, 1)), X]
        # Column vector so the residual stays (batch, 1); subtracting a
        # flat (batch,) vector would broadcast to (batch, batch).
        target = np.asarray(y).reshape(-1, 1)
        weight = np.random.rand(X_.shape[1], 1)
        index_list = list(range(n_samples))
        # Local counter keeps self.max_iter intact across fits.
        for _ in range(self.max_iter):
            index = random.sample(index_list, self.batch)
            batch_X = X_[index]
            batch_e_vec = sigmoid(batch_X @ weight) - target[index]
            _weight = weight - self.lr * batch_X.T @ batch_e_vec
            if np.linalg.norm(weight - _weight) < self.epslion:
                return _weight
            weight = _weight
        return weight

    @matrix_type_cast
    def predict(self, X):
        """Predict class label.

        :@param X: unlabeled features matrix.
        :type X: the N x M dimension np.array or list.
        :return: class label vector.
        :rtype: vector and value in {0, 1}.
        :raises Exception: if the model has not been fitted yet.
        """
        if not hasattr(self, "weight"):
            raise Exception("Please run `fit` before predict!")
        X_ = np.c_[np.ones((X.shape[0], 1)), X]
        # NOTE(review): `judge` is defined elsewhere; presumably it
        # thresholds the sigmoid output into {0, 1} labels - confirm.
        return judge(sigmoid(X_ @ self.weight))