In [2]:
import pandas as pd
import pymysql
import numpy as np
import matplotlib.pyplot as plt
import time

def load_dbscore_data():
    conn = pymysql.connect(host='localhost', user='root', password='1234', db='hw5')
    curs = conn.cursor(pymysql.cursors.DictCursor)
    
    sql = "select * from db_score"
    curs.execute(sql)
    
    data  = curs.fetchall()
    
    curs.close()
    conn.close()
    
    X = [ ( t['attendance'], t['homework'], t['final'] ) for t in data ]
    X = np.array(X)
    
    y = [ (t['score']) for t in data]
    y = np.array(y)

    return X, y

X, y = load_dbscore_data()

In [3]:
def column(matrix, i):
    return np.array([row[i] for row in matrix])

# y = ax1 + bx2 + cx3 + d

import statsmodels.api as sm
X_const = sm.add_constant(X)

model = sm.OLS(y, X_const)
ls = model.fit()

print(ls.summary())

ls_d = ls.params[0]

ls_a = ls.params[1]
ls_b = ls.params[2]
ls_c = ls.params[3]

a = 1.9348
b = 1.7178
c = 1.4972
d = -3.6583

X1 = column(X, 0)
X2 = column(X, 1)
X3 = column(X, 2)

y_pred = a*X1 + b*X2 + c*X3 + d
'''
plt.scatter(X, y) 
plt.plot([min(X), max(X)], [min(y_pred), max(y_pred)], color='red')
plt.show()
'''

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.825
Model:                            OLS   Adj. R-squared:                  0.819
Method:                 Least Squares   F-statistic:                     137.9
Date:                Thu, 04 Nov 2021   Prob (F-statistic):           3.76e-33
Time:                        10:26:48   Log-Likelihood:                -300.38
No. Observations:                  92   AIC:                             608.8
Df Residuals:                      88   BIC:                             618.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -3.6583     15.619     -0.234      0.8

"\nplt.scatter(X, y) \nplt.plot([min(X), max(X)], [min(y_pred), max(y_pred)], color='red')\nplt.show()\n"

In [4]:
def gradient_descent_naive(X, y): #벡터연산이 아니기 때문에 naive

    epochs = 100000
    min_grad = 0.0001
    learning_rate = 0.001
    
    a = 0.0
    b = 0.0
    c = 0.0
    d = 0.0
    
    n = len(y)
    
    #a, b, c, d의 초기값을 이렇게 정함
    a_grad = 0.0
    b_grad = 0.0
    c_grad = 0.0
    d_grad = 0.0
    
    for epoch in range(epochs):
        for i in range(n):
            y_pred = a * X1[i] + b*X2[i] + c*X3[i] + d
            a_grad += 2*(y_pred - y[i]) * X1[i]
            b_grad += 2*(y_pred - y[i]) * X2[i]
            c_grad += 2*(y_pred - y[i]) * X3[i]
            d_grad += 2*(y_pred - y[i])

        a_grad /= n
        b_grad /= n
        c_grad /= n
        d_grad /= n
        
        #a, b, c, d값 업데이트
        a = a - learning_rate * a_grad 
        b = b - learning_rate * b_grad 
        c = c - learning_rate * c_grad 
        d = d - learning_rate * d_grad
        
        if ( epoch % 1000 == 0) :
            print("epoch %d: a_grad=%f, b_grad=%f, c_grad=%f, d_grad=%f, a=%f, b=%f, c=%f, d=%f" 
                  %(epoch, a_grad, b_grad, c_grad, d_grad, a, b, c, d) )
        
        if ( abs(a_grad) < min_grad and abs(b_grad) < min_grad and abs(c_grad) < min_grad and abs(d_grad) < min_grad ):
            break
        
    return a, b, c, d

start_time = time.time()
a, b, c, d = gradient_descent_naive(X, y)
end_time = time.time()

print("%f seconds" %(end_time - start_time))

print("\n\nFinal:")
print("gdn_a=%f, gdn_b=%f, gdn_c=%f, gdn_d=%f" %(a, b, c, d))
print("ls_a=%f, ls_b=%f, ls_c=%f, ls_d=%f" %(ls_a, ls_b, ls_c, ls_d))


epoch 0: a_grad=-1003.713820, b_grad=-2385.090065, c_grad=-2010.578272, d_grad=-128.088261, a=1.003714, b=2.385090, c=2.010578, d=0.128088
epoch 1000: a_grad=-0.165676, b_grad=0.068802, c_grad=0.001524, d_grad=-0.008483, a=1.323536, b=1.766352, c=1.501352, d=0.159951
epoch 2000: a_grad=-0.026207, b_grad=0.010236, c_grad=0.000236, d_grad=0.010794, a=1.398460, b=1.735659, c=1.500666, d=0.155882
epoch 3000: a_grad=-0.005531, b_grad=0.001557, c_grad=0.000046, d_grad=0.013607, a=1.411214, b=1.731071, c=1.500554, d=0.143250
epoch 4000: a_grad=-0.002461, b_grad=0.000270, c_grad=0.000017, d_grad=0.013979, a=1.414749, b=1.730351, c=1.500527, d=0.129393
epoch 5000: a_grad=-0.002000, b_grad=0.000079, c_grad=0.000013, d_grad=0.013990, a=1.416911, b=1.730205, c=1.500512, d=0.115400
epoch 6000: a_grad=-0.001926, b_grad=0.000051, c_grad=0.000012, d_grad=0.013947, a=1.418864, b=1.730145, c=1.500500, d=0.101430
epoch 7000: a_grad=-0.001909, b_grad=0.000047, c_grad=0.000012, d_grad=0.013897, a=1.420779,

epoch 64000: a_grad=-0.001543, b_grad=0.000037, c_grad=0.000010, d_grad=0.011248, a=1.518748, b=1.727744, c=1.499862, d=-0.626484
epoch 65000: a_grad=-0.001538, b_grad=0.000037, c_grad=0.000010, d_grad=0.011207, a=1.520289, b=1.727707, c=1.499852, d=-0.637712
epoch 66000: a_grad=-0.001532, b_grad=0.000037, c_grad=0.000010, d_grad=0.011165, a=1.521823, b=1.727670, c=1.499842, d=-0.648898
epoch 67000: a_grad=-0.001526, b_grad=0.000037, c_grad=0.000010, d_grad=0.011124, a=1.523353, b=1.727634, c=1.499832, d=-0.660042
epoch 68000: a_grad=-0.001521, b_grad=0.000037, c_grad=0.000010, d_grad=0.011083, a=1.524876, b=1.727597, c=1.499823, d=-0.671145
epoch 69000: a_grad=-0.001515, b_grad=0.000036, c_grad=0.000010, d_grad=0.011042, a=1.526394, b=1.727561, c=1.499813, d=-0.682207
epoch 70000: a_grad=-0.001509, b_grad=0.000036, c_grad=0.000010, d_grad=0.011001, a=1.527906, b=1.727524, c=1.499803, d=-0.693229
epoch 71000: a_grad=-0.001504, b_grad=0.000036, c_grad=0.000010, d_grad=0.010960, a=1.5294

In [5]:
#이번엔 벡터화해서 연산 -> 속도가 더 빠름
def gradient_descent_vectorized(X, y):
    epochs = 100000
    min_grad = 0.0001
    learning_rate = 0.001
    
    a = 0.0
    b = 0.0
    c = 0.0
    d = 0.0
    
    n = len(y)
    
    #a, b, c, d의 초기값을 이렇게 정함
    a_grad = 0.0
    b_grad = 0.0
    c_grad = 0.0
    d_grad = 0.0

    for epoch in range(epochs):    
    
        y_pred = a*X1 + b*X2 + c*X3 + d #각 샘플에 따라 연산하는게 아닌 벡터로 한번에 연산 -> naive와의 가장 큰 차이!
        a_grad = (2 * (y_pred - y) * X1).sum()/n
        b_grad = (2 * (y_pred - y) * X2).sum()/n
        c_grad = (2 * (y_pred - y) * X3).sum()/n
        d_grad = (2 * (y_pred - y)).sum()/n
        
        a = a - learning_rate * a_grad   
        b = b - learning_rate * b_grad   
        c = c - learning_rate * c_grad   
        d = d - learning_rate * d_grad        

        if ( epoch % 1000 == 0):
            print("epoch %d: a_grad=%f, b_grad=%f, c_grad=%f, d_grad=%f, a=%f, b=%f, c=%f, d=%f" 
                  %(epoch, a_grad, b_grad, c_grad, d_grad, a, b, c, d) )
    
        if ( abs(a_grad) < min_grad and abs(b_grad) < min_grad and abs(c_grad) < min_grad and abs(d_grad) < min_grad ):
            break

    return a, b, c, d

start_time = time.time()
a, b, c, d = gradient_descent_vectorized(X, y)
end_time = time.time()

print("%f seconds" %(end_time - start_time))

print("\n\nFinal:")
print("gdv_a=%f, gdv_b=%f, gdv_c=%f, gdv_d=%f" %(a, b, c, d) )
print("ls_a=%f, ls_b=%f, ls_c=%f ls_d=%f" %(ls_a, ls_b, ls_c, ls_d) )


epoch 0: a_grad=-1003.713820, b_grad=-2385.090065, c_grad=-2010.578272, d_grad=-128.088261, a=1.003714, b=2.385090, c=2.010578, d=0.128088
epoch 1000: a_grad=-0.167274, b_grad=0.069481, c_grad=0.001539, d_grad=-0.008861, a=1.321713, b=1.767109, c=1.501369, d=0.159856
epoch 2000: a_grad=-0.026941, b_grad=0.010553, c_grad=0.000243, d_grad=0.010537, a=1.397878, b=1.735887, c=1.500671, d=0.156115
epoch 3000: a_grad=-0.005700, b_grad=0.001636, c_grad=0.000047, d_grad=0.013429, a=1.411030, b=1.731123, c=1.500555, d=0.143693
epoch 4000: a_grad=-0.002480, b_grad=0.000286, c_grad=0.000017, d_grad=0.013823, a=1.414641, b=1.730364, c=1.500527, d=0.130001
epoch 5000: a_grad=-0.001987, b_grad=0.000082, c_grad=0.000013, d_grad=0.013839, a=1.416802, b=1.730210, c=1.500513, d=0.116160
epoch 6000: a_grad=-0.001907, b_grad=0.000051, c_grad=0.000012, d_grad=0.013799, a=1.418738, b=1.730148, c=1.500500, d=0.102339
epoch 7000: a_grad=-0.001889, b_grad=0.000046, c_grad=0.000012, d_grad=0.013750, a=1.420634,

epoch 66000: a_grad=-0.001519, b_grad=0.000036, c_grad=0.000010, d_grad=0.011073, a=1.520723, b=1.727697, c=1.499849, d=-0.640877
epoch 67000: a_grad=-0.001514, b_grad=0.000036, c_grad=0.000010, d_grad=0.011033, a=1.522240, b=1.727660, c=1.499839, d=-0.651930
epoch 68000: a_grad=-0.001508, b_grad=0.000036, c_grad=0.000010, d_grad=0.010992, a=1.523751, b=1.727624, c=1.499830, d=-0.662942
epoch 69000: a_grad=-0.001503, b_grad=0.000036, c_grad=0.000010, d_grad=0.010952, a=1.525256, b=1.727588, c=1.499820, d=-0.673915
epoch 70000: a_grad=-0.001497, b_grad=0.000036, c_grad=0.000010, d_grad=0.010912, a=1.526756, b=1.727552, c=1.499811, d=-0.684847
epoch 71000: a_grad=-0.001492, b_grad=0.000036, c_grad=0.000010, d_grad=0.010872, a=1.528251, b=1.727516, c=1.499801, d=-0.695738
epoch 72000: a_grad=-0.001486, b_grad=0.000036, c_grad=0.000009, d_grad=0.010832, a=1.529740, b=1.727480, c=1.499791, d=-0.706590
epoch 73000: a_grad=-0.001481, b_grad=0.000036, c_grad=0.000009, d_grad=0.010792, a=1.5312