In [57]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import fmin_tnc

In [58]:
data = pd.read_csv('train_data.csv', sep = ',')
data.head()

Unnamed: 0,T,V,P,RH,E
0,273.39,420.12,91.84,57.41,1778.12
1,195.26,248.88,92.29,35.21,1824.16
2,377.52,360.42,92.31,27.69,1761.72
3,171.6,244.98,91.61,56.99,1889.64
4,222.56,353.7,92.43,54.25,1806.68


In [61]:
normalized_df = (data-data.min())/(data.max()-data.min())
data = normalized_df

In [62]:
data.head()

Unnamed: 0,T,V,P,RH,E
0,0.55414,0.814666,0.435262,0.939856,0.280608
1,0.362739,0.294053,0.559229,0.441652,0.447178
2,0.809236,0.633163,0.564738,0.27289,0.221274
3,0.304777,0.282196,0.371901,0.930431,0.684081
4,0.429618,0.612733,0.597796,0.868941,0.383936


In [63]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [65]:
y[:5]

0    0.280608
1    0.447178
2    0.221274
3    0.684081
4    0.383936
Name: E, dtype: float64

In [66]:
X = np.c_[np.ones((X.shape[0], 1)), X]
y = y[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

In [67]:
y

array([[0.28060781],
       [0.447178  ],
       [0.22127352],
       ...,
       [0.28885673],
       [0.27916064],
       [0.56628075]])

In [68]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def net_input(theta, x):
    return np.dot(x, theta)

def probability(theta, x):
    return sigmoid(net_input(theta, x))

In [69]:
def cost_function(theta, x, y):
    m = x.shape[0]
    total_cost = -(1 / m) * np.sum(
        y * np.log(probability(theta, x)) + (1 - y) * np.log(
            1 - probability(theta, x)))
    return total_cost

def gradient(theta, x, y):
    m = x.shape[0]
    return (1 / m) * np.dot(x.T, sigmoid(net_input(theta,   x)) - y)

def fit(x, y, theta):
    opt_weights = fmin_tnc(func=cost_function, x0=theta,
                  fprime=gradient,args=(x, y.flatten()))
    return opt_weights[0]

In [75]:
def findsum(num):
    sum = 0
    for j in range (2000):
        hx = probability(theta, X[j])
        sum += (hx - y[j])*X[j][num]
    return sum

In [98]:
lr = 0.001
for i in range (1000):
    th0 = theta[0] - lr*findsum(0)
    th1 = theta[1] - lr*findsum(1)
    th2 = theta[2] - lr*findsum(2)
    th3 = theta[3] - lr*findsum(3)
    th4 = theta[4] - lr*findsum(4)
    theta[0] = th0
    theta[1] = th1
    theta[2] = th2
    theta[3] = th3
    theta[4] = th4

In [99]:
theta

array([[ 2.66424729],
       [-4.19537324],
       [-0.90830495],
       [ 0.14091965],
       [-0.7916716 ]])

In [100]:
parameters = theta.T.flatten()

In [101]:
theta.T.flatten()

array([ 2.66424729, -4.19537324, -0.90830495,  0.14091965, -0.7916716 ])

In [103]:
def predict(x):
    theta = parameters[:, np.newaxis]
    return probability(theta, x)
def accuracy(x, actual_classes, probab_threshold=0.5):
    predicted_classes = (predict(x) >= 
                         probab_threshold).astype(int)
    predicted_classes = predicted_classes.flatten()
    accuracy = np.mean(predicted_classes == actual_classes)
    return accuracy * 100
accuracy(X, y.flatten())

0.1