In [20]:
#
# CISC 452
# Prediction of Blood Glucose Levels based on RTCGM Data
#
# November 10, 2016
#
# This script implements a multi-layer feed-forward neural network for glucose
# prediction
#
# The network has 7 input nodes and 1 output node. If the current time is 'T',
# then the inputs and output represent the blood glucose measurements at the
# following times:
#   Inputs:     - T
#               - (T - 10 mins)
#               - (T - 20 mins)
#               - (T - 30 mins)
#               - (T - 40 mins)
#               - (T - 50 mins)
#               - (T - 60 mins)
#
#   Output:     - (T + 20 mins)
#

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import matplotlib.pyplot as plt
import math
import pandas as pd
import numpy as np

# Number of optimizer steps run by trainFFNN; every step is fed the complete
# training set, so one step corresponds to one epoch.
NUM_EPOCHS = 1500 # Number of training epochs

# readData reads data from the specified pre-processed input data file.
#
# The file is expected to hold one numeric value per line. A window of
# `numInputs` consecutive values is slid over the sequence one position at a
# time; each window becomes one input point and the value immediately after
# the window becomes its desired output.
#
# Parameters:
#   filePath  - path of the text file to read
#   numInputs - number of consecutive values per input point (default 7,
#               matching the width of the network's input placeholder)
#
# Returns:
#   A two-element list [x_data, y_data] where x_data is a list of
#   `numInputs`-long lists of floats and y_data is a list of single-element
#   lists of floats. Both lists have the same length. If the file contains
#   `numInputs` values or fewer, no complete (input, output) pair exists and
#   both lists are empty.
#
# Raises:
#   ValueError if a non-blank line cannot be parsed as a float.
def readData(filePath, numInputs=7) :
    with open(filePath, 'r') as f:
        # Blank lines (for example a trailing newline at the end of the file)
        # carry no reading, so they are skipped instead of crashing float().
        values = [float(line) for line in f if line.strip()]

    x_data = []
    y_data = []
    # A window starting at `start` needs values[start + numInputs] to exist as
    # its target, hence the upper bound. The range is empty for short files.
    for start in range(len(values) - numInputs):
        target = start + numInputs
        x_data.append(values[start:target])
        y_data.append([values[target]])

    return [x_data, y_data]

# evaluateNetwork runs the trained network on the provided test data and
# prints the following evaluation metrics:
#   - mean squared prediction error
#   - percentage of lows that were correctly identified
#   - percentage of highs that were correctly identified
#   - number of falsely reported lows
#   - number of falsely reported highs
#
# These metrics are defined as follows:
#   - MSE:
#       -> Average of (y_predicted - y_desired)^2 over all test points
#   - Low prediction accuracy:
#       -> 100 * (Number of correct lows) / (Number of lows)
#       -> Lows are any desired value less than lowThreshold
#       -> Reported as nan when the test data contains no lows
#   - High prediction accuracy:
#       -> 100 * (Number of correct highs) / (Number of highs)
#       -> Highs are any desired value greater than highThreshold
#       -> Reported as nan when the test data contains no highs
#   - Number of false lows:
#       -> Number of low predictions for non-low points where
#          (y_desired - y_predicted) > falseAlarmMargin
#       -> Note: false alarms are not counted if the prediction error is small
#   - Number of false highs:
#       -> Number of high predictions for non-high points where
#          (y_predicted - y_desired) > falseAlarmMargin
#       -> Note: false alarms are not counted if the prediction error is small
#
# Parameters:
#   session          - TensorFlow session holding the trained variables
#   inData           - list of input points (one list of inputs per point)
#   outData          - list of desired outputs (one single-element list per point)
#   prediction       - output tensor of the network
#   lowThreshold     - values below this are classified as lows (default 100)
#   highThreshold    - values above this are classified as highs (default 150)
#   falseAlarmMargin - minimum error for a wrong alarm to be counted (default 8)
#
# The function reads the module-level placeholders `x` and `y_desired` to feed
# the network, so `prediction` must have been built from that same `x`.
def evaluateNetwork(session, inData, outData, prediction,
                    lowThreshold=100, highThreshold=150, falseAlarmMargin=8) :

    # Compute mse:
    mse = session.run(tf.reduce_mean(tf.square(prediction - y_desired)), feed_dict={x: inData, y_desired: outData})

    # Apply the network to every test point in a single run rather than
    # launching one session.run per point.
    predictions = session.run(prediction, feed_dict={x: inData})

    numTestPoints = len(inData)
    numPredictedLows = 0
    numLows = 0
    numFalseLows = 0
    numPredictedHighs = 0
    numHighs = 0
    numFalseHighs = 0
    for i, row in enumerate(predictions) :
        predicted = float(row[0]) # The network has a single output node
        desired = outData[i][0]

        # Echo the desired value of every test point (kept from the original
        # behaviour of this function).
        print(desired)

        isLow = desired < lowThreshold
        isHigh = desired > highThreshold

        # Update numLows, numHighs:
        if(isLow) :
            numLows += 1
        elif(isHigh) :
            numHighs += 1

        # Update prediction counts:
        if(predicted < lowThreshold) : # If predicted low
            if(isLow) : # If low prediction was correct
                numPredictedLows += 1
            elif((desired - predicted) > falseAlarmMargin) : # Incorrect low with a 'large' error
                numFalseLows += 1
        elif(predicted > highThreshold) : # If predicted high
            if(isHigh) : # If high prediction was correct
                numPredictedHighs += 1
            elif((predicted - desired) > falseAlarmMargin) : # Incorrect high with a 'large' error
                numFalseHighs += 1

    # Accuracy is undefined when there is nothing to detect; report nan instead
    # of raising ZeroDivisionError.
    lowAccuracy = 100 * numPredictedLows / numLows if numLows else float('nan')
    highAccuracy = 100 * numPredictedHighs / numHighs if numHighs else float('nan')

    # Print results:
    print('Number of test points: ', numTestPoints)
    print('Number of lows: ', numLows)
    print('Number of highs: ', numHighs)
    print("Number of 'normal' points: ", numTestPoints - numLows - numHighs)
    print('') # New line
    print('MSE: ', mse)
    print('')
    print('Low prediction accuracy: ', lowAccuracy, '%')
    print('Number of false lows: ', numFalseLows)
    print('')
    print('High prediction accuracy: ', highAccuracy, '%')
    print('Number of false highs: ', numFalseHighs)

# End evaluateNetwork(...)

# Module-level graph inputs. evaluateNetwork reads both names directly and
# trainFFNN reads `y_desired` directly, so they must exist before either
# function is called.
# Each row fed to `x` is one input point of 7 values; each row fed to
# `y_desired` is the single desired output for the matching input point.
x = tf.placeholder(tf.float32, [None, 7], name='x') # Input placeholder
y_desired = tf.placeholder(tf.float32, [None, 1], name='y_desired') # Desired output placeholder

# feedForwardNN builds the feed-forward network on top of the input tensor `x`
# and returns the tensor holding the network output.
#
# Architecture: 7 inputs -> 15 nodes -> 15 nodes -> 1 output. Every layer is a
# plain affine map (inputs * weights + bias); no activation function is
# applied, so each node simply outputs its activation.
#
# Weights are drawn uniformly from [-1, 1) and biases start at zero. New
# variables are created on every call.
def feedForwardNN(x) :
    # Node counts from the input layer through to the output layer.
    layerSizes = [7, 15, 15, 1]

    signal = x
    # Walk over consecutive (fan-in, fan-out) pairs: input->hidden 1,
    # hidden 1->hidden 2, hidden 2->output.
    for fanIn, fanOut in zip(layerSizes[:-1], layerSizes[1:]) :
        weights = tf.Variable(tf.random_uniform([fanIn, fanOut], minval = -1, maxval = 1, dtype = tf.float32))
        bias = tf.Variable(tf.zeros([1, fanOut])) # One bias per node in this layer
        signal = tf.add(tf.matmul(signal, weights), bias)

    return signal

# trainFFNN builds the network on the input placeholder `x`, trains it on the
# training file for NUM_EPOCHS steps and then prints the evaluation metrics
# for the test file.
#
# Parameters:
#   x - input placeholder the network is built on. evaluateNetwork feeds the
#       module-level `x`, so this must be that same placeholder.
#
# The session used for training is closed before the function returns.
def trainFFNN(x):
    trainData_in, trainData_out = readData('85655_train.csv')
    testData_in, testData_out = readData('85655_test.csv')

    prediction = feedForwardNN(x)

    # Error function to be minimized is the mean square error:
    loss = tf.reduce_mean(tf.square(prediction - y_desired))

    # Define training algorithm (Adam Optimizer):
    # Note: AdamOptimizer produced better results than the
    # GradientDescentOptimizer (tried with a learning rate of 0.5).
    train_step = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

    # The whole training set is fed on every step. Mini-batch training was
    # tested, but per-epoch training produced better results.
    trainFeed = {x: trainData_in, y_desired: trainData_out}

    # The context manager makes `sess` the default session and guarantees it
    # is closed afterwards, so re-running this function does not leak sessions.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        # Train:
        for _ in range(NUM_EPOCHS):
            sess.run(train_step, feed_dict=trainFeed)

        # Test:
        # NOTE(review): the banner says patient 174 while the data files are
        # named 85655_* - confirm which identifier is correct.
        print('Patient 174 data:')
        evaluateNetwork(sess, testData_in, testData_out, prediction)
# End trainFFNN(x)

# Script entry point: build, train and evaluate the network on the
# module-level input placeholder.
trainFFNN(x)


path:85655_train.csv, x_data len: 680, y_data len: 680
path:85655_test.csv, x_data len: 165, y_data len: 165




Patient 174 data:
98.0
98.0
98.0
98.0
98.0
98.0
97.0
97.0
97.0
96.0
97.0
97.0
97.0
96.0
97.0
98.0
98.0
98.0
99.0
99.0
99.0
100.0
101.0
102.0
101.0
101.0
101.0
101.0
101.0
102.0
102.0
103.0
104.0
103.0
103.0
102.0
102.0
102.0
102.0
103.0
103.0
103.0
103.0
102.0
102.0
101.0
100.0
100.0
100.0
99.0
97.0
99.0
101.0
103.0
103.0
102.0
101.0
99.0
99.0
98.0
98.0
100.0
101.0
102.0
100.0
100.0
101.0
96.0
100.0
103.0
111.0
115.0
113.0
112.0
113.0
115.0
119.0
115.0
113.0
110.0
110.0
110.0
110.0
109.0
109.0
109.0
106.0
107.0
110.0
114.0
124.0
123.0
121.0
120.0
116.0
112.0
110.0
108.0
104.0
104.0
105.0
107.0
110.0
111.0
111.0
112.0
113.0
119.0
121.0
120.0
121.0
122.0
120.0
120.0
119.0
114.0
122.0
123.0
116.0
115.0
116.0
118.0
125.0
132.0
118.0
105.0
116.0
131.0
130.0
129.0
131.0
131.0
129.0
134.0
129.0
136.0
139.0
147.0
152.0
148.0
151.0
153.0
157.0
155.0
154.0
156.0
155.0
149.0
151.0
146.0
139.0
143.0
150.0
146.0
150.0
156.0
151.0
148.0
155.0
160.0
160.0
155.0
154.0
154.0
155.0
Number of test points