# Project Part 2
This part of the project is to work on the biological age prediction specifically.

In [137]:
# importing module
from pandas import *
from statistics import mean
import numpy as np
from helper760_part2 import read_inputs

In [138]:
# Let's read all data and preprocess them
Clininc_Data,Outcome_Data,CT_Data = read_inputs()


In [139]:
n_samples = len(Clininc_Data[0])

# Average Life Expectancy in US in 2019 is 78.9 years. Therefore, we will use this as our average biological death age
# The general idea is that, if the Patient died, then we use ( 78.9 - death duration ) to compute biological age
# Otherwise, we follow a rough estimation model to compute biological age

ch_age = Clininc_Data[7]    # read patients's chronological age at CT
bio_age = []                # create a list to store biological age of the patients
for i in range(n_samples):
    # if the patient has died, we can obtain a rough estimate of biological age by subtract the death duration from 78.9, average life expectancy in the US
    if Outcome_Data[0][i] != 0:
        temp_bio_age = round(78.9 - float(Outcome_Data[0][i]/365.0),1)
        bio_age.append(temp_bio_age)
    else:
        diff = abs(ch_age[i] - 78.9)
        buff_range = 20
        weighted_sum = 0
        temp_bio_age = 0

        # if the age of the person is younger than 60, we consider them to be young 
        if ch_age[i] < 60:

            # BMI > 30
            if Clininc_Data[5][i] == 1:
                weighted_sum = weighted_sum + 0.06

            # FRS 10 year
            if Clininc_Data[10][i] > 0.2:
                weighted_sum = weighted_sum + 0.3
            elif Clininc_Data[10][i] > 0.1:
                weighted_sum = weighted_sum + 0.15
            
            # FRAX Fx Prob
            if Clininc_Data[11][i] > 30:
                weighted_sum = weighted_sum + 0.12
            elif Clininc_Data[11][i] > 10:
                weighted_sum = weighted_sum + 0.06
            
            # FRAX Fx Hip Prob
            if Clininc_Data[12][i] > 25:
                weighted_sum = weighted_sum + 0.06
            elif Clininc_Data[12][i] > 10:
                weighted_sum = weighted_sum + 0.03

            # Metabolism Syndrome
            if Clininc_Data[13][i] == 1:
                weighted_sum = weighted_sum + 0.06
            
            # Any cardiovascular problems CVD=stroke, Heart failure, MI=heart attack
            if Outcome_Data[1][i] == 1 or Outcome_Data[3][i] == 1 or Outcome_Data[5][i] == 1:
                weighted_sum = weighted_sum + 0.1

            # Diabetes
            if Outcome_Data[7][i] == 1:
                weighted_sum = weighted_sum + 0.05
            
            # Alzheimer
            if Outcome_Data[19][i] == 1:
                weighted_sum = weighted_sum + 0.05
            
            # Cancer
            if Outcome_Data[21][i] == 1:
                weighted_sum = weighted_sum + 0.2

            temp_bio_age = ch_age[i] + diff * weighted_sum

        elif ch_age[i] > 80:
            
            # BMI > 30
            if Clininc_Data[5][i] == 0:
                weighted_sum = weighted_sum + 0.06

            # FRS 10 year
            if Clininc_Data[10][i] < 0.1:
                weighted_sum = weighted_sum + 0.3
            elif Clininc_Data[10][i] < 0.2:
                weighted_sum = weighted_sum + 0.15
            
            # FRAX Fx Prob
            if Clininc_Data[11][i] < 10:
                weighted_sum = weighted_sum + 0.12
            elif Clininc_Data[11][i] < 30:
                weighted_sum = weighted_sum + 0.06
            
            # FRAX Fx Hip Prob
            if Clininc_Data[12][i] < 10:
                weighted_sum = weighted_sum + 0.06
            elif Clininc_Data[12][i] < 25:
                weighted_sum = weighted_sum + 0.03

            # Metabolism Syndrome
            if Clininc_Data[13][i] == 0:
                weighted_sum = weighted_sum + 0.06
            
            # Any cardiovascular problems CVD=stroke, Heart failure, MI=heart attack
            if Outcome_Data[1][i] == 0 and Outcome_Data[3][i] == 0 and Outcome_Data[5][i] == 0:
                weighted_sum = weighted_sum + 0.1

            # Diabetes
            if Outcome_Data[7][i] == 0:
                weighted_sum = weighted_sum + 0.05
            
            # Alzheimer
            if Outcome_Data[19][i] == 0:
                weighted_sum = weighted_sum + 0.05
            
            # Cancer
            if Outcome_Data[21][i] == 0:
                weighted_sum = weighted_sum + 0.2

            temp_bio_age = ch_age[i] - diff * weighted_sum

        # if the sample's age is close to US average life expectancy
        else: 

            # BMI > 30
            if Clininc_Data[5][i] == 0:
                weighted_sum = weighted_sum - 0.03
            else:
                weighted_sum = weighted_sum + 0.03

            # FRS 10 year
            if Clininc_Data[10][i] < 0.1:
                weighted_sum = weighted_sum - 0.15
            elif Clininc_Data[10][i] > 0.3:
                weighted_sum = weighted_sum + 0.15
            
            # FRAX Fx Prob
            if Clininc_Data[11][i] < 10:
                weighted_sum = weighted_sum - 0.06
            elif Clininc_Data[11][i] > 30:
                weighted_sum = weighted_sum + 0.06
            
            # FRAX Fx Hip Prob
            if Clininc_Data[12][i] < 10:
                weighted_sum = weighted_sum - 0.03
            elif Clininc_Data[12][i] > 25:
                weighted_sum = weighted_sum + 0.03

            # Metabolism Syndrome
            if Clininc_Data[13][i] == 1:
                weighted_sum = weighted_sum + 0.03
            else:
                weighted_sum = weighted_sum - 0.03
            
            # Any cardiovascular problems CVD=stroke, Heart failure, MI=heart attack
            if Outcome_Data[1][i] == 0 and Outcome_Data[3][i] == 0 and Outcome_Data[5][i] == 0:
                weighted_sum = weighted_sum - 0.05
            else:
                weighted_sum = weighted_sum + 0.05

            # Diabetes
            if Outcome_Data[7][i] == 0:
                weighted_sum = weighted_sum - 0.025
            else:
                weighted_sum = weighted_sum + 0.025
            
            # Alzheimer
            if Outcome_Data[19][i] == 0:
                weighted_sum = weighted_sum - 0.025
            else:
                weighted_sum = weighted_sum + 0.025
            
            # Cancer
            if Outcome_Data[21][i] == 0:
                weighted_sum = weighted_sum - 0.1
            else:
                weighted_sum = weighted_sum + 0.1

            temp_bio_age = ch_age[i] + buff_range * weighted_sum


        temp_bio_age = round(temp_bio_age,1)
        bio_age.append(temp_bio_age)

            



In [140]:
# Use the Least Square linear regression model to find theta and predict
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler 

X = np.array(CT_Data)
X = X.T
X = X.astype(float)


y = np.array(bio_age)
y = y.T
y = y.astype(int)
y = y.reshape(-1, 1)

print(X.shape)
print(y.shape)

# Use least square to predict bio_age
theta = np.linalg.inv(X.T@X) @ X.T @ y

print(theta)

# Check regression fit
predicted_bio_age_LReg = []
for i in range(len(bio_age)):
    predicted_temp = X[i,:].T@theta
    predicted_bio_age_LReg.append(predicted_temp[0])
    predicted_bio_age_LReg[i] = round(predicted_bio_age_LReg[i],1)



(9223, 11)
(9223, 1)
[[ 2.35480514e-02]
 [-4.62416580e-02]
 [ 4.86024500e-02]
 [-8.28348089e-03]
 [ 2.25267479e-02]
 [ 2.23556288e+00]
 [-5.43205654e-02]
 [-4.21137481e-02]
 [ 2.54512844e-01]
 [ 1.67953314e-03]
 [ 4.51682949e-01]]


In [141]:
# Use the Least Square linear regression model to find theta and predict
model = Sequential()
model.add(Dense(16, input_dim=11, use_bias=True))
model.add(Dense(32, activation='relu',use_bias=True))
model.add(Dense(64, activation='relu',use_bias=True))
model.add(Dense(1))
opt = keras.optimizers.Adam(learning_rate=0.00005)
# compile the keras model
model.compile(loss='mean_squared_error', optimizer=opt)
# fit the keras model on the dataset
model.fit(X, y, epochs=800, batch_size=20,verbose=1)


predicted_bio_age_NNReg = []
test_y = model.predict(X)
for i in range((test_y.shape)[0]):
    temp_age = round(float(test_y[i,0]),1)
    predicted_bio_age_NNReg.append(temp_age)
    


Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/800
Epoch 74/800
Epoch 75/800
Epoch 76/800
Epoch 77/800
Epoch 78

In [142]:
# output chronological age, biological age, predicted Least Square biological age, and predicted Neural Network biological age
f = open("ch_bio_age_comparison.csv", "w")
f.write("ch_age,bio_age,predicted_LS,predicted_NN\n")

for i in range(len(ch_age)):
    f.write("{},{},{},{}\n".format(ch_age[i], bio_age[i], predicted_bio_age_LReg[i],predicted_bio_age_NNReg[i]))

f.close()