# Implementation of the SCORE algorithm for the PLIC-MILANO dataset
## It adds a new column called 'SCORE'

In [None]:
import pandas as pd
import os
import math

# defining functions for the SCORE algorithm

In [None]:
def step1(age, sex, chd=True):
    """Evaluate ``exp(-exp(a) * (age - 20) ** p)`` for the given age and sex.

    The constants ``a`` and ``p`` depend on the sex and on which endpoint is
    being modelled.

    Args:
        age: Age in years. NOTE(review): the formula raises ``age - 20`` to a
            non-integer power, so it presumably expects ``age >= 20`` - confirm.
        sex: Falsy (0) for men, truthy (1) for women.
        chd: When true use the CHD constants, otherwise the non-CHD ones.

    Returns:
        The value of the expression above (presumably the baseline survival
        probability of the SCORE model - confirm against the publication).
    """
    if chd:
        # CHD constants: women first, men second
        alpha, power = (-29.8, 6.36) if sex else (-22.1, 4.71)
    else:
        # non-CHD constants: women first, men second
        alpha, power = (-31.0, 6.62) if sex else (-26.7, 5.64)

    return math.exp(-(math.exp(alpha)) * (age - 20) ** power)

In [None]:
def step2(chol, SBP, smoker, chd=True):
    """Return the weighted sum of the risk factors.

    Each factor is centred on a reference value (6 for cholesterol, 120 for
    SBP) and multiplied by a coefficient that depends on the endpoint.

    Args:
        chol: Total cholesterol; the caller in this file converts it to mmol/L
            before passing it in.
        SBP: Blood-pressure value (presumably systolic, in mmHg - confirm).
        smoker: 1 for a smoker, 0 otherwise.
        chd: When true use the CHD coefficients, otherwise the non-CHD ones.

    Returns:
        ``c_chol*(chol-6) + c_SBP*(SBP-120) + c_smoker*smoker``.
    """
    # coefficients in the order (smoker, cholesterol, SBP)
    c_smoker, c_chol, c_SBP = (0.71, 0.24, 0.018) if chd else (0.63, 0.02, 0.022)

    chol_term = c_chol * (chol - 6)
    sbp_term = c_SBP * (SBP - 120)
    smoker_term = c_smoker * smoker
    return chol_term + sbp_term + smoker_term

In [None]:
def _ten_year_risk(age, chol, SBP, sex, smoker, chd):
    """Return the 10-year risk for one endpoint (CHD if *chd* is true, else non-CHD)."""
    s = step1(age, sex, chd=chd)
    s10 = step1(age + 10, sex, chd=chd)

    w = step2(chol, SBP, smoker, chd=chd)
    weight = math.exp(w)

    s = s ** weight
    s10 = s10 ** weight
    try:
        stot = s10 / s
    except ZeroDivisionError:
        # s underflowed to 0.0: keep the original fallback (ratio 1 -> risk 0)
        # but no longer hide unrelated errors behind a bare ``except``.
        stot = 1
    return 1 - stot


def score_algorithm( age, chol, SBP, sex, smoker):
    """Return the combined 10-year risk (CHD and non-CHD) as a fraction in 0..1.

    For each endpoint the risk is ``1 - S(age + 10) / S(age)``, where ``S`` is
    ``step1(...) ** exp(step2(...))``. The two endpoint risks are combined as
    ``1 - (1 - risk_chd) * (1 - risk_non_chd)``.

    Args:
        age: Age in years.
        chol: Total cholesterol (the caller in this file passes mmol/L).
        SBP: Blood-pressure value passed through to ``step2``.
        sex: Falsy (0) for men, truthy (1) for women.
        smoker: 1 for a smoker, 0 otherwise.

    Returns:
        The total risk; multiply by 100 for a percentage.
    """
    riskc = _ten_year_risk(age, chol, SBP, sex, smoker, chd=True)
    risknon = _ten_year_risk(age, chol, SBP, sex, smoker, chd=False)
    return 1 - (1 - riskc) * (1 - risknon)

# Importing CSV file

In [None]:
# Import the ';'-separated source CSV.
# ``on_bad_lines="warn"`` replaces ``error_bad_lines=False`` (deprecated in
# pandas 1.3, removed in 2.0): malformed lines are still dropped with a warning.
data_frame = pd.read_csv("PLIC-milano.csv", on_bad_lines="warn", sep=';', low_memory=False)

In [None]:
#for i, el in enumerate(data_frame.columns):
    #print (i, el)

# processing of column 'smoking'

In [None]:
#function that checks the smoking column for values such as 'ex', 'other', ... and converts them to 'si'
def check_smoking_column():
    """Overwrite unexpected values in ``data_frame['smoking']`` with ``'si'``.

    A string entry that starts with neither ``'si'`` nor ``'no'`` (for example
    ``'ex'`` or ``'other'``) is replaced by ``'si'``. Non-string entries, such
    as missing values, are left untouched instead of raising on
    ``.startswith``.

    Works on the module-level ``data_frame`` and prints the row position
    followed by ``"fatto"`` for every row it rewrites.
    """
    smoking = data_frame['smoking']
    cleaned = []
    for position, value in enumerate(smoking):
        if isinstance(value, str) and not value.startswith(('si', 'no')):
            cleaned.append('si')
            print(position, "fatto")
        else:
            cleaned.append(value)
    # Replace the whole column in one assignment: the chained form
    # ``data_frame['smoking'][i] = ...`` may write to a temporary copy and
    # treats the positional counter as an index label.
    data_frame['smoking'] = pd.Series(cleaned, index=smoking.index, dtype=object)

In [None]:
#function for the smoking column: converts every 'si' to 1 and every 'no' to 0
def bool_smoking_column():
    """Encode ``data_frame['smoking']``: ``'si'`` becomes 1 and ``'no'`` becomes 0.

    Entries that are exactly neither ``'si'`` nor ``'no'`` are left unchanged.

    Works on the module-level ``data_frame`` and prints the row position
    followed by ``"fatto"`` for every row it converts.
    """
    encoding = {'si': 1, 'no': 0}
    smoking = data_frame['smoking']
    converted = []
    for position, value in enumerate(smoking):
        if isinstance(value, str) and value in encoding:
            converted.append(encoding[value])
            print(position, "fatto")
        else:
            converted.append(value)
    # Replace the whole column in one assignment instead of the chained
    # ``data_frame['smoking'][i] = ...``; dtype=object keeps the values as
    # plain ints even when unconverted entries remain in the column.
    data_frame['smoking'] = pd.Series(converted, index=smoking.index, dtype=object)

In [None]:
# Step 1 of the smoking clean-up: turn unexpected values into 'si'.
check_smoking_column()


In [None]:
# Step 2 of the smoking clean-up: encode 'si' as 1 and 'no' as 0.
bool_smoking_column()

In [None]:
# Show the smoking column to inspect the result of the two conversions.
data_frame['smoking']

# Add new column 

In [None]:
# Add the new 'SCORE' column, filled with the placeholder string 'default';
# the scoring loop further down overwrites it row by row.
data_frame['SCORE']='default'

In [None]:
# Input columns read by the scoring loop, followed by the output column.
column_list = [
    'age',
    'gender',
    'smoking',
    'total cholesterol',
    'pas',
    'SCORE',
]
print(column_list)

In [None]:

# Preview the first rows, restricted to the columns in column_list.
data_frame[column_list].head()

In [None]:
# New pandas DataFrame holding only the columns listed in column_list;
# the scoring loop iterates over this frame.
score_frame=data_frame[column_list]

# Applying the SCORE algorithm for each row of the dataset

In [None]:

# Compute the SCORE risk (as a percentage) for every row of score_frame and
# store it in data_frame['SCORE'].
scores = []
for index, row in score_frame.iterrows():
    # converting total cholesterol level from mg/dL to mmol/L
    cholesterol_mmol = row['total cholesterol'] * 0.02586

    try:
        risk = score_algorithm(row['age'], cholesterol_mmol, row['pas'], row['gender'], row['smoking'])
    except (TypeError, ValueError, ArithmeticError):
        # Best-effort fallback kept from the original: a row whose values cannot
        # be scored gets 0. Only computation errors are caught, so a missing
        # column (KeyError) is no longer silently turned into a column of zeros.
        risk = 0

    percent = risk * 100      #from 0% to a 100%
    scores.append(percent)
    print (index, percent)

# One whole-column assignment instead of the chained
# ``data_frame['SCORE'][index] = ...``, which may write to a temporary copy.
data_frame['SCORE'] = pd.Series(scores, index=score_frame.index, dtype=object)
    

In [None]:
# Preview the first 100 rows, now including the computed SCORE column.
data_frame.head(100)

# Exporting the new DATASET

In [None]:
# Output path for the dataset with the added SCORE column.
filename="./PLIC_MILANO_wSCORE.csv"

In [None]:
# Write the dataset to `filename` with a header row and without the index.
# NOTE(review): to_csv presumably returns None when given a path, so
# export_csv would hold no data - confirm before relying on it.
export_csv = data_frame.to_csv (filename, index = None, header=True) 

In [None]:
## read the new dataset

In [None]:
# Re-read the exported file (',' separated) to check what was written.
# ``on_bad_lines="warn"`` replaces ``error_bad_lines=False`` (deprecated in
# pandas 1.3, removed in 2.0): malformed lines are still dropped with a warning.
data_frame = pd.read_csv(filename, on_bad_lines="warn", sep=',', low_memory=False)
# Show the selected columns in reverse row order.
data_frame[column_list][::-1]