# Implementation of the SCORE algorithm to microbiota 345V5 dataset
## it will add a new column called 'score'

In [1]:
import pandas as pd
import os
import math

# defining functions for the SCORE algorithm

In [2]:
def step1(age,sex,chd=True):
    #CHD
    if (chd):            #constants for chd
        if (sex):              # 0 for men and 1 for women
            a=-29.8
            p = 6.36
        else:
            a=-22.1
            p = 4.71
        
    else:                #constants for non chd
        if (sex):
            a = -31.0
            p = 6.62
        else:

            a = -26.7
            p = 5.64
            
    #print("a =", a, "; p =",p)
   
    s = math.exp(-(math.exp(a))*((age-20)**p))
    return s

In [3]:
def step2(chol, SBP, smoker, chd=True):
    if chd:  
        c_smoker = 0.71
        c_chol = 0.24
        c_SBP = 0.018
    else:
        c_smoker = 0.63
        c_chol = 0.02
        c_SBP = 0.022
    
    w = (c_chol*(chol-6))+(c_SBP*(SBP-120))+(c_smoker*smoker)
    return w

In [4]:
def score_algorithm( age, chol, SBP, sex, smoker):
    #CHD 
    s = step1(age,sex)
    s10 = step1(age+10,sex)
    
    w = step2(chol, SBP, smoker)
    
    s=s**(math.exp(w))
    s10=s10**(math.exp(w))
    try:
        stot=s10/s
    except:
        stot=1              
    riskc = 1 -stot
    
    
    #NON CHD
    s = step1(age,sex,chd=False)
    s10 = step1(age+10,sex, chd=False)
  
    w = step2(chol, SBP, smoker, chd=False)
    
    s=s**(math.exp(w))
    s10=s10**(math.exp(w)) 
    try:
        stot=s10/s
    except:
        stot=1
    risknon = 1 -stot  
    
    
    #print ("risk CHD: ", riskc *100)
    #print ("risk nonCHD: " ,risknon * 100)
    risktot = 1 - (1-riskc) * (1-risknon)
    
    
    #print('total RISK:',risktot)
    return risktot

# Importing CSV file

In [6]:
#importing csv file
data_frame = pd.read_excel("./Data/microbiota_clean_wsex.xlsx")

In [None]:
data_frame.head(100)

In [7]:
data_frame['Sesso']

0      F
1      F
2      F
3      M
4      M
5      M
6      F
7      M
8      F
9      F
10     M
11     F
12     M
13     F
14     M
15     F
16     F
17     M
18     F
19     F
20     F
21     F
22     F
23     M
24     F
25     F
26     F
27     F
28     M
29     M
      ..
314    F
315    F
316    F
317    F
318    F
319    F
320    F
321    F
322    M
323    F
324    M
325    F
326    F
327    F
328    M
329    F
330    F
331    F
332    F
333    M
334    M
335    F
336    F
337    M
338    M
339    M
340    F
341    M
342    F
343    F
Name: Sesso, Length: 344, dtype: object

In [8]:
for i, el in enumerate(data_frame.columns):
    print (i, el)

0 Unnamed: 0
1 Codice_PLIC
2 Eta
3 Alcool
4 Fumo
5 Attivita_sportiva
6 Ipolipemizzanti
7 Antipertensivi
8 Ipoglicemizzanti
9 ALTEZZA_cm
10 PESO_kg
11 BMI
12 Vita_V
13 Fianchi_H
14 V_H
15 PAS
16 PAD
17 Col_tot
18 Col_HDL
19 Trigliceridi
20 Col_LDL
21 Apo_A1
22 Apo_B
23 Glicemia
24 Creatininemia
25 Uricemia
26 ALT
27 AST
28 GGT
29 CK
30 leucociti
31 eritrociti
32 emoglobina
33 ematocrito
34 vol_cell_medio
35 emoglobina_cell_medio
36 conc_hb_cell_medio
37 RDW_CV
38 piastrine
39 neutrofili
40 linfociti
41 monociti
42 eosinofili
43 basofili
44 CCA_IMTmedio
45 CCA_IMTmassimo
46 PLACCA
47 g_1
48 g_10
49 g_100
50 g_101
51 g_102
52 g_103
53 g_104
54 g_105
55 g_106
56 g_107
57 g_108
58 g_109
59 g_11
60 g_110
61 g_111
62 g_112
63 g_113
64 g_114
65 g_115
66 g_116
67 g_117
68 g_119
69 g_12
70 g_120
71 g_121
72 g_122
73 g_123
74 g_124
75 g_125
76 g_126
77 g_127
78 g_128
79 g_129
80 g_13
81 g_130
82 g_131
83 g_132
84 g_133
85 g_134
86 g_135
87 g_136
88 g_137
89 g_138
90 g_139
91 g_14
92 g_141
93 g_14

# processing of column 'Sesso'

In [9]:
for index, row in data_frame.iterrows():
    if (row['Sesso']=='F'):
        data_frame['Sesso'][index]=1
    elif (row['Sesso']=='M'):
        data_frame['Sesso'][index]=0
# 0 for men and 1 for female

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [None]:
for index, el in enumerate(data_frame.columns):
    print (index, el)

# Add new column 

In [10]:
#adding a new column with 'default ' as value
data_frame['SCORE']='default'

In [11]:
column_list=['Eta','Sesso','Fumo','Col_tot','PAS']
column_list.append('SCORE')
print(column_list)

['Eta', 'Sesso', 'Fumo', 'Col_tot', 'PAS', 'SCORE']


In [12]:

data_frame[column_list].head()

Unnamed: 0,Eta,Sesso,Fumo,Col_tot,PAS,SCORE
0,83.0,1,0.0,231.0,170.0,default
1,59.0,1,0.0,235.0,140.0,default
2,59.0,1,0.0,202.0,125.0,default
3,57.0,0,0.0,192.0,120.0,default
4,57.0,0,0.0,215.0,115.0,default


In [13]:
#new panda frame with the column i want
score_frame=data_frame[column_list]

In [14]:
score_frame['Col_tot']

0      231.0
1      235.0
2      202.0
3      192.0
4      215.0
5      222.0
6      210.0
7      240.0
8      212.0
9      243.0
10     193.0
11     214.0
12     177.0
13     204.0
14     185.0
15     203.0
16     187.0
17     176.0
18     181.0
19     197.0
20     186.0
21     154.0
22     183.0
23     208.0
24     157.0
25     205.0
26     213.0
27     164.0
28     179.0
29     262.0
       ...  
314    163.0
315    250.0
316    228.0
317    160.0
318    260.0
319    244.0
320    163.0
321    205.0
322    180.0
323    185.0
324    178.0
325    259.0
326    190.0
327    268.0
328    251.0
329    211.0
330    200.0
331    252.0
332    192.0
333    180.0
334    132.0
335    179.0
336    210.0
337    165.0
338    225.0
339    171.0
340    165.0
341    228.0
342      NaN
343      NaN
Name: Col_tot, Length: 344, dtype: float64

# Applying the SCORE algorithm for each row of the dataset

In [15]:
data_frame.shape

(344, 314)

In [16]:
data_frame['Col_tot'][340:]

340    165.0
341    228.0
342      NaN
343      NaN
Name: Col_tot, dtype: float64

In [18]:

for index, row in score_frame.iterrows():
    row['Col_tot']*=0.02586   #converting total cholesterol level from mg/dL to mmol/L
    row['SCORE']=score_algorithm(row['Eta'],row['Col_tot'], row['PAS'], row['Sesso'], row['Fumo'])
    data_frame['SCORE'][index]=row['SCORE']*100        
    print (index, row['Eta'],row['Col_tot'], row['PAS'], data_frame['SCORE'][index])
    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


0 83.0 5.973660000000001 170.0 22.879006970687243
1 59.0 6.077100000000001 140.0 1.353693741270079
2 59.0 5.22372 125.0 0.8983453824518706
3 57.0 4.965120000000001 120.0 1.4842635988030595
4 57.0 5.5599 115.0 1.491918305705886
5 67.0 5.74092 120.0 3.898358266747304
6 60.0 5.4306 120.0 1.836433057853204
7 60.0 6.2064 115.0 2.1844951734130147
8 59.0 5.4823200000000005 115.0 0.761695281940955
9 79.0 6.283980000000001 160.0 14.683229147533638
10 67.0 4.99098 115.0 3.1445208029300398
11 86.0 5.53404 120.0 10.76877407997655
12 69.0 4.5772200000000005 130.0 4.5745463860679125
13 65.0 5.275440000000001 120.0 3.1540705035762784
14 63.0 4.7841000000000005 135.0 3.2575948549126266
15 68.0 5.24958 115.0 2.0063620429308204
16 78.0 4.83582 110.0 4.354457193536366
17 68.0 4.55136 130.0 4.231668702024816
18 70.0 4.6806600000000005 120.0 2.5202491625678336
19 73.0 5.09442 125.0 3.8855162429266565
20 82.0 4.80996 110.0 6.019057966427832
21 68.0 3.98244 120.0 1.9055933554112614
22 74.0 4.73238 130.0 4.50

197 74.0 4.965120000000001 135.0 5.102725805757579
198 48.0 5.870220000000001 120.0 0.6814615916618361
199 78.0 4.70652 120.0 5.2214847829238575
200 67.0 4.70652 140.0 2.78827344244178
201 77.0 5.068560000000001 125.0 5.520782552820691
202 74.0 3.4393800000000003 127.5 5.314501948655126
203 76.0 5.4823200000000005 120.0 6.981554448059146
204 76.0 5.32716 105.0 3.531055214113543
205 71.0 4.21518 125.0 2.9071413482353536
206 71.0 5.5599 162.5 7.111432650736893
207 69.0 6.90462 130.0 3.7195533937185887
208 81.0 4.86168 120.0 8.680834522218273
209 60.0 5.76678 130.0 1.2015721184546413
210 65.0 5.6892000000000005 122.5 1.8029002673343841
211 70.0 5.66334 120.0 2.8484573865746654
212 79.0 7.189080000000001 152.5 19.526266906415323
213 62.0 4.34448 117.5 1.9172441636884363
214 58.0 1.8102 130.0 1.249896792790528
215 62.0 4.44792 140.0 3.135805413603465
216 74.0 5.71506 122.5 4.377871770380004
217 67.0 3.95658 100.0 1.1463384988512293
218 69.0 4.6548 110.0 1.865098611118643
219 77.0 3.879 150.

In [19]:
data_frame.shape

(344, 314)

In [None]:
data_frame

# Exporting the new DATASET

In [20]:
filename="./microbiota_clean_wSCORE.xlsx"

In [21]:
export_csv = data_frame.to_excel (filename, index = None, header=True) 

In [None]:
## read the new dataset

In [None]:
data_frame = pd.read_csv(filename,error_bad_lines=False, sep=',', low_memory=False)
data_frame[column_list][::-1]