# Choose the right Predicted FEV1

Damian uses different variables to define Predicted FEV1 (see [here](https://tristantreb.github.io/master_thesis_CF_ML/Code/smartcare/populateDerivedColsInMLTables.html)):
- Predicted FEV1: value from the clinical data
- FEV1SetAs = round(PredictedFEV1)
- CalcFEV1SetAs differs from PredictedFEV1 because it uses a corrected age, floor(years(patientStudyStartDate - patientDOB)), instead of the age that was entered during the study.

Conclusion
- The recorded Age and the calculated age differ by less than 1.2 years: abs(Age - Calc Age) < 1.2

In [1]:
import patient_data
from biology import *

from dateutil.relativedelta import relativedelta
import plotly.graph_objects as go

def move_column_next_to(df, col_name, taget_col_name):
    """Move a column so that it sits immediately to the right of another column.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame whose columns are reordered.
        col_name: Label of the column to move.
        taget_col_name: Label of the column that ``col_name`` must follow.

    Returns:
        The same ``df`` object, with ``col_name`` directly after
        ``taget_col_name``.

    Raises:
        KeyError: If either label is not a column of ``df``. Both lookups
            happen before any mutation, so the frame is left untouched.
    """
    target_idx = df.columns.get_loc(taget_col_name)
    source_idx = df.columns.get_loc(col_name)

    # A column is trivially "next to" itself; nothing to move.
    if source_idx == target_idx:
        return df

    # Popping a column located left of the target shifts the target one
    # position to the left, so the insertion point must account for that.
    insert_at = target_idx + 1 if source_idx > target_idx else target_idx
    df.insert(insert_at, col_name, df.pop(col_name))
    return df

In [2]:
# Load the patient table through the project's `patient_data` loader.
# NOTE(review): `use_calc=False` presumably keeps the clinically recorded values
# instead of recalculated ones — confirm against `patient_data.load`.
df = patient_data.load(use_calc=False)


** Loading patient data **

* Dropping unnecessary columns from patient data *
Columns filtered: ['ID', 'Study Date', 'DOB', 'Age', 'Sex', 'Height', 'Weight', 'Predicted FEV1', 'FEV1 Set As']
Columns dropped: {'Comments', 'Sputum Samples', 'Less Exacerbation', 'Date Last PE Stop', 'GP Letter Sent', 'Remote Monitoring App User ID', 'Age 18 Years', 'Transplant Recipients', 'Unable Informed Consent', 'Genetic Testing', 'Study Email', 'Pulmonary Exacerbation', 'Date Last PE Start', 'Date Consent Obtained', 'CFQR Quest Comp', 'Unable Sputum Samples', 'Informed Consent', 'Telemetric Measures', 'Freezer Required', 'Study Number', 'Inconvenience Payment', 'Hospital'}

* Correcting patient data *
ID 60: Corrected height 60 from 1.63 to 163.0
ID 66: Corrected height for ID 66 from 1.62 to 162.0

* Applying data sanity checks *
Loaded patient data with 147 entries (147 initially)


  for idx, row in parser.parse():
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.Height.loc[df.ID == "60"] = tmp * 100


In [3]:
def get_years_decimal_delta(start_date, end_date):
    """Return the time elapsed from ``start_date`` to ``end_date`` in decimal years.

    Only the whole years and whole months of the calendar difference are
    counted; leftover days are ignored, so the result moves in steps of 1/12.
    """
    elapsed = relativedelta(end_date, start_date)
    whole_years = elapsed.years
    fractional_years = elapsed.months / 12
    return whole_years + fractional_years

# Age at study start derived from DOB and Study Date (month resolution),
# computed once and reused for the rounded age and the discrepancy column.
calc_age_exact = df.apply(
    lambda patient: get_years_decimal_delta(patient["DOB"], patient["Study Date"]),
    axis=1,
)
df["Calc Age Exact"] = calc_age_exact
df["Calc Age"] = calc_age_exact.map(round)
df["diff Age - Calc Age Exact"] = df["Age"] - df["Calc Age Exact"]

# Each column is inserted right after "Age", so the last one moved ends up
# closest to it: Age, Calc Age, Calc Age Exact, diff Age - Calc Age Exact.
for derived_col in ("diff Age - Calc Age Exact", "Calc Age Exact", "Calc Age"):
    move_column_next_to(df, derived_col, "Age")

# Patients whose recorded age is more than one year away from the derived age.
df.loc[df["diff Age - Calc Age Exact"].abs() > 1]

Unnamed: 0,ID,Study Date,DOB,Age,Calc Age,Calc Age Exact,diff Age - Calc Age Exact,Sex,Height,Weight,Predicted FEV1,FEV1 Set As
98,138,2015-08-06,1985-10-31,31,30,29.75,1.25,Male,172.0,66.6,4.06,4.0
100,80,2015-08-10,1994-08-16,22,21,20.916667,1.083333,Male,159.0,66.2,3.62,3.6
104,140,2015-09-11,1979-11-09,37,36,35.833333,1.166667,Female,162.0,58.4,2.92,2.9


In [8]:
# NOTE: this overwrites the recorded Age with the age derived from DOB and
# Study Date; the "diff Age - Calc Age Exact" column still reflects the old Age.
df["Age"] = df["Calc Age"]

# Recompute the predicted FEV1 row by row. `calc_predicted_fev1` comes from
# `biology` and receives the whole row (presumably reading Age — confirm).
df["Calc Predicted FEV1"] = df.apply(calc_predicted_fev1, axis=1)

# Order the rows by the clinically recorded Predicted FEV1 before plotting.
df.sort_values(by="Predicted FEV1", inplace=True)

# One marker trace per FEV1 variant, all plotted against the patient ID.
fig = go.Figure()
for fev1_col in ("Predicted FEV1", "FEV1 Set As", "Calc Predicted FEV1"):
    fig.add_trace(
        go.Scatter(
            x=df["ID"],
            y=df[fev1_col],
            name=fev1_col,
            mode="markers",
            opacity=0.9,
        )
    )
fig.update_traces(marker=dict(size=5), selector=dict(mode="markers"))
fig.show()

# Patients whose recorded Predicted FEV1 and FEV1 Set As differ by more than
# 0.1, largest positive difference first.
diff_col = "diff Predicted FEV1 and FEV1 Set As"
df[diff_col] = df["Predicted FEV1"] - df["FEV1 Set As"]

df.loc[df[diff_col].abs() > 0.1].sort_values(by=diff_col, ascending=False)

Unnamed: 0,ID,Study Date,DOB,Age,Calc Age,Calc Age Exact,diff Age - Calc Age Exact,Sex,Height,Weight,Predicted FEV1,FEV1 Set As,Calc Predicted FEV1 (L),diff Predicted FEV1 and FEV1 Set As,Calc Predicted FEV1
6,152,2016-08-15,1980-05-23,36,36,36.166667,-0.166667,Male,175.0,73.9,4.2,1.3,3.991,2.9,3.991
12,172,2016-09-27,1991-06-18,25,25,25.25,-0.25,Female,159.0,51.4,3.14,1.13,3.0555,2.01,3.0555
8,153,2016-08-15,1980-12-14,36,36,35.666667,0.333333,Male,189.0,77.4,4.97,3.36,4.593,1.61,4.593
7,151,2016-07-27,1988-10-26,28,28,27.75,0.25,Female,150.6,69.1,2.75,1.2,2.6487,1.55,2.6487
9,169,2016-08-24,1993-11-11,23,23,22.75,0.25,Female,168.0,67.0,4.14,3.3,3.461,0.84,3.461
37,142,2016-02-09,1986-10-14,29,29,29.25,0.75,Male,173.0,73.0,4.81,4.1,4.108,0.71,4.108
10,170,2016-09-16,1994-03-10,22,22,22.5,-0.5,Male,171.0,75.0,4.28,3.61,4.225,0.67,4.225
13,173,2016-09-27,1980-01-17,37,37,36.666667,-0.666667,Female,161.0,61.6,3.03,2.59,2.8345,0.44,2.8345
108,93,2015-12-18,1985-01-27,31,31,30.833333,0.166667,Female,170.0,65.2,3.7,3.4,3.34,0.3,3.34


In [4]:
# NOTE: this overwrites the recorded Age with the age derived from DOB and
# Study Date; the "diff Age - Calc Age Exact" column still reflects the old Age.
df["Age"] = df["Calc Age"]

# Recompute the predicted FEV1 row by row. `calc_predicted_fev1` comes from
# `biology` and receives the whole row (presumably reading Age — confirm).
df["Calc Predicted FEV1"] = df.apply(calc_predicted_fev1, axis=1)

# Order the rows by the clinically recorded Predicted FEV1 before plotting.
df.sort_values(by="Predicted FEV1", inplace=True)

# Recorded versus recalculated predicted FEV1, plotted against the patient ID.
fig = go.Figure()
for fev1_col in ("Predicted FEV1", "Calc Predicted FEV1"):
    fig.add_trace(
        go.Scatter(
            x=df["ID"],
            y=df[fev1_col],
            name=fev1_col,
            mode="markers",
            opacity=0.9,
        )
    )
fig.update_traces(marker=dict(size=5), selector=dict(mode="markers"))
fig.show()

# Patients whose recorded Predicted FEV1 and recalculated Predicted FEV1
# differ by more than 0.1, largest positive difference first.
diff_col = "diff Predicted FEV1 and Calc Predicted FEV1"
df[diff_col] = df["Predicted FEV1"] - df["Calc Predicted FEV1"]

df.loc[df[diff_col].abs() > 0.1].sort_values(by=diff_col, ascending=False)

Unnamed: 0,ID,Study Date,DOB,Age,Calc Age,Calc Age Exact,diff Age - Calc Age Exact,Sex,Height,Weight,Predicted FEV1,FEV1 Set As,Calc Predicted FEV1,diff Predicted FEV1 and Calc Predicted FEV1
37,142,2016-02-09,1986-10-14,29,29,29.25,0.75,Male,173.0,73.0,4.81,4.1,4.108,0.702
9,169,2016-08-24,1993-11-11,23,23,22.75,0.25,Female,168.0,67.0,4.14,3.3,3.461,0.679
137,196,2016-08-02,1983-03-17,33,33,33.333333,0.666667,Female,173.5,77.4,4.06,4.1,3.42825,0.63175
54,57,2016-01-25,1985-10-24,30,30,30.25,-0.25,Female,167.7,70.2,3.8,3.8,3.27415,0.52585
8,153,2016-08-15,1980-12-14,36,36,35.666667,0.333333,Male,189.0,77.4,4.97,3.36,4.593,0.377
108,93,2015-12-18,1985-01-27,31,31,30.833333,0.166667,Female,170.0,65.2,3.7,3.4,3.34,0.36
20,180,2016-10-28,1980-11-21,36,36,35.916667,0.083333,Female,183.0,69.9,4.0,4.0,3.7285,0.2715
134,193,2016-07-22,1966-02-25,50,50,50.333333,0.666667,Female,162.5,69.3,2.81,2.8,2.56875,0.24125
6,152,2016-08-15,1980-05-23,36,36,36.166667,-0.166667,Male,175.0,73.9,4.2,1.3,3.991,0.209
141,200,2016-10-21,1987-02-27,30,30,29.583333,0.416667,Female,155.0,54.0,2.98,3.0,2.7725,0.2075
