In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the project-local `profile_generation`) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import numpy as np
from scipy import stats
import seaborn as sns
from functools import reduce
# Silence pandas' SettingWithCopyWarning globally for this notebook.
pd.options.mode.chained_assignment = None 
from statistics import mean
# NOTE(review): star import -- helpers used below (get_sheet_names, clean_data,
# keep_ideal_data, cal_pred_data, keep_target_patients, Cum_wo_origin,
# Cum_origin_dp, PPM) are presumably defined in this project module; confirm.
from profile_generation import *
import warnings
# Suppress rank warnings (emitted by e.g. np.polyfit on poorly conditioned fits).
# NOTE(review): `np.RankWarning` lives in `np.exceptions` from NumPy 2.0 onward;
# verify the NumPy version this notebook is pinned to.
warnings.simplefilter('ignore', np.RankWarning)
from scipy.optimize import curve_fit
import matplotlib.patches as patches
# NOTE(review): duplicate of the `curve_fit` import two lines above (harmless).
from scipy.optimize import curve_fit
from openpyxl import load_workbook
# Display every row when a DataFrame is rendered (no truncation).
pd.set_option('display.max_rows', None)

In [3]:
# Workbook with one sheet per patient; the first `rows_to_skip` rows of every
# sheet are skipped when the sheet is read.
input_file = 'Retrospective Liver Transplant Data.xlsx'
rows_to_skip = 17


def _result_columns(max_count_input):
    """Return the ordered column names of a result frame.

    Layout: identifying columns, `max_count_input` fit-dose columns,
    `max_count_input` fit-response columns, then the coefficient /
    prediction / deviation columns.
    """
    fit_indices = range(1, max_count_input + 1)
    return (['patient', 'method', 'pred_day']
            + ['fit_dose_' + str(i) for i in fit_indices]
            + ['fit_response_' + str(i) for i in fit_indices]
            + ['dose', 'response', 'prev_coeff_2x', 'prev_coeff_1x', 'prev_coeff_0x',
               'prev_deviation', 'coeff_2x', 'coeff_1x', 'coeff_0x', 'prediction', 'deviation',
               'abs_deviation'])


# Get list of patients/sheet names
list_of_patients = get_sheet_names(input_file)

# Accumulators filled while looping over patients
list_of_patient_df = []
list_of_cal_pred_df = []
list_of_result_df = []

patients_to_exclude_linear = []
patients_to_exclude_quad = []

for patient in list_of_patients:

    df = pd.read_excel(input_file, sheet_name=patient, skiprows=rows_to_skip)

    df = clean_data(df, patient)
    df = keep_ideal_data(df, patient, list_of_patient_df)

    # Choose and keep data for calibration and efficacy-driven dosing.
    # The trailing argument is passed as 1 for the linear data set and 2 for
    # the quadratic one (presumably the polynomial degree -- see `deg` below).
    cal_pred_linear, patients_to_exclude_linear = cal_pred_data(df, patient, patients_to_exclude_linear, 1)
    cal_pred_quad, patients_to_exclude_quad = cal_pred_data(df, patient, patients_to_exclude_quad, 2)

    # Keep patient data with sufficient dose-response pairs and predictions
    cal_pred, list_of_cal_pred_df = keep_target_patients(patient, patients_to_exclude_linear,
                                                         patients_to_exclude_quad,
                                                         cal_pred_linear, cal_pred_quad,
                                                         list_of_cal_pred_df)

    if len(cal_pred) != 0:

        # Empty template frame handed to every prediction method for this patient
        max_count_input = len(cal_pred_linear)
        col_names = _result_columns(max_count_input)
        result = pd.DataFrame(columns=col_names)

        if patient not in patients_to_exclude_linear:
            deg = 1
            list_of_result_df = Cum_wo_origin(deg, cal_pred_linear, result, 'L_Cum_wo_origin', list_of_result_df)
            list_of_result_df = Cum_origin_dp(deg, cal_pred_linear, result, 'L_Cum_origin_dp', list_of_result_df)
            list_of_result_df = PPM(deg, cal_pred_linear, result, 'L_PPM_wo_origin', list_of_result_df)
            list_of_result_df = PPM(deg, cal_pred_linear, result, 'L_PPM_origin_dp', list_of_result_df, 'origin_dp')

        if patient not in patients_to_exclude_quad:
            deg = 2
            list_of_result_df = Cum_wo_origin(deg, cal_pred_quad, result, 'Q_Cum_wo_origin', list_of_result_df)
            list_of_result_df = Cum_origin_dp(deg, cal_pred_quad, result, 'Q_Cum_origin_dp', list_of_result_df)
            list_of_result_df = PPM(deg, cal_pred_quad, result, 'Q_PPM_wo_origin', list_of_result_df)
            # Fix: pass 'origin_dp' exactly like the linear counterpart above.
            # Without it this call ran with PPM's default mode, so the rows
            # labelled 'Q_PPM_origin_dp' did not use the origin_dp variant.
            list_of_result_df = PPM(deg, cal_pred_quad, result, 'Q_PPM_origin_dp', list_of_result_df, 'origin_dp')

# Print patients to exclude (de-duplicated and sorted)
patients_to_exclude_linear = sorted(set(patients_to_exclude_linear))
patients_to_exclude_quad = sorted(set(patients_to_exclude_quad))
print(f"Patients to exclude for linear methods: {patients_to_exclude_linear}")
print(f"Patients to exclude for quad methods: {patients_to_exclude_quad}")

# Join dataframes from individual patients
df = pd.concat(list_of_patient_df)
df.reset_index(inplace=True, drop=True)

cal_pred = pd.concat(list_of_cal_pred_df)

result_df = pd.concat(list_of_result_df)

# Per-patient frames can carry different numbers of fit_dose_/fit_response_
# columns, so rebuild the column order from the largest per-patient count of
# 'linear' rows in the combined calibration/prediction frame.
max_count_input = cal_pred[cal_pred['type']=='linear'].groupby('patient').count().max()['dose']
col_names = _result_columns(max_count_input)
result_df = result_df[col_names]

# Cast the identifiers to int so the sort below is numeric
result_df['patient'] = result_df['patient'].apply(int)
result_df['pred_day'] = result_df['pred_day'].apply(int)
result_df = result_df.sort_values(['patient', 'method', 'pred_day'])
result_df




Patient #117 has insufficient/<3 predictions (1 predictions) (for linear)!
Patient #117 has insufficient/<3 predictions (0 predictions) (for quadratic)!
Patient #121 has insufficient unique dose-response pairs for calibration (for quad)!
Patient #126 has insufficient unique dose-response pairs for calibration (for quad)!
Patient #130 has insufficient/<3 predictions (2 predictions) (for linear)!
Patient #130 has insufficient/<3 predictions (1 predictions) (for quadratic)!
Patient #133 has insufficient/<3 predictions (0 predictions) (for linear)!
Patient #133 has insufficient unique dose-response pairs for calibration (for quad)!
Patients to exclude for linear methods: ['117', '130', '133']
Patients to exclude for quad methods: ['117', '121', '126', '130', '133']


Unnamed: 0,patient,method,pred_day,fit_dose_1,fit_dose_2,fit_dose_3,fit_dose_4,fit_dose_5,fit_dose_6,fit_dose_7,...,prev_coeff_2x,prev_coeff_1x,prev_coeff_0x,prev_deviation,coeff_2x,coeff_1x,coeff_0x,prediction,deviation,abs_deviation
0,84,L_Cum_origin_dp,4,0.0,0.5,1.0,,,,,...,,,,,,2.8,0.333333,4.533333,-1.333333,1.333333
1,84,L_Cum_origin_dp,5,0.0,0.5,1.0,1.5,,,,...,,,,,,2.0,0.6,3.6,-0.5,0.5
2,84,L_Cum_origin_dp,6,0.0,0.5,1.0,1.5,1.5,,,...,,,,,,1.823529,0.658824,6.129412,1.770588,1.770588
3,84,L_Cum_origin_dp,7,0.0,0.5,1.0,1.5,1.5,3.0,,...,,,,,,2.4,0.233333,7.433333,2.566667,2.566667
4,84,L_Cum_origin_dp,8,0.0,0.5,1.0,1.5,1.5,3.0,3.0,...,,,,,,2.88125,-0.121875,8.521875,1.778125,1.778125
0,84,L_Cum_wo_origin,4,0.5,1.0,,,,,,...,,,,,,0.8,2.0,3.2,0.0,0.0
1,84,L_Cum_wo_origin,5,0.5,1.0,1.5,,,,,...,,,,,,0.8,2.0,3.2,-0.1,0.1
2,84,L_Cum_wo_origin,6,0.5,1.0,1.5,1.5,,,,...,,,,,,0.745455,2.036364,4.272727,3.627273,3.627273
3,84,L_Cum_wo_origin,7,0.5,1.0,1.5,1.5,3.0,,,...,,,,,,2.3,0.43,7.33,2.67,2.67
4,84,L_Cum_wo_origin,8,0.5,1.0,1.5,1.5,3.0,3.0,,...,,,,,,2.92093,-0.211628,8.551163,1.748837,1.748837


In [14]:
# Debug cell: display whatever `cal_pred` currently holds in the kernel.
# NOTE(review): the execution count of this cell is out of sequence with the
# cells around it, so the saved output may not match a fresh Run All.
cal_pred

Unnamed: 0,index


In [52]:

# Debug cell: display `result`, the empty template frame most recently created
# inside the patient loop of the main processing cell.
result




Unnamed: 0,patient,method,pred_day,fit_dose_1,fit_dose_2,fit_dose_3,fit_dose_4,fit_dose_5,fit_dose_6,fit_dose_7,...,prev_coeff_2x,prev_coeff_1x,prev_coeff_0x,prev_deviation,coeff_2x,coeff_1x,coeff_0x,prediction,deviation,abs_deviation


In [14]:
# Scratch experiment: how pd.concat aligns frames whose column sets differ.
# `a` lacks 'col3', `b` has it; both get a single row at index label 0.
a = pd.DataFrame(columns=['col1', 'col2', 'COL1'])
a.loc[0,:] = [1,2,3]
b = pd.DataFrame(columns=['col1', 'col2', 'col3', 'COL1'])
b.loc[0,:] = [1,2,3,4]
# Columns are matched by name (case-sensitive: 'col1' and 'COL1' are distinct);
# the original index labels are kept, so label 0 appears twice in the result.
pd.concat([a, b])


Unnamed: 0,col1,col2,COL1,col3
0,1,2,3,
0,1,2,4,3.0


In [45]:
# Scratch experiment: label-based slicing with .loc on a small frame.
a = pd.DataFrame(columns=['a', 'b', 'c'])
# Assigning through .loc with a new row label adds that row; the scalar is
# written to every column in the 'a':'c' label slice.
a.loc[0,'a':'c'] = 5
a.loc[1, 'a': 'c'] = 4
# 0:1-1 is the label slice 0:0; .loc slices include both endpoints, so this
# selects row 0 only.
a.loc[0:1-1, 'a']

0    5
Name: a, dtype: object