Run the referrals and claims queries separately before using this file.
This file imports the results of those queries, cleans and standardizes each, and merges them for analysis.

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

## Import Data

In [None]:
# Load the pipe-delimited 2018 referrals extract (output of the referrals query).
refs = pd.read_csv('../data/referrals_2018_all.csv', sep='|')

In [None]:
# Load the pipe-delimited 2018 claims extract (output of the claims query).
claims = pd.read_csv('../data/claims_2018_all.csv', sep='|')

In [None]:
# Load the pipe-delimited user-event ("touch") counts; joined to referrals
# later on `referral_ident`.
# NOTE(review): presumably one row per referral -- confirm, since duplicate
# referral_ident values would duplicate referral rows in the left join.
touches = pd.read_csv('../data/referral_count_user_event_final_no_view.txt', sep='|')

In [None]:
# Load the pipe-delimited CPT description crosswalk; joined to the summary
# later on `PROCEDURE_CODE`.
xwalk = pd.read_csv('../data/CPT_Description_xwalk.csv', sep='|')

## Clean Refs

In [None]:
# Referral-level header table: one row per non-null HCP_CONNECT_AUTH_NUMBER,
# keeping the first line seen for each referral and discarding the line-level
# columns (CPT code, dates, type, region, units).
refs_head = (
    refs[refs['HCP_CONNECT_AUTH_NUMBER'].notna()]
    .drop_duplicates(subset='HCP_CONNECT_AUTH_NUMBER')
    .drop(columns=['CPT_Code', 'Date_Decision', 'Date_Received',
                   'Type', 'region', 'UNITS'])
    # drop=True keeps the old row labels from coming back as a stray 'index'
    # column (a plain reset_index() after filtering would re-add one).
    .reset_index(drop=True)
)

In [None]:
# Attach the touch counts to every referral line. The left join keeps referral
# lines that have no matching `referral_ident` in `touches`.
refs = refs.merge(
    touches,
    how='left',
    left_on='HCP_CONNECT_AUTH_NUMBER',
    right_on='referral_ident',
)

In [None]:
# ## code below can be used to identify specialties that exist in referrals but not in claims.  

# claim_specs = claims['Specialty'].unique().tolist()

# ref_specs = refs['Specialty'].unique().tolist()

# ref_spec_w_no_claim_spec = []

# for spec in ref_specs:
#     if spec in claim_specs:
#         pass
#     else:
#         ref_spec_w_no_claim_spec.append(spec)

# ref_spec_w_no_claim_spec

In [None]:
# Parse the decision date into datetime64 so it can be subtracted below.
refs['Date_Decision'] = pd.to_datetime(refs['Date_Decision'])

In [None]:
# Parse the received date into datetime64 so it can be subtracted below.
refs['Date_Received'] = pd.to_datetime(refs['Date_Received'])

In [None]:
# TAT = elapsed time from receipt to decision, expressed in fractional days.
turnaround = refs['Date_Decision'] - refs['Date_Received']
refs['TAT'] = turnaround / pd.Timedelta(days=1)

## Clean Claims

In [None]:
# Mean `avg_hcp_cost` per CPT code across all claim rows; used later as a
# fallback cost when a (Specialty, CPT_Code) pair has no claims row of its own.
claims_sum = (
    claims
    .groupby('CPT_Code', as_index=False)[['avg_hcp_cost']]
    .mean()
)

## Feature Engineering

In [None]:
# Row count of referral lines before any filtering.
len(refs)

In [None]:
## Flag retro statuses with 1 and 0.
## Kept as a one-element list because np.select expects a list of conditions.
retro_statuses = [
    'APPROVED - RETRO REVIEW',
    'DENIED - RETRO REVIEW',
    'APPROVED - COB RETRO',
    'PENDING - RETRO REVIEW',
]
retro_conditions = [refs['status_name'].isin(retro_statuses)]

In [None]:
# Single-condition select: rows matching any retro status get 1, all others 0.
choices = [1]
refs['is_retro'] = np.select(
    condlist=retro_conditions,
    choicelist=choices,
    default=0,
)

In [None]:
## Remove retro referrals and drop 'is_retro', which is no longer needed.
## Filtering and dropping in one expression rebinds `refs` to a fresh frame.
## Dropping in place on the filtered slice instead leaves `refs` marked as a
## copy, so later `refs[...] = ...` writes raise SettingWithCopyWarning.
refs = refs[refs['is_retro'] == 0].drop(columns='is_retro')

In [None]:
# Line-level detail table: every non-retro referral line that has an auth
# number, re-indexed 0..n-1 (the old labels are kept in an 'index' column).
#
# No columns are dropped here. A previous `refs_det.drop(columns=[...])` call
# discarded its result and so had no effect; it has been removed rather than
# "fixed" because PPL, Specialty and status_name are all read from refs_det
# further down (assign_status and the per-referral groupby).
refs_det = refs[refs['HCP_CONNECT_AUTH_NUMBER'].notna()].reset_index()

In [None]:
# 1 when the line was auto-approved, 0 for every other status (including missing).
auto_approved_mask = refs['status_name'].eq('APPROVED - AUTO')
refs['is_autoapp'] = np.where(auto_approved_mask, 1, 0)

In [None]:
# refs['is_den_notcovben'] = np.where(refs['status_name']=='DENIED - NOT A COVERED BENEFIT', 1, 0)

In [None]:
# refs['is_den_notmednec'] = np.where(refs['status_name']=='DENIED - CM', 1, 0)

In [None]:
# Treat a missing PPL flag as "N". Assign the result back to the column:
# calling fillna(inplace=True) on `refs.PPL` is a chained in-place operation
# that acts on a temporary Series and does not update `refs` under pandas
# copy-on-write.
refs['PPL'] = refs['PPL'].fillna("N")

In [None]:
# Numeric version of the PPL flag: 1 for 'Y', 0 for anything else.
ppl_mask = refs['PPL'].eq('Y')
refs['is_PPL'] = np.where(ppl_mask, 1, 0)

In [None]:
# Statuses counted as denials for the denial-rate calculation.
# Kept as a one-element list because np.select expects a list of conditions.
denied_statuses = [
    'DENIED - CM',
    'DENIED - BENEFIT CARVE OUT',
    'DENIED - NOT A COVERED BENEFIT',
    'DENIED - APPEAL',
    'DENIED - CLINICAL TRIAL/EXP/INV',
    'DENIED - TRANSPLANT',
    'DENIED - MD',
    'DENIED - CM/MD',
    'DENIED - REDIRECT OSVN',
    'DENIED - TICKLER',
]
den_conditions = [refs['status_name'].isin(denied_statuses)]

In [None]:
# Single-condition select: rows matching any denial status get 1, all others 0.
choices = [1]
refs['is_den'] = np.select(
    condlist=den_conditions,
    choicelist=choices,
    default=0,
)

In [None]:
#refs['is_app'] = np.where(refs['status_cat']=='APPROVED', 1, 0)

In [None]:
#refs['is_notapp'] = np.where(refs['status_cat']!='APPROVED', 1, 0)

In [None]:
# mod_conditions = [
#     (refs['status_name'] == 'DENIED - MODIFIED CM') | 
#     (refs['status_name'] == 'DENIED - MODIFIED')
# ]

In [None]:
# choices = [1]
# refs['is_den_mod'] = np.select(mod_conditions, choices, default=0)

In [None]:
# refs['is_den_bencarvout'] = np.where(refs['status_name']=='DENIED - BENEFIT CARVE OUT', 1, 0)

In [None]:
## find TAT and touches for manually reviewed referrals

In [None]:
# Manually reviewed lines = everything that was not auto-approved.
refs_manual = refs.loc[refs['is_autoapp'] == 0]

In [None]:
## Calculate cost per manually reviewed CPT code:
## a fixed 5,000,000 total divided by the number of non-auto-approved lines.
## NOTE(review): the source and meaning of the 5,000,000 figure are not stated
## in this file -- confirm.
manual_line_count = len(refs[refs['is_autoapp'] == 0])
ga_cpt = 5000000 / manual_line_count

In [None]:
# Display the per-line review cost computed above.
ga_cpt

In [None]:
# Summarise manually reviewed lines per (Specialty, CPT_Code, is_PPL):
# mean turnaround, mean touch count, and number of lines with a non-null UNITS.
manual_group_keys = ['Specialty', 'CPT_Code', 'is_PPL']
manual_aggregations = {
    'TAT': 'mean',
    'NumberOfUserEventsNoViews': 'mean',
    'UNITS': 'count',
}
refssum_manual = (
    refs_manual
    .groupby(manual_group_keys, as_index=False)
    .agg(manual_aggregations)
)

In [None]:
# Rename the manual line count so it does not collide with the all-lines
# UNITS count when the two summaries are merged. `index=str` is omitted: it
# would convert every row label to a string, which has nothing to do with
# renaming a column.
refssum_manual.rename(columns={'UNITS': 'UNITS_man'}, inplace=True)

In [None]:
# Give the averaged touch count a descriptive name. `index=str` is omitted:
# it would convert every row label to a string, which has nothing to do with
# renaming a column.
refssum_manual.rename(columns={'NumberOfUserEventsNoViews': 'mean_touches_manual'}, inplace=True)

In [None]:
# Review cost for the group = number of manually reviewed lines x cost per line.
refssum_manual['cost_to_review'] = refssum_manual['UNITS_man'].mul(ga_cpt)

In [None]:
# Summarise ALL (non-retro) lines per (Specialty, CPT_Code, is_PPL):
# line count, auto-approval rate and denial rate (means of the 0/1 flags).
all_group_keys = ['Specialty', 'CPT_Code', 'is_PPL']
all_aggregations = {
    'UNITS': 'count',
    'is_autoapp': 'mean',
    'is_den': 'mean',
}
refssum = refs.groupby(all_group_keys, as_index=False).agg(all_aggregations)

In [None]:
# refs_denTAT = refs[(refs['status_cat']!='APPROVED') & (refs['is_autoapp']==0)].groupby(['Specialty', 'CPT_Code', 'is_PPL'], as_index=False).agg({
#     'TAT': 'mean',
#     'NumberOfUserEventsNoViews': 'mean'
# })

In [None]:
# refs_denTAT.rename(index=str, columns={'NumberOfUserEventsNoViews': 'mean_touches_manual_den'}, inplace=True)

In [None]:
# refs_denTAT.rename(index=str, columns={'TAT': 'den_TAT'}, inplace=True)

In [None]:
# refs_appTAT = refs[(refs['status_cat']=='APPROVED') & (refs['is_autoapp']==0)].groupby(['Specialty', 'CPT_Code', 'is_PPL'], as_index=False).agg({
#     'TAT': 'mean',
#     'NumberOfUserEventsNoViews': 'mean'
# })

In [None]:
# refs_appTAT.rename(index=str, columns={'NumberOfUserEventsNoViews': 'mean_touches_manual_app'}, inplace=True)

In [None]:
# refs_appTAT.rename(index=str, columns={'TAT': 'app_TAT'}, inplace=True)

In [None]:
# Add the manual-review metrics to the all-lines summary. Groups with no
# manually reviewed lines get NaN in the manual columns (left join).
refs_w_claims0 = refssum.merge(
    refssum_manual,
    how='left',
    on=['Specialty', 'CPT_Code', 'is_PPL'],
)

In [None]:
# Attach specialty-specific claims data to each summary row.
# NOTE(review): assumes `claims` has at most one row per (Specialty, CPT_Code);
# duplicates would multiply summary rows -- confirm against the claims query.
refs_w_claims1 = refs_w_claims0.merge(
    claims,
    how='left',
    on=['Specialty', 'CPT_Code'],
)

In [None]:
# Attach the CPT-level mean cost. Both inputs carry `avg_hcp_cost`, so pandas
# suffixes them: `_x` is the specialty-specific value, `_y` the CPT-wide mean.
refs_w_claims2 = refs_w_claims1.merge(
    claims_sum,
    how='left',
    on='CPT_Code',
)

In [None]:
# Where the specialty-specific cost is missing, fall back to the CPT-wide mean.
cpt_wide_cost = refs_w_claims2['avg_hcp_cost_y']
refs_w_claims2['avg_hcp_cost_x'] = refs_w_claims2['avg_hcp_cost_x'].fillna(cpt_wide_cost)

In [None]:
# The fallback column and the cost standard deviation are not used further.
refs_w_claims2.drop(['avg_hcp_cost_y', 'sd_hcp_cost'], axis=1, inplace=True)

In [None]:
# Restore the un-suffixed column name now that the fallback has been applied.
# `index=str` is omitted: it would convert every row label to a string, which
# has nothing to do with renaming a column.
refs_w_claims2.rename(columns={'avg_hcp_cost_x': 'avg_hcp_cost'}, inplace=True)

In [None]:
# Add the crosswalk columns for each CPT code; codes missing from the
# crosswalk keep NaN in those columns (left join).
refs_w_claims3 = refs_w_claims2.merge(
    xwalk,
    how='left',
    left_on='CPT_Code',
    right_on='PROCEDURE_CODE',
)

In [None]:
# refs_w_claims2 = pd.merge(refs_w_claims1, refs_denTAT, on=['Specialty', 'CPT_Code', 'is_PPL'], how='left')

In [None]:
# refs_w_claims_fin = pd.merge(refs_w_claims2, refs_appTAT, on=['Specialty', 'CPT_Code', 'is_PPL'], how='left')

In [None]:
# Alias, not a copy: the columns added to refs_w_claims_fin below are also
# added to refs_w_claims3 until refs_w_claims_fin is rebound by the later filter.
refs_w_claims_fin = refs_w_claims3

In [None]:
# Estimated dollars denied = denial rate x line count x average cost per line.
denied_line_estimate = refs_w_claims_fin['is_den'] * refs_w_claims_fin['UNITS']
refs_w_claims_fin['sum_cost_denied'] = denied_line_estimate * refs_w_claims_fin['avg_hcp_cost']

In [None]:
# Total turnaround days for the group = manual line count x mean TAT.
refs_w_claims_fin['TAT_total'] = refs_w_claims_fin['UNITS_man'].mul(refs_w_claims_fin['TAT'])

In [None]:
# Dollars denied per day of turnaround; a zero TAT_total yields inf or NaN
# rather than raising.
refs_w_claims_fin['dollars_denied_per_TAT'] = (
    refs_w_claims_fin['sum_cost_denied'].div(refs_w_claims_fin['TAT_total'])
)

In [None]:
# ROI of manual review = dollars denied / cost to review.
# NaN wherever either input is missing.
refs_w_claims_fin['ROI'] = (
    refs_w_claims_fin['sum_cost_denied'].div(refs_w_claims_fin['cost_to_review'])
)

In [None]:
# Keep only groups with at least one manually reviewed line (NaN counts fail
# the comparison and are removed). reset_index() keeps the old row labels in
# an 'index' column.
has_manual_lines = refs_w_claims_fin['UNITS_man'].gt(0)
refs_w_claims_fin = refs_w_claims_fin.loc[has_manual_lines].reset_index()

In [None]:
# Sanity check: total review cost summed over the remaining groups.
refs_w_claims_fin['cost_to_review'].sum()

In [None]:
# Inspect the groups with more than 1000 manually reviewed lines.
refs_w_claims_fin[refs_w_claims_fin['UNITS_man']>1000]

In [None]:
# Flag groups whose manual review costs more than it denies (ROI below 1).
# A NaN ROI compares False, so those groups are not flagged.
low_roi = refs_w_claims_fin['ROI'].lt(1)
refs_w_claims_fin['auto_approve'] = np.where(low_roi, 1, 0)

In [None]:
# refs_w_claims_fin['touches_total'] = refs_w_claims_fin['UNITS_man']*refs_w_claims_fin['mean_touches_manual_app']

In [None]:
# refs_w_claims_fin['Dollars_denied_per_touch'] = refs_w_claims_fin['sum_cost_denied']/refs_w_claims_fin['touches_total']

In [None]:
# Persist the per-group summary with default to_csv settings.
# NOTE(review): the row index is written as an unnamed first column, alongside
# the 'index' column left by the earlier reset_index() -- confirm downstream
# readers expect both.
refs_w_claims_fin.to_csv('../data/refs2018_w_claims_20190219.csv')

In [None]:
## Model Additional Auto Approvals when different thresholds are set.
## Approach: use "given" threshold to determine which CPT codes are "auto-approve"-able for each specialty
##  - For loop through referrals, return 1 if all CPT codes are on "auto-approve"-able list, else 0 

In [None]:
# Spot check: UROLOGY groups that meet the same three criteria used by
# create_dict_of_CPT_codes (PPL, flagged auto_approve, more than 30 manual lines).
refs_w_claims_fin[(refs_w_claims_fin['Specialty']=='UROLOGY') &
                 (refs_w_claims_fin['is_PPL']==1) &
                 (refs_w_claims_fin['auto_approve']==1) &
                 (refs_w_claims_fin['UNITS_man']>30)]

In [None]:
def create_dict_of_CPT_codes(specialty_cpt, min_units=30):
    """Map each specialty to the CPT codes eligible for auto-approval.

    A row contributes its ``CPT_Code`` when ``auto_approve == 1``,
    ``is_PPL == 1`` and ``UNITS_man`` is strictly greater than ``min_units``.

    Args:
        specialty_cpt: DataFrame with columns ``Specialty``, ``CPT_Code``,
            ``auto_approve``, ``is_PPL`` and ``UNITS_man``.
        min_units: Minimum number of manually reviewed lines (exclusive) a
            group needs before its code is listed. Defaults to 30, the value
            that used to be hard-coded.

    Returns:
        dict mapping every specialty present in ``specialty_cpt`` to a list of
        eligible CPT codes in row order. Specialties with no eligible code map
        to an empty list.
    """
    spec_dict = {spec: [] for spec in specialty_cpt['Specialty'].unique().tolist()}

    # One vectorised mask replaces the row-by-row nested ifs.
    eligible = specialty_cpt[
        (specialty_cpt['auto_approve'] == 1)
        & (specialty_cpt['is_PPL'] == 1)
        & (specialty_cpt['UNITS_man'] > min_units)
    ]
    for specialty, cpt_code in zip(eligible['Specialty'], eligible['CPT_Code']):
        spec_dict[specialty].append(cpt_code)
    return spec_dict
            

In [None]:
# Build the specialty -> auto-approvable CPT codes lookup from the final summary.
spec_dict = create_dict_of_CPT_codes(specialty_cpt=refs_w_claims_fin)

In [None]:
# Spot check: auto-approvable CPT codes listed for CARDIOLOGY.
spec_dict['CARDIOLOGY']

In [None]:
# def assign_status(referrals, codes, spec_dict):
#     status = list(np.zeros(referrals.shape[0]))
#     for index, row in referrals.iterrows():
#         my_cpts = codes[codes['HCP_CONNECT_AUTH_NUMBER']==(row['HCP_CONNECT_AUTH_NUMBER'])]
#         for index2, row2 in my_cpts.iterrows():
#             if row2['CPT_Code'] in spec_dict[row['Specialty']]:
#                 pass
#             else:
#                 break
#         status[index] = 1
#     return status
            
        

In [None]:
def assign_status(codes, spec_dict):
    """Flag each referral line whose CPT code is auto-approvable.

    A line is flagged when its ``PPL`` value is ``'Y'`` and its ``CPT_Code``
    is listed in ``spec_dict`` under the line's ``Specialty``.

    Args:
        codes: DataFrame with columns ``PPL``, ``CPT_Code`` and ``Specialty``.
            Any index is accepted; flags are returned in row order.
        spec_dict: dict mapping specialty to an iterable of auto-approvable
            CPT codes. Specialties missing from the dict are treated as
            having no auto-approvable codes.

    Returns:
        list with one entry per row of ``codes``: 1 if flagged, otherwise 0.0.
    """
    # Build the lookup sets once so each membership test is O(1).
    allowed_by_spec = {spec: set(cpts) for spec, cpts in spec_dict.items()}
    no_codes = frozenset()

    status = [0.0] * len(codes)
    rows = zip(codes['PPL'], codes['CPT_Code'], codes['Specialty'])
    # enumerate() gives the row position; the DataFrame index label is not a
    # valid list position unless the index happens to be 0..n-1.
    for position, (ppl, cpt_code, specialty) in enumerate(rows):
        if ppl == 'Y' and cpt_code in allowed_by_spec.get(specialty, no_codes):
            status[position] = 1
    return status

In [None]:
# Preview the line-level detail table before scoring it.
refs_det.head()

In [None]:
# Score every detail line: 1 if its CPT code is auto-approvable, else 0.
auto_approve = assign_status(codes=refs_det, spec_dict=spec_dict)

In [None]:
# Store the per-line flag; the list is in row order, matching refs_det's rows.
refs_det['auto_approvable'] = auto_approve

In [None]:
# List the detail table's columns (now including 'auto_approvable').
refs_det.columns

In [None]:
# Roll the line-level flag up to one row per referral: the mean of
# 'auto_approvable' is 1 only when every line of the referral is auto-approvable.
#
# PPL is filled with 'N' first. refs_det was taken from refs before
# refs['PPL'] had its missing values filled, so it still contains NaN, and
# groupby silently drops rows whose key is NaN. Without the fill, referrals
# with no PPL value would vanish from the results and the final pivot.
refs_results = (
    refs_det
    .assign(PPL=refs_det['PPL'].fillna('N'))
    .groupby(['HCP_CONNECT_AUTH_NUMBER', 'status_name', 'PPL'], as_index=False)
    .agg({'auto_approvable': 'mean'})
)

In [None]:
# Referral-level yes/no: 1 only when all of the referral's lines were flagged.
fully_auto_approvable = refs_results['auto_approvable'].eq(1)
refs_results['aa-yn'] = np.where(fully_auto_approvable, 1, 0)

In [None]:
# refs_results.pivot_table(values='aa-yn', index=['status_name', 'PPL'], aggfunc=['count', 'sum'], margins=True).to_csv('Data/results_100_percent.csv', sep='|')

In [None]:
# Per (status_name, PPL): number of referrals ('count') and how many of them
# are fully auto-approvable ('sum'), with grand totals (margins=True).
refs_results.pivot_table(values='aa-yn', index=['status_name', 'PPL'], aggfunc=['count', 'sum'], margins=True)