In [1]:
import pandas as pd

In [2]:
# Raw extracts, read straight from disk with their date columns parsed.
# NOTE(review): these three names are reassigned by later cells in this notebook.
inpatient = pd.read_csv(
    "inpatient.csv",
    parse_dates=['AdmissionDate', 'DischargeDate'],
)
claims = pd.read_csv(
    "claims.csv",
    parse_dates=['First_Service_Date', 'Last_Service_Date'],
)
er_only = pd.read_csv(
    "er_only.csv",
    parse_dates=['AdmissionDate'],
)

In [144]:
def move_dates(completed_merge, not_merged, claims, forward=True):
    """Retry unmatched rows against ``claims`` with the admission date shifted day by day.

    On each pass every pending row has its ``AdmissionDate`` moved by one more
    day (later when ``forward`` is true, earlier otherwise), its ``merge`` key
    is rebuilt from ``MemberID`` + shifted date + facility, and the rows are
    left-joined to ``claims`` on that key. Rows that find a claim are added to
    ``completed_merge``; the rest are carried, with their shifted date, into
    the next pass. The search stops at the first pass in which no row finds a
    claim.

    Parameters
    ----------
    completed_merge : pandas.DataFrame
        Rows already matched to a claim. Must have an ``original_merge`` column.
    not_merged : pandas.DataFrame
        Rows still lacking a claim. Must have ``MemberID``, ``AdmissionDate``
        and at least one column whose name contains ``'Facility'`` (the first
        such column is used). The frame is not modified.
    claims : pandas.DataFrame
        Claims with ``merge`` and ``Member_ID`` columns.
    forward : bool, default True
        Shift dates one day later per pass when true, one day earlier otherwise.

    Returns
    -------
    pandas.DataFrame
        ``completed_merge`` with the newly matched rows concatenated onto it
        (columns sorted), or ``completed_merge`` itself if nothing matched.

    Raises
    ------
    IndexError
        If ``not_merged`` has no column whose name contains ``'Facility'``.
    """
    one_day = pd.DateOffset(1)
    pending = not_merged

    # Loop instead of self-recursion: each pass previously added a stack frame,
    # so a long chain of one-day shifts could hit Python's recursion limit.
    while True:
        candidates = pending.copy()
        if forward:
            candidates['AdmissionDate'] = candidates['AdmissionDate'] + one_day
        else:
            candidates['AdmissionDate'] = candidates['AdmissionDate'] - one_day

        facility_col = [col for col in candidates.columns if 'Facility' in col][0]
        candidates['merge'] = (candidates['MemberID'].astype(str) +
                               candidates['AdmissionDate'].astype(str) +
                               candidates[facility_col])

        # Skip shifted keys that equal the original key of an already-completed row.
        already_used = candidates['merge'].isin(completed_merge['original_merge'].tolist())
        candidates = candidates[~already_used].copy()

        merged_new = (candidates.merge(claims, on='merge', how='left')
                                .drop_duplicates(subset='merge'))
        found_claim = merged_new['Member_ID'].notnull()

        if not found_claim.any():
            return completed_merge

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        completed_merge = pd.concat([completed_merge, merged_new[found_claim]], sort=True)

        # Carry the still-unmatched rows (with their shifted date) into the next pass.
        unmatched_keys = merged_new.loc[~found_claim, 'merge'].tolist()
        pending = candidates[candidates['merge'].isin(unmatched_keys)]

In [180]:
def load_clean_claims(path="claims.csv"):
    """Load the claims extract and build the key used to join it to other tables.

    Parameters
    ----------
    path : str or path-like, default "claims.csv"
        CSV file to read. It must contain ``First_Service_Date`` and
        ``Last_Service_Date`` (parsed as dates), ``Provider`` and ``Member_ID``.

    Returns
    -------
    pandas.DataFrame
        The claims with the provider spellings listed below rewritten, a
        ``merge`` column (``Member_ID`` + ``First_Service_Date`` + ``Provider``
        as one string), and only the first row kept for each ``merge`` value.
    """
    claims = pd.read_csv(path, parse_dates=['First_Service_Date', 'Last_Service_Date'])

    provider_aliases = {'Roger Williams Med Center':'Roger Williams Medical Center',
                        'Psych Our Lady of Fatima':'Our Lady Of Fatima Hospital',
                        'The Miriam Hospital Lab': 'The Miriam Hospital',
                        'Hosp The Miriam Hospital': 'The Miriam Hospital'}
    # Assign the result back: replace(inplace=True) on claims['Provider'] is a
    # chained in-place call, which does not update the frame under Copy-on-Write.
    claims['Provider'] = claims['Provider'].replace(provider_aliases)

    claims['merge'] = (claims['Member_ID'].astype(str) +
                       claims['First_Service_Date'].astype(str) +
                       claims['Provider'])
    return claims.drop_duplicates(subset='merge')

In [188]:
def load_utlization(path):
    """Load a utilization extract and build the key used to join it to claims.

    Parameters
    ----------
    path : str or path-like
        CSV file to read. It must contain ``AdmissionDate`` (parsed as a date),
        ``MemberID`` and at least one column whose name contains ``'Facility'``;
        the first such column is used as the facility name.

    Returns
    -------
    pandas.DataFrame
        The rows with the facility spellings listed below rewritten and a
        ``merge`` column (``MemberID`` + ``AdmissionDate`` + facility as one
        string). Rows whose ``merge`` is null and fully duplicated rows are
        dropped.

    Raises
    ------
    IndexError
        If no column name contains ``'Facility'``.
    """
    df = pd.read_csv(path, parse_dates=['AdmissionDate'])

    facility_col = [col for col in df.columns if 'Facility' in col][0]

    facility_aliases = {'Roger Williams Hospital':'Roger Williams Medical Center',
                        'Kent Hospital':'Kent County Memorial Hospital',
                        'Fatima Hospital':'Our Lady Of Fatima Hospital',
                        'FirstHealth Moore Reginal Hospital':'Firsthealth of the Carolinas'}
    # Assign the result back: replace(inplace=True) on df[facility_col] is a
    # chained in-place call, which does not update the frame under Copy-on-Write.
    df[facility_col] = df[facility_col].replace(facility_aliases)

    df['merge'] = df['MemberID'].astype(str) + df['AdmissionDate'].astype(str) + df[facility_col]
    # A missing facility makes the concatenated key null; such rows cannot be joined.
    df = df[df['merge'].notnull()]
    return df.drop_duplicates()

In [192]:
def add_claims_info(df, claims):
    """Attach claim columns to ``df``, tolerating admission dates that are off by some days.

    Rows are first left-joined to ``claims`` on the exact ``merge`` key. Rows
    that found no claim are then retried through ``move_dates``, first with
    later dates and then with earlier dates. Rows that never find a claim are
    kept, without claim columns filled in.

    Parameters
    ----------
    df : pandas.DataFrame
        Utilization rows with a ``merge`` key column, plus the columns
        ``move_dates`` needs to rebuild that key.
    claims : pandas.DataFrame
        Claims with ``merge`` and ``Member_ID`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per ``original_merge`` value (the row's key before any date
        shift), columns sorted, index reset.

    Raises
    ------
    AssertionError
        If the result does not have the same number of rows as ``df``.
    """
    merged = df.merge(claims, on='merge', how='left').drop_duplicates(subset='merge')
    has_claim = merged['Member_ID'].notnull()

    unmatched_keys = merged.loc[~has_claim, 'merge'].tolist()
    not_merged_original = df[df['merge'].isin(unmatched_keys)].copy()
    not_merged_original['original_merge'] = not_merged_original['merge'].copy()

    completed_merge = merged[has_claim].copy()
    completed_merge['original_merge'] = completed_merge['merge'].copy()

    # Both passes start from the full set of originally unmatched rows; a row
    # matched in both directions is reduced to its first (forward) match by the
    # drop_duplicates below.
    completed_merge = move_dates(completed_merge, not_merged_original, claims, forward=True)
    completed_merge = move_dates(completed_merge, not_merged_original, claims, forward=False)

    never_matched = ~not_merged_original['merge'].isin(completed_merge['original_merge'].tolist())
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    merged = pd.concat([completed_merge, not_merged_original[never_matched]], sort=True)
    merged = merged.drop_duplicates(subset='original_merge').reset_index(drop=True)

    assert df.shape[0] == merged.shape[0], (
        f"row count changed while adding claims: {df.shape[0]} in, {merged.shape[0]} out"
    )

    return merged

In [190]:
# Inpatient stays: load both tables, then attach claim details to each stay.
claims = load_clean_claims()
inpatient = load_utlization(path='inpatient.csv')
merge_inp = add_claims_info(df=inpatient, claims=claims)

(944, 17)
(944, 43)


In [191]:
# ER-only visits: same pipeline; the merged frame is the cell's displayed output.
claims = load_clean_claims()
er_only = load_utlization(path='er_only.csv')
add_claims_info(df=er_only, claims=claims)

(325, 10)
(325, 36)


Unnamed: 0,AdmissionDate,Admission_Type,Admission_Type_Description,Admit_Hour_Code,Admit_Hour_Code_Description,Admitting_DX,Bill_Type,Bill_Type_Description,Center,Claim_ID,...,ParticipantName,Place_of_Service,Principal_DX,Provider,merge,original_merge,textBox2,textBox24,textBox25,textBox5
0,2017-10-02,5.0,TRAUMA,0.0,12:00 (MIDNIGHT)-12:59 AM,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB120717esa0004,...,"Farland, Frederick J ""Fred""",23.0,Z04.3,Rhode Island Hospital,1008112017-10-02Rhode Island Hospital,1008112017-10-02Rhode Island Hospital,DP33 - Emergency Room Episodes,,XXI - Z00–Z99 Factors influencing health statu...,1/1/2001 through 6/12/2019 (6737 days)
1,2017-10-05,1.0,EMERGENCY,0.0,12:00 (MIDNIGHT)-12:59 AM,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB120717esa0005,...,"Prak, Yee",23.0,R10.84,Rhode Island Hospital,1002442017-10-05Rhode Island Hospital,1002442017-10-05Rhode Island Hospital,DP33 - Emergency Room Episodes,,"XVIII - R00–R99 Symptoms, signs and abnormal c...",1/1/2001 through 6/12/2019 (6737 days)
2,2017-10-05,1.0,EMERGENCY,0.0,12:00 (MIDNIGHT)-12:59 AM,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Woonsocket,UB053019esa0048,...,"Villafane, Aurelio",23.0,E11.65,Landmark Medical Center,1005322017-10-05Landmark Medical Center,1005322017-10-05Landmark Medical Center,DP33 - Emergency Room Episodes,,"IV - E00–E90 Endocrine, nutritional and metabo...",1/1/2001 through 6/12/2019 (6737 days)
3,2017-10-06,1.0,EMERGENCY,13.0,01:00-01:59,I61.5,111.0,HOSPITAL INPATIENT (INCLUDING MEDICARE PART A)...,Providence,UB120717esa0008,...,"Charette, Carol J",21.0,I61.5,Rhode Island Hospital,20008572017-10-06Rhode Island Hospital,20008572017-10-06Rhode Island Hospital,DP33 - Emergency Room Episodes,IX - I00–I99 Diseases of the circulatory system,IX - I00–I99 Diseases of the circulatory system,1/1/2001 through 6/12/2019 (6737 days)
4,2017-10-16,1.0,EMERGENCY,,,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB010218esa0081,...,"Parker, Helen M",23.0,K62.5,Roger Williams Medical Center,1007602017-10-16Roger Williams Medical Center,1007602017-10-16Roger Williams Medical Center,DP33 - Emergency Room Episodes,,XI - K00–K93 Diseases of the digestive system,1/1/2001 through 6/12/2019 (6737 days)
5,2017-10-19,1.0,EMERGENCY,17.0,05:00-05:59,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB010318esa0005,...,"Johansson, Shirley A",23.0,S89.92XA,Kent County Memorial Hospital,1006052017-10-19Kent County Memorial Hospital,1006052017-10-19Kent County Memorial Hospital,DP33 - Emergency Room Episodes,,"XIX - S00–T98 Injury, poisoning and certain ot...",1/1/2001 through 6/12/2019 (6737 days)
6,2017-10-23,1.0,EMERGENCY,,,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB010218esa0069,...,"Brown, Honorata",23.0,S29.012A,Roger Williams Medical Center,1003312017-10-23Roger Williams Medical Center,1003312017-10-23Roger Williams Medical Center,DP33 - Emergency Room Episodes,,"XIX - S00–T98 Injury, poisoning and certain ot...",1/1/2001 through 6/12/2019 (6737 days)
7,2017-10-24,1.0,EMERGENCY,,,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB010218esa0082,...,"Rivera, Victoria",23.0,B37.0,Roger Williams Medical Center,1008292017-10-24Roger Williams Medical Center,1008292017-10-24Roger Williams Medical Center,DP33 - Emergency Room Episodes,,I - A00-B99 Certain infectious and parasitic d...,1/1/2001 through 6/12/2019 (6737 days)
8,2017-10-27,1.0,EMERGENCY,0.0,12:00 (MIDNIGHT)-12:59 AM,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB120717esa0030,...,"Kollie, Juana M",23.0,M25.551,The Miriam Hospital,1006072017-10-27The Miriam Hospital,1006072017-10-27The Miriam Hospital,DP33 - Emergency Room Episodes,,XIII - M00–M99 Diseases of the musculoskeletal...,1/1/2001 through 6/12/2019 (6737 days)
9,2017-10-29,1.0,EMERGENCY,,,,131.0,HOSPITAL OUTPATIENT - ADMIT THROUGH DISCHARGE ...,Providence,UB010218esa0086,...,"Martinez, William E",23.0,K59.00,Roger Williams Medical Center,1007472017-10-29Roger Williams Medical Center,1007472017-10-29Roger Williams Medical Center,DP33 - Emergency Room Episodes,,XI - K00–K93 Diseases of the digestive system,1/1/2001 through 6/12/2019 (6737 days)
