In [1]:
import pandas as pd

In [3]:
df = pd.read_csv('files/all_claims_files.csv') # read consolidated claims file

In [4]:
# Add column for the file year since each row is a unique patient/year combination
df['year'] = df.file_name.map(lambda x: int(x[6:10]))

In [5]:
all_cols = df.columns # all columns of the Data Frame

In [6]:
payment_cols = ['MEDREIMB_IP', 'BENRES_IP', 'PPPYMT_IP', 'MEDREIMB_OP', 'BENRES_OP',
       'PPPYMT_OP', 'MEDREIMB_CAR', 'BENRES_CAR', 'PPPYMT_CAR'] # Only select the payment-related columns

In [7]:
id_vars = ['DESYNPUF_ID', 'year'] # Use the DESYNPUF_ID/year as a unique key for the row
# Alternatively, just use the DESYNPUF_ID as a unique key for the patient
# Use list comprehension to select all non-payment related columns
# Alternative: id_vars = [x for x in all_cols if x not in payment_cols]

In [8]:
df[id_vars].drop_duplicates().shape # Verify uniqueness

(6000, 2)

In [9]:
df_mlt = pd.melt(df, id_vars=id_vars, value_vars=payment_cols) # Melt the data!

In [10]:
# Split variable into two parts
df_mlt['payer'] = df_mlt['variable'].map(lambda x: x.split('_')[0])
df_mlt['service'] = df_mlt['variable'].map(lambda x: x.split('_')[1])

In [11]:
# Now can easily pivot in different ways

In [21]:
# format to show percentages
# Find all available options here: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.set_option.html
pd.set_option('display.float_format', '{:.2f}%'.format)

In [20]:
pd.crosstab(df_mlt.year, df_mlt.service, values=df_mlt.value, aggfunc='sum', normalize=True)

service,CAR,IP,OP
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008,0.11%,0.20%,0.07%
2009,0.13%,0.20%,0.08%
2010,0.08%,0.09%,0.04%


In [22]:
pd.set_option('display.float_format', '${:,.2f}'.format)

In [23]:
df_mlt.pivot_table(index='year', columns='service', values='value', aggfunc='sum', margins=True).applymap(float)

service,CAR,IP,OP,All
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008,"$3,231,130.00","$5,799,978.00","$1,956,540.00","$10,987,648.00"
2009,"$3,627,740.00","$5,633,954.00","$2,318,570.00","$11,580,264.00"
2010,"$2,238,090.00","$2,631,430.00","$1,223,250.00","$6,092,770.00"
All,"$9,096,960.00","$14,065,362.00","$5,498,360.00","$28,660,682.00"
