In [160]:
import pandas as pd
import numpy as np


In [161]:
# Load only the five columns this notebook uses from the source CSV.
INP = pd.read_csv(
    "VTINP16_upd.csv",
    usecols=["PPAY", "MDC", "CHRGS", "intage", "sex"],
)

In [162]:
# Preview the first rows to check that the selected columns loaded.
INP.head()

Unnamed: 0,intage,sex,PPAY,CHRGS,MDC
0,14,2,1,67375.25,23
1,14,1,1,22886.0,23
2,14,2,1,120544.05,8
3,14,2,1,32325.8,8
4,12,2,7,29244.1,4


In [170]:
# List the distinct MDC codes present in INP.
# NOTE(review): this cell's execution count (170) is later than the cleaning cells
# that follow it (163-167), so the recorded output was produced after them —
# confirm the result on a fresh top-to-bottom run.
INP["MDC"].unique()

array([23,  8,  4,  9, 21,  5,  6,  7, 11, 17, 24, 12, 16, 14,  1,  3, 10,
       13, 18, 19, 15, 25, 20,  2, 22])

##### Handle missing values and convert data types


In [163]:
# Treat single-space entries in sex, MDC and CHRGS as missing values.
# The result is assigned back to the column: calling replace(..., inplace=True) on
# INP[col] operates on a column selection, which emits a FutureWarning in pandas 2.x
# and does not modify INP at all once Copy-on-Write is enabled.
for col in ("sex", "MDC", "CHRGS"):
    INP[col] = INP[col].replace(" ", np.nan)

In [164]:
# Drop, in place, every row that is missing a value in any of these columns.
required_fields = ["intage", "sex", "CHRGS", "MDC"]
INP.dropna(subset=required_fields, inplace=True)

In [165]:
# Convert CHRGS to a numeric dtype so it can be summed in the aggregations below.
INP["CHRGS"]=pd.to_numeric(INP["CHRGS"])

In [166]:
# Cast sex to int (rows with a missing sex were removed by the dropna cell above).
INP["sex"] = INP["sex"].astype(int)

In [167]:
# Cast MDC to int (rows with a missing MDC were removed by the dropna cell above).
INP["MDC"] = INP["MDC"].astype(int)

##### Filter to the payer codes of interest

In [171]:
# PPAY codes to keep; these are the codes that the following cell maps to a payer label.
PP = [1,2,6,7]
# .copy() makes INP an independent frame rather than a slice of the unfiltered one,
# so adding columns to it later does not trigger SettingWithCopyWarning.
INP = INP.loc[INP["PPAY"].isin(PP)].copy()

In [172]:
# Map PPAY codes to payer labels: 1 -> Medicare, 2 -> Medicaid, 6 or 7 -> Commercial.
conditions_1 = [
    INP["PPAY"] == 1,
    INP["PPAY"] == 2,
    INP["PPAY"].isin([6, 7]),
]
values_1 = ["Medicare", "Medicaid", "Commercial"]
# np.select fills unmatched rows with `default`, which is the integer 0 unless given.
# Recent NumPy releases refuse to combine that integer with string choices and raise
# TypeError, so an explicit string default is supplied. After the PPAY filter above
# no row should be unmatched, so "Other" is only a safeguard.
INP["Payer"] = np.select(conditions_1, values_1, default="Other")

In [173]:
# Preview the first rows to check the new Payer column.
INP.head()

Unnamed: 0,intage,sex,PPAY,CHRGS,MDC,Payer
0,14,2,1,67375.25,23,Medicare
1,14,1,1,22886.0,23,Medicare
2,14,2,1,120544.05,8,Medicare
3,14,2,1,32325.8,8,Medicare
4,12,2,7,29244.1,4,Commercial


In [174]:
# Sum CHRGS for every (Payer, MDC) pair and flatten the result into plain columns.
charges_by_payer_mdc = INP.groupby(["Payer", "MDC"])["CHRGS"]
df = charges_by_payer_mdc.sum().reset_index(name="total_charge")

In [176]:
# Total charge expressed in millions, rounded to zero decimal places.
df["total_charge(M)"] = df["total_charge"].div(1000000).round()

In [178]:
# Store the rounded millions as integers instead of floats.
df["total_charge(M)"]=df["total_charge(M)"].astype(int)

In [179]:
# Preview the per-payer, per-MDC totals.
df.head()

Unnamed: 0,Payer,MDC,total_charge,total_charge(M)
0,Commercial,1,38222689.07,38
1,Commercial,2,366541.34,0
2,Commercial,3,2867282.5,3
3,Commercial,4,24257093.97,24
4,Commercial,5,53923864.9,54


In [122]:
# Write the per-payer, per-MDC totals to an Excel file.
df.to_excel("final_result.xlsx")

#### Pie chart data: share of charges by MDC for each payer

In [203]:
# Split the records into one row-level subset per payer label.
payer_label = INP["Payer"]
INP_Medicare = INP.loc[payer_label.eq("Medicare")]
INP_Medicaid = INP.loc[payer_label.eq("Medicaid")]
INP_Commerical = INP.loc[payer_label.eq("Commercial")]

In [182]:
# Total Medicare charges per MDC.
# The rows are taken from INP rather than from INP_Medicare: this cell rebinds
# INP_Medicare to the aggregate (columns MDC, total_charge), so grouping
# INP_Medicare itself would raise KeyError on CHRGS if the cell were run again.
INP_Medicare = (
    INP.loc[INP["Payer"] == "Medicare"]
    .groupby(["MDC"])["CHRGS"]
    .sum()
    .reset_index(name="total_charge")
)

In [196]:
# Display INP_Medicare.
# NOTE(review): the output recorded for this cell has row-level columns
# (intage, sex, PPAY, CHRGS, MDC, Payer), while the cell just above rebinds
# INP_Medicare to a per-MDC aggregate — the cells appear to have been run out of order.
INP_Medicare

Unnamed: 0,intage,sex,PPAY,CHRGS,MDC,Payer
0,14,2,1,67375.25,23,Medicare
1,14,1,1,22886.00,23,Medicare
2,14,2,1,120544.05,8,Medicare
3,14,2,1,32325.80,8,Medicare
5,12,1,1,124611.40,23,Medicare
6,13,2,1,71292.00,8,Medicare
7,13,2,1,22413.40,23,Medicare
8,14,2,1,106723.00,23,Medicare
9,14,1,1,77200.65,8,Medicare
10,14,2,1,12460.00,8,Medicare


In [150]:
# Sum of total_charge over all rows of INP_Medicare (the denominator of `share` below).
INP_Medicare["total_charge"].sum()

692898406.79

In [184]:
# Each MDC's fraction of the overall Medicare total_charge.
medicare_total = INP_Medicare["total_charge"].sum()
INP_Medicare["share"] = INP_Medicare["total_charge"] / medicare_total

In [188]:
# Show the rows ordered by share, largest first. The sorted frame is only displayed;
# it is not assigned, so INP_Medicare keeps its current row order.
INP_Medicare.sort_values('share',ascending=False)

Unnamed: 0,MDC,total_charge,share
7,8,145705200.0,0.210284
4,5,135546500.0,0.195622
3,4,78133650.0,0.112763
5,6,63039630.0,0.09098
17,18,61024560.0,0.088071
0,1,57043220.0,0.082326
10,11,27629750.0,0.039876
18,19,24665320.0,0.035597
6,7,17813180.0,0.025708
22,23,15221120.0,0.021967


In [198]:
# Number of Medicare records for each (sex, intage) combination.
# The rows are read from INP: by this point INP_Medicare has been rebound to the
# per-MDC aggregate (MDC, total_charge, share), which has no sex or intage column,
# so grouping it would raise KeyError on a top-to-bottom run.
INP_Medicare_f = (
    INP.loc[INP["Payer"] == "Medicare"]
    .groupby(["sex", "intage"])["intage"]
    .count()
    .reset_index(name="intage_num")
)

In [202]:
# Write the Medicare (sex, intage) counts to an Excel file.
INP_Medicare_f.to_excel("Medicare_f.xlsx")

In [201]:
# Write INP_Medicare to an Excel file.
# NOTE(review): INP_Medicare is bound to both row-level and aggregated frames in this
# notebook; confirm which one is intended to be exported here.
INP_Medicare.to_excel("medicare.xlsx")

#### Medicaid

In [190]:
# Total Medicaid charges per MDC.
# The rows are taken from INP rather than from INP_Medicaid: this cell rebinds
# INP_Medicaid to the aggregate (columns MDC, total_charge), so grouping
# INP_Medicaid itself would raise KeyError on CHRGS if the cell were run again.
INP_Medicaid = (
    INP.loc[INP["Payer"] == "Medicaid"]
    .groupby(["MDC"])["CHRGS"]
    .sum()
    .reset_index(name="total_charge")
)

In [191]:
# Each MDC's fraction of the overall Medicaid total_charge.
medicaid_total = INP_Medicaid["total_charge"].sum()
INP_Medicaid["share"] = INP_Medicaid["total_charge"] / medicaid_total

In [192]:
# Write the Medicaid per-MDC totals and shares to an Excel file.
INP_Medicaid.to_excel("medicaid.xlsx")

In [204]:
# Number of Medicaid records for each (sex, intage) combination.
# The rows are read from INP: by this point INP_Medicaid has been rebound to the
# per-MDC aggregate (MDC, total_charge, share), which has no sex or intage column,
# so grouping it would raise KeyError on a top-to-bottom run.
INP_Medicaid_f = (
    INP.loc[INP["Payer"] == "Medicaid"]
    .groupby(["sex", "intage"])["intage"]
    .count()
    .reset_index(name="intage_num")
)

In [205]:
# Write the Medicaid (sex, intage) counts to an Excel file.
INP_Medicaid_f.to_excel("Medicaid_f.xlsx")

#### Commercial

In [193]:
# Total Commercial charges per MDC, each MDC's share of that total, and an Excel export.
# The rows are taken from INP rather than from INP_Commerical: this cell rebinds
# INP_Commerical to the aggregate, so grouping INP_Commerical itself would raise
# KeyError on CHRGS if the cell were run again.
INP_Commerical = (
    INP.loc[INP["Payer"] == "Commercial"]
    .groupby(["MDC"])["CHRGS"]
    .sum()
    .reset_index(name="total_charge")
)
INP_Commerical["share"] = INP_Commerical["total_charge"].div(INP_Commerical["total_charge"].sum())
INP_Commerical.to_excel("commerical.xlsx")

In [206]:
# Number of Commercial records for each (sex, intage) combination.
# The rows are read from INP: by this point INP_Commerical has been rebound to the
# per-MDC aggregate (MDC, total_charge, share), which has no sex or intage column,
# so grouping it would raise KeyError on a top-to-bottom run.
INP_Commerical_f = (
    INP.loc[INP["Payer"] == "Commercial"]
    .groupby(["sex", "intage"])["intage"]
    .count()
    .reset_index(name="intage_num")
)

In [208]:
# Write the Commercial (sex, intage) counts to an Excel file.
INP_Commerical_f.to_excel("Commercial_f.xlsx")