In [71]:
import glob
%matplotlib inline
import sys
print(sys.version)
import numpy as np
print(np.__version__)
import pandas as pd
print(pd.__version__)
import matplotlib.pyplot as plt


3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
1.16.2
0.24.2


## 1.Functionality ###

This notebook processes synthetic medicare data to identify palliative care utilization based on various parameters.
The files processed are : 

* Inpatient  
* Outpatient  
* Carrier  
    ** For the above 3 files, raw observations from the first 10 samples are collected if they have at least one ICD9 code = V667,   
    ** then they are aggregated into one record per patient(groupby = DESYNPUF_ID), with count of observations and sum of claim_amount   
<br></br>
* Beneficiary  
    All beneficiaries are processed if they have 12 months coverage.   
    columns :   
    DESYNPUF_ID (groupby),  
    BENE_BIRTH_DT_max,   
    BENE_DEATH_DT,   
    BENE_SEX_IDENT_CD_max,   
    BENE_RACE_CD_max,   
    SP_STATE_CODE_max,     
    BENE_COUNTY_CD_count, -- to get the count of claims   
    BENE_HI_CVRAGE_TOT_MONS_max,  
    BENE_SMI_CVRAGE_TOT_MONS_max

## 2.Utility Functions##

### 2.a Print DataFrame Shape and Dtype ###

In [72]:
def df_print(df):
    print(df.shape)
    print(df.columns)

### 2.b Merge all files in a path into a single dataframe ###

In [73]:
def merge_rows_df(path):
    
    frames = []
    
    for fname in glob.glob(path):
    
        print(fname)
        frames.append(pd.read_csv(fname))
     
    result_df = pd.concat(frames)
    return result_df

### 2.c Merge all files in a path, with ICD9 code = V667 into a single dataframe ###

In [74]:
def palliative_cohort(path):
    
    frames = []
    
    for fname in glob.glob(path):
        
        df = pd.read_csv(fname)
        
        if 'Carrier_Claims_Sample' in fname:
            
            pal_df = df[(df.ICD9_DGNS_CD_1 =='V667')
                        |(df.ICD9_DGNS_CD_2 =='V667')
                        |(df.ICD9_DGNS_CD_3 =='V667')
                        |(df.ICD9_DGNS_CD_4 =='V667')
                        |(df.ICD9_DGNS_CD_5 =='V667')
                        |(df.ICD9_DGNS_CD_6 =='V667')
                        |(df.ICD9_DGNS_CD_7 =='V667')
                        |(df.ICD9_DGNS_CD_8 =='V667')
                       ]
        else:
            pal_df = df[(df.ICD9_DGNS_CD_1 =='V667')
                        |(df.ICD9_DGNS_CD_2 =='V667')
                        |(df.ICD9_DGNS_CD_3 =='V667')
                        |(df.ICD9_DGNS_CD_4 =='V667')
                        |(df.ICD9_DGNS_CD_5 =='V667')
                        |(df.ICD9_DGNS_CD_6 =='V667')
                        |(df.ICD9_DGNS_CD_7 =='V667')
                        |(df.ICD9_DGNS_CD_8 =='V667')
                        |(df.ICD9_DGNS_CD_9 =='V667')
                        |(df.ICD9_DGNS_CD_10 =='V667')
                        |(df.ICD9_PRCDR_CD_1 =='V667')
                        |(df.ICD9_PRCDR_CD_2 =='V667')
                        |(df.ICD9_PRCDR_CD_3 =='V667')
                        |(df.ICD9_PRCDR_CD_4 =='V667')
                        |(df.ICD9_PRCDR_CD_5 =='V667')
                        |(df.ICD9_PRCDR_CD_6 =='V667')
                       ]
        
        frames.append(pal_df)
    
    result_df = pd.concat(frames)
    result_df["CLM_FROM_DT"] = pd.to_datetime(df["CLM_FROM_DT"], format="%Y%m%d")
    result_df["CLM_THRU_DT"] = pd.to_datetime(df["CLM_FROM_DT"], format="%Y%m%d")

    return result_df


### 2.d Write dataframe to csv ###

In [75]:
def df_write_csv(df, path):
    
    df.to_csv(path_or_buf = path, index=False)

## 3.Process Files ##
### 3.a Process inpatient_claims with hospice code into inp_palliative_cohort.csv ###

In [76]:
inp_path = "/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/*Inpatient*.csv"
inp_df = palliative_cohort(inp_path)
df_write_csv(inp_df,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/inp_palliative_cohort.csv")

### 3.b Process outpatients with hospice code into out_palliative_cohort.csv ###

In [78]:
outp_path = "/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/*Outpatient*.csv"
outp_df = palliative_cohort(outp_path)
df_write_csv(outp_df,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/outp_palliative_cohort.csv")

  if (yield from self.run_code(code, result)):


In [79]:
df_print(inp_df)
df_print(outp_df)

(5453, 81)
Index(['DESYNPUF_ID', 'CLM_ID', 'SEGMENT', 'CLM_FROM_DT', 'CLM_THRU_DT',
       'PRVDR_NUM', 'CLM_PMT_AMT', 'NCH_PRMRY_PYR_CLM_PD_AMT', 'AT_PHYSN_NPI',
       'OP_PHYSN_NPI', 'OT_PHYSN_NPI', 'CLM_ADMSN_DT', 'ADMTNG_ICD9_DGNS_CD',
       'CLM_PASS_THRU_PER_DIEM_AMT', 'NCH_BENE_IP_DDCTBL_AMT',
       'NCH_BENE_PTA_COINSRNC_LBLTY_AM', 'NCH_BENE_BLOOD_DDCTBL_LBLTY_AM',
       'CLM_UTLZTN_DAY_CNT', 'NCH_BENE_DSCHRG_DT', 'CLM_DRG_CD',
       'ICD9_DGNS_CD_1', 'ICD9_DGNS_CD_2', 'ICD9_DGNS_CD_3', 'ICD9_DGNS_CD_4',
       'ICD9_DGNS_CD_5', 'ICD9_DGNS_CD_6', 'ICD9_DGNS_CD_7', 'ICD9_DGNS_CD_8',
       'ICD9_DGNS_CD_9', 'ICD9_DGNS_CD_10', 'ICD9_PRCDR_CD_1',
       'ICD9_PRCDR_CD_2', 'ICD9_PRCDR_CD_3', 'ICD9_PRCDR_CD_4',
       'ICD9_PRCDR_CD_5', 'ICD9_PRCDR_CD_6', 'HCPCS_CD_1', 'HCPCS_CD_2',
       'HCPCS_CD_3', 'HCPCS_CD_4', 'HCPCS_CD_5', 'HCPCS_CD_6', 'HCPCS_CD_7',
       'HCPCS_CD_8', 'HCPCS_CD_9', 'HCPCS_CD_10', 'HCPCS_CD_11', 'HCPCS_CD_12',
       'HCPCS_CD_13', 'HCPCS_CD_14', 'HCP

### 3.c Process carrier_claims with hospice code into car_palliative_cohort.csv ###

In [80]:
car_path = "/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/*Carrier_Claims*.csv"
car_df = palliative_cohort(car_path)
df_write_csv(car_df,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/car_palliative_cohort.csv")


  if (yield from self.run_code(code, result)):


### 3.d Process beneficiary files into bene_cohort.csv ###

In [82]:
bene_path = "/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/*Beneficiary*.csv"
bene_df = merge_rows_df(bene_path)


/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2008_Beneficiary_Summary_File_Sample_9.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2009_Beneficiary_Summary_File_Sample_9.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2009_Beneficiary_Summary_File_Sample_8.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2008_Beneficiary_Summary_File_Sample_8.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2008_Beneficiary_Summary_File_Sample_10.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2010_Beneficiary_Summary_File_Sample_8.csv
/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/DE1_0_2010_Beneficiary_Summary_File_Sample_9.csv
/Users/nalin

In [84]:
#converting date formats into yyyy-mm-dd
bene_df["BENE_BIRTH_DT"] = pd.to_datetime(bene_df["BENE_BIRTH_DT"], format="%Y%m%d")
bene_df["BENE_DEATH_DT"] = pd.to_datetime(bene_df["BENE_DEATH_DT"], format="%Y%m%d")

df_write_csv(bene_df,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort.csv")

    

### 3.f Aggregates per person ( DESYNPUF_ID) ###

In [101]:
#aggregating per person - inpatient
inp_aggr = (inp_df.groupby('DESYNPUF_ID').agg(
    {'CLM_ID':'count', 
     'CLM_PMT_AMT':'sum',
     'NCH_PRMRY_PYR_CLM_PD_AMT':'sum',
     'CLM_UTLZTN_DAY_CNT': 'sum'
    }
    )
             .reset_index()
             .rename(columns={'CLM_ID':'CLM_ID_count', 
                              'CLM_PMT_AMT':'CLM_PMT_AMT_sum',
                              'NCH_PRMRY_PYR_CLM_PD_AMT':'NCH_PRMRY_PYR_CLM_PD_AMT_sum',
                              'CLM_UTLZTN_DAY_CNT': 'CLM_UTLZTN_DAY_CNT_sum'
                             }))
df_write_csv(inp_aggr,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/inp_cohort_aggr.csv")

#validate inpatient aggr
df_print(inp_aggr)
inp_aggr.DESYNPUF_ID.nunique()


(5411, 5)
Index(['DESYNPUF_ID', 'CLM_ID_count', 'CLM_PMT_AMT_sum',
       'NCH_PRMRY_PYR_CLM_PD_AMT_sum', 'CLM_UTLZTN_DAY_CNT_sum'],
      dtype='object')


5411

In [102]:
#aggregating per person - outpatient
outp_aggr = (outp_df.groupby('DESYNPUF_ID').agg(
    {'CLM_ID':'count', 
     'CLM_PMT_AMT':'sum',
     'NCH_PRMRY_PYR_CLM_PD_AMT':'sum'
    }
    )
             .reset_index()
             .rename(columns={'CLM_ID':'CLM_ID_count', 
                              'CLM_PMT_AMT':'CLM_PMT_AMT_sum',
                              'NCH_PRMRY_PYR_CLM_PD_AMT':'NCH_PRMRY_PYR_CLM_PD_AMT_sum'
                             }))
df_write_csv(outp_aggr,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/outp_cohort_aggr.csv")

#validate inpatient aggr
df_print(outp_aggr)
outp_aggr.DESYNPUF_ID.nunique()

(7331, 4)
Index(['DESYNPUF_ID', 'CLM_ID_count', 'CLM_PMT_AMT_sum',
       'NCH_PRMRY_PYR_CLM_PD_AMT_sum'],
      dtype='object')


7331

In [126]:
#aggregating per person - carrier 
car_df['NCH_PMT_AMT'] = car_df['LINE_NCH_PMT_AMT_1'] + car_df['LINE_NCH_PMT_AMT_2'] +  car_df['LINE_NCH_PMT_AMT_3'] +  car_df['LINE_NCH_PMT_AMT_4'] + \
                        car_df['LINE_NCH_PMT_AMT_5'] + car_df['LINE_NCH_PMT_AMT_6'] + car_df['LINE_NCH_PMT_AMT_7'] + \
                        car_df['LINE_NCH_PMT_AMT_8'] + car_df['LINE_NCH_PMT_AMT_9'] + car_df['LINE_NCH_PMT_AMT_10'] + \
                        car_df['LINE_NCH_PMT_AMT_11'] + car_df['LINE_NCH_PMT_AMT_12'] + car_df['LINE_NCH_PMT_AMT_13'] 

car_aggr = (car_df.groupby('DESYNPUF_ID').agg(
    {'CLM_ID':'count', 
     'NCH_PMT_AMT':'sum'
    }
    )
             .reset_index()
             .rename(columns={'CLM_ID':'CLM_ID_count', 
                              'NCH_PMT_AMT':'NCH_PMT_AMT_sum'
                             }))
df_write_csv(car_aggr,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/car_cohort_aggr.csv")

#validate inpatient aggr
df_print(car_aggr)
car_aggr.DESYNPUF_ID.nunique()

(24072, 3)
Index(['DESYNPUF_ID', 'CLM_ID_count', 'NCH_PMT_AMT_sum'], dtype='object')


24072

In [172]:
#aggregating by person - beneficiary
# first set the dummy col for counts
bene_df['BENE_COUNT_OCCURENCE'] = 1
bene_aggr = (bene_df.groupby('DESYNPUF_ID').agg(
    {
     'BENE_BIRTH_DT':'max',
     'BENE_DEATH_DT':'max',
     'BENE_SEX_IDENT_CD':'max',
     'BENE_RACE_CD': 'max',
     'SP_STATE_CODE': 'max',
     'BENE_COUNTY_CD': 'max',
     'BENE_HI_CVRAGE_TOT_MONS':'max',
     'BENE_SMI_CVRAGE_TOT_MONS':'max',
     'BENE_COUNT_OCCURENCE':'sum'
    }
    )
             .reset_index()
             .rename(columns={'BENE_BIRTH_DT':'BENE_BIRTH_DT_max',
                              'BENE_SEX_IDENT_CD':'BENE_SEX_IDENT_CD_max',
                              'BENE_RACE_CD': 'BENE_RACE_CD_max',
                              'SP_STATE_CODE': 'SP_STATE_CODE_max',
                              'BENE_HI_CVRAGE_TOT_MONS':'BENE_HI_CVRAGE_TOT_MONS_max',
                              'BENE_SMI_CVRAGE_TOT_MONS':'BENE_SMI_CVRAGE_TOT_MONS_max',
                              'BENE_COUNTY_CD':'BENE_COUNTY_CD_max'
                             }))
df_write_csv(bene_aggr,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort_aggr.csv")

#validate beneficiary
bene_aggr.head()
# bene_aggr.shape
# bene_aggr.count()
# bene_aggr.DESYNPUF_ID.nunique()


Unnamed: 0,DESYNPUF_ID,BENE_BIRTH_DT_max,BENE_DEATH_DT,BENE_SEX_IDENT_CD_max,BENE_RACE_CD_max,SP_STATE_CODE_max,BENE_COUNTY_CD_max,BENE_HI_CVRAGE_TOT_MONS_max,BENE_SMI_CVRAGE_TOT_MONS_max,BENE_COUNT_OCCURENCE
0,00000B48BCF4AD29,1923-09-01,NaT,2,5,10,260,12,12,3
1,0000141F2FECE9BC,1974-04-01,NaT,2,1,39,560,12,12,3
2,000022FFDB0BE2C7,1938-10-01,NaT,1,1,24,260,12,12,3
3,0000525AB30E4DEF,1920-10-01,NaT,2,1,31,300,12,12,3
4,00007A645AD6821D,1938-12-01,NaT,1,2,11,470,12,12,3


In [173]:
#now filtering for beneficiaries with 12 months coverage 
bene_aggr = pd.DataFrame(bene_aggr[(bene_aggr.BENE_HI_CVRAGE_TOT_MONS_max == 12) & (bene_aggr.BENE_SMI_CVRAGE_TOT_MONS_max == 12)])
bene_aggr.count()
df_write_csv(bene_aggr,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort_eligible_aggr.csv")


## 4. Merge datasets and process ##

### 4.a Merge Beneficiary, Carrier, Inpatient and Outpatient files ###


In [174]:
#now merge beneficiary, carrier claims, inpatient and outpatient by left join 
bene_car_df = pd.merge(bene_aggr, car_aggr, on='DESYNPUF_ID', how = 'left')
bene_car_inp_df = pd.merge(bene_car_df, inp_aggr, on='DESYNPUF_ID', how = 'left')
bene_car_inp_outp_df = pd.merge(bene_car_inp_df, outp_aggr, on='DESYNPUF_ID', how = 'left')
#should've differentiated inpatient and outpatient columns while creating those dataframes. Ah well!
bene_car_inp_outp_df = bene_car_inp_outp_df.rename(columns= {'CLM_ID_count_x': 'car_CLM_ID_count',
                                                            'CLM_ID_count_y': 'inp_CLM_ID_count',
                                                            'CLM_PMT_AMT_sum_x': 'inp_CLM_PMT_AMT_sum',
                                                            'NCH_PRMRY_PYR_CLM_PD_AMT_sum_x': 'inp_NCH_PRMRY_PYR_CLM_PD_AMT_sum',
                                                            'CLM_ID_count': 'outp_CLM_ID_count',
                                                            'CLM_PMT_AMT_sum_y': 'outp_CLM_PMT_AMT_sum',
                                                            'NCH_PRMRY_PYR_CLM_PD_AMT_sum_y':'outp_NCH_PRMRY_PYR_CLM_PD_AMT_sum'})



In [175]:
#validation cell
#print(bene_car_inp_outp_df.head())
print(bene_car_inp_outp_df.groupby('BENE_SEX_IDENT_CD_max').DESYNPUF_ID.count())
print(bene_car_inp_outp_df.groupby('BENE_RACE_CD_max').DESYNPUF_ID.count())
#print(bene_car_inp_outp_df.groupby(['SP_STATE_CODE_max','BENE_RACE_CD_max']).DESYNPUF_ID.count())

BENE_SEX_IDENT_CD_max
1    479384
2    616693
Name: DESYNPUF_ID, dtype: int64
BENE_RACE_CD_max
1    912682
2    113335
3     44677
5     25383
Name: DESYNPUF_ID, dtype: int64


### 4.b Add State Name ###

In [205]:
#Now add state names
us_state_df = pd.read_csv("/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/us_states_codes.csv")
us_state_df.head()
bene_car_inp_outp_state_df = pd.merge(bene_car_inp_outp_df, us_state_df, on='SP_STATE_CODE_max')

#Now fill NaNs with 0
values = {'car_CLM_ID_count':0, 'NCH_PMT_AMT_sum': 0, 'inp_CLM_ID_count': 0, 'inp_CLM_PMT_AMT_sum': 0, 'outp_CLM_ID_count': 0}
bene_car_inp_outp_state_df.fillna(value=values,inplace=True)

Unnamed: 0,DESYNPUF_ID,BENE_BIRTH_DT_max,BENE_DEATH_DT,BENE_SEX_IDENT_CD_max,BENE_RACE_CD_max,SP_STATE_CODE_max,BENE_COUNTY_CD_max,BENE_HI_CVRAGE_TOT_MONS_max,BENE_SMI_CVRAGE_TOT_MONS_max,BENE_COUNT_OCCURENCE,...,NCH_PMT_AMT_sum,inp_CLM_ID_count,inp_CLM_PMT_AMT_sum,inp_NCH_PRMRY_PYR_CLM_PD_AMT_sum,CLM_UTLZTN_DAY_CNT_sum,outp_CLM_ID_count,outp_CLM_PMT_AMT_sum,outp_NCH_PRMRY_PYR_CLM_PD_AMT_sum,SP_State_iso2a,SP_State_full_name
0,00000B48BCF4AD29,1923-09-01,NaT,2,5,10,260,12,12,3,...,0.0,0.0,0.0,,,0.0,,,FL,Florida
1,0003F63CD84E54A3,1932-07-01,NaT,2,1,10,470,12,12,3,...,0.0,0.0,0.0,,,0.0,,,FL,Florida
2,00045BB8F4C922A3,1934-06-01,NaT,1,5,10,50,12,12,3,...,0.0,0.0,0.0,,,0.0,,,FL,Florida
3,0004FB2ED3823AA8,1941-06-01,NaT,2,1,10,340,12,12,3,...,0.0,0.0,0.0,,,0.0,,,FL,Florida
4,00052CF30906F215,1913-05-01,NaT,2,1,10,490,12,12,3,...,0.0,0.0,0.0,,,0.0,,,FL,Florida


### 4.c Add Hospice Indicator if the beneficiary availed hospice service ###

In [217]:
#Now add hospice indicator 
bene_car_inp_outp_state_df['HOSPICE_FLAG'] = np.where((bene_car_inp_outp_state_df['car_CLM_ID_count'] > 0)
                                 |(bene_car_inp_outp_state_df['inp_CLM_ID_count'] > 0)
                                 |(bene_car_inp_outp_state_df['outp_CLM_ID_count'] > 0)
                                 ,1,0)
#bene_car_inp_outp_state_df.head(1000)

### 4.d Aggregate group by country, race and gender ###

In [220]:
# Total beneficiary and hospice beneficiary numbers by state
bene_country_group_by = (bene_car_inp_outp_state_df.groupby('SP_State_full_name').agg(
    {
     'DESYNPUF_ID':'count',
     'HOSPICE_FLAG':'sum'
    }
    )
             .reset_index()
             .rename(columns={'DESYNPUF_ID':'DESYNPUF_ID_count',
                              'HOSPICE_FLAG':'HOSPICE_FLAG_sum'
                             }))

bene_country_group_by['percent_hospice'] = bene_country_group_by['HOSPICE_FLAG_sum']/bene_country_group_by['DESYNPUF_ID_count']

df_write_csv(bene_country_group_by,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort_country_groupby.csv")

In [221]:
# Total beneficiary and hospice beneficiary numbers by race
bene_race_group_by = (bene_car_inp_outp_state_df.groupby('BENE_RACE_CD_max').agg(
    {
     'DESYNPUF_ID':'count',
     'HOSPICE_FLAG':'sum'
    }
    )
             .reset_index()
             .rename(columns={'DESYNPUF_ID':'DESYNPUF_ID_count',
                              'HOSPICE_FLAG':'HOSPICE_FLAG_sum'
                             }))

bene_race_group_by['percent_hospice'] = bene_race_group_by['HOSPICE_FLAG_sum']/bene_race_group_by['DESYNPUF_ID_count']

df_write_csv(bene_race_group_by,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort_race_groupby.csv")

In [222]:
# Total beneficiary and hospice beneficiary numbers by gender
bene_gender_group_by = (bene_car_inp_outp_state_df.groupby('BENE_SEX_IDENT_CD_max').agg(
    {
     'DESYNPUF_ID':'count',
     'HOSPICE_FLAG':'sum'
    }
    )
             .reset_index()
             .rename(columns={'DESYNPUF_ID':'DESYNPUF_ID_count',
                              'HOSPICE_FLAG':'HOSPICE_FLAG_sum'
                             }))

bene_gender_group_by['percent_hospice'] = bene_gender_group_by['HOSPICE_FLAG_sum']/bene_gender_group_by['DESYNPUF_ID_count']

df_write_csv(bene_race_group_by,"/Users/nalinivineetha/Documents/MIDS/W200Python/PythonRoughWork/Project_2/files/beneficiary/bene_cohort_gender_groupby.csv")