In [1]:
import pandas as pd

In [2]:
# Load the combined claims extract into a DataFrame. The path is relative,
# so it is resolved against the kernel's current working directory.
df = pd.read_csv('files/all_claims_files.csv')

In [3]:
# List the column names to see which fields the extract provides.
df.columns

Index(['DESYNPUF_ID', 'BENE_BIRTH_DT', 'BENE_DEATH_DT', 'BENE_SEX_IDENT_CD',
       'BENE_RACE_CD', 'BENE_ESRD_IND', 'SP_STATE_CODE', 'BENE_COUNTY_CD',
       'BENE_HI_CVRAGE_TOT_MONS', 'BENE_SMI_CVRAGE_TOT_MONS',
       'BENE_HMO_CVRAGE_TOT_MONS', 'PLAN_CVRG_MOS_NUM', 'SP_ALZHDMTA',
       'SP_CHF', 'SP_CHRNKIDN', 'SP_CNCR', 'SP_COPD', 'SP_DEPRESSN',
       'SP_DIABETES', 'SP_ISCHMCHT', 'SP_OSTEOPRS', 'SP_RA_OA', 'SP_STRKETIA',
       'MEDREIMB_IP', 'BENRES_IP', 'PPPYMT_IP', 'MEDREIMB_OP', 'BENRES_OP',
       'PPPYMT_OP', 'MEDREIMB_CAR', 'BENRES_CAR', 'PPPYMT_CAR', 'file_name'],
      dtype='object')

In [4]:
# Tag every row with characters 6-9 of the name of the file it came from.
# NOTE(review): presumably the 4-digit year embedded in the file name; this
# relies on the file-naming pattern, which is not visible here - confirm.
df['year'] = df['file_name'].map(lambda fname: fname[6:10])

In [5]:
# Sanity check: number of rows per derived year value.
df['year'].value_counts()

2008    2000
2010    2000
2009    2000
Name: year, dtype: int64

In [6]:
# Re-index the Data Frame to have a two-dimensional index: (DESYNPUF_ID, year).
# set_index moves both columns out of the frame and `df` is rebound, so this
# cell is not re-runnable on its own: a second run raises KeyError because the
# columns no longer exist. Re-run from the load cell instead.
df = df.set_index(['DESYNPUF_ID', 'year'])

In [7]:
# Split step: one group per (DESYNPUF_ID, year) pair, i.e. per combination of
# the two index levels created above.
gb = df.groupby(level=['DESYNPUF_ID', 'year'])

In [8]:
# Annual trend factor assumed for each service type; the keys match the
# suffixes of the MEDREIMB_* claim columns (IP, CAR, OP).
trend = {
    'IP': 1.06,
    'CAR': 1.03,
    'OP': 1.05,
}

In [9]:
# groupby(...).apply passes each group to the function as a Data Frame.
# x.name is the group key (a tuple when the grouping has more than one level).
def calculate_premium(x, trend_factors=None):
    """Return the weighted, trended, annualized claims cost of one group.

    Parameters
    ----------
    x : pandas.DataFrame
        The rows of a single (member, year) group. ``x.name`` must be the
        group key tuple with the year as its second element, and the frame
        must contain the columns ``PLAN_CVRG_MOS_NUM``, ``MEDREIMB_IP``,
        ``MEDREIMB_OP`` and ``MEDREIMB_CAR``.
    trend_factors : mapping, optional
        Annual trend factor per service type, keyed ``'IP'``, ``'OP'`` and
        ``'CAR'``. When omitted, the notebook-level ``trend`` dict is used.

    Returns
    -------
    float
        ``weight * (trended claims / coverage months) * 12``, or ``0.0`` when
        the group has zero coverage months.
    """
    factors = trend if trend_factors is None else trend_factors

    # Normalise the key so an integer year (2008) is treated like '2008'
    # instead of silently falling through to the default branch.
    year = str(x.name[1])

    # Weight given to the year and the number of years of trend applied.
    # Any year other than 2008/2009 gets the largest weight and no trend
    # (presumably the most recent year, 2010 - confirm if more years are added).
    if year == '2008':
        weight, trend_exp = 0.1, 2
    elif year == '2009':
        weight, trend_exp = 0.3, 1
    else:
        weight, trend_exp = 0.6, 0

    def total(column):
        # The group already holds exactly the rows for this key, so read the
        # values from it directly rather than re-selecting with .loc. Summing
        # yields a scalar even if a key has several rows (the .loc lookup
        # returned a Series there and broke the `== 0` check below);
        # skipna=False keeps a missing value missing instead of counting it as 0.
        return x[column].sum(skipna=False)

    exposure = total('PLAN_CVRG_MOS_NUM')
    claims_amt = (
        total('MEDREIMB_IP') * factors['IP'] ** trend_exp
        + total('MEDREIMB_OP') * factors['OP'] ** trend_exp
        + total('MEDREIMB_CAR') * factors['CAR'] ** trend_exp
    )

    # No coverage months: avoid dividing by zero and contribute nothing.
    if exposure == 0:
        return 0.0

    # claims / months is a per-month cost; * 12 annualizes it.
    return weight * (claims_amt / exposure) * 12

In [10]:
# Apply step: evaluate calculate_premium once per (DESYNPUF_ID, year) group.
# The function returns a scalar, so the result is a Series indexed by the
# group keys.
premiums = gb.apply(calculate_premium)

In [11]:
# Combine step
# Average of the per-group values returned by calculate_premium.
# NOTE(review): `premiums` has one entry per (DESYNPUF_ID, year), and each
# entry is already multiplied by that year's weight and by 12. This mean is
# therefore taken over member-year rows of annualized, weighted components;
# it is not a per-member figure and does not look like a PMPM amount. If a
# per-member premium is intended, the yearly components presumably need to be
# summed per DESYNPUF_ID before averaging - confirm the intended definition.
premiums.mean()

1381.783704071277