In [306]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

In [307]:
%matplotlib inline

In [308]:
#define file path
provider_fp="../data/Medicare_Provider_Util_Payment_PUF_CY2017.txt"

In [309]:
#prelim look
pd.read_csv(provider_fp, sep='\t', nrows=100).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 26 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   npi                               100 non-null    int64  
 1   nppes_provider_last_org_name      100 non-null    object 
 2   nppes_provider_first_name         99 non-null     object 
 3   nppes_provider_mi                 67 non-null     object 
 4   nppes_credentials                 99 non-null     object 
 5   nppes_provider_gender             99 non-null     object 
 6   nppes_entity_code                 99 non-null     object 
 7   nppes_provider_street1            99 non-null     object 
 8   nppes_provider_street2            68 non-null     object 
 9   nppes_provider_city               99 non-null     object 
 10  nppes_provider_zip                99 non-null     float64
 11  nppes_provider_state              99 non-null     object 
 12  nppes_pro

In [310]:
#prelim look to confirm reduced columns
pd.read_csv(provider_fp,
            sep = "\t",
            nrows=100000,
            usecols=['npi','nppes_provider_last_org_name','nppes_entity_code',
                    'nppes_provider_zip','nppes_provider_state','provider_type',
                    'place_of_service','hcpcs_code','hcpcs_description',
                   'bene_unique_cnt','bene_day_srvc_cnt','average_Medicare_allowed_amt',
                    'average_Medicare_payment_amt'],
            dtype={'npps_provider_zip':'str'}
            ).dtypes

npi                               int64
nppes_provider_last_org_name     object
nppes_entity_code                object
nppes_provider_zip              float64
nppes_provider_state             object
provider_type                    object
place_of_service                 object
hcpcs_code                       object
hcpcs_description                object
bene_unique_cnt                 float64
bene_day_srvc_cnt               float64
average_Medicare_allowed_amt    float64
average_Medicare_payment_amt    float64
dtype: object

In [311]:
#look at unique provider types
provider_type.provider_type.unique()

array([nan, 'Internal Medicine', 'Pathology', 'Anesthesiology',
       'Family Practice', 'Obstetrics & Gynecology', 'General Surgery',
       'Urology', 'Cardiac Surgery', 'Physician Assistant',
       'Physical Therapist in Private Practice', 'Dermatology',
       'Licensed Clinical Social Worker', 'Optometry',
       'Mass Immunizer Roster Biller',
       'Physical Medicine and Rehabilitation', 'Radiation Oncology',
       'Infectious Disease', 'Orthopedic Surgery',
       'Certified Clinical Nurse Specialist', 'Endocrinology',
       'Chiropractic', 'Psychiatry', 'Diagnostic Radiology',
       'Centralized Flu', 'Rheumatology', 'General Practice', 'Neurology',
       'Emergency Medicine', 'Nephrology', 'Ophthalmology',
       'Nurse Practitioner', 'Pulmonary Disease', 'Otolaryngology',
       'Plastic and Reconstructive Surgery', 'Psychologist, Clinical',
       'Cardiology', 'Ambulance Service Provider',
       'Interventional Cardiology', 'Gastroenterology', 'Hospitalist',
      

In [312]:
provider.dtypes

npi                               int64
nppes_provider_last_org_name     object
nppes_entity_code                object
zip_code                         object
nppes_provider_state             object
provider_type                    object
place_of_service                 object
hcpcs_code                       object
hcpcs_description                object
bene_unique_cnt                 float64
bene_day_srvc_cnt               float64
average_Medicare_allowed_amt    float64
average_Medicare_payment_amt    float64
dtype: object

In [313]:
#import data in chunks
provider_chunks=pd.read_csv(provider_fp,
                     sep = "\t",
                     chunksize=100000,
                     usecols=['npi','nppes_provider_last_org_name','nppes_entity_code',
                             'nppes_provider_zip','nppes_provider_state','provider_type',
                             'place_of_service','hcpcs_code','hcpcs_description',
                             'bene_unique_cnt','bene_day_srvc_cnt','average_Medicare_allowed_amt',
                             'average_Medicare_payment_amt'],
                     dtype={'npps_provider_zip':'str'}
                    )

In [314]:
# read the df starting index 1 and reset index
# filter for Ambulatory Surgical Center
provider_asc=[]
for i,chunk in enumerate(provider_chunks):
    print(i+1)
    provider_asc.append(chunk[chunk['provider_type']=='Ambulatory Surgical Center'])

1
2


  interactivity=interactivity, compiler=compiler, result=result)


3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [315]:
#combine chunks into one new dataset
dataset=pd.concat(provider_asc)
type(dataset)

pandas.core.frame.DataFrame

In [316]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57329 entries, 2912 to 9847425
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   npi                           57329 non-null  int64  
 1   nppes_provider_last_org_name  57329 non-null  object 
 2   nppes_entity_code             57329 non-null  object 
 3   nppes_provider_zip            57329 non-null  object 
 4   nppes_provider_state          57329 non-null  object 
 5   provider_type                 57329 non-null  object 
 6   place_of_service              57329 non-null  object 
 7   hcpcs_code                    57329 non-null  object 
 8   hcpcs_description             57329 non-null  object 
 9   bene_unique_cnt               57329 non-null  float64
 10  bene_day_srvc_cnt             57329 non-null  float64
 11  average_Medicare_allowed_amt  57329 non-null  float64
 12  average_Medicare_payment_amt  57329 non-null  float64
d

In [317]:
#drop row 0, reset index, rename dataset, rename zip code column
dataset.drop(dataset.index[0]).reset_index(drop=True)
provider_asc=provider.rename(columns={'nppes_provider_zip':'zip_code'})
provider_asc

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_entity_code,zip_code,nppes_provider_state,provider_type,place_of_service,hcpcs_code,hcpcs_description,bene_unique_cnt,bene_day_srvc_cnt,average_Medicare_allowed_amt,average_Medicare_payment_amt
0,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,0191T,Internal insertion of eye fluid drainage device,11.0,13.0,3042.237692,2385.113846
1,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,20610,Aspiration and/or injection of large joint or ...,14.0,15.0,26.260000,20.588947
2,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29823,Extensive removal of shoulder joint tissue usi...,15.0,15.0,837.130000,656.310000
3,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29824,Partial removal of collar bone at shoulder usi...,14.0,14.0,1016.512857,796.945714
4,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29827,Repair of shoulder rotator cuff using an endos...,22.0,22.0,3391.399545,2652.337273
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57324,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,974014604,OR,Ambulatory Surgical Center,F,62321,Injection of substance into spinal canal of up...,33.0,36.0,288.477778,219.702222
57325,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,974014604,OR,Ambulatory Surgical Center,F,62323,Injection of substance into spinal canal of lo...,40.0,48.0,296.720000,226.542500
57326,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,974014604,OR,Ambulatory Surgical Center,F,63047,Partial removal of middle spine bone with rele...,22.0,22.0,2803.287273,2197.769091
57327,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,974014604,OR,Ambulatory Surgical Center,F,64483,Injections of anesthetic and/or steroid drug i...,42.0,49.0,325.651667,248.943030


In [318]:
provider_asc.dtypes

npi                               int64
nppes_provider_last_org_name     object
nppes_entity_code                object
zip_code                         object
nppes_provider_state             object
provider_type                    object
place_of_service                 object
hcpcs_code                       object
hcpcs_description                object
bene_unique_cnt                 float64
bene_day_srvc_cnt               float64
average_Medicare_allowed_amt    float64
average_Medicare_payment_amt    float64
dtype: object

In [319]:
provider_asc['nppes_provider_state'].unique()

array(['CA', 'IL', 'LA', 'OH', 'AZ', 'MO', 'NJ', 'UT', 'TX', 'MA', 'MN',
       'MI', 'IN', 'OK', 'IA', 'FL', 'AL', 'MD', 'NC', 'TN', 'NV', 'KY',
       'NE', 'PA', 'CO', 'WI', 'GA', 'WA', 'VA', 'SC', 'PR', 'AR', 'MT',
       'ME', 'CT', 'OR', 'MS', 'RI', 'SD', 'KS', 'NY', 'NH', 'HI', 'ID',
       'AK', 'NM', 'DE', 'ND', 'DC', 'WY', 'WV', 'GU', 'VT', 'VI'],
      dtype=object)

In [320]:
provider_asc['zip_code'].iloc[0]

95687.0

In [321]:
# zip_code values are floats as indicated by decimal place above, convert to string
provider_asc['zip_code']=provider_asc['zip_code'].astype('str').str.slice(stop=5)

In [322]:
provider_asc

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_entity_code,zip_code,nppes_provider_state,provider_type,place_of_service,hcpcs_code,hcpcs_description,bene_unique_cnt,bene_day_srvc_cnt,average_Medicare_allowed_amt,average_Medicare_payment_amt
0,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,0191T,Internal insertion of eye fluid drainage device,11.0,13.0,3042.237692,2385.113846
1,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,20610,Aspiration and/or injection of large joint or ...,14.0,15.0,26.260000,20.588947
2,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29823,Extensive removal of shoulder joint tissue usi...,15.0,15.0,837.130000,656.310000
3,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29824,Partial removal of collar bone at shoulder usi...,14.0,14.0,1016.512857,796.945714
4,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,29827,Repair of shoulder rotator cuff using an endos...,22.0,22.0,3391.399545,2652.337273
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57324,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,97401,OR,Ambulatory Surgical Center,F,62321,Injection of substance into spinal canal of up...,33.0,36.0,288.477778,219.702222
57325,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,97401,OR,Ambulatory Surgical Center,F,62323,Injection of substance into spinal canal of lo...,40.0,48.0,296.720000,226.542500
57326,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,97401,OR,Ambulatory Surgical Center,F,63047,Partial removal of middle spine bone with rele...,22.0,22.0,2803.287273,2197.769091
57327,1992999775,"SPINE SURGERY CENTER OF EUGENE, LLC",O,97401,OR,Ambulatory Surgical Center,F,64483,Injections of anesthetic and/or steroid drug i...,42.0,49.0,325.651667,248.943030


In [323]:
provider_asc.columns

Index(['npi', 'nppes_provider_last_org_name', 'nppes_entity_code', 'zip_code',
       'nppes_provider_state', 'provider_type', 'place_of_service',
       'hcpcs_code', 'hcpcs_description', 'bene_unique_cnt',
       'bene_day_srvc_cnt', 'average_Medicare_allowed_amt',
       'average_Medicare_payment_amt'],
      dtype='object')

In [324]:
provider_asc.dtypes

npi                               int64
nppes_provider_last_org_name     object
nppes_entity_code                object
zip_code                         object
nppes_provider_state             object
provider_type                    object
place_of_service                 object
hcpcs_code                       object
hcpcs_description                object
bene_unique_cnt                 float64
bene_day_srvc_cnt               float64
average_Medicare_allowed_amt    float64
average_Medicare_payment_amt    float64
dtype: object

In [325]:
provider_asc.shape

(57329, 13)

In [326]:
provider_asc.nppes_entity_code.unique()

array(['O'], dtype=object)

In [327]:
provider_asc.place_of_service.unique()

array(['F'], dtype=object)

In [328]:
#import hospital.csv, rename, reduce columns
hcpcs_apc_lookup=pd.read_csv('../data/hospital.csv',
                             usecols=['hcpcs_code','apc'],
                            dtype={'apc':'str'})
hcpcs_apc_lookup

Unnamed: 0,hcpcs_code,apc
0,10005,5071
1,10007,5071
2,10009,5071
3,10011,5071
4,10021,5052
...,...,...
5929,Q9950,9085
5930,Q9968,1446
5931,Q9969,1442
5932,Q9991,9073


In [329]:
hcpcs_apc_lookup.dtypes

hcpcs_code    object
apc           object
dtype: object

In [330]:
#filter for apc 5302 to get list of matching hcpcs codes
hcpcs_apc_lookup_5302=hcpcs_apc_lookup[hcpcs_apc_lookup['apc']=='5302']
hcpcs_apc_lookup_5302

Unnamed: 0,hcpcs_code,apc
1875,32556,5302
2339,43191,5302
2340,43192,5302
2341,43193,5302
2342,43194,5302
2348,43201,5302
2349,43202,5302
2350,43204,5302
2351,43205,5302
2352,43206,5302


In [331]:
hcpcs_apc_lookup_5302.shape

(59, 2)

In [332]:
#find unique hcpcs code for APC 5302
uni_hcpcs_apc5302 = hcpcs_apc_lookup_5302['hcpcs_code'].unique()
uni_hcpcs_apc5302.shape

(59,)

In [333]:
#convert hcpcs codes in apc 5302 to list to use in next step
hcpcs_apc_lookup_5302_list=hcpcs_apc_lookup_5302['hcpcs_code'].to_list()
hcpcs_apc_lookup_5302_list

['32556',
 '43191',
 '43192',
 '43193',
 '43194',
 '43201',
 '43202',
 '43204',
 '43205',
 '43206',
 '43211',
 '43213',
 '43214',
 '43215',
 '43216',
 '43217',
 '43220',
 '43226',
 '43227',
 '43231',
 '43232',
 '43233',
 '43237',
 '43238',
 '43241',
 '43242',
 '43243',
 '43244',
 '43245',
 '43246',
 '43249',
 '43250',
 '43251',
 '43253',
 '43254',
 '43255',
 '43259',
 '43270',
 '43453',
 '43830',
 '44360',
 '44361',
 '44363',
 '44364',
 '44365',
 '44366',
 '44369',
 '44372',
 '44373',
 '44376',
 '44377',
 '44378',
 '44381',
 '49423',
 '49436',
 '49440',
 '49441',
 '49446',
 '0355T']

In [334]:
#ASC's with all hcpcs code that have corresponding APC- 5302
provider_asc_5302=provider_asc[provider_asc.hcpcs_code.isin(hcpcs_apc_lookup_5302_list)]

In [335]:
provider_asc_5302.shape

(1187, 13)

In [336]:
#filter for provider_abs_5302 for hcpcs 43249
provider_asc_hcpcs_43249=provider_asc_5302[provider_asc_5302['hcpcs_code'] =='43249']
provider_asc_hcpcs_43249

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_entity_code,zip_code,nppes_provider_state,provider_type,place_of_service,hcpcs_code,hcpcs_description,bene_unique_cnt,bene_day_srvc_cnt,average_Medicare_allowed_amt,average_Medicare_payment_amt
9,1003015843,"NORTHBAY PHYSICIAN'S SURGERY CENTER, L.L.C.",O,95687,CA,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,16.0,20.0,814.534500,596.400500
76,1003079187,WASC LLC,O,44691,OH,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,15.0,17.0,555.310000,433.162941
221,1003220187,"CENTRAL OHIO ENDOSCOPY CENTER, LLC",O,43202,OH,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,79.0,94.0,596.510000,463.132234
325,1003815242,"DOTHAN SURGERY CENTER, LLC",O,36301,AL,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,409.0,446.0,509.857780,391.675538
502,1003887225,CENTRAL TEXAS ENDOSCOPY CENTER LLC,O,77802,TX,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,23.0,26.0,576.830000,445.692692
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57099,1992752224,SPRINGFIELD CLINIC LLP,O,62703,IL,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,134.0,142.0,581.001972,451.678239
57197,1992785232,NORTHWEST FLORIDA A S C LP,O,32405,FL,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,177.0,182.0,553.870000,428.431044
57227,1992796742,"SAN ANTONIO ENDOSCOPY, LP",O,78229,TX,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,97.0,114.0,565.810000,428.748070
57238,1992875454,ELGIN GASTROENTEROLOGY ENDOSCOPY CENTER LLC,O,60123,IL,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,16.0,17.0,618.360000,484.800000


In [337]:
#import zip_cbsa.csv
zip_cbsa=pd.read_csv('../data/zip_cbsa.csv',
                     dtype={'zip_code':'str','cbsa':'str'})
zip_cbsa

Unnamed: 0,zip_code,cbsa
0,00501,35620
1,00601,38660
2,00602,10380
3,00603,10380
4,00604,10380
...,...,...
47410,99925,99999
47411,99926,99999
47412,99927,99999
47413,99928,28540


In [338]:
zip_cbsa.dtypes

zip_code    object
cbsa        object
dtype: object

In [339]:
#find for Nashville CBSA
cbsa_nash=zip_cbsa[zip_cbsa['zip_code']=='37205']
cbsa_nash

Unnamed: 0,zip_code,cbsa
17171,37205,34980


In [340]:
#find all zip codes in CBSA 34980
cbsa_zip_nash=zip_cbsa[zip_cbsa['cbsa']=='34980']
cbsa_zip_nash

Unnamed: 0,zip_code,cbsa
16984,37010,34980
16986,37011,34980
16988,37012,34980
16989,37013,34980
16990,37014,34980
...,...,...
17956,38560,34980
17958,38563,34980
17962,38567,34980
17965,38569,34980


In [341]:
#convert cbsa 34980 to zip code list for use in next step
#hcpcs_apc_lookup_5302_list=hcpcs_apc_lookup_5302['hcpcs_code'].to_list()cbsa_zip_nash_list=cbsa_zip_nash['zip_code'].to_list()
cbsa_zip_nash_list=cbsa_zip_nash['zip_code'].to_list()
cbsa_zip_nash_list

['37010',
 '37011',
 '37012',
 '37013',
 '37014',
 '37015',
 '37016',
 '37018',
 '37020',
 '37022',
 '37024',
 '37025',
 '37026',
 '37027',
 '37029',
 '37030',
 '37031',
 '37032',
 '37035',
 '37036',
 '37037',
 '37043',
 '37046',
 '37048',
 '37049',
 '37051',
 '37052',
 '37055',
 '37056',
 '37057',
 '37060',
 '37062',
 '37063',
 '37064',
 '37065',
 '37066',
 '37067',
 '37068',
 '37069',
 '37070',
 '37071',
 '37072',
 '37073',
 '37074',
 '37075',
 '37076',
 '37077',
 '37080',
 '37082',
 '37083',
 '37085',
 '37086',
 '37087',
 '37088',
 '37089',
 '37090',
 '37091',
 '37095',
 '37101',
 '37110',
 '37115',
 '37116',
 '37118',
 '37119',
 '37121',
 '37122',
 '37127',
 '37128',
 '37129',
 '37130',
 '37131',
 '37132',
 '37133',
 '37135',
 '37138',
 '37141',
 '37143',
 '37145',
 '37146',
 '37148',
 '37149',
 '37150',
 '37151',
 '37152',
 '37153',
 '37160',
 '37165',
 '37166',
 '37167',
 '37171',
 '37172',
 '37174',
 '37179',
 '37180',
 '37181',
 '37184',
 '37186',
 '37187',
 '37188',
 '37189',


In [342]:
#filter for provider_acs_ hcpcs_43249 for Nashville CBSA 
#provider_asc_5302=provider_asc[provider_asc.hcpcs_code.isin(hcpcs_apc_lookup_5302_list)]
provider_asc_hcpcs_43249_Nash=provider_asc_hcpcs_43249[provider_asc_hcpcs_43249.zip_code.isin(cbsa_zip_nash_list)]
provider_asc_hcpcs_43249_Nash

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_entity_code,zip_code,nppes_provider_state,provider_type,place_of_service,hcpcs_code,hcpcs_description,bene_unique_cnt,bene_day_srvc_cnt,average_Medicare_allowed_amt,average_Medicare_payment_amt
7943,1124498357,ENDOSCOPY CENTER OF INLAND EMPIRE LLC,O,37219,TN,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,16.0,16.0,660.99,512.294375
12767,1225039761,ENDOSCOPY CENTER OF ST THOMAS LP,O,37205,TN,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,16.0,17.0,565.636471,435.024706
16881,1295708964,COLUMBIA TN ENDOSCOPY ASC LLC,O,38401,TN,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,29.0,34.0,587.11,454.431471
25382,1427152073,PATIENT PARTNERS LLC,O,37066,TN,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,23.0,23.0,587.11,456.564783
37201,1639299803,MURFREESBORO MEDICAL CLINIC P A,O,37129,TN,Ambulatory Surgical Center,F,43249,Balloon dilation of esophagus using an endoscope,32.0,33.0,587.11,433.111515


In [344]:
# export to data folder
provider_asc_hcpcs_43249_Nash.to_csv(r'../data/provider_asc_hcpcs_43249_Nash.csv',index=False)