In [1]:
import getpass
import os
# Lower-cased login name of the person running the notebook.
# The USERNAME environment variable is normally only set on Windows; fall back to
# getpass.getuser() so a missing variable does not fail with AttributeError on None.
USERNAME = (os.getenv('USERNAME') or getpass.getuser()).lower()

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import glob

%matplotlib inline

import datetime as dt
import time
import warnings
from dateutil.relativedelta import relativedelta


from snp_query_box import DsnpHelperFunction

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any pandas warnings raised by the cells below.
warnings.filterwarnings("ignore")
# Show all columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

In [3]:
# Log which user started the refresh of the shared iha_df file.
print(f"User: {USERNAME} Started creating iha_df in shared storage")

User: a845514 Started creating iha_df in shared storage


In [4]:
# Wall-clock start; the last code cell subtracts this to report total run time.
start_time = time.time()

In [5]:
# Anchor every reporting date on the day the notebook is run.
today = dt.datetime.today()
today_str = today.strftime('%Y-%m-%d')
# Last day of the previous month, once as a string and once in the helper's default
# (date-like) form. Helper semantics are inferred from its name and the printed
# output below -- TODO confirm against snp_query_box.
last_date_of_last_month_str = DsnpHelperFunction.last_date_of_last_month(today_str, output_type="string")
last_date_of_last_month = DsnpHelperFunction.last_date_of_last_month(today_str)

# First seven characters of the string above (presumably 'YYYY-MM').
last_month = last_date_of_last_month_str[:7]
# Day after last month's end, moved back one year: start of a rolling 12-month window.
one_year_rolling_back_date = pd.Timestamp(last_date_of_last_month + dt.timedelta(days=1) - relativedelta(years = 1))
print(one_year_rolling_back_date)

first_date_of_year = DsnpHelperFunction.first_date_of_year(today_str, output_type="string")
print(first_date_of_year)
first_date_of_last_month_str = DsnpHelperFunction.first_date_of_last_month(today_str, output_type="string")
print(first_date_of_last_month_str)

# The three month labels below are all aliases of the same last-month string.
pulled_data_date = last_month
comp_report_data_month = last_month
hra_report_data_month = last_month
# Reporting period ends on the last day of the previous month; also used later as
# the name of the monthly output folder.
reporting_end_date = last_date_of_last_month_str
print(reporting_end_date)

2024-05-01 00:00:00
2025-01-01
2025-04-01
2025-04-30


In [6]:
# Network folder searched for iha_df.parquet and the '*[0-9].xlsx' extracts.
dir_path = r'\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025' 

def prepare_iha_data_current_year(dir_path = dir_path):
    """Merge the stored IHA history with the visits found in the Excel extracts.

    Parameters
    ----------
    dir_path : str
        Folder containing ``iha_df.parquet`` and the Excel extracts whose file
        names end in a digit (``*[0-9].xlsx``).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(most_recent_iha_df, new_iha_concat_df)``: the history exactly as
        loaded from the parquet file, and that history with the qualifying
        Excel rows appended and exact duplicate rows removed.

    Raises
    ------
    FileNotFoundError
        If ``iha_df.parquet`` does not exist in ``dir_path``.
    """
    # Source #1: the cumulative flat file. The glob doubles as an existence check
    # and keeps the log line (a one-element list) unchanged.
    most_recent_parquet_path = glob.glob(os.path.join(dir_path, 'iha_df.parquet'))
    print(most_recent_parquet_path)
    if not most_recent_parquet_path:
        raise FileNotFoundError(
            f"iha_df.parquet not found in {dir_path}"
        )
    most_recent_iha_df = pd.read_parquet(most_recent_parquet_path[0])

    # Source #2: the dated Excel extracts. Sorted so the row order of the result
    # does not depend on the order the file system happens to list them in.
    flat_files = sorted(glob.glob(os.path.join(dir_path, '*[0-9].xlsx')))

    column_renames = {'MemberID': 'Member_ID', 'HPLAN': 'Contract_Number'}
    new_files_dfs = []
    for file_path in flat_files:
        print(file_path)
        # Open each workbook once and close it deterministically; an open handle
        # on a network share can keep the file locked for other users.
        with pd.ExcelFile(file_path) as xl:
            # Only the first two sheets are used. Slicing (instead of indexing
            # sheet 1 directly) tolerates a workbook that has a single sheet.
            for sheet in xl.sheet_names[:2]:
                sheet_df = pd.read_excel(xl, sheet_name=sheet, dtype={'MemberID': str})
                new_files_dfs.append(sheet_df.rename(columns=column_renames))

    if not new_files_dfs:
        # Nothing to append: pd.concat([]) would raise, so return the stored
        # history as both the "previous" and the "combined" frame.
        print("No new IHA Excel files found; returning the stored history only.")
        return most_recent_iha_df, most_recent_iha_df.drop_duplicates()

    new_iha_df = pd.concat(new_files_dfs, ignore_index=True)

    wanted_columns = ['Member_ID', 'Contract_Number', 'Appt_Date', 'ScheduledVisitDate', 'Status2']
    new_iha_file = new_iha_df[wanted_columns].drop_duplicates()

    # Keep only rows whose status is one of the two accepted values, then drop
    # the status column so the schema matches the stored history.
    accepted_status = new_iha_file['Status2'].isin(['Completed Visit', 'In CDI/Coding'])
    new_iha_file = new_iha_file[accepted_status].drop(columns=['Status2'])

    # Left-pad member ids to 12 characters.
    # NOTE(review): astype(str) turns a missing Member_ID into the text 'nan'
    # before padding -- confirm whether such rows should be dropped instead.
    new_iha_file = new_iha_file.assign(
        Member_ID=new_iha_file['Member_ID'].astype(str).str.zfill(12)
    )

    # Only exact duplicate rows are removed here; no per-member "latest visit"
    # de-duplication is applied in this function.
    new_iha_concat_df = pd.concat([most_recent_iha_df, new_iha_file], ignore_index = True).drop_duplicates()
    return most_recent_iha_df, new_iha_concat_df

In [7]:
# Load the stored history and build the refreshed frame using the default dir_path.
most_recent_iha_df, new_iha_concat_df = prepare_iha_data_current_year()

['\\\\mbip\\medicarepBI\\Projects\\COE\\DSNP\\Ting\\HHV\\Aetna_Coventry_Combined_DSNP_2025\\iha_df.parquet']
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250313.xlsx
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250320.xlsx
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250327.xlsx
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250403.xlsx
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250417.xlsx
\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\Aetna_Coventry_Combined_DSNP_2025_20250424.xlsx


In [8]:
# Row count of the history as loaded from parquet.
len(most_recent_iha_df)

819707

In [9]:
# Same count after removing exact duplicates; equal numbers mean the stored file has none.
len(most_recent_iha_df.drop_duplicates())

819707

In [10]:
# Row count of the refreshed frame (it is already de-duplicated inside the function).
len(new_iha_concat_df.drop_duplicates())

856205

In [11]:
# Save the previously stored history under a separate name before iha_df.parquet
# is overwritten by the next cell.
most_recent_iha_df.to_parquet(f"{dir_path}/prev_iha_df.parquet")

In [12]:
# Overwrite iha_df.parquet in dir_path with the refreshed frame.
new_iha_concat_df.to_parquet(f"{dir_path}/iha_df.parquet")

In [13]:
# Monthly output folder on shared storage, named after the reporting end date.
storage_path = r'//mbip/medicarepBI/Projects/COE/DSNP/dsnp_data_storage/monthly_flat_files'
output_path = f'{storage_path}/{reporting_end_date}'

# Record whether the folder is already there only to choose the log message.
isExist = os.path.isdir(output_path)
print(output_path)
# exist_ok=True makes creation safe if another user creates the folder between
# the check above and this call (check-then-create race on shared storage).
os.makedirs(output_path, exist_ok=True)
if not isExist:
    print("The new directory is created!")
else:
    print("The folder already exist")
print("Output files will sit in the folder above.")

//mbip/medicarepBI/Projects/COE/DSNP/dsnp_data_storage/monthly_flat_files/2025-04-30
The folder already exist
Output files will sit in the folder above.


In [14]:
# Preview the first rows of the refreshed frame.
# NOTE(review): the rendered output shows Member_ID values -- consider clearing it before sharing.
new_iha_concat_df.head()

Unnamed: 0,Member_ID,Contract_Number,Appt_Date,ScheduledVisitDate
0,101230726500,H3312,2021-06-16,2021-06-16
1,101232540600,H3146,2021-10-14,2021-10-14
2,101237344800,H3239,2021-01-07,2021-01-07
3,101247521100,H3312,2021-01-22,2021-01-22
4,101248662300,H5302,2021-06-29,2021-06-29


In [15]:
# Also store the refreshed frame in this month's output folder.
new_iha_concat_df.to_parquet(f"{output_path}/iha_df.parquet")

In [16]:
# Report seconds elapsed since start_time was taken.
print("Process time -- %s seconds" % (time.time() - start_time))

Process time -- 322.98229813575745 seconds


In [17]:
#TODO
#dsnp_inplay_status_path = r'//mbip/medicarepBI/Projects/COE/DSNP/Projects/Med_Adherence_In_Play/DSNP_May_07_2024.xlsx'
#rnpm_path = r'//mbip/medicarepBI/Projects/COE/DSNP/Ting/Top40/Senior Clinical Strategist and PBG groups 202404.xlsx'
#Careplan