In [None]:
import getpass
import os
# Lower-cased login name of whoever is running the notebook (used in log lines).
# The USERNAME environment variable is only reliably set on Windows; os.getenv
# returns None when it is missing, which made the bare `.lower()` call raise
# AttributeError. Fall back to getpass.getuser(), and to a placeholder if even
# that cannot determine a login name.
try:
    USERNAME = (os.getenv('USERNAME') or getpass.getuser()).lower()
except (KeyError, ImportError, OSError):
    USERNAME = 'unknown'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob

%matplotlib inline

import datetime as dt
import time
import warnings
from dateutil.relativedelta import relativedelta


from snp_query_box import DsnpHelperFunction

# Show every column when a DataFrame is rendered (no "..." truncation).
pd.set_option("display.max_columns", None)
# Suppress all warnings for the rest of the kernel session.
warnings.simplefilter("ignore")

In [None]:
# Log which user kicked off this run (USERNAME is resolved in the first cell).
print(f"User: {USERNAME} Started creating iha_df in shared storage")

In [None]:
# Wall-clock start of the run; a later cell subtracts it from time.time() to report elapsed seconds.
start_time = time.time()

In [None]:
# Reporting-period anchors. Everything below is derived from today's date via
# the project's DsnpHelperFunction helpers.
today = dt.datetime.today()
today_str = format(today, '%Y-%m-%d')

# The helper is asked twice: once for text and once for its default output
# type, which is what the date arithmetic further down operates on.
last_date_of_last_month_str = DsnpHelperFunction.last_date_of_last_month(
    today_str, output_type="string"
)
last_date_of_last_month = DsnpHelperFunction.last_date_of_last_month(today_str)

# First seven characters of the string form -- presumably 'YYYY-MM'; confirm against the helper.
last_month = last_date_of_last_month_str[:7]

# One day past the end of last month, then one year back.
_day_after_last_month = last_date_of_last_month + dt.timedelta(days=1)
one_year_rolling_back_date = pd.Timestamp(_day_after_last_month - relativedelta(years=1))
print(one_year_rolling_back_date)

first_date_of_year = DsnpHelperFunction.first_date_of_year(
    today_str, output_type="string"
)
print(first_date_of_year)

first_date_of_last_month_str = DsnpHelperFunction.first_date_of_last_month(
    today_str, output_type="string"
)
print(first_date_of_last_month_str)

# All three month labels share the same value as last_month.
pulled_data_date = comp_report_data_month = hra_report_data_month = last_month

# The reporting end date is also used to name this run's output folder.
reporting_end_date = last_date_of_last_month_str
print(reporting_end_date)

In [None]:
def prepare_iha_data(dir_path=r'\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV'):
    """Load the IHA workbooks found in ``dir_path`` and return the qualifying visit rows.

    Two groups of workbooks are read, in this order:

    * ``*Final.xlsx`` -- the sheets named ``Aetna_Coventry_Combined_DSNP`` and
      ``AetnaDual_SNP``.
    * ``*[0-9].xlsx`` (file name ends in a digit) -- the first two sheets by
      position, whatever they are called.

    In every sheet ``MemberID`` is read as text and renamed to ``Member_ID``;
    ``HPLAN`` is renamed to ``Contract_Number``. Only rows whose ``Status2`` is
    ``'Completed Visit'`` or ``'In CDI/Coding'`` are kept.

    Parameters
    ----------
    dir_path : str, optional
        Folder that holds the workbooks. Defaults to the shared HHV folder.

    Returns
    -------
    pandas.DataFrame
        Columns ``Member_ID`` (zero-padded to 12 characters),
        ``Contract_Number``, ``Appt_Date`` and ``ScheduledVisitDate``, with
        fully identical rows removed. Rows are NOT reduced to one per member:
        every distinct qualifying visit is returned.

    Raises
    ------
    ValueError
        If no workbook in ``dir_path`` matches either pattern.
    IndexError
        If a ``*[0-9].xlsx`` workbook has fewer than two sheets.
    """
    final_sheet_names = ['Aetna_Coventry_Combined_DSNP', 'AetnaDual_SNP']
    frames = []

    # Source #1: the flat "Final" workbooks, read by sheet name.
    for file_path in _list_iha_workbooks(dir_path, '*Final.xlsx'):
        print(file_path)
        frames.append(_read_iha_workbook(file_path, final_sheet_names))

    # Source #2: the other workbooks, first two sheets by position.
    for file_path in _list_iha_workbooks(dir_path, '*[0-9].xlsx'):
        print(file_path)
        frames.append(_read_iha_workbook(file_path))

    # pd.concat([]) fails with an unhelpful "No objects to concatenate";
    # say what was actually being looked for instead.
    if not frames:
        raise ValueError(
            f"No IHA workbooks (*Final.xlsx or *[0-9].xlsx) found in {dir_path!r}"
        )

    iha_file = pd.concat(frames, ignore_index=True)[
        ['Member_ID', 'Contract_Number', 'Appt_Date', 'ScheduledVisitDate', 'Status2']
    ]
    completed = iha_file['Status2'].isin(['Completed Visit', 'In CDI/Coding'])
    iha_file = iha_file[completed].drop(columns=['Status2'])

    # NOTE(review): a missing MemberID may turn into the text 'nan' here and be
    # padded like a real id -- confirm whether such rows should be dropped.
    iha_file['Member_ID'] = iha_file['Member_ID'].astype(str).str.zfill(12)

    # De-duplicate last: rows that differed only by Status2 or by the amount of
    # zero padding are identical only after the two steps above.
    return iha_file.drop_duplicates()


def _list_iha_workbooks(dir_path, pattern):
    """Return the sorted paths in ``dir_path`` matching ``pattern``, skipping Excel ``~$`` lock files."""
    return sorted(
        path
        for path in glob.glob(os.path.join(dir_path, pattern))
        if not os.path.basename(path).startswith('~$')
    )


def _read_iha_workbook(file_path, sheet_names=None):
    """Read two sheets of one workbook (the first two when ``sheet_names`` is None) and stack them."""
    # A single ExcelFile handle serves both sheets and is closed on exit, so
    # the workbook is opened once and no handle is left open on the share.
    with pd.ExcelFile(file_path) as workbook:
        if sheet_names is None:
            sheet_names = [workbook.sheet_names[0], workbook.sheet_names[1]]
        parts = [
            pd.read_excel(workbook, sheet_name=sheet_name, dtype={'MemberID': str})
            .rename(columns={'MemberID': 'Member_ID', 'HPLAN': 'Contract_Number'})
            for sheet_name in sheet_names
        ]
    return pd.concat(parts, ignore_index=True)

In [None]:
# Build the IHA visit table from the workbooks in the shared HHV folder.
iha_df = prepare_iha_data()

In [None]:
#TODO this will be gone
# Load the most recently modified workbook from the 2025 folder and shape it
# into the same four columns as the main IHA table.
iha_glob_2025 = r'\\mbip\medicarepBI\Projects\COE\DSNP\Ting\HHV\Aetna_Coventry_Combined_DSNP_2025\*'

# Skip sub-folders and Excel '~$' lock files: a lock file is created whenever
# someone has a workbook open, so it is often the newest entry and unreadable.
list_of_iha_files_2025 = [
    path
    for path in glob.iglob(iha_glob_2025)
    if os.path.isfile(path) and not os.path.basename(path).startswith('~$')
]
if not list_of_iha_files_2025:
    # max() on an empty sequence only says "arg is an empty sequence".
    raise ValueError(f"No IHA workbook found for {iha_glob_2025!r}")

most_recent_iha_path_2025 = max(list_of_iha_files_2025, key=os.path.getmtime)
print(most_recent_iha_path_2025)

# One ExcelFile handle serves both sheets and is closed on exit.
with pd.ExcelFile(most_recent_iha_path_2025) as xl:
    sheets = xl.sheet_names  # see all sheet names
    recent_iha_file_1 = pd.read_excel(
        xl, sheet_name=sheets[0], dtype={'MemberID': str}
    ).rename(columns={'MemberID': 'Member_ID', 'HPLAN': 'Contract_Number'})
    recent_iha_file_2 = pd.read_excel(
        xl, sheet_name=sheets[1], dtype={'MemberID': str}
    ).rename(columns={'MemberID': 'Member_ID', 'HPLAN': 'Contract_Number'})

iha_file_2025 = pd.concat([recent_iha_file_1, recent_iha_file_2], ignore_index=True)[
    ['Member_ID', 'Contract_Number', 'Appt_Date', 'ScheduledVisitDate', 'Status2']
]

# Keep finished visits only, then drop the status column.
iha_file_2025 = iha_file_2025[
    iha_file_2025['Status2'].isin(['Completed Visit', 'In CDI/Coding'])
].drop(columns=['Status2'])
iha_file_2025['Member_ID'] = iha_file_2025['Member_ID'].astype(str).str.zfill(12)

# De-duplicate last: rows that differed only by Status2 or by zero padding
# are identical only after the steps above.
iha_file_2025 = iha_file_2025.drop_duplicates()

In [None]:
# Stack the HHV-folder rows on top of the 2025-folder rows, renumber the index,
# and drop rows that are identical across every column.
iha_df_all = pd.concat([iha_df, iha_file_2025], ignore_index=True).drop_duplicates()

In [None]:
# Output folder for this run: one sub-folder per reporting end date.
storage_path = r'//mbip/medicarepBI/Projects/COE/DSNP/dsnp_data_storage/monthly_flat_files'
output_path = f'{storage_path}/{reporting_end_date}'
print(output_path)

# Record whether the folder was already there (for the message only), then
# create it with exist_ok=True. On shared storage another run can create the
# folder between an exists() check and makedirs(), which would otherwise raise
# FileExistsError.
isExist = os.path.exists(output_path)
os.makedirs(output_path, exist_ok=True)
if not isExist:
    print("The new directory is created!")
else:
    print("The folder already exist")
print("Output files will sit in the folder above.")

In [None]:
# Preview the first rows of the combined table before it is written out.
# NOTE(review): the rendered output contains Member_ID values -- consider clearing it before sharing the notebook.
iha_df_all.head()

In [None]:
# Persist the combined table as iha_df.parquet inside this run's output folder.
iha_df_all.to_parquet(f"{output_path}/iha_df.parquet")

In [None]:
# Report elapsed wall-clock seconds since start_time was captured.
print("Process time -- %s seconds" % (time.time() - start_time))

In [None]:
#TODO
#dsnp_inplay_status_path = r'//mbip/medicarepBI/Projects/COE/DSNP/Projects/Med_Adherence_In_Play/DSNP_May_07_2024.xlsx'
#rnpm_path = r'//mbip/medicarepBI/Projects/COE/DSNP/Ting/Top40/Senior Clinical Strategist and PBG groups 202404.xlsx'
#Careplan