In [3]:
import pandas as pd
import numpy as np
import datetime
from ast import literal_eval

from icd10cm_to_coding19 import *
from icd9cm_to_icd10cm import *


In [4]:
# File passed to ICD10CM_Coding19_Mapper as `coding19_csv_path`.
CODING19_PATH = "resources/old_coding19.tsv"
# Pickle passed to ICD10CM_Coding19_Mapper as `coding19_tree_hierarchy_path`.
TREE_FILE_PATH = "resources/coding19_tree.pickle"

# Reference table passed to ICD9CM_ICD10CM_Mapper.
MAP_REF_PATH_9CM_10CM = "resources/icd9cm_icd10cm_table.csv"


In [5]:
# Raw input table. Later cells read the PATIENT_ID, RECORDED_DT,
# DIAGNOSIS_ICD10_CD and DIAGNOSIS_ICD_CD columns from it.
df = pd.read_csv("2000_patients.csv")

In [6]:
# Shared mapper instances used by the conversion function defined further down.

# Queried with an ICD-9-CM code via `get_icd10cm_codes`.
icd9cm_to_10cm_mapper = ICD9CM_ICD10CM_Mapper(MAP_REF_PATH_9CM_10CM)

# Queried with ICD-10-CM codes to obtain Coding19 descriptions and indices.
icd10cm_to_coding19_mapper = ICD10CM_Coding19_Mapper(
    coding19_tree_hierarchy_path=TREE_FILE_PATH,
    coding19_csv_path=CODING19_PATH,
)

In [9]:
def one_patient_coding19_df_not_cumulative(date_df_list, icd9cm_mapper=None, coding19_mapper=None):
    """Build one Coding19 row per input frame for a single patient.

    For every frame, the ICD-10-CM codes found in ``DIAGNOSIS_ICD10_CD`` are
    combined with the ICD-10-CM codes obtained by mapping every code in
    ``DIAGNOSIS_ICD_CD`` through ``icd9cm_mapper``. The combined codes are then
    turned into Coding19 descriptions and indices by ``coding19_mapper``.

    Parameters
    ----------
    date_df_list : sequence of pandas.DataFrame
        Each frame must have the columns ``PATIENT_ID``, ``RECORDED_DT``,
        ``DIAGNOSIS_ICD10_CD`` and ``DIAGNOSIS_ICD_CD``. The notebook passes
        one frame per ``RECORDED_DT`` group of one patient, so the first row's
        ``PATIENT_ID`` / ``RECORDED_DT`` are used to label the output row.
    icd9cm_mapper : object, optional
        Must provide ``get_icd10cm_codes(code) -> (success, codes)``.
        Defaults to the notebook-level ``icd9cm_to_10cm_mapper``.
    coding19_mapper : object, optional
        Must provide ``map_all_relevant_icd10cm_coding19(code)`` and
        ``indices_for_19k_vec_from_desc(descriptions)``.
        Defaults to the notebook-level ``icd10cm_to_coding19_mapper``.

    Returns
    -------
    pandas.DataFrame
        Columns ``PATIENT_ID``, ``RECORDED_DT``, ``CODING19_DESCRIPTION`` and
        ``CODING19_INDICES``, one row per input frame. The last two columns
        hold ``str()`` of the corresponding Python lists. An empty
        ``date_df_list`` yields an empty frame that still has these columns.
    """
    # Fall back to the notebook-level mapper instances when none are injected.
    if icd9cm_mapper is None:
        icd9cm_mapper = icd9cm_to_10cm_mapper
    if coding19_mapper is None:
        coding19_mapper = icd10cm_to_coding19_mapper

    output_columns = ["PATIENT_ID", "RECORDED_DT", "CODING19_DESCRIPTION", "CODING19_INDICES"]
    rows_list = []
    for cur_df in date_df_list:
        icd10cm_codes = cur_df["DIAGNOSIS_ICD10_CD"].dropna().to_list()
        icd9cm_codes = cur_df["DIAGNOSIS_ICD_CD"].dropna().to_list()

        # Codes whose mapping fails are skipped; successful mappings are
        # appended after the codes that were already ICD-10-CM.
        for icd9cm_code in icd9cm_codes:
            map_success, mapped_icd10cm_codes = icd9cm_mapper.get_icd10cm_codes(icd9cm_code)
            if map_success:
                icd10cm_codes.extend(mapped_icd10cm_codes)

        coding19_one_date = []
        for icd10cm_code in icd10cm_codes:
            coding19_one_date.extend(
                coding19_mapper.map_all_relevant_icd10cm_coding19(icd10cm_code)
            )

        indices = coding19_mapper.indices_for_19k_vec_from_desc(coding19_one_date)

        rows_list.append({
            # Looked up by column name so the result does not depend on the
            # column order of the input file.
            "PATIENT_ID": cur_df["PATIENT_ID"].iloc[0],
            "RECORDED_DT": cur_df["RECORDED_DT"].iloc[0],
            "CODING19_DESCRIPTION": str(coding19_one_date),
            "CODING19_INDICES": str(indices),
        })

    # Passing `columns` keeps the schema stable even when there are no rows.
    return pd.DataFrame(rows_list, columns=output_columns)

In [10]:
def add_cumulative_col(one_patient_coding19_df_not_cumulative):
    """Add an ``ACCUMULATED_INDICES`` column holding the running union of indices.

    Each ``CODING19_INDICES`` cell is parsed with ``ast.literal_eval`` and
    merged into the set of indices seen in the rows above it. Every row,
    including the first, stores the de-duplicated union as ``str(list)``,
    with elements in first-seen order so the output is deterministic.

    Parameters
    ----------
    one_patient_coding19_df_not_cumulative : pandas.DataFrame
        Frame with a ``CODING19_INDICES`` column of string-encoded lists.
        Rows are accumulated in their existing order. The frame is modified
        in place.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the new column added.
    """
    # The parameter name shadows the notebook function of the same name inside
    # this body; it is kept so keyword callers continue to work.
    patient_df = one_patient_coding19_df_not_cumulative

    # A dict serves as an insertion-ordered set, so the previous row's string
    # never has to be re-parsed and the element order is reproducible.
    seen_indices = {}
    accumulated = []
    for raw_indices in patient_df["CODING19_INDICES"].to_list():
        seen_indices.update(dict.fromkeys(literal_eval(raw_indices)))
        accumulated.append(str(list(seen_indices)))

    patient_df["ACCUMULATED_INDICES"] = accumulated
    return patient_df

In [12]:
# Split the full table into one sub-frame per PATIENT_ID; only the grouped
# rows are kept, the group keys are discarded.
one_patient_dfs = [key_and_rows[1] for key_and_rows in df.groupby(['PATIENT_ID'])]

# TODO: the next cell converts only the first 100 patients; raise that limit to cover more of the cohort.

In [17]:
# Upper bound on how many patients this cell converts. The list is sliced
# instead of indexed with a fixed range, so an input with fewer patients than
# the limit no longer raises IndexError.
MAX_PATIENTS_TO_PROCESS = 100

patient_dfs_to_concat = []
for one_patient_df_idx, one_patient_df in enumerate(one_patient_dfs[:MAX_PATIENTS_TO_PROCESS]):
    # One sub-frame per distinct RECORDED_DT of this patient.
    # NOTE(review): the CSV is read without date parsing, so if RECORDED_DT is
    # a string column the groups come out in string order — confirm that this
    # matches chronological order, since the cumulative column depends on it.
    df_list = [d for _, d in one_patient_df.groupby('RECORDED_DT')]
    not_cumul_df = one_patient_coding19_df_not_cumulative(df_list)
    cumul_df = add_cumulative_col(not_cumul_df)
    patient_dfs_to_concat.append(cumul_df)

# Stack all per-patient frames into one table with a fresh 0..n-1 index.
combined_coding19_df = pd.concat(patient_dfs_to_concat, ignore_index=True)

In [18]:
# Write the combined per-date rows to disk; index=False leaves the row index out of the file.
combined_coding19_df.to_csv("2000_patients_coding19.csv", index=False)