In [1]:
import pandas as pd
import numpy as np
import datetime
from ast import literal_eval

from icd10cm_to_coding19 import *
from icd9cm_to_icd10cm import *


In [2]:
# Lookup table handed to the ICD-9-CM -> ICD-10-CM mapper.
MAP_REF_PATH_9CM_10CM = "resources/icd9cm_icd10cm_table.csv"

# Inputs of the ICD-10-CM -> Coding19 mapper: the coding table and the
# pickled tree hierarchy that goes with it.
CODING19_PATH = "resources/old_coding19.tsv"
TREE_FILE_PATH = "resources/coding19_tree.pickle"


In [4]:
# Load the raw diagnosis export for this patient.
# The two ICD code columns are read as text: codes are identifiers, not
# numbers, and letting pandas infer a float dtype would drop leading and
# trailing zeros (e.g. "008.45" -> 8.45, "250.00" -> 250.0) before the codes
# ever reach the mappers. Missing cells still come back as NaN, so the
# later `.dropna()` calls keep working.
# NOTE(review): assumes the mappers accept codes as strings - confirm.
df = pd.read_csv(
    "68950413.csv",
    dtype={"DIAGNOSIS_ICD10_CD": str, "DIAGNOSIS_ICD_CD": str},
)

In [5]:
# Split the raw frame into one sub-frame per RECORDED_DT value; the group
# keys themselves are not needed afterwards.
df_list = []
for _recorded_dt, date_group in df.groupby(['RECORDED_DT']):
    df_list.append(date_group)

# Spot-check one of the per-date groups.
print(df_list[2].head())

   PATIENT_ID PATIENT_BIRTH_DT SNOMEDDESC DIAGNOSIS_ICD10_CD DIAGNOSIS_ICD_CD  \
8    68950413        8/10/1953        NaN                NaN           789.06   

  RECORDED_DT  DIAGPOSKEY  ENCOUNTER_KEY  
8  2011-10-26          30       34616125  


In [6]:
# Number of distinct RECORDED_DT groups; the per-date table built later gets
# one row for each of them.
print(len(df_list))


133


In [7]:
# Build both translation stages once so they can be reused for every date group:
# stage 1 turns ICD-9-CM codes into ICD-10-CM codes, stage 2 turns ICD-10-CM
# codes into Coding19 entries.
icd9cm_to_10cm_mapper = ICD9CM_ICD10CM_Mapper(MAP_REF_PATH_9CM_10CM)
icd10cm_to_coding19_mapper = ICD10CM_Coding19_Mapper(
    coding19_csv_path=CODING19_PATH,
    coding19_tree_hierarchy_path=TREE_FILE_PATH,
)

In [14]:
# Build one output row per recorded date: every diagnosis code seen on that
# date is translated to Coding19 and stored as a de-duplicated list of indices.
rows_list = []
for cur_df in df_list:
    # Codes already recorded as ICD-10-CM are used as they are.
    icd10cm_codes = cur_df["DIAGNOSIS_ICD10_CD"].dropna().to_list()

    # ICD-9-CM codes are first translated to ICD-10-CM; codes for which the
    # mapper reports no success are skipped.
    for icd9cm_code in cur_df["DIAGNOSIS_ICD_CD"].dropna().to_list():
        map_success, mapped_icd10cm_codes = icd9cm_to_10cm_mapper.get_icd10cm_codes(icd9cm_code)
        if map_success:
            # extend() grows the list in place instead of rebuilding it each time.
            icd10cm_codes.extend(mapped_icd10cm_codes)

    # Collect the Coding19 entries for every ICD-10-CM code of this date.
    coding19_one_date = []
    for icd10cm_code in icd10cm_codes:
        coding19_one_date.extend(
            icd10cm_to_coding19_mapper.map_all_relevant_icd10cm_coding19(icd10cm_code)
        )

    indices = icd10cm_to_coding19_mapper.indices_for_19k_vec_from_desc(coding19_one_date)

    # Drop duplicate indices (element order is not preserved).
    indices = list(set(indices))

    # Read the identifying fields by column name rather than by position, so a
    # change in the CSV column order cannot silently pick the wrong column.
    # Every row in the group shares RECORDED_DT, so the first row is enough.
    row_dict = {
        "PATIENT_ID": cur_df["PATIENT_ID"].iloc[0],
        "RECORDED_DT": cur_df["RECORDED_DT"].iloc[0],
        # Stored as text; later cells parse it back with literal_eval.
        "CODING19_INDICES": str(indices),
    }
    rows_list.append(row_dict)

In [15]:
# Assemble the per-date rows into a frame.
# NOTE(review): this rebinds `df`, replacing the raw frame loaded from
# "68950413.csv"; re-running the earlier groupby cell after this one would
# group this frame instead of the raw data.
df = pd.DataFrame(rows_list)  

In [16]:
# Display the per-date Coding19 table (one row per RECORDED_DT).
df

Unnamed: 0,PATIENT_ID,RECORDED_DT,CODING19_INDICES
0,68950413,2011-05-20,"[1763, 2916, 18437, 18438, 1452, 18446, 1744, ..."
1,68950413,2011-09-09,"[1351, 2633, 2634, 1452, 1357, 973, 944, 1744,..."
2,68950413,2011-10-26,"[12312, 1761, 12314, 1104]"
3,68950413,2011-11-08,"[12641, 1114, 1761]"
4,68950413,2011-11-16,"[1761, 1032, 1036, 1104, 5086, 12312, 1753, 12..."
...,...,...,...
128,68950413,2022-01-15,"[12994, 12995, 12996, 12997, 1757, 1119]"
129,68950413,2022-01-17,"[1025, 4642, 4646, 1065, 10302, 10310, 3527, 3..."
130,68950413,2022-02-03,"[967, 2633, 2636, 973, 1746, 4051, 1747, 1013,..."
131,68950413,2022-02-14,"[12994, 12995, 14436, 12997, 14438, 1764, 1299..."


In [9]:
# Pull the stringified index lists out of the frame, one entry per row,
# and keep the first one around for inspection.
indices = df["CODING19_INDICES"].tolist()
cur_indices = indices[0]

In [10]:
# The index list was written with str(...) when the rows were built, so it
# comes back from the frame as text rather than as a list.
print(type(cur_indices))

<class 'str'>


In [11]:
# Parse the stored text back into a Python list.
# The source is `indices[0]` rather than `cur_indices` itself, so the cell can
# be re-run: literal_eval raises when given the list a previous run left in
# `cur_indices`.
cur_indices = literal_eval(indices[0])
print(cur_indices)

[18438, 18437, 1177, 1763, 1461, 1452, 948, 1744, 2916, 2910, 977, 1748, 18525, 18520, 1177, 1763, 18451, 18446, 1177, 1763]


In [12]:
# Running union of indices: entry i holds, as text, every index seen in rows
# 0..i of `indices`.
accumulated = []
running_indices = None  # parsed union of all rows handled so far
for row_text in indices:
    row_indices = literal_eval(row_text)
    if running_indices is None:
        # The first row has nothing to merge with and is stored verbatim.
        accumulated.append(row_text)
        running_indices = row_indices
    else:
        # Merge with the union carried over from the previous row. Keeping the
        # parsed list avoids re-parsing the ever-growing previous string on
        # every iteration; str() followed by literal_eval() gives back the same
        # list of literals, so the stored text is unchanged.
        running_indices = list(set([*row_indices, *running_indices]))
        accumulated.append(str(running_indices))
    
        

In [13]:
# Attach the running-union column; `accumulated` holds one entry per row of `df`.
df["ACCUMULATED_INDICES"] = accumulated

In [14]:
# Persist the per-date table; index=False keeps the row index out of the file.
df.to_csv("68950413_coding19.csv", index=False)