In [41]:
import pandas as pd
from collections import defaultdict, Counter
#from Authentication import *
import requests
import json 
import time
import progressbar as pb
import numpy as np

## MedDRA

In [117]:
meddra_file_path = "/Users/Fyxstkala/Desktop/GitHub/term_mapping/MedDRA/new_meddra"

pt_df = pd.read_csv(meddra_file_path + "/pt_df.csv", header = None)
pt_df.columns = ["IDX","PT"]

llt_df = pd.read_csv(meddra_file_path + "/llt_df.csv", header = None)
llt_df.columns = ["IDX","LLT"]

ptsoc_df = pd.read_csv(meddra_file_path + "/ptsoc_df.csv", header = None)
ptsoc_df.columns = ["IDX","PT"]

soc_df = pd.read_csv(meddra_file_path + "/soc_df.csv", header = None)
soc_df.columns = ["IDX","SOC"]

print(pt_df.shape, llt_df.shape, ptsoc_df.shape, soc_df.shape)

(80894, 2) (80894, 2) (23954, 2) (23954, 2)


In [24]:
# Get mapping from pt to llt
pt_llt_df = pt_df.merge(llt_df, on = ["IDX"])
print(pt_llt_df.shape)
pt_llt_df['PT'] = pt_llt_df['PT'].str.lower()
pt_llt_df['LLT'] = pt_llt_df['LLT'].str.lower()
pt_llt_df.head()

(80894, 3)


Unnamed: 0,IDX,PT,LLT
0,0,11-beta-hydroxylase deficiency,11-beta-hydroxylase deficiency
1,1,17 ketosteroids urine,17 ketosteroids urine
2,2,17 ketosteroids urine decreased,17 ketosteroids urine decreased
3,3,17 ketosteroids urine decreased,17 ketosteroids urine low
4,4,17 ketosteroids urine increased,17 ketosteroids urine high


In [25]:
pt_soc_df = ptsoc_df.merge(soc_df, on = ["IDX"])
print(pt_soc_df.shape)
pt_soc_df['PT'] = pt_soc_df['PT'].str.lower()
pt_soc_df['SOC'] = pt_soc_df['SOC'].str.lower()
pt_soc_df.head()

(23954, 3)


Unnamed: 0,IDX,PT,SOC
0,0,11-beta-hydroxylase deficiency,"congenital, familial and genetic disorders"
1,1,"17,20-desmolase deficiency","congenital, familial and genetic disorders"
2,2,17-alpha-hydroxylase deficiency,"congenital, familial and genetic disorders"
3,3,"20,22-desmolase deficiency","congenital, familial and genetic disorders"
4,4,21-hydroxylase deficiency,"congenital, familial and genetic disorders"


## OHDSI

In [38]:
ohdsi_path = "/Users/Fyxstkala/Desktop/GitHub/term_mapping/ohdsi_2019"

In [44]:
# retrieve data downlaoded from OHDSI vocabularies (concept file)
def retrieve_ohdsi_vocab(filename, vocab, id_name):
    with open(filename) as file:
        concepts = np.asarray([line.strip().split('\t') for line in file])
    
    # fill invalid_reason column if empty
    for row in concepts:
        if len(row) == 9 and len(row[8])==8:
            row.append("NA")

    concepts = np.asarray(concepts)
    ret = np.asarray([row for row in concepts if row[3] == vocab])
    df = pd.DataFrame([[entry[0],entry[1],entry[3]] for entry in ret], 
        columns = [id_name, "name",'vocab'])

    return df


def retrieve_meddra(filename, id_name, level):
    with open(filename) as file:
        concepts = np.asarray([line.strip().split('\t') for line in file])
    
    # fill invalid_reason column if empty
    for row in concepts:
        if len(row) == 9 and len(row[8])==8:
            row.append("NA")

    concepts = np.asarray(concepts)
    ret = np.asarray([row for row in concepts if row[3] == "MedDRA" and row[4] == level])
    df = pd.DataFrame([[entry[0],entry[1],entry[3]] for entry in ret], 
        columns = [id_name, "name",'vocab'])

    return df

# get all maps-to relationships
def get_concept_relationships(file, relation):
    with open(file) as f:
        next(f)
        f = np.asarray([line.strip().split("\t") for line in f])
        df = pd.DataFrame([entry[:3] for entry in f if (entry[2] == relation)], 
            columns = ['id1','id2',"relation"])
        #df = pd.DataFrame([entry[:3] for entry in f], columns = ['id1','id2',"relation"])
    return df

def mapping_with_snomed(vocab, name, snomed, relation):
    df_relation = get_concept_relationships(ohdsi_path + "/CONCEPT_RELATIONSHIP.csv", relation)
    id_match = pd.merge(vocab, df_relation, how ='inner', on=['id1'])
    df = pd.merge(id_match, snomed, how ='inner', on=["id2"])
    
    df = df[["id1","name_x","id2"]]
    df = df.rename(index=str, columns={"id1": name+"_id", "name_x": name, "id2": "snomed_id"})
    return df

In [42]:
icd9 = retrieve_ohdsi_vocab(ohdsi_path + "/CONCEPT.csv", "ICD9CM", "id1")
icd10 = retrieve_ohdsi_vocab(ohdsi_path + "/CONCEPT.csv", "ICD10CM", "id1")
mdr_llt = retrieve_meddra(ohdsi_path + "/CONCEPT.csv","id1","LLT")
mdr_pt = retrieve_meddra(ohdsi_path + "/CONCEPT.csv","id1","PT")
snomed = retrieve_ohdsi_vocab(ohdsi_path + "/CONCEPT.csv", "SNOMED","id2")

In [45]:
mdr_pt_snomed = mapping_with_snomed(mdr_pt, "mdr", snomed, "MedDRA - SNOMED eq")
mdr_llt_snomed = mapping_with_snomed(mdr_llt, "mdr", snomed, "MedDRA - SNOMED eq")
icd9_snomed = mapping_with_snomed(icd9, "icd", snomed, "Maps to")
icd10_snomed = mapping_with_snomed(icd10, "icd", snomed, "Maps to")

mdr_icd9_pt = pd.merge(mdr_pt_snomed, icd9_snomed, how="inner", on=["snomed_id"])
mdr_icd9_llt = pd.merge(mdr_llt_snomed, icd9_snomed, how="inner", on=["snomed_id"])
mdr_icd10_pt = pd.merge(mdr_pt_snomed, icd10_snomed, how="inner", on=["snomed_id"])
mdr_icd10_llt = pd.merge(mdr_llt_snomed, icd10_snomed, how="inner", on=["snomed_id"])

merged = pd.concat([mdr_icd9_pt[["mdr","icd"]],mdr_icd9_llt[["mdr","icd"]],
                        mdr_icd10_pt[["mdr","icd"]],mdr_icd10_llt[["mdr","icd"]]])
merged = merged.drop_duplicates(keep = "first")
merged.columns = ["MDR","ICD"]
merged.head()

(12841, 3)
(718, 3)
(21100, 3)
(127635, 3)


Unnamed: 0,MDR,ICD
0,Liver function test,Nonspecific abnormal results of function study...
1,Basophilia,Basophilia
2,Humoral immune defect,Deficiency of humoral immunity
3,Humoral immune defect,Other deficiency of humoral immunity
4,Hypogammaglobulinaemia,"Hypogammaglobulinemia, unspecified"


In [82]:
print("OHDSI MDR ICD9 PT: ", len(set(mdr_icd9_pt["mdr"].drop_duplicates().tolist())))
print("OHDSI MDR ICD10 PT: ", len(set(mdr_icd10_pt["mdr"].drop_duplicates().tolist())))
print("OHDSI MDR ICD9 LLT: ", len(set(mdr_icd9_llt["mdr"].drop_duplicates().tolist())))
print("OHDSI MDR ICD10 LLT: ", len(set(mdr_icd10_llt["mdr"].drop_duplicates().tolist())))
ohdsi_mdr_pt = pd.concat([mdr_icd9_pt,mdr_icd10_pt]).drop_duplicates()
ohdsi_mdr_llt = pd.concat([mdr_icd9_llt,mdr_icd10_llt]).drop_duplicates()
print("OHDSI MDR PT: ", len(set(ohdsi_mdr_pt["mdr"].drop_duplicates().tolist())))
print("OHDSI MDR LLT: ", len(set(ohdsi_mdr_llt["mdr"].drop_duplicates().tolist())))

OHDSI MDR ICD9 PT:  3354
OHDSI MDR ICD10 PT:  3551
OHDSI MDR ICD9 LLT:  180
OHDSI MDR ICD10 LLT:  191
OHDSI MDR PT:  4112
OHDSI MDR LLT:  220


In [46]:
merged["MDR"] = merged["MDR"].str.lower()
merged["ICD"] = merged["ICD"].str.lower()
ohdsi_mapping = merged
print(ohdsi_mapping.shape)
ohdsi_mapping.head()

(45173, 2)


Unnamed: 0,MDR,ICD
0,liver function test,nonspecific abnormal results of function study...
1,basophilia,basophilia
2,humoral immune defect,deficiency of humoral immunity
3,humoral immune defect,other deficiency of humoral immunity
4,hypogammaglobulinaemia,"hypogammaglobulinemia, unspecified"


## UMLS

2009-2016: /MRCONSO.RRF.aa <br/>
2017-2019: /MRCONSO.RRF <br/>

In [83]:
year = "2019"
umlsPath = "/Users/Fyxstkala/Desktop/GitHub/term_mapping/umls/" + year + "AB"
mrconso = umlsPath + "/MRCONSO.RRF"

### Helper functions

In [84]:
#df = pd.read_csv(mrconso, sep="|")

# read MRCONSO.RRF file, get terms in English
def read_mrconso(file):
    term_df = pd.read_csv(file, sep="|", usecols=[0,1,11,12,14])
    term_df.columns = ["CUI","LAT","SAB","TTY","STR"]
    term_df = term_df.loc[term_df["LAT"] == "ENG"]
    term_df['STR'] = term_df['STR'].str.lower()
    print(term_df.shape)
    return term_df

df = read_mrconso(mrconso)
df.head()

(10802342, 5)


Unnamed: 0,CUI,LAT,SAB,TTY,STR
0,C0000005,ENG,MSH,ET,(131)i-maa
5,C0000039,ENG,RXNORM,IN,"1,2-dipalmitoylphosphatidylcholine"
6,C0000039,ENG,MTH,PN,"1,2-dipalmitoylphosphatidylcholine"
7,C0000039,ENG,MSH,MH,"1,2-dipalmitoylphosphatidylcholine"
8,C0000039,ENG,MSH,PM,"1,2 dipalmitoylphosphatidylcholine"


In [90]:
icd9cm = df.loc[df["SAB"] == 'ICD9CM'].drop_duplicates()
icd10cm = df.loc[df["SAB"] == 'ICD10CM'].drop_duplicates()
mdr_pt =  df.loc[(df["SAB"] == 'MDR') & (df["TTY"] == "PT")].drop_duplicates()
mdr_llt =  df.loc[(df["SAB"] == 'MDR') & (df["TTY"] == "LLT")].drop_duplicates()
print("MDR PT", len(set(mdr_pt["STR"].tolist())))
print("MDR LLT", len(set(mdr_llt["STR"].tolist())))
mdr_llt.head()

MDR PT 23708
MDR LLT 70977


Unnamed: 0,CUI,LAT,SAB,TTY,STR
5112,C0000727,ENG,MDR,LLT,acute abdomen
5126,C0000727,ENG,MDR,LLT,syndrome abdominal acute
5129,C0000727,ENG,MDR,LLT,abdominal syndrome acute
5249,C0000729,ENG,MDR,LLT,abdominal cramps
5267,C0000729,ENG,MDR,LLT,abdominal cramp


In [120]:
def cui_mapping(mdr, icd):
    matches = pd.merge(mdr, icd, how='inner', on=["CUI"], suffixes=('_mdr', '_icd')).drop_duplicates()
    matches = matches.rename(columns={'STR_mdr': 'MDR', 'STR_icd': 'ICD'})
    return matches[["CUI","MDR","ICD"]]

# mapping percentage is calculated by how many MedDRA terms are in UMLS of that year
def get_count_info(matches, mdr):
    print(matches.shape)
    len_match_cui = len(set(matches["CUI"].tolist()))
    len_match_icd = len(set(matches["ICD"].tolist()))
    len_match_mdr = len(set(matches["MDR"].tolist()))
    len_mdr = len(set(mdr["PT"].tolist()))
    
    print("Mapped CUI: ", len_match_cui)
    print("Mapped ICD: ", len_match_icd)
    print("Mapped MDR: ", len_match_mdr)
    print("Terms in MDR: ", len_mdr)
    print("MDR Matched Percentage: ", float(len_match_mdr)/len_mdr)

In [96]:
map_icd9 = cui_mapping(mdr_pt, icd9cm)
map_icd10 = cui_mapping(mdr_pt, icd10cm)
combined_pt = pd.concat([map_icd9, map_icd10]).drop_duplicates()
print("---  ICD9CM Mapping---")
get_count_info(map_icd9, mdr_pt)
print("\n---ICD10CM Mapping---")
get_count_info(map_icd10, mdr_pt)
print("\n---Combined Mapping---")
get_count_info(combined_pt, mdr_pt)
combined_pt.head()

---  ICD9CM Mapping---
(5734, 3)
Mapped CUI:  2663
Mapped ICD:  4459
Mapped MDR:  2780
Terms in MDR:  23708
MDR Matched Percentage:  0.11725999662561161

---ICD10CM Mapping---
(8779, 3)
Mapped CUI:  3630
Mapped ICD:  5323
Mapped MDR:  3785
Terms in MDR:  23708
MDR Matched Percentage:  0.15965075080141725

---Combined Mapping---
(8805, 3)
Mapped CUI:  4394
Mapped ICD:  8382
Mapped MDR:  4569
Terms in MDR:  23708
MDR Matched Percentage:  0.19271975704403577


Unnamed: 0,CUI,MDR,ICD
0,C0000737,abdominal pain,abdominal pain
1,C0000737,abdominal pain,"abdominal pain, unspecified site"
2,C0000737,abdominal pain,abdmnal pain unspcf site
3,C0000768,congenital anomaly,congenital anomalies
4,C0000768,congenital anomaly,congenital anomaly nos


In [97]:
map_icd9 = cui_mapping(mdr_llt, icd9cm)
map_icd10 = cui_mapping(mdr_llt, icd10cm)
combined_llt = pd.concat([map_icd9, map_icd10]).drop_duplicates()
print("---  ICD9CM Mapping---")
get_count_info(map_icd9, mdr_llt)
print("\n---ICD10CM Mapping---")
get_count_info(map_icd10, mdr_llt)
print("\n---Combined Mapping---")
get_count_info(combined_llt, mdr_llt)
combined_llt.head()

---  ICD9CM Mapping---
(27916, 3)
Mapped CUI:  6792
Mapped ICD:  12064
Mapped MDR:  13417
Terms in MDR:  70977
MDR Matched Percentage:  0.1890330670498894

---ICD10CM Mapping---
(33995, 3)
Mapped CUI:  6141
Mapped ICD:  8669
Mapped MDR:  13745
Terms in MDR:  70977
MDR Matched Percentage:  0.19365428237316315

---Combined Mapping---
(40249, 3)
Mapped CUI:  9394
Mapped ICD:  18280
Mapped MDR:  18539
Terms in MDR:  70977
MDR Matched Percentage:  0.2611972892627189


Unnamed: 0,CUI,MDR,ICD
0,C0000737,pain abdominal,abdominal pain
1,C0000737,pain abdominal,"abdominal pain, unspecified site"
2,C0000737,pain abdominal,abdmnal pain unspcf site
3,C0000737,abdominal pain,abdominal pain
4,C0000737,abdominal pain,"abdominal pain, unspecified site"


In [99]:
umls_mapping = pd.concat([combined_pt, combined_llt]).drop_duplicates()
print(umls_mapping.shape)

(40249, 3)


### UMLS Mapping

In [138]:
def get_umls_mapping(mrconso, mdr_pt_df):
    df = read_mrconso(mrconso)
    
    # get ICD data
    icd9cm = df.loc[df["SAB"] == 'ICD9CM'].drop_duplicates()
    icd10cm = df.loc[df["SAB"] == 'ICD10CM'].drop_duplicates()
    
    # get MedDRA data
    mdr_pt =  df.loc[(df["SAB"] == 'MDR') & (df["TTY"] == "PT")].drop_duplicates()
    mdr_llt =  df.loc[(df["SAB"] == 'MDR') & (df["TTY"] == "LLT")].drop_duplicates()
    mdr_pt_num = len(set(mdr_pt["STR"].drop_duplicates().tolist()))
    
    # MDR and ICD mapping
    map_icd9_pt = cui_mapping(mdr_pt, icd9cm)
    map_icd10_pt = cui_mapping(mdr_pt, icd10cm)
    map_icd9_llt = cui_mapping(mdr_llt, icd9cm)
    map_icd10_llt = cui_mapping(mdr_llt, icd10cm)
    
    combined = pd.concat([map_icd9_pt, map_icd10_pt, map_icd9_llt, map_icd10_llt]).drop_duplicates()
    
#     print("---  ICD9CM Mapping---")
#     get_count_info(map_icd9, mdr_pt)
#     print("\n---ICD10CM Mapping---")
#     get_count_info(map_icd10, mdr_pt)
#     print("\n---Combined Mapping---")
#     get_count_info(combined, mdr_pt_df)
    
    return combined, mdr_pt_num

In [78]:
umls_mapping = get_umls_mapping(mrconso)

(10802342, 5)
---  ICD9CM Mapping---
(5734, 3)
Mapped ICD:  4459
Mapped MDR:  2780
Terms in MDR:  23708
MDR Matched Percentage:  0.11725999662561161

---ICD10CM Mapping---
(8779, 3)
Mapped ICD:  5323
Mapped MDR:  3785
Terms in MDR:  23708
MDR Matched Percentage:  0.15965075080141725

---Combined Mapping---
(8805, 3)
Mapped ICD:  8382
Mapped MDR:  4569
Terms in MDR:  23708
MDR Matched Percentage:  0.19271975704403577


## Combined Mapping

In [100]:
umls_mapping = umls_mapping[["MDR","ICD"]]
print(umls_mapping.shape)
umls_mapping.head()

(40249, 2)


Unnamed: 0,MDR,ICD
0,abdominal pain,abdominal pain
1,abdominal pain,"abdominal pain, unspecified site"
2,abdominal pain,abdmnal pain unspcf site
3,congenital anomaly,congenital anomalies
4,congenital anomaly,congenital anomaly nos


In [101]:
print(ohdsi_mapping.shape)
ohdsi_mapping.head()

(45173, 2)


Unnamed: 0,MDR,ICD
0,liver function test,nonspecific abnormal results of function study...
1,basophilia,basophilia
2,humoral immune defect,deficiency of humoral immunity
3,humoral immune defect,other deficiency of humoral immunity
4,hypogammaglobulinaemia,"hypogammaglobulinemia, unspecified"


In [102]:
print(pt_llt_df.shape)
pt_llt_df.head()

(80894, 3)


Unnamed: 0,IDX,PT,LLT
0,0,11-beta-hydroxylase deficiency,11-beta-hydroxylase deficiency
1,1,17 ketosteroids urine,17 ketosteroids urine
2,2,17 ketosteroids urine decreased,17 ketosteroids urine decreased
3,3,17 ketosteroids urine decreased,17 ketosteroids urine low
4,4,17 ketosteroids urine increased,17 ketosteroids urine high


In [103]:
print(pt_soc_df.shape)
pt_soc_df.head()

(23954, 3)


Unnamed: 0,IDX,PT,SOC
0,0,11-beta-hydroxylase deficiency,"congenital, familial and genetic disorders"
1,1,"17,20-desmolase deficiency","congenital, familial and genetic disorders"
2,2,17-alpha-hydroxylase deficiency,"congenital, familial and genetic disorders"
3,3,"20,22-desmolase deficiency","congenital, familial and genetic disorders"
4,4,21-hydroxylase deficiency,"congenital, familial and genetic disorders"


In [108]:
umls_ohdsi_mapping = pd.concat([umls_mapping, ohdsi_mapping]).drop_duplicates()
print(umls_ohdsi_mapping.shape)
print("UMLS + OHDSI combined: ", len(set(umls_ohdsi_mapping["MDR"].drop_duplicates().tolist())))
umls_ohdsi_mapping.head()

(81407, 2)
19742


Unnamed: 0,MDR,ICD
0,abdominal pain,abdominal pain
1,abdominal pain,"abdominal pain, unspecified site"
2,abdominal pain,abdmnal pain unspcf site
3,congenital anomaly,congenital anomalies
4,congenital anomaly,congenital anomaly nos


In [110]:
pt_filtered = pd.merge(umls_ohdsi_mapping, pt_llt_df, left_on="MDR", right_on="PT", how = "inner")[["MDR","ICD"]].drop_duplicates()
llt_filtered = pd.merge(umls_ohdsi_mapping,pt_llt_df, left_on="MDR", right_on="LLT",how = "inner")[["MDR","ICD"]].drop_duplicates()
print(pt_filtered.shape, llt_filtered.shape)
print("Total PT filtered: ", len(set(pt_filtered["MDR"].drop_duplicates().tolist())))
print("Total LLT filtered: ", len(set(llt_filtered["MDR"].drop_duplicates().tolist())))
llt_filtered.head()

(47301, 2) (81407, 2)
Total PT filtered:  5697
Total LLT filtered:  19742


Unnamed: 0,MDR,ICD
0,abdominal pain,abdominal pain
1,abdominal pain,"abdominal pain, unspecified site"
2,abdominal pain,abdmnal pain unspcf site
3,abdominal pain,unspecified abdominal pain
4,abdominal pain,"abdominal pain, other specified site"


In [111]:
llt_to_pt_filtered = pd.merge(llt_filtered, pt_llt_df, left_on="MDR", right_on="LLT")[["PT","ICD"]]
llt_to_pt_filtered.columns = ["MDR","ICD"]
result = pd.concat([pt_filtered,llt_to_pt_filtered]).drop_duplicates()
print(result.shape)
result.head()

(58951, 2)


Unnamed: 0,MDR,ICD
0,abdominal pain,abdominal pain
32,abdominal pain,"abdominal pain, unspecified site"
64,abdominal pain,abdmnal pain unspcf site
96,abdominal pain,unspecified abdominal pain
128,abdominal pain,"abdominal pain, other specified site"


In [112]:
len_match_icd = len(set(result["ICD"].tolist()))
len_match_mdr = len(set(result["MDR"].tolist()))
len_mdr = len(set(pt_df["PT"].drop_duplicates().tolist()))

print("Mapped ICD: ", len_match_icd)
print("Mapped MDR: ", len_match_mdr)
print("Terms in MDR: ", len_mdr)
print("MDR Matched Percentage: ", float(len_match_mdr)/len_mdr)

Mapped ICD:  49745
Mapped MDR:  6359
Terms in MDR:  23954
MDR Matched Percentage:  0.2654671453619437


## UMLS over the years

In [139]:
for year in range(2009, 2020):
    umls_path = "/Users/Fyxstkala/Desktop/GitHub/term_mapping/umls/" + str(year) + "AB"
    if year < 2017:
        mrconso_file = "/MRCONSO.RRF.aa"
    else:
        mrconso_file = "/MRCONSO.RRF"
    mrconso_path = umls_path + mrconso_file
    print("\n---" + str(year) + "---")
    combined_umls, mdr_pt_num = get_umls_mapping(mrconso_path, pt_df)
    
    # Filter using official MedDRA data
    pt_filtered = pd.merge(combined_umls, pt_llt_df, left_on="MDR", right_on="PT", how = "inner")[["MDR","ICD"]].drop_duplicates()
    llt_filtered = pd.merge(combined_umls,pt_llt_df, left_on="MDR", right_on="LLT",how = "inner")[["MDR","ICD"]].drop_duplicates()
#     print(pt_filtered.shape, llt_filtered.shape)
#     print("Total PT filtered: ", len(set(pt_filtered["MDR"].drop_duplicates().tolist())))
#     print("Total LLT filtered: ", len(set(llt_filtered["MDR"].drop_duplicates().tolist())))

    # converted LLT to PT
    llt_to_pt_filtered = pd.merge(llt_filtered, pt_llt_df, left_on="MDR", right_on="LLT")[["PT","ICD"]]
    llt_to_pt_filtered.columns = ["MDR","ICD"]
    result = pd.concat([pt_filtered,llt_to_pt_filtered]).drop_duplicates()
    
    len_match_icd = len(set(result["ICD"].drop_duplicates().tolist()))
    len_match_mdr = len(set(result["MDR"].drop_duplicates().tolist()))
#     len_mdr = len(set(combined_umls["MDR"].drop_duplicates().tolist()))

    print("Mapped ICD: ", len_match_icd)
    print("Mapped MDR: ", len_match_mdr)
    print("MDR PT in UMLS: ", mdr_pt_num)
    print("MDR Matched Percentage: ", float(len_match_mdr)/mdr_pt_num)


---2009---
(5635063, 5)
Mapped ICD:  4395
Mapped MDR:  2701
MDR PT in UMLS:  17976
MDR Matched Percentage:  0.15025589675122386

---2010---
(5546494, 5)
Mapped ICD:  7989
Mapped MDR:  4215
MDR PT in UMLS:  17833
MDR Matched Percentage:  0.23635955812258172

---2011---
(6063442, 5)
Mapped ICD:  8057
Mapped MDR:  4221
MDR PT in UMLS:  17873
MDR Matched Percentage:  0.2361662843395065

---2012---
(5996159, 5)
Mapped ICD:  8126
Mapped MDR:  4252
MDR PT in UMLS:  17962
MDR Matched Percentage:  0.23672196860037859

---2013---
(6087124, 5)
Mapped ICD:  8117
Mapped MDR:  4286
MDR PT in UMLS:  18102
MDR Matched Percentage:  0.2367694177438957

---2014---
(6063084, 5)
Mapped ICD:  8151
Mapped MDR:  4322
MDR PT in UMLS:  18183
MDR Matched Percentage:  0.23769454985425947

---2015---
(5315282, 5)
Mapped ICD:  8114
Mapped MDR:  4319
MDR PT in UMLS:  17899
MDR Matched Percentage:  0.24129839655846697

---2016---
(5305343, 5)
Mapped ICD:  17590
Mapped MDR:  5379
MDR PT in UMLS:  18000
MDR Matched Pe