# Cross reference the FDA's NDCs with RxNorm's NDCs

2019-04-19

Determine how well RxNorm's data allows us to map the FDA's NDCs to RXCUIs.

In [1]:
import pandas as pd

## Read FDA NDCs

In [2]:
# Load the FDA NDC table from its tab-separated file in the pipeline directory.
fda_ndc_path = "../../pipeline/fda_ndc/ndc_info.tsv"
ndc_info = pd.read_csv(fda_ndc_path, sep="\t")

In [3]:
# (rows, columns) of the FDA NDC table.
ndc_info.shape

(243483, 19)

In [4]:
# Preview the first five FDA rows.
ndc_info.head(5)

Unnamed: 0,PRODUCTID,PRODUCTNDC,NDCPACKAGECODE,PACKAGEDESCRIPTION,PRODUCTTYPENAME,PROPRIETARYNAME,NONPROPRIETARYNAME,DOSAGEFORMNAME,ROUTENAME,MARKETINGCATEGORYNAME,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH
0,0002-0800_4bb5d1cb-0fa7-48c7-9f6d-8d45f9b91649,0002-0800,0002-0800-01,1 VIAL in 1 CARTON (0002-0800-01) > 10 mL in ...,HUMAN OTC DRUG,Sterile Diluent,diluent,"INJECTION, SOLUTION",SUBCUTANEOUS,NDA,NDA018781,Eli Lilly and Company,WATER,1.0,mL/mL,,,N,20191231.0
1,0002-1200_957ee1b5-dfa7-4e3f-96e1-6bed1ffc0abe,0002-1200,0002-1200-30,"1 VIAL, MULTI-DOSE in 1 CAN (0002-1200-30) > ...",HUMAN PRESCRIPTION DRUG,Amyvid,Florbetapir F 18,"INJECTION, SOLUTION",INTRAVENOUS,NDA,NDA202008,Eli Lilly and Company,FLORBETAPIR F-18,51.0,mCi/mL,"Radioactive Diagnostic Agent [EPC],Positron Em...",,N,20191231.0
2,0002-1200_957ee1b5-dfa7-4e3f-96e1-6bed1ffc0abe,0002-1200,0002-1200-50,"1 VIAL, MULTI-DOSE in 1 CAN (0002-1200-50) > ...",HUMAN PRESCRIPTION DRUG,Amyvid,Florbetapir F 18,"INJECTION, SOLUTION",INTRAVENOUS,NDA,NDA202008,Eli Lilly and Company,FLORBETAPIR F-18,51.0,mCi/mL,"Radioactive Diagnostic Agent [EPC],Positron Em...",,N,20191231.0
3,0002-1407_14757f9d-f641-4836-acf3-229265588d1d,0002-1407,0002-1407-01,10 mL in 1 VIAL (0002-1407-01),HUMAN PRESCRIPTION DRUG,Quinidine Gluconate,Quinidine Gluconate,SOLUTION,INTRAVENOUS,NDA,NDA007529,Eli Lilly and Company,QUINIDINE GLUCONATE,80.0,mg/mL,"Antiarrhythmic [EPC],Cytochrome P450 2D6 Inhib...",,N,20191231.0
4,0002-1433_4468578a-47d2-488e-9fd4-a8322070392f,0002-1433,0002-1433-61,2 SYRINGE in 1 CARTON (0002-1433-61) > .5 mL ...,HUMAN PRESCRIPTION DRUG,Trulicity,Dulaglutide,"INJECTION, SOLUTION",SUBCUTANEOUS,BLA,BLA125469,Eli Lilly and Company,DULAGLUTIDE,0.75,mg/.5mL,"GLP-1 Receptor Agonist [EPC],Glucagon-Like Pep...",,N,20201231.0


## Read RxNorm info

In [5]:
# Load the RxNorm NDC-to-RXCUI table from its tab-separated file in the pipeline directory.
rxnorm_ndc_path = "../../pipeline/rxnorm/ndc_to_rxcui.tsv"
rxnorm = pd.read_csv(rxnorm_ndc_path, sep="\t")

In [6]:
# (rows, columns) of the RxNorm table.
rxnorm.shape

(309278, 9)

In [7]:
# Preview the first five RxNorm rows.
rxnorm.head(5)

Unnamed: 0,rxcui,rxaui,stype,code,atn,sab,atv,suppress,cvf
0,91349,3507080,AUI,12745-202,NDC,MTHSPL,12745-202-01,N,4096.0
1,91349,3507080,AUI,12745-202,NDC,MTHSPL,12745-202-02,N,4096.0
2,91349,3507080,AUI,12745-202,NDC,MTHSPL,12745-202-03,N,4096.0
3,91349,3518228,AUI,34645-8030,NDC,MTHSPL,34645-8030-4,N,4096.0
4,91349,3520567,AUI,55316-871,NDC,MTHSPL,55316-871-43,N,4096.0


---

# How many of the FDA NDCs can be mapped to RxNorm CUIs?

In [8]:
# Distinct NDC package codes in each source. In the RxNorm table the
# NDC value is stored in the "atv" column.
fdaids = set(ndc_info["NDCPACKAGECODE"].tolist())
rxids = set(rxnorm["atv"].tolist())

In [9]:
# Number of distinct FDA package codes (lower than the row count if any code repeats).
len(fdaids)

243464

In [10]:
# Number of distinct NDC values in the RxNorm table.
len(rxids)

280374

In [11]:
# Is every FDA package code also present in RxNorm?
fdaids.issubset(rxids)

False

That was too optimistic: not every FDA NDC appears in RxNorm.

In [12]:
# Number of package codes present in both sources.
len(fdaids.intersection(rxids))

239810

In [13]:
# Percentage of distinct FDA package codes that also appear in RxNorm.
shared_ids = fdaids.intersection(rxids)
len(shared_ids) / len(fdaids) * 100

98.49916209377977

About 98.5% of the distinct FDA NDC package codes can be mapped to RxNorm CUIs (RXCUIs). This is excellent.

### Unmappable ids

In [14]:
# Number of FDA package codes with no match in RxNorm.
len(fdaids.difference(rxids))

3654

In [15]:
# Percentage of distinct FDA package codes with no match in RxNorm.
fda_only_ids = fdaids.difference(rxids)
len(fda_only_ids) / len(fdaids) * 100

1.5008379062202215

Only 1.5% of the FDA NDCs can't be matched to an RXCUI.

In [16]:
# Number of RxNorm NDC values that do not appear among the FDA package codes.
len(rxids.difference(fdaids))

40564

Finally, 40,564 NDCs in RxNorm do not appear in the FDA data. These might be old drugs, but that has not been verified here.

# Conclusion

We can map about 98% of FDA NDCs to RXCUIs.
The next step is to determine the active ingredient.

---

# Merge tables and focus on matchable info

We will ignore any data that is not common to both sources for now.

In [17]:
# Keep only the RxNorm columns needed downstream; "atv" carries the NDC
# value, so rename it to match the FDA key column.
rxnorm_ndc = rxnorm[["rxcui", "rxaui", "atv", "suppress"]].rename(
    columns={"atv": "NDCPACKAGECODE"}
)
# Inner join: rows whose package code is missing from either source are dropped.
merged = pd.merge(rxnorm_ndc, ndc_info, how="inner", on="NDCPACKAGECODE")
# Remove fully identical rows and renumber the index from zero.
res = merged.drop_duplicates().reset_index(drop=True)

In [18]:
# (rows, columns) of the merged table.
res.shape

(265692, 22)

In [19]:
# Preview the first five merged rows.
res.head(5)

Unnamed: 0,rxcui,rxaui,NDCPACKAGECODE,suppress,PRODUCTID,PRODUCTNDC,PACKAGEDESCRIPTION,PRODUCTTYPENAME,PROPRIETARYNAME,NONPROPRIETARYNAME,...,MARKETINGCATEGORYNAME,APPLICATIONNUMBER,LABELERNAME,SUBSTANCENAME,ACTIVE_NUMERATOR_STRENGTH,ACTIVE_INGRED_UNIT,PHARM_CLASSES,DEASCHEDULE,NDC_EXCLUDE_FLAG,LISTING_RECORD_CERTIFIED_THROUGH
0,91349,3507080,12745-202-01,N,12745-202_7d063901-255c-bffc-e053-2a91aa0a91ee,12745-202,"59 mL in 1 BOTTLE, PLASTIC (12745-202-01)",HUMAN OTC DRUG,HYDROGEN PEROXIDE,HYDROGEN PEROXIDE,...,OTC MONOGRAPH NOT FINAL,part333A,Medical Chemical Corporation,HYDROGEN PEROXIDE,8.57,g/100mL,,,N,20191231.0
1,91349,3507080,12745-202-02,N,12745-202_7d063901-255c-bffc-e053-2a91aa0a91ee,12745-202,"118 mL in 1 BOTTLE, PLASTIC (12745-202-02)",HUMAN OTC DRUG,HYDROGEN PEROXIDE,HYDROGEN PEROXIDE,...,OTC MONOGRAPH NOT FINAL,part333A,Medical Chemical Corporation,HYDROGEN PEROXIDE,8.57,g/100mL,,,N,20191231.0
2,91349,3507080,12745-202-03,N,12745-202_7d063901-255c-bffc-e053-2a91aa0a91ee,12745-202,"3785 mL in 1 BOTTLE, PLASTIC (12745-202-03)",HUMAN OTC DRUG,HYDROGEN PEROXIDE,HYDROGEN PEROXIDE,...,OTC MONOGRAPH NOT FINAL,part333A,Medical Chemical Corporation,HYDROGEN PEROXIDE,8.57,g/100mL,,,N,20191231.0
3,91349,3518228,34645-8030-4,N,34645-8030_ffab3590-e3b1-44fb-86fe-73c9221d7635,34645-8030,30 mL in 1 POUCH (34645-8030-4),HUMAN OTC DRUG,Hydrogen Peroxide,Hydrogen Peroxide,...,OTC MONOGRAPH FINAL,part333,Jiangsu Province JianErKang Medical Dressing C...,HYDROGEN PEROXIDE,0.03,mL/mL,,,E,20181231.0
4,91349,3520567,55316-871-43,N,55316-871_fa4b283c-0255-4bba-aa96-48476c19a4d6,55316-871,".473 L in 1 BOTTLE, PLASTIC (55316-871-43)",HUMAN OTC DRUG,Hydrogen Peroxide,Hydrogen Peroxide,...,OTC MONOGRAPH NOT FINAL,part333A,"DZA Brands,",HYDROGEN PEROXIDE,0.3,kg/100L,,,N,20191231.0


---

## Analyze results

In [20]:
# Number of distinct RXCUIs in the merged table.
res["rxcui"].nunique()

41576

In [21]:
# Number of distinct NDC package codes in the merged table.
res["NDCPACKAGECODE"].nunique()

239810

Only ~41.6k unique RXCUIs for ~240k NDC package codes? Many package codes must share the same RXCUI, so there are more package-level codes per drug than we originally thought.

## Save to file

In [22]:
# Write the merged table as a tab-separated file, without the row index.
merged_path = "../../pipeline/merged_ndc_info.tsv"
res.to_csv(merged_path, sep="\t", index=False)