# Extract all NDC information from RxNorm

2019-06-03

Extract the NDC info in RxNorm.
Analyze the information in a later step.

In [1]:
import pandas as pd

## Read `RXNSAT.RRF`

The NDCs should be in `RXNSAT.RRF` according to the technical documentation:
https://www.nlm.nih.gov/research/umls/rxnorm/docs/2019/rxnorm_doco_full_2019-1.html#s6_0

In [2]:
rxsat = (pd
    .read_csv("../../data/rxnorm/RXNSAT.RRF", sep='|', names=[
        "rxcui", "lui", "sui", "rxaui", "stype",
        "code", "atui", "satui", "atn", "sab",
        "atv", "suppress", "cvf",
        "temp"
    ])
    .dropna(axis=1, how="all")
)

In [3]:
rxsat.shape

(8245946, 10)

In [4]:
rxsat.head()

Unnamed: 0,rxcui,rxaui,stype,code,atui,atn,sab,atv,suppress,cvf
0,3,8717795,AUI,58488005,,UMLSAUI,RXNORM,A27769867,,
1,3,8717795,AUI,58488005,,UMLSCUI,RXNORM,C0000052,,
2,3,8717796,AUI,58488005,,UMLSAUI,RXNORM,A27780666,,
3,3,8717796,AUI,58488005,,UMLSCUI,RXNORM,C0000052,,
4,3,8717808,AUI,58488005,,UMLSAUI,RXNORM,A27766654,,


### Basic info

In [5]:
rxsat["atn"].value_counts().head()

NDC           1670442
SPL_SET_ID    1408364
UMLSCUI       1022945
UMLSAUI       1015392
LABELER        319007
Name: atn, dtype: int64

---

## Extract NDCs

In [6]:
ndcs = rxsat.query("atn == 'NDC'")

In [7]:
ndcs.shape

(1670442, 10)

In [8]:
ndcs.head()

Unnamed: 0,rxcui,rxaui,stype,code,atui,atn,sab,atv,suppress,cvf
49554,1356,1473211,AUI,4009746,,NDC,VANDF,395021301,O,
49555,1356,1473211,AUI,4009746,,NDC,VANDF,395021391,O,
49556,1356,1473211,AUI,4009746,,NDC,VANDF,17317004101,O,
49557,1356,1473211,AUI,4009746,,NDC,VANDF,17317004105,O,
49558,1356,1473211,AUI,4009746,,NDC,VANDF,49452815001,O,


## Save to file

In [9]:
ndcs.to_csv("../../pipeline/rxnorm/raw_ndc_info.tsv", sep='\t', index=False)