# Task 1: Create a Prescription Parser using CRF
This task tests your ability to build a Doctor Prescription Parser with the help of a CRF model.

Your job is to build a Prescription Parser that takes a prescription (sentence) as input and labels each word in that sentence with one of the pre-defined labels.

### Problem: SEQUENCE PREDICTION - Label words in a sentence
#### Input : Doctor Prescription in the form of a sentence split into tokens
- Ex: Take 2 tablets once a day for 10 days

#### Output : FHIR Labels
- ('Take', 'Method')
- ('2', 'Qty') 
- ('tablets', 'Form')
- ('once', 'Frequency')
- ('a', 'Period') 
- ('day', 'PeriodUnit')
- ('for', 'FOR')
- ('10', 'Duration')
- ('days', 'DurationUnit') 

### Major Steps
- Install necessary library
- Import the libraries
- Create training data with labels
    - Split the sentence into tokens
    - Compute POS tags
    - Create triples
- Extract features
- Split the data into training and testing set
- Create CRF model
- Save the CRF model
- Load the CRF model
- Predict on test data
- Accuracy

#### Install necessary library

#### Import the necessary libraries

In [34]:
import nltk
from sklearn_crfsuite import CRF, metrics
from sklearn.metrics import make_scorer,confusion_matrix
from pprint import pprint
from sklearn.metrics import f1_score,classification_report
from sklearn.pipeline import Pipeline
import string
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\yatch\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

### Input data (GIVEN)
#### Creating the inputs to the ML model in the following form:
- sigs --> ['take 3 tabs for 10 days']       INPUT SIG
- input_sigs --> [['take', '3', 'tabs', 'for', '10', 'days']]      TOKENS
- output_labels --> [['Method','Qty', 'Form', 'FOR', 'Duration', 'DurationUnit']]       LABELS

In [35]:
sigs = ["for 5 to 6 days", "inject 2 units", "x 2 weeks", "x 3 days", "every day", "every 2 weeks", "every 3 days", "every 1 to 2 months", "every 2 to 6 weeks", "every 4 to 6 days", "take two to four tabs", "take 2 to 4 tabs", "take 3 tabs orally bid for 10 days at bedtime", "swallow three capsules tid orally", "take 2 capsules po every 6 hours", "take 2 tabs po for 10 days", "take 100 caps by mouth tid for 10 weeks", "take 2 tabs after an hour", "2 tabs every 4-6 hours", "every 4 to 6 hours", "q46h", "q4-6h", "2 hours before breakfast", "before 30 mins at bedtime", "30 mins before bed", "and 100 tabs twice a month", "100 tabs twice a month", "100 tabs once a month", "100 tabs thrice a month", "3 tabs daily for 3 days then 1 tab per day at bed", "30 tabs 10 days tid", "take 30 tabs for 10 days three times a day", "qid q6h", "bid", "qid", "30 tabs before dinner and bedtime", "30 tabs before dinner & bedtime", "take 3 tabs at bedtime", "30 tabs thrice daily for 10 days ", "30 tabs for 10 days three times a day", "Take 2 tablets a day", "qid for 10 days", "every day", "take 2 caps at bedtime", "apply 3 drops before bedtime", "take three capsules daily", "swallow 3 pills once a day", "swallow three pills thrice a day", "apply daily", "apply three drops before bedtime", "every 6 hours", "before food", "after food", "for 20 days", "for twenty days", "with meals"]
input_sigs = [['for', '5', 'to', '6', 'days'], ['inject', '2', 'units'], ['x', '2', 'weeks'], ['x', '3', 'days'], ['every', 'day'], ['every', '2', 'weeks'], ['every', '3', 'days'], ['every', '1', 'to', '2', 'months'], ['every', '2', 'to', '6', 'weeks'], ['every', '4', 'to', '6', 'days'], ['take', 'two', 'to', 'four', 'tabs'], ['take', '2', 'to', '4', 'tabs'], ['take', '3', 'tabs', 'orally', 'bid', 'for', '10', 'days', 'at', 'bedtime'], ['swallow', 'three', 'capsules', 'tid', 'orally'], ['take', '2', 'capsules', 'po', 'every', '6', 'hours'], ['take', '2', 'tabs', 'po', 'for', '10', 'days'], ['take', '100', 'caps', 'by', 'mouth', 'tid', 'for', '10', 'weeks'], ['take', '2', 'tabs', 'after', 'an', 'hour'], ['2', 'tabs', 'every', '4-6', 'hours'], ['every', '4', 'to', '6', 'hours'], ['q46h'], ['q4-6h'], ['2', 'hours', 'before', 'breakfast'], ['before', '30', 'mins', 'at', 'bedtime'], ['30', 'mins', 'before', 'bed'], ['and', '100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'once', 'a', 'month'], ['100', 'tabs', 'thrice', 'a', 'month'], ['3', 'tabs', 'daily', 'for', '3', 'days', 'then', '1', 'tab', 'per', 'day', 'at', 'bed'], ['30', 'tabs', '10', 'days', 'tid'], ['take', '30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['qid', 'q6h'], ['bid'], ['qid'], ['30', 'tabs', 'before', 'dinner', 'and', 'bedtime'], ['30', 'tabs', 'before', 'dinner', '&', 'bedtime'], ['take', '3', 'tabs', 'at', 'bedtime'], ['30', 'tabs', 'thrice', 'daily', 'for', '10', 'days'], ['30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['take', '2', 'tablets', 'a', 'day'], ['qid', 'for', '10', 'days'], ['every', 'day'], ['take', '2', 'caps', 'at', 'bedtime'], ['apply', '3', 'drops', 'before', 'bedtime'], ['take', 'three', 'capsules', 'daily'], ['swallow', '3', 'pills', 'once', 'a', 'day'], ['swallow', 'three', 'pills', 'thrice', 'a', 'day'], ['apply', 'daily'], ['apply', 'three', 'drops', 'before', 'bedtime'], ['every', '6', 
'hours'], ['before', 'food'], ['after', 'food'], ['for', '20', 'days'], ['for', 'twenty', 'days'], ['with', 'meals']]
output_labels = [['FOR', 'Duration', 'TO', 'DurationMax', 'DurationUnit'], ['Method', 'Qty', 'Form'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['EVERY', 'Period'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'Form', 'PO', 'BID', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'TID', 'PO'], ['Method', 'Qty', 'Form', 'PO', 'EVERY', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'PO', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'BY', 'PO', 'TID', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'AFTER', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Q46H'], ['Q4-6H'], ['Qty', 'PeriodUnit', 'BEFORE', 'WHEN'], ['BEFORE', 'Qty', 'M', 'AT', 'WHEN'], ['Qty', 'M', 'BEFORE', 'WHEN'], ['AND', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'FOR', 'Duration', 'DurationUnit', 'THEN', 'Qty', 'Form', 'Frequency', 'PeriodUnit', 'AT', 'WHEN'], ['Qty', 'Form', 'Duration', 'DurationUnit', 'TID'], ['Method', 'Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Qty', 'TIMES', 'Period', 'PeriodUnit'], ['QID', 'Q6H'], ['BID'], ['QID'],['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Qty', 'Form', 'Frequency', 'DAILY', 'FOR', 'Duration', 'DurationUnit'], ['Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Frequency', 'TIMES', 'Period', 
'PeriodUnit'], ['Method', 'Qty', 'Form', 'Period', 'PeriodUnit'], ['QID', 'FOR', 'Duration', 'DurationUnit'], ['EVERY', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['Method', 'Qty', 'Form', 'DAILY'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'DAILY'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['EVERY', 'Period', 'PeriodUnit'], ['BEFORE', 'FOOD'], ['AFTER', 'FOOD'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['WITH', 'FOOD']]

In [36]:
len(sigs), len(input_sigs) , len(output_labels)

(56, 56, 56)

### Creating a Tuples Maker method
Create the tuples shown below by writing a function **tuples_maker(input_sigs, output_labels)** that returns **output** in the following form.

Input(s): 
- input_sigs
- output_labels

Output:

[[('for', 'FOR'),
  ('5', 'Duration'),
  ('to', 'TO'),
  ('6', 'DurationMax'),
  ('days', 'DurationUnit')], [second sentence], ...]

In [37]:
def tuples_maker(input_sigs, output_labels):
    """Pair every token with its label, sentence by sentence.

    Args:
        input_sigs: list of tokenised sentences,
            e.g. ``[['for', '5', 'days']]``.
        output_labels: list of label sequences aligned one-to-one with
            ``input_sigs``, e.g. ``[['FOR', 'Duration', 'DurationUnit']]``.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(token, label)`` tuples in token order.

    Raises:
        ValueError: if the number of sentences and label sequences differ,
            or if a sentence has a different number of tokens and labels.
    """
    input_sigs = list(input_sigs)
    output_labels = list(output_labels)
    # zip() would silently drop the unmatched tail, which shifts or loses
    # labels in the training data -- fail loudly instead.
    if len(input_sigs) != len(output_labels):
        raise ValueError(
            'got %d sentences but %d label sequences'
            % (len(input_sigs), len(output_labels))
        )

    output = []
    for index, (tokens, labels) in enumerate(zip(input_sigs, output_labels)):
        tokens = list(tokens)
        labels = list(labels)
        if len(tokens) != len(labels):
            raise ValueError(
                'sentence %d has %d tokens but %d labels'
                % (index, len(tokens), len(labels))
            )
        output.append(list(zip(tokens, labels)))
    return output

In [38]:
tuples_maker(input_sigs, output_labels)

[[('for', 'FOR'),
  ('5', 'Duration'),
  ('to', 'TO'),
  ('6', 'DurationMax'),
  ('days', 'DurationUnit')],
 [('inject', 'Method'), ('2', 'Qty'), ('units', 'Form')],
 [('x', 'FOR'), ('2', 'Duration'), ('weeks', 'DurationUnit')],
 [('x', 'FOR'), ('3', 'Duration'), ('days', 'DurationUnit')],
 [('every', 'EVERY'), ('day', 'Period')],
 [('every', 'EVERY'), ('2', 'Period'), ('weeks', 'PeriodUnit')],
 [('every', 'EVERY'), ('3', 'Period'), ('days', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('1', 'Period'),
  ('to', 'TO'),
  ('2', 'PeriodMax'),
  ('months', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('2', 'Period'),
  ('to', 'TO'),
  ('6', 'PeriodMax'),
  ('weeks', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('4', 'Period'),
  ('to', 'TO'),
  ('6', 'PeriodMax'),
  ('days', 'PeriodUnit')],
 [('take', 'Method'),
  ('two', 'Qty'),
  ('to', 'TO'),
  ('four', 'Qty'),
  ('tabs', 'Form')],
 [('take', 'Method'),
  ('2', 'Qty'),
  ('to', 'TO'),
  ('4', 'Qty'),
  ('tabs', 'Form')],
 [('take', 'Method'),
  ('3', 

### Creating the triples_maker( ) for feature extraction
- input: tuples_maker_output, pos_tags (one list of POS tags per sentence)
- output: 
[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')], [second sentence], ... ]

In [39]:
def triples_maker(tuples_maker_output, pos_tags):
    """Merge POS tags into the ``(token, label)`` pairs of every sentence.

    Args:
        tuples_maker_output: list of sentences, each a list of
            ``(token, label)`` tuples.
        pos_tags: list of POS tag sequences, one per sentence and one tag
            per token.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(token, postag, label)`` triples in token order.

    Raises:
        ValueError: if the number of sentences and tag sequences differ, or
            if a sentence has a different number of tokens and tags.
    """
    sentences = list(tuples_maker_output)
    tag_sequences = list(pos_tags)
    # zip() would silently drop the unmatched tail and misalign tags with
    # tokens -- fail loudly instead.
    if len(sentences) != len(tag_sequences):
        raise ValueError(
            'got %d sentences but %d POS tag sequences'
            % (len(sentences), len(tag_sequences))
        )

    output = []
    for index, (sentence, sentence_tags) in enumerate(zip(sentences, tag_sequences)):
        sentence = list(sentence)
        sentence_tags = list(sentence_tags)
        if len(sentence) != len(sentence_tags):
            raise ValueError(
                'sentence %d has %d tokens but %d POS tags'
                % (index, len(sentence), len(sentence_tags))
            )
        output.append([
            (word, pos, label)
            for (word, label), pos in zip(sentence, sentence_tags)
        ])
    return output

def get_pos_tags(sentences):
    """Return the POS tag sequence of every tokenised sentence.

    Each sentence (a list of tokens) is tagged with ``nltk.pos_tag`` and only
    the tag of each ``(word, tag)`` pair is kept, so the result has one list
    of tags per input sentence.
    """
    return [
        [tag for _token, tag in nltk.pos_tag(tokens)]
        for tokens in sentences
    ]

In [42]:
# Pair tokens with their labels, POS-tag the raw tokens, then merge both
# into (token, postag, label) triples (displayed as the cell output).
tuples_maker_output = tuples_maker(input_sigs, output_labels)
pos_tags = get_pos_tags(input_sigs)
triples_maker(tuples_maker_output, pos_tags)

[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')],
 [('inject', 'JJ', 'Method'), ('2', 'CD', 'Qty'), ('units', 'NNS', 'Form')],
 [('x', 'RB', 'FOR'),
  ('2', 'CD', 'Duration'),
  ('weeks', 'NNS', 'DurationUnit')],
 [('x', 'RB', 'FOR'),
  ('3', 'CD', 'Duration'),
  ('days', 'NNS', 'DurationUnit')],
 [('every', 'DT', 'EVERY'), ('day', 'NN', 'Period')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('3', 'CD', 'Period'),
  ('days', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('1', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('2', 'CD', 'PeriodMax'),
  ('months', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('4', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('

### Creating the features extractor method (GIVEN as a BASELINE)
#### The features used are:
- BOS, EOS, lowercase, uppercase, title, digit, postag, previous_tag, next_tag
#### Feel free to include more features

In [43]:
def token_to_features(doc, i):
    """Build the CRF feature list for the token at position ``i`` of ``doc``.

    Every item of ``doc`` must start with ``(word, postag, ...)``; only those
    two fields are read.  The returned list of feature strings describes the
    token itself followed by its left and right neighbours; a missing
    neighbour is replaced by the ``'BOS'`` / ``'EOS'`` marker.
    """
    token, tag = doc[i][0], doc[i][1]

    def neighbour_features(position, prefix):
        # Shape and POS features of an adjacent token, namespaced by prefix
        # ('-1' for the previous token, '+1' for the next one).
        text, text_tag = doc[position][0], doc[position][1]
        return [
            prefix + ':word.lower=' + text.lower(),
            '{}:word.istitle={}'.format(prefix, text.istitle()),
            '{}:word.isupper={}'.format(prefix, text.isupper()),
            '{}:word.isdigit={}'.format(prefix, text.isdigit()),
            prefix + ':postag=' + text_tag,
        ]

    # Features of the token itself
    shape_flags = (
        ('isupper', token.isupper()),
        ('istitle', token.istitle()),
        ('isdigit', token.isdigit()),
    )
    features = [
        'bias',
        'word.lower=' + token.lower(),
        'word[-3:]=' + token[-3:],
        'word[-2:]=' + token[-2:],
    ]
    features += ['word.{}={}'.format(name, value) for name, value in shape_flags]
    features.append('postag=' + tag)

    # Left context, or the beginning-of-document marker
    features += neighbour_features(i - 1, '-1') if i > 0 else ['BOS']

    # Right context, or the end-of-document marker
    features += neighbour_features(i + 1, '+1') if i < len(doc) - 1 else ['EOS']

    return features

### Running the feature extractor on the training data 
- Feature extraction
- Train-test-split

In [89]:
# Merge tokens, POS tags and labels into (token, postag, label) triples.
output = triples_maker(tuples_maker_output, pos_tags)
from sklearn.model_selection import train_test_split

def get_features(doc):
    """Return one feature list per token of ``doc``, in token order.

    Each feature list is produced by ``token_to_features`` for the token at
    the corresponding position.
    """
    feature_rows = []
    for position, _ in enumerate(doc):
        feature_rows.append(token_to_features(doc, position))
    return feature_rows

def get_labels(doc):
    """Return the label of every ``(token, postag, label)`` triple in ``doc``."""
    tags = []
    for _token, _postag, tag in doc:
        tags.append(tag)
    return tags

# Build one feature sequence and one label sequence per sentence.
features = [get_features(doc) for doc in output]
labels = [get_labels(doc) for doc in output]

# Hold out 20% of the sentences for testing.  A fixed random_state keeps the
# split -- and therefore the trained model and its predictions -- the same
# from one run of the notebook to the next.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

### Training the CRF model with the features extracted using the feature extractor method

In [90]:
import pycrfsuite

# create a trainer; verbose=True prints the training log shown in the
# cell output
trainer = pycrfsuite.Trainer(verbose=True)

# submit training data to the trainer, one (feature sequence, label
# sequence) pair per sentence
for xseq, yseq in zip(features_train, labels_train):
    trainer.append(xseq, yseq)

# set the parameters of the model
# NOTE(review): c1 / c2 are presumably the L1 / L2 regularisation
# coefficients -- confirm against the CRFsuite parameter reference
trainer.set_params({
    'c1': 0.1,
    'c2': 0.1,
    'max_iterations': 100,
    'feature.possible_transitions': True
})

# train the model; the file name passed here is the same one the tagger
# opens later for prediction
trainer.train('crf_model.crfsuite')

Feature generation
type: CRF1d
feature.minfreq: 0.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 1744
Seconds required: 0.010

L-BFGS optimization
c1: 0.100000
c2: 0.100000
num_memories: 6
max_iterations: 100
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 541.990421
Feature norm: 1.000000
Error norm: 108.551529
Active features: 1732
Line search trials: 1
Line search step: 0.007091
Seconds required for this iteration: 0.001

***** Iteration #2 *****
Loss: 325.980636
Feature norm: 5.696520
Error norm: 125.998703
Active features: 1450
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 0.003

***** Iteration #3 *****
Loss: 231.392172
Feature norm: 7.100058
Error norm: 57.041779
Active features: 1301
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 

### Predicting the test data with the built model

In [91]:
# create a tagger; the trained model is loaded into it in the next step
tagger = pycrfsuite.Tagger()

In [92]:
# load the trained model from the file written by trainer.train();
# open() returns a contextlib.closing wrapper (see the cell output)
tagger.open('crf_model.crfsuite')

<contextlib.closing at 0x1bd44488d10>

In [93]:
# label every held-out test sequence; features_test is the test portion
# produced by train_test_split
predictions = [tagger.tag(xseq) for xseq in features_test]

In [110]:
def get_features(tokens):
    """Extract CRF features for an unlabelled, tokenised sig.

    NOTE: this redefines the ``get_features(doc)`` helper used when building
    the training data; from this point on the name refers to this version.

    Args:
        tokens: list of ``(token, placeholder)`` pairs; only the token is
            read, the second field is ignored.

    Returns:
        One feature list per token, in token order.  The features are built
        by ``token_to_features`` so prediction always uses exactly the same
        feature definitions as training.
    """
    if not tokens:
        # Nothing to tag, so there are no features to extract.
        return []

    # POS-tag the bare words.  nltk.pos_tag returns (word, tag) pairs, which
    # is the (word, postag, ...) layout token_to_features reads.
    tagged = nltk.pos_tag([token for token, _ in tokens])
    return [token_to_features(tagged, i) for i in range(len(tagged))]

### Putting all the prediction logic inside a predict method

In [195]:
def predict(sig):
    """Label every whitespace-separated token of a prescription sig.

    Args:
        sig: the prescription text, e.g. ``"take 2 tabs every 6 hours"``.

    Returns:
        A one-element list wrapping the labelled sentence, which is a list
        of ``((token, ''), label)`` pairs.  The empty string is the POS-tag
        placeholder attached to each token before feature extraction.
    """
    # Tokenize the input sig and extract features before touching the model,
    # so a failure here never leaves an open model handle behind.
    sig_tokens = [(token, '') for token in sig.split()]  # '' is a placeholder for the POS tag
    features = get_features(sig_tokens)

    # Load the trained CRF model
    tagger = pycrfsuite.Tagger()
    tagger.open('crf_model.crfsuite')
    try:
        # Use the model to predict labels
        predicted_labels = tagger.tag(features)
    finally:
        # Release the model handle instead of leaking one per call
        tagger.close()

    # Combine tokens and predicted labels
    labeled_sig = list(zip(sig_tokens, predicted_labels))

    return [labeled_sig]

### Sample predictions

In [196]:
predictions = predict("take 2 tabs every 6 hours x 10 days")
predictions

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('6', ''), 'Period'),
  (('hours', ''), 'PeriodUnit'),
  (('x', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [114]:
predictions = predict("2 capsu for 10 day at bed")
predictions

[[(('2', ''), 'Qty'),
  (('capsu', ''), 'Form'),
  (('for', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('day', ''), 'PeriodUnit'),
  (('at', ''), 'AT'),
  (('bed', ''), 'WHEN')]]

In [119]:
predictions = predict("2 capsu for 10 days at bed")
predictions

[[(('2', ''), 'Qty'),
  (('capsu', ''), 'Form'),
  (('for', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('days', ''), 'DurationUnit'),
  (('at', ''), 'AT'),
  (('bed', ''), 'WHEN')]]

In [120]:
predictions = predict("5 days 2 tabs at bed")
predictions

[[(('5', ''), 'Qty'),
  (('days', ''), 'Form'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('at', ''), 'AT'),
  (('bed', ''), 'WHEN')]]

In [121]:
predictions = predict("3 tabs qid x 10 weeks")
predictions

[[(('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('qid', ''), 'QID'),
  (('x', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('weeks', ''), 'DurationUnit')]]

In [122]:
predictions = predict("x 30 days")
predictions

[[(('x', ''), 'FOR'),
  (('30', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [123]:
predictions = predict("x 20 months")
predictions

[[(('x', ''), 'FOR'),
  (('20', ''), 'Duration'),
  (('months', ''), 'DurationUnit')]]

In [124]:
predictions = predict("take 2 tabs po tid for 10 days")
predictions

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('po', ''), 'PO'),
  (('tid', ''), 'TID'),
  (('for', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [125]:
predictions = predict("take 2 capsules po every 6 hours")
predictions

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('capsules', ''), 'Form'),
  (('po', ''), 'PO'),
  (('every', ''), 'EVERY'),
  (('6', ''), 'Period'),
  (('hours', ''), 'PeriodUnit')]]

In [126]:
predictions = predict("inject 2 units pu tid")
predictions

[[(('inject', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('units', ''), 'Form'),
  (('pu', ''), 'Frequency'),
  (('tid', ''), 'TID')]]

In [127]:
predictions = predict("swallow 3 caps tid by mouth")
predictions

[[(('swallow', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('caps', ''), 'Form'),
  (('tid', ''), 'TID'),
  (('by', ''), 'BY'),
  (('mouth', ''), 'PO')]]

In [128]:
predictions = predict("inject 3 units orally")
predictions

[[(('inject', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('units', ''), 'Form'),
  (('orally', ''), 'Frequency')]]

In [129]:
predictions = predict("orally take 3 tabs tid")
predictions

[[(('orally', ''), 'Method'),
  (('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('tid', ''), 'TID')]]

In [130]:
predictions = predict("by mouth take three caps")
predictions

[[(('by', ''), 'BY'),
  (('mouth', ''), 'PO'),
  (('take', ''), 'Method'),
  (('three', ''), 'Qty'),
  (('caps', ''), 'Form')]]

In [131]:
predictions = predict("take 3 tabs orally three times a day for 10 days at bedtime")
predictions

[[(('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('orally', ''), 'Frequency'),
  (('three', ''), 'Frequency'),
  (('times', ''), 'TIMES'),
  (('a', ''), 'Period'),
  (('day', ''), 'PeriodUnit'),
  (('for', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('days', ''), 'DurationUnit'),
  (('at', ''), 'AT'),
  (('bedtime', ''), 'WHEN')]]

In [132]:
predictions = predict("take 3 tabs orally bid for 10 days at bedtime")
predictions

[[(('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('orally', ''), 'Frequency'),
  (('bid', ''), 'BID'),
  (('for', ''), 'FOR'),
  (('10', ''), 'Duration'),
  (('days', ''), 'DurationUnit'),
  (('at', ''), 'AT'),
  (('bedtime', ''), 'WHEN')]]

In [133]:
predictions = predict("take 3 tabs bid orally at bed")
predictions

[[(('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('bid', ''), 'Frequency'),
  (('orally', ''), 'PeriodUnit'),
  (('at', ''), 'AT'),
  (('bed', ''), 'WHEN')]]

In [134]:
predictions = predict("take 10 capsules by mouth qid")
predictions

[[(('take', ''), 'Method'),
  (('10', ''), 'Qty'),
  (('capsules', ''), 'Form'),
  (('by', ''), 'BY'),
  (('mouth', ''), 'PO'),
  (('qid', ''), 'QID')]]

In [187]:
predictions = predict("inject 10 units orally qid x 3 months")
predictions

[[(('inject', ''), 'Method'),
  (('10', ''), 'Qty'),
  (('units', ''), 'Form'),
  (('orally', ''), 'Frequency'),
  (('qid', ''), 'QID'),
  (('x', ''), 'FOR'),
  (('3', ''), 'Duration'),
  (('months', ''), 'DurationUnit')]]

In [185]:
prediction = predict("please take 2 tablets per day for a month in the morning and evening each day")
prediction

[[(('please', ''), 'Method'),
  (('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tablets', ''), 'Form'),
  (('per', ''), 'Frequency'),
  (('day', ''), 'PeriodUnit'),
  (('for', ''), 'FOR'),
  (('a', ''), 'Period'),
  (('month', ''), 'PeriodUnit'),
  (('in', ''), 'AFTER'),
  (('the', ''), 'Period'),
  (('morning', ''), 'PeriodUnit'),
  (('and', ''), 'Period'),
  (('evening', ''), 'PeriodUnit'),
  (('each', ''), 'Period'),
  (('day', ''), 'PeriodUnit')]]

In [184]:
prediction = predict("Amoxcicillin QID 30 tablets")
prediction

[[(('Amoxcicillin', ''), 'Method'),
  (('QID', ''), 'Method'),
  (('30', ''), 'Qty'),
  (('tablets', ''), 'Form')]]

In [183]:
prediction = predict("take 3 tabs TID for 90 days with food")
prediction

[[(('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('TID', ''), 'Frequency'),
  (('for', ''), 'FOR'),
  (('90', ''), 'Duration'),
  (('days', ''), 'DurationUnit'),
  (('with', ''), 'Frequency'),
  (('food', ''), 'FOOD')]]

In [182]:
prediction = predict("with food take 3 tablets per day for 90 days")
prediction

[[(('with', ''), 'AFTER'),
  (('food', ''), 'FOOD'),
  (('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tablets', ''), 'Form'),
  (('per', ''), 'Frequency'),
  (('day', ''), 'PeriodUnit'),
  (('for', ''), 'FOR'),
  (('90', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [181]:
prediction = predict("with food take 3 tablets per week for 90 weeks")
prediction

[[(('with', ''), 'AFTER'),
  (('food', ''), 'FOOD'),
  (('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tablets', ''), 'Form'),
  (('per', ''), 'Frequency'),
  (('week', ''), 'PeriodUnit'),
  (('for', ''), 'FOR'),
  (('90', ''), 'Duration'),
  (('weeks', ''), 'DurationUnit')]]

In [180]:
prediction = predict("take 2-4 tabs")
prediction

[[(('take', ''), 'Method'), (('2-4', ''), 'Qty'), (('tabs', ''), 'Form')]]

In [179]:
prediction = predict("take 2 to 4 tabs")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('to', ''), 'TO'),
  (('4', ''), 'Qty'),
  (('tabs', ''), 'Form')]]

In [178]:
prediction = predict("take two to four tabs")
prediction

[[(('take', ''), 'Method'),
  (('two', ''), 'Qty'),
  (('to', ''), 'TO'),
  (('four', ''), 'Qty'),
  (('tabs', ''), 'Form')]]

In [177]:
prediction = predict("take 2-4 tabs for 8 to 9 days")
prediction

[[(('take', ''), 'Method'),
  (('2-4', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('for', ''), 'FOR'),
  (('8', ''), 'Duration'),
  (('to', ''), 'TO'),
  (('9', ''), 'PeriodMax'),
  (('days', ''), 'PeriodUnit')]]

In [176]:
prediction = predict("take 20 tabs every 6 to 8 days")
prediction

[[(('take', ''), 'Method'),
  (('20', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('6', ''), 'Period'),
  (('to', ''), 'TO'),
  (('8', ''), 'PeriodMax'),
  (('days', ''), 'PeriodUnit')]]

In [175]:
prediction = predict("take 2 tabs every 4 to 6 days")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('4', ''), 'Period'),
  (('to', ''), 'TO'),
  (('6', ''), 'PeriodMax'),
  (('days', ''), 'PeriodUnit')]]

In [174]:
prediction = predict("take 2 tabs every 2 to 10 weeks")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('2', ''), 'Period'),
  (('to', ''), 'TO'),
  (('10', ''), 'PeriodMax'),
  (('weeks', ''), 'PeriodUnit')]]

In [173]:
prediction = predict("take 2 tabs every 4 to 6 days")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('4', ''), 'Period'),
  (('to', ''), 'TO'),
  (('6', ''), 'PeriodMax'),
  (('days', ''), 'PeriodUnit')]]

In [172]:
prediction = predict("take 2 tabs every 2 to 10 months")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('2', ''), 'Period'),
  (('to', ''), 'TO'),
  (('10', ''), 'PeriodMax'),
  (('months', ''), 'PeriodUnit')]]

In [171]:
prediction = predict("every 60 mins")
prediction

[[(('every', ''), 'EVERY'),
  (('60', ''), 'Period'),
  (('mins', ''), 'PeriodUnit')]]

In [170]:
prediction = predict("every 10 mins")
prediction

[[(('every', ''), 'EVERY'),
  (('10', ''), 'Period'),
  (('mins', ''), 'PeriodUnit')]]

In [169]:
prediction = predict("every two to four months")
prediction

[[(('every', ''), 'EVERY'),
  (('two', ''), 'Period'),
  (('to', ''), 'TO'),
  (('four', ''), 'PeriodMax'),
  (('months', ''), 'PeriodUnit')]]

In [168]:
prediction = predict("take 2 tabs every 3 to 4 days")
prediction

[[(('take', ''), 'Method'),
  (('2', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('3', ''), 'Period'),
  (('to', ''), 'TO'),
  (('4', ''), 'PeriodMax'),
  (('days', ''), 'PeriodUnit')]]

In [167]:
prediction = predict("every 3 to 4 days take 20 tabs")
prediction

[[(('every', ''), 'EVERY'),
  (('3', ''), 'Period'),
  (('to', ''), 'TO'),
  (('4', ''), 'Duration'),
  (('days', ''), 'DurationUnit'),
  (('take', ''), 'Method'),
  (('20', ''), 'Qty'),
  (('tabs', ''), 'Form')]]

In [166]:
prediction = predict("once in every 3 days take 3 tabs")
prediction

[[(('once', ''), 'Frequency'),
  (('in', ''), 'PO'),
  (('every', ''), 'EVERY'),
  (('3', ''), 'Period'),
  (('days', ''), 'PeriodUnit'),
  (('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form')]]

In [165]:
prediction = predict("take 3 tabs once in every 3 days")
prediction

[[(('take', ''), 'Method'),
  (('3', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('once', ''), 'Frequency'),
  (('in', ''), 'PO'),
  (('every', ''), 'EVERY'),
  (('3', ''), 'Period'),
  (('days', ''), 'PeriodUnit')]]

In [164]:
prediction = predict("orally take 20 tabs every 4-6 weeks")
prediction

[[(('orally', ''), 'Method'),
  (('take', ''), 'Method'),
  (('20', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('every', ''), 'EVERY'),
  (('4-6', ''), 'Period'),
  (('weeks', ''), 'PeriodUnit')]]

In [163]:
prediction = predict("10 tabs x 2 days")
prediction

[[(('10', ''), 'Qty'),
  (('tabs', ''), 'Form'),
  (('x', ''), 'FOR'),
  (('2', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [162]:
prediction = predict("3 capsule x 15 days")
prediction

[[(('3', ''), 'Qty'),
  (('capsule', ''), 'Form'),
  (('x', ''), 'FOR'),
  (('15', ''), 'Duration'),
  (('days', ''), 'DurationUnit')]]

In [161]:
prediction = predict("10 tabs")
prediction

[[(('10', ''), 'Qty'), (('tabs', ''), 'Form')]]